comparison relabel_fasta.py @ 24:fe354f5dd0ee draft

planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 34034189622f4cf14edd12a4de43739c37b50730
author pjbriggs
date Thu, 30 Aug 2018 08:13:55 -0400
parents
children
comparison
equal deleted inserted replaced
23:545f23776953 24:fe354f5dd0ee
1 #!/usr/bin/env python
2
3 DESCRIPTION = \
4 """Replace FASTA labels with new labels <PREFIX>1, <PREFIX>2,
5 <PREFIX>3 ... (<PREFIX> is provided by the user via the command
6 line).
7
8 Can be used to label OTUs as OTU_1, OTU_2 etc.
9
10 This reimplements the functionality of the fasta_number.py utility
11 from https://drive5.com/python/fasta_number_py.html
12 """
13
14 import argparse
15
def relabel_fasta(fp,prefix,include_size=False):
    """
    Relabel sequence records in a FASTA file

    Each header line (one starting with '>') is replaced with
    '><PREFIX><N>', where N counts the records from 1 in the
    order they appear in the input. Blank lines are dropped and
    every other line is passed through with its trailing newline
    removed.

    Arguments:
      fp (File): file-like object opened for reading
        input FASTA data from (any iterable of lines works)
      prefix (str): prefix to use in new labels
      include_size (bool): if True then copy
        'size=...' records into new labels (default
        is not to copy the size)

    Yields: updated lines from the input FASTA.

    Raises:
      Exception: if 'include_size' is True and a label does not
        contain a ';'-delimited field starting with 'size='.
    """
    nlabel = 0
    for line in fp:
        # Strip trailing newlines
        line = line.rstrip('\n')
        if not line:
            # Skip blank lines
            continue
        if not line.startswith('>'):
            # Sequence data: echo the line to output
            yield line
            continue
        # Start of a sequence record
        nlabel += 1
        if not include_size:
            # Use the 'prefix' argument rather than any module-level
            # state so the function also works when imported
            yield ">%s%d" % (prefix, nlabel)
            continue
        # Extract the first 'size=...' field from the label; a list
        # is built explicitly because filter() returns a
        # non-subscriptable iterator on Python 3
        label = line[1:].strip()
        sizes = [field for field in label.split(';')
                 if field.startswith("size=")]
        if not sizes:
            raise Exception("Couldn't locate 'size' in "
                            "label: %s" % label)
        yield ">%s%d;%s" % (prefix, nlabel, sizes[0])
60
if __name__ == "__main__":
    # Command line entry point: parse the arguments, relabel the
    # records in the input FASTA file and write the result to stdout
    import sys
    # Set up command line parser
    p = argparse.ArgumentParser(description=DESCRIPTION)
    p.add_argument("--needsize",
                   action="store_true",
                   help="include the size as part of the "
                   "output label ('size=...' must be present "
                   "in the input FASTA labels). Output labels "
                   "will be '<PREFIX><NUMBER>;size=<SIZE>'")
    # Accepted for command line compatibility only: the value is
    # never read because omitting the size is already the default
    p.add_argument("--nosize",
                   action="store_true",
                   help="don't include the size as part of "
                   "the output label (this is the default)")
    p.add_argument("fasta",
                   metavar="FASTA",
                   help="input FASTA file")
    p.add_argument("prefix",
                   metavar="PREFIX",
                   help="prefix to use for labels in output")
    # Process command line
    args = p.parse_args()
    # The 'U' (universal newlines) flag is needed on Python 2 but
    # was removed in Python 3.11; Python 3 text mode already
    # translates newlines by default
    if sys.version_info[0] < 3:
        mode = 'rU'
    else:
        mode = 'r'
    # Relabel FASTA
    with open(args.fasta,mode) as fasta:
        for line in relabel_fasta(fasta,
                                  args.prefix,
                                  include_size=args.needsize):
            # Parenthesised form prints the same text on Python 2
            # and is valid syntax on Python 3
            print(line)
88