5
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Replace FASTA labels with new labels <PREFIX>1, <PREFIX>2,
|
|
5 <PREFIX>3 etc (where <PREFIX> is a user-provided argument).
|
|
6
|
|
7 Can be used to label OTUs as OTU_1, OTU_2 etc.
|
|
8
|
|
9 This is a reimplementation of the fasta_number.py script from
|
|
10 https://drive5.com/python/fasta_number_py.html
|
|
11 """
|
|
12
|
|
13 import argparse
|
|
14
|
|
def relabel_fasta(fp,prefix,include_size=False):
    """
    Relabel the sequence records read from a FASTA file

    Generator which iterates over the lines from 'fp' and
    yields them back with each header line (i.e. a line
    starting with '>') replaced by a new header of the form
    '><PREFIX><N>', where N is a running count of the headers
    seen so far (starting from 1).

    Blank lines are dropped; all other lines are yielded with
    the trailing line ending removed but otherwise unchanged.

    Arguments:
      fp (iterable): open file-like object (or any other
        iterable) returning the lines of the FASTA file
      prefix (str): string to put in front of the running
        count in each new label
      include_size (bool): if True then the first
        'size=...' field found in the ';'-separated
        original label is appended to the new label, to
        give '><PREFIX><N>;size=...' (default is to
        discard everything from the original label)

    Yields:
      String: the next line of relabelled output, without
        a trailing newline.

    Raises:
      Exception: if 'include_size' is True and a header
        doesn't contain a 'size=...' field.
    """
    nlabel = 0
    for line in fp:
        # Strip the line ending (handles both LF and CRLF, so
        # the result doesn't depend on how the file was opened)
        line = line.rstrip('\r\n')
        if not line:
            # Skip blank lines
            continue
        if not line.startswith('>'):
            # Sequence data: echo the line to output
            yield line
            continue
        # Header line: generate the replacement label
        nlabel += 1
        if not include_size:
            yield ">%s%d" % (prefix,nlabel)
            continue
        # Extract the size annotation from the original label
        label = line[1:].strip()
        # NB use a generator expression with 'next' rather than
        # indexing the result of 'filter', which returns a
        # non-subscriptable iterator under Python 3
        size = next((field for field in label.split(';')
                     if field.startswith("size=")),
                    None)
        if size is None:
            raise Exception("Couldn't locate 'size' in "
                            "label: %s" % label)
        yield ">%s%d;%s" % (prefix,nlabel,size)
|
|
48
|
|
if __name__ == "__main__":

    # Set up command line parser
    p = argparse.ArgumentParser()
    p.add_argument("--needsize",action="store_true",
                   help="append the 'size=...' annotation from "
                   "each original label to the new label")
    # NB the value of --nosize is never read below: size
    # annotations are only kept when --needsize is given
    p.add_argument("--nosize",action="store_true",
                   help="has no effect (size annotations are "
                   "omitted unless --needsize is given)")
    p.add_argument("fasta",
                   help="input FASTA file")
    p.add_argument("prefix",
                   help="prefix for the new sequence labels")

    # Process command line
    args = p.parse_args()

    # Relabel FASTA and write the result to stdout
    # NB open in plain text mode: the 'U' flag is deprecated
    # and is rejected by Python 3.11 onwards
    with open(args.fasta,'r') as fasta:
        for line in relabel_fasta(fasta,
                                  args.prefix,
                                  include_size=args.needsize):
            # Function-call form of 'print' is valid under
            # both Python 2 and Python 3
            print(line)
|
|
67
|
|
68
|