comparison relabel_fasta.py @ 5:bbfc9638ba84 draft

First version with (partial) bioconda deps.
author pjbriggs
date Wed, 13 Jun 2018 08:39:26 -0400
parents
children
comparison
equal deleted inserted replaced
4:013bf1e2cc8f 5:bbfc9638ba84
1 #!/usr/bin/env python
2
3 """
4 Replace FASTA labels with new labels <PREFIX>1, <PREFIX>2,
5 <PREFIX>3 etc (where <PREFIX> is a user-provided argument).
6
7 Can be used to label OTUs as OTU_1, OTU_2 etc.
8
9 This is a reimplementation of the fasta_number.py script from
10 https://drive5.com/python/fasta_number_py.html
11 """
12
13 import argparse
14
def relabel_fasta(fp, prefix, include_size=False):
    """
    Relabel the sequences in FASTA data

    Generator which replaces each header line (i.e. a line
    starting with '>') with a new label '<PREFIX><N>', where
    N is the number of headers seen so far (counting from 1).
    Blank lines are dropped; all other lines are passed
    through with their trailing newline removed.

    Arguments:
      fp: iterable yielding the lines of the FASTA data (for
        example a file object opened for reading)
      prefix (str): text to put in front of the sequence
        number in each new label
      include_size (bool): if True then the first
        ';'-separated field starting with 'size=' is taken
        from the original label and appended to the new one
        (giving '<PREFIX><N>;size=...'); if False (the
        default) any size information is discarded

    Yields:
      String: next line of relabelled output (without a
        trailing newline).

    Raises:
      ValueError: if 'include_size' is True and a label has
        no field starting with 'size='.
    """
    nlabel = 0
    for line in fp:
        # Strip trailing newlines
        line = line.rstrip('\n')
        if not line:
            # Skip blank lines
            continue
        if not line.startswith('>'):
            # Sequence data: echo the line to output
            yield line
            continue
        # Header line: assign the next label
        nlabel += 1
        if not include_size:
            yield ">%s%d" % (prefix, nlabel)
            continue
        # Extract size from the original label. A generator with
        # next() is used rather than indexing the result of
        # filter(), which is not subscriptable on Python 3.
        label = line[1:].strip()
        size = next((field for field in label.split(';')
                     if field.startswith("size=")),
                    None)
        if size is None:
            raise ValueError("Couldn't locate 'size' in "
                             "label: %s" % label)
        yield ">%s%d;%s" % (prefix, nlabel, size)
48
if __name__ == "__main__":
    # Script entry point: read the FASTA file named on the command
    # line and write the relabelled records to standard output.

    # Local import: only needed to pick the file mode below
    import sys

    # Set up command line parser
    p = argparse.ArgumentParser(
        description="Replace FASTA labels with new labels "
        "<PREFIX>1, <PREFIX>2, <PREFIX>3 etc")
    p.add_argument("--needsize", action="store_true",
                   help="append the 'size=...' field from each "
                   "original label to the new label (fails if a "
                   "label has no size)")
    # NOTE(review): --nosize is parsed but never read anywhere in
    # this script; size information is already dropped unless
    # --needsize is given.
    p.add_argument("--nosize", action="store_true",
                   help="accepted but currently has no effect")
    p.add_argument("fasta", help="input FASTA file")
    p.add_argument("prefix", help="prefix for the new labels")

    # Process command line
    args = p.parse_args()

    # The 'U' (universal newlines) flag is needed on Python 2 but
    # was removed in Python 3.11, where text mode already
    # translates newlines by default
    mode = 'rU' if sys.version_info[0] < 3 else 'r'

    # Relabel FASTA
    with open(args.fasta, mode) as fasta:
        for line in relabel_fasta(fasta,
                                  args.prefix,
                                  include_size=args.needsize):
            # Single-argument call form prints identically on
            # Python 2 and Python 3
            print(line)
67
68