Mercurial > repos > pjbriggs > amplicon_analysis_pipeline
comparison relabel_fasta.py @ 5:bbfc9638ba84 draft
First version with (partial) bioconda deps.
author | pjbriggs |
---|---|
date | Wed, 13 Jun 2018 08:39:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:013bf1e2cc8f | 5:bbfc9638ba84 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 Replace FASTA labels with new labels <PREFIX>1, <PREFIX>2, | |
5 <PREFIX>3 etc (where <PREFIX> is a user-provided argument). | |
6 | |
7 Can be used to label OTUs as OTU_1, OTU_2 etc. | |
8 | |
9 This is a reimplementation of the fasta_number.py script from | |
10 https://drive5.com/python/fasta_number_py.html | |
11 """ | |
12 | |
13 import argparse | |
14 | |
def relabel_fasta(fp, prefix, include_size=False):
    """
    Relabel the sequences in a FASTA file

    Generator which iterates over the lines from 'fp' and yields
    them back with each header line (i.e. a line starting with
    '>') replaced by '><PREFIX><N>', where N is the running count
    of header lines seen so far (starting from 1).

    Empty lines are dropped; all other lines are yielded as-is
    except that trailing newline characters are removed.

    Arguments:
      fp: iterable yielding the lines of the FASTA data (for
        example a file object opened for reading)
      prefix: string to put in front of the sequence number in
        each new label
      include_size: if True then the first 'size=...' field of
        the original semicolon-delimited label is appended to the
        new label, giving '><PREFIX><N>;size=...'

    Yields:
      String: output lines, without trailing newlines.

    Raises:
      Exception: if 'include_size' is True and a header line has
        no field starting with 'size='.
    """
    nlabel = 0
    for line in fp:
        # Strip trailing newlines
        line = line.rstrip('\n')
        if not line:
            # Skip blank lines
            continue
        if not line.startswith('>'):
            # Sequence data: echo the line to output
            yield line
            continue
        # Header line: replace with the next numbered label
        nlabel += 1
        if not include_size:
            yield ">%s%d" % (prefix, nlabel)
            continue
        # Extract the first 'size=...' field from the old label;
        # next() with a default works whether or not the fields
        # come from a list or an iterator (Python 2 and 3)
        label = line[1:].strip()
        size = next((field for field in label.split(';')
                     if field.startswith("size=")),
                    None)
        if size is None:
            raise Exception("Couldn't locate 'size' in "
                            "label: %s" % label)
        yield ">%s%d;%s" % (prefix, nlabel, size)
48 | |
if __name__ == "__main__":

    import sys

    # Set up command line parser
    p = argparse.ArgumentParser()
    # --needsize: carry the 'size=...' field over to the new labels
    p.add_argument("--needsize",action="store_true")
    # --nosize: accepted on the command line but never read
    # anywhere in this script
    p.add_argument("--nosize",action="store_true")
    p.add_argument("fasta")
    p.add_argument("prefix")

    # Process command line
    # NB 'args' is deliberately kept as a module-level name
    args = p.parse_args()

    # The 'U' (universal newlines) flag is needed on Python 2 but
    # was removed in Python 3.11, where text mode already
    # translates newlines by default
    if sys.version_info[0] < 3:
        mode = 'rU'
    else:
        mode = 'r'

    # Relabel FASTA and write the result to stdout
    with open(args.fasta,mode) as fasta:
        for line in relabel_fasta(fasta,
                                  args.prefix,
                                  include_size=args.needsize):
            # Explicit write instead of the Python 2-only
            # 'print' statement
            sys.stdout.write("%s\n" % line)
67 | |
68 |