Mercurial > repos > bcclaywell > argo_navis
comparison bin/metadata_from_seqnames.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:43:33 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d67268158946 |
---|---|
1 #!/usr/bin/env python | |
2 """Little script for parsing metadata out of sequence names given regular expressions. Supports parsing out | |
3 deme information and date information.""" | |
4 | |
5 import argparse | |
6 import csv | |
7 import re | |
8 from Bio import SeqIO | |
9 | |
10 | |
def _regex_type(pattern):
    """Compile `pattern` for use as an argparse ``type=`` converter.

    argparse only turns ArgumentTypeError, TypeError and ValueError into a
    clean usage error; ``re.error`` is none of those, so a malformed pattern
    would otherwise surface as a raw traceback.
    """
    try:
        return re.compile(pattern)
    except re.error as err:
        raise argparse.ArgumentTypeError(
            "invalid regular expression %r: %s" % (pattern, err))


def get_args():
    """Parse the command line and return the resulting argparse namespace.

    Attributes on the returned namespace:
        inseqs      -- the input FASTA path, as given (a plain string).
        deme_regex  -- compiled pattern for deme information (required).
        time_regex  -- compiled pattern for date information, or None if
                       the option was not supplied.
        output      -- a file object already opened for writing.

    Exits with a usage error (SystemExit) on missing/invalid arguments,
    including regular expressions that fail to compile.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('inseqs', help="Input sequences in fasta format")
    parser.add_argument('-d', '--deme-regex', required=True, type=_regex_type,
                        help="Regular expression with which to parse deme information")
    parser.add_argument('-t', '--time-regex', type=_regex_type,
                        help="Regular expression with which to parse date information")
    # FileType opens the file during parsing; '-' maps to standard output.
    parser.add_argument('output', type=argparse.FileType('w'),
                        help="Output file for the parsed metadata")
    return parser.parse_args()
20 | |
21 | |
def _extract_field(regex, seqname, field):
    """Return the first capture group of `regex` matched at the start of `seqname`.

    `field` is only used to word the error message ('deme' or 'date').
    Raises ValueError when the pattern does not match or defines no
    capture group, since in both cases there is nothing to extract.
    """
    match = regex.match(seqname)
    if match is None or not match.groups():
        raise ValueError(
            "There was a problem parsing %s information for sequence %s. Try again."
            % (field, seqname))
    return match.groups()[0]


def main():
    """Write one CSV row of parsed metadata per sequence in the input FASTA.

    The output has the columns 'sequence' and 'deme', plus 'date' when a
    time regex was supplied. Each value is the first capture group of the
    corresponding regex applied to the start of the sequence id.

    Raises ValueError (naming the offending sequence) when a regex cannot
    be applied to a sequence id.
    """
    args = get_args()
    # The output handle is opened by argument parsing, so make sure it is
    # flushed and closed even when a sequence name fails to parse.
    try:
        header = ['sequence', 'deme']
        if args.time_regex:
            header.append('date')

        outwriter = csv.DictWriter(args.output, header)
        outwriter.writeheader()
        for seqrec in SeqIO.parse(args.inseqs, 'fasta'):
            seqname = seqrec.id
            rowdict = {
                'sequence': seqname,
                'deme': _extract_field(args.deme_regex, seqname, 'deme'),
            }
            if args.time_regex:
                rowdict['date'] = _extract_field(args.time_regex, seqname, 'date')
            outwriter.writerow(rowdict)
    finally:
        args.output.close()
47 | |
48 | |
# Run the command-line entry point only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
51 | |
52 |