Mercurial > repos > bcclaywell > argo_navis
diff bin/metadata_from_seqnames.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
| author | bcclaywell |
|---|---|
| date | Mon, 12 Oct 2015 17:43:33 -0400 |
| parents | |
| children |
line wrap: on
line diff
#!/usr/bin/env python
"""Little script for parsing metadata out of sequence names given regular expressions. Supports parsing out
deme information and date information."""

import argparse
import csv
import re


def get_args(argv=None):
    """Parse the command line.

    argv: optional list of argument strings; when None, argparse falls back
        to sys.argv[1:], which is what running the script normally uses.

    Returns the argparse namespace with ``inseqs`` (path string),
    ``deme_regex`` (compiled pattern), ``time_regex`` (compiled pattern or
    None) and ``output`` (file handle already opened for writing).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('inseqs', help="Input sequences in fasta format")
    # type=re.compile makes argparse hand back compiled patterns directly.
    parser.add_argument('-d', '--deme-regex', required=True, type=re.compile,
                        help="Regular expression with which to parse deme information")
    parser.add_argument('-t', '--time-regex', type=re.compile,
                        help="Regular expression with which to parse date information")
    parser.add_argument('output', type=argparse.FileType('w'))
    return parser.parse_args(argv)


def parse_field(regex, seqname, field):
    """Return the first capture group of ``regex`` matched at the start of ``seqname``.

    field: label ('deme' or 'date') used only in the error message.

    Raises ValueError when the pattern does not match the sequence name or
    defines no capture group to read the value from.
    """
    match = regex.match(seqname)
    if match is None or not match.groups():
        raise ValueError("There was a problem parsing %s information for sequence %s. Try again."
                         % (field, seqname))
    return match.group(1)


def write_metadata(seqnames, output, deme_regex, time_regex=None):
    """Write one CSV row of parsed metadata per sequence name.

    seqnames: iterable of sequence name strings.
    output: writable text file handle; it is not closed here.
    deme_regex: compiled pattern whose first group is the deme.
    time_regex: optional compiled pattern whose first group is the date;
        the 'date' column is only written when this is given.

    Raises ValueError (from parse_field) on the first name that cannot be parsed.
    """
    with_date = time_regex is not None
    header = ['sequence', 'deme']
    if with_date:
        header.append('date')

    outwriter = csv.DictWriter(output, header)
    outwriter.writeheader()
    for seqname in seqnames:
        rowdict = {'sequence': seqname, 'deme': parse_field(deme_regex, seqname, 'deme')}
        if with_date:
            rowdict['date'] = parse_field(time_regex, seqname, 'date')
        outwriter.writerow(rowdict)


def main(argv=None):
    """Read the fasta file named on the command line and write the metadata CSV."""
    args = get_args(argv)
    try:
        # Biopython is only needed to read the input; importing it here keeps
        # --help and the parsing helpers usable without it installed.
        from Bio import SeqIO

        seqreader = SeqIO.parse(args.inseqs, 'fasta')
        write_metadata((seqrec.id for seqrec in seqreader), args.output,
                       args.deme_regex, args.time_regex)
    finally:
        # argparse opened the output handle, so release it even if parsing fails.
        args.output.close()


if __name__ == '__main__':
    main()
