Mercurial > repos > bcclaywell > argo_navis
view bin/metadata_from_seqnames.py @ 2:7eaf6f9abd28 draft default tip
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b-dirty
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:57:38 -0400 |
parents | d67268158946 |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Little script for parsing metadata out of sequence names given regular expressions. Supports parsing out
deme information and data information."""

import argparse
import csv
import re


def get_args():
    """Parse the command line.

    Returns the argparse namespace with:
      inseqs      -- path of the input FASTA file
      deme_regex  -- compiled regex (required) used to extract the deme
      time_regex  -- compiled regex used to extract the date, or None
      output      -- file object opened for writing the CSV
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('inseqs', help="Input sequences in fasta format")
    # `type=re.compile` lets argparse reject malformed patterns up front.
    parser.add_argument('-d', '--deme-regex', required=True, type=re.compile,
                        help="Regular expression with which to parse deme information")
    parser.add_argument('-t', '--time-regex', type=re.compile,
                        help="Regular expression with which to parse date information")
    parser.add_argument('output', type=argparse.FileType('w'))
    return parser.parse_args()


def extract_field(regex, seqname, field):
    """Return the first capture group of `regex` matched at the start of `seqname`.

    `field` is the human-readable name of what is being parsed ('deme' or
    'date'); it is only used to build the error message.

    Raises ValueError when the pattern does not match the sequence name or
    when it defines no capture group to extract.
    """
    found = regex.match(seqname)
    # Check both failure modes explicitly instead of relying on a blanket
    # `except Exception` to catch the resulting AttributeError/IndexError.
    if found is None or not found.groups():
        raise ValueError(
            "There was a problem parsing %s information for sequence %s. Try again." % (field, seqname))
    return found.group(1)


def main():
    """Write a CSV of sequence name, deme and (optionally) date for each input sequence."""
    # Imported here, before the arguments are parsed, so that the helpers
    # above can be used without Biopython installed while a missing
    # Biopython still fails before the output file gets created/truncated.
    from Bio import SeqIO

    args = get_args()
    header = ['sequence', 'deme']
    if args.time_regex:
        header.append('date')

    # try/finally so the output file is closed even when a name fails to parse.
    try:
        outwriter = csv.DictWriter(args.output, header)
        outwriter.writeheader()
        for seqrec in SeqIO.parse(args.inseqs, 'fasta'):
            seqname = seqrec.id
            rowdict = dict(sequence=seqname,
                           deme=extract_field(args.deme_regex, seqname, 'deme'))
            if args.time_regex:
                rowdict['date'] = extract_field(args.time_regex, seqname, 'date')
            outwriter.writerow(rowdict)
    finally:
        args.output.close()


if __name__ == '__main__':
    main()