diff bin/metadata_from_seqnames.py @ 0:d67268158946 draft

planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author bcclaywell
date Mon, 12 Oct 2015 17:43:33 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/metadata_from_seqnames.py	Mon Oct 12 17:43:33 2015 -0400
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+"""Little script for parsing metadata out of sequence names given regular expressions. Supports parsing out
+deme information and date information."""
+
+import argparse
+import csv
+import re
+from Bio import SeqIO
+
+
+def get_args():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('inseqs', help="Input sequences in fasta format")
+    parser.add_argument('-d', '--deme-regex', required=True, type=re.compile,
+            help="Regular expression with which to parse deme information")
+    parser.add_argument('-t', '--time-regex', type=re.compile,
+            help="Regular expression with which to parse date information")
+    parser.add_argument('output', type=argparse.FileType('w'))
+    return parser.parse_args()
+
+
+def main():
+    args = get_args()
+    seqreader = SeqIO.parse(args.inseqs, 'fasta')
+
+    header = ['sequence', 'deme']
+    if args.time_regex:
+        header.append('date')
+
+    outwriter = csv.DictWriter(args.output, header)
+    outwriter.writeheader()
+    for seqrec in seqreader:
+        seqname = seqrec.id
+        try:
+            deme = args.deme_regex.match(seqname).groups()[0]
+        except Exception:
+            raise Exception, "There was a problem parsing deme information for sequence %s. Try again." % seqname
+        rowdict = dict(sequence=seqname, deme=deme)
+        if args.time_regex:
+            try:
+                rowdict['date'] = args.time_regex.match(seqname).groups()[0]
+            except Exception:
+                raise Exception, "There was a problem parsing date information for sequence %s. Try again." % seqname
+        outwriter.writerow(rowdict)
+
+    args.output.close()
+
+
+if __name__ == '__main__':
+    main()  # run only when executed as a script, not when imported
+
+