diff gbk_to_fasta.py @ 0:cc961e057668 draft

Uploaded
author rijst
date Wed, 12 Dec 2012 06:26:26 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gbk_to_fasta.py	Wed Dec 12 06:26:26 2012 -0500
@@ -0,0 +1,51 @@
+"""Convert a GenBank file to a single-record FASTA file.
+
+Usage: gbk_to_fasta.py <input_file> <output_file>
+
+The input is parsed with Bio.GenBank.RecordParser. The output holds one
+header line of the form ">gi|<gi>[|gb|<accession>][|<definition>]" followed
+by the sequence wrapped at LINE_WIDTH characters per line. Too few arguments,
+an unreadable input or an unopenable output end the script through sys.exit.
+"""
+import sys
+
+# Number of sequence characters written per FASTA line.
+LINE_WIDTH = 70
+
+if len(sys.argv) < 3:
+    sys.exit("Not enough arguments passed, please provide names of input- and output file")
+
+input_name = sys.argv[1]
+output_name = sys.argv[2]
+
+from Bio import GenBank
+
+# Close the input handle as soon as parsing is done; catch Exception rather
+# than everything so Ctrl-C and sys.exit are not reported as a bad file.
+try:
+    with open(input_name) as in_file:
+        seq_record = GenBank.RecordParser().parse(in_file)
+except Exception:
+    sys.exit("Error reading %s, check file correctness." % input_name)
+
+try:
+    out_file = open(output_name, 'w')
+except IOError as e:
+    # Name the file that could not be opened and give the OS-provided reason,
+    # using a single formatting style so both placeholders are filled in.
+    sys.exit("Error trying to open '%s': %s" % (output_name, e.strerror))
+
+# Optional header parts are only emitted when the record provides them;
+# the accession list may be empty, so test it before indexing.
+accession = definition = ''
+if seq_record.accession and seq_record.accession[0] != '':
+    accession = '|gb|' + seq_record.accession[0]
+if seq_record.definition != '':
+    definition = '|' + seq_record.definition
+
+# The with-block closes the output file even if a write fails.
+with out_file:
+    out_file.write(">gi|%s%s%s\n" % (seq_record.gi, accession, definition))
+    sequence = seq_record.sequence
+    for start in range(0, len(sequence), LINE_WIDTH):
+        out_file.write(sequence[start:start + LINE_WIDTH] + "\n")