diff parse_output.py @ 0:598c3e720567 draft default tip

planemo upload commit 4a2876135fca2f46ffa6451e463eb167a0b51b65
author stephenshank
date Thu, 21 Mar 2019 12:47:00 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/parse_output.py	Thu Mar 21 12:47:00 2019 -0400
@@ -0,0 +1,27 @@
+import os
+
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+from Bio import SeqIO
+
+
+def parse_abayesqr_output(input_text, output_fasta):
+    """Convert two-line haplotype records from a text file into FASTA.
+
+    The input is read as alternating lines: a header line whose last
+    whitespace-separated token is the haplotype frequency, followed by a
+    line holding that haplotype's sequence. Each pair becomes one record
+    with the id ``haplotype-<n>_freq-<frequency>``, where ``<n>`` counts
+    from 1 in file order and the frequency is formatted with ``%f``.
+
+    Blank lines found where a header is expected (for example trailing
+    empty lines at the end of the file) are skipped instead of raising
+    ``IndexError``. A final header with no sequence line after it yields
+    no record.
+
+    Args:
+        input_text: Path of the text file to read. Relative paths are
+            resolved against the current working directory.
+        output_fasta: Path of the FASTA file to write. Relative paths are
+            resolved against the current working directory.
+
+    Raises:
+        ValueError: If the last token of a header line is not a number.
+    """
+    records = []
+    header = None  # None means the next non-blank line is a header line
+    number = 0
+    with open(input_text) as input_file:
+        for line in input_file:
+            if header is None:
+                fields = line.split()
+                if not fields:
+                    # Blank line between records or at end of file.
+                    continue
+                number += 1
+                freq = float(fields[-1])
+                header = 'haplotype-%d_freq-%f' % (number, freq)
+            else:
+                # The sequence line is taken verbatim (minus surrounding
+                # whitespace), even if it turns out to be empty.
+                seq = Seq(line.strip())
+                records.append(SeqRecord(seq, id=header, description=''))
+                header = None
+    SeqIO.write(records, output_fasta, 'fasta')
+
+
+if __name__ == '__main__':
+    # Script entry point: fixed input and output names, both resolved
+    # against the current working directory.
+    parse_abayesqr_output(
+        input_text="test_ViralSeq.txt",
+        output_fasta="haplotypes.fasta",
+    )
+