annotate summary_to_fasta.py @ 2:72d68e2e1fd3 draft

"planemo upload commit a8565b413cbe0ef70bc22994ecdd48b400ba6185"
author rhpvorderman
date Fri, 05 Nov 2021 14:43:13 +0000
parents 64d74ba01a7c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
64d74ba01a7c "planemo upload commit 78d1fae87dbcf490e49a9f99e7a06de7328e16d4"
rhpvorderman
parents:
diff changeset
import argparse


def summary_to_fasta(infile, fasta):
    """Write the sequences listed in a tab-separated summary file as FASTA.

    The first line of ``infile`` is the header; it must contain the columns
    "Sequence ID" and "Sequence". Every following row is handled as follows:

    * fewer than 5 fields: counted as "no results" and skipped;
    * too short to reach the ID/sequence column, or an empty sequence:
      counted as "no sequences" and skipped;
    * otherwise written to ``fasta`` as ``>ID`` followed by the sequence.

    Args:
        infile: Path of the summary file to read.
        fasta: Path of the FASTA file to (over)write.

    Returns:
        A ``(no_results, no_seqs, passed)`` tuple of row counts.

    Raises:
        ValueError: If the header lacks "Sequence ID" or "Sequence".
    """
    no_results = 0
    no_seqs = 0
    passed = 0

    with open(infile, 'r') as i, open(fasta, 'w') as o:
        header = next(i, None)
        if header is None:
            # Empty input: nothing to convert, leave an empty fasta file.
            return no_results, no_seqs, passed

        # Strip the line terminator before splitting; otherwise the last
        # column name would be e.g. "Sequence\n" and the lookup would fail.
        columns = header.rstrip("\n").split("\t")
        id_col = columns.index("Sequence ID")
        seq_col = columns.index("Sequence")
        last_needed = max(id_col, seq_col)

        for line in i:
            # Same reason as for the header: without this, an empty sequence
            # in the last column would be read as "\n" and written out.
            splt = line.rstrip("\n").split("\t")
            if len(splt) < 5:
                no_results += 1
                continue
            if len(splt) <= last_needed:
                # Row is truncated before the ID/sequence column, so there
                # is no sequence to write (previously an IndexError).
                no_seqs += 1
                continue

            seq_id = splt[id_col]
            seq = splt[seq_col]
            if not seq:
                no_seqs += 1
                continue

            o.write(">" + seq_id + "\n" + seq + "\n")
            passed += 1

    return no_results, no_seqs, passed


def main():
    """Parse the command line, run the conversion and print the row counts."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", help="The 1_Summary file of an IMGT zip file")
    parser.add_argument("--fasta", help="The output fasta file")
    args = parser.parse_args()

    no_results, no_seqs, passed = summary_to_fasta(args.input, args.fasta)

    print("No results:", no_results)
    print("No sequences:", no_seqs)
    print("Written to fasta file:", passed)


if __name__ == "__main__":
    main()