Mercurial > repos > rhpvorderman > shm_csr
annotate summary_to_fasta.py @ 2:72d68e2e1fd3 draft
"planemo upload commit a8565b413cbe0ef70bc22994ecdd48b400ba6185"
| author | rhpvorderman |
|---|---|
| date | Fri, 05 Nov 2021 14:43:13 +0000 |
| parents | 64d74ba01a7c |
| children |
| rev | line source |
|---|---|
|
0
64d74ba01a7c
"planemo upload commit 78d1fae87dbcf490e49a9f99e7a06de7328e16d4"
rhpvorderman
parents:
diff
changeset
|
"""Convert the 1_Summary file of an IMGT zip file into a FASTA file."""
import argparse


def summary_to_fasta(infile, fasta):
    """Write every sequence found in an IMGT summary table to a FASTA file.

    The first line of ``infile`` is treated as a tab-separated header and
    must contain the columns "Sequence ID" and "Sequence". Each later line
    becomes one FASTA record unless it is too short or its sequence field
    is empty.

    Args:
        infile: Path of the tab-separated summary file to read.
        fasta: Path of the FASTA file to create (overwritten if present).

    Returns:
        A tuple ``(no_results, no_seqs, passed)``: the number of rows that
        were skipped for having too few fields, the number skipped for
        having an empty sequence, and the number of records written.

    Raises:
        ValueError: If the header lacks "Sequence ID" or "Sequence".
    """
    no_results = 0
    no_seqs = 0
    passed = 0
    with open(infile, 'r') as summary, open(fasta, 'w') as out:
        header = next(summary, None)
        if header is None:
            # Empty input: nothing to convert, leave an empty FASTA file.
            return no_results, no_seqs, passed

        # Strip the line terminator before splitting; otherwise the last
        # column name (and the last field of every row) carries a "\n".
        columns = header.rstrip("\n").split("\t")
        id_col = columns.index("Sequence ID")
        seq_col = columns.index("Sequence")
        # Rows with fewer than 5 fields were already counted as "no
        # results"; additionally require that both wanted columns exist so
        # a truncated row is counted instead of raising IndexError.
        min_fields = max(5, id_col + 1, seq_col + 1)

        for line in summary:
            fields = line.rstrip("\n").split("\t")
            if len(fields) < min_fields:
                no_results += 1
                continue

            seq = fields[seq_col]
            if not seq:
                no_seqs += 1
                continue

            out.write(">" + fields[id_col] + "\n" + seq + "\n")
            passed += 1
    return no_results, no_seqs, passed


def main(argv=None):
    """Parse the command line, run the conversion and print the counts.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True,
                        help="The 1_Summary file of an IMGT zip file")
    parser.add_argument("--fasta", required=True,
                        help="The output fasta file")
    args = parser.parse_args(argv)

    no_results, no_seqs, passed = summary_to_fasta(args.input, args.fasta)

    print("No results:", no_results)
    print("No sequences:", no_seqs)
    print("Written to fasta file:", passed)


if __name__ == "__main__":
    main()
