Mercurial > repos > rhpvorderman > shm_csr
diff summary_to_fasta.py @ 0:64d74ba01a7c draft
"planemo upload commit 78d1fae87dbcf490e49a9f99e7a06de7328e16d4"
| author | rhpvorderman |
|---|---|
| date | Wed, 27 Oct 2021 12:34:47 +0000 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/summary_to_fasta.py Wed Oct 27 12:34:47 2021 +0000 @@ -0,0 +1,42 @@ +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--input", help="The 1_Summary file of an IMGT zip file") +parser.add_argument("--fasta", help="The output fasta file") + +args = parser.parse_args() + +infile = args.input +fasta = args.fasta + +with open(infile, 'r') as i, open(fasta, 'w') as o: + first = True + id_col = 0 + seq_col = 0 + no_results = 0 + no_seqs = 0 + passed = 0 + for line in i: + splt = line.split("\t") + if first: + id_col = splt.index("Sequence ID") + seq_col = splt.index("Sequence") + first = False + continue + if len(splt) < 5: + no_results += 1 + continue + + ID = splt[id_col] + seq = splt[seq_col] + + if not len(seq) > 0: + no_seqs += 1 + continue + + o.write(">" + ID + "\n" + seq + "\n") + passed += 1 + + print("No results:", no_results) + print("No sequences:", no_seqs) + print("Written to fasta file:", passed)
