Mercurial > repos > iuc > glimmer_knowledge_based
comparison glimmer_gbk_to_orf.py @ 0:ab65146c4ff1 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
| author | iuc |
|---|---|
| date | Tue, 28 Nov 2017 09:57:03 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ab65146c4ff1 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 ################################################################### | |
| 4 # | |
| 5 # gbk2orf.py by Errol Strain (estrain@gmail.com) | |
| 6 # | |
| 7 # Read a GenBank file and export fasta formatted amino acid and | |
| 8 # CDS files | |
| 9 # | |
| 10 ################################################################### | |
| 11 | |
| 12 import sys | |
| 13 from optparse import OptionParser | |
| 14 | |
| 15 from Bio import SeqIO | |
| 16 from Bio.Seq import Seq | |
| 17 from Bio.SeqRecord import SeqRecord | |
| 18 | |
| 19 | |
| 20 # Command line usage | |
| 21 usage = "usage: %prog -g input.gbk -a aa.fasta -n nuc.fasta" | |
| 22 p = OptionParser(usage) | |
| 23 p.add_option("-t", "--translate", dest="transtabl", type="int", default=11, | |
| 24 help="Translation table used to translate coding regions (default=11)") | |
| 25 p.add_option("-g", "--genbank", dest="gb_file", help="GenBank input file") | |
| 26 p.add_option("-a", "--amino_acid", dest="aa_file", help="Fasta amino acid output") | |
| 27 p.add_option("-n", "--nucleotide", dest="orf_file", help="Fasta nucleotide output") | |
| 28 (opts, args) = p.parse_args() | |
| 29 # Do I need this next line? | |
| 30 if not opts and not args: | |
| 31 p.error("Use --help to see usage") | |
| 32 if len(sys.argv) == 1: | |
| 33 p.error("Use --help to see usage") | |
| 34 | |
| 35 # Lists to hold SeqRecords | |
| 36 aalist = [] | |
| 37 nuclist = [] | |
| 38 | |
| 39 # If the CDS does not have a locus tag the name will be assigned using the | |
| 40 # order in which it was found | |
| 41 feat_count = 0 | |
| 42 | |
| 43 # Iterate through genbank records in input file | |
| 44 for gb_record in SeqIO.parse(open(opts.gb_file, "r"), "genbank"): | |
| 45 for (index, feature) in enumerate(gb_record.features): | |
| 46 if feature.type == "CDS": | |
| 47 feat_count = feat_count + 1 | |
| 48 gene = feature.extract(gb_record.seq) | |
| 49 if "locus_tag" in feature.qualifiers: | |
| 50 value = feature.qualifiers["locus_tag"][0] | |
| 51 else: | |
| 52 value = "Index_" + str(feat_count) | |
| 53 nuclist.append(SeqRecord(Seq(str(gene)), id=value, name=value)) | |
| 54 pro = Seq(str(gene.translate(table=opts.transtabl, to_stop=True))) | |
| 55 aalist.append(SeqRecord(pro, id=value, name=value)) | |
| 56 | |
| 57 # Write out lists in fasta format | |
| 58 aa_handle = open(opts.aa_file, "w") | |
| 59 SeqIO.write(aalist, aa_handle, "fasta") | |
| 60 aa_handle.close() | |
| 61 orf_handle = open(opts.orf_file, "w") | |
| 62 SeqIO.write(nuclist, orf_handle, "fasta") | |
| 63 orf_handle.close() |
