augustus_training: extract_features.py comparison

comparison extract_features.py @ 4:da01a05d91c5 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit bba7f5df059fcbeb06e89cf689e9a04d4f22cb76"

author	iuc
date	Thu, 15 Jul 2021 17:15:16 +0000
parents	101933e63fa8
children

comparison

Legend: equal | deleted | inserted | replaced

-:7d3fa213c3d3
+:da01a05d91c5
 import argparse
 import sys
 import textwrap
-def main( args ):
+def main(args):
 """
 Extract the protein and coding section from an augustus gff, gtf file
 Example file:
 HS04636	AUGUSTUS	stop_codon	6901	6903	.	+	0	Parent=g1.t1
 HS04636	AUGUSTUS	transcription_end_site	8857	8857	.	+	.	Parent=g1.t1
 # protein sequence = [MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYIL
 # THFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPD
 # PQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVG
 # QEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYH
 # WHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGE
 # KEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSV
 # PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL]
 # end gene g1
 ###
 #
 # ----- prediction on sequence number 2 (length = 2344, name = HS08198) -----
 #
 # Predicted genes for sequence number 2 on both strands
 # start gene g2
 HS08198	AUGUSTUS	gene	86	2344	1	+	.	ID=g2
 HS08198	AUGUSTUS	transcript	86	2344	.	+	.	ID=g2.t1;Parent=g2
 HS08198	AUGUSTUS	transcription_start_site	86	86	.	+	.	Parent=g2.t1
 HS08198	AUGUSTUS	exon	86	582	.	+	.	Parent=g2.t1
 HS08198	AUGUSTUS	start_codon	445	447	.	+	0	Parent=g2.t1
 """
-protein_seq = ''
+protein_seq = ""
-coding_seq = ''
+coding_seq = ""
 if args.protein:
-po = open( args.protein, 'w+' )
+po = open(args.protein, "w+")
 if args.codingseq:
-co = open( args.codingseq, 'w+' )
+co = open(args.codingseq, "w+")
 for line in sys.stdin:
 # protein- and coding-sequence are stored as comments
-if line.startswith('#'):
+if line.startswith("#"):
 line = line[2:].strip()
-if line.startswith('start gene'):
+if line.startswith("start gene"):
 gene_name = line[11:].strip()
 if protein_seq:
-if line.endswith(']'):
+if line.endswith("]"):
 protein_seq += line[:-1]
-po.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( protein_seq, 80 ) ) ) )
-protein_seq = ''
+po.write(
+">%s\n%s\n"
+% (gene_name, "\n".join(textwrap.wrap(protein_seq, 80)))
+)
+protein_seq = ""
 else:
 protein_seq += line
 if coding_seq:
-if line.endswith(']'):
+if line.endswith("]"):
 coding_seq += line[:-1]
-co.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( coding_seq, 80 ) ) ) )
-coding_seq = ''
+co.write(
+">%s\n%s\n"
+% (gene_name, "\n".join(textwrap.wrap(coding_seq, 80)))
+)
+coding_seq = ""
 else:
 coding_seq += line
-if args.protein and line.startswith('protein sequence = ['):
+if args.protein and line.startswith("protein sequence = ["):
-if line.endswith(']'):
+if line.endswith("]"):
 protein_seq = line[20:-1]
-po.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( protein_seq, 80 ) ) ) )
-protein_seq = ''
+po.write(
+">%s\n%s\n"
+% (gene_name, "\n".join(textwrap.wrap(protein_seq, 80)))
+)
+protein_seq = ""
 else:
 line = line[20:]
 protein_seq = line
-if args.codingseq and line.startswith('coding sequence = ['):
+if args.codingseq and line.startswith("coding sequence = ["):
-if line.endswith(']'):
+if line.endswith("]"):
 coding_seq = line[19:-1]
-co.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( coding_seq, 80 ) ) ) )
-coding_seq = ''
+co.write(
+">%s\n%s\n"
+% (gene_name, "\n".join(textwrap.wrap(coding_seq, 80)))
+)
+coding_seq = ""
 else:
 line = line[19:]
 coding_seq = line
 if args.codingseq:
 co.close()
 if args.protein:
 po.close()
-if __name__ == '__main__':
+if __name__ == "__main__":
 parser = argparse.ArgumentParser()
-parser.add_argument('-p', '--protein', help='Path to the protein file.')
+parser.add_argument("-p", "--protein", help="Path to the protein file.")
-parser.add_argument('-c', '--codingseq', help='Path to the coding file.')
+parser.add_argument("-c", "--codingseq", help="Path to the coding file.")
 args = parser.parse_args()
-main( args )
+main(args)

Mercurial > repos > bgruening > augustus_training

comparison extract_features.py @ 4:da01a05d91c5 draft