Mercurial > repos > bgruening > glimmer_not_knowledge_based
diff glimmer2seq.py @ 0:3aa8a98be9a3 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
author | bgruening |
---|---|
date | Tue, 28 Nov 2017 10:15:43 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/glimmer2seq.py Tue Nov 28 10:15:43 2017 -0500 @@ -0,0 +1,47 @@ +#!/usr/bin/env python +""" +Input: DNA FASTA file + Glimmer ORF file +Output: ORF sequences as FASTA file +Author: Bjoern Gruening +""" +import sys + +from Bio import SeqIO +from Bio.SeqRecord import SeqRecord + + +def glimmer2seq(glimmer_prediction=sys.argv[1], genome_sequence=sys.argv[2], outfile=sys.argv[3]): + if len(sys.argv) >= 4: + glimmerfile = open(glimmer_prediction, "r") + sequence = open(genome_sequence) + else: + print("Missing input values.") + sys.exit() + + fastafile = SeqIO.parse(sequence, "fasta") + + sequences = dict() + seq_records = list() + for entry in fastafile: + sequences[entry.description] = entry + + for line in glimmerfile: + if line.startswith('>'): + entry = sequences[line[1:].strip()] + else: + orf_start = int(line[8:17]) + orf_end = int(line[18:26]) + + orf_name = line[0:8] + if orf_start <= orf_end: + seq_records.append(SeqRecord(entry.seq[orf_start - 1:orf_end], id=orf_name, description=entry.description)) + else: + seq_records.append(SeqRecord(entry.seq[orf_end - 1:orf_start].reverse_complement(), id=orf_name, description=entry.description)) + + SeqIO.write(seq_records, outfile, "fasta") + glimmerfile.close() + sequence.close() + + +if __name__ == "__main__": + glimmer2seq()