Mercurial > repos > bgruening > glimmer
view glimmer_predict.py @ 2:2d0c26885604
Uploaded
author | bgruening |
---|---|
date | Fri, 07 Jun 2013 07:51:49 -0400 |
parents | 8624069d7a0e |
children | ce7228503d49 |
line wrap: on
line source
#!/usr/bin/env python """ Input: DNA Fasta File Output: Tabular Return Tabular File with predicted ORF's Bjoern Gruening """ import sys, os import tempfile import subprocess import shutil from glimmer_orf_to_seq import glimmer2seq def main(): genome_seq_file = sys.argv[1] outfile_classic_glimmer = sys.argv[2] outfile_ext_path = sys.argv[3] oufile_genes = sys.argv[8] tag = 'glimmer_non_knowlegde_based_prediction' tempdir = tempfile.gettempdir() trainingset = os.path.join( tempdir, tag + ".train" ) icm = os.path.join( tempdir, tag + ".icm" ) longorfs = tempfile.NamedTemporaryFile() trainingset = tempfile.NamedTemporaryFile() icm = tempfile.NamedTemporaryFile() #glimmeropts = "-o0 -g110 -t30 -l" glimmeropts = "-o%s -g%s -t%s" % (sys.argv[4], sys.argv[5], sys.argv[6]) if sys.argv[7] == "true": glimmeropts += " -l" """ 1. Find long, non-overlapping orfs to use as a training set """ subprocess.Popen(["long-orfs", "-n", "-t", "1.15", genome_seq_file, "-"], stdout = longorfs, stderr = subprocess.PIPE).communicate() """ 2. Extract the training sequences from the genome file """ subprocess.Popen(["extract", "-t", genome_seq_file, longorfs.name], stdout=trainingset, stderr=subprocess.PIPE).communicate() """ 3. Build the icm from the training sequences """ # the "-" parameter is used to redirect the output to stdout subprocess.Popen(["build-icm", "-r", "-"], stdin=open(trainingset.name), stdout = icm, stderr=subprocess.PIPE).communicate() """ Run Glimmer3 """ b = subprocess.Popen(["glimmer3", glimmeropts, genome_seq_file, icm.name, os.path.join(tempdir, tag)], stdout = subprocess.PIPE, stderr=subprocess.PIPE).communicate() shutil.copyfile( os.path.join( tempdir, tag + ".predict" ), outfile_classic_glimmer ) if outfile_ext_path.strip() != 'None': shutil.copyfile( os.path.join( tempdir, tag + ".detail" ), outfile_ext_path ) glimmer2seq( outfile_classic_glimmer, genome_seq_file, oufile_genes ) if __name__ == "__main__" : main()