view glimmer_predict.py @ 2:2d0c26885604

Uploaded
author bgruening
date Fri, 07 Jun 2013 07:51:49 -0400
parents 8624069d7a0e
children ce7228503d49
line wrap: on
line source

#!/usr/bin/env python
"""
Input: DNA Fasta File
Output: Tabular
Return Tabular File with predicted ORF's
Bjoern Gruening
"""
import sys, os
import tempfile
import subprocess
import shutil
from glimmer_orf_to_seq import glimmer2seq

def main():
    genome_seq_file = sys.argv[1]
    outfile_classic_glimmer = sys.argv[2]
    outfile_ext_path = sys.argv[3]
    oufile_genes = sys.argv[8]

    tag = 'glimmer_non_knowlegde_based_prediction'
    tempdir = tempfile.gettempdir()

    trainingset = os.path.join( tempdir, tag + ".train" )
    icm = os.path.join( tempdir, tag + ".icm" )

    longorfs = tempfile.NamedTemporaryFile()
    trainingset = tempfile.NamedTemporaryFile()
    icm = tempfile.NamedTemporaryFile()

    #glimmeropts = "-o0 -g110 -t30 -l"
    glimmeropts = "-o%s -g%s -t%s" % (sys.argv[4], sys.argv[5], sys.argv[6])
    if sys.argv[7] == "true":
        glimmeropts += " -l"

    """
        1. Find long, non-overlapping orfs to use as a training set
    """
    subprocess.Popen(["long-orfs", "-n", "-t", "1.15",
        genome_seq_file, "-"], stdout = longorfs,
        stderr = subprocess.PIPE).communicate()

    """
        2. Extract the training sequences from the genome file
    """
    subprocess.Popen(["extract", "-t",
        genome_seq_file, longorfs.name], stdout=trainingset,
        stderr=subprocess.PIPE).communicate()

    """
        3. Build the icm from the training sequences
    """

    # the "-" parameter is used to redirect the output to stdout
    subprocess.Popen(["build-icm", "-r", "-"], 
        stdin=open(trainingset.name), stdout = icm, 
        stderr=subprocess.PIPE).communicate()

    """
        Run Glimmer3
    """
    b = subprocess.Popen(["glimmer3", glimmeropts, 
        genome_seq_file, icm.name, os.path.join(tempdir, tag)], 
        stdout = subprocess.PIPE, stderr=subprocess.PIPE).communicate()

    shutil.copyfile( os.path.join( tempdir, tag + ".predict" ), outfile_classic_glimmer )
    if outfile_ext_path.strip() != 'None':
        shutil.copyfile( os.path.join( tempdir, tag + ".detail" ), outfile_ext_path )

    glimmer2seq( outfile_classic_glimmer, genome_seq_file, oufile_genes )


if __name__ == "__main__" :
    main()