changeset 0:95a776023fbc draft

Uploaded
author bgruening
date Thu, 12 Jun 2014 11:35:21 -0400
parents
children f8111c9f037d
files eden_cluster_motif_finder.py eden_cluster_splitter.py eden_iterative_motif_finder.py eden_utilities.py eden_wrapper.py motif_finder.xml readme.rst tool_dependencies.xml
diffstat 8 files changed, 640 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eden_cluster_motif_finder.py	Thu Jun 12 11:35:21 2014 -0400
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+import argparse
+import sys
+import os
+import subprocess
+from Bio import SeqIO
+import random
+import tempfile
+import shlex
+import shutil
+import logging
+from eden_wrapper import EDeNWrapper
+import eden_iterative_motif_finder
+from eden_utilities import log
+from eden_utilities import create_params
+import eden_cluster_splitter
+    
+def main(args):
+    #run cluster split
+    eden_cluster_splitter.main(args)
+    #work in the output dir
+    print 'start motif finder', args.output_dir_path
+    motif_set = set()
+    for root, dirs, files in os.walk( args.output_dir_path ):
+        for filename in files:
+            if filename.startswith('cluster_'):
+                filepath = os.path.join(root, filename)
+                args.fasta_file = filepath
+                out_filepath = os.path.join(root, '%s.motif' % filename)
+                args.output_file_path = open(out_filepath, 'w+')
+                eden_iterative_motif_finder.main(args)
+                args.output_file_path.close()
+                for line in open(out_filepath):
+                    motif_set.add(line.split()[0])
+
+    with open(args.fasta_file, 'r') as f:
+        for record in SeqIO.parse(f, 'fasta'):
+            for motif_id, motif in enumerate(motif_set):
+                seq = eden_iterative_motif_finder.NormaliseSequenceToDNA(record.seq)
+                motif_count = seq.count(motif)
+                if motif_count > 0:
+                    res = '%s\t%s\t%s\t%s\n' % (record.id, motif_id, motif, motif_count)
+                    print res,
+    
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Extract motifs patterns with EDeN.')
+    parser = create_params(parser, 'eden_cluster_motif')
+    args = parser.parse_args()
+    main(args)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eden_cluster_splitter.py	Thu Jun 12 11:35:21 2014 -0400
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+import argparse
+import sys
+import os
+import subprocess
+from Bio import SeqIO
+import random
+import tempfile
+import shlex
+import shutil
+import logging
+from eden_wrapper import EDeNWrapper
+import eden_iterative_motif_finder
+from eden_utilities import log
+from eden_utilities import create_params
+    
+def main(args):
+    #setup directory for output
+    if not os.path.exists(args.output_dir_path) :
+        os.mkdir(args.output_dir_path)
+    else:
+        sys.exit("Output directory %s already exists. Bailing out." % args.output_dir_path) 
+
+    #make seq file
+    tmp_output_dir=tempfile.mkdtemp()
+    seq_pos_file_path=eden_iterative_motif_finder.MakePositiveSequenceFile(args.fasta_file, tmp_output_dir)
+        
+    #cluster sequences 
+    param = vars(args)
+    param.update({'dat_file_path':seq_pos_file_path,'output_dir':tmp_output_dir})   
+    EDeN=EDeNWrapper(param)
+    EDeN.Cluster()
+    
+    #build the inverse index seqid->clusterid
+    #and create a dict of file_handles
+    files_handle_list=dict()
+    map_seqid2clusterid = dict()
+    with open(os.path.join( tmp_output_dir, 'cluster' )) as f:
+        for clusterid, line in enumerate(f):
+            str_vec = line.strip().split()
+            int_vec = map(int, str_vec)
+            for seqid in int_vec:
+                map_seqid2clusterid[seqid]=clusterid
+            filename=os.path.join( args.output_dir_path, 'cluster_%s.fa' % clusterid )
+            files_handle_list[clusterid]=open(filename, 'w+')
+    
+    #write each fasta sequence in the corresponding cluster files
+    log.info( "Writing %s cluster files in directory %s " % (clusterid,args.output_dir_path) )
+    with open(args.fasta_file, 'r') as f:
+        for seqid, record in enumerate(SeqIO.parse(f, 'fasta')):
+            if seqid in map_seqid2clusterid: #note: not all sequences have a cluster
+                clusterid=map_seqid2clusterid[seqid];
+                SeqIO.write(record, files_handle_list[clusterid], "fasta")
+    
+    #cleanup        
+    shutil.rmtree(tmp_output_dir)
+
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Extract motifs patterns with EDeN.')
+    parser = create_params(parser, 'eden_cluster_splitter')
+    args = parser.parse_args()
+    main(args)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eden_iterative_motif_finder.py	Thu Jun 12 11:35:21 2014 -0400
@@ -0,0 +1,215 @@
+#!/usr/bin/env python
+import logging
+import argparse
+import sys
+import os
+import subprocess
+from Bio import SeqIO
+import random
+import tempfile
+import shlex
+import shutil
+import logging
+from  eden_wrapper import EDeNWrapper
+from eden_utilities import log
+from eden_utilities import create_params
+
+
+SEQ_POS_FILE="pos.seq"
+SEQ_NEG_FILE="neg.seq"
+SEQ_FILE="dat.seq"
+TARGET_FILE="dat.target"
+SCORE_FILE="pos.score"
+
+def SolveMaximumSubarrayProblem(vec):
+    begin_temp = begin = end = 0
+    start_val=vec[0]
+    max_ending_here = max_so_far = start_val
+    for pos,x in enumerate(vec[1:],1):
+        if max_ending_here < 0:
+            max_ending_here=x
+            begin_temp=pos
+        else:
+            max_ending_here=max_ending_here+x
+        if max_ending_here > max_so_far:
+            max_so_far=max_ending_here
+            begin=begin_temp
+            end=pos
+    return begin,end
+
+def ComputeIteratedMaximumSubarray(seq_list, score_list, min_motif_size):
+    motif_list=[]
+    assert len(seq_list) == len(score_list)
+    for seq,score in zip(seq_list, score_list):
+        while 1:
+            #find (begin,end) of motif in each element
+            begin,end=SolveMaximumSubarrayProblem(score)
+            if end-begin < min_motif_size:
+                break
+            else:            
+                #extract subsequence
+                motif=seq[begin:end+1]
+                #print motif, begin, end, seq, score
+                motif_item=[motif, begin, end, seq]
+                motif_list.append(motif_item)
+                #remove motif by zeoring importance values
+                motif_size=end+1-begin
+                assert motif_size == len(motif)
+                score[begin:end+1]=[0.0]*motif_size
+                #iterate
+    return motif_list
+
+def ComputeAndOutputIteratedMaximumSubarray(seq_file, score_file, min_subarray_size, opath):
+    """
+    Description and unit test
+    """
+    #import the sequence file
+    seq_list=[]
+    with open(seq_file, 'r') as f:
+        for line in f:
+            seq_list.append(line) 
+    #import the score file        
+    score_list=[]
+    with open(score_file, 'r') as f:
+        for line in f:
+            str_vec=line.split()
+            int_vec = map(float, str_vec)
+            score_list.append(int_vec) 
+
+    #extract for each sequence the motif
+    motif_list=ComputeIteratedMaximumSubarray(seq_list, score_list, min_subarray_size)
+
+    #output to file
+    for motif, begin, end, seq in motif_list:
+        opath.write('{}\t{}\t{}\t{}'.format( motif, begin, end, seq))
+
+def IsRNA(seq):
+    if 'U' in seq.upper():
+        return True
+    else:
+        return False
+
+def ShuffleSequence(seq, order =1):
+    nucleotides = [str(seq)[nucl:nucl+order] for nucl in range(0,len(seq)-order+1,order)]
+    if order != 1:
+        start = len(seq)%order
+        if start != 0 :
+            nucleotides.append(str(seq)[-start:])
+    random.shuffle(nucleotides)
+    shuffled_seq=''.join(nucleotides)
+    return shuffled_seq
+
+def NormaliseSequenceToDNA(seq):
+    dna=seq
+    if IsRNA(seq):
+        dna=seq.back_transcribe()
+    normdna=dna.upper()
+    return normdna
+
+def MakePositiveSequenceFile(fasta_file, output_dir):
+    seq_pos_file_path = os.path.join(output_dir, SEQ_POS_FILE)
+    seq_pos_handle=open(seq_pos_file_path, 'w+')
+    pos_counter=0
+    with open(fasta_file, 'r') as f:
+        for record in SeqIO.parse(f, 'fasta'):
+            dna=NormaliseSequenceToDNA(record.seq)
+            seq_pos_handle.write("%s\n" % dna)
+            pos_counter+=1
+    seq_pos_handle.close()
+    log.info( "Created %s positive sequences in file: %s" % (pos_counter, seq_pos_file_path) )
+    return seq_pos_file_path
+
+def MakeNegativeSequenceFile(sequence_file, output_dir, negative_repeat_factor, max_shuffle_order):
+    seq_neg_file_path = os.path.join(output_dir, SEQ_NEG_FILE)
+    seq_neg_handle=open(seq_neg_file_path, 'w+')
+    neg_counter=0
+    with open(sequence_file, 'r') as file_handle:
+        for seq in file_handle:
+            seq = seq.strip()
+            for rep in range(negative_repeat_factor):
+                for order in range(1,max_shuffle_order):
+                    sdna=ShuffleSequence(seq,order)
+                    seq_neg_handle.write("%s\n" % sdna)
+                    neg_counter+=1
+    seq_neg_handle.close()
+    log.info( "Created %s negative sequences in file: %s" % (neg_counter, seq_neg_file_path) )
+    return seq_neg_file_path
+
+def MakeDataFile(fasta_file, negative_repeat_factor, max_shuffle_order):
+    output_dir=tempfile.mkdtemp()
+    
+    seq_pos_file_path=MakePositiveSequenceFile(fasta_file, output_dir)
+    seq_neg_file_path=MakeNegativeSequenceFile(seq_pos_file_path, output_dir, negative_repeat_factor, max_shuffle_order)
+    
+    dat_file_path = os.path.join(output_dir, SEQ_FILE)
+    target_file_path = os.path.join(output_dir, TARGET_FILE)
+    
+    merged_seq_file_path = open(dat_file_path, 'wb')
+    shutil.copyfileobj(open(seq_pos_file_path,'rb'), merged_seq_file_path)
+    shutil.copyfileobj(open(seq_neg_file_path,'rb'), merged_seq_file_path)
+    merged_seq_file_path.close()
+    
+    with open(target_file_path, 'w+') as target_handle:
+        for line in open(seq_pos_file_path):
+            target_handle.write('1\n')
+        for line in open(seq_neg_file_path):
+            target_handle.write('-1\n')
+     
+    return {'output_dir': output_dir, 
+            'seq_pos_file_path': seq_pos_file_path,
+            'seq_neg_file_path': seq_neg_file_path,
+            'dat_file_path':dat_file_path, 
+            'target_file_path':target_file_path}
+
+def MakeScoreFile(output_dir):    
+    score_file_path = os.path.join(output_dir, SCORE_FILE)
+    score_file = open( score_file_path, 'w+')
+    counter_old = -1
+    values = list()
+    for line in open(os.path.join( output_dir, 'prediction_part' )):
+        counter, vertex_id, value = line.strip().split()
+        counter = int(counter)
+        if counter != counter_old :
+            if values :
+                score_file.write(' '.join(values) + '\n')
+            values = list()
+        values.append(value)
+        counter_old = counter
+    if values :
+        score_file.write(' '.join(values) + '\n')
+    score_file.close()
+    log.info( "Score file in: %s" % score_file_path )
+    return score_file_path
+    
+def main(args):
+    #create data file, i.e. positive normalised sequences and negative shuffled sequences
+    param_dict = MakeDataFile(args.fasta_file,
+                       args.neg_rep,
+                       args.max_order)
+    param_dict.update({'radius':args.radius})
+    param_dict.update({'distance':args.distance})
+    #train a model     
+    eden=EDeNWrapper(param_dict)
+    eden.Train()
+    #test
+    eden.param.update({'dat_file_path':param_dict['seq_pos_file_path']})    
+    eden.TestPart()
+    
+    #extract scores
+    score_file_path=MakeScoreFile(param_dict['output_dir'])
+    
+    ComputeAndOutputIteratedMaximumSubarray( param_dict['seq_pos_file_path'], 
+                                                     score_file_path, 
+                                                     args.min_motif_size, 
+                                                     args.output_file_path )
+    #cleanup
+    shutil.rmtree(param_dict['output_dir'])
+    
+
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Extract motifs patterns with EDeN.')
+    parser = create_params(parser, 'eden_iterative_motif_finder')
+    args = parser.parse_args()
+    main(args)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eden_utilities.py	Thu Jun 12 11:35:21 2014 -0400
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+__author__ = "Fabrizio Costa, Bjoern Gruening"
+__copyright__ = "Copyright 2014, Fabrizio Costa"
+__credits__ = ["Fabrizio Costa", "Bjoern Gruening"]
+__license__ = "GPL"
+__version__ = "1.0.1"
+__maintainer__ = "Fabrizio Costa"
+__email__ = "costa@informatik.uni-freiburg.de"
+__status__ = "Production"
+
+import logging
+import sys
+import argparse
+
+log = logging.getLogger()
+log.setLevel(logging.DEBUG)
+
+std = logging.StreamHandler(sys.stdout)
+std.setLevel(logging.DEBUG)
+formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+std.setFormatter(formatter)
+log.addHandler(std)
+
+def create_params(parser, mode):
+    #common parameters
+    parser.add_argument('--input-file', '-i', 
+            dest="fasta_file",
+            type=str, 
+            help='fasta file name with sequences.', 
+            required=True)
+    parser.add_argument('--radius', '-r',
+            dest="radius",
+            type=int, 
+            help='Size of the largest radius used in EDeN.', 
+            default=2)
+    parser.add_argument('--distance', '-d',
+            dest="distance",
+            type=int, 
+            help='Size of the largest distance used in EDeN.', 
+            default=5)
+    parser.add_argument('--version', action="version", version=__version__)
+    
+    if mode == 'eden_cluster_splitter' or  mode == 'eden_cluster_motif' :
+        parser.add_argument("-o", "--output-dir", 
+            dest="output_dir_path", 
+            help="Path to output directory.")
+        parser.add_argument('-n', '--num-nearest-neighbors', 
+            dest="num_nearest_neighbors",
+            type=int, 
+            help='Number of nearest neighbors used in the density estimation.', 
+            default=10)
+        parser.add_argument('-z', '--neighborhood-intersection-size', 
+            dest="neighborhood_intersection_size",
+            type=int, 
+            help='Number of nearest neighbors that must be shared in neighborhood in order to trigger neighborhood joining.', 
+            default=5)
+        parser.add_argument('-c','--fraction-center-scan', 
+            dest="fraction_center_scan",
+            type=int, 
+            help='Fraction of total number of instances which is used to compute densities.', 
+            default=1.0)
+        
+    if mode == 'eden_iterative_motif_finder' or  mode == 'eden_cluster_motif' :
+        parser.add_argument("-f", "--output-file", 
+            dest="output_file_path", 
+            type=argparse.FileType('w'),
+            default=sys.stdout,
+            help="Path to output file.")
+        parser.add_argument('-k', '--max-order', 
+            dest="max_order",
+            type=int, 
+            help='Size of the order for negative shuffling.', 
+            default=3)
+        parser.add_argument('-s', '--min-motif-size', 
+            dest="min_motif_size",
+            type=int, 
+            help='Size of the smallest returned motif.', 
+            default=4)
+        parser.add_argument('-e', '--num-negative-repetitions',
+            dest="neg_rep",
+            type=int, 
+            help='Number of negative instances per order per positive instance.', 
+            default=3)
+
+    return parser
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eden_wrapper.py	Thu Jun 12 11:35:21 2014 -0400
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+import argparse
+import sys
+import os
+import subprocess
+import random
+import tempfile
+import shlex
+import shutil
+from eden_utilities import log
+
+class EDeNWrapper:
+    
+    def __init__(self, args):
+        self._param={}
+        #default values
+        self._param['path_to_EDeN']="EDeN"
+        self._param['model_file_path']="model"
+        self._param['radius']=2
+        self._param['distance']=5
+        self._param['output_dir']=tempfile.mkdtemp() 
+        self._param['kernel_type']="STRING"
+        self._param['file_type']="STRINGSEQ"
+        self._param['cluster_type']="DENSE_CONNECTED_CENTERS"
+        self._param['sample_size']=1000
+        self._param['num_nearest_neighbors']=10
+        self._param['neighborhood_intersection_size']=int(self._param['num_nearest_neighbors']/2)
+        self._param['fraction_center_scan']=1.0
+        #update 
+        self._param.update(args)
+    
+    @property
+    def param(self):
+        return self._param
+    
+    def Train(self):
+        assert self._param.has_key('dat_file_path')
+        assert self._param.has_key('target_file_path')
+        assert self._param.has_key('model_file_path')
+        
+        cmd = "%(path_to_EDeN)s -a TRAIN -i %(dat_file_path)s -t %(target_file_path)s --kernel_type %(kernel_type)s --file_type %(file_type)s --radius %(radius)s --distance %(distance)s -y %(output_dir)s -m %(model_file_path)s"
+        cmd = cmd % self._param
+
+        log.info( "Executing call: %s" % cmd )
+        subprocess.call( shlex.split(cmd) )
+
+    def TestPart(self):
+        assert self._param.has_key('dat_file_path')
+        assert self._param.has_key('model_file_path')
+       
+        cmd = "%(path_to_EDeN)s -a TEST_PART -i %(dat_file_path)s --kernel_type %(kernel_type)s --file_type %(file_type)s --radius %(radius)s --distance %(distance)s -y %(output_dir)s -m %(output_dir)s/%(model_file_path)s"
+        cmd = cmd % self._param
+
+        log.info( "Executing call: %s" % cmd )
+        subprocess.call( shlex.split(cmd) )
+        
+    def Test(self):
+        assert self._param.has_key('dat_file_path')
+        assert self._param.has_key('model_file_path')
+       
+        cmd = "%(path_to_EDeN)s -a TEST -i %(dat_file_path)s --kernel_type %(kernel_type)s --file_type %(file_type)s --radius %(radius)s --distance %(distance)s -y %(output_dir)s -m %(output_dir)s/%(model_file_path)s"
+        cmd = cmd % self._param
+
+        log.info( "Executing call: %s" % cmd )
+        subprocess.call( shlex.split(cmd) )
+    
+    def Cluster(self):
+        assert self._param.has_key('dat_file_path')
+
+        cmd = "%(path_to_EDeN)s -a CLUSTER -i %(dat_file_path)s --kernel_type %(kernel_type)s --file_type %(file_type)s --radius %(radius)s --distance %(distance)s -y %(output_dir)s "
+        cmd = cmd + "--cluster_type %(cluster_type)s --sample_size %(sample_size)s --num_nearest_neighbors %(num_nearest_neighbors)s --neighborhood_intersection_size %(neighborhood_intersection_size)s --fraction_center_scan %(fraction_center_scan)s"
+        cmd = cmd % self._param
+
+        log.info( "Executing call: %s" % cmd )
+        subprocess.call( shlex.split(cmd) )
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/motif_finder.xml	Thu Jun 12 11:35:21 2014 -0400
@@ -0,0 +1,57 @@
+<tool id="eden_cluster_motif_finder" name="Motif finder" version="0.1.0">
+    <description>based on EDeN</description>
+    <requirements>
+        <requirement type="package" version="1.6.1">biopython</requirement>
+        <requirement type="package" version="13.5">eden</requirement>
+    </requirements>
+    <command interpreter="python">
+        eden_cluster_motif_finder.py
+
+        --input-file $infile
+        --radius $radius
+        --distance $distance
+        --output-dir ./
+        --num-nearest-neighbors $num_nearest_neighbors
+        --neighborhood-intersection-size $neighborhood_intersection_size
+        --fraction-center-scan $fraction_center_scan
+        --output-file foo.txt
+        --max-order $max_order
+        --min-motif-size $min_motif_size
+        --num-negative-repetitions $num_negative_repetitions
+        > $output
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="fasta" label="sequences" help="in FASTA format (--input-file)"/>
+        <param name="radius" type="integer" value="2" label="Size of the largest radius used in EDeN" help="(--radius)" />
+        <param name="distance" type="integer" value="5" label="Size of the largest distance used in EDeN" help="(--distance)" />
+        <param name="num_nearest_neighbors" type="integer" value="10" label="Number of nearest neighbors used in the density estimation" help="(--num-nearest-neighbors)" />
+        <param name="neighborhood_intersection_size" type="integer" value="5" label="Number of nearest neighbors that must be shared in neighborhood in order to trigger neighborhood joining."
+            help="(--neighborhood-intersection-size)" />
+        <param name="fraction_center_scan" type="float" value="1.0" label="Fraction of total number of instances which is used to compute densities" help="(--fraction-center-scan)" />
+        <param name="max_order" type="integer" value="3" label="Size of the order for negative shuffling" help="(--max_order)" />
+        <param name="min_motif_size" type="integer" value="4" label="Size of the smallest returned motif" help="(--min-motif-size)" />
+        <param name="num_negative_repetitions" type="integer" value="3" label="Size of the smallest returned motif" help="(--num-negative-repetitions)" />
+    </inputs>
+    <outputs>
+        <data format="tabular" name="output" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests>
+    </tests>
+    <help>
+
+**What it does**
+
+EDeN Motif finder
+
+-----
+
+**Parameters**
+
+
+
+**References**
+
+Fabrizio Costa
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst	Thu Jun 12 11:35:21 2014 -0400
@@ -0,0 +1,84 @@
+Galaxy wrapper for Augustus
+===========================
+
+This wrapper is copyright 2012-2013 by Björn Grüning.
+
+This is a wrapper for the command line tool of Augustus_.
+
+.. _augustus: http://bioinf.uni-greifswald.de/augustus/
+
+AUGUSTUS is a program that predicts genes in eukaryotic genomic sequences.
+
+Oliver Keller, Martin Kollmar, Mario Stanke, Stephan Waack (2011)
+A novel hybrid gene prediction method employing protein multiple sequence alignments
+Bioinformatics, doi: 10.1093/bioinformatics/btr010
+
+Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008)
+Using native and syntenically mapped cDNA alignments to improve de novo gene finding
+Bioinformatics, doi: 10.1093/bioinformatics/btn013
+
+Mario Stanke and Stephan Waack (2003)
+Gene Prediction with a Hidden-Markov Model and a new Intron Submodel. 
+Bioinformatics, Vol. 19, Suppl. 2, pages ii215-ii225
+
+
+Installation
+============
+
+The recommended installation is by means of the toolshed_.
+If you need to install it manually here is a short introduction.
+
+.. _toolshed:  http://toolshed.g2.bx.psu.edu/view/bgruening/augustus
+
+
+Install or downlaod augustus from::
+
+    http://bioinf.uni-greifswald.de/augustus/binaries/
+
+and follow the installation instructions or copy the binaries into your $PATH. To install the wrapper copy the augustus folder in the galaxy tools folder and modify the tools_conf.xml file to make the tool available to Galaxy.
+
+For example::
+
+  <section name="Gene Prediction" id="gene_prediction">
+    <tool file="gene_prediction/tools/augustus/augustus.xml" />
+  </section>
+
+
+Set the *AUGUSTUS_CONFIG_PATH* to /path_to_augustus/augustus/config with::
+
+  export AUGUSTUS_CONFIG_PATH=/path_to_augustus/augustus/config
+
+or modify the wrapper and use the following additional commandline switch::
+
+  --AUGUSTUS_CONFIG_PATH=/path_to_augustus/augustus/config
+
+
+History
+=======
+
+- v0.1: Initial public release
+- v0.2: Added tool_dependencies.xml file and update the augustus version (thanks to James Johnson)
+- v0.3: upgrade to augustus 2.7, added new organisms and new parameters, output additional sequence files
+- v0.3.1: added parallelism and changed the output parameters from boolean to a select box
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Jun 12 11:35:21 2014 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="eden" version="1.3.5">
+        <repository changeset_revision="dc13e98b0e3d" name="package_eden_1_3" owner="rnateam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="biopython" version="1.61">
+        <repository changeset_revision="2f6c871cfa35" name="package_biopython_1_61" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>