changeset 22:225d40beff1a draft

Uploaded
author bgruening
date Mon, 14 Oct 2013 03:26:11 -0400
parents 93b2baa4b9d6
children d26e8aa37ce9
files antismash.xml src/genecluster_sequence/__init__.py tool_dependencies.xml
diffstat 3 files changed, 87 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/antismash.xml	Sat Oct 12 09:18:04 2013 -0400
+++ b/antismash.xml	Mon Oct 14 03:26:11 2013 -0400
@@ -13,8 +13,12 @@
     </requirements>
     <command>
         #import os, glob
-        #set $outputfolder = os.path.join($html.files_path, 'antismash')
-        ln -s $infile #echo 'input_tempfile.' + $infile.ext#;
+        #set $outputfolder = $html.files_path
+        ## Map Galaxy's 'genbank' extension to 'gb'; any other extension is kept
+        ## as-is so that $file_extension is always defined when it is used below.
+        #set $file_extension = {'genbank': 'gb'}.get($infile.ext, $infile.ext)
+
+        ln -s $infile #echo 'input_tempfile.' + $file_extension#;
         mkdir -p $outputfolder;
         run_antismash.py 
             --cpus 4
@@ -35,7 +39,7 @@
             --outputfolder $outputfolder
 
 
-            #echo 'input_tempfile.' + $infile.ext#
+            #echo 'input_tempfile.' + $file_extension#
 
             ## leave out the start and end features, it can be easily replaced with Galaxy tools
             ##--from START          Start analysis at nucleotide specified
@@ -51,13 +55,22 @@
         
         ## gene clusters
         #if 'geneclusterprots_tabular' in str($outputs).split(','):
-            cp #echo os.path.join($outputfolder, 'geneclusters.txt')# $geneclusterprots;
+            cp #echo os.path.join($outputfolder, 'geneclusters.txt')# $geneclusterprots_tabular;
         #end if
 
+        #if 'geneclusterprots_fasta' in str($outputs).split(','):
+            cp #echo os.path.join($outputfolder, '*_genecluster_proteins.fa')# $geneclusterprots_fasta;
+        #end if
+
+
         ##SVG images
         #if 'archive_svgs' in str($outputs).split(','):
             cd #echo os.path.join($outputfolder, 'svg')#;
-            tar cfz $archive_svgs *_all.svg genecluster*;   
+            #if $clusterblast:
+                tar cfz $archive_svgs *_all.svg genecluster*;
+            #else:
+                tar cfz $archive_svgs genecluster*;
+            #end if
         #end if
 
         ##all files in a archive
@@ -68,24 +81,32 @@
 
         ## genbank
         #if 'gb' in str($outputs).split(','):
-            #for $gb_file in glob.glob( os.path.join($outputfolder, '*.gbk') ):
-                cat $gb_file >> $genbank;
-            #end for
+            cat #echo os.path.join($outputfolder, '*.gbk')# > $genbank;
         #end if
 
     </command>
     <inputs>
-        <param name="infile" type="data" format="gb,embl" label="Nucleotide sequence file in GenBank or EMBL format"/>
+        <param name="infile" type="data" format="genbank" label="Nucleotide sequence file in GenBank format"/>
 
-        <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters" truevalue="--clusterblast" falsevalue="" checked="True" />
-        <param name="smcogs" type="boolean" label="analysis of secondary metabolism gene families (smCOGs)" 
+        <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters"
+            help="(--clusterblast)"
+            truevalue="--clusterblast" falsevalue="" checked="True" />
+        <param name="subclusterblast" type="boolean" label="Subcluster BLAST analysis"
+            help="(--subclusterblast)"
+            truevalue="--subclusterblast" falsevalue="" checked="false" />
+        <param name="smcogs" type="boolean" label="Analysis of secondary metabolism gene families (smCOGs)"
             falsevalue="" truevalue="--smcogs" checked="True" />
 
-        <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis" truevalue="--full-blast" falsevalue="" checked="False" />
-        <param name="subclusterblast" type="boolean" label="Subcluster Blast analysis" truevalue="--subclusterblast" falsevalue="" checked="false" />
-        <param name="full_hmmer" type="boolean" label="Run a whole-genome Pfam analysis" truevalue="--full-hmmer" falsevalue="" checked="false" />
+        <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis"
+            help="(--full-blast)"
+            truevalue="--full-blast" falsevalue="" checked="False" />
+        <param name="full_hmmer" type="boolean" label="Run a whole-genome Pfam analysis"
+            help="(--full-hmmer)"
+            truevalue="--full-hmmer" falsevalue="" checked="false" />
 
-        <param name="inclusive" type="boolean" label="Use inclusive algorithm for cluster detection" truevalue="--inclusive" falsevalue="" checked="false" />
+        <param name="inclusive" type="boolean" label="Use inclusive algorithm for cluster detection"
+            help="(--inclusive)"
+            truevalue="--inclusive" falsevalue="" checked="false" />
 
         <param name="pfam_database" type="select" optional="true" label="Pfam database" help="Pfam Covariance models">
             <options from_file="antismash.loc">
@@ -134,16 +155,16 @@
 
     </inputs>
     <outputs>
-        <data format="fasta" name="geneclusterprots" label="${tool.name} on ${on_string} (Gen Cluster Proteins)">
+        <data format="fasta" name="geneclusterprots_fasta" label="${tool.name} on ${on_string} (Gene Cluster Proteins, FASTA)">
           <filter>'geneclusterprots_fasta' in outputs</filter>
         </data>
-        <data format="fasta" name="geneclusterprots" label="${tool.name} on ${on_string} (Gen Cluster Proteins)">
+        <data format="tabular" name="geneclusterprots_tabular" label="${tool.name} on ${on_string} (Gene Cluster Proteins, tabular)">
           <filter>'geneclusterprots_tabular' in outputs</filter>
         </data>
-        <data format="gzipped" name="archive" label="${tool.name} on ${on_string} (all files compressed)">
+        <data format="tar" name="archive" label="${tool.name} on ${on_string} (all files compressed)">
           <filter>'archive' in outputs</filter>
         </data>
-        <data format="gzipped" name="archive_svgs" label="${tool.name} on ${on_string} (SVG images)">
+        <data format="tar.gz" name="archive_svgs" label="${tool.name} on ${on_string} (SVG images)">
           <filter>'archive_svgs' in outputs</filter>
         </data>
         <data format="html" name="html" label="${tool.name} on ${on_string} (html report)">
@@ -165,8 +186,8 @@
 
 **Input**
 
-The ideal input for antiSMASH is an annotated nucleotide file in Genbank format or EMBL format. If no annotation is available, 
-we recommend running your sequence through an annotation pipeline like RAST are one included in Galaxy.
+The ideal input for antiSMASH is an annotated nucleotide file in GenBank format. If no annotation is available,
+we recommend running your sequence through an annotation pipeline such as RAST or the one included in Galaxy.
 
 
 There are several optional analyses that may or may not be run on your sequence.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/genecluster_sequence/__init__.py	Mon Oct 14 03:26:11 2013 -0400
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+"""
+    antiSMASH 2.0 output plugin to write all cluster proteins to a file (*_genecluster_proteins.fa)
+"""
+import logging
+from os import path
+from antismash import utils
+
+name = "genecluster_proteins"
+short_description = "Output gene clusters as FASTA sequences"
+# Output plugins are sorted by priority, lower numbers get run first
+priority = 9
+
+def write(seq_records, options):
+    """Write the protein sequence of every cluster CDS to one FASTA file.
+
+    Args:
+        seq_records (list): Bio.SeqRecords; the first record's id names the file
+        options (argparse.Namespace): The options passed to the program
+    """
+    if not seq_records:
+        return  # no record to name the file after; seq_records[0] would raise
+    basename = seq_records[0].id
+    output_name = path.join(options.outputfoldername, "%s_genecluster_proteins.fa" % basename)
+    logging.debug("Writing seq_records to %r", output_name)
+
+    with open(output_name, 'w') as handle:
+        for seq_record in seq_records:
+            for cluster in utils.get_cluster_features(seq_record):
+                clustertype = utils.get_cluster_type(cluster)
+                clusternr = utils.get_cluster_number(cluster)
+                for feature in utils.get_cluster_cds_features(cluster, seq_record):
+                    qual = feature.qualifiers
+                    if 'translation' not in qual:
+                        continue  # a CDS without a protein sequence has nothing to emit
+                    tag, pid, product = [qual.get(k, ['unknown'])[0] for k in ('locus_tag', 'protein_id', 'product')]
+                    handle.write('>%s:%s %s #%s - %s\n%s\n' % (tag, pid, clustertype, clusternr, product, qual['translation'][0]))
--- a/tool_dependencies.xml	Sat Oct 12 09:18:04 2013 -0400
+++ b/tool_dependencies.xml	Mon Oct 14 03:26:11 2013 -0400
@@ -137,6 +137,13 @@
                     <action type="shell_command">sed -i 's/check_prereqs(plugins, options) &gt; 0/False/g' $INSTALL_DIR/run_antismash.py</action>
                     <action type="download_file">https://bitbucket.org/antismash/antismash2/downloads/clusterblast.tar.gz</action>
                     <action type="shell_command">tar xfvz clusterblast.tar.gz -C $INSTALL_DIR/antismash/generic_modules/clusterblast</action>
+                    <!-- As the last step, move our additional output plugin, which writes the proteins of all
+                        clusters to a FASTA file, into antiSMASH's output_modules directory.
+                    -->
+                    <action type="move_directory_files">
+                        <source_directory>$REPOSITORY_INSTALL_DIR/src/</source_directory>
+                        <destination_directory>$INSTALL_DIR/antismash/output_modules/</destination_directory>
+                    </action>
                 </actions>
                 <!-- Download the binaries for AntiSmash compatible with 32-bit Linux. -->
                 <actions architecture="i386" os="linux">