diff antismash.xml @ 11:d2c2eb518142 draft

Uploaded
author bgruening
date Wed, 09 Oct 2013 11:14:23 -0400
parents b11e1dfbc7c9
children 9cfa2fb488b0
line wrap: on
line diff
--- a/antismash.xml	Wed Oct 09 10:06:13 2013 -0400
+++ b/antismash.xml	Wed Oct 09 11:14:23 2013 -0400
@@ -5,7 +5,6 @@
         <requirement type="package" version="2.3.2">hmmer</requirement>
         <requirement type="package" version="2.2.28">blast+</requirement>
         <requirement type="package" version="3.8.31">muscle</requirement>
-        <requirement type="package" version="1.62">biopython</requirement>
         <requirement type="package" version="2.0.2">antismash_python_deps</requirement>
         <requirement type="package" version="2.0.2">antismash</requirement>
     </requirements>
@@ -34,13 +33,15 @@
     <inputs>
         <param name="infile" type="data" format="gb,embl" label="Nucleotide sequence file in GenBank or EMBL format"/>
 
-        <param name="smcogs" type="boolean" label="Look for sec.met. clusters of orthologous groups" falsevalue="" truevalue="--smcogs" checked="false" />
-        <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters" truevalue="--clusterblast" falsevalue="" checked="false" />
-        <param name="subclusterblast" type="boolean" label="BLAST identified clusters against known subclusters" truevalue="--subclusterblast" falsevalue="" checked="false" />
+        <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters" truevalue="--clusterblast" falsevalue="" checked="True" />
+        <param name="smcogs" type="boolean" label="analysis of secondary metabolism gene families (smCOGs)" 
+            falsevalue="" truevalue="--smcogs" checked="True" />
+
+        <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis" truevalue="--full-blast" falsevalue="" checked="False" />
+        <param name="subclusterblast" type="boolean" label="Subcluster Blast analysis" truevalue="--subclusterblast" falsevalue="" checked="false" />
+        <param name="full_hmmer" type="boolean" label="Run a whole-genome Pfam analysis" truevalue="--full-hmmer" falsevalue="" checked="false" />
 
         <param name="inclusive" type="boolean" label="Use inclusive algorithm for cluster detection" truevalue="--inclusive" falsevalue="" checked="false" />
-        <param name="full_hmmer" type="boolean" label="Run a whole-genome HMMer analysis" truevalue="--full-hmmer" falsevalue="" checked="false" />
-        <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis" truevalue="--full-blast" falsevalue="" checked="false" />
 
         <param name="pfam_database" type="select" label="Pfam database" help="Pfam Covariance models">
             <options from_file="antismash.loc">
@@ -91,11 +92,6 @@
     
 .. class:: infomark
 
-That version of antiSMASH can only handle one sequence. So multi-sequence FASTA files are not supported.
-For multiple sequences please use multi-antiSMASH. The advantage of that tool is that it will provide you with a 
-archive of all results created from antiSMASH (It can be large!) and a HTML output, for better inspection.
-
-
 **What it does**
 
 antiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes. 
@@ -104,7 +100,26 @@
 
 **Input**
 
-If you don't have an annotated GenBank or embl file you also can provide a glimmer prediction output. You can created it with glimmer or glimmerHMM.
+The ideal input for antiSMASH is an annotated nucleotide file in GenBank format or EMBL format. If no annotation is available,
+we recommend running your sequence through an annotation pipeline like RAST or one included in Galaxy.
+
+
+There are several optional analyses that may or may not be run on your sequence.
+Highly recommended is the Gene Cluster Blast Comparative Analysis, which runs BlastP using each amino acid sequence from a detected gene cluster as a 
+query on a large database of predicted protein sequences from secondary metabolite biosynthetic gene clusters, and pools the results to identify 
+the gene clusters that are most homologous to the gene cluster that was detected in your query nucleotide sequence.
+
+
+Also available is the analysis of secondary metabolism gene families (smCOGs). 
+This analysis attempts to allocate each gene in the detected gene clusters to a secondary metabolism-specific gene 
+family using profile hidden Markov models specific for the conserved sequence region characteristic of this family. 
+Additionally, a phylogenetic tree is constructed of each gene together with the (max. 100) sequences of the smCOG seed alignment.
+
+
+For the most thorough genome analysis, we provide genome-wide Pfam HMM analysis of all genes in the genome through modules of the CLUSEAN pipeline.
+Of course, some regions important to secondary metabolism may have been missed in the gene cluster identification stage
+(e.g. because they represent the biosynthetic pathway of a yet unknown secondary metabolite).
+Therefore, when genome-wide Pfam HMM analysis is selected, the Pfam frequencies are also used to find all genome regions in which Pfam domains typical for secondary metabolism are overrepresented.
 
 
 **References**