Mercurial > repos > bgruening > antismash
diff antismash.xml @ 11:d2c2eb518142 draft
Uploaded
author | bgruening |
---|---|
date | Wed, 09 Oct 2013 11:14:23 -0400 |
parents | b11e1dfbc7c9 |
children | 9cfa2fb488b0 |
line wrap: on
line diff
--- a/antismash.xml Wed Oct 09 10:06:13 2013 -0400 +++ b/antismash.xml Wed Oct 09 11:14:23 2013 -0400 @@ -5,7 +5,6 @@ <requirement type="package" version="2.3.2">hmmer</requirement> <requirement type="package" version="2.2.28">blast+</requirement> <requirement type="package" version="3.8.31">muscle</requirement> - <requirement type="package" version="1.62">biopython</requirement> <requirement type="package" version="2.0.2">antismash_python_deps</requirement> <requirement type="package" version="2.0.2">antismash</requirement> </requirements> @@ -34,13 +33,15 @@ <inputs> <param name="infile" type="data" format="gb,embl" label="Nucleotide sequence file in GenBank or EMBL format"/> - <param name="smcogs" type="boolean" label="Look for sec.met. clusters of orthologous groups" falsevalue="" truevalue="--smcogs" checked="false" /> - <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters" truevalue="--clusterblast" falsevalue="" checked="false" /> - <param name="subclusterblast" type="boolean" label="BLAST identified clusters against known subclusters" truevalue="--subclusterblast" falsevalue="" checked="false" /> + <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters" truevalue="--clusterblast" falsevalue="" checked="True" /> + <param name="smcogs" type="boolean" label="analysis of secondary metabolism gene families (smCOGs)" + falsevalue="" truevalue="--smcogs" checked="True" /> + + <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis" truevalue="--full-blast" falsevalue="" checked="False" /> + <param name="subclusterblast" type="boolean" label="Subcluster Blast analysis" truevalue="--subclusterblast" falsevalue="" checked="false" /> + <param name="full_hmmer" type="boolean" label="Run a whole-genome Pfam analysis" truevalue="--full-hmmer" falsevalue="" checked="false" /> <param name="inclusive" type="boolean" label="Use inclusive algorithm for cluster 
detection" truevalue="--inclusive" falsevalue="" checked="false" /> - <param name="full_hmmer" type="boolean" label="Run a whole-genome HMMer analysis" truevalue="--full-hmmer" falsevalue="" checked="false" /> - <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis" truevalue="--full-blast" falsevalue="" checked="false" /> <param name="pfam_database" type="select" label="Pfam database" help="Pfam Covariance models"> <options from_file="antismash.loc"> @@ -91,11 +92,6 @@ .. class:: infomark -That version of antiSMASH can only handle one sequence. So multi-sequence FASTA files are not supported. -For multiple sequences please use multi-antiSMASH. The advantage of that tool is that it will provide you with a -archive of all results created from antiSMASH (It can be large!) and a HTML output, for better inspection. - - **What it does** antiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes. @@ -104,7 +100,26 @@ **Input** -If you don't have an annotated GenBank or embl file you also can provide a glimmer prediction output. You can created it with glimmer or glimmerHMM. +The ideal input for antiSMASH is an annotated nucleotide file in GenBank format or EMBL format. If no annotation is available, +we recommend running your sequence through an annotation pipeline like RAST or one included in Galaxy. + + +There are several optional analyses that may or may not be run on your sequence. +Highly recommended is the Gene Cluster Blast Comparative Analysis, which runs BlastP using each amino acid sequence from a detected gene cluster as a +query on a large database of predicted protein sequences from secondary metabolite biosynthetic gene clusters, and pools the results to identify +the gene clusters that are most homologous to the gene cluster that was detected in your query nucleotide sequence. 
+ + +Also available is the analysis of secondary metabolism gene families (smCOGs). +This analysis attempts to allocate each gene in the detected gene clusters to a secondary metabolism-specific gene +family using profile hidden Markov models specific for the conserved sequence region characteristic of this family. +Additionally, a phylogenetic tree is constructed of each gene together with the (max. 100) sequences of the smCOG seed alignment. + + +For the most thorough genome analysis, we provide genome-wide PFAM HMM analysis of all genes in the genome through modules of the CLUSEAN pipeline. +Of course, some regions important to secondary metabolism may have been missed in the gene cluster identification stage +(e.g. because they represent the biosynthetic pathway of a yet unknown secondary metabolite). +Therefore, when genome-wide PFAM HMM analysis is selected, the PFAM frequencies are also used to find all genome regions in which PFAM domains typical for secondary metabolism are overrepresented. **References**