comparison antismash.xml @ 11:d2c2eb518142 draft

Uploaded
author bgruening
date Wed, 09 Oct 2013 11:14:23 -0400
parents b11e1dfbc7c9
children 9cfa2fb488b0
comparison
equal deleted inserted replaced
10:d2c785cdf23e 11:d2c2eb518142
3 <requirements> 3 <requirements>
4 <requirement type="package" version="3.0">hmmer</requirement> 4 <requirement type="package" version="3.0">hmmer</requirement>
5 <requirement type="package" version="2.3.2">hmmer</requirement> 5 <requirement type="package" version="2.3.2">hmmer</requirement>
6 <requirement type="package" version="2.2.28">blast+</requirement> 6 <requirement type="package" version="2.2.28">blast+</requirement>
7 <requirement type="package" version="3.8.31">muscle</requirement> 7 <requirement type="package" version="3.8.31">muscle</requirement>
8 <requirement type="package" version="1.62">biopython</requirement>
9 <requirement type="package" version="2.0.2">antismash_python_deps</requirement> 8 <requirement type="package" version="2.0.2">antismash_python_deps</requirement>
10 <requirement type="package" version="2.0.2">antismash</requirement> 9 <requirement type="package" version="2.0.2">antismash</requirement>
11 </requirements> 10 </requirements>
12 <command> 11 <command>
13 run_antismash.py 12 run_antismash.py
32 31
33 </command> 32 </command>
34 <inputs> 33 <inputs>
35 <param name="infile" type="data" format="gb,embl" label="Nucleotide sequence file in GenBank or EMBL format"/> 34 <param name="infile" type="data" format="gb,embl" label="Nucleotide sequence file in GenBank or EMBL format"/>
36 35
37 <param name="smcogs" type="boolean" label="Look for sec.met. clusters of orthologous groups" falsevalue="" truevalue="--smcogs" checked="false" /> 36 <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters" truevalue="--clusterblast" falsevalue="" checked="True" />
38 <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters" truevalue="--clusterblast" falsevalue="" checked="false" /> 37 <param name="smcogs" type="boolean" label="analysis of secondary metabolism gene families (smCOGs)"
39 <param name="subclusterblast" type="boolean" label="BLAST identified clusters against known subclusters" truevalue="--subclusterblast" falsevalue="" checked="false" /> 38 falsevalue="" truevalue="--smcogs" checked="True" />
39
40 <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis" truevalue="--full-blast" falsevalue="" checked="False" />
41 <param name="subclusterblast" type="boolean" label="Subcluster Blast analysis" truevalue="--subclusterblast" falsevalue="" checked="false" />
42 <param name="full_hmmer" type="boolean" label="Run a whole-genome Pfam analysis" truevalue="--full-hmmer" falsevalue="" checked="false" />
40 43
41 <param name="inclusive" type="boolean" label="Use inclusive algorithm for cluster detection" truevalue="--inclusive" falsevalue="" checked="false" /> 44 <param name="inclusive" type="boolean" label="Use inclusive algorithm for cluster detection" truevalue="--inclusive" falsevalue="" checked="false" />
42 <param name="full_hmmer" type="boolean" label="Run a whole-genome HMMer analysis" truevalue="--full-hmmer" falsevalue="" checked="false" />
43 <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis" truevalue="--full-blast" falsevalue="" checked="false" />
44 45
45 <param name="pfam_database" type="select" label="Pfam database" help="Pfam Covariance models"> 46 <param name="pfam_database" type="select" label="Pfam database" help="Pfam Covariance models">
46 <options from_file="antismash.loc"> 47 <options from_file="antismash.loc">
47 <column name="value" index="0"/> 48 <column name="value" index="0"/>
48 <column name="name" index="1"/> 49 <column name="name" index="1"/>
89 </outputs> 90 </outputs>
90 <help> 91 <help>
91 92
92 .. class:: infomark 93 .. class:: infomark
93 94
94 This version of antiSMASH can only handle one sequence, so multi-sequence FASTA files are not supported.
95 For multiple sequences please use multi-antiSMASH. The advantage of that tool is that it will provide you with an
96 archive of all results created by antiSMASH (it can be large!) and an HTML output for easier inspection.
97
98
99 **What it does** 95 **What it does**
100 96
101 antiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes. 97 antiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes.
102 It integrates and cross-links with a large number of in silico secondary metabolite analysis tools that have been published earlier. 98 It integrates and cross-links with a large number of in silico secondary metabolite analysis tools that have been published earlier.
103 99
104 100
105 **Input** 101 **Input**
106 102
107 If you don't have an annotated GenBank or EMBL file, you can also provide a glimmer prediction output. You can create it with glimmer or glimmerHMM. 103 The ideal input for antiSMASH is an annotated nucleotide file in GenBank format or EMBL format. If no annotation is available,
104 we recommend running your sequence through an annotation pipeline like RAST or one included in Galaxy.
105
106
107 There are several optional analyses that may or may not be run on your sequence.
108 Highly recommended is the Gene Cluster Blast Comparative Analysis, which runs BlastP using each amino acid sequence from a detected gene cluster as a
109 query on a large database of predicted protein sequences from secondary metabolite biosynthetic gene clusters, and pools the results to identify
110 the gene clusters that are most homologous to the gene cluster that was detected in your query nucleotide sequence.
111
112
113 Also available is the analysis of secondary metabolism gene families (smCOGs).
114 This analysis attempts to allocate each gene in the detected gene clusters to a secondary metabolism-specific gene
115 family using profile hidden Markov models specific for the conserved sequence region characteristic of this family.
116 Additionally, a phylogenetic tree is constructed of each gene together with the (max. 100) sequences of the smCOG seed alignment.
117
118
119 For the most thorough genome analysis, we provide genome-wide PFAM HMM analysis of all genes in the genome through modules of the CLUSEAN pipeline.
120 Of course, some regions important to secondary metabolism may have been missed in the gene cluster identification stage
121 (e.g. because they represent the biosynthetic pathway of a yet unknown secondary metabolite).
122 Therefore, when genome-wide PFAM HMM analysis is selected, the PFAM frequencies are also used to find all genome regions in which PFAM domains typical for secondary metabolism are overrepresented.
108 123
109 124
110 **References** 125 **References**
111 126
112 Marnix H. Medema, Kai Blin, Peter Cimermancic, Victor de Jager, Piotr Zakrzewski, Michael A. Fischbach, Tilmann Weber, 127 Marnix H. Medema, Kai Blin, Peter Cimermancic, Victor de Jager, Piotr Zakrzewski, Michael A. Fischbach, Tilmann Weber,