Mercurial > repos > sanbi-uwc > mothur_test
diff tools/mothur/cluster.split.xml @ 0:ee4fee239fe7 draft default tip
planemo upload commit 68a4fd4cc5332c57ac39bef73db224425af0706c-dirty
author | sanbi-uwc |
---|---|
date | Fri, 03 Jun 2016 09:32:47 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mothur/cluster.split.xml Fri Jun 03 09:32:47 2016 -0400 @@ -0,0 +1,220 @@ +<tool profile="16.07" id="mothur_cluster_split" name="Cluster.split" version="@WRAPPER_VERSION@.0"> + <description>Assign sequences to OTUs (Operational Taxonomic Unit) splits large matrices</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="version_command"/> + <command detect_errors="aggressive"><![CDATA[ + echo 'cluster.split( + splitmethod=$splitby.splitmethod, + #if $splitby.splitmethod == "distance": + #if $splitby.matrix.format == "column": + column=$splitby.matrix.dist, + name=$splitby.matrix.name, + #elif $splitby.matrix.format == "phylip": + phylip=$splitby.matrix.dist, + #if $splitby.matrix.name: + name=$splitby.matrix.name, + #end if + #end if + #elif $splitby.splitmethod == "classify": + column=$splitby.dist, + name=$splitby.name, + taxonomy=$splitby.taxonomy, + #if $splitby.taxlevel > 1: + taxlevel=$splitby.taxlevel, + #end if + #elif $splitby.splitmethod == "fasta": + fasta=$splitby.fasta, + name=$splitby.name, + taxonomy=$splitby.taxonomy, + #if $splitby.taxlevel > 1: + taxlevel=$splitby.taxlevel, + #end if + classic=$splitby.classic, + #end if + #if $count: + count=$count, + #end if + #if $method: + method=$method, + #end if + #if float($cutoff) > 0.0: + cutoff=$cutoff, + #end if + hard=$hard, + #if $precision + precision=$precision, + #end if + large=$large, + cluster=$cluster, + processors='\${GALAXY_SLOTS:-8}' + )' + | sed 's/ //g' ## mothur trips over whitespace + | mothur && + + ## move output files to correct destination + #if $splitby.splitmethod == 'distance': + prefix='$splitby.matrix.dist' && + #elif $splitby.splitmethod == 'classify': + prefix='$splitby.dist' && + #elif $splitby.splitmethod == 'fasta': + prefix='$splitby.fasta' && + #end if + #if $cluster: + mv \${prefix%.dat}*.rabund '$rabund' && + mv \${prefix%.dat}*.sabund '$sabund' && + mv \${prefix%.dat}*.list '$otulist' && + #else: + mv \${prefix%.dat}*.temp . && + mv \${prefix%.dat}*.file '$splitfile' && + cat $splitfile && + #end if + mv mothur.*.logfile '$logfile' + ]]></command> + <inputs> + <conditional name="splitby"> + <param name="splitmethod" type="select" label="Split by" help=""> + <option value="distance">Distance</option> + <option value="classify">Classification</option> + <option value="fasta">Classification using fasta</option> + </param> + <when value="distance"> + <conditional name="matrix"> + <param name="format" type="select" label="Select a Distance Matrix Format" help=""> + <option value="column">Pairwise Column Matrix</option> + <option value="phylip">Phylip Distance Matrix</option> + </param> + <when value="column"> + <param name="dist" type="data" format="mothur.pair.dist" label="column - Distance Matrix"/> + <param name="name" type="data" format="mothur.names" label="name - Sequences Name reference"/> + </when> + <when value="phylip"> + <param name="dist" type="data" format="mothur.lower.dist,mothur.square.dist" label="phylip - Distance Matrix"/> + <param name="name" type="data" format="mothur.names" optional="true" label="name - Sequences Name reference"/> + </when> + </conditional> + </when> + <when value="classify"> + <param name="dist" type="data" format="mothur.pair.dist" label="column - Distance Matrix"/> + <param name="name" type="data" format="mothur.names" label="name - Sequences Name reference"/> + <param name="taxonomy" type="data" format="mothur.seq.taxonomy" label="taxonomy - Taxonomy (from Classify.seqs)"/> + <param name="taxlevel" type="integer" value="1" label="taxlevel - taxonomy level for split (default=1)" help="taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list"/> + </when> + <when value="fasta"> + <param name="fasta" type="data" format="mothur.align" label="fasta - Aligned Sequences" help="must be aligned sequences (mothur.align)"/> + <param name="name" type="data" format="mothur.names" label="name - Sequences Name reference"/> + <param name="taxonomy" type="data" format="mothur.seq.taxonomy" label="taxonomy - Taxonomy (from Classify.seqs)"/> + <param name="taxlevel" type="integer" value="3" label="taxlevel - taxonomy level for split (default=3)" help="taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list"/> + <param name="classic" type="boolean" checked="false" truevalue="true" falsevalue="false" label="classic - Use cluster.classic"/> + </when> + </conditional> + <param name="count" type="data" format="mothur.count_table" optional="true" label="count - a count_table" help="generated by count.seqs"/> + <param name="method" type="select" label="method - Select a Clustering Method" help=""> + <option value="furthest">Furthest neighbor</option> + <option value="nearest">Nearest neighbor</option> + <option value="average" selected="true">Average neighbor</option> + </param> + <param name="cutoff" type="float" value="0.0" min="0.0" label="cutoff - Distance Cutoff threshold - ignored if not > 0" help="Ignore pairwise distances larger than this, a common value would be 0.25"/> + <param name="hard" type="boolean" checked="true" truevalue="true" falsevalue="false" label="hard - Use hard cutoff instead of rounding" help=""/> + <param name="precision" type="select" optional="true" label="precision - Precision for rounding distance values" help="Set higher precision for longer genome scale sequence lengths"> + <option value="10">.1</option> + <option value="100" selected="true">.01</option> + <option value="1000">.001</option> + <option value="10000">.0001</option> + <option value="100000">.00001</option> + <option value="1000000">.000001</option> + </param> + <param name="large" type="boolean" checked="false" truevalue="true" falsevalue="false" label="large - distance matrix is too large to fit in RAM" help="If your job fails due to not enough memory error, set this to true to rerun"/> + <param name="cluster" type="boolean" falsevalue="false" truevalue="true" checked="true" label="The cluster parameter allows you to indicate whether you want to run the clustering or just split the distance matrix, default=T"/> + </inputs> + <outputs> + <data name="logfile" format="txt" label="${tool.name} on ${on_string}: logfile"/> + <data name="rabund" format="mothur.rabund" label="${tool.name} on ${on_string}: rabund (Rank Abundance)"/> + <data name="sabund" format="mothur.sabund" label="${tool.name} on ${on_string}: sabund (Species Abundance)"/> + <data name="otulist" format="mothur.list" label="${tool.name} on ${on_string}: list (OTU List)"/> + <collection name="splitdist" type="list" label="${tool.name} on ${on_string}: split dist"> + <filter>not cluster</filter> + <discover_datasets pattern=".*?\.dist\.(?P<designation>.*)\.temp" format="mothur.dist"/> + </collection> + <collection name="splitnames" type="list" label="${tool.name} on ${on_string}: split names"> + <filter>not cluster</filter> + <discover_datasets pattern=".*?\.names\.(?P<designation>.*)\.temp" format="mothur.names"/> + </collection> + <data name="splitfile" format="txt" label="${tool.name} on ${on_string}: split.file"> + <filter>not cluster</filter> + </data> + </outputs> + <tests> + <test><!-- test with distance method --> + <param name="splitmethod" value="distance"/> + <param name="format" value="phylip"/> + <param name="dist" value="98_sq_phylip_amazon.dist" ftype="mothur.square.dist"/> + <output name="otulist" md5="2613ef0a1805ba9de012a41e938d8947" ftype="mothur.list"/> + <output name="rabund" md5="4df813ec2d51c373a846a82380c7a1f8" ftype="mothur.rabund"/> + <output name="sabund" md5="8d6813a5e8d2ad426a0ee5fdd99f1a19" ftype="mothur.sabund"/> + <expand macro="logfile-test"/> + </test> + <test><!-- test with cluster false --> + <param name="splitmethod" value="distance"/> + <param name="format" value="phylip"/> + <param name="dist" value="98_sq_phylip_amazon.dist" ftype="mothur.square.dist"/> + <param name="cluster" value="false"/> + <output name="splitfile" ftype="txt"> + <assert_contents> + <has_text text="column"/> + <has_text text="dist"/> + <has_text text="names"/> + <has_text text="temp"/> + </assert_contents> + </output> + <output_collection name="splitnames" count="4"> + <element name="0" md5="27037eeb3e696888b24653d0996261cd" ftype="mothur.names"/> + </output_collection> + <output_collection name="splitdist" count="3"> + <element name="4" md5="f751aee00b598d3b6691d34f67dbc8d5" ftype="mothur.dist"/> + </output_collection> + <expand macro="logfile-test"/> + </test> + <test><!-- test with classify method --> + <param name="splitmethod" value="classify"/> + <param name="format" value="column"/> + <param name="dist" value="amazon.pair.dist" ftype="mothur.pair.dist"/> + <param name="name" value="amazon.names" ftype="mothur.names"/> + <param name="taxonomy" value="amazon.wang.wang.taxonomy" ftype="mothur.seq.taxonomy"/> + <output name="otulist" md5="d6eba624ad79759c530b9bc3285a1361" ftype="mothur.list"/> + <output name="rabund" md5="2a165e1e40644fccb8cc9f53d8915bc3" ftype="mothur.rabund"/> + <output name="sabund" md5="7aad8a9ca0eade414d6eba1f8bef960f" ftype="mothur.sabund"/> + <expand macro="logfile-test"/> + </test> + <test><!-- test with fasta --> + <param name="splitmethod" value="fasta"/> + <param name="fasta" value="amazon.align_head" ftype="mothur.align"/> + <param name="name" value="amazon.align_head.names" ftype="mothur.names"/> + <param name="taxonomy" value="amazon.align_head.wang.taxonomy" ftype="mothur.seq.taxonomy"/> + <param name="cutoff" value="9999"/> + <output name="otulist" md5="a1279248cf2bc1094e0046b2cff1b785" ftype="mothur.list"/> + <output name="rabund" md5="65ec9f326cd92fc607679b9902ec8430" ftype="mothur.rabund"/> + <output name="sabund" md5="854d3acd15f64299c5d9d9e18f2d51b4" ftype="mothur.sabund"/> + <expand macro="logfile-test"/> + </test> + </tests> + <help> +<![CDATA[ + +@MOTHUR_OVERVIEW@ + + +**Command Documenation** + +The cluster.split_ command assign sequences to OTUs (Operational Taxonomy Unit). + +.. _cluster.split: http://www.mothur.org/wiki/Cluster.split + +v1.28.0: Upgraded to Mothur 1.33, introduced cluster boolean. + +]]> + </help> + <expand macro="citations"/> +</tool>