diff mothur/README @ 3:6b358d0f17b4 draft

Update to Mothur version 1.24
author Jim Johnson <jj@umn.edu>
date Wed, 05 Sep 2012 19:49:43 -0500
parents e076d95dbdb5
children 3cfe41810949
line wrap: on
line diff
--- a/mothur/README	Tue Jun 07 17:05:08 2011 -0400
+++ b/mothur/README	Wed Sep 05 19:49:43 2012 -0500
@@ -1,7 +1,9 @@
 Provides galaxy tools for the Mothur metagenomics package -  http://www.mothur.org/wiki/Main_Page 
 
-Install mothur v.1.19.0 on your galaxy system so galaxy can execute the mothur command
-  ( This version of wrappers is designed for Mothur version 1.19 - it may work on later versions )
+(The environment variable MOTHUR_MAX_PROCESSORS can be used to limit the number of CPU processors used by mothur commands)
+
+Install mothur v.1.25.0 on your galaxy system so galaxy can execute the mothur command
+  ( This version of wrappers is designed for Mothur version 1.24 - it may work on later versions )
   http://www.mothur.org/wiki/Download_mothur
   http://www.mothur.org/wiki/Installation
   ( This Galaxy Mothur wrapper will invoke Mothur in command line mode: http://www.mothur.org/wiki/Command_line_mode )
@@ -13,6 +15,14 @@
   Install in galaxy:  tool-data/shared/jars/TreeVector.jar
 
 Install reference data from silva and greengenes
+ RDP reference file (modified for mothur):
+  http://www.mothur.org/wiki/RDP_reference_files
+   - 16S rRNA reference (RDP): A collection of 9,662 bacterial and 384 archaeal 16S rRNA gene sequences with an improved taxonomy compared to version 6.
+     http://www.mothur.org/w/images/2/29/Trainset7_112011.rdp.zip
+   - 16S rRNA reference (PDS): The RDP reference with three sequences reversed and 119 mitochondrial 16S rRNA gene sequences added as members of the Rickettsiales
+     http://www.mothur.org/w/images/4/4a/Trainset7_112011.pds.zip
+   - 28S rRNA reference (RDP): A collection of 8506 reference 28S rRNA gene sequences from the Fungi that were curated by the Kuske lab
+     http://www.mothur.org/w/images/3/36/FungiLSU_train_v7.zip
  Silva reference:
   http://www.mothur.org/wiki/Silva_reference_files
   - Bacterial references (14,956 sequences)
@@ -45,6 +55,8 @@
   SILVA-compatible mask:
      - lane1349.silva.filter - Pat Schloss's transcription of the mask from the Lane paper
        http://www.mothur.org/w/images/6/6d/Lane1349.silva.filter
+ Lookup Files for sff flow analysis using shhh.flows:
+  http://www.mothur.org/wiki/Alignment_database
 
  Example from UMN installation: (We also made these available in a Galaxy public data library)
     /project/db/galaxy/mothur/Silva.bacteria.zip
@@ -75,16 +87,28 @@
     /project/db/galaxy/mothur/Silva.eukarya.zip
     /project/db/galaxy/mothur/Gg_ss_map.zip
     /project/db/galaxy/mothur/core_set_aligned.imputed.fasta
+    /project/db/galaxy/mothur/RDP/FungiLSU_train_1400bp_8506_mod.fasta
+    /project/db/galaxy/mothur/RDP/FungiLSU_train_1400bp_8506_mod.tax
+    /project/db/galaxy/mothur/RDP/trainset6_032010.rdp.fasta
+    /project/db/galaxy/mothur/RDP/trainset6_032010.rdp.tax
+    /project/db/galaxy/mothur/RDP/trainset7_112011.pds.fasta
+    /project/db/galaxy/mothur/RDP/trainset7_112011.pds.tax
+    /project/db/galaxy/mothur/RDP/trainset7_112011.rdp.fasta
+    /project/db/galaxy/mothur/RDP/trainset7_112011.rdp.tax
 
 
-Add tool-data:  (contains  pointers to silva and greengenes reference data)
+
+Add tool-data:  (contains  pointers to silva, greengenes, and RDP reference data)
   tool-data/mothur_aligndb.loc
-  tool-data/mothur_calulators.loc
   tool-data/mothur_map.loc
   tool-data/mothur_taxonomy.loc
   tool-data/shared/jars/TreeVector.jar
 
 
+################################################################
+#### If you are manually adding this to your local galaxy:  ####
+################################################################
+
 add config files (*.xml) and wrapper code (*.py) from tools/mothur/*  to your galaxy installation 
 
 
@@ -94,6 +118,7 @@
 import metagenomics # added for metagenomics mothur
 
 
+
 add datatypes to:  datatypes_conf.xml
         <!-- Start Mothur Datatypes -->
         <datatype extension="otu" type="galaxy.datatypes.metagenomics:Otu" display_in_upload="true"/>
@@ -117,6 +142,9 @@
         <datatype extension="pair.dist" type="galaxy.datatypes.metagenomics:PairwiseDistanceMatrix" display_in_upload="true"/>
         <datatype extension="square.dist" type="galaxy.datatypes.metagenomics:SquareDistanceMatrix" display_in_upload="true"/>
         <datatype extension="lower.dist" type="galaxy.datatypes.metagenomics:LowerTriangleDistanceMatrix" display_in_upload="true"/>
+        <datatype extension="ref.taxonomy" type="galaxy.datatypes.metagenomics:RefTaxonomy" display_in_upload="true">
+            <converter file="ref_to_seq_taxonomy_converter.xml" target_datatype="seq.taxonomy"/>
+        </datatype>
         <datatype extension="seq.taxonomy" type="galaxy.datatypes.metagenomics:SequenceTaxonomy" display_in_upload="true"/>
         <datatype extension="rdp.taxonomy" type="galaxy.datatypes.metagenomics:RDPSequenceTaxonomy" display_in_upload="true"/>
         <datatype extension="cons.taxonomy" type="galaxy.datatypes.metagenomics:ConsensusTaxonomy" display_in_upload="true"/>
@@ -127,6 +155,7 @@
         <datatype extension="masked.quan" type="galaxy.datatypes.metagenomics:MaskedQuantile" display_in_upload="true"/>
         <datatype extension="filtered.masked.quan" type="galaxy.datatypes.metagenomics:FilteredMaskedQuantile" display_in_upload="true"/>
         <datatype extension="axes" type="galaxy.datatypes.metagenomics:Axes" display_in_upload="true"/>
+        <datatype extension="sff.flow" type="galaxy.datatypes.metagenomics:SffFlow" display_in_upload="true"/>
         <datatype extension="tre" type="galaxy.datatypes.data:Newick" display_in_upload="true"/>
         <!-- End Mothur Datatypes -->
 
@@ -138,18 +167,27 @@
       <tool file="mothur/get.groups.xml"/>
       <tool file="mothur/remove.groups.xml"/>
       <tool file="mothur/merge.groups.xml"/>
+      <tool file="mothur/count.groups.xml"/>
       <tool file="mothur/make.design.xml"/>
       <tool file="mothur/sub.sample.xml"/>
+      <tool file="mothur/sort.seqs.xml"/>
+      <tool file="mothur/create.database.xml"/>
     <label text="Mothur Sequence Analysis" id="mothur_sequence_analysis"/>
       <tool file="mothur/sffinfo.xml"/>
+      <tool file="mothur/trim.flows.xml"/>
+      <tool file="mothur/shhh.flows.xml"/>
+      <tool file="mothur/shhh.seqs.xml"/>
       <tool file="mothur/make.fastq.xml"/>
       <tool file="mothur/fastq.info.xml"/>
       <tool file="mothur/summary.seqs.xml"/>
+      <tool file="mothur/summary.qual.xml"/>
+      <tool file="mothur/count.seqs.xml"/>
       <tool file="mothur/reverse.seqs.xml"/>
       <tool file="mothur/list.seqs.xml"/>
       <tool file="mothur/get.seqs.xml"/>
       <tool file="mothur/remove.seqs.xml"/>
       <tool file="mothur/trim.seqs.xml"/>
+      <tool file="mothur/pcr.seqs.xml"/>
       <tool file="mothur/unique.seqs.xml"/>
       <tool file="mothur/deunique.seqs.xml"/>
       <tool file="mothur/chop.seqs.xml"/>
@@ -168,12 +206,15 @@
       <tool file="mothur/nmds.xml"/>
       <tool file="mothur/corr.axes.xml"/>
       <tool file="mothur/classify.seqs.xml"/>
+      <tool file="mothur/seq.error.xml"/>
     <label text="Mothur Sequence Chimera Detection" id="mothur_sequence_chimera"/>
       <tool file="mothur/chimera.bellerophon.xml"/>
       <tool file="mothur/chimera.ccode.xml"/>
       <tool file="mothur/chimera.check.xml"/>
+      <tool file="mothur/chimera.perseus.xml"/>
       <tool file="mothur/chimera.pintail.xml"/>
       <tool file="mothur/chimera.slayer.xml"/>
+      <tool file="mothur/chimera.uchime.xml"/>
     <label text="Mothur Operational Taxonomy Unit" id="mothur_taxonomy_unit"/>
       <tool file="mothur/pre.cluster.xml"/>
       <tool file="mothur/cluster.fragments.xml"/>
@@ -195,9 +236,12 @@
       <tool file="mothur/get.sabund.xml"/>
       <tool file="mothur/get.relabund.xml"/>
       <tool file="mothur/make.shared.xml"/>
+      <tool file="mothur/make.shared_from_biom.xml"/>
+      <tool file="mothur/make.biom.xml"/>
       <tool file="mothur/get.group.xml"/>
       <tool file="mothur/bin.seqs.xml"/>
       <tool file="mothur/get.sharedseqs.xml"/>
+      <tool file="mothur/summary.tax.xml"/>
     <label text="Mothur Single Sample Analysis" id="mothur_single_sample_analysis"/>
       <tool file="mothur/collect.single.xml"/>
       <tool file="mothur/rarefaction.single.xml"/>
@@ -208,8 +252,8 @@
       <tool file="mothur/rarefaction.shared.xml"/>
       <tool file="mothur/normalize.shared.xml"/>
       <tool file="mothur/summary.shared.xml"/>
+      <tool file="mothur/otu.association.xml"/>
       <tool file="mothur/dist.shared.xml"/>
-      <tool file="mothur/heatmap.bin.xml"/>
       <tool file="mothur/heatmap.sim.xml"/>
       <tool file="mothur/venn.xml"/>
       <tool file="mothur/tree.shared.xml"/>
@@ -222,6 +266,7 @@
       <tool file="mothur/homova.xml"/>
       <tool file="mothur/mantel.xml"/>
       <tool file="mothur/anosim.xml"/>
+      <tool file="mothur/cooccurrence.xml"/>
     <label text="Mothur Phylotype Analysis" id="mothur_phylotype_analysis"/>
       <tool file="mothur/get.lineage.xml"/>
       <tool file="mothur/remove.lineage.xml"/>
@@ -229,16 +274,17 @@
       <tool file="mothur/phylo.diversity.xml"/>
       <tool file="mothur/clearcut.xml"/>
       <tool file="mothur/indicator.xml"/>
+      <tool file="mothur/deunique.tree.xml"/>
+      <tool file="mothur/classify.tree.xml"/>
       <tool file="mothur/TreeVector.xml"/>
   </section> <!-- metagenomics_mothur -->
 
-
 ############ DESIGN NOTES #########################################################################################################
 Each mothur command has its own tool_config (.xml) file, but all call the same python wrapper code: mothur_wrapper.py
 
+  (The environment variable MOTHUR_MAX_PROCESSORS can be used to limit the number of CPU processors used by mothur commands)
+
 * Every mothur tool will call mothur_wrapper.py script with a --cmd= parameter that gives the mothur command name.
-* Many mothur commands require date to be read into memory (using read.dist, read.otu, read.tree) before executed the command,  
-  these are accomplished in the tool_config and mothur_wrapper.py with --READ_cmd= and --READ_<option> parameters. 
 * Every tool will produce the logfile of the mothur run as an output.
 * When the outputs of a mothur command could be determined in advance, they are included in the --result= parameter to mothur_wrapper.py
 * When the number of outputs cannot be determined in advance, the name patterns and datatypes of the outputs 
@@ -264,10 +310,7 @@
 # Each item  contains:   a regex pattern for matching filenames and  a galaxy datatype (separated by :)
 # The regex match.groups()[0] is used as the id name of the dataset, and must result in a unique name for each output
  --new_datasets='^\S+?\.((\S+)\.(unique|[0-9.]*)\.dist)$:lower.dist'
- # Many mothur commands first require data to be read into memory using: read.otu, read.dist, or read.tree
- # This prequisite command and its params are prefixed with 'READ_'
- --READ_cmd='read.otu'
- --READ_list=/home/galaxy/data/database/files/001/dataset_1557.dat
- --READ_group='/home/galaxy/data/database/files/001/dataset_1545.dat'
- --READ_label='unique,0.07'
 
+ ## 
+ ## NOTE:   The "read" commands were eliminated with Mothur version 1.18
+ ##