changeset 2:20adf95eb758 draft

Updated phyloseq_nmds.xml to allow for new datatype.
author simon-gladman
date Fri, 20 Jul 2018 00:23:53 -0400
parents e376a618eb9f
children 5a2bda9b28c5
files README.txt phyloseq_nmds.R phyloseq_nmds.xml
diffstat 3 files changed, 202 insertions(+), 83 deletions(-) [+]
line wrap: on
line diff
--- a/README.txt	Sat Jun 16 05:03:43 2018 -0400
+++ b/README.txt	Fri Jul 20 00:23:53 2018 -0400
@@ -1,5 +1,39 @@
+# Phloseq_NMDS
+
+A Galaxy tool to produce NMDS plots using Phyloseq from either a BIOM1 file or 2 input tables.
+
+Currently produces the plots embedded in a html file for output with links to a PDF file.
+
+Requires:
+
+Phyloseq 1.22.3
+r-getopt 1.20.0
+ghostscript 9.18
+
+
 ## Run phyloseq_nmds.R with three input files
 Rscript phyloseq_nmds.R --otu_table=GP_OTU_TABLE.txt --tax_table=GP_TAX_TABLE.txt --meta_table=GP_SAMPLE_TABLE.txt --method="bray" --kingdom=2 --cutoff=5 --category=6 --outdir=/outputdir --htmlfile=test.html
 
 ## Run phyloseq_nmds.R with biom file
-Rscript phyloseq_nmds.R --biom=GP.biom --subset=6 --method=NMDS --distance=bray --kingdom=Phylum --cutoff=5 --keep=5 --outdir=/outputdir --htmlfile=biom_out.html
\ No newline at end of file
+Rscript phyloseq_nmds.R --biom=GP.biom --subset=6 --method=NMDS --distance=bray --kingdom=Phylum --cutoff=5 --keep=5 --outdir=/outputdir --htmlfile=biom_out.html
+
+Version history:
+
+XML Wrapper:
+
+Alpha version by Michael Thang of QFAB, Australia.
+
+1.22.3.1: Simon Gladman, Melbourne Bioinformatics
+    * Incorporated tests
+    * Requirements
+    * Version statement
+    * Citations
+
+
+R Script: phyloseq_nmds.R
+
+0.1.0:
+    * Original version
+
+0.1.1:
+    * Added extra BIOM import functionality so it doesn't solely rely on phyloseq's internal importer.
--- a/phyloseq_nmds.R	Sat Jun 16 05:03:43 2018 -0400
+++ b/phyloseq_nmds.R	Fri Jul 20 00:23:53 2018 -0400
@@ -5,6 +5,9 @@
 library(plyr)
 Sys.setenv("DISPLAY"=":1")
 library("ggplot2")
+suppressPackageStartupMessages(library("doParallel"))
+ncores = ceiling(detectCores() * 0.8)
+registerDoParallel(cores=ncores)
 
 options(warn=-1)
 
@@ -15,14 +18,16 @@
 option_specification = matrix(c(
    'otu_table','o',2,'character',
    'tax_table','t',2,'character',
-  'meta_table','s',2,'character',
-        'biom','i',2,'character',
-      'subset','x',2,'numeric',
+  'meta_table','m',2,'character',
+        'biom','b',2,'character',
+      'filter','f',2,'numeric',
+      'subset','s',2,'character',
       'method','n',2,'character',
     'distance','d',2,'character',
      'kingdom','k',2,'character',
-      'cutoff','v',2,'numeric',
-    'category','c',2,'numeric',
+    'plottype','e',2,'numeric',
+      'cutoff','c',2,'numeric',
+    'category','g',2,'numeric',
         'keep','p',2,'numeric',
       'outdir','r',2,'character',
     'htmlfile','h',2,'character'
@@ -46,13 +51,20 @@
 kingdom_str<-options$kingdom
 distance<-options$distance
 keep<-options$keep
+plottype<-options$plottype
+filter<-options$filter
 
+### prepare the directory and file name
+pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf"))
+pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png"))
+pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png"))
+htmlfile <- gsub("[ ]+", "", paste(options$htmlfile))
 
 
 ### This function accepts different two different type of BIOM file format
 readBIOM<-function(inBiom){
 	tryCatch({
-		phyloseq_obj<-import_biom(inBiom)
+		phyloseq_obj<-import_biom(inBiom,parallel=TRUE)
 		return(phyloseq_obj)
 		},
 		error=function(e){
@@ -66,7 +78,7 @@
 		
 		metadata.temp<-sample_metadata(biom_obj)
 		METADATA_TABLE<-plyr::ldply(metadata.temp, rbind)
-                rownames(METADATA_TABLE)<-as.character(METADATA_TABLE$.id)
+        rownames(METADATA_TABLE)<-as.character(METADATA_TABLE$.id)
 
 		phyloseq_obj = phyloseq(OTU_TABLE, TAXONOMY_TABLE,sample_data(METADATA_TABLE))
 		return(phyloseq_obj)
@@ -75,18 +87,97 @@
 }
 
 
+create_OTU_PDF<-function(pdf_file,phyloseq_obj,phyloseq_ord,kingdom_str,htmlfile,pngfile_nmds,pngfile_nmds_facet){
+    pdf(pdf_file);
+    p1<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
+    print(p1)
+    p2<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
+    print(p2)
+    garbage<-dev.off();
+
+    #png('nmds.png')
+    bitmap(pngfile_nmds,"png16m")
+    p3<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
+    print(p3)
+    garbage<-dev.off()
+
+    #png('nmds_facet.png')
+    bitmap(pngfile_nmds_facet,"png16m")
+    p4<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
+    print(p4)
+    garbage<-dev.off()
+
+    create_HTML_1(htmlfile)
+}
+
+create_SAMPLE_PDF<-function(pdf_file,phyloseq_obj,phyloseq_ord,htmlfile,pngfile_nmds,category_type){
+    pdf(pdf_file);
+    p <- plot_ordination(physeq_filter, physeq_ord, type="samples", color=category_type)
+    p <- p + geom_point(aes(fill=category_type)) + geom_point(size=5) + ggtitle("samples")
+    print(p)
+    garbage<-dev.off();
+
+    #png('nmds.png')
+    bitmap(pngfile_nmds,"png16m")
+    p1 <- plot_ordination(physeq_filter, physeq_ord, type="samples", color=category_type)
+    p1 <- p1 + geom_point(aes(fill=category_type)) + geom_point(size=5) + ggtitle("samples")
+    print(p1)
+    garbage<-dev.off();
+
+     create_HTML_2(htmlfile)
+}
+
+create_HTML_1<-function(htmlfile){
+    htmlfile_handle <- file(htmlfile)
+    html_output = c('<html><body>',
+                    '<table align="center>',
+                    '<tr>',
+                    '<td valign="middle" style="vertical-align:middle;">',
+                        '<a href="pdffile.pdf"><img src="nmds.png"/></a>',
+                    '</td>',
+                    '</tr>',
+                    '<tr>',
+                    '<td valign="middle" style="vertical-align:middle;">',
+                    '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>',
+                    '</td>',
+                    '</tr>',
+                    '</table>',
+                    '</html></body>');
+     writeLines(html_output, htmlfile_handle);
+     close(htmlfile_handle);
+}
+
+create_HTML_2<-function(htmlfile){
+    htmlfile_handle <- file(htmlfile)
+    html_output = c('<html><body>',
+                    '<table align="center>',
+                    '<tr>',
+                    '<td valign="middle" style="vertical-align:middle;">',
+                        '<a href="pdffile.pdf"><img src="nmds.png"/></a>',
+                    '</td>',
+                    '</tr>',
+                    '</table>',
+                    '</html></body>');
+    writeLines(html_output, htmlfile_handle);
+    close(htmlfile_handle);
+}
 
 if(!is.null(options$biom)){
     
     #physeq<-import_biom(options$biom)
     physeq<-readBIOM(options$biom)
-
+    
     if(length(rank_names(physeq)) == 8){
-    tax_table(physeq) <- tax_table(physeq)[,-1]
-    colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
+        tax_table(physeq) <- tax_table(physeq)[,-1]
+        colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
+    } else {
+        colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
     }
+    
     ### select column name from sample table for nmds plot
-    category_type<-colnames(sample_data(physeq))[options$subset]
+    ## which(colnames(sample_data(biom)) == "vegetation_type_id")
+    #category_type<-colnames(sample_data(physeq))[options$subset]
+    category_type <- options$subset
 
     ### obtain the unique value in the selected column from sample table
     category_option<-unique(sample_data(physeq))[,options$subset]
@@ -121,69 +212,46 @@
     ### select a kingdom for phyloseq plot (e.g., "phylum")
     #kingdom_str<-colnames(tax_table)[options$kingdom]
     #kingdom_str<-options$kingdom
-
-
-    ### Remove OTUs that do not appear more than 5 times in more than half the samples
+    
+    
     ### filtering OTUs based on cutoff value (e.g., 5)
-    physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.1*nsamples(physeq))
-
+    #physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.1*nsamples(physeq))
+    physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=filter*nsamples(physeq))
+    
     ### phyloseq object after filtered
     physeq_filter = prune_taxa(physeq_temp, physeq)
-
+    
     ## Transform to even sampling depth
     physeq_filter = transform_sample_counts(physeq_filter, function(x) 1E6 * x/sum(x))
 
+
+
+if(plottype == 1){
+
+    # kingdom_str <- as.numeric(kingdom_str)
     ## Keep only the most abundant five phyla
-    phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum, na.rm=TRUE)
-        
+
+    phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum,na.rm=TRUE)
     ### number of most abundance phyla to keep
     topphyla = names(sort(phylum.sum, TRUE))[1:keep]
+
     physeq_filter = prune_taxa((tax_table(physeq_filter)[, kingdom_str] %in% topphyla), physeq_filter)
 
+}else{
+ 
+    otu_table(physeq_filter)<-otu_table(physeq_filter)[,colSums(otu_table(physeq_filter)) > 1]
+
+}
     ### select category to plot NMDS
     category_input = get_variable(physeq_filter, category_type) %in% category_option
     sample_data(physeq_filter)$category_input <- factor(category_input)
 
-    ### prepare the directory and file name
-    pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf"))
-    pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png"))
-    pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png"))
-    htmlfile <- gsub("[ ]+", "", paste(options$htmlfile))
-
-
-    # Produce PDF file
-    pdf(pdffile);
     physeq_ord<-ordinate(physeq_filter,method,distance)
-    plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
-    plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
-    garbage<-dev.off();
-
-
-    #png('nmds.png')
-    bitmap(pngfile_nmds,"png16m")
-    plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
-    garbage<-dev.off()
 
-    #png('nmds_facet.png')
-    bitmap(pngfile_nmds_facet,"png16m")
-    plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
-    garbage<-dev.off()
+if(plottype == 1){
+    #kingdom_str = colnames(tax_table)[2]
+    create_OTU_PDF(pdffile,physeq_filter,physeq_ord,kingdom_str,htmlfile,pngfile_nmds,pngfile_nmds_facet) 
+}else{
+    create_SAMPLE_PDF(pdffile,physeq_filter,physeq_ord,htmlfile,pngfile_nmds,category_type)
+}
 
-    # Produce the HTML file
-    htmlfile_handle <- file(htmlfile)
-    html_output = c('<html><body>',
-    	        '<table align="center>',
-    		'<tr>',
-    		'<td valign="middle" style="vertical-align:middle;">',
-                    '<a href="pdffile.pdf"><img src="nmds.png"/></a>',
-    		'</td>',
-    		'</tr>',
-    		'<tr>',
-            '<td valign="middle" style="vertical-align:middle;">',
-            '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>',
-            '</td>',
-            '</tr>',
-    		'</table>',
-                    '</html></body>');
-    writeLines(html_output, htmlfile_handle);
-    close(htmlfile_handle);
--- a/phyloseq_nmds.xml	Sat Jun 16 05:03:43 2018 -0400
+++ b/phyloseq_nmds.xml	Fri Jul 20 00:23:53 2018 -0400
@@ -6,7 +6,7 @@
         <requirement type="package" version="9.18">ghostscript</requirement>
     </requirements>
     <version_command><![CDATA[
-        echo $(R --version | grep version | grep -v GNU)", phyloseq version" $(R --vanilla --slave -e "library(DESeq2); cat(sessionInfo()\$otherPkgs\$DESeq2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+        echo $(R --version | grep version | grep -v GNU)", phyloseq version" $(R --vanilla --slave -e "library(phyloseq); cat(sessionInfo()\$otherPkgs\$phyloseq\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
     ]]></version_command>
     <command detect_errors="exit_code"><![CDATA[
         Rscript '${__tool_directory__}/phyloseq_nmds.R'
@@ -17,34 +17,47 @@
             --otu_table='$OTU_TABLE'
             --tax_table='$TAX_TABLE'
             --meta_table='$META_TABLE'
-            --category=$file_source.category
+            --category='$file_source.category'
         #end if
         --method='$ord_method'
         --distance='$distance'
         --kingdom='$kingdom_field'
-        --cutoff=$cutoff
-        --keep=$keep
-        --outdir='$htmlfile.files_path'
+        --cutoff='$cutoff'
+        --keep='$keep'
+        --filter='$nsample'
+        --plottype='$plot_type'
+        --outdir="$htmlfile.files_path"
         --htmlfile='$htmlfile'
-    ]]>
+        ]]>
     </command>
+
     <inputs>
+
         <conditional name="file_source">
             <param name="file_source_selector" type="select" label="Choose a file type">
                 <option value="set_biom" selected="True">BIOM File</option>
-                <option value="set_table">TABULAR Files</option>
+                <option value="set_table">TABULAR File</option>
             </param>
             <when value="set_biom">
-                <param format="biom1" name="input" type="data" label="Input File" help="A biom1 table of the data to be visualised"/>
-                <param name="subsetColumn" size="10" type="integer" value="1" label="Select a subset column from metadata table"/>
+                <param format="biom1" name="input" type="data" label="Input File"/>
+                <!--<param format="tabular" name="META_COLUMN" type="data" label="Column File"/>-->
+                <param name="subsetColumn" type="select" label="Select a column">
+                    <options>
+                        <filter type="data_meta" ref="input" key="table_column_metadata_headers" />
+                    </options>
+                </param>
             </when>
             <when value="set_table">
-                <param format="tabular" name="OTU_TABLE" type="data" label="OTU table" help="A table of OTUs"/>
-                <param format="tabular" name="TAX_TABLE" type="data" label="Taxonomy table" help="A table of the Taxonomys"/>
-                <param format="tabular" name="META_TABLE" type="data" label="Metadata table" help="Table containing all the metadata"/>
-                <param name="category" type="data_column" data_ref="META_TABLE" use_header_names="True" label="Select a subset column from metadata table"/>
+                <param format="tabular" name="OTU_TABLE" type="data" label="OTU table"/>
+                <param format="tabular" name="TAX_TABLE" type="data" label="Taxonomy table"/>
+                <param format="tabular" name="META_TABLE" type="data" label="Metadata table"/>
+                <param name="category" type="data_column" data_ref="META_TABLE" use_header_names="True" label="Select a column"/>
             </when>
         </conditional>
+        <param name="plot_type" type="select" label="Type of Plot" help="NMDS plot by type (e.g., OTU, SAMPLE)">
+            <option value="1">OTU</option>
+            <option value="2">SAMPLE</option>
+        </param>
         <param name="ord_method" type="select" display="radio" label="Select NMDS method">
             <option value="NMDS" selected="true">NMDS</option>
             <option value="DCA">DCA</option>
@@ -68,20 +81,23 @@
             <option value="Genus">Genus</option>
             <option value="Species">Species</option>
         </param>
-        <param name="cutoff" size="10" type="integer" value="5" label="cutoff value for filtering OTU table" help="Remove OTUs that do not appear more than 5 times"/>
-        <param name="keep" size="10" type="integer" value="5" label="number of most abundant phyla" help="provide the number of most abundancet phyla to display"/>
+            <param name="nsample" size="10" type="float" value="0.5" label="Number of samples to keep" help="The percentage of sample number to keep for analysis (e.g., 0.5 is equivalent to half of the samples to keep for analysis)"/>
+            <param name="cutoff" size="10" type="integer" value="5" label="Cutoff value for filtering OTU table" help="Remove OTUs that do not appear more than 5 times"/>
+            <param name="keep" size="10" type="integer" value="5" label="Number of most abundant phyla" help="provide the number of most abundancet phyla to display"/>
     </inputs>
+
     <outputs>
-        <data format="html" name="htmlfile" label="${tool.name}.html"/>
+       <data format="html" name="htmlfile" label="${tool.name}.html"/>
     </outputs>
+
     <tests>
         <test> <!-- Test #1: Test BIOM format input -->
             <!-- Equivalent command (replace + with double dash): Rscript phyloseq_nmds.R +biom=test-data/GP.biom +subset=6 +method=NMDS
             +distance=bray +kingdom=Phylum +cutoff=5 +keep=5 +outdir=outputdir +htmlfile=biom_out.html -->
             <conditional name="file_source">
                 <param name="file_source_selector" value="set_biom"/>
-                <param name="input" value="GP.biom"/>
-                <param name="subsetColumn" value="6" />
+                <param name="input" value="GP.biom" format="biom1"/>
+                <param name="subsetColumn" value="Primer" />
             </conditional>
             <output name="htmlfile" ftype="html" file="biom_out.html" />
         </test>
@@ -99,7 +115,8 @@
             <output name="htmlfile" ftype="html" file="test.html" />
         </test>
     </tests>
-    <help><![CDATA[
+
+    <help>
 **What it does**
 Creates NMDS plot using R package called phyloseq_.
 
@@ -110,16 +127,16 @@
 **Input**
 
 - **Choose a file type** - BIOM File or TABULAR file
-- **Select a subset column from metadata table** - This column is used to extract unique value from the column of interest for NMDS plot
+- **Select a column** - This column is used to extract unique value from the column of interest for NMDS plot
 - **OTU TABLE** - this is a OTU matrix
 - **Taxonomy TABLE** - this is a TAX matrix
 - **Metadata TABLE** - this is a metadata file of the experiment design
 - **Select NMDS method** - select a method for NMDS plot
 - **Select a distance for NMDS** - select a distance option for NMDS plot
 - **Select a taxonomy rank** - select a taxonomy for NMDS plot
-- **cutoff value for filtering OTU table** - this cutoff value is used to fitler OTU table
-- **number of most abundant phyla** - a number of most abundant phyla to be display in NMDS plot
-    ]]>
+- **Number of samples to keep** - Number of samples to keep for analysis
+- **Cutoff value for filtering OTU table** - this cutoff value is used to fitler OTU table
+- **Number of most abundant phyla** - a number of most abundant phyla to be display in NMDS plot
     </help>
     <citations>
         <citation type="doi">10.18129/B9.bioc.phyloseq</citation>