Mercurial > repos > simon-gladman > phyloseq_nmds
changeset 2:20adf95eb758 draft
Updated phyloseq_nmds.xml to allow for new datatype.
author | simon-gladman |
---|---|
date | Fri, 20 Jul 2018 00:23:53 -0400 |
parents | e376a618eb9f |
children | 5a2bda9b28c5 |
files | README.txt phyloseq_nmds.R phyloseq_nmds.xml |
diffstat | 3 files changed, 202 insertions(+), 83 deletions(-) [+] |
line wrap: on
line diff
--- a/README.txt Sat Jun 16 05:03:43 2018 -0400 +++ b/README.txt Fri Jul 20 00:23:53 2018 -0400 @@ -1,5 +1,39 @@ +# Phloseq_NMDS + +A Galaxy tool to produce NMDS plots using Phyloseq from either a BIOM1 file or 2 input tables. + +Currently produces the plots embedded in a html file for output with links to a PDF file. + +Requires: + +Phyloseq 1.22.3 +r-getopt 1.20.0 +ghostscript 9.18 + + ## Run phyloseq_nmds.R with three input files Rscript phyloseq_nmds.R --otu_table=GP_OTU_TABLE.txt --tax_table=GP_TAX_TABLE.txt --meta_table=GP_SAMPLE_TABLE.txt --method="bray" --kingdom=2 --cutoff=5 --category=6 --outdir=/outputdir --htmlfile=test.html ## Run phyloseq_nmds.R with biom file -Rscript phyloseq_nmds.R --biom=GP.biom --subset=6 --method=NMDS --distance=bray --kingdom=Phylum --cutoff=5 --keep=5 --outdir=/outputdir --htmlfile=biom_out.html \ No newline at end of file +Rscript phyloseq_nmds.R --biom=GP.biom --subset=6 --method=NMDS --distance=bray --kingdom=Phylum --cutoff=5 --keep=5 --outdir=/outputdir --htmlfile=biom_out.html + +Version history: + +XML Wrapper: + +Alpha version by Michael Thang of QFAB, Australia. + +1.22.3.1: Simon Gladman, Melbourne Bioinformatics + * Incorporated tests + * Requirements + * Version statement + * Citations + + +R Script: phyloseq_nmds.R + +0.1.0: + * Original version + +0.1.1: + * Added extra BIOM import functionality so it doesn't solely rely on phyloseq's internal importer.
--- a/phyloseq_nmds.R Sat Jun 16 05:03:43 2018 -0400 +++ b/phyloseq_nmds.R Fri Jul 20 00:23:53 2018 -0400 @@ -5,6 +5,9 @@ library(plyr) Sys.setenv("DISPLAY"=":1") library("ggplot2") +suppressPackageStartupMessages(library("doParallel")) +ncores = ceiling(detectCores() * 0.8) +registerDoParallel(cores=ncores) options(warn=-1) @@ -15,14 +18,16 @@ option_specification = matrix(c( 'otu_table','o',2,'character', 'tax_table','t',2,'character', - 'meta_table','s',2,'character', - 'biom','i',2,'character', - 'subset','x',2,'numeric', + 'meta_table','m',2,'character', + 'biom','b',2,'character', + 'filter','f',2,'numeric', + 'subset','s',2,'character', 'method','n',2,'character', 'distance','d',2,'character', 'kingdom','k',2,'character', - 'cutoff','v',2,'numeric', - 'category','c',2,'numeric', + 'plottype','e',2,'numeric', + 'cutoff','c',2,'numeric', + 'category','g',2,'numeric', 'keep','p',2,'numeric', 'outdir','r',2,'character', 'htmlfile','h',2,'character' @@ -46,13 +51,20 @@ kingdom_str<-options$kingdom distance<-options$distance keep<-options$keep +plottype<-options$plottype +filter<-options$filter +### prepare the directory and file name +pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf")) +pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png")) +pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png")) +htmlfile <- gsub("[ ]+", "", paste(options$htmlfile)) ### This function accepts different two different type of BIOM file format readBIOM<-function(inBiom){ tryCatch({ - phyloseq_obj<-import_biom(inBiom) + phyloseq_obj<-import_biom(inBiom,parallel=TRUE) return(phyloseq_obj) }, error=function(e){ @@ -66,7 +78,7 @@ metadata.temp<-sample_metadata(biom_obj) METADATA_TABLE<-plyr::ldply(metadata.temp, rbind) - rownames(METADATA_TABLE)<-as.character(METADATA_TABLE$.id) + rownames(METADATA_TABLE)<-as.character(METADATA_TABLE$.id) phyloseq_obj = phyloseq(OTU_TABLE, TAXONOMY_TABLE,sample_data(METADATA_TABLE)) return(phyloseq_obj) @@ -75,18 +87,97 @@ } +create_OTU_PDF<-function(pdf_file,phyloseq_obj,phyloseq_ord,kingdom_str,htmlfile,pngfile_nmds,pngfile_nmds_facet){ + pdf(pdf_file); + p1<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + print(p1) + p2<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3) + print(p2) + garbage<-dev.off(); + + #png('nmds.png') + bitmap(pngfile_nmds,"png16m") + p3<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + print(p3) + garbage<-dev.off() + + #png('nmds_facet.png') + bitmap(pngfile_nmds_facet,"png16m") + p4<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3) + print(p4) + garbage<-dev.off() + + create_HTML_1(htmlfile) +} + +create_SAMPLE_PDF<-function(pdf_file,phyloseq_obj,phyloseq_ord,htmlfile,pngfile_nmds,category_type){ + pdf(pdf_file); + p <- plot_ordination(physeq_filter, physeq_ord, type="samples", color=category_type) + p <- p + geom_point(aes(fill=category_type)) + geom_point(size=5) + ggtitle("samples") + print(p) + garbage<-dev.off(); + + #png('nmds.png') + bitmap(pngfile_nmds,"png16m") + p1 <- plot_ordination(physeq_filter, physeq_ord, type="samples", color=category_type) + p1 <- p1 + geom_point(aes(fill=category_type)) + geom_point(size=5) + ggtitle("samples") + print(p1) + garbage<-dev.off(); + + create_HTML_2(htmlfile) +} + +create_HTML_1<-function(htmlfile){ + htmlfile_handle <- file(htmlfile) + html_output = c('<html><body>', + '<table align="center>', + '<tr>', + '<td valign="middle" style="vertical-align:middle;">', + '<a href="pdffile.pdf"><img src="nmds.png"/></a>', + '</td>', + '</tr>', + '<tr>', + '<td valign="middle" style="vertical-align:middle;">', + '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>', + '</td>', + '</tr>', + '</table>', + '</html></body>'); + writeLines(html_output, htmlfile_handle); + close(htmlfile_handle); +} + +create_HTML_2<-function(htmlfile){ + htmlfile_handle <- file(htmlfile) + html_output = c('<html><body>', + '<table align="center>', + '<tr>', + '<td valign="middle" style="vertical-align:middle;">', + '<a href="pdffile.pdf"><img src="nmds.png"/></a>', + '</td>', + '</tr>', + '</table>', + '</html></body>'); + writeLines(html_output, htmlfile_handle); + close(htmlfile_handle); +} if(!is.null(options$biom)){ #physeq<-import_biom(options$biom) physeq<-readBIOM(options$biom) - + if(length(rank_names(physeq)) == 8){ - tax_table(physeq) <- tax_table(physeq)[,-1] - colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") + tax_table(physeq) <- tax_table(physeq)[,-1] + colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") + } else { + colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") } + ### select column name from sample table for nmds plot - category_type<-colnames(sample_data(physeq))[options$subset] + ## which(colnames(sample_data(biom)) == "vegetation_type_id") + #category_type<-colnames(sample_data(physeq))[options$subset] + category_type <- options$subset ### obtain the unique value in the selected column from sample table category_option<-unique(sample_data(physeq))[,options$subset] @@ -121,69 +212,46 @@ ### select a kingdom for phyloseq plot (e.g., "phylum") #kingdom_str<-colnames(tax_table)[options$kingdom] #kingdom_str<-options$kingdom - - - ### Remove OTUs that do not appear more than 5 times in more than half the samples + + ### filtering OTUs based on cutoff value (e.g., 5) - physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.1*nsamples(physeq)) - + #physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.1*nsamples(physeq)) + physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=filter*nsamples(physeq)) + ### phyloseq object after filtered physeq_filter = prune_taxa(physeq_temp, physeq) - + ## Transform to even sampling depth physeq_filter = transform_sample_counts(physeq_filter, function(x) 1E6 * x/sum(x)) + + +if(plottype == 1){ + + # kingdom_str <- as.numeric(kingdom_str) ## Keep only the most abundant five phyla - phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum, na.rm=TRUE) - + + phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum,na.rm=TRUE) ### number of most abundance phyla to keep topphyla = names(sort(phylum.sum, TRUE))[1:keep] + physeq_filter = prune_taxa((tax_table(physeq_filter)[, kingdom_str] %in% topphyla), physeq_filter) +}else{ + + otu_table(physeq_filter)<-otu_table(physeq_filter)[,colSums(otu_table(physeq_filter)) > 1] + +} ### select category to plot NMDS category_input = get_variable(physeq_filter, category_type) %in% category_option sample_data(physeq_filter)$category_input <- factor(category_input) - ### prepare the directory and file name - pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf")) - pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png")) - pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png")) - htmlfile <- gsub("[ ]+", "", paste(options$htmlfile)) - - - # Produce PDF file - pdf(pdffile); physeq_ord<-ordinate(physeq_filter,method,distance) - plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") - plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3) - garbage<-dev.off(); - - - #png('nmds.png') - bitmap(pngfile_nmds,"png16m") - plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") - garbage<-dev.off() - #png('nmds_facet.png') - bitmap(pngfile_nmds_facet,"png16m") - plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3) - garbage<-dev.off() +if(plottype == 1){ + #kingdom_str = colnames(tax_table)[2] + create_OTU_PDF(pdffile,physeq_filter,physeq_ord,kingdom_str,htmlfile,pngfile_nmds,pngfile_nmds_facet) +}else{ + create_SAMPLE_PDF(pdffile,physeq_filter,physeq_ord,htmlfile,pngfile_nmds,category_type) +} - # Produce the HTML file - htmlfile_handle <- file(htmlfile) - html_output = c('<html><body>', - '<table align="center>', - '<tr>', - '<td valign="middle" style="vertical-align:middle;">', - '<a href="pdffile.pdf"><img src="nmds.png"/></a>', - '</td>', - '</tr>', - '<tr>', - '<td valign="middle" style="vertical-align:middle;">', - '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>', - '</td>', - '</tr>', - '</table>', - '</html></body>'); - writeLines(html_output, htmlfile_handle); - close(htmlfile_handle);
--- a/phyloseq_nmds.xml Sat Jun 16 05:03:43 2018 -0400 +++ b/phyloseq_nmds.xml Fri Jul 20 00:23:53 2018 -0400 @@ -6,7 +6,7 @@ <requirement type="package" version="9.18">ghostscript</requirement> </requirements> <version_command><![CDATA[ - echo $(R --version | grep version | grep -v GNU)", phyloseq version" $(R --vanilla --slave -e "library(DESeq2); cat(sessionInfo()\$otherPkgs\$DESeq2\$Version)" 2> /dev/null | grep -v -i "WARNING: ") + echo $(R --version | grep version | grep -v GNU)", phyloseq version" $(R --vanilla --slave -e "library(phyloseq); cat(sessionInfo()\$otherPkgs\$phyloseq\$Version)" 2> /dev/null | grep -v -i "WARNING: ") ]]></version_command> <command detect_errors="exit_code"><![CDATA[ Rscript '${__tool_directory__}/phyloseq_nmds.R' @@ -17,34 +17,47 @@ --otu_table='$OTU_TABLE' --tax_table='$TAX_TABLE' --meta_table='$META_TABLE' - --category=$file_source.category + --category='$file_source.category' #end if --method='$ord_method' --distance='$distance' --kingdom='$kingdom_field' - --cutoff=$cutoff - --keep=$keep - --outdir='$htmlfile.files_path' + --cutoff='$cutoff' + --keep='$keep' + --filter='$nsample' + --plottype='$plot_type' + --outdir="$htmlfile.files_path" --htmlfile='$htmlfile' - ]]> + ]]> </command> + <inputs> + <conditional name="file_source"> <param name="file_source_selector" type="select" label="Choose a file type"> <option value="set_biom" selected="True">BIOM File</option> - <option value="set_table">TABULAR Files</option> + <option value="set_table">TABULAR File</option> </param> <when value="set_biom"> - <param format="biom1" name="input" type="data" label="Input File" help="A biom1 table of the data to be visualised"/> - <param name="subsetColumn" size="10" type="integer" value="1" label="Select a subset column from metadata table"/> + <param format="biom1" name="input" type="data" label="Input File"/> + <!--<param format="tabular" name="META_COLUMN" type="data" label="Column File"/>--> + <param name="subsetColumn" type="select" label="Select a column"> + <options> + <filter type="data_meta" ref="input" key="table_column_metadata_headers" /> + </options> + </param> </when> <when value="set_table"> - <param format="tabular" name="OTU_TABLE" type="data" label="OTU table" help="A table of OTUs"/> - <param format="tabular" name="TAX_TABLE" type="data" label="Taxonomy table" help="A table of the Taxonomys"/> - <param format="tabular" name="META_TABLE" type="data" label="Metadata table" help="Table containing all the metadata"/> - <param name="category" type="data_column" data_ref="META_TABLE" use_header_names="True" label="Select a subset column from metadata table"/> + <param format="tabular" name="OTU_TABLE" type="data" label="OTU table"/> + <param format="tabular" name="TAX_TABLE" type="data" label="Taxonomy table"/> + <param format="tabular" name="META_TABLE" type="data" label="Metadata table"/> + <param name="category" type="data_column" data_ref="META_TABLE" use_header_names="True" label="Select a column"/> </when> </conditional> + <param name="plot_type" type="select" label="Type of Plot" help="NMDS plot by type (e.g., OTU, SAMPLE)"> + <option value="1">OTU</option> + <option value="2">SAMPLE</option> + </param> <param name="ord_method" type="select" display="radio" label="Select NMDS method"> <option value="NMDS" selected="true">NMDS</option> <option value="DCA">DCA</option> @@ -68,20 +81,23 @@ <option value="Genus">Genus</option> <option value="Species">Species</option> </param> - <param name="cutoff" size="10" type="integer" value="5" label="cutoff value for filtering OTU table" help="Remove OTUs that do not appear more than 5 times"/> - <param name="keep" size="10" type="integer" value="5" label="number of most abundant phyla" help="provide the number of most abundancet phyla to display"/> + <param name="nsample" size="10" type="float" value="0.5" label="Number of samples to keep" help="The percentage of sample number to keep for analysis (e.g., 0.5 is equivalent to half of the samples to keep for analysis)"/> + <param name="cutoff" size="10" type="integer" value="5" label="Cutoff value for filtering OTU table" help="Remove OTUs that do not appear more than 5 times"/> + <param name="keep" size="10" type="integer" value="5" label="Number of most abundant phyla" help="provide the number of most abundancet phyla to display"/> </inputs> + <outputs> - <data format="html" name="htmlfile" label="${tool.name}.html"/> + <data format="html" name="htmlfile" label="${tool.name}.html"/> </outputs> + <tests> <test> <!-- Test #1: Test BIOM format input --> <!-- Equivalent command (replace + with double dash): Rscript phyloseq_nmds.R +biom=test-data/GP.biom +subset=6 +method=NMDS +distance=bray +kingdom=Phylum +cutoff=5 +keep=5 +outdir=outputdir +htmlfile=biom_out.html --> <conditional name="file_source"> <param name="file_source_selector" value="set_biom"/> - <param name="input" value="GP.biom"/> - <param name="subsetColumn" value="6" /> + <param name="input" value="GP.biom" format="biom1"/> + <param name="subsetColumn" value="Primer" /> </conditional> <output name="htmlfile" ftype="html" file="biom_out.html" /> </test> @@ -99,7 +115,8 @@ <output name="htmlfile" ftype="html" file="test.html" /> </test> </tests> - <help><![CDATA[ + + <help> **What it does** Creates NMDS plot using R package called phyloseq_. @@ -110,16 +127,16 @@ **Input** - **Choose a file type** - BIOM File or TABULAR file -- **Select a subset column from metadata table** - This column is used to extract unique value from the column of interest for NMDS plot +- **Select a column** - This column is used to extract unique value from the column of interest for NMDS plot - **OTU TABLE** - this is a OTU matrix - **Taxonomy TABLE** - this is a TAX matrix - **Metadata TABLE** - this is a metadata file of the experiment design - **Select NMDS method** - select a method for NMDS plot - **Select a distance for NMDS** - select a distance option for NMDS plot - **Select a taxonomy rank** - select a taxonomy for NMDS plot -- **cutoff value for filtering OTU table** - this cutoff value is used to fitler OTU table -- **number of most abundant phyla** - a number of most abundant phyla to be display in NMDS plot - ]]> +- **Number of samples to keep** - Number of samples to keep for analysis +- **Cutoff value for filtering OTU table** - this cutoff value is used to fitler OTU table +- **Number of most abundant phyla** - a number of most abundant phyla to be display in NMDS plot </help> <citations> <citation type="doi">10.18129/B9.bioc.phyloseq</citation>