diff phyloseq_nmds.R @ 0:b4606394e7ec draft

planemo upload
author simon-gladman
date Thu, 14 Jun 2018 09:08:23 -0400
parents
children e376a618eb9f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloseq_nmds.R	Thu Jun 14 09:08:23 2018 -0400
@@ -0,0 +1,159 @@
+library('getopt')
+library('ape')
+suppressPackageStartupMessages(library('phyloseq'))
+Sys.setenv("DISPLAY"=":1")
+library("ggplot2")
+
+options(warn=-1)
+
+theme_set(theme_bw())
+
+#http://saml.rilspace.com/creating-a-galaxy-tool-for-r-scripts-that-output-images-and-pdfs
+#http://joey711.github.io/phyloseq-demo/phyloseq-demo.html
+option_specification = matrix(c(
+   'otu_table','o',2,'character',
+   'tax_table','t',2,'character',
+  'meta_table','s',2,'character',
+        'biom','i',2,'character',
+      'subset','x',2,'numeric',
+      'method','n',2,'character',
+    'distance','d',2,'character',
+     'kingdom','k',2,'character',
+      'cutoff','v',2,'numeric',
+    'category','c',2,'numeric',
+        'keep','p',2,'numeric',
+      'outdir','r',2,'character',
+    'htmlfile','h',2,'character'
+),byrow=TRUE,ncol=4);
+
+
+options <- getopt(option_specification);
+options(bitmapType="cairo")
+
+if (!is.null(options$outdir)) {
+  # Create the directory
+  dir.create(options$outdir,FALSE)
+}
+
+
+
+method<-options$method
+cutoff_value<-options$cutoff
+### select a kingdom for phyloseq plot (e.g., "phylum")
+#kingdom_str<-colnames(tax_table)[options$kingdom]
+kingdom_str<-options$kingdom
+distance<-options$distance
+keep<-options$keep
+
+
+if(!is.null(options$biom)){
+    
+    physeq<-import_biom(options$biom)
+  
+    if(length(rank_names(physeq)) == 8){
+    tax_table(physeq) <- tax_table(physeq)[,-1]
+    colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
+    }
+    
+    ### select column name from sample table for nmds plot
+    category_type<-colnames(sample_data(physeq))[options$subset]
+
+    ### obtain the unique value in the selected column from sample table
+    category_option<-unique(sample_data(physeq))[,options$subset]
+
+}else{
+
+    ### read the data into correct data type to create phyloseq object
+    otu_table<-as.matrix(read.table(options$otu_table,header=T,sep="\t"))
+    tax_table<-as.matrix(read.table(options$tax_table,header=T,sep="\t"))
+    sample_table<-read.table(options$meta_table,header=T,sep="\t",stringsAsFactors=F)
+
+
+    ### select column name from sample table for nmds plot
+    category_type<-colnames(sample_table)[options$category]
+
+    ### obtain the unique value in the selected column from sample table
+    category_option<-unique(sample_table[,options$category])
+
+
+    ### create a sample object for phyloseq
+    sample_object<-sample_data(sample_table)
+
+    ### create otu object for phyloseq
+    OTU<-otu_table(otu_table, taxa_are_rows = TRUE)
+
+    ### create tax object for phyloseq
+    TAX<-tax_table(tax_table)
+
+    ### create a phyloseq object
+    physeq = phyloseq(OTU,TAX,sample_object)
+}
+    ### select a kingdom for phyloseq plot (e.g., "phylum")
+    #kingdom_str<-colnames(tax_table)[options$kingdom]
+    #kingdom_str<-options$kingdom
+
+
+    ### Remove OTUs that do not appear more than 5 times in more than half the samples
+    ### filtering OTUs based on cutoff value (e.g., 5)
+    physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.5*nsamples(physeq))
+
+    ### phyloseq object after filtered
+    physeq_filter = prune_taxa(physeq_temp, physeq)
+
+    ## Transform to even sampling depth
+    physeq_filter = transform_sample_counts(physeq_filter, function(x) 1E6 * x/sum(x))
+
+    ## Keep only the most abundant five phyla
+    phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum, na.rm=TRUE)
+        
+    ### number of most abundance phyla to keep
+    topphyla = names(sort(phylum.sum, TRUE))[1:keep]
+    physeq_filter = prune_taxa((tax_table(physeq_filter)[, kingdom_str] %in% topphyla), physeq_filter)
+
+    ### select category to plot NMDS
+    category_input = get_variable(physeq_filter, category_type) %in% category_option
+    sample_data(physeq_filter)$category_input <- factor(category_input)
+
+    ### prepare the directory and file name
+    pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf"))
+    pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png"))
+    pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png"))
+    htmlfile <- gsub("[ ]+", "", paste(options$htmlfile))
+
+
+    # Produce PDF file
+    pdf(pdffile);
+    physeq_ord<-ordinate(physeq_filter,method,distance)
+    plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
+    plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
+    garbage<-dev.off();
+
+
+    #png('nmds.png')
+    bitmap(pngfile_nmds,"png16m")
+    plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
+    garbage<-dev.off()
+
+    #png('nmds_facet.png')
+    bitmap(pngfile_nmds_facet,"png16m")
+    plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
+    garbage<-dev.off()
+
+    # Produce the HTML file
+    htmlfile_handle <- file(htmlfile)
+    html_output = c('<html><body>',
+    	        '<table align="center>',
+    		'<tr>',
+    		'<td valign="middle" style="vertical-align:middle;">',
+                    '<a href="pdffile.pdf"><img src="nmds.png"/></a>',
+    		'</td>',
+    		'</tr>',
+    		'<tr>',
+            '<td valign="middle" style="vertical-align:middle;">',
+            '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>',
+            '</td>',
+            '</tr>',
+    		'</table>',
+                    '</html></body>');
+    writeLines(html_output, htmlfile_handle);
+    close(htmlfile_handle);