diff phyloseq_nmds.R @ 2:20adf95eb758 draft

Updated phyloseq_nmds.xml to allow for new datatype.
author simon-gladman
date Fri, 20 Jul 2018 00:23:53 -0400
parents e376a618eb9f
children
line wrap: on
line diff
--- a/phyloseq_nmds.R	Sat Jun 16 05:03:43 2018 -0400
+++ b/phyloseq_nmds.R	Fri Jul 20 00:23:53 2018 -0400
@@ -5,6 +5,9 @@
 library(plyr)
 Sys.setenv("DISPLAY"=":1")
 library("ggplot2")
+suppressPackageStartupMessages(library("doParallel"))
+ncores = ceiling(detectCores() * 0.8)
+registerDoParallel(cores=ncores)
 
 options(warn=-1)
 
@@ -15,14 +18,16 @@
 option_specification = matrix(c(
    'otu_table','o',2,'character',
    'tax_table','t',2,'character',
-  'meta_table','s',2,'character',
-        'biom','i',2,'character',
-      'subset','x',2,'numeric',
+  'meta_table','m',2,'character',
+        'biom','b',2,'character',
+      'filter','f',2,'numeric',
+      'subset','s',2,'character',
       'method','n',2,'character',
     'distance','d',2,'character',
      'kingdom','k',2,'character',
-      'cutoff','v',2,'numeric',
-    'category','c',2,'numeric',
+    'plottype','e',2,'numeric',
+      'cutoff','c',2,'numeric',
+    'category','g',2,'numeric',
         'keep','p',2,'numeric',
       'outdir','r',2,'character',
     'htmlfile','h',2,'character'
@@ -46,13 +51,20 @@
 kingdom_str<-options$kingdom
 distance<-options$distance
 keep<-options$keep
+plottype<-options$plottype
+filter<-options$filter
 
+### prepare the directory and file name
+pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf"))
+pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png"))
+pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png"))
+htmlfile <- gsub("[ ]+", "", paste(options$htmlfile))
 
 
 ### This function accepts different two different type of BIOM file format
 readBIOM<-function(inBiom){
 	tryCatch({
-		phyloseq_obj<-import_biom(inBiom)
+		phyloseq_obj<-import_biom(inBiom,parallel=TRUE)
 		return(phyloseq_obj)
 		},
 		error=function(e){
@@ -66,7 +78,7 @@
 		
 		metadata.temp<-sample_metadata(biom_obj)
 		METADATA_TABLE<-plyr::ldply(metadata.temp, rbind)
-                rownames(METADATA_TABLE)<-as.character(METADATA_TABLE$.id)
+        rownames(METADATA_TABLE)<-as.character(METADATA_TABLE$.id)
 
 		phyloseq_obj = phyloseq(OTU_TABLE, TAXONOMY_TABLE,sample_data(METADATA_TABLE))
 		return(phyloseq_obj)
@@ -75,18 +87,97 @@
 }
 
 
+create_OTU_PDF<-function(pdf_file,phyloseq_obj,phyloseq_ord,kingdom_str,htmlfile,pngfile_nmds,pngfile_nmds_facet){
+    pdf(pdf_file);
+    p1<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
+    print(p1)
+    p2<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
+    print(p2)
+    garbage<-dev.off();
+
+    #png('nmds.png')
+    bitmap(pngfile_nmds,"png16m")
+    p3<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
+    print(p3)
+    garbage<-dev.off()
+
+    #png('nmds_facet.png')
+    bitmap(pngfile_nmds_facet,"png16m")
+    p4<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
+    print(p4)
+    garbage<-dev.off()
+
+    create_HTML_1(htmlfile)
+}
+
+create_SAMPLE_PDF<-function(pdf_file,phyloseq_obj,phyloseq_ord,htmlfile,pngfile_nmds,category_type){
+    pdf(pdf_file);
+    p <- plot_ordination(physeq_filter, physeq_ord, type="samples", color=category_type)
+    p <- p + geom_point(aes(fill=category_type)) + geom_point(size=5) + ggtitle("samples")
+    print(p)
+    garbage<-dev.off();
+
+    #png('nmds.png')
+    bitmap(pngfile_nmds,"png16m")
+    p1 <- plot_ordination(physeq_filter, physeq_ord, type="samples", color=category_type)
+    p1 <- p1 + geom_point(aes(fill=category_type)) + geom_point(size=5) + ggtitle("samples")
+    print(p1)
+    garbage<-dev.off();
+
+     create_HTML_2(htmlfile)
+}
+
+create_HTML_1<-function(htmlfile){
+    htmlfile_handle <- file(htmlfile)
+    html_output = c('<html><body>',
+                    '<table align="center>',
+                    '<tr>',
+                    '<td valign="middle" style="vertical-align:middle;">',
+                        '<a href="pdffile.pdf"><img src="nmds.png"/></a>',
+                    '</td>',
+                    '</tr>',
+                    '<tr>',
+                    '<td valign="middle" style="vertical-align:middle;">',
+                    '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>',
+                    '</td>',
+                    '</tr>',
+                    '</table>',
+                    '</html></body>');
+     writeLines(html_output, htmlfile_handle);
+     close(htmlfile_handle);
+}
+
+create_HTML_2<-function(htmlfile){
+    htmlfile_handle <- file(htmlfile)
+    html_output = c('<html><body>',
+                    '<table align="center>',
+                    '<tr>',
+                    '<td valign="middle" style="vertical-align:middle;">',
+                        '<a href="pdffile.pdf"><img src="nmds.png"/></a>',
+                    '</td>',
+                    '</tr>',
+                    '</table>',
+                    '</html></body>');
+    writeLines(html_output, htmlfile_handle);
+    close(htmlfile_handle);
+}
 
 if(!is.null(options$biom)){
     
     #physeq<-import_biom(options$biom)
     physeq<-readBIOM(options$biom)
-
+    
     if(length(rank_names(physeq)) == 8){
-    tax_table(physeq) <- tax_table(physeq)[,-1]
-    colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
+        tax_table(physeq) <- tax_table(physeq)[,-1]
+        colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
+    } else {
+        colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
     }
+    
     ### select column name from sample table for nmds plot
-    category_type<-colnames(sample_data(physeq))[options$subset]
+    ## which(colnames(sample_data(biom)) == "vegetation_type_id")
+    #category_type<-colnames(sample_data(physeq))[options$subset]
+    category_type <- options$subset
 
     ### obtain the unique value in the selected column from sample table
     category_option<-unique(sample_data(physeq))[,options$subset]
@@ -121,69 +212,46 @@
     ### select a kingdom for phyloseq plot (e.g., "phylum")
     #kingdom_str<-colnames(tax_table)[options$kingdom]
     #kingdom_str<-options$kingdom
-
-
-    ### Remove OTUs that do not appear more than 5 times in more than half the samples
+    
+    
     ### filtering OTUs based on cutoff value (e.g., 5)
-    physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.1*nsamples(physeq))
-
+    #physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.1*nsamples(physeq))
+    physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=filter*nsamples(physeq))
+    
     ### phyloseq object after filtered
     physeq_filter = prune_taxa(physeq_temp, physeq)
-
+    
     ## Transform to even sampling depth
     physeq_filter = transform_sample_counts(physeq_filter, function(x) 1E6 * x/sum(x))
 
+
+
+if(plottype == 1){
+
+    # kingdom_str <- as.numeric(kingdom_str)
     ## Keep only the most abundant five phyla
-    phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum, na.rm=TRUE)
-        
+
+    phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum,na.rm=TRUE)
     ### number of most abundance phyla to keep
     topphyla = names(sort(phylum.sum, TRUE))[1:keep]
+
     physeq_filter = prune_taxa((tax_table(physeq_filter)[, kingdom_str] %in% topphyla), physeq_filter)
 
+}else{
+ 
+    otu_table(physeq_filter)<-otu_table(physeq_filter)[,colSums(otu_table(physeq_filter)) > 1]
+
+}
     ### select category to plot NMDS
     category_input = get_variable(physeq_filter, category_type) %in% category_option
     sample_data(physeq_filter)$category_input <- factor(category_input)
 
-    ### prepare the directory and file name
-    pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf"))
-    pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png"))
-    pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png"))
-    htmlfile <- gsub("[ ]+", "", paste(options$htmlfile))
-
-
-    # Produce PDF file
-    pdf(pdffile);
     physeq_ord<-ordinate(physeq_filter,method,distance)
-    plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
-    plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
-    garbage<-dev.off();
-
-
-    #png('nmds.png')
-    bitmap(pngfile_nmds,"png16m")
-    plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
-    garbage<-dev.off()
 
-    #png('nmds_facet.png')
-    bitmap(pngfile_nmds_facet,"png16m")
-    plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
-    garbage<-dev.off()
+if(plottype == 1){
+    #kingdom_str = colnames(tax_table)[2]
+    create_OTU_PDF(pdffile,physeq_filter,physeq_ord,kingdom_str,htmlfile,pngfile_nmds,pngfile_nmds_facet) 
+}else{
+    create_SAMPLE_PDF(pdffile,physeq_filter,physeq_ord,htmlfile,pngfile_nmds,category_type)
+}
 
-    # Produce the HTML file
-    htmlfile_handle <- file(htmlfile)
-    html_output = c('<html><body>',
-    	        '<table align="center>',
-    		'<tr>',
-    		'<td valign="middle" style="vertical-align:middle;">',
-                    '<a href="pdffile.pdf"><img src="nmds.png"/></a>',
-    		'</td>',
-    		'</tr>',
-    		'<tr>',
-            '<td valign="middle" style="vertical-align:middle;">',
-            '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>',
-            '</td>',
-            '</tr>',
-    		'</table>',
-                    '</html></body>');
-    writeLines(html_output, htmlfile_handle);
-    close(htmlfile_handle);