Mercurial > repos > simon-gladman > phyloseq_nmds
diff phyloseq_nmds.R @ 0:b4606394e7ec draft
planemo upload
author | simon-gladman |
---|---|
date | Thu, 14 Jun 2018 09:08:23 -0400 |
parents | |
children | e376a618eb9f |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phyloseq_nmds.R Thu Jun 14 09:08:23 2018 -0400 @@ -0,0 +1,159 @@ +library('getopt') +library('ape') +suppressPackageStartupMessages(library('phyloseq')) +Sys.setenv("DISPLAY"=":1") +library("ggplot2") + +options(warn=-1) + +theme_set(theme_bw()) + +#http://saml.rilspace.com/creating-a-galaxy-tool-for-r-scripts-that-output-images-and-pdfs +#http://joey711.github.io/phyloseq-demo/phyloseq-demo.html +option_specification = matrix(c( + 'otu_table','o',2,'character', + 'tax_table','t',2,'character', + 'meta_table','s',2,'character', + 'biom','i',2,'character', + 'subset','x',2,'numeric', + 'method','n',2,'character', + 'distance','d',2,'character', + 'kingdom','k',2,'character', + 'cutoff','v',2,'numeric', + 'category','c',2,'numeric', + 'keep','p',2,'numeric', + 'outdir','r',2,'character', + 'htmlfile','h',2,'character' +),byrow=TRUE,ncol=4); + + +options <- getopt(option_specification); +options(bitmapType="cairo") + +if (!is.null(options$outdir)) { + # Create the directory + dir.create(options$outdir,FALSE) +} + + + +method<-options$method +cutoff_value<-options$cutoff +### select a kingdom for phyloseq plot (e.g., "phylum") +#kingdom_str<-colnames(tax_table)[options$kingdom] +kingdom_str<-options$kingdom +distance<-options$distance +keep<-options$keep + + +if(!is.null(options$biom)){ + + physeq<-import_biom(options$biom) + + if(length(rank_names(physeq)) == 8){ + tax_table(physeq) <- tax_table(physeq)[,-1] + colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") + } + + ### select column name from sample table for nmds plot + category_type<-colnames(sample_data(physeq))[options$subset] + + ### obtain the unique value in the selected column from sample table + category_option<-unique(sample_data(physeq))[,options$subset] + +}else{ + + ### read the data into correct data type to create phyloseq object + otu_table<-as.matrix(read.table(options$otu_table,header=T,sep="\t")) + tax_table<-as.matrix(read.table(options$tax_table,header=T,sep="\t")) + sample_table<-read.table(options$meta_table,header=T,sep="\t",stringsAsFactors=F) + + + ### select column name from sample table for nmds plot + category_type<-colnames(sample_table)[options$category] + + ### obtain the unique value in the selected column from sample table + category_option<-unique(sample_table[,options$category]) + + + ### create a sample object for phyloseq + sample_object<-sample_data(sample_table) + + ### create otu object for phyloseq + OTU<-otu_table(otu_table, taxa_are_rows = TRUE) + + ### create tax object for phyloseq + TAX<-tax_table(tax_table) + + ### create a phyloseq object + physeq = phyloseq(OTU,TAX,sample_object) +} + ### select a kingdom for phyloseq plot (e.g., "phylum") + #kingdom_str<-colnames(tax_table)[options$kingdom] + #kingdom_str<-options$kingdom + + + ### Remove OTUs that do not appear more than 5 times in more than half the samples + ### filtering OTUs based on cutoff value (e.g., 5) + physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.5*nsamples(physeq)) + + ### phyloseq object after filtered + physeq_filter = prune_taxa(physeq_temp, physeq) + + ## Transform to even sampling depth + physeq_filter = transform_sample_counts(physeq_filter, function(x) 1E6 * x/sum(x)) + + ## Keep only the most abundant five phyla + phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum, na.rm=TRUE) + + ### number of most abundance phyla to keep + topphyla = names(sort(phylum.sum, TRUE))[1:keep] + physeq_filter = prune_taxa((tax_table(physeq_filter)[, kingdom_str] %in% topphyla), physeq_filter) + + ### select category to plot NMDS + category_input = get_variable(physeq_filter, category_type) %in% category_option + sample_data(physeq_filter)$category_input <- factor(category_input) + + ### prepare the directory and file name + pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf")) + pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png")) + pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png")) + htmlfile <- gsub("[ ]+", "", paste(options$htmlfile)) + + + # Produce PDF file + pdf(pdffile); + physeq_ord<-ordinate(physeq_filter,method,distance) + plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3) + garbage<-dev.off(); + + + #png('nmds.png') + bitmap(pngfile_nmds,"png16m") + plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + garbage<-dev.off() + + #png('nmds_facet.png') + bitmap(pngfile_nmds_facet,"png16m") + plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3) + garbage<-dev.off() + + # Produce the HTML file + htmlfile_handle <- file(htmlfile) + html_output = c('<html><body>', + '<table align="center>', + '<tr>', + '<td valign="middle" style="vertical-align:middle;">', + '<a href="pdffile.pdf"><img src="nmds.png"/></a>', + '</td>', + '</tr>', + '<tr>', + '<td valign="middle" style="vertical-align:middle;">', + '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>', + '</td>', + '</tr>', + '</table>', + '</html></body>'); + writeLines(html_output, htmlfile_handle); + close(htmlfile_handle);