comparison phyloseq_nmds.R @ 2:20adf95eb758 draft

Updated phyloseq_nmds.xml to allow for new datatype.
author simon-gladman
date Fri, 20 Jul 2018 00:23:53 -0400
parents e376a618eb9f
children
comparison
equal deleted inserted replaced
1:e376a618eb9f 2:20adf95eb758
3 suppressPackageStartupMessages(library('phyloseq')) 3 suppressPackageStartupMessages(library('phyloseq'))
4 library(biomformat) 4 library(biomformat)
5 library(plyr) 5 library(plyr)
6 Sys.setenv("DISPLAY"=":1") 6 Sys.setenv("DISPLAY"=":1")
7 library("ggplot2") 7 library("ggplot2")
8 suppressPackageStartupMessages(library("doParallel"))
9 ncores = ceiling(detectCores() * 0.8)
10 registerDoParallel(cores=ncores)
8 11
9 options(warn=-1) 12 options(warn=-1)
10 13
11 theme_set(theme_bw()) 14 theme_set(theme_bw())
12 15
13 #http://saml.rilspace.com/creating-a-galaxy-tool-for-r-scripts-that-output-images-and-pdfs 16 #http://saml.rilspace.com/creating-a-galaxy-tool-for-r-scripts-that-output-images-and-pdfs
14 #http://joey711.github.io/phyloseq-demo/phyloseq-demo.html 17 #http://joey711.github.io/phyloseq-demo/phyloseq-demo.html
15 option_specification = matrix(c( 18 option_specification = matrix(c(
16 'otu_table','o',2,'character', 19 'otu_table','o',2,'character',
17 'tax_table','t',2,'character', 20 'tax_table','t',2,'character',
18 'meta_table','s',2,'character', 21 'meta_table','m',2,'character',
19 'biom','i',2,'character', 22 'biom','b',2,'character',
20 'subset','x',2,'numeric', 23 'filter','f',2,'numeric',
24 'subset','s',2,'character',
21 'method','n',2,'character', 25 'method','n',2,'character',
22 'distance','d',2,'character', 26 'distance','d',2,'character',
23 'kingdom','k',2,'character', 27 'kingdom','k',2,'character',
24 'cutoff','v',2,'numeric', 28 'plottype','e',2,'numeric',
25 'category','c',2,'numeric', 29 'cutoff','c',2,'numeric',
30 'category','g',2,'numeric',
26 'keep','p',2,'numeric', 31 'keep','p',2,'numeric',
27 'outdir','r',2,'character', 32 'outdir','r',2,'character',
28 'htmlfile','h',2,'character' 33 'htmlfile','h',2,'character'
29 ),byrow=TRUE,ncol=4); 34 ),byrow=TRUE,ncol=4);
30 35
44 ### select a kingdom for phyloseq plot (e.g., "phylum") 49 ### select a kingdom for phyloseq plot (e.g., "phylum")
45 #kingdom_str<-colnames(tax_table)[options$kingdom] 50 #kingdom_str<-colnames(tax_table)[options$kingdom]
46 kingdom_str<-options$kingdom 51 kingdom_str<-options$kingdom
47 distance<-options$distance 52 distance<-options$distance
48 keep<-options$keep 53 keep<-options$keep
49 54 plottype<-options$plottype
55 filter<-options$filter
56
57 ### prepare the directory and file name
58 pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf"))
59 pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png"))
60 pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png"))
61 htmlfile <- gsub("[ ]+", "", paste(options$htmlfile))
50 62
51 63
52 ### This function accepts different two different type of BIOM file format 64 ### This function accepts different two different type of BIOM file format
53 readBIOM<-function(inBiom){ 65 readBIOM<-function(inBiom){
54 tryCatch({ 66 tryCatch({
55 phyloseq_obj<-import_biom(inBiom) 67 phyloseq_obj<-import_biom(inBiom,parallel=TRUE)
56 return(phyloseq_obj) 68 return(phyloseq_obj)
57 }, 69 },
58 error=function(e){ 70 error=function(e){
59 biom_obj<-read_biom(inBiom) 71 biom_obj<-read_biom(inBiom)
60 72
64 taxonomy_matrix = as.matrix(observation_metadata(biom_obj), rownames.force=TRUE) 76 taxonomy_matrix = as.matrix(observation_metadata(biom_obj), rownames.force=TRUE)
65 TAXONOMY_TABLE = tax_table(taxonomy_matrix) 77 TAXONOMY_TABLE = tax_table(taxonomy_matrix)
66 78
67 metadata.temp<-sample_metadata(biom_obj) 79 metadata.temp<-sample_metadata(biom_obj)
68 METADATA_TABLE<-plyr::ldply(metadata.temp, rbind) 80 METADATA_TABLE<-plyr::ldply(metadata.temp, rbind)
69 rownames(METADATA_TABLE)<-as.character(METADATA_TABLE$.id) 81 rownames(METADATA_TABLE)<-as.character(METADATA_TABLE$.id)
70 82
71 phyloseq_obj = phyloseq(OTU_TABLE, TAXONOMY_TABLE,sample_data(METADATA_TABLE)) 83 phyloseq_obj = phyloseq(OTU_TABLE, TAXONOMY_TABLE,sample_data(METADATA_TABLE))
72 return(phyloseq_obj) 84 return(phyloseq_obj)
73 } 85 }
74 ) 86 )
75 } 87 }
76 88
77 89
90 create_OTU_PDF<-function(pdf_file,phyloseq_obj,phyloseq_ord,kingdom_str,htmlfile,pngfile_nmds,pngfile_nmds_facet){
91 pdf(pdf_file);
92 p1<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
93 print(p1)
94 p2<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
95 print(p2)
96 garbage<-dev.off();
97
98 #png('nmds.png')
99 bitmap(pngfile_nmds,"png16m")
100 p3<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
101 print(p3)
102 garbage<-dev.off()
103
104 #png('nmds_facet.png')
105 bitmap(pngfile_nmds_facet,"png16m")
106 p4<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
107 print(p4)
108 garbage<-dev.off()
109
110 create_HTML_1(htmlfile)
111 }
112
113 create_SAMPLE_PDF<-function(pdf_file,phyloseq_obj,phyloseq_ord,htmlfile,pngfile_nmds,category_type){
114 pdf(pdf_file);
115 p <- plot_ordination(physeq_filter, physeq_ord, type="samples", color=category_type)
116 p <- p + geom_point(aes(fill=category_type)) + geom_point(size=5) + ggtitle("samples")
117 print(p)
118 garbage<-dev.off();
119
120 #png('nmds.png')
121 bitmap(pngfile_nmds,"png16m")
122 p1 <- plot_ordination(physeq_filter, physeq_ord, type="samples", color=category_type)
123 p1 <- p1 + geom_point(aes(fill=category_type)) + geom_point(size=5) + ggtitle("samples")
124 print(p1)
125 garbage<-dev.off();
126
127 create_HTML_2(htmlfile)
128 }
129
130 create_HTML_1<-function(htmlfile){
131 htmlfile_handle <- file(htmlfile)
132 html_output = c('<html><body>',
133 '<table align="center>',
134 '<tr>',
135 '<td valign="middle" style="vertical-align:middle;">',
136 '<a href="pdffile.pdf"><img src="nmds.png"/></a>',
137 '</td>',
138 '</tr>',
139 '<tr>',
140 '<td valign="middle" style="vertical-align:middle;">',
141 '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>',
142 '</td>',
143 '</tr>',
144 '</table>',
145 '</html></body>');
146 writeLines(html_output, htmlfile_handle);
147 close(htmlfile_handle);
148 }
149
150 create_HTML_2<-function(htmlfile){
151 htmlfile_handle <- file(htmlfile)
152 html_output = c('<html><body>',
153 '<table align="center>',
154 '<tr>',
155 '<td valign="middle" style="vertical-align:middle;">',
156 '<a href="pdffile.pdf"><img src="nmds.png"/></a>',
157 '</td>',
158 '</tr>',
159 '</table>',
160 '</html></body>');
161 writeLines(html_output, htmlfile_handle);
162 close(htmlfile_handle);
163 }
78 164
79 if(!is.null(options$biom)){ 165 if(!is.null(options$biom)){
80 166
81 #physeq<-import_biom(options$biom) 167 #physeq<-import_biom(options$biom)
82 physeq<-readBIOM(options$biom) 168 physeq<-readBIOM(options$biom)
83 169
84 if(length(rank_names(physeq)) == 8){ 170 if(length(rank_names(physeq)) == 8){
85 tax_table(physeq) <- tax_table(physeq)[,-1] 171 tax_table(physeq) <- tax_table(physeq)[,-1]
86 colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") 172 colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
173 } else {
174 colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
87 } 175 }
176
88 ### select column name from sample table for nmds plot 177 ### select column name from sample table for nmds plot
89 category_type<-colnames(sample_data(physeq))[options$subset] 178 ## which(colnames(sample_data(biom)) == "vegetation_type_id")
179 #category_type<-colnames(sample_data(physeq))[options$subset]
180 category_type <- options$subset
90 181
91 ### obtain the unique value in the selected column from sample table 182 ### obtain the unique value in the selected column from sample table
92 category_option<-unique(sample_data(physeq))[,options$subset] 183 category_option<-unique(sample_data(physeq))[,options$subset]
93 184
94 }else{ 185 }else{
119 physeq = phyloseq(OTU,TAX,sample_object) 210 physeq = phyloseq(OTU,TAX,sample_object)
120 } 211 }
121 ### select a kingdom for phyloseq plot (e.g., "phylum") 212 ### select a kingdom for phyloseq plot (e.g., "phylum")
122 #kingdom_str<-colnames(tax_table)[options$kingdom] 213 #kingdom_str<-colnames(tax_table)[options$kingdom]
123 #kingdom_str<-options$kingdom 214 #kingdom_str<-options$kingdom
124 215
125 216
126 ### Remove OTUs that do not appear more than 5 times in more than half the samples
127 ### filtering OTUs based on cutoff value (e.g., 5) 217 ### filtering OTUs based on cutoff value (e.g., 5)
128 physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.1*nsamples(physeq)) 218 #physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.1*nsamples(physeq))
129 219 physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=filter*nsamples(physeq))
220
130 ### phyloseq object after filtered 221 ### phyloseq object after filtered
131 physeq_filter = prune_taxa(physeq_temp, physeq) 222 physeq_filter = prune_taxa(physeq_temp, physeq)
132 223
133 ## Transform to even sampling depth 224 ## Transform to even sampling depth
134 physeq_filter = transform_sample_counts(physeq_filter, function(x) 1E6 * x/sum(x)) 225 physeq_filter = transform_sample_counts(physeq_filter, function(x) 1E6 * x/sum(x))
135 226
227
228
229 if(plottype == 1){
230
231 # kingdom_str <- as.numeric(kingdom_str)
136 ## Keep only the most abundant five phyla 232 ## Keep only the most abundant five phyla
137 phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum, na.rm=TRUE) 233
138 234 phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum,na.rm=TRUE)
139 ### number of most abundance phyla to keep 235 ### number of most abundance phyla to keep
140 topphyla = names(sort(phylum.sum, TRUE))[1:keep] 236 topphyla = names(sort(phylum.sum, TRUE))[1:keep]
237
141 physeq_filter = prune_taxa((tax_table(physeq_filter)[, kingdom_str] %in% topphyla), physeq_filter) 238 physeq_filter = prune_taxa((tax_table(physeq_filter)[, kingdom_str] %in% topphyla), physeq_filter)
142 239
240 }else{
241
242 otu_table(physeq_filter)<-otu_table(physeq_filter)[,colSums(otu_table(physeq_filter)) > 1]
243
244 }
143 ### select category to plot NMDS 245 ### select category to plot NMDS
144 category_input = get_variable(physeq_filter, category_type) %in% category_option 246 category_input = get_variable(physeq_filter, category_type) %in% category_option
145 sample_data(physeq_filter)$category_input <- factor(category_input) 247 sample_data(physeq_filter)$category_input <- factor(category_input)
146 248
147 ### prepare the directory and file name
148 pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf"))
149 pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png"))
150 pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png"))
151 htmlfile <- gsub("[ ]+", "", paste(options$htmlfile))
152
153
154 # Produce PDF file
155 pdf(pdffile);
156 physeq_ord<-ordinate(physeq_filter,method,distance) 249 physeq_ord<-ordinate(physeq_filter,method,distance)
157 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") 250
158 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3) 251 if(plottype == 1){
159 garbage<-dev.off(); 252 #kingdom_str = colnames(tax_table)[2]
160 253 create_OTU_PDF(pdffile,physeq_filter,physeq_ord,kingdom_str,htmlfile,pngfile_nmds,pngfile_nmds_facet)
161 254 }else{
162 #png('nmds.png') 255 create_SAMPLE_PDF(pdffile,physeq_filter,physeq_ord,htmlfile,pngfile_nmds,category_type)
163 bitmap(pngfile_nmds,"png16m") 256 }
164 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") 257
165 garbage<-dev.off()
166
167 #png('nmds_facet.png')
168 bitmap(pngfile_nmds_facet,"png16m")
169 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
170 garbage<-dev.off()
171
172 # Produce the HTML file
173 htmlfile_handle <- file(htmlfile)
174 html_output = c('<html><body>',
175 '<table align="center>',
176 '<tr>',
177 '<td valign="middle" style="vertical-align:middle;">',
178 '<a href="pdffile.pdf"><img src="nmds.png"/></a>',
179 '</td>',
180 '</tr>',
181 '<tr>',
182 '<td valign="middle" style="vertical-align:middle;">',
183 '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>',
184 '</td>',
185 '</tr>',
186 '</table>',
187 '</html></body>');
188 writeLines(html_output, htmlfile_handle);
189 close(htmlfile_handle);