0
|
1 library('getopt')
|
|
2 library('ape')
|
|
3 suppressPackageStartupMessages(library('phyloseq'))
|
|
4 library(biomformat)
|
|
5 library(plyr)
|
|
6 Sys.setenv("DISPLAY"=":1")
|
|
7 library("ggplot2")
|
|
8 suppressPackageStartupMessages(library("doParallel"))
|
|
# Register a parallel backend using roughly 80% of the machine's cores.
# detectCores() is documented to return NA when the core count cannot be
# determined; fall back to a single worker in that case instead of passing
# NA on to registerDoParallel().
detected_cores <- detectCores()
ncores <- if (is.na(detected_cores)) 1 else ceiling(detected_cores * 0.8)
registerDoParallel(cores = ncores)

# Silence all warnings for the rest of the run.
options(warn = -1)

# Use the black-and-white ggplot2 theme for every plot produced below.
theme_set(theme_bw())
|
|
15
|
|
16 #http://saml.rilspace.com/creating-a-galaxy-tool-for-r-scripts-that-output-images-and-pdfs
|
|
17 #http://joey711.github.io/phyloseq-demo/phyloseq-demo.html
|
|
# getopt specification, one row per command-line option:
#   long name, short flag, argument mask, value type.
# Every option uses mask 2. Mixing the number 2 with strings coerces each
# row to character, so the result is a 13 x 4 character matrix.
option_specification <- rbind(
  c("otu_table",  "o", 2, "character"),
  c("tax_table",  "t", 2, "character"),
  c("meta_table", "m", 2, "character"),
  c("biom",       "b", 2, "character"),
  c("subset",     "s", 2, "character"),
  c("method",     "n", 2, "character"),
  c("distance",   "d", 2, "character"),
  c("kingdom",    "k", 2, "character"),
  c("plottype",   "e", 2, "numeric"),
  c("category",   "g", 2, "numeric"),
  c("log",        "l", 2, "character"),
  c("outdir",     "r", 2, "character"),
  c("htmlfile",   "h", 2, "character")
)
|
|
33
|
|
34
|
|
# Parse the command line. The resulting list is stored in a variable named
# `options`; calls such as options(...) below still resolve to the base
# function because R skips non-function bindings when looking up a call.
options <- getopt(option_specification)
options(bitmapType = "cairo")

if (!is.null(options$outdir)) {
  # Create the output directory; showWarnings = FALSE keeps this quiet when
  # the directory already exists.
  dir.create(options$outdir, showWarnings = FALSE)
}

method <- options$method
### select a kingdom for phyloseq plot (e.g., "phylum")
#kingdom_str<-colnames(tax_table)[options$kingdom]
kingdom_str <- options$kingdom
distance <- options$distance
plottype <- options$plottype

### prepare the directory and file name
# paste0() joins without inserting a separator, so no whitespace has to be
# stripped afterwards. The previous gsub("[ ]+", "", paste(...)) removed
# every space, which made these paths disagree with the directory created
# above whenever outdir itself contained a space.
pdffile <- paste0(options$outdir, "/pdffile.pdf")
pngfile_nmds <- paste0(options$outdir, "/nmds.png")
pngfile_nmds_facet <- paste0(options$outdir, "/nmds_facet.png")
# as.character() keeps a missing option as character(0), as before.
htmlfile <- as.character(options$htmlfile)
output_summary <- as.character(options$log)
|
0
|
58
|
|
59 ### This function accepts different two different type of BIOM file format
|
|
### Read a BIOM file into a phyloseq object, accepting two different BIOM
### file layouts.
###
### inBiom: path of the BIOM file.
###
### phyloseq::import_biom() is tried first. If it raises an error, the file is
### read with biomformat::read_biom() and the phyloseq object is assembled by
### hand from its count matrix, observation metadata and sample metadata.
readBIOM <- function(inBiom) {
  # Fallback importer, used as the error handler below.
  import_with_biomformat <- function(e) {
    biom_obj <- read_biom(inBiom)

    # Counts: taxa in rows.
    OTU_TABLE <- otu_table(as(biom_data(biom_obj), "matrix"),
                           taxa_are_rows = TRUE)

    # Taxonomy: observation metadata coerced to a matrix, row names kept.
    TAXONOMY_TABLE <- tax_table(
      as.matrix(observation_metadata(biom_obj), rownames.force = TRUE)
    )

    # Sample metadata: row-bind the per-sample entries; ldply() stores each
    # entry's name in the `.id` column, which becomes the row names.
    METADATA_TABLE <- plyr::ldply(sample_metadata(biom_obj), rbind)
    rownames(METADATA_TABLE) <- as.character(METADATA_TABLE$.id)

    phyloseq(OTU_TABLE, TAXONOMY_TABLE, sample_data(METADATA_TABLE))
  }

  tryCatch(
    import_biom(inBiom, parallel = TRUE),
    error = import_with_biomformat
  )
}
|
|
83
|
|
84
|
|
### Draw the taxa ordination (plain and faceted) into a PDF and two PNG
### images, then write the two-image HTML page.
###
### pdf_file:           path of the PDF receiving both plots (one per page).
### phyloseq_obj:       phyloseq object handed to plot_ordination().
### phyloseq_ord:       ordination result handed to plot_ordination().
### kingdom_str:        taxonomy column used for colour and for faceting.
### htmlfile:           path of the HTML page written by create_HTML_1().
### pngfile_nmds:       path of the PNG holding the plain plot.
### pngfile_nmds_facet: path of the PNG holding the faceted plot.
create_OTU_PDF <- function(pdf_file, phyloseq_obj, phyloseq_ord, kingdom_str,
                           htmlfile, pngfile_nmds, pngfile_nmds_facet) {
  # Build each plot once; a ggplot object can be printed to any number of
  # devices, so there is no need to recompute it per output file.
  taxa_plot <- plot_ordination(phyloseq_obj, phyloseq_ord, type = "taxa",
                               color = kingdom_str, title = "taxa") +
    theme(legend.position = "bottom", legend.box = "vertical",
          legend.direction = "horizontal")
  # Second positional argument of facet_wrap() is the number of rows.
  facet_plot <- taxa_plot + facet_wrap(formula(paste('~', kingdom_str)), 3)

  pdf(pdf_file)
  print(taxa_plot)
  print(facet_plot)
  garbage <- dev.off()

  #png('nmds.png')
  bitmap(pngfile_nmds, "png16m", res = 120)
  print(taxa_plot)
  garbage <- dev.off()

  #png('nmds_facet.png')
  bitmap(pngfile_nmds_facet, "png16m", res = 120)
  print(facet_plot)
  garbage <- dev.off()

  create_HTML_1(htmlfile)
}
|
|
107
|
|
### Draw the sample ordination into a PDF and a PNG image, then write the
### single-image HTML page.
###
### pdf_file:      path of the PDF output.
### phyloseq_obj:  phyloseq object handed to plot_ordination().
### phyloseq_ord:  ordination result; its `stress` element is shown in the
###                plot title.
### htmlfile:      path of the HTML page written by create_HTML_2().
### pngfile_nmds:  path of the PNG output.
### category_type: sample-data column name used to colour the points.
create_SAMPLE_PDF <- function(pdf_file, phyloseq_obj, phyloseq_ord, htmlfile,
                              pngfile_nmds, category_type) {
  plot_title <- paste("Samples - Stress value",
                      formatC(phyloseq_ord$stress, digits = 4, format = "f"),
                      sep = ":")

  # Build the plot once and print it to both devices.
  # NOTE(review): aes(fill = category_type) refers to the function argument
  # (a column *name*), not to the column itself; kept as-is so the rendered
  # output does not change -- confirm whether mapping the column was intended.
  sample_plot <- plot_ordination(phyloseq_obj, phyloseq_ord, type = "samples",
                                 color = category_type) +
    geom_point(aes(fill = category_type)) +
    geom_point(size = 5) +
    ggtitle(plot_title) +
    theme(legend.position = "bottom", legend.box = "vertical",
          legend.direction = "horizontal")

  pdf(pdf_file)
  print(sample_plot)
  garbage <- dev.off()

  #png('nmds.png')
  bitmap(pngfile_nmds, "png16m", res = 120)
  print(sample_plot)
  garbage <- dev.off()

  create_HTML_2(htmlfile)
}
|
|
124
|
|
### Draw the biplot (samples and taxa together) into a PDF and a PNG image,
### then write the single-image HTML page.
###
### pdf_file:      path of the PDF output.
### phyloseq_obj:  phyloseq object handed to plot_ordination().
### phyloseq_ord:  ordination result handed to plot_ordination().
### kingdom_str:   taxonomy column mapped to point shape.
### htmlfile:      path of the HTML page written by create_HTML_2().
### pngfile_nmds:  path of the PNG output.
### category_type: sample-data column name mapped to colour.
create_BIPLOT_PDF <- function(pdf_file, phyloseq_obj, phyloseq_ord,
                              kingdom_str, htmlfile, pngfile_nmds,
                              category_type) {
  # Build the plot once and print it to both devices.
  biplot <- plot_ordination(phyloseq_obj, phyloseq_ord, type = "biplot",
                            color = category_type, shape = kingdom_str,
                            title = "BIPLOT") +
    theme(legend.position = "bottom", legend.box = "vertical",
          legend.direction = "horizontal")

  pdf(pdf_file)
  # Echo the selected category column on standard output (kept from the
  # original so the tool's log output is unchanged).
  print(category_type)
  print(biplot)
  garbage <- dev.off()

  bitmap(pngfile_nmds, "png16m", res = 120)
  print(biplot)
  garbage <- dev.off()

  create_HTML_2(htmlfile)
}
|
|
139
|
|
### Draw the split plot (samples and taxa in separate panels) into a 10 x 6
### PDF and a PNG image, then write the single-image HTML page.
###
### pdf_file:      path of the PDF output.
### phyloseq_obj:  phyloseq object handed to plot_ordination().
### phyloseq_ord:  ordination result handed to plot_ordination().
### kingdom_str:   taxonomy column mapped to both colour and shape.
### htmlfile:      path of the HTML page written by create_HTML_2().
### pngfile_nmds:  path of the PNG output.
### category_type: sample-data column name used as point label.
create_SPLITPLOT_PDF <- function(pdf_file, phyloseq_obj, phyloseq_ord,
                                 kingdom_str, htmlfile, pngfile_nmds,
                                 category_type) {
  # Build the plot once and print it to both devices.
  split_plot <- plot_ordination(phyloseq_obj, phyloseq_ord, type = "split",
                                color = kingdom_str, shape = kingdom_str,
                                label = category_type, title = "SPLIT PLOT") +
    theme(plot.margin = unit(c(12, 18, 12, 18), "pt"),
          legend.position = "bottom", legend.box = "vertical",
          legend.direction = "horizontal")

  pdf(pdf_file, width = 10, height = 6)
  print(split_plot)
  garbage <- dev.off()

  bitmap(pngfile_nmds, "png16m", res = 120)
  print(split_plot)
  garbage <- dev.off()

  create_HTML_2(htmlfile)
}
|
|
154
|
|
### Write the HTML page that shows two images (nmds.png and nmds_facet.png),
### each linking to pdffile.pdf.
###
### htmlfile: path of the HTML file to write; an existing file is replaced.
###
### The image and PDF references are relative file names.
create_HTML_1 <- function(htmlfile) {
  html_output <- c(
    '<html><body>',
    '<table align="center">',
    '<tr>',
    '<td valign="middle" style="vertical-align:middle;">',
    '<a href="pdffile.pdf"><img src="nmds.png" width="800" height="800"/></a>',
    '</td>',
    '</tr>',
    '<tr>',
    '<td valign="middle" style="vertical-align:middle;">',
    '<a href="pdffile.pdf"><img src="nmds_facet.png" width="800" height="800"/></a>',
    '</td>',
    '</tr>',
    '</table>',
    # Close <body> before <html>; the tags were previously emitted in the
    # opposite order, which is not well-formed.
    '</body></html>'
  )
  # Given a path, writeLines() opens, writes and closes the file itself.
  writeLines(html_output, con = htmlfile)
}
|
|
174
|
|
### Write the HTML page that shows a single image (nmds.png) linking to
### pdffile.pdf.
###
### htmlfile: path of the HTML file to write; an existing file is replaced.
###
### The image and PDF references are relative file names.
create_HTML_2 <- function(htmlfile) {
  html_output <- c(
    '<html><body>',
    '<table align="center">',
    '<tr>',
    '<td valign="middle" style="vertical-align:middle;">',
    '<a href="pdffile.pdf"><img src="nmds.png" width="800" height="800"/></a>',
    '</td>',
    '</tr>',
    '</table>',
    # Close <body> before <html>; the tags were previously emitted in the
    # opposite order, which is not well-formed.
    '</body></html>'
  )
  # Given a path, writeLines() opens, writes and closes the file itself.
  writeLines(html_output, con = htmlfile)
}
|
|
189
|
|
# Build the phyloseq object `physeq`, either from a BIOM file or from three
# tab-separated tables, and derive:
#   category_type   - name of the sample-data column used for grouping
#   category_option - the distinct values found in that column
if (!is.null(options$biom)) {

  #physeq<-import_biom(options$biom)
  physeq <- readBIOM(options$biom)

  # With eight taxonomy ranks the first column is dropped; in every case the
  # (then expected seven) columns are given the standard rank names.
  if (length(rank_names(physeq)) == 8) {
    tax_table(physeq) <- tax_table(physeq)[, -1]
  }
  colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order",
                                   "Family", "Genus", "Species")

  ### select column name from sample table for nmds plot
  ## which(colnames(sample_data(biom)) == "vegetation_type_id")
  #category_type<-colnames(sample_data(physeq))[options$subset]
  category_type <- options$subset

  ### obtain the unique value in the selected column from sample table
  # Take the distinct values of the column as a plain vector, matching the
  # table branch below. Previously this was a one-column sample_data object,
  # which is not a set of values that %in% can be tested against.
  category_option <- unique(get_variable(physeq, category_type))

} else {

  ### read the data into correct data type to create phyloseq object
  # NOTE: these variables share their names with the phyloseq functions
  # otu_table() and tax_table(); the calls below still reach the functions.
  otu_table <- as.matrix(read.table(options$otu_table, header = TRUE, sep = "\t"))
  tax_table <- as.matrix(read.table(options$tax_table, header = TRUE, sep = "\t"))
  sample_table <- read.table(options$meta_table, header = TRUE, sep = "\t",
                             stringsAsFactors = FALSE)

  ### select column name from sample table for nmds plot
  # options$category is a numeric column index here.
  category_type <- colnames(sample_table)[options$category]

  ### obtain the unique value in the selected column from sample table
  category_option <- unique(sample_table[, options$category])

  ### create a sample object for phyloseq
  sample_object <- sample_data(sample_table)

  ### create otu object for phyloseq
  OTU <- otu_table(otu_table, taxa_are_rows = TRUE)

  ### create tax object for phyloseq
  TAX <- tax_table(tax_table)

  ### create a phyloseq object
  physeq <- phyloseq(OTU, TAX, sample_object)
}
|
1
|
237
|
0
|
# Record, per sample, whether its value in the grouping column is one of the
# selected values, and store that flag as a factor in the sample data.
category_input <- get_variable(physeq, category_type) %in% category_option
sample_data(physeq)$category_input <- factor(category_input)

# Run the ordination with the requested method and distance.
physeq_ord <- ordinate(physeq, method, distance)

# Per-sample totals: column sums of the OTU table, sorted ascending.
sum_table <- data.frame(column_sum = as.matrix(colSums(otu_table(physeq))))
rowname_table <- data.frame(sample = rownames(sum_table))
output_table <- cbind(rowname_table, sum_table)
output_table <- output_table[order(output_table$column_sum), ]

# Ordination coordinates with the sample name as first column (this table is
# reported under the heading "Distance Matrix").
distance_matrix <- as.data.frame(physeq_ord$points)
distance_matrix <- cbind(sample = rownames(distance_matrix), distance_matrix)

# Write the text report through an explicit connection.
summary_con <- file(output_summary, open = "wt")
rule <- '--------------------------------------'

cat(rule, '\n', 'Stress value', '\n', sep = "", file = summary_con)
cat(formatC(physeq_ord$stress, digits = 4, format = "f"), file = summary_con)
cat('\n', rule, '\n', sep = "", file = summary_con)

cat('Sample - Column Sum', '\n', rule, '\n', sep = "", file = summary_con)
write.table(output_table, file = summary_con, row.names = FALSE, quote = FALSE)
cat('\n', rule, '\n', sep = "", file = summary_con)

cat('Distance Matrix', '\n', rule, '\n', sep = "", file = summary_con)
write.table(distance_matrix, file = summary_con, row.names = FALSE, quote = FALSE)
cat('\n', rule, sep = "", file = summary_con)

close(summary_con)
|
0
|
282
|
|
# Produce the requested figure set:
#   1 = taxa plots, 2 = sample plot, 3 = biplot, anything else = split plot.
if (plottype == 1) {
  #kingdom_str = colnames(tax_table)[2]
  create_OTU_PDF(
    pdf_file = pdffile,
    phyloseq_obj = physeq,
    phyloseq_ord = physeq_ord,
    kingdom_str = kingdom_str,
    htmlfile = htmlfile,
    pngfile_nmds = pngfile_nmds,
    pngfile_nmds_facet = pngfile_nmds_facet
  )
} else if (plottype == 2) {
  create_SAMPLE_PDF(
    pdf_file = pdffile,
    phyloseq_obj = physeq,
    phyloseq_ord = physeq_ord,
    htmlfile = htmlfile,
    pngfile_nmds = pngfile_nmds,
    category_type = category_type
  )
} else if (plottype == 3) {
  create_BIPLOT_PDF(
    pdf_file = pdffile,
    phyloseq_obj = physeq,
    phyloseq_ord = physeq_ord,
    kingdom_str = kingdom_str,
    htmlfile = htmlfile,
    pngfile_nmds = pngfile_nmds,
    category_type = category_type
  )
} else {
  create_SPLITPLOT_PDF(
    pdf_file = pdffile,
    phyloseq_obj = physeq,
    phyloseq_ord = physeq_ord,
    kingdom_str = kingdom_str,
    htmlfile = htmlfile,
    pngfile_nmds = pngfile_nmds,
    category_type = category_type
  )
}
|
|
293
|