comparison phyloseq_filter.R @ 1:9fbb104e16d9 draft

update
author simon-gladman
date Mon, 12 Nov 2018 22:42:14 -0500
parents 910739de7b80
children
comparison
equal deleted inserted replaced
0:910739de7b80 1:9fbb104e16d9
1 library('getopt') 1 library('getopt')
2 library('ape') 2 library('ape')
3 library('ggplot2') 3 library('ggplot2')
4 suppressPackageStartupMessages(library('phyloseq')) 4 suppressPackageStartupMessages(library('phyloseq'))
5 library(biomformat)
6 library(plyr) 5 library(plyr)
7 Sys.setenv("DISPLAY"=":1") 6 Sys.setenv("DISPLAY"=":1")
8 library(biomformat) 7 library(biomformat)
8 library(jsonlite)
9 suppressPackageStartupMessages(library(metagenomeSeq)) 9 suppressPackageStartupMessages(library(metagenomeSeq))
10 suppressPackageStartupMessages(library("doParallel")) 10 suppressPackageStartupMessages(library("doParallel"))
11 ncores = ceiling(detectCores() * 0.8) 11 ncores = ceiling(detectCores() * 0.8)
12 registerDoParallel(cores=ncores) 12 registerDoParallel(cores=ncores)
13 13
63 pngfile_after_filtering <- gsub("[ ]+", "", paste(options$outdir,"/barplot_after_filtering.png")) 63 pngfile_after_filtering <- gsub("[ ]+", "", paste(options$outdir,"/barplot_after_filtering.png"))
64 pngfile_pre_phyla_filtering <- gsub("[ ]+", "", paste(options$outdir,"/barplot_before_phyla_filtering.png")) 64 pngfile_pre_phyla_filtering <- gsub("[ ]+", "", paste(options$outdir,"/barplot_before_phyla_filtering.png"))
65 pngfile_post_phyla_filtering<- gsub("[ ]+", "", paste(options$outdir,"/barplot_after_phyla_filtering.png")) 65 pngfile_post_phyla_filtering<- gsub("[ ]+", "", paste(options$outdir,"/barplot_after_phyla_filtering.png"))
66 htmlfile <- gsub("[ ]+", "", paste(options$htmlfile)) 66 htmlfile <- gsub("[ ]+", "", paste(options$htmlfile))
67 67
68 ### overwrite the write_biom function for proper BIOM format
69 ### https://github.com/smdabdoub/biomformat/blob/master/R/IO-methods.R#L124
70 write_biom <- function(x, biom_file){
71 cat(toJSON(x, always_decimal=TRUE, auto_unbox=TRUE), file=biom_file)
72 }
73
68 ### This function accepts different two different type of BIOM file format 74 ### This function accepts different two different type of BIOM file format
69 readBIOM<-function(inBiom){ 75 readBIOM<-function(inBiom){
70 tryCatch({ 76 tryCatch({
71 phyloseq_obj<-import_biom(inBiom,parallel=TRUE) 77 phyloseq_obj<-import_biom(inBiom,parallel=TRUE)
72 return(phyloseq_obj) 78 return(phyloseq_obj)
150 print(barplot_after_filtering_png) 156 print(barplot_after_filtering_png)
151 garbage<-dev.off() 157 garbage<-dev.off()
152 158
153 #png('barplot_pre_phyla_filtering.png') 159 #png('barplot_pre_phyla_filtering.png')
154 bitmap(pngfile_pre_phyla_filtering,"png16m") 160 bitmap(pngfile_pre_phyla_filtering,"png16m")
155 print(sample_data(physeq_pre_phyla_filtering)) 161 #print(sample_data(physeq_pre_phyla_filtering))
156 barplot_pre_phyla_filtering<-plot_bar(physeq_pre_phyla_filtering, x=colnames(sample_data(physeq_pre_phyla_filtering))[1], fill=kingdom_str) + 162 barplot_pre_phyla_filtering<-plot_bar(physeq_pre_phyla_filtering, x=colnames(sample_data(physeq_pre_phyla_filtering))[1], fill=kingdom_str) +
157 geom_bar(stat="identity", position="stack") + 163 geom_bar(stat="identity", position="stack") +
158 labs(title=paste("Sample Depth Bar Chart",kingdom_str,sep=":"),subtitle="Sample Vs Abundance (Pre Phyla Filtering)",caption="source: Input Biom") + 164 labs(title=paste("Sample Depth Bar Chart",kingdom_str,sep=":"),subtitle="Sample Vs Abundance (Pre Phyla Filtering)",caption="source: Input Biom") +
159 xlab("Sample") + 165 xlab("Sample") +
160 ylab("Abundance") + 166 ylab("Abundance") +
179 } 185 }
180 186
181 create_HTML<-function(htmlfile){ 187 create_HTML<-function(htmlfile){
182 htmlfile_handle <- file(htmlfile) 188 htmlfile_handle <- file(htmlfile)
183 html_output = c('<html><body>', 189 html_output = c('<html><body>',
184 '<table align="center>', 190 '<table align="center">',
185 '<tr>', 191 '<tr>',
186 '<td valign="middle" style="vertical-align:middle;">', 192 '<td valign="middle" style="vertical-align:middle;">',
187 '<a href="pdffile.pdf"><img src="barplot_before_filtering.png"/></a>', 193 '<a href="pdffile.pdf"><img src="barplot_before_filtering.png"/></a>',
188 '</td>', 194 '</td>',
189 '</tr>', 195 '</tr>',
201 '<td valign="middle" style="vertical-align:middle;">', 207 '<td valign="middle" style="vertical-align:middle;">',
202 '<a href="pdffile.pdf"><img src="barplot_after_phyla_filtering.png"/></a>', 208 '<a href="pdffile.pdf"><img src="barplot_after_phyla_filtering.png"/></a>',
203 '</td>', 209 '</td>',
204 '</tr>', 210 '</tr>',
205 '</table>', 211 '</table>',
206 '</html></body>'); 212 '</body></html>');
207 writeLines(html_output, htmlfile_handle); 213 writeLines(html_output, htmlfile_handle);
208 close(htmlfile_handle); 214 close(htmlfile_handle);
209 } 215 }
210 216
211 convert_phyloseq_otutable_to_dataframe<-function(physeq_obj){ 217 convert_phyloseq_otutable_to_dataframe<-function(physeq_obj){
212 temp.df<-data.frame(otu_table(physeq_obj)) 218 temp.df<-data.frame(otu_table(physeq_obj))
213 temp.df.counts<-as.data.frame(colSums(temp.df)) 219 temp.df.counts<-as.data.frame(colSums(temp.df))
214 colnames(temp.df.counts)<-"Abundance" 220 colnames(temp.df.counts)<-"Abundance"
215 print(temp.df.counts) 221 #print(temp.df.counts)
216 return(temp.df.counts) 222 return(temp.df.counts)
217 } 223 }
218 224
219 if(!is.null(options$biom)){ 225 if(!is.null(options$biom)){
220 226
315 ### number of most abundance phyla to keep 321 ### number of most abundance phyla to keep
316 topphyla_post_transform = names(sort(phylum.sum_post_transform, TRUE))[1:keep] 322 topphyla_post_transform = names(sort(phylum.sum_post_transform, TRUE))[1:keep]
317 323
318 physeq_filter_post_transform = prune_taxa((tax_table(physeq_filter_post_transform)[, kingdom_str] %in% topphyla_post_transform), physeq_filter_post_transform) 324 physeq_filter_post_transform = prune_taxa((tax_table(physeq_filter_post_transform)[, kingdom_str] %in% topphyla_post_transform), physeq_filter_post_transform)
319 325
326 ### remove samples with zero value
327 otu_table(physeq_filter_post_transform)<-otu_table(physeq_filter_post_transform)[,colSums(otu_table(physeq_filter_post_transform)) > 0]
320 328
321 create_PDF(pdffile,before_filtering_dataframe_sampleCounts,after_filtering_dataframe_sampleCounts,physeq_filter_pre_transform,physeq_filter_post_transform,kingdom_str,htmlfile,pngfile_before_filtering,pngfile_after_filtering,pngfile_pre_phyla_filtering,pngfile_post_phyla_filtering) 329 create_PDF(pdffile,before_filtering_dataframe_sampleCounts,after_filtering_dataframe_sampleCounts,physeq_filter_pre_transform,physeq_filter_post_transform,kingdom_str,htmlfile,pngfile_before_filtering,pngfile_after_filtering,pngfile_pre_phyla_filtering,pngfile_post_phyla_filtering)
322 330
323 ### convert phyloseq object to metagenomeSeq - preparing for BIOM output 331 ### convert phyloseq object to metagenomeSeq - preparing for BIOM output
324 metagenomeSeq_obj <- phyloseq_to_metagenomeSeq(physeq_filter_post_transform) 332 #metagenomeSeq_obj <- phyloseq_to_metagenomeSeq(physeq_filter_post_transform)
325 metagenomeSeq_biom <- MRexperiment2biom(metagenomeSeq_obj) 333 #metagenomeSeq_biom <- MRexperiment2biom(metagenomeSeq_obj)
334
335 biom_obj=make_biom(otu_table(physeq_filter_post_transform),sample_metadata=sample_data(physeq_filter_post_transform),observation_metadata=tax_table(physeq_filter_post_transform),matrix_element_type="float")
336 biom_obj_2_metagenomeSeq_obj<-biom2MRexperiment(biom_obj)
337 metagenomeSeq_biom <- MRexperiment2biom(biom_obj_2_metagenomeSeq_obj)
338
326 339
327 ## write biom file 340 ## write biom file
328 write_biom(metagenomeSeq_biom, biom_file=options$outbiom) 341 write_biom(metagenomeSeq_biom, biom_file=options$outbiom)