Mercurial > repos > simon-gladman > phyloseq_filter
comparison phyloseq_filter.R @ 1:9fbb104e16d9 draft
update
| author | simon-gladman |
|---|---|
| date | Mon, 12 Nov 2018 22:42:14 -0500 |
| parents | 910739de7b80 |
| children |
comparison
equal
deleted
inserted
replaced
| 0:910739de7b80 | 1:9fbb104e16d9 |
|---|---|
| 1 library('getopt') | 1 library('getopt') |
| 2 library('ape') | 2 library('ape') |
| 3 library('ggplot2') | 3 library('ggplot2') |
| 4 suppressPackageStartupMessages(library('phyloseq')) | 4 suppressPackageStartupMessages(library('phyloseq')) |
| 5 library(biomformat) | |
| 6 library(plyr) | 5 library(plyr) |
| 7 Sys.setenv("DISPLAY"=":1") | 6 Sys.setenv("DISPLAY"=":1") |
| 8 library(biomformat) | 7 library(biomformat) |
| 8 library(jsonlite) | |
| 9 suppressPackageStartupMessages(library(metagenomeSeq)) | 9 suppressPackageStartupMessages(library(metagenomeSeq)) |
| 10 suppressPackageStartupMessages(library("doParallel")) | 10 suppressPackageStartupMessages(library("doParallel")) |
| 11 ncores = ceiling(detectCores() * 0.8) | 11 ncores = ceiling(detectCores() * 0.8) |
| 12 registerDoParallel(cores=ncores) | 12 registerDoParallel(cores=ncores) |
| 13 | 13 |
| 63 pngfile_after_filtering <- gsub("[ ]+", "", paste(options$outdir,"/barplot_after_filtering.png")) | 63 pngfile_after_filtering <- gsub("[ ]+", "", paste(options$outdir,"/barplot_after_filtering.png")) |
| 64 pngfile_pre_phyla_filtering <- gsub("[ ]+", "", paste(options$outdir,"/barplot_before_phyla_filtering.png")) | 64 pngfile_pre_phyla_filtering <- gsub("[ ]+", "", paste(options$outdir,"/barplot_before_phyla_filtering.png")) |
| 65 pngfile_post_phyla_filtering<- gsub("[ ]+", "", paste(options$outdir,"/barplot_after_phyla_filtering.png")) | 65 pngfile_post_phyla_filtering<- gsub("[ ]+", "", paste(options$outdir,"/barplot_after_phyla_filtering.png")) |
| 66 htmlfile <- gsub("[ ]+", "", paste(options$htmlfile)) | 66 htmlfile <- gsub("[ ]+", "", paste(options$htmlfile)) |
| 67 | 67 |
| 68 ### overwrite the write_biom function for proper BIOM format | |
| 69 ### https://github.com/smdabdoub/biomformat/blob/master/R/IO-methods.R#L124 | |
| 70 write_biom <- function(x, biom_file){ | |
| 71 cat(toJSON(x, always_decimal=TRUE, auto_unbox=TRUE), file=biom_file) | |
| 72 } | |
| 73 | |
| 68 ### This function accepts different two different type of BIOM file format | 74 ### This function accepts different two different type of BIOM file format |
| 69 readBIOM<-function(inBiom){ | 75 readBIOM<-function(inBiom){ |
| 70 tryCatch({ | 76 tryCatch({ |
| 71 phyloseq_obj<-import_biom(inBiom,parallel=TRUE) | 77 phyloseq_obj<-import_biom(inBiom,parallel=TRUE) |
| 72 return(phyloseq_obj) | 78 return(phyloseq_obj) |
| 150 print(barplot_after_filtering_png) | 156 print(barplot_after_filtering_png) |
| 151 garbage<-dev.off() | 157 garbage<-dev.off() |
| 152 | 158 |
| 153 #png('barplot_pre_phyla_filtering.png') | 159 #png('barplot_pre_phyla_filtering.png') |
| 154 bitmap(pngfile_pre_phyla_filtering,"png16m") | 160 bitmap(pngfile_pre_phyla_filtering,"png16m") |
| 155 print(sample_data(physeq_pre_phyla_filtering)) | 161 #print(sample_data(physeq_pre_phyla_filtering)) |
| 156 barplot_pre_phyla_filtering<-plot_bar(physeq_pre_phyla_filtering, x=colnames(sample_data(physeq_pre_phyla_filtering))[1], fill=kingdom_str) + | 162 barplot_pre_phyla_filtering<-plot_bar(physeq_pre_phyla_filtering, x=colnames(sample_data(physeq_pre_phyla_filtering))[1], fill=kingdom_str) + |
| 157 geom_bar(stat="identity", position="stack") + | 163 geom_bar(stat="identity", position="stack") + |
| 158 labs(title=paste("Sample Depth Bar Chart",kingdom_str,sep=":"),subtitle="Sample Vs Abundance (Pre Phyla Filtering)",caption="source: Input Biom") + | 164 labs(title=paste("Sample Depth Bar Chart",kingdom_str,sep=":"),subtitle="Sample Vs Abundance (Pre Phyla Filtering)",caption="source: Input Biom") + |
| 159 xlab("Sample") + | 165 xlab("Sample") + |
| 160 ylab("Abundance") + | 166 ylab("Abundance") + |
| 179 } | 185 } |
| 180 | 186 |
| 181 create_HTML<-function(htmlfile){ | 187 create_HTML<-function(htmlfile){ |
| 182 htmlfile_handle <- file(htmlfile) | 188 htmlfile_handle <- file(htmlfile) |
| 183 html_output = c('<html><body>', | 189 html_output = c('<html><body>', |
| 184 '<table align="center>', | 190 '<table align="center">', |
| 185 '<tr>', | 191 '<tr>', |
| 186 '<td valign="middle" style="vertical-align:middle;">', | 192 '<td valign="middle" style="vertical-align:middle;">', |
| 187 '<a href="pdffile.pdf"><img src="barplot_before_filtering.png"/></a>', | 193 '<a href="pdffile.pdf"><img src="barplot_before_filtering.png"/></a>', |
| 188 '</td>', | 194 '</td>', |
| 189 '</tr>', | 195 '</tr>', |
| 201 '<td valign="middle" style="vertical-align:middle;">', | 207 '<td valign="middle" style="vertical-align:middle;">', |
| 202 '<a href="pdffile.pdf"><img src="barplot_after_phyla_filtering.png"/></a>', | 208 '<a href="pdffile.pdf"><img src="barplot_after_phyla_filtering.png"/></a>', |
| 203 '</td>', | 209 '</td>', |
| 204 '</tr>', | 210 '</tr>', |
| 205 '</table>', | 211 '</table>', |
| 206 '</html></body>'); | 212 '</body></html>'); |
| 207 writeLines(html_output, htmlfile_handle); | 213 writeLines(html_output, htmlfile_handle); |
| 208 close(htmlfile_handle); | 214 close(htmlfile_handle); |
| 209 } | 215 } |
| 210 | 216 |
| 211 convert_phyloseq_otutable_to_dataframe<-function(physeq_obj){ | 217 convert_phyloseq_otutable_to_dataframe<-function(physeq_obj){ |
| 212 temp.df<-data.frame(otu_table(physeq_obj)) | 218 temp.df<-data.frame(otu_table(physeq_obj)) |
| 213 temp.df.counts<-as.data.frame(colSums(temp.df)) | 219 temp.df.counts<-as.data.frame(colSums(temp.df)) |
| 214 colnames(temp.df.counts)<-"Abundance" | 220 colnames(temp.df.counts)<-"Abundance" |
| 215 print(temp.df.counts) | 221 #print(temp.df.counts) |
| 216 return(temp.df.counts) | 222 return(temp.df.counts) |
| 217 } | 223 } |
| 218 | 224 |
| 219 if(!is.null(options$biom)){ | 225 if(!is.null(options$biom)){ |
| 220 | 226 |
| 315 ### number of most abundance phyla to keep | 321 ### number of most abundance phyla to keep |
| 316 topphyla_post_transform = names(sort(phylum.sum_post_transform, TRUE))[1:keep] | 322 topphyla_post_transform = names(sort(phylum.sum_post_transform, TRUE))[1:keep] |
| 317 | 323 |
| 318 physeq_filter_post_transform = prune_taxa((tax_table(physeq_filter_post_transform)[, kingdom_str] %in% topphyla_post_transform), physeq_filter_post_transform) | 324 physeq_filter_post_transform = prune_taxa((tax_table(physeq_filter_post_transform)[, kingdom_str] %in% topphyla_post_transform), physeq_filter_post_transform) |
| 319 | 325 |
| 326 ### remove samples with zero value | |
| 327 otu_table(physeq_filter_post_transform)<-otu_table(physeq_filter_post_transform)[,colSums(otu_table(physeq_filter_post_transform)) > 0] | |
| 320 | 328 |
| 321 create_PDF(pdffile,before_filtering_dataframe_sampleCounts,after_filtering_dataframe_sampleCounts,physeq_filter_pre_transform,physeq_filter_post_transform,kingdom_str,htmlfile,pngfile_before_filtering,pngfile_after_filtering,pngfile_pre_phyla_filtering,pngfile_post_phyla_filtering) | 329 create_PDF(pdffile,before_filtering_dataframe_sampleCounts,after_filtering_dataframe_sampleCounts,physeq_filter_pre_transform,physeq_filter_post_transform,kingdom_str,htmlfile,pngfile_before_filtering,pngfile_after_filtering,pngfile_pre_phyla_filtering,pngfile_post_phyla_filtering) |
| 322 | 330 |
| 323 ### convert phyloseq object to metagenomeSeq - preparing for BIOM output | 331 ### convert phyloseq object to metagenomeSeq - preparing for BIOM output |
| 324 metagenomeSeq_obj <- phyloseq_to_metagenomeSeq(physeq_filter_post_transform) | 332 #metagenomeSeq_obj <- phyloseq_to_metagenomeSeq(physeq_filter_post_transform) |
| 325 metagenomeSeq_biom <- MRexperiment2biom(metagenomeSeq_obj) | 333 #metagenomeSeq_biom <- MRexperiment2biom(metagenomeSeq_obj) |
| 334 | |
| 335 biom_obj=make_biom(otu_table(physeq_filter_post_transform),sample_metadata=sample_data(physeq_filter_post_transform),observation_metadata=tax_table(physeq_filter_post_transform),matrix_element_type="float") | |
| 336 biom_obj_2_metagenomeSeq_obj<-biom2MRexperiment(biom_obj) | |
| 337 metagenomeSeq_biom <- MRexperiment2biom(biom_obj_2_metagenomeSeq_obj) | |
| 338 | |
| 326 | 339 |
| 327 ## write biom file | 340 ## write biom file |
| 328 write_biom(metagenomeSeq_biom, biom_file=options$outbiom) | 341 write_biom(metagenomeSeq_biom, biom_file=options$outbiom) |
