Mercurial > repos > simon-gladman > phyloseq_nmds
comparison phyloseq_nmds.R @ 2:20adf95eb758 draft
Updated phyloseq_nmds.xml to allow for new datatype.
author | simon-gladman |
---|---|
date | Fri, 20 Jul 2018 00:23:53 -0400 |
parents | e376a618eb9f |
children |
comparison
equal
deleted
inserted
replaced
1:e376a618eb9f | 2:20adf95eb758 |
---|---|
3 suppressPackageStartupMessages(library('phyloseq')) | 3 suppressPackageStartupMessages(library('phyloseq')) |
4 library(biomformat) | 4 library(biomformat) |
5 library(plyr) | 5 library(plyr) |
6 Sys.setenv("DISPLAY"=":1") | 6 Sys.setenv("DISPLAY"=":1") |
7 library("ggplot2") | 7 library("ggplot2") |
8 suppressPackageStartupMessages(library("doParallel")) | |
9 ncores = ceiling(detectCores() * 0.8) | |
10 registerDoParallel(cores=ncores) | |
8 | 11 |
9 options(warn=-1) | 12 options(warn=-1) |
10 | 13 |
11 theme_set(theme_bw()) | 14 theme_set(theme_bw()) |
12 | 15 |
13 #http://saml.rilspace.com/creating-a-galaxy-tool-for-r-scripts-that-output-images-and-pdfs | 16 #http://saml.rilspace.com/creating-a-galaxy-tool-for-r-scripts-that-output-images-and-pdfs |
14 #http://joey711.github.io/phyloseq-demo/phyloseq-demo.html | 17 #http://joey711.github.io/phyloseq-demo/phyloseq-demo.html |
15 option_specification = matrix(c( | 18 option_specification = matrix(c( |
16 'otu_table','o',2,'character', | 19 'otu_table','o',2,'character', |
17 'tax_table','t',2,'character', | 20 'tax_table','t',2,'character', |
18 'meta_table','s',2,'character', | 21 'meta_table','m',2,'character', |
19 'biom','i',2,'character', | 22 'biom','b',2,'character', |
20 'subset','x',2,'numeric', | 23 'filter','f',2,'numeric', |
24 'subset','s',2,'character', | |
21 'method','n',2,'character', | 25 'method','n',2,'character', |
22 'distance','d',2,'character', | 26 'distance','d',2,'character', |
23 'kingdom','k',2,'character', | 27 'kingdom','k',2,'character', |
24 'cutoff','v',2,'numeric', | 28 'plottype','e',2,'numeric', |
25 'category','c',2,'numeric', | 29 'cutoff','c',2,'numeric', |
30 'category','g',2,'numeric', | |
26 'keep','p',2,'numeric', | 31 'keep','p',2,'numeric', |
27 'outdir','r',2,'character', | 32 'outdir','r',2,'character', |
28 'htmlfile','h',2,'character' | 33 'htmlfile','h',2,'character' |
29 ),byrow=TRUE,ncol=4); | 34 ),byrow=TRUE,ncol=4); |
30 | 35 |
44 ### select a kingdom for phyloseq plot (e.g., "phylum") | 49 ### select a kingdom for phyloseq plot (e.g., "phylum") |
45 #kingdom_str<-colnames(tax_table)[options$kingdom] | 50 #kingdom_str<-colnames(tax_table)[options$kingdom] |
46 kingdom_str<-options$kingdom | 51 kingdom_str<-options$kingdom |
47 distance<-options$distance | 52 distance<-options$distance |
48 keep<-options$keep | 53 keep<-options$keep |
49 | 54 plottype<-options$plottype |
55 filter<-options$filter | |
56 | |
57 ### prepare the directory and file name | |
58 pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf")) | |
59 pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png")) | |
60 pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png")) | |
61 htmlfile <- gsub("[ ]+", "", paste(options$htmlfile)) | |
50 | 62 |
51 | 63 |
52 ### This function accepts different two different type of BIOM file format | 64 ### This function accepts different two different type of BIOM file format |
53 readBIOM<-function(inBiom){ | 65 readBIOM<-function(inBiom){ |
54 tryCatch({ | 66 tryCatch({ |
55 phyloseq_obj<-import_biom(inBiom) | 67 phyloseq_obj<-import_biom(inBiom,parallel=TRUE) |
56 return(phyloseq_obj) | 68 return(phyloseq_obj) |
57 }, | 69 }, |
58 error=function(e){ | 70 error=function(e){ |
59 biom_obj<-read_biom(inBiom) | 71 biom_obj<-read_biom(inBiom) |
60 | 72 |
64 taxonomy_matrix = as.matrix(observation_metadata(biom_obj), rownames.force=TRUE) | 76 taxonomy_matrix = as.matrix(observation_metadata(biom_obj), rownames.force=TRUE) |
65 TAXONOMY_TABLE = tax_table(taxonomy_matrix) | 77 TAXONOMY_TABLE = tax_table(taxonomy_matrix) |
66 | 78 |
67 metadata.temp<-sample_metadata(biom_obj) | 79 metadata.temp<-sample_metadata(biom_obj) |
68 METADATA_TABLE<-plyr::ldply(metadata.temp, rbind) | 80 METADATA_TABLE<-plyr::ldply(metadata.temp, rbind) |
69 rownames(METADATA_TABLE)<-as.character(METADATA_TABLE$.id) | 81 rownames(METADATA_TABLE)<-as.character(METADATA_TABLE$.id) |
70 | 82 |
71 phyloseq_obj = phyloseq(OTU_TABLE, TAXONOMY_TABLE,sample_data(METADATA_TABLE)) | 83 phyloseq_obj = phyloseq(OTU_TABLE, TAXONOMY_TABLE,sample_data(METADATA_TABLE)) |
72 return(phyloseq_obj) | 84 return(phyloseq_obj) |
73 } | 85 } |
74 ) | 86 ) |
75 } | 87 } |
76 | 88 |
77 | 89 |
90 create_OTU_PDF<-function(pdf_file,phyloseq_obj,phyloseq_ord,kingdom_str,htmlfile,pngfile_nmds,pngfile_nmds_facet){ | |
91 pdf(pdf_file); | |
92 p1<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") | |
93 print(p1) | |
94 p2<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3) | |
95 print(p2) | |
96 garbage<-dev.off(); | |
97 | |
98 #png('nmds.png') | |
99 bitmap(pngfile_nmds,"png16m") | |
100 p3<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") | |
101 print(p3) | |
102 garbage<-dev.off() | |
103 | |
104 #png('nmds_facet.png') | |
105 bitmap(pngfile_nmds_facet,"png16m") | |
106 p4<-plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3) | |
107 print(p4) | |
108 garbage<-dev.off() | |
109 | |
110 create_HTML_1(htmlfile) | |
111 } | |
112 | |
113 create_SAMPLE_PDF<-function(pdf_file,phyloseq_obj,phyloseq_ord,htmlfile,pngfile_nmds,category_type){ | |
114 pdf(pdf_file); | |
115 p <- plot_ordination(physeq_filter, physeq_ord, type="samples", color=category_type) | |
116 p <- p + geom_point(aes(fill=category_type)) + geom_point(size=5) + ggtitle("samples") | |
117 print(p) | |
118 garbage<-dev.off(); | |
119 | |
120 #png('nmds.png') | |
121 bitmap(pngfile_nmds,"png16m") | |
122 p1 <- plot_ordination(physeq_filter, physeq_ord, type="samples", color=category_type) | |
123 p1 <- p1 + geom_point(aes(fill=category_type)) + geom_point(size=5) + ggtitle("samples") | |
124 print(p1) | |
125 garbage<-dev.off(); | |
126 | |
127 create_HTML_2(htmlfile) | |
128 } | |
129 | |
130 create_HTML_1<-function(htmlfile){ | |
131 htmlfile_handle <- file(htmlfile) | |
132 html_output = c('<html><body>', | |
133 '<table align="center>', | |
134 '<tr>', | |
135 '<td valign="middle" style="vertical-align:middle;">', | |
136 '<a href="pdffile.pdf"><img src="nmds.png"/></a>', | |
137 '</td>', | |
138 '</tr>', | |
139 '<tr>', | |
140 '<td valign="middle" style="vertical-align:middle;">', | |
141 '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>', | |
142 '</td>', | |
143 '</tr>', | |
144 '</table>', | |
145 '</html></body>'); | |
146 writeLines(html_output, htmlfile_handle); | |
147 close(htmlfile_handle); | |
148 } | |
149 | |
150 create_HTML_2<-function(htmlfile){ | |
151 htmlfile_handle <- file(htmlfile) | |
152 html_output = c('<html><body>', | |
153 '<table align="center>', | |
154 '<tr>', | |
155 '<td valign="middle" style="vertical-align:middle;">', | |
156 '<a href="pdffile.pdf"><img src="nmds.png"/></a>', | |
157 '</td>', | |
158 '</tr>', | |
159 '</table>', | |
160 '</html></body>'); | |
161 writeLines(html_output, htmlfile_handle); | |
162 close(htmlfile_handle); | |
163 } | |
78 | 164 |
79 if(!is.null(options$biom)){ | 165 if(!is.null(options$biom)){ |
80 | 166 |
81 #physeq<-import_biom(options$biom) | 167 #physeq<-import_biom(options$biom) |
82 physeq<-readBIOM(options$biom) | 168 physeq<-readBIOM(options$biom) |
83 | 169 |
84 if(length(rank_names(physeq)) == 8){ | 170 if(length(rank_names(physeq)) == 8){ |
85 tax_table(physeq) <- tax_table(physeq)[,-1] | 171 tax_table(physeq) <- tax_table(physeq)[,-1] |
86 colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") | 172 colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") |
173 } else { | |
174 colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") | |
87 } | 175 } |
176 | |
88 ### select column name from sample table for nmds plot | 177 ### select column name from sample table for nmds plot |
89 category_type<-colnames(sample_data(physeq))[options$subset] | 178 ## which(colnames(sample_data(biom)) == "vegetation_type_id") |
179 #category_type<-colnames(sample_data(physeq))[options$subset] | |
180 category_type <- options$subset | |
90 | 181 |
91 ### obtain the unique value in the selected column from sample table | 182 ### obtain the unique value in the selected column from sample table |
92 category_option<-unique(sample_data(physeq))[,options$subset] | 183 category_option<-unique(sample_data(physeq))[,options$subset] |
93 | 184 |
94 }else{ | 185 }else{ |
119 physeq = phyloseq(OTU,TAX,sample_object) | 210 physeq = phyloseq(OTU,TAX,sample_object) |
120 } | 211 } |
121 ### select a kingdom for phyloseq plot (e.g., "phylum") | 212 ### select a kingdom for phyloseq plot (e.g., "phylum") |
122 #kingdom_str<-colnames(tax_table)[options$kingdom] | 213 #kingdom_str<-colnames(tax_table)[options$kingdom] |
123 #kingdom_str<-options$kingdom | 214 #kingdom_str<-options$kingdom |
124 | 215 |
125 | 216 |
126 ### Remove OTUs that do not appear more than 5 times in more than half the samples | |
127 ### filtering OTUs based on cutoff value (e.g., 5) | 217 ### filtering OTUs based on cutoff value (e.g., 5) |
128 physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.1*nsamples(physeq)) | 218 #physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.1*nsamples(physeq)) |
129 | 219 physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=filter*nsamples(physeq)) |
220 | |
130 ### phyloseq object after filtered | 221 ### phyloseq object after filtered |
131 physeq_filter = prune_taxa(physeq_temp, physeq) | 222 physeq_filter = prune_taxa(physeq_temp, physeq) |
132 | 223 |
133 ## Transform to even sampling depth | 224 ## Transform to even sampling depth |
134 physeq_filter = transform_sample_counts(physeq_filter, function(x) 1E6 * x/sum(x)) | 225 physeq_filter = transform_sample_counts(physeq_filter, function(x) 1E6 * x/sum(x)) |
135 | 226 |
227 | |
228 | |
229 if(plottype == 1){ | |
230 | |
231 # kingdom_str <- as.numeric(kingdom_str) | |
136 ## Keep only the most abundant five phyla | 232 ## Keep only the most abundant five phyla |
137 phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum, na.rm=TRUE) | 233 |
138 | 234 phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum,na.rm=TRUE) |
139 ### number of most abundance phyla to keep | 235 ### number of most abundance phyla to keep |
140 topphyla = names(sort(phylum.sum, TRUE))[1:keep] | 236 topphyla = names(sort(phylum.sum, TRUE))[1:keep] |
237 | |
141 physeq_filter = prune_taxa((tax_table(physeq_filter)[, kingdom_str] %in% topphyla), physeq_filter) | 238 physeq_filter = prune_taxa((tax_table(physeq_filter)[, kingdom_str] %in% topphyla), physeq_filter) |
142 | 239 |
240 }else{ | |
241 | |
242 otu_table(physeq_filter)<-otu_table(physeq_filter)[,colSums(otu_table(physeq_filter)) > 1] | |
243 | |
244 } | |
143 ### select category to plot NMDS | 245 ### select category to plot NMDS |
144 category_input = get_variable(physeq_filter, category_type) %in% category_option | 246 category_input = get_variable(physeq_filter, category_type) %in% category_option |
145 sample_data(physeq_filter)$category_input <- factor(category_input) | 247 sample_data(physeq_filter)$category_input <- factor(category_input) |
146 | 248 |
147 ### prepare the directory and file name | |
148 pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf")) | |
149 pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png")) | |
150 pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png")) | |
151 htmlfile <- gsub("[ ]+", "", paste(options$htmlfile)) | |
152 | |
153 | |
154 # Produce PDF file | |
155 pdf(pdffile); | |
156 physeq_ord<-ordinate(physeq_filter,method,distance) | 249 physeq_ord<-ordinate(physeq_filter,method,distance) |
157 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") | 250 |
158 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3) | 251 if(plottype == 1){ |
159 garbage<-dev.off(); | 252 #kingdom_str = colnames(tax_table)[2] |
160 | 253 create_OTU_PDF(pdffile,physeq_filter,physeq_ord,kingdom_str,htmlfile,pngfile_nmds,pngfile_nmds_facet) |
161 | 254 }else{ |
162 #png('nmds.png') | 255 create_SAMPLE_PDF(pdffile,physeq_filter,physeq_ord,htmlfile,pngfile_nmds,category_type) |
163 bitmap(pngfile_nmds,"png16m") | 256 } |
164 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") | 257 |
165 garbage<-dev.off() | |
166 | |
167 #png('nmds_facet.png') | |
168 bitmap(pngfile_nmds_facet,"png16m") | |
169 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3) | |
170 garbage<-dev.off() | |
171 | |
172 # Produce the HTML file | |
173 htmlfile_handle <- file(htmlfile) | |
174 html_output = c('<html><body>', | |
175 '<table align="center>', | |
176 '<tr>', | |
177 '<td valign="middle" style="vertical-align:middle;">', | |
178 '<a href="pdffile.pdf"><img src="nmds.png"/></a>', | |
179 '</td>', | |
180 '</tr>', | |
181 '<tr>', | |
182 '<td valign="middle" style="vertical-align:middle;">', | |
183 '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>', | |
184 '</td>', | |
185 '</tr>', | |
186 '</table>', | |
187 '</html></body>'); | |
188 writeLines(html_output, htmlfile_handle); | |
189 close(htmlfile_handle); |