Mercurial > repos > simon-gladman > phyloseq_nmds
annotate phyloseq_nmds.R @ 1:e376a618eb9f draft
Updated phyloseq_nmds.R to allow for non standard BIOM files.
author | simon-gladman |
---|---|
date | Sat, 16 Jun 2018 05:03:43 -0400 |
parents | b4606394e7ec |
children | 20adf95eb758 |
rev | line source |
---|---|
0 | 1 library('getopt') |
2 library('ape') | |
3 suppressPackageStartupMessages(library('phyloseq')) | |
1
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
4 library(biomformat) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
5 library(plyr) |
0 | 6 Sys.setenv("DISPLAY"=":1") |
7 library("ggplot2") | |
8 | |
9 options(warn=-1) | |
10 | |
11 theme_set(theme_bw()) | |
12 | |
13 #http://saml.rilspace.com/creating-a-galaxy-tool-for-r-scripts-that-output-images-and-pdfs | |
14 #http://joey711.github.io/phyloseq-demo/phyloseq-demo.html | |
15 option_specification = matrix(c( | |
16 'otu_table','o',2,'character', | |
17 'tax_table','t',2,'character', | |
18 'meta_table','s',2,'character', | |
19 'biom','i',2,'character', | |
20 'subset','x',2,'numeric', | |
21 'method','n',2,'character', | |
22 'distance','d',2,'character', | |
23 'kingdom','k',2,'character', | |
24 'cutoff','v',2,'numeric', | |
25 'category','c',2,'numeric', | |
26 'keep','p',2,'numeric', | |
27 'outdir','r',2,'character', | |
28 'htmlfile','h',2,'character' | |
29 ),byrow=TRUE,ncol=4); | |
30 | |
31 | |
32 options <- getopt(option_specification); | |
33 options(bitmapType="cairo") | |
34 | |
35 if (!is.null(options$outdir)) { | |
36 # Create the directory | |
37 dir.create(options$outdir,FALSE) | |
38 } | |
39 | |
40 | |
41 | |
42 method<-options$method | |
43 cutoff_value<-options$cutoff | |
44 ### select a kingdom for phyloseq plot (e.g., "phylum") | |
45 #kingdom_str<-colnames(tax_table)[options$kingdom] | |
46 kingdom_str<-options$kingdom | |
47 distance<-options$distance | |
48 keep<-options$keep | |
49 | |
50 | |
1
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
51 |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
52 ### This function accepts different two different type of BIOM file format |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
53 readBIOM<-function(inBiom){ |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
54 tryCatch({ |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
55 phyloseq_obj<-import_biom(inBiom) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
56 return(phyloseq_obj) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
57 }, |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
58 error=function(e){ |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
59 biom_obj<-read_biom(inBiom) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
60 |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
61 otu_matrix = as(biom_data(biom_obj), "matrix") |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
62 OTU_TABLE = otu_table(otu_matrix, taxa_are_rows=TRUE) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
63 |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
64 taxonomy_matrix = as.matrix(observation_metadata(biom_obj), rownames.force=TRUE) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
65 TAXONOMY_TABLE = tax_table(taxonomy_matrix) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
66 |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
67 metadata.temp<-sample_metadata(biom_obj) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
68 METADATA_TABLE<-plyr::ldply(metadata.temp, rbind) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
69 rownames(METADATA_TABLE)<-as.character(METADATA_TABLE$.id) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
70 |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
71 phyloseq_obj = phyloseq(OTU_TABLE, TAXONOMY_TABLE,sample_data(METADATA_TABLE)) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
72 return(phyloseq_obj) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
73 } |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
74 ) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
75 } |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
76 |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
77 |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
78 |
0 | 79 if(!is.null(options$biom)){ |
80 | |
1
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
81 #physeq<-import_biom(options$biom) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
82 physeq<-readBIOM(options$biom) |
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
83 |
0 | 84 if(length(rank_names(physeq)) == 8){ |
85 tax_table(physeq) <- tax_table(physeq)[,-1] | |
86 colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") | |
87 } | |
88 ### select column name from sample table for nmds plot | |
89 category_type<-colnames(sample_data(physeq))[options$subset] | |
90 | |
91 ### obtain the unique value in the selected column from sample table | |
92 category_option<-unique(sample_data(physeq))[,options$subset] | |
93 | |
94 }else{ | |
95 | |
96 ### read the data into correct data type to create phyloseq object | |
97 otu_table<-as.matrix(read.table(options$otu_table,header=T,sep="\t")) | |
98 tax_table<-as.matrix(read.table(options$tax_table,header=T,sep="\t")) | |
99 sample_table<-read.table(options$meta_table,header=T,sep="\t",stringsAsFactors=F) | |
100 | |
101 | |
102 ### select column name from sample table for nmds plot | |
103 category_type<-colnames(sample_table)[options$category] | |
104 | |
105 ### obtain the unique value in the selected column from sample table | |
106 category_option<-unique(sample_table[,options$category]) | |
107 | |
108 | |
109 ### create a sample object for phyloseq | |
110 sample_object<-sample_data(sample_table) | |
111 | |
112 ### create otu object for phyloseq | |
113 OTU<-otu_table(otu_table, taxa_are_rows = TRUE) | |
114 | |
115 ### create tax object for phyloseq | |
116 TAX<-tax_table(tax_table) | |
117 | |
118 ### create a phyloseq object | |
119 physeq = phyloseq(OTU,TAX,sample_object) | |
120 } | |
121 ### select a kingdom for phyloseq plot (e.g., "phylum") | |
122 #kingdom_str<-colnames(tax_table)[options$kingdom] | |
123 #kingdom_str<-options$kingdom | |
124 | |
125 | |
126 ### Remove OTUs that do not appear more than 5 times in more than half the samples | |
127 ### filtering OTUs based on cutoff value (e.g., 5) | |
1
e376a618eb9f
Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents:
0
diff
changeset
|
128 physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.1*nsamples(physeq)) |
0 | 129 |
130 ### phyloseq object after filtered | |
131 physeq_filter = prune_taxa(physeq_temp, physeq) | |
132 | |
133 ## Transform to even sampling depth | |
134 physeq_filter = transform_sample_counts(physeq_filter, function(x) 1E6 * x/sum(x)) | |
135 | |
136 ## Keep only the most abundant five phyla | |
137 phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum, na.rm=TRUE) | |
138 | |
139 ### number of most abundance phyla to keep | |
140 topphyla = names(sort(phylum.sum, TRUE))[1:keep] | |
141 physeq_filter = prune_taxa((tax_table(physeq_filter)[, kingdom_str] %in% topphyla), physeq_filter) | |
142 | |
143 ### select category to plot NMDS | |
144 category_input = get_variable(physeq_filter, category_type) %in% category_option | |
145 sample_data(physeq_filter)$category_input <- factor(category_input) | |
146 | |
147 ### prepare the directory and file name | |
148 pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf")) | |
149 pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png")) | |
150 pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png")) | |
151 htmlfile <- gsub("[ ]+", "", paste(options$htmlfile)) | |
152 | |
153 | |
154 # Produce PDF file | |
155 pdf(pdffile); | |
156 physeq_ord<-ordinate(physeq_filter,method,distance) | |
157 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") | |
158 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3) | |
159 garbage<-dev.off(); | |
160 | |
161 | |
162 #png('nmds.png') | |
163 bitmap(pngfile_nmds,"png16m") | |
164 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") | |
165 garbage<-dev.off() | |
166 | |
167 #png('nmds_facet.png') | |
168 bitmap(pngfile_nmds_facet,"png16m") | |
169 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3) | |
170 garbage<-dev.off() | |
171 | |
172 # Produce the HTML file | |
173 htmlfile_handle <- file(htmlfile) | |
174 html_output = c('<html><body>', | |
175 '<table align="center>', | |
176 '<tr>', | |
177 '<td valign="middle" style="vertical-align:middle;">', | |
178 '<a href="pdffile.pdf"><img src="nmds.png"/></a>', | |
179 '</td>', | |
180 '</tr>', | |
181 '<tr>', | |
182 '<td valign="middle" style="vertical-align:middle;">', | |
183 '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>', | |
184 '</td>', | |
185 '</tr>', | |
186 '</table>', | |
187 '</html></body>'); | |
188 writeLines(html_output, htmlfile_handle); | |
189 close(htmlfile_handle); |