annotate phyloseq_nmds.R @ 1:e376a618eb9f draft

Updated phyloseq_nmds.R to allow for non standard BIOM files.
author simon-gladman
date Sat, 16 Jun 2018 05:03:43 -0400
parents b4606394e7ec
children 20adf95eb758
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
1 library('getopt')
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
2 library('ape')
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
3 suppressPackageStartupMessages(library('phyloseq'))
1
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
4 library(biomformat)
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
5 library(plyr)
0
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
6 Sys.setenv("DISPLAY"=":1")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
7 library("ggplot2")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
8
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
9 options(warn=-1)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
10
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
11 theme_set(theme_bw())
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
12
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
13 #http://saml.rilspace.com/creating-a-galaxy-tool-for-r-scripts-that-output-images-and-pdfs
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
14 #http://joey711.github.io/phyloseq-demo/phyloseq-demo.html
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
15 option_specification = matrix(c(
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
16 'otu_table','o',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
17 'tax_table','t',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
18 'meta_table','s',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
19 'biom','i',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
20 'subset','x',2,'numeric',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
21 'method','n',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
22 'distance','d',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
23 'kingdom','k',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
24 'cutoff','v',2,'numeric',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
25 'category','c',2,'numeric',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
26 'keep','p',2,'numeric',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
27 'outdir','r',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
28 'htmlfile','h',2,'character'
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
29 ),byrow=TRUE,ncol=4);
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
30
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
31
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
32 options <- getopt(option_specification);
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
33 options(bitmapType="cairo")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
34
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
35 if (!is.null(options$outdir)) {
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
36 # Create the directory
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
37 dir.create(options$outdir,FALSE)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
38 }
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
39
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
40
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
41
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
42 method<-options$method
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
43 cutoff_value<-options$cutoff
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
44 ### select a kingdom for phyloseq plot (e.g., "phylum")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
45 #kingdom_str<-colnames(tax_table)[options$kingdom]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
46 kingdom_str<-options$kingdom
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
47 distance<-options$distance
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
48 keep<-options$keep
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
49
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
50
1
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
51
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
52 ### This function accepts two different types of BIOM file format
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
53 readBIOM<-function(inBiom){
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
54 tryCatch({
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
55 phyloseq_obj<-import_biom(inBiom)
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
56 return(phyloseq_obj)
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
57 },
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
58 error=function(e){
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
59 biom_obj<-read_biom(inBiom)
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
60
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
61 otu_matrix = as(biom_data(biom_obj), "matrix")
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
62 OTU_TABLE = otu_table(otu_matrix, taxa_are_rows=TRUE)
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
63
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
64 taxonomy_matrix = as.matrix(observation_metadata(biom_obj), rownames.force=TRUE)
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
65 TAXONOMY_TABLE = tax_table(taxonomy_matrix)
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
66
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
67 metadata.temp<-sample_metadata(biom_obj)
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
68 METADATA_TABLE<-plyr::ldply(metadata.temp, rbind)
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
69 rownames(METADATA_TABLE)<-as.character(METADATA_TABLE$.id)
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
70
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
71 phyloseq_obj = phyloseq(OTU_TABLE, TAXONOMY_TABLE,sample_data(METADATA_TABLE))
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
72 return(phyloseq_obj)
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
73 }
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
74 )
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
75 }
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
76
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
77
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
78
0
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
79 if(!is.null(options$biom)){
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
80
1
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
81 #physeq<-import_biom(options$biom)
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
82 physeq<-readBIOM(options$biom)
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
83
0
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
84 if(length(rank_names(physeq)) == 8){
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
85 tax_table(physeq) <- tax_table(physeq)[,-1]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
86 colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
87 }
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
88 ### select column name from sample table for nmds plot
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
89 category_type<-colnames(sample_data(physeq))[options$subset]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
90
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
91 ### obtain the unique values in the selected column from sample table
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
92 category_option<-unique(sample_data(physeq))[,options$subset]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
93
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
94 }else{
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
95
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
96 ### read the data into correct data type to create phyloseq object
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
97 otu_table<-as.matrix(read.table(options$otu_table,header=T,sep="\t"))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
98 tax_table<-as.matrix(read.table(options$tax_table,header=T,sep="\t"))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
99 sample_table<-read.table(options$meta_table,header=T,sep="\t",stringsAsFactors=F)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
100
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
101
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
102 ### select column name from sample table for nmds plot
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
103 category_type<-colnames(sample_table)[options$category]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
104
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
105 ### obtain the unique values in the selected column from sample table
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
106 category_option<-unique(sample_table[,options$category])
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
107
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
108
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
109 ### create a sample object for phyloseq
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
110 sample_object<-sample_data(sample_table)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
111
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
112 ### create otu object for phyloseq
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
113 OTU<-otu_table(otu_table, taxa_are_rows = TRUE)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
114
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
115 ### create tax object for phyloseq
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
116 TAX<-tax_table(tax_table)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
117
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
118 ### create a phyloseq object
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
119 physeq = phyloseq(OTU,TAX,sample_object)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
120 }
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
121 ### select a kingdom for phyloseq plot (e.g., "phylum")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
122 #kingdom_str<-colnames(tax_table)[options$kingdom]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
123 #kingdom_str<-options$kingdom
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
124
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
125
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
126 ### Remove OTUs whose count does not exceed the cutoff value in at least 10% of the samples
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
127 ### filtering OTUs based on cutoff value (e.g., 5)
1
e376a618eb9f Updated phyloseq_nmds.R to allow for non standard BIOM files.
simon-gladman
parents: 0
diff changeset
128 physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.1*nsamples(physeq))
0
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
129
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
130 ### phyloseq object after filtering
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
131 physeq_filter = prune_taxa(physeq_temp, physeq)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
132
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
133 ## Transform to even sampling depth
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
134 physeq_filter = transform_sample_counts(physeq_filter, function(x) 1E6 * x/sum(x))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
135
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
136 ## Keep only the most abundant taxa at the selected rank (count set by `keep`)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
137 phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum, na.rm=TRUE)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
138
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
139 ### number of most abundant phyla to keep
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
140 topphyla = names(sort(phylum.sum, TRUE))[1:keep]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
141 physeq_filter = prune_taxa((tax_table(physeq_filter)[, kingdom_str] %in% topphyla), physeq_filter)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
142
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
143 ### select category to plot NMDS
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
144 category_input = get_variable(physeq_filter, category_type) %in% category_option
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
145 sample_data(physeq_filter)$category_input <- factor(category_input)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
146
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
147 ### prepare the directory and file name
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
148 pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf"))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
149 pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png"))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
150 pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png"))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
151 htmlfile <- gsub("[ ]+", "", paste(options$htmlfile))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
152
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
153
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
154 # Produce PDF file
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
155 pdf(pdffile);
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
156 physeq_ord<-ordinate(physeq_filter,method,distance)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
157 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
158 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
159 garbage<-dev.off();
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
160
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
161
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
162 #png('nmds.png')
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
163 bitmap(pngfile_nmds,"png16m")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
164 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
165 garbage<-dev.off()
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
166
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
167 #png('nmds_facet.png')
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
168 bitmap(pngfile_nmds_facet,"png16m")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
169 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
170 garbage<-dev.off()
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
171
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
172 # Produce the HTML file
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
173 htmlfile_handle <- file(htmlfile)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
174 html_output = c('<html><body>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
175 '<table align="center>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
176 '<tr>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
177 '<td valign="middle" style="vertical-align:middle;">',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
178 '<a href="pdffile.pdf"><img src="nmds.png"/></a>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
179 '</td>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
180 '</tr>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
181 '<tr>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
182 '<td valign="middle" style="vertical-align:middle;">',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
183 '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
184 '</td>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
185 '</tr>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
186 '</table>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
187 '</html></body>');
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
188 writeLines(html_output, htmlfile_handle);
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
189 close(htmlfile_handle);