annotate phyloseq_nmds.R @ 0:b4606394e7ec draft

planemo upload
author simon-gladman
date Thu, 14 Jun 2018 09:08:23 -0400
parents
children e376a618eb9f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
1 library('getopt')
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
2 library('ape')
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
3 suppressPackageStartupMessages(library('phyloseq'))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
4 Sys.setenv("DISPLAY"=":1")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
5 library("ggplot2")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
6
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
7 options(warn=-1)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
8
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
9 theme_set(theme_bw())
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
10
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
11 #http://saml.rilspace.com/creating-a-galaxy-tool-for-r-scripts-that-output-images-and-pdfs
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
12 #http://joey711.github.io/phyloseq-demo/phyloseq-demo.html
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
13 option_specification = matrix(c(
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
14 'otu_table','o',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
15 'tax_table','t',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
16 'meta_table','s',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
17 'biom','i',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
18 'subset','x',2,'numeric',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
19 'method','n',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
20 'distance','d',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
21 'kingdom','k',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
22 'cutoff','v',2,'numeric',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
23 'category','c',2,'numeric',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
24 'keep','p',2,'numeric',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
25 'outdir','r',2,'character',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
26 'htmlfile','h',2,'character'
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
27 ),byrow=TRUE,ncol=4);
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
28
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
29
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
30 options <- getopt(option_specification);
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
31 options(bitmapType="cairo")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
32
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
33 if (!is.null(options$outdir)) {
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
34 # Create the directory
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
35 dir.create(options$outdir,FALSE)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
36 }
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
37
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
38
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
39
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
40 method<-options$method
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
41 cutoff_value<-options$cutoff
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
42 ### select a kingdom for phyloseq plot (e.g., "phylum")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
43 #kingdom_str<-colnames(tax_table)[options$kingdom]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
44 kingdom_str<-options$kingdom
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
45 distance<-options$distance
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
46 keep<-options$keep
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
47
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
48
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
49 if(!is.null(options$biom)){
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
50
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
51 physeq<-import_biom(options$biom)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
52
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
53 if(length(rank_names(physeq)) == 8){
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
54 tax_table(physeq) <- tax_table(physeq)[,-1]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
55 colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
56 }
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
57
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
58 ### select column name from sample table for nmds plot
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
59 category_type<-colnames(sample_data(physeq))[options$subset]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
60
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
61 ### obtain the unique value in the selected column from sample table
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
62 category_option<-unique(sample_data(physeq))[,options$subset]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
63
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
64 }else{
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
65
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
66 ### read the data into correct data type to create phyloseq object
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
67 otu_table<-as.matrix(read.table(options$otu_table,header=T,sep="\t"))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
68 tax_table<-as.matrix(read.table(options$tax_table,header=T,sep="\t"))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
69 sample_table<-read.table(options$meta_table,header=T,sep="\t",stringsAsFactors=F)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
70
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
71
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
72 ### select column name from sample table for nmds plot
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
73 category_type<-colnames(sample_table)[options$category]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
74
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
75 ### obtain the unique value in the selected column from sample table
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
76 category_option<-unique(sample_table[,options$category])
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
77
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
78
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
79 ### create a sample object for phyloseq
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
80 sample_object<-sample_data(sample_table)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
81
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
82 ### create otu object for phyloseq
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
83 OTU<-otu_table(otu_table, taxa_are_rows = TRUE)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
84
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
85 ### create tax object for phyloseq
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
86 TAX<-tax_table(tax_table)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
87
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
88 ### create a phyloseq object
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
89 physeq = phyloseq(OTU,TAX,sample_object)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
90 }
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
91 ### select a kingdom for phyloseq plot (e.g., "phylum")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
92 #kingdom_str<-colnames(tax_table)[options$kingdom]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
93 #kingdom_str<-options$kingdom
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
94
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
95
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
96 ### Remove OTUs that do not appear more than 5 times in more than half the samples
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
97 ### filtering OTUs based on cutoff value (e.g., 5)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
98 physeq_temp =genefilter_sample(physeq, filterfun_sample(function(x) x > cutoff_value), A=0.5*nsamples(physeq))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
99
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
100 ### phyloseq object after filtered
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
101 physeq_filter = prune_taxa(physeq_temp, physeq)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
102
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
103 ## Transform to even sampling depth
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
104 physeq_filter = transform_sample_counts(physeq_filter, function(x) 1E6 * x/sum(x))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
105
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
106 ## Keep only the most abundant five phyla
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
107 phylum.sum = tapply(taxa_sums(physeq_filter), tax_table(physeq_filter)[, kingdom_str], sum, na.rm=TRUE)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
108
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
109 ### number of most abundance phyla to keep
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
110 topphyla = names(sort(phylum.sum, TRUE))[1:keep]
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
111 physeq_filter = prune_taxa((tax_table(physeq_filter)[, kingdom_str] %in% topphyla), physeq_filter)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
112
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
113 ### select category to plot NMDS
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
114 category_input = get_variable(physeq_filter, category_type) %in% category_option
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
115 sample_data(physeq_filter)$category_input <- factor(category_input)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
116
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
117 ### prepare the directory and file name
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
118 pdffile <- gsub("[ ]+", "", paste(options$outdir,"/pdffile.pdf"))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
119 pngfile_nmds <- gsub("[ ]+", "", paste(options$outdir,"/nmds.png"))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
120 pngfile_nmds_facet <- gsub("[ ]+", "", paste(options$outdir,"/nmds_facet.png"))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
121 htmlfile <- gsub("[ ]+", "", paste(options$htmlfile))
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
122
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
123
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
124 # Produce PDF file
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
125 pdf(pdffile);
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
126 physeq_ord<-ordinate(physeq_filter,method,distance)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
127 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
128 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
129 garbage<-dev.off();
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
130
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
131
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
132 #png('nmds.png')
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
133 bitmap(pngfile_nmds,"png16m")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
134 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
135 garbage<-dev.off()
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
136
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
137 #png('nmds_facet.png')
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
138 bitmap(pngfile_nmds_facet,"png16m")
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
139 plot_ordination(physeq,physeq_ord,type="taxa",color="Phylum",title="taxa") + facet_wrap(formula(paste('~',kingdom_str)),3)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
140 garbage<-dev.off()
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
141
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
142 # Produce the HTML file
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
143 htmlfile_handle <- file(htmlfile)
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
144 html_output = c('<html><body>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
145 '<table align="center>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
146 '<tr>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
147 '<td valign="middle" style="vertical-align:middle;">',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
148 '<a href="pdffile.pdf"><img src="nmds.png"/></a>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
149 '</td>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
150 '</tr>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
151 '<tr>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
152 '<td valign="middle" style="vertical-align:middle;">',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
153 '<a href="pdffile.pdf"><img src="nmds_facet.png"/></a>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
154 '</td>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
155 '</tr>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
156 '</table>',
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
157 '</html></body>');
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
158 writeLines(html_output, htmlfile_handle);
b4606394e7ec planemo upload
simon-gladman
parents:
diff changeset
159 close(htmlfile_handle);