annotate rgedgeRpaired_nocamera.xml @ 141:9809bcb27cc1 draft

Uploaded
author fubar
date Wed, 07 Jan 2015 05:45:33 -0500
parents c50bdc1b7fa7
children e7894f37320a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
132
0de946608423 Uploaded
fubar
parents:
diff changeset
1 <tool id="rgdifferentialcount" name="Differential_Count" version="0.28">
0de946608423 Uploaded
fubar
parents:
diff changeset
2 <description>models using BioConductor packages</description>
0de946608423 Uploaded
fubar
parents:
diff changeset
<!-- External packages this tool declares as dependencies, each pinned to a version. -->
<requirements>
  <requirement type="package" version="3.1.2">R</requirement>
  <requirement type="package" version="1.3.18">graphicsmagick</requirement>
  <requirement type="package" version="9.10">ghostscript</requirement>
  <requirement type="package" version="2.14">biocbasics</requirement>
</requirements>
0de946608423 Uploaded
fubar
parents:
diff changeset
9
0de946608423 Uploaded
fubar
parents:
diff changeset
<!-- Runs rgToolFactory.py under python; it is handed the generated "runme" config file
     as the script to execute with Rscript, and writes an HTML report into the
     html_file output and its files_path directory. -->
<command interpreter="python">
  rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "Differential_Counts"
  --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
</command>
0de946608423 Uploaded
fubar
parents:
diff changeset
<!-- User-facing parameters: the count matrix, treatment/control names and columns,
     optional subject ids for paired designs, low-count filters, the three optional
     models (edgeR, DESeq2, Voom) and the multiple-testing settings. -->
<inputs>
  <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample"
         help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>
  <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs"
         help="Supply a meaningful name here to remind you what the outputs contain">
    <!-- Only letters, digits and underscore survive; every other character is replaced by the empty string. -->
    <sanitizer invalid_char="">
      <valid initial="string.letters,string.digits"><add value="_" /> </valid>
    </sanitizer>
  </param>
  <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/>
  <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True"
         multiple="true" use_header_names="true" size="120" display="checkboxes" force_select="True">
    <validator type="no_options" message="Please select at least one column."/>
  </param>
  <param name="control_name" type="text" value="Control" size="50" label="Control Name"/>
  <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True"
         multiple="true" use_header_names="true" size="120" display="checkboxes" force_select="True">
    <!-- Same guard as Treat_cols: a comparison needs at least one column on each side. -->
    <validator type="no_options" message="Please select at least one column."/>
  </param>
  <param name="subjectids" type="text" optional="true" size="120" value=""
         label="IF SUBJECTS NOT ALL INDEPENDENT! Enter comma separated strings to indicate sample labels for (eg) pairing - must be one for every column in input"
         help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter 'A99,C21,A99,C21'">
    <!-- Letters, digits and the comma separator are the only characters accepted as-is. -->
    <sanitizer>
      <valid initial="string.letters,string.digits"><add value="," /> </valid>
    </sanitizer>
  </param>
  <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs"
         help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/>
  <param name="useNDF" type="boolean" truevalue="T" falsevalue="F" checked="false" size="1"
         label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples"
         help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/>

  <conditional name="edgeR">
    <param name="doedgeR" type="select"
           label="Run this model using edgeR"
           help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates">
      <option value="F">Do not run edgeR</option>
      <option value="T" selected="true">Run edgeR</option>
    </param>
    <when value="T">
      <param name="edgeR_priordf" type="integer" value="10" size="3"
             label="prior.df for tagwise dispersion - larger value = more squeezing of tag dispersions to common dispersion. Replaces prior.n and prior.df = prior.n * residual.df"
             help="10 = edgeR default. Use a larger value to 'smooth' small samples. See edgeR docs and note below"/>
      <!-- The default of a select comes from the option marked selected="true";
           the stray value/size attributes that used to sit here had no meaning. -->
      <param name="edgeR_robust_method" type="select"
             label="Use robust dispersion method"
             help="Use ordinary, anscombe or deviance robust deviance estimates">
        <option value="ordinary" selected="true">Use ordinary deviance estimates</option>
        <option value="deviance">Use robust deviance estimates</option>
        <option value="anscombe">use Anscombe robust deviance estimates</option>
      </param>
    </when>
    <when value="F"></when>
  </conditional>
  <conditional name="DESeq2">
    <param name="doDESeq2" type="select"
           label="Run the same model with DESeq2 and compare findings"
           help="DESeq2 is an update to the DESeq package. It uses different assumptions and methods to edgeR">
      <option value="F" selected="true">Do not run DESeq2</option>
      <option value="T">Run DESeq2</option>
    </param>
    <when value="T">
      <param name="DESeq_fitType" type="select" label="DESeq2 dispersion fit type">
        <option value="parametric" selected="true">Parametric (default) fit for dispersions</option>
        <option value="local">Local fit - this will automagically be used if parametric fit fails</option>
        <option value="mean">Mean dispersion fit- use this if you really understand what you're doing - read the fine manual linked below in the documentation</option>
      </param>
    </when>
    <when value="F"> </when>
  </conditional>
  <param name="doVoom" type="select"
         label="Run the same model with Voom/limma and compare findings"
         help="Voom uses counts per million and a precise transformation of variance so count data can be analysed using limma">
    <option value="F" selected="true">Do not run VOOM</option>
    <option value="T">Run VOOM</option>
  </param>
  <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for family wise error rate control"
         help="Conventional default value of 0.05 recommended"/>
  <!-- Multiple-testing adjustment guards against false positives, i.e. Type I error. -->
  <param name="fdrtype" type="select" label="FDR (Type I error) control method"
         help="Use fdr or bh typically to control for the number of tests in a reliable way">
    <option value="fdr" selected="true">fdr</option>
    <option value="BH">Benjamini Hochberg</option>
    <option value="BY">Benjamini Yekutieli</option>
    <option value="bonferroni">Bonferroni</option>
    <option value="hochberg">Hochberg</option>
    <option value="holm">Holm</option>
    <option value="hommel">Hommel</option>
    <option value="none">no control for multiple tests</option>
  </param>
</inputs>
0de946608423 Uploaded
fubar
parents:
diff changeset
<!-- One top-table dataset per model, each created only when that model was switched on,
     plus the HTML report which is always produced. -->
<outputs>
  <data format="tabular" name="out_edgeR" label="${title}_topTable_edgeR.xls">
    <filter>edgeR['doedgeR'] == "T"</filter>
  </data>
  <data format="tabular" name="out_DESeq2" label="${title}_topTable_DESeq2.xls">
    <filter>DESeq2['doDESeq2'] == "T"</filter>
  </data>
  <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls">
    <filter>doVoom == "T"</filter>
  </data>
  <data format="html" name="html_file" label="${title}.html"/>
</outputs>
0de946608423 Uploaded
fubar
parents:
diff changeset
<!-- Exit code 4 is reported as a fatal error with the description below. -->
<stdio>
  <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix" />
</stdio>
0de946608423 Uploaded
fubar
parents:
diff changeset
<!-- Single functional test: liver (treatment) versus heart (control) on test_bams2mx.xls
     with all three models enabled; the edgeR table and the HTML report are diffed
     against stored expected files. -->
<tests>
  <test>
    <param name="input1" value="test_bams2mx.xls" ftype="tabular" />
    <param name="treatment_name" value="liver" />
    <param name="title" value="edgeRtest" />
    <param name="useNDF" value="" />
    <param name="doedgeR" value="T" />
    <param name="doVoom" value="T" />
    <param name="doDESeq2" value="T" />
    <param name="fdrtype" value="fdr" />
    <param name="edgeR_priordf" value="8" />
    <!-- Must match the declared input name edgeR_robust_method; the former name
         edgeR_robust referred to no parameter of this tool. -->
    <param name="edgeR_robust_method" value="ordinary" />
    <param name="fdrthresh" value="0.05" />
    <param name="control_name" value="heart" />
    <param name="subjectids" value="" />
    <param name="Control_cols" value="3,4,5,9" />
    <param name="Treat_cols" value="2,6,7,8" />
    <output name="out_edgeR" file="edgeRtest1out.xls" compare="diff" />
    <output name="html_file" file="edgeRtest1out.html" compare="diff" lines_diff="20" />
  </test>
</tests>
0de946608423 Uploaded
fubar
parents:
diff changeset
138
0de946608423 Uploaded
fubar
parents:
diff changeset
139 <configfiles>
0de946608423 Uploaded
fubar
parents:
diff changeset
140 <configfile name="runme">
0de946608423 Uploaded
fubar
parents:
diff changeset
141 <![CDATA[
0de946608423 Uploaded
fubar
parents:
diff changeset
142 #
0de946608423 Uploaded
fubar
parents:
diff changeset
143 # edgeR.Rscript
0de946608423 Uploaded
fubar
parents:
diff changeset
144 # updated feb 2014 adding outlier-robust deviance estimate options by ross for R 3.0.2/bioc 2.13
0de946608423 Uploaded
fubar
parents:
diff changeset
145 # updated nov 2011 for R 2.14.0 and edgeR 2.4.0 by ross
0de946608423 Uploaded
fubar
parents:
diff changeset
146 # Performs DGE on a count table containing n replicates of two conditions
0de946608423 Uploaded
fubar
parents:
diff changeset
147 #
0de946608423 Uploaded
fubar
parents:
diff changeset
148 # Parameters
0de946608423 Uploaded
fubar
parents:
diff changeset
149 #
0de946608423 Uploaded
fubar
parents:
diff changeset
150 # 1 - Output Dir
0de946608423 Uploaded
fubar
parents:
diff changeset
151
0de946608423 Uploaded
fubar
parents:
diff changeset
152 # Original edgeR code by: S.Lunke and A.Kaspi
0de946608423 Uploaded
fubar
parents:
diff changeset
# log10 of the largest and of the smallest positive double this machine reports.
# qqPlot substitutes these for +Inf and -Inf so that axis limits stay finite.
reallybig = log10(.Machine\$double.xmax)
reallysmall = log10(.Machine\$double.xmin)
0de946608423 Uploaded
fubar
parents:
diff changeset
155 library("gplots")
0de946608423 Uploaded
fubar
parents:
diff changeset
156 library("edgeR")
140
c50bdc1b7fa7 Uploaded
fubar
parents: 138
diff changeset
157 library('stringr')
132
0de946608423 Uploaded
fubar
parents:
diff changeset
# Write a row-scaled gplots heatmap.2 of the leading rows of cmat to a PDF.
#
# Arguments:
#   cmat       - numeric matrix, one row per contig and one column per sample;
#                rows are taken in the order given (callers are expected to pre-rank them)
#   nsamp      - maximum number of rows drawn
#   outpdfname - PDF file to create
#   TName      - unused, kept so existing callers keep working
#   group      - one label per column of cmat, used for the column side colours
#   myTitle    - main title of the plot
# Returns the value of dev.off().
hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here')
{
    gu = unique(group)
    gn = rownames(cmat)
    if (length(gu) == 2) {
        # two groups: first label seen is red, the other blue
        pcols = ifelse(group == gu[1], "#FF0000", "#0000FF")
    } else {
        colours = rainbow(length(gu),start=0,end=4/6)
        pcols = colours[match(group,gu)]
    }
    # remove unlabelled hm rows; a matrix without rownames is kept whole,
    # and drop=FALSE stops a single surviving row collapsing to a vector
    if (is.null(gn)) {
        dm = cmat
    } else {
        dm = cmat[(! is.na(gn)),,drop=FALSE]
    }
    nprobes = nrow(dm)
    if (nprobes > nsamp) {
        dm = dm[1:nsamp,,drop=FALSE]
    }
    # long sample names are cut to 20 characters to keep the column labels readable
    colnames(dm) = substr(colnames(dm),1,20)
    pdf(outpdfname)
    heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none',
        Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5)
    dev.off()
}
0de946608423 Uploaded
fubar
parents:
diff changeset
185
0de946608423 Uploaded
fubar
parents:
diff changeset
# Write a row-scaled base-graphics heatmap of the leading rows of cmat to a PDF.
#
# Arguments:
#   cmat       - numeric matrix, one row per contig and one column per sample,
#                rows taken in the order given
#   nmeans     - unused, kept so existing callers keep working
#   outpdfname - PDF file to create
#   nsamp      - maximum number of rows drawn
#   TName      - unused, kept so existing callers keep working
#   group      - one label per column of cmat, used for the column side colours
#   myTitle    - prefix of the main title when every row fits on the plot
# Returns the value of dev.off().
hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here")
{
    # one colour per distinct group label, handles any number of groups
    gu = unique(group)
    colours = rainbow(length(gu),start=0.3,end=0.6)
    pcols = colours[match(group,gu)]
    nrows = nrow(cmat)
    if (nrows > nsamp) {
        # drop=FALSE keeps the matrix shape even when nsamp is 1
        cmat = cmat[seq_len(nsamp),,drop=FALSE]
        mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='')
    } else {
        mtitle = paste(myTitle,'Heatmap: n contigs =',nrows)
    }
    # long sample names are cut to 20 characters to keep the column labels readable
    colnames(cmat) = substr(colnames(cmat),1,20)
    pdf(outpdfname)
    heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols)
    dev.off()
}
0de946608423 Uploaded
fubar
parents:
diff changeset
206
0de946608423 Uploaded
fubar
parents:
diff changeset
# Write a QQ plot of observed against expected -log10 p values to a PDF.
#
# Arguments:
#   descr   - main title of the plot
#   pvector - numeric vector of p values
#   outpdf  - PDF file to create
#   ...     - passed on to plot()
# Relies on the script-level constants reallybig and reallysmall.
# Returns the value of dev.off().
qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...)
# stolen from https://gist.github.com/703512
{
    o = -log10(sort(pvector,decreasing=F))
    # seq_along gives an empty sequence for empty input, where 1:length(o) would give c(1,0)
    e = -log10( seq_along(o)/length(o) )
    # infinite values (for example from p = 0) would break the axis limits below
    o[o==-Inf] = reallysmall
    o[o==Inf] = reallybig
    maint = descr
    pdf(outpdf)
    plot(e,o,pch=19,cex=1, main=maint, ...,
        xlab=expression(Expected~~-log[10](italic(p))),
        ylab=expression(Observed~~-log[10](italic(p))),
        xlim=c(0,max(e)), ylim=c(0,max(o)))
    # identity line: where the points would fall if observed matched expected
    lines(e,e,col="red")
    grid(col = "lightgray", lty = "dotted")
    dev.off()
}
0de946608423 Uploaded
fubar
parents:
diff changeset
224
0de946608423 Uploaded
fubar
parents:
diff changeset
# Write an edgeR plotSmear of myDGEList to a PDF, highlighting the tags in deTags.
#
# Arguments:
#   myDGEList - object handed to plotSmear (presumably an edgeR DGEList; confirm with callers)
#   deTags    - passed as de.tags, the tags to highlight
#   outSmear  - PDF file to create
#   outMain   - main title of the plot
# Returns the value of dev.off().
smearPlot = function(myDGEList,deTags, outSmear, outMain)
{
    pdf(outSmear)
    plotSmear(myDGEList,de.tags=deTags,main=outMain)
    grid(col="lightgray", lty="dotted")
    dev.off()
}
0de946608423 Uploaded
fubar
parents:
diff changeset
232
0de946608423 Uploaded
fubar
parents:
diff changeset
# Print per-sample summaries and write two PDFs describing the count distributions:
#   1. pdfname                     - side by side boxplots of rawrs and cleanrs
#   2. sample_counts_histogram.pdf - one histogram per column of rawrs, all on one y scale
#
# Arguments:
#   rawrs   - numeric matrix, one column per sample (the plot titles label these as log2 raw counts)
#   cleanrs - same layout, after the adjustment named by maint
#   maint   - text appended to the title of the second boxplot
#   myTitle - unused, kept so existing callers keep working
#   pdfname - file name of the boxplot PDF
boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname)
{
    nc = ncol(rawrs)
    # full names are kept for the histogram subtitles before the labels are shortened
    fullnames = colnames(rawrs)
    colnames(rawrs) = substr(colnames(rawrs),1,20)
    colnames(cleanrs) = substr(colnames(cleanrs),1,20)
    defpar = par(no.readonly=T)
    print.noquote('@@@ Raw contig counts by sample:')
    print.noquote(summary(rawrs))
    print.noquote('@@@ Library size contig counts by sample:')
    print.noquote(summary(cleanrs))
    pdf(pdfname)
    par(mfrow=c(1,2))
    boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main='log2 raw counts')
    grid(col="lightgray",lty="dotted")
    boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('log2 counts after ',maint))
    grid(col="lightgray",lty="dotted")
    dev.off()
    # the histogram file name is fixed; a separate local keeps the pdfname argument intact
    histpdf = "sample_counts_histogram.pdf"
    print.noquote(paste('Using ncol rawrs=',nc))
    # smallest square grid of panels that holds one histogram per sample
    ncroot = ceiling(sqrt(nc))
    # tallest bin over all samples, so every panel can share the same y axis;
    # seq_len is safe when there are no columns, where 1:nc would visit columns 1 and 0
    peaks = vapply(seq_len(nc), function(i) {
        max(hist(rawrs[,i],breaks=100,plot=F)\$counts)
    }, numeric(1))
    ymax = max(peaks)
    ncols = length(fullnames)
    if (ncols > 20)
    {
        # enlarge the page in proportion to the number of samples
        pagesize = 7*ncols/20
        pdf(histpdf,width=pagesize,height=pagesize)
    } else {
        pdf(histpdf)
    }
    par(mfrow=c(ncroot,ncroot))
    for (i in seq_len(nc)) {
        hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw count', col="maroon",
            breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax))
    }
    dev.off()
    par(defpar)
}
0de946608423 Uploaded
fubar
parents:
diff changeset
282
0de946608423 Uploaded
fubar
parents:
diff changeset
# Write Differential_rowsum_bar_charts.pdf: two stacked histograms of log10 values,
# rawrs on top ("Before") and cleanrs below ("After"), sharing one x range.
#
# Arguments:
#   rawrs   - numeric vector or matrix of values before the step named by maint
#   cleanrs - the same kind of values after that step
#   maint   - text appended to both plot titles
#   myTitle - subtitle of both plots
cumPlot = function(rawrs,cleanrs,maint,myTitle)
{
    pdfname = "Differential_rowsum_bar_charts.pdf"
    defpar = par(no.readonly=T)
    lrs = log(rawrs,10)
    # zeros give -Inf and missing values give NA; hist() leaves those out, so the
    # shared upper x limit is taken over the finite values only
    lim = max(lrs[is.finite(lrs)])
    pdf(pdfname)
    par(mfrow=c(2,1))
    hist(lrs,breaks=100,main=paste('Before:',maint),xlab="# Reads (log)",
        ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1)
    grid(col="lightgray", lty="dotted")
    lrs = log(cleanrs,10)
    hist(lrs,breaks=100,main=paste('After:',maint),xlab="# Reads (log)",
        ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1)
    grid(col="lightgray", lty="dotted")
    dev.off()
    par(defpar)
}
0de946608423 Uploaded
fubar
parents:
diff changeset
301
0de946608423 Uploaded
fubar
parents:
diff changeset
# Write a PDF with two stacked cumulative-proportion curves built from the ecdf
# knots of rawrs ("Before") and cleanrs ("After"), on a log x axis from 1 to max(rawrs).
# The file is named after myTitle with spaces removed, followed by _RowsumCum.pdf.
#
# Arguments:
#   rawrs   - numeric values before the step named by maint
#   cleanrs - numeric values after that step
#   maint   - text appended to both plot titles
#   myTitle - subtitle of both plots and stem of the file name
# Returns the value of dev.off().
cumPlot1 = function(rawrs,cleanrs,maint,myTitle)
{
    pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_')
    pdf(pdfname)
    par(mfrow=c(2,1))
    lastx = max(rawrs)
    # knots() returns the distinct sorted values at which the ecdf steps
    rawe = knots(ecdf(rawrs))
    cleane = knots(ecdf(cleanrs))
    # evenly spaced heights, one per knot; seq_along stays empty for empty input
    cy = seq_along(cleane)/length(cleane)
    ry = seq_along(rawe)/length(rawe)
    plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads",
        ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
    grid(col="blue")
    plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads",
        ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
    grid(col="blue")
    dev.off()
}
0de946608423 Uploaded
fubar
parents:
diff changeset
320
0de946608423 Uploaded
fubar
parents:
diff changeset
321
0de946608423 Uploaded
fubar
parents:
diff changeset
322
0de946608423 Uploaded
fubar
parents:
diff changeset
# doGSEAold: legacy gene set test driver that calls camera() once per gene set.
#
# Arguments
#   y, design  passed straight through to camera()
#   histgmt    path of a user supplied gmt file ("" to skip)
#   bigmt      path of the built in gmt file ("" to skip)
#   ntest      ignored on input; overwritten with the number of gene sets read
#   myTitle    title written as the first line of outfname
#   outfname   base name; results go to camera_up_<outfname> and camera_down_<outfname>
#   minnin, maxnin  a gene set is tested only if minnin < (members found in y) < maxnin
#   fdrthresh, fdrtype  threshold and p.adjust() method used for the reported top sets
#
# gmt lines are tab separated: genesetid\tURLorwhatever\tgene_1\t..\tgene_n
# All printed output is diverted to Camera.log for the duration of the call.
doGSEAold = function(y=NULL,design=NULL,histgmt="",
    bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
    ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
{
    sink('Camera.log')
    # restore normal output even when camera() or file reading stops with an error
    on.exit(sink(), add=TRUE)

    genesets = c()
    if (bigmt > "") {
        genesets = readLines(bigmt)
    }
    if (histgmt > "") {
        # c() rather than rbind(): rbind() on two character vectors builds a
        # 2 row matrix and recycles the shorter file, duplicating gene sets
        genesets = c(genesets,readLines(histgmt))
    }
    print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
    genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n

    # named 'header' so that utils::head is not shadowed
    header = paste(myTitle,'edgeR GSEA')
    write(header,file=outfname,append=F)

    ntest = length(genesets)
    urownames = toupper(rownames(y))
    upcam = c()
    downcam = c()
    # seq_len() so that an empty gene set list gives zero iterations (1:0 would give two)
    for (i in seq_len(ntest)) {
        gs = unlist(genesets[i])
        # need id, url and at least one gene; 3:length(gs) would count backwards otherwise
        if (length(gs) < 3) { next }
        g = gs[1] # geneset_id
        u = gs[2]
        if (u > "") { u = paste("<a href=\'",u,"\'>",u,"</a>",sep="") }
        glist = toupper(gs[3:length(gs)]) # member gene symbols
        inglist = urownames %in% glist
        nin = sum(inglist)
        if ((nin > minnin) && (nin < maxnin)) {
            camres = camera(y=y,index=inglist,design=design)
            if (! is.null(camres)) {
                rownames(camres) = g # gene set name
                camres = cbind(GeneSet=g,URL=u,camres)
                if (camres\$Direction == "Up") {
                    upcam = rbind(upcam,camres)
                } else {
                    downcam = rbind(downcam,camres)
                }
            }
        }
    }

    # sort one direction by p value and add the adjusted p value column;
    # when nothing was collected return an empty table so the writes below still work
    rankcam = function(cam) {
        if (is.null(cam)) { return(data.frame(adjPValue=numeric(0))) }
        cam = cam[order(cam\$PValue),]
        cam\$adjPValue = p.adjust(cam\$PValue,method=fdrtype)
        return(cam)
    }
    uscam = rankcam(upcam)
    dscam = rankcam(downcam)
    # always show at least 10 rows in the log, more if more are significant
    nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
    ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))

    write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
    write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
    write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
}
0de946608423 Uploaded
fubar
parents:
diff changeset
391
0de946608423 Uploaded
fubar
parents:
diff changeset
392
0de946608423 Uploaded
fubar
parents:
diff changeset
393
0de946608423 Uploaded
fubar
parents:
diff changeset
394
0de946608423 Uploaded
fubar
parents:
diff changeset
# doGSEA: gene set test driver that collects every eligible gene set and makes
# a single camera() call with a named list of index vectors.
#
# Arguments
#   y, design  passed straight through to camera()
#   histgmt    path of a user supplied gmt file ("" to skip)
#   bigmt      path of the built in gmt file ("" to skip)
#   ntest      ignored on input; overwritten with the number of gene sets read
#   myTitle    title written as the first line of outfname
#   outfname   base name; results go to camera_up_<outfname> and camera_down_<outfname>
#   minnin, maxnin  a gene set is tested only if minnin < (members found in y) < maxnin
#   fdrthresh, fdrtype  threshold and p.adjust() method used for the reported top sets
#
# gmt lines are tab separated: genesetid\tURLorwhatever\tgene_1\t..\tgene_n
# All printed output is diverted to Camera.log for the duration of the call.
doGSEA = function(y=NULL,design=NULL,histgmt="",
    bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
    ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
{
    sink('Camera.log')
    # restore normal output even when camera() or file reading stops with an error
    on.exit(sink(), add=TRUE)

    genesets = c()
    if (bigmt > "") {
        genesets = readLines(bigmt)
    }
    if (histgmt > "") {
        # c() rather than rbind(): rbind() on two character vectors builds a
        # 2 row matrix and recycles the shorter file, duplicating gene sets
        genesets = c(genesets,readLines(histgmt))
    }
    print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
    genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n

    # named 'header' so that utils::head is not shadowed
    header = paste(myTitle,'edgeR GSEA')
    write(header,file=outfname,append=F)

    ntest = length(genesets)
    urownames = toupper(rownames(y))
    incam = list()   # one logical row selector per eligible gene set
    urls = c()
    gsids = c()
    for (i in seq_len(ntest)) {
        gs = unlist(genesets[i])
        # need id, url and at least one gene; 3:length(gs) would count backwards otherwise
        if (length(gs) < 3) { next }
        gsid = gs[1] # geneset_id
        if (gsid %in% gsids) {
            # ids become list names and result row names, so they must be unique
            print.noquote(paste('@@@ skipping repeated gene set id',gsid))
            next
        }
        url = gs[2]
        if (url > "") { url = paste("<a href=\'",url,"\'>",url,"</a>",sep="") }
        glist = toupper(gs[3:length(gs)]) # member gene symbols
        inglist = urownames %in% glist
        nin = sum(inglist)
        if ((nin > minnin) && (nin < maxnin)) {
            # append as one list element; c(incam,inglist) would flatten every
            # selector into a single long vector and break names(incam) below
            incam[[length(incam) + 1]] = inglist
            gsids = c(gsids,gsid)
            urls = c(urls,url)
        }
    }
    names(incam) = gsids

    upcam = NULL
    downcam = NULL
    if (length(incam) > 0) {
        allcam = camera(y=y,index=incam,design=design)
        # look ids and urls up by row name instead of by position, because
        # camera() may return its rows in p value order rather than input order
        hit = match(rownames(allcam),gsids)
        allcamres = cbind(geneset=gsids[hit],allcam,URL=urls[hit])
        isup = (allcamres\$Direction == "Up")
        upcam = allcamres[isup,]
        downcam = allcamres[!isup,]
    } else {
        print.noquote('@@@ no gene sets passed the size filter - camera not run')
    }

    # sort one direction by p value and add the adjusted p value column;
    # when nothing was collected return an empty table so the writes below still work
    rankcam = function(cam) {
        if (is.null(cam)) { return(data.frame(adjPValue=numeric(0))) }
        cam = cam[order(cam\$PValue),]
        cam\$adjPValue = p.adjust(cam\$PValue,method=fdrtype)
        return(cam)
    }
    uscam = rankcam(upcam)
    dscam = rankcam(downcam)
    # always show at least 10 rows in the log, more if more are significant
    nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
    ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))

    write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
    write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
    write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
}
0de946608423 Uploaded
fubar
parents:
diff changeset
475
0de946608423 Uploaded
fubar
parents:
diff changeset
476
0de946608423 Uploaded
fubar
parents:
diff changeset
477 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_Voom=F,out_DESeq2=F,fdrtype='fdr',priordf=5,
0de946608423 Uploaded
fubar
parents:
diff changeset
478 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
0de946608423 Uploaded
fubar
parents:
diff changeset
479 filterquantile=0.2, subjects=c(),TreatmentName="Rx",ControlName="Ctrl",mydesign=NULL,
0de946608423 Uploaded
fubar
parents:
diff changeset
480 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
0de946608423 Uploaded
fubar
parents:
diff changeset
481 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
0de946608423 Uploaded
fubar
parents:
diff changeset
482 doCook=F,DESeq_fitType="parameteric",robust_meth='ordinary')
0de946608423 Uploaded
fubar
parents:
diff changeset
483 {
0de946608423 Uploaded
fubar
parents:
diff changeset
484
0de946608423 Uploaded
fubar
parents:
diff changeset
485 logf = file('Differential.log', open = "a")
0de946608423 Uploaded
fubar
parents:
diff changeset
486 sink(logf,type = c("output", "message"))
0de946608423 Uploaded
fubar
parents:
diff changeset
487
0de946608423 Uploaded
fubar
parents:
diff changeset
488
0de946608423 Uploaded
fubar
parents:
diff changeset
# run_edgeR: fit the edgeR GLM, write the sorted result table to out_edgeR and
# produce the edgeR diagnostic pdfs (goodness of fit, BCV, heatmap, smear, qq).
#
# Arguments
#   workCM       count matrix handed to DGEList()
#   group        group factor handed to DGEList()
#   priordf      prior.df for tagwise/robust dispersion (<= 0 uses the edgeR default
#                in the 'ordinary' branch)
#   robust_meth  'ordinary' for common/trended/tagwise estimation, anything else is
#                passed as residual.type to estimateGLMRobustDisp()
#   mydesign     design matrix; the last column is the coefficient tested
#   mt           title fragment used in output file names
#   cmrowsums    per row read totals reported as 'totreads'
#   out_edgeR    path of the tabular output file
#   pdata, subjects, nonzerod  accepted but not used in this function
#
# Reads fdrtype, fdrthresh, myTitle, contigurls, allgenes, TName and CName from
# the enclosing scope - NOTE(review): none of these are defined in the visible
# part of the file; confirm they are set before this closure is called.
#
# Returns list(myDGEList=<fitted DGEList>, edgeRcounts=<0/1 vector over allgenes>).
run_edgeR = function(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR,nonzerod)
{
    logf = file('edgeR.log', open = "a")
    # NOTE(review): sink() acts on a single type; confirm whether messages were
    # also meant to be captured here
    sink(logf,type = c("output", "message"))
    # undo the sink and release the connection however the function exits
    on.exit({ sink(); close(logf) }, add=TRUE)

    #### Setup myDGEList object
    myDGEList = DGEList(counts=workCM, group = group)
    myDGEList = calcNormFactors(myDGEList)
    if (robust_meth == 'ordinary') {
        myDGEList = estimateGLMCommonDisp(myDGEList,mydesign)
        myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign)
        if (priordf > 0) {
            myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign,prior.df = priordf)
        } else {
            myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign)
        }
        comdisp = myDGEList\$common.dispersion
        estpriorn = getPriorN(myDGEList)
        print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
    } else {
        myDGEList = estimateGLMRobustDisp(myDGEList,design=mydesign, prior.df = priordf, maxit = 6, residual.type = robust_meth)
    }

    DGLM = glmFit(myDGEList,design=mydesign)
    DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
    normData = cpm(myDGEList)
    porder = order(DE\$table\$PValue)   # p value ordering reused for every sorted view
    adjp = p.adjust(DE\$table\$PValue, method=fdrtype)

    # full output: statistics, normalised counts and raw counts
    uoutput = cbind(
        Name=as.character(rownames(myDGEList\$counts)),
        DE\$table,
        adj.p.value=adjp,
        Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
        myDGEList\$counts
    )
    soutput = uoutput[porder,] # sorted into p value order - for quick toptable

    goodness = gof(DGLM, pcutoff=fdrthresh)
    if (sum(goodness\$outlier) > 0) {
        print.noquote('GLM outliers:')
        print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
    } else {
        # cat() so the trailing \n is a newline instead of being printed literally
        cat('No GLM fit outlier genes found\n')
    }
    z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
    pdf(paste("edgeR",mt,"GoodnessofFit.pdf",sep='_'))
    qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
    abline(0,1,lwd=3)
    points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
    dev.off()

    write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)

    # compact top table for the log: statistics plus URL, no count columns
    tt = cbind(
        Name=as.character(rownames(myDGEList)),
        DE\$table,
        adj.p.value=adjp,
        Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums
    )
    tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
    stt = tt[porder,]
    cat("@@ edgeR Top tags\n")
    # head() copes with fewer than 50 rows; [1:50,] would pad with NA rows
    print.noquote(head(stt,50))

    # which() drops any NA adjusted p values instead of yielding NA rows
    deTags = rownames(uoutput[which(uoutput\$adj.p.value < fdrthresh),])
    nsig = length(deTags)
    print.noquote(paste('@@',nsig,'tags significant at adj p=',fdrthresh))

    pdf(paste("edgeR",mt,"BCV_vs_abundance.pdf",sep="_"))
    plotBCV(myDGEList, cex=0.3, main="Biological CV vs abundance")
    dev.off()

    outpdfname= paste("edgeR",mt,"top_100_heatmap.pdf",sep="_")
    # top 100 by p value; head() avoids a subscript error on matrices with < 100 rows
    ocpm = head(normData[porder,],100)
    hmap2(ocpm,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste(myTitle,'Heatmap'))

    outSmear = paste("edgeR",mt,"smearplot.pdf",sep="_")
    outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
    smearPlot(myDGEList=myDGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
    qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf=paste('edgeR',mt,'qqplot.pdf',sep='_'))

    # 0/1 membership over allgenes - Create venn diagram of hits
    topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
    edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
    edgeRcounts = rep(0, length(allgenes))
    edgeRcounts[edgeRcountsindex] = 1
    return(list(myDGEList=myDGEList,edgeRcounts=edgeRcounts))
} ### run_edgeR
0de946608423 Uploaded
fubar
parents:
diff changeset
568
0de946608423 Uploaded
fubar
parents:
diff changeset
569
0de946608423 Uploaded
fubar
parents:
diff changeset
# run_DESeq2: run the DESeq2 Wald test, write the sorted result table to
# out_DESeq2 and produce the DESeq2 diagnostic pdfs (qq, dispersion, MA,
# sample distance heatmap, PCA).
#
# Arguments
#   workCM         count matrix; column names become sample names
#   pdata          ignored on input; rebuilt here from workCM, group and subjects
#   subjects       optional pairing factor; when non-empty the design is ~ subjects + Rx
#   group          treatment factor, used as column Rx
#   out_DESeq2     path of the tabular output file
#   mt             title fragment used in output file names
#   DESeq_fitType  fitType passed to estimateDispersions()
#
# Reads cmrowsums, contigurls, allgenes, fdrthresh and myTitle from the
# enclosing scope - NOTE(review): cmrowsums, contigurls and allgenes are not
# defined in the visible part of the file; confirm they are set before the call.
#
# Returns a 0/1 vector over allgenes marking rows with padj < fdrthresh.
run_DESeq2 = function(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType)
{
    logf = file("DESeq2.log", open = "a")
    # NOTE(review): sink() acts on a single type; confirm whether messages were
    # also meant to be captured here
    sink(logf,type = c("output", "message"))
    # undo the sink and release the connection however the function exits
    on.exit({ sink(); close(logf) }, add=TRUE)

    # DESeq2
    require('DESeq2')
    library('RColorBrewer')

    # the enclosing edgeIt() declares its default as "parameteric"; map that
    # spelling onto the DESeq2 value "parametric" so the default is usable
    if (DESeq_fitType == "parameteric") { DESeq_fitType = "parametric" }

    if (length(subjects) == 0)
    {
        pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
        deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx))
    } else {
        pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
        deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx))
    }
    deSeqDatsizefac = estimateSizeFactors(deSEQds)
    deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
    resDESeq = nbinomWaldTest(deSeqDatdisp)
    rDESeq = as.data.frame(results(resDESeq))
    rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
    srDESeq = rDESeq[order(rDESeq\$pvalue),]
    qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf=paste('DESeq2',mt,'qqplot.pdf',sep="_"))
    cat("# DESeq top 50\n")
    # head() copes with fewer than 50 rows; [1:50,] would pad with NA rows
    print.noquote(head(srDESeq,50))
    write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)

    # 0/1 membership over allgenes for the hits at the FDR threshold
    topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
    DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
    DESeqcounts = rep(0, length(allgenes))
    DESeqcounts[DESeqcountsindex] = 1

    pdf(paste("DESeq2",mt,"dispersion_estimates.pdf",sep='_'))
    plotDispEsts(resDESeq)
    dev.off()

    # symmetric y range for the MA plot, capped at 4; na.rm so that a single NA
    # fold change cannot turn the limit into NA and break the plot
    ysmall = abs(min(rDESeq\$log2FoldChange,na.rm=TRUE))
    ybig = abs(max(rDESeq\$log2FoldChange,na.rm=TRUE))
    ylimit = min(4,ysmall,ybig)
    pdf(paste("DESeq2",mt,"MA_plot.pdf",sep="_"))
    plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
    dev.off()

    # sample to sample distances on rlog transformed data
    rlogres = rlogTransformation(resDESeq)
    sampledists = dist( t( assay(rlogres) ) )
    sdmat = as.matrix(sampledists)
    pdf(paste("DESeq2",mt,"sample_distance_plot.pdf",sep="_"))
    heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
        col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
    dev.off()

    # PCA is best effort: a failure is logged and the rest of the results are kept
    result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
    if ("try-error" %in% class(result)) {
        print.noquote('DESeq2 plotPCA failed.')
    } else {
        pdf(paste("DESeq2",mt,"PCA_plot.pdf",sep="_"))
        # the plot object is only drawn on the pdf device when printed explicitly
        print(ppca)
        dev.off()
    }
    return(DESeqcounts)
}
0de946608423 Uploaded
fubar
parents:
diff changeset
628
0de946608423 Uploaded
fubar
parents:
diff changeset
629
0de946608423 Uploaded
fubar
parents:
diff changeset
# run_Voom: voom transform followed by a limma fit; writes the sorted result
# table to out_Voom plus the mean-variance and qq pdfs.
#
# Arguments
#   workCM     count matrix; row names label the results
#   group      group factor, used only when the DGEList is built here
#   mydesign   design matrix; the last column is the coefficient tested
#   mt         title fragment used in output file names
#   out_Voom   path of the tabular output file
#   pdata, subjects  accepted but not used in this function
#
# Reads doedgeR, fdrtype, fdrthresh, myTitle, cmrowsums, contigurls and allgenes
# from the enclosing scope. When doedgeR is not FALSE, myDGEList is also taken
# from the enclosing scope - NOTE(review): presumably the object returned by
# run_edgeR; confirm against the caller.
#
# Returns a 0/1 vector over allgenes marking rows with adj.P.Val < fdrthresh.
run_Voom = function(workCM,pdata,subjects,group,mydesign,mt,out_Voom)
{
    logf = file('VOOM.log', open = "a")
    # NOTE(review): sink() acts on a single type; confirm whether messages were
    # also meant to be captured here
    sink(logf,type = c("output", "message"))
    # undo the sink and release the connection however the function exits
    on.exit({ sink(); close(logf) }, add=TRUE)

    if (doedgeR == F) {
        #### Setup myDGEList object
        myDGEList = DGEList(counts=workCM, group = group)
        myDGEList = calcNormFactors(myDGEList)
        myDGEList = estimateGLMCommonDisp(myDGEList,mydesign)
        myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign)
        myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign)
    }

    pdf(paste("VOOM",mt,"mean_variance_plot.pdf",sep='_'))
    # "quantile" written in full rather than relying on partial matching of "quantil"
    dat.voomed = voom(myDGEList, mydesign, plot = TRUE, normalize.method="quantile", lib.size = NULL)
    dev.off()

    # Use limma to fit data
    fit = lmFit(dat.voomed, mydesign)
    fit = eBayes(fit)
    # full argument names (adjust.method/number/sort.by) instead of partial matches;
    # sort.by="none" keeps the input row order so the row names below line up
    rvoom = topTable(fit, coef = length(colnames(mydesign)), adjust.method = fdrtype, number = Inf, sort.by="none")
    qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf=paste('VOOM',mt,'qqplot.pdf',sep='_'))
    rownames(rvoom) = rownames(workCM)
    rvoom = cbind(Contig=rownames(workCM),rvoom,NReads=cmrowsums,URL=contigurls)
    srvoom = rvoom[order(rvoom\$P.Value),]
    cat("# VOOM top 50\n")
    # head() copes with fewer than 50 rows; [1:50,] would pad with NA rows
    print(head(srvoom,50))
    write.table(srvoom,file=out_Voom, quote=FALSE, sep="\t",row.names=F)

    # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
    topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
    voomcountsindex = which(allgenes %in% rownames(topresults.voom))
    voomcounts = rep(0, length(allgenes))
    voomcounts[voomcountsindex] = 1
    return(voomcounts)
}
0de946608423 Uploaded
fubar
parents:
diff changeset
664
0de946608423 Uploaded
fubar
parents:
diff changeset
665
0de946608423 Uploaded
fubar
parents:
diff changeset
666 #### data cleaning and analysis control starts here
0de946608423 Uploaded
fubar
parents:
diff changeset
667
0de946608423 Uploaded
fubar
parents:
diff changeset
668
0de946608423 Uploaded
fubar
parents:
diff changeset
669 # Error handling
0de946608423 Uploaded
fubar
parents:
diff changeset
670 nugroup = length(unique(group))
0de946608423 Uploaded
fubar
parents:
diff changeset
671 if (nugroup!=2){ # only a two-group contrast is handled below (TName/CName take the first two unique group values)
0de946608423 Uploaded
fubar
parents:
diff changeset
672 write("Number of conditions identified in experiment does not equal 2", stderr()) # report on stderr like the other fatal checks in this script
0de946608423 Uploaded
fubar
parents:
diff changeset
673 quit(save="no",status=3) # non-zero exit status so the failure is not reported as a successful run
0de946608423 Uploaded
fubar
parents:
diff changeset
674 }
0de946608423 Uploaded
fubar
parents:
diff changeset
675 require(edgeR)
0de946608423 Uploaded
fubar
parents:
diff changeset
676 options(width = 512)
0de946608423 Uploaded
fubar
parents:
diff changeset
677 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
0de946608423 Uploaded
fubar
parents:
diff changeset
678 allN = nrow(Count_Matrix)
0de946608423 Uploaded
fubar
parents:
diff changeset
679 nscut = round(ncol(Count_Matrix)/2) # half samples
0de946608423 Uploaded
fubar
parents:
diff changeset
680 colTotmillionreads = colSums(Count_Matrix)/1e6
0de946608423 Uploaded
fubar
parents:
diff changeset
681 counts.dataframe = as.data.frame(c())
0de946608423 Uploaded
fubar
parents:
diff changeset
682 rawrs = rowSums(Count_Matrix)
0de946608423 Uploaded
fubar
parents:
diff changeset
683 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
0de946608423 Uploaded
fubar
parents:
diff changeset
684 nzN = nrow(nonzerod)
0de946608423 Uploaded
fubar
parents:
diff changeset
685 nzrs = rowSums(nonzerod)
0de946608423 Uploaded
fubar
parents:
diff changeset
686 zN = allN - nzN
0de946608423 Uploaded
fubar
parents:
diff changeset
687 print('@@@ Quantiles for non-zero row counts:',quote=F)
0de946608423 Uploaded
fubar
parents:
diff changeset
688 print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F)
0de946608423 Uploaded
fubar
parents:
diff changeset
689 if (useNDF == T)
0de946608423 Uploaded
fubar
parents:
diff changeset
690 {
0de946608423 Uploaded
fubar
parents:
diff changeset
691 gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut
0de946608423 Uploaded
fubar
parents:
diff changeset
692 lo = colSums(Count_Matrix[!gt1rpin3,])
0de946608423 Uploaded
fubar
parents:
diff changeset
693 workCM = Count_Matrix[gt1rpin3,]
0de946608423 Uploaded
fubar
parents:
diff changeset
694 cleanrs = rowSums(workCM)
0de946608423 Uploaded
fubar
parents:
diff changeset
695 cleanN = length(cleanrs)
0de946608423 Uploaded
fubar
parents:
diff changeset
696 meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="")
0de946608423 Uploaded
fubar
parents:
diff changeset
697 print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F)
0de946608423 Uploaded
fubar
parents:
diff changeset
698 maint = paste('Filter >=1/million reads in >=',nscut,'samples')
0de946608423 Uploaded
fubar
parents:
diff changeset
699 } else {
0de946608423 Uploaded
fubar
parents:
diff changeset
700 useme = (nzrs > quantile(nzrs,filterquantile))
0de946608423 Uploaded
fubar
parents:
diff changeset
701 workCM = nonzerod[useme,]
0de946608423 Uploaded
fubar
parents:
diff changeset
702 lo = colSums(nonzerod[!useme,])
0de946608423 Uploaded
fubar
parents:
diff changeset
703 cleanrs = rowSums(workCM)
0de946608423 Uploaded
fubar
parents:
diff changeset
704 cleanN = length(cleanrs)
0de946608423 Uploaded
fubar
parents:
diff changeset
705 meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="")
0de946608423 Uploaded
fubar
parents:
diff changeset
706 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
0de946608423 Uploaded
fubar
parents:
diff changeset
707 maint = paste('Filter below',filterquantile,'quantile')
0de946608423 Uploaded
fubar
parents:
diff changeset
708 }
0de946608423 Uploaded
fubar
parents:
diff changeset
709 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
0de946608423 Uploaded
fubar
parents:
diff changeset
710 allgenes = rownames(workCM)
0de946608423 Uploaded
fubar
parents:
diff changeset
711 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" # ucsc chr:start-end regexp
0de946608423 Uploaded
fubar
parents:
diff changeset
712 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/"
0de946608423 Uploaded
fubar
parents:
diff changeset
713 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='')
0de946608423 Uploaded
fubar
parents:
diff changeset
714 testreg = str_match(allgenes,reg)
0de946608423 Uploaded
fubar
parents:
diff changeset
715 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string
0de946608423 Uploaded
fubar
parents:
diff changeset
716 {
0de946608423 Uploaded
fubar
parents:
diff changeset
717 print("@@ using ucsc substitution for urls")
0de946608423 Uploaded
fubar
parents:
diff changeset
718 contigurls = paste0(ucsc,"&amp;position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>")
0de946608423 Uploaded
fubar
parents:
diff changeset
719 } else {
0de946608423 Uploaded
fubar
parents:
diff changeset
720 print("@@ using genecards substitution for urls")
0de946608423 Uploaded
fubar
parents:
diff changeset
721 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>")
0de946608423 Uploaded
fubar
parents:
diff changeset
722 }
0de946608423 Uploaded
fubar
parents:
diff changeset
723 print.noquote(paste("@@ Total low count contigs per sample = ",paste(table(lo),collapse=',')))
0de946608423 Uploaded
fubar
parents:
diff changeset
724 cmrowsums = rowSums(workCM)
0de946608423 Uploaded
fubar
parents:
diff changeset
725 TName=unique(group)[1]
0de946608423 Uploaded
fubar
parents:
diff changeset
726 CName=unique(group)[2]
0de946608423 Uploaded
fubar
parents:
diff changeset
727 if (is.null(mydesign)) {
0de946608423 Uploaded
fubar
parents:
diff changeset
728 if (length(subjects) == 0)
0de946608423 Uploaded
fubar
parents:
diff changeset
729 {
0de946608423 Uploaded
fubar
parents:
diff changeset
730 mydesign = model.matrix(~group)
0de946608423 Uploaded
fubar
parents:
diff changeset
731 }
0de946608423 Uploaded
fubar
parents:
diff changeset
732 else {
0de946608423 Uploaded
fubar
parents:
diff changeset
733 subjf = factor(subjects)
0de946608423 Uploaded
fubar
parents:
diff changeset
734 mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it
0de946608423 Uploaded
fubar
parents:
diff changeset
735 }
0de946608423 Uploaded
fubar
parents:
diff changeset
736 }
0de946608423 Uploaded
fubar
parents:
diff changeset
737 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
0de946608423 Uploaded
fubar
parents:
diff changeset
738 print.noquote('Using design matrix:')
0de946608423 Uploaded
fubar
parents:
diff changeset
739 print.noquote(mydesign)
0de946608423 Uploaded
fubar
parents:
diff changeset
740 normData = cpm(workCM)*1e6
0de946608423 Uploaded
fubar
parents:
diff changeset
741 colnames(normData) = paste( colnames(workCM),'N',sep="_")
0de946608423 Uploaded
fubar
parents:
diff changeset
742 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
0de946608423 Uploaded
fubar
parents:
diff changeset
743
0de946608423 Uploaded
fubar
parents:
diff changeset
744 if (doedgeR == T) {
0de946608423 Uploaded
fubar
parents:
diff changeset
745 eres = run_edgeR(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR,nonzerod)
0de946608423 Uploaded
fubar
parents:
diff changeset
746 myDGEList = eres\$myDGEList
0de946608423 Uploaded
fubar
parents:
diff changeset
747 edgeRcounts = eres\$edgeRcounts
0de946608423 Uploaded
fubar
parents:
diff changeset
748 #### Plot MDS
0de946608423 Uploaded
fubar
parents:
diff changeset
749 sample_colors = match(group,levels(group))
0de946608423 Uploaded
fubar
parents:
diff changeset
750 sampleTypes = levels(factor(group))
0de946608423 Uploaded
fubar
parents:
diff changeset
751 print.noquote(sampleTypes)
0de946608423 Uploaded
fubar
parents:
diff changeset
752 pdf(paste("edgeR",mt,"MDSplot.pdf",sep='_'))
0de946608423 Uploaded
fubar
parents:
diff changeset
753 plotMDS.DGEList(myDGEList,main=paste("MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
0de946608423 Uploaded
fubar
parents:
diff changeset
754 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
0de946608423 Uploaded
fubar
parents:
diff changeset
755 grid(col="blue")
0de946608423 Uploaded
fubar
parents:
diff changeset
756 dev.off()
0de946608423 Uploaded
fubar
parents:
diff changeset
757 scale <- myDGEList\$samples\$lib.size*myDGEList\$samples\$norm.factors
0de946608423 Uploaded
fubar
parents:
diff changeset
758 normCounts <- round(t(t(myDGEList\$counts)/scale)*mean(scale))
0de946608423 Uploaded
fubar
parents:
diff changeset
759 try({boxPlot(rawrs=nzd,cleanrs=log2(normCounts+1),maint='Effects of TMM size normalisation',myTitle=myTitle,pdfname=paste("edgeR",mt,"raw_norm_counts_box.pdf",sep='_'))},T)
0de946608423 Uploaded
fubar
parents:
diff changeset
760 }
0de946608423 Uploaded
fubar
parents:
diff changeset
761 if (doDESeq2 == T) { DESeqcounts = run_DESeq2(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) }
0de946608423 Uploaded
fubar
parents:
diff changeset
762 if (doVoom == T) { voomcounts = run_Voom(workCM,pdata,subjects,group,mydesign,mt,out_Voom) }
0de946608423 Uploaded
fubar
parents:
diff changeset
763
0de946608423 Uploaded
fubar
parents:
diff changeset
764
0de946608423 Uploaded
fubar
parents:
diff changeset
765 if (doCamera) {
0de946608423 Uploaded
fubar
parents:
diff changeset
766 doGSEA(y=myDGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
0de946608423 Uploaded
fubar
parents:
diff changeset
767 outfname=paste("GSEA_Camera",mt,"table.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
0de946608423 Uploaded
fubar
parents:
diff changeset
768 }
0de946608423 Uploaded
fubar
parents:
diff changeset
769 counts.dataframe = c()
0de946608423 Uploaded
fubar
parents:
diff changeset
770 vennmain = 'no venn'
0de946608423 Uploaded
fubar
parents:
diff changeset
771 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) {
0de946608423 Uploaded
fubar
parents:
diff changeset
772 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) {
0de946608423 Uploaded
fubar
parents:
diff changeset
773 vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh)
0de946608423 Uploaded
fubar
parents:
diff changeset
774 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts,
0de946608423 Uploaded
fubar
parents:
diff changeset
775 VOOM_limma = voomcounts, row.names = allgenes)
0de946608423 Uploaded
fubar
parents:
diff changeset
776 } else if ((doDESeq2==T) && (doedgeR==T)) {
0de946608423 Uploaded
fubar
parents:
diff changeset
777 vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh)
0de946608423 Uploaded
fubar
parents:
diff changeset
778 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes)
0de946608423 Uploaded
fubar
parents:
diff changeset
779 } else if ((doVoom==T) && (doedgeR==T)) {
0de946608423 Uploaded
fubar
parents:
diff changeset
780 vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh)
0de946608423 Uploaded
fubar
parents:
diff changeset
781 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
0de946608423 Uploaded
fubar
parents:
diff changeset
782 }
0de946608423 Uploaded
fubar
parents:
diff changeset
783
0de946608423 Uploaded
fubar
parents:
diff changeset
784 if (!is.null(counts.dataframe) && nrow(counts.dataframe) > 1) { # counts.dataframe is still c() (NULL) when no method combination above matched; guard so if() never sees a zero-length condition
0de946608423 Uploaded
fubar
parents:
diff changeset
785 counts.venn = vennCounts(counts.dataframe) # tabulate the 0/1 per-method significance indicator columns
0de946608423 Uploaded
fubar
parents:
diff changeset
786 vennf = paste("Differential_venn",mt,"significant_genes_overlap.pdf",sep="_")
0de946608423 Uploaded
fubar
parents:
diff changeset
787 pdf(vennf)
0de946608423 Uploaded
fubar
parents:
diff changeset
788 vennDiagram(counts.venn,main=vennmain,col="maroon")
0de946608423 Uploaded
fubar
parents:
diff changeset
789 dev.off()
0de946608423 Uploaded
fubar
parents:
diff changeset
790 }
0de946608423 Uploaded
fubar
parents:
diff changeset
791 } #### doDESeq2 or doVoom
0de946608423 Uploaded
fubar
parents:
diff changeset
792 sink()
0de946608423 Uploaded
fubar
parents:
diff changeset
793 }
0de946608423 Uploaded
fubar
parents:
diff changeset
794 #### Done
0de946608423 Uploaded
fubar
parents:
diff changeset
795 ]]>
0de946608423 Uploaded
fubar
parents:
diff changeset
796 builtin_gmt = ""
0de946608423 Uploaded
fubar
parents:
diff changeset
797 history_gmt = ""
0de946608423 Uploaded
fubar
parents:
diff changeset
798 history_gmt_name = ""
0de946608423 Uploaded
fubar
parents:
diff changeset
799 out_edgeR = F
0de946608423 Uploaded
fubar
parents:
diff changeset
800 out_DESeq2 = F
0de946608423 Uploaded
fubar
parents:
diff changeset
801 out_Voom = "$out_VOOM"
0de946608423 Uploaded
fubar
parents:
diff changeset
802 edgeR_robust_meth = "ordinary"
0de946608423 Uploaded
fubar
parents:
diff changeset
803 doDESeq2 = $DESeq2.doDESeq2
0de946608423 Uploaded
fubar
parents:
diff changeset
804 doVoom = $doVoom
0de946608423 Uploaded
fubar
parents:
diff changeset
805 doCamera = F
0de946608423 Uploaded
fubar
parents:
diff changeset
806 doedgeR = $edgeR.doedgeR
0de946608423 Uploaded
fubar
parents:
diff changeset
807 edgeR_priordf = 10
0de946608423 Uploaded
fubar
parents:
diff changeset
808
0de946608423 Uploaded
fubar
parents:
diff changeset
809
0de946608423 Uploaded
fubar
parents:
diff changeset
810 #if $doVoom == "T":
0de946608423 Uploaded
fubar
parents:
diff changeset
811 out_Voom = "$out_VOOM"
0de946608423 Uploaded
fubar
parents:
diff changeset
812 #end if
0de946608423 Uploaded
fubar
parents:
diff changeset
813
0de946608423 Uploaded
fubar
parents:
diff changeset
814 #if $DESeq2.doDESeq2 == "T":
0de946608423 Uploaded
fubar
parents:
diff changeset
815 out_DESeq2 = "$out_DESeq2"
0de946608423 Uploaded
fubar
parents:
diff changeset
816 doDESeq2 = T
0de946608423 Uploaded
fubar
parents:
diff changeset
817 DESeq_fitType = "$DESeq2.DESeq_fitType"
0de946608423 Uploaded
fubar
parents:
diff changeset
818 #end if
0de946608423 Uploaded
fubar
parents:
diff changeset
819
0de946608423 Uploaded
fubar
parents:
diff changeset
820 #if $edgeR.doedgeR == "T":
0de946608423 Uploaded
fubar
parents:
diff changeset
821 out_edgeR = "$out_edgeR"
0de946608423 Uploaded
fubar
parents:
diff changeset
822 edgeR_priordf = $edgeR.edgeR_priordf
0de946608423 Uploaded
fubar
parents:
diff changeset
823 edgeR_robust_meth = "$edgeR.edgeR_robust_method"
0de946608423 Uploaded
fubar
parents:
diff changeset
824 #end if
0de946608423 Uploaded
fubar
parents:
diff changeset
825
0de946608423 Uploaded
fubar
parents:
diff changeset
826
0de946608423 Uploaded
fubar
parents:
diff changeset
827 if (sum(c(doedgeR,doVoom,doDESeq2)) == 0)
0de946608423 Uploaded
fubar
parents:
diff changeset
828 {
0de946608423 Uploaded
fubar
parents:
diff changeset
829 write("No methods chosen - nothing to do! Please try again after choosing one or more methods", stderr())
0de946608423 Uploaded
fubar
parents:
diff changeset
830 quit(save="no",status=2)
0de946608423 Uploaded
fubar
parents:
diff changeset
831 }
0de946608423 Uploaded
fubar
parents:
diff changeset
832
0de946608423 Uploaded
fubar
parents:
diff changeset
833 Out_Dir = "$html_file.files_path"
0de946608423 Uploaded
fubar
parents:
diff changeset
834 Input = "$input1"
0de946608423 Uploaded
fubar
parents:
diff changeset
835 TreatmentName = "$treatment_name"
0de946608423 Uploaded
fubar
parents:
diff changeset
836 TreatmentCols = "$Treat_cols"
0de946608423 Uploaded
fubar
parents:
diff changeset
837 ControlName = "$control_name"
0de946608423 Uploaded
fubar
parents:
diff changeset
838 ControlCols= "$Control_cols"
0de946608423 Uploaded
fubar
parents:
diff changeset
839 org = "$input1.dbkey"
0de946608423 Uploaded
fubar
parents:
diff changeset
840 if (org == "") { org = "hg19"}
0de946608423 Uploaded
fubar
parents:
diff changeset
841 fdrtype = "$fdrtype"
0de946608423 Uploaded
fubar
parents:
diff changeset
842 fdrthresh = $fdrthresh
0de946608423 Uploaded
fubar
parents:
diff changeset
843 useNDF = $useNDF
0de946608423 Uploaded
fubar
parents:
diff changeset
844 fQ = $fQ # non-differential centile cutoff
0de946608423 Uploaded
fubar
parents:
diff changeset
845 myTitle = "$title"
0de946608423 Uploaded
fubar
parents:
diff changeset
846 sids = strsplit("$subjectids",',')
0de946608423 Uploaded
fubar
parents:
diff changeset
847 subjects = unlist(sids)
0de946608423 Uploaded
fubar
parents:
diff changeset
848 nsubj = length(subjects)
0de946608423 Uploaded
fubar
parents:
diff changeset
849 TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1
0de946608423 Uploaded
fubar
parents:
diff changeset
850 CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1
0de946608423 Uploaded
fubar
parents:
diff changeset
851 cat('Got TCols=')
0de946608423 Uploaded
fubar
parents:
diff changeset
852 cat(TCols)
0de946608423 Uploaded
fubar
parents:
diff changeset
853 cat('; CCols=')
0de946608423 Uploaded
fubar
parents:
diff changeset
854 cat(CCols)
0de946608423 Uploaded
fubar
parents:
diff changeset
855 cat('\n')
0de946608423 Uploaded
fubar
parents:
diff changeset
856 <![CDATA[
0de946608423 Uploaded
fubar
parents:
diff changeset
857 useCols = c(TCols,CCols)
0de946608423 Uploaded
fubar
parents:
diff changeset
858 if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
0de946608423 Uploaded
fubar
parents:
diff changeset
859 Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t')
0de946608423 Uploaded
fubar
parents:
diff changeset
860 snames = colnames(Count_Matrix)
0de946608423 Uploaded
fubar
parents:
diff changeset
861 nsamples = length(snames)
0de946608423 Uploaded
fubar
parents:
diff changeset
862 if (nsubj > 0 && nsubj != nsamples) { # subject ids, when supplied, must match the number of columns read from the input matrix; && is the scalar short-circuit form for if()
0de946608423 Uploaded
fubar
parents:
diff changeset
863 options("show.error.messages"=T)
0de946608423 Uploaded
fubar
parents:
diff changeset
864 mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','),
0de946608423 Uploaded
fubar
parents:
diff changeset
865 'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=','))
0de946608423 Uploaded
fubar
parents:
diff changeset
866 write(mess, stderr())
0de946608423 Uploaded
fubar
parents:
diff changeset
867 quit(save="no",status=4) # non-zero exit status marks the run as failed
0de946608423 Uploaded
fubar
parents:
diff changeset
868 }
0de946608423 Uploaded
fubar
parents:
diff changeset
869 if (length(subjects) != 0) {subjects = subjects[useCols]}
0de946608423 Uploaded
fubar
parents:
diff changeset
870 Count_Matrix = Count_Matrix[,useCols] ### reorder columns
0de946608423 Uploaded
fubar
parents:
diff changeset
871 rn = rownames(Count_Matrix)
0de946608423 Uploaded
fubar
parents:
diff changeset
872 islib = rn %in% c('librarySize','NotInBedRegions')
0de946608423 Uploaded
fubar
parents:
diff changeset
873 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
0de946608423 Uploaded
fubar
parents:
diff changeset
874 Count_Matrix = Count_Matrix[subset(rn,! islib),]
0de946608423 Uploaded
fubar
parents:
diff changeset
875 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) )
0de946608423 Uploaded
fubar
parents:
diff changeset
876 group = factor(group, levels=c(ControlName,TreatmentName))
0de946608423 Uploaded
fubar
parents:
diff changeset
877 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_")
0de946608423 Uploaded
fubar
parents:
diff changeset
878 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_Voom=out_Voom, out_DESeq2=out_DESeq2,
0de946608423 Uploaded
fubar
parents:
diff changeset
879 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.',
0de946608423 Uploaded
fubar
parents:
diff changeset
880 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,TreatmentName=TreatmentName,ControlName=ControlName,
0de946608423 Uploaded
fubar
parents:
diff changeset
881 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
0de946608423 Uploaded
fubar
parents:
diff changeset
882 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robust_meth=edgeR_robust_meth)
0de946608423 Uploaded
fubar
parents:
diff changeset
883 sessionInfo()
0de946608423 Uploaded
fubar
parents:
diff changeset
884
0de946608423 Uploaded
fubar
parents:
diff changeset
885 sink()
0de946608423 Uploaded
fubar
parents:
diff changeset
886 ]]>
0de946608423 Uploaded
fubar
parents:
diff changeset
887 </configfile>
0de946608423 Uploaded
fubar
parents:
diff changeset
888 </configfiles>
0de946608423 Uploaded
fubar
parents:
diff changeset
889 <help>
0de946608423 Uploaded
fubar
parents:
diff changeset
890
0de946608423 Uploaded
fubar
parents:
diff changeset
891 **What it does**
0de946608423 Uploaded
fubar
parents:
diff changeset
892
0de946608423 Uploaded
fubar
parents:
diff changeset
893 Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes.
0de946608423 Uploaded
fubar
parents:
diff changeset
894 Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design.
0de946608423 Uploaded
fubar
parents:
diff changeset
895
0de946608423 Uploaded
fubar
parents:
diff changeset
896 **Input**
0de946608423 Uploaded
fubar
parents:
diff changeset
897
0de946608423 Uploaded
fubar
parents:
diff changeset
898 Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper
0de946608423 Uploaded
fubar
parents:
diff changeset
899 and your fave gene model to generate inputs. Each row is a genomic feature (gene or exon eg) and each column the
0de946608423 Uploaded
fubar
parents:
diff changeset
900 non-negative integer count of reads from one sample overlapping the feature.
0de946608423 Uploaded
fubar
parents:
diff changeset
901
0de946608423 Uploaded
fubar
parents:
diff changeset
902 The matrix must have a header row uniquely identifying the source samples, and unique row names in
0de946608423 Uploaded
fubar
parents:
diff changeset
903 the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods.
0de946608423 Uploaded
fubar
parents:
diff changeset
904 They must be unique and valid R names or they will be mangled - please read the fine R docs for the rules on identifiers.
0de946608423 Uploaded
fubar
parents:
diff changeset
905
0de946608423 Uploaded
fubar
parents:
diff changeset
906 **Specifying comparisons**
0de946608423 Uploaded
fubar
parents:
diff changeset
907
0de946608423 Uploaded
fubar
parents:
diff changeset
908 This is basically dumbed down to a single factor with two levels - case vs control.
0de946608423 Uploaded
fubar
parents:
diff changeset
909
0de946608423 Uploaded
fubar
parents:
diff changeset
910 More complex interfaces are possible but painful at present.
0de946608423 Uploaded
fubar
parents:
diff changeset
911 Probably need to specify a phenotype file to do this better.
0de946608423 Uploaded
fubar
parents:
diff changeset
912 Work in progress. Send code.
0de946608423 Uploaded
fubar
parents:
diff changeset
913
0de946608423 Uploaded
fubar
parents:
diff changeset
914 If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),
0de946608423 Uploaded
fubar
parents:
diff changeset
915 put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number.
0de946608423 Uploaded
fubar
parents:
diff changeset
916 Supply a list of integers, one for each sample, or an empty string if samples are all independent.
0de946608423 Uploaded
fubar
parents:
diff changeset
917 If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.
0de946608423 Uploaded
fubar
parents:
diff changeset
918 Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.
0de946608423 Uploaded
fubar
parents:
diff changeset
919
0de946608423 Uploaded
fubar
parents:
diff changeset
920 So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones
0de946608423 Uploaded
fubar
parents:
diff changeset
921 eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects, you might use
0de946608423 Uploaded
fubar
parents:
diff changeset
922 8,9,1,1,2,2
0de946608423 Uploaded
fubar
parents:
diff changeset
923 as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6
0de946608423 Uploaded
fubar
parents:
diff changeset
924
0de946608423 Uploaded
fubar
parents:
diff changeset
925 **Methods available**
0de946608423 Uploaded
fubar
parents:
diff changeset
926
0de946608423 Uploaded
fubar
parents:
diff changeset
927 You can run 3 popular Bioconductor packages available for count data.
0de946608423 Uploaded
fubar
parents:
diff changeset
928
0de946608423 Uploaded
fubar
parents:
diff changeset
929 edgeR - see edgeR_ for details
0de946608423 Uploaded
fubar
parents:
diff changeset
930
0de946608423 Uploaded
fubar
parents:
diff changeset
931 VOOM/limma - see limma_VOOM_ for details
0de946608423 Uploaded
fubar
parents:
diff changeset
932
0de946608423 Uploaded
fubar
parents:
diff changeset
933 DESeq2 - see DESeq2_ for details
0de946608423 Uploaded
fubar
parents:
diff changeset
934
0de946608423 Uploaded
fubar
parents:
diff changeset
935 and optionally camera in edgeR which works better if MSigDB is installed.
0de946608423 Uploaded
fubar
parents:
diff changeset
936
0de946608423 Uploaded
fubar
parents:
diff changeset
937 **Outputs**
0de946608423 Uploaded
fubar
parents:
diff changeset
938
0de946608423 Uploaded
fubar
parents:
diff changeset
939 Some helpful plots and analysis results. Note that most of these are produced using R code
0de946608423 Uploaded
fubar
parents:
diff changeset
940 suggested by the excellent documentation and vignettes for the Bioconductor
0de946608423 Uploaded
fubar
parents:
diff changeset
941 packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy.
0de946608423 Uploaded
fubar
parents:
diff changeset
942
0de946608423 Uploaded
fubar
parents:
diff changeset
943 **Note on Voom**
0de946608423 Uploaded
fubar
parents:
diff changeset
944
0de946608423 Uploaded
fubar
parents:
diff changeset
945 The R help for voom in limma version 3.16.6 includes the following from the authors - but you should read the paper to interpret this method.
0de946608423 Uploaded
fubar
parents:
diff changeset
946
0de946608423 Uploaded
fubar
parents:
diff changeset
947 This function is intended to process RNA-Seq or ChIP-Seq data prior to linear modelling in limma.
0de946608423 Uploaded
fubar
parents:
diff changeset
948
0de946608423 Uploaded
fubar
parents:
diff changeset
949 voom is an acronym for mean-variance modelling at the observational level.
0de946608423 Uploaded
fubar
parents:
diff changeset
950 The key concern is to estimate the mean-variance relationship in the data, then use this to compute appropriate weights for each observation.
0de946608423 Uploaded
fubar
parents:
diff changeset
951 Count data almost always show non-trivial mean-variance relationships. Raw counts show increasing variance with increasing count size, while log-counts typically show a decreasing mean-variance trend.
0de946608423 Uploaded
fubar
parents:
diff changeset
952 This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance.
0de946608423 Uploaded
fubar
parents:
diff changeset
953 The weights are then used in the linear modelling process to adjust for heteroscedasticity.
0de946608423 Uploaded
fubar
parents:
diff changeset
954
0de946608423 Uploaded
fubar
parents:
diff changeset
955 In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using lowess.
0de946608423 Uploaded
fubar
parents:
diff changeset
956 The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag.
0de946608423 Uploaded
fubar
parents:
diff changeset
957 The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag.
0de946608423 Uploaded
fubar
parents:
diff changeset
958 Tags with zero counts across all samples are not included in the lowess fit. Optional normalization is performed using normalizeBetweenArrays.
0de946608423 Uploaded
fubar
parents:
diff changeset
959 Using fitted values of log2 counts from a linear model fit by lmFit, variances from the mean-variance trend were interpolated for each observation.
0de946608423 Uploaded
fubar
parents:
diff changeset
960 This was carried out by approxfun. Inverse variance weights can be used to correct for mean-variance trend in the count data.
0de946608423 Uploaded
fubar
parents:
diff changeset
961
0de946608423 Uploaded
fubar
parents:
diff changeset
962
0de946608423 Uploaded
fubar
parents:
diff changeset
963 Author(s)
0de946608423 Uploaded
fubar
parents:
diff changeset
964
0de946608423 Uploaded
fubar
parents:
diff changeset
965 Charity Law and Gordon Smyth
0de946608423 Uploaded
fubar
parents:
diff changeset
966
0de946608423 Uploaded
fubar
parents:
diff changeset
967 References
0de946608423 Uploaded
fubar
parents:
diff changeset
968
0de946608423 Uploaded
fubar
parents:
diff changeset
969 Law, CW (2013). Precision weights for gene expression analysis. PhD Thesis. University of Melbourne, Australia.
0de946608423 Uploaded
fubar
parents:
diff changeset
970
0de946608423 Uploaded
fubar
parents:
diff changeset
971 Law, CW, Chen, Y, Shi, W, Smyth, GK (2013). Voom! Precision weights unlock linear model analysis tools for RNA-seq read counts.
0de946608423 Uploaded
fubar
parents:
diff changeset
972 Technical Report 1 May 2013, Bioinformatics Division, Walter and Eliza Hall Institute of Medical Research, Melbourne, Australia.
0de946608423 Uploaded
fubar
parents:
diff changeset
973 http://www.statsci.org/smyth/pubs/VoomPreprint.pdf
0de946608423 Uploaded
fubar
parents:
diff changeset
974
0de946608423 Uploaded
fubar
parents:
diff changeset
975 See Also
0de946608423 Uploaded
fubar
parents:
diff changeset
976
0de946608423 Uploaded
fubar
parents:
diff changeset
977 A voom case study is given in the edgeR User's Guide.
0de946608423 Uploaded
fubar
parents:
diff changeset
978
0de946608423 Uploaded
fubar
parents:
diff changeset
979 vooma is a similar function but for microarrays instead of RNA-seq.
0de946608423 Uploaded
fubar
parents:
diff changeset
980
0de946608423 Uploaded
fubar
parents:
diff changeset
981
0de946608423 Uploaded
fubar
parents:
diff changeset
982 ***old rant on changes to Bioconductor package variable names between versions***
0de946608423 Uploaded
fubar
parents:
diff changeset
983
0de946608423 Uploaded
fubar
parents:
diff changeset
984 The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue)
0de946608423 Uploaded
fubar
parents:
diff changeset
985 breaking this and all other code that assumed the old name for this variable,
0de946608423 Uploaded
fubar
parents:
diff changeset
986 between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing).
0de946608423 Uploaded
fubar
parents:
diff changeset
987 This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing
0de946608423 Uploaded
fubar
parents:
diff changeset
988 to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly
0de946608423 Uploaded
fubar
parents:
diff changeset
989 when their old scripts break. This tool now works with 2.4.6.
0de946608423 Uploaded
fubar
parents:
diff changeset
990
0de946608423 Uploaded
fubar
parents:
diff changeset
991 **Note on prior.N**
0de946608423 Uploaded
fubar
parents:
diff changeset
992
0de946608423 Uploaded
fubar
parents:
diff changeset
993 http://seqanswers.com/forums/showthread.php?t=5591 says:
0de946608423 Uploaded
fubar
parents:
diff changeset
994
0de946608423 Uploaded
fubar
parents:
diff changeset
995 *prior.n*
0de946608423 Uploaded
fubar
parents:
diff changeset
996
0de946608423 Uploaded
fubar
parents:
diff changeset
997 The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion.
0de946608423 Uploaded
fubar
parents:
diff changeset
998 You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood
0de946608423 Uploaded
fubar
parents:
diff changeset
999 in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your
0de946608423 Uploaded
fubar
parents:
diff changeset
1000 tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the
0de946608423 Uploaded
fubar
parents:
diff changeset
1001 common likelihood the weight of one observation.
0de946608423 Uploaded
fubar
parents:
diff changeset
1002
0de946608423 Uploaded
fubar
parents:
diff changeset
1003 In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value,
0de946608423 Uploaded
fubar
parents:
diff changeset
1004 or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that
0de946608423 Uploaded
fubar
parents:
diff changeset
1005 you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation
0de946608423 Uploaded
fubar
parents:
diff changeset
1006 (squeezing) of the tagwise dispersions. How many samples do you have in your experiment?
0de946608423 Uploaded
fubar
parents:
diff changeset
1007 What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10.
0de946608423 Uploaded
fubar
parents:
diff changeset
1008 If you have more samples, then the tagwise dispersion estimates will be more reliable,
0de946608423 Uploaded
fubar
parents:
diff changeset
1009 so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5.
0de946608423 Uploaded
fubar
parents:
diff changeset
1010
0de946608423 Uploaded
fubar
parents:
diff changeset
1011
0de946608423 Uploaded
fubar
parents:
diff changeset
1012 From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:
0de946608423 Uploaded
fubar
parents:
diff changeset
1013
0de946608423 Uploaded
fubar
parents:
diff changeset
1014 Dear Dorota,
0de946608423 Uploaded
fubar
parents:
diff changeset
1015
0de946608423 Uploaded
fubar
parents:
diff changeset
1016 The important settings are prior.df and trend.
0de946608423 Uploaded
fubar
parents:
diff changeset
1017
0de946608423 Uploaded
fubar
parents:
diff changeset
1018 prior.n and prior.df are related through prior.df = prior.n * residual.df,
0de946608423 Uploaded
fubar
parents:
diff changeset
1019 and your experiment has residual.df = 36 - 12 = 24. So the old setting of
0de946608423 Uploaded
fubar
parents:
diff changeset
1020 prior.n=10 is equivalent for your data to prior.df = 240, a very large
0de946608423 Uploaded
fubar
parents:
diff changeset
1021 value. Going the other way, the new setting of prior.df=10 is equivalent
0de946608423 Uploaded
fubar
parents:
diff changeset
1022 to prior.n=10/24.
0de946608423 Uploaded
fubar
parents:
diff changeset
1023
0de946608423 Uploaded
fubar
parents:
diff changeset
1024 To recover old results with the current software you would use
0de946608423 Uploaded
fubar
parents:
diff changeset
1025
0de946608423 Uploaded
fubar
parents:
diff changeset
1026 estimateTagwiseDisp(object, prior.df=240, trend="none")
0de946608423 Uploaded
fubar
parents:
diff changeset
1027
0de946608423 Uploaded
fubar
parents:
diff changeset
1028 To get the new default from old software you would use
0de946608423 Uploaded
fubar
parents:
diff changeset
1029
0de946608423 Uploaded
fubar
parents:
diff changeset
1030 estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)
0de946608423 Uploaded
fubar
parents:
diff changeset
1031
0de946608423 Uploaded
fubar
parents:
diff changeset
1032 Actually the old trend method is equivalent to trend="loess" in the new
0de946608423 Uploaded
fubar
parents:
diff changeset
1033 software. You should use plotBCV(object) to see whether a trend is
0de946608423 Uploaded
fubar
parents:
diff changeset
1034 required.
0de946608423 Uploaded
fubar
parents:
diff changeset
1035
0de946608423 Uploaded
fubar
parents:
diff changeset
1036 Note you could also use
0de946608423 Uploaded
fubar
parents:
diff changeset
1037
0de946608423 Uploaded
fubar
parents:
diff changeset
1038 prior.n = getPriorN(object, prior.df=10)
0de946608423 Uploaded
fubar
parents:
diff changeset
1039
0de946608423 Uploaded
fubar
parents:
diff changeset
1040 to map between prior.df and prior.n.
0de946608423 Uploaded
fubar
parents:
diff changeset
1041
0de946608423 Uploaded
fubar
parents:
diff changeset
1042 ----
0de946608423 Uploaded
fubar
parents:
diff changeset
1043
0de946608423 Uploaded
fubar
parents:
diff changeset
1044 **Attributions**
0de946608423 Uploaded
fubar
parents:
diff changeset
1045
0de946608423 Uploaded
fubar
parents:
diff changeset
1046 edgeR - edgeR_
0de946608423 Uploaded
fubar
parents:
diff changeset
1047
0de946608423 Uploaded
fubar
parents:
diff changeset
1048 VOOM/limma - limma_VOOM_
0de946608423 Uploaded
fubar
parents:
diff changeset
1049
0de946608423 Uploaded
fubar
parents:
diff changeset
1050 DESeq2 - DESeq2_ for details
0de946608423 Uploaded
fubar
parents:
diff changeset
1051
0de946608423 Uploaded
fubar
parents:
diff changeset
1052 See the links above for the Bioconductor documentation of each package exposed in Galaxy by this tool and its app store package.
0de946608423 Uploaded
fubar
parents:
diff changeset
1053
0de946608423 Uploaded
fubar
parents:
diff changeset
1054 Galaxy_ (that's what you are using right now!) for gluing everything together
0de946608423 Uploaded
fubar
parents:
diff changeset
1055
0de946608423 Uploaded
fubar
parents:
diff changeset
1056 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
0de946608423 Uploaded
fubar
parents:
diff changeset
1057 licensed to you under the LGPL_ like other rgenetics artefacts
0de946608423 Uploaded
fubar
parents:
diff changeset
1058
0de946608423 Uploaded
fubar
parents:
diff changeset
1059 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
0de946608423 Uploaded
fubar
parents:
diff changeset
1060 .. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
0de946608423 Uploaded
fubar
parents:
diff changeset
1061 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
0de946608423 Uploaded
fubar
parents:
diff changeset
1062 .. _DESeq2: http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html
0de946608423 Uploaded
fubar
parents:
diff changeset
1063 .. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html
0de946608423 Uploaded
fubar
parents:
diff changeset
1064 .. _Galaxy: http://getgalaxy.org
0de946608423 Uploaded
fubar
parents:
diff changeset
1065 </help>
0de946608423 Uploaded
fubar
parents:
diff changeset
1066
0de946608423 Uploaded
fubar
parents:
diff changeset
1067 </tool>
0de946608423 Uploaded
fubar
parents:
diff changeset
1068
0de946608423 Uploaded
fubar
parents:
diff changeset
1069