annotate rgedgeRpaired_nocamera.xml @ 42:c446f7f0ebbe draft

Uploaded
author fubar
date Sun, 22 Dec 2013 05:58:21 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
42
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1 <tool id="rgDifferentialCount" name="Differential_Count" version="0.22">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
2 <description>models using BioConductor packages</description>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
3 <requirements>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
4 <requirement type="package" version="3.0.1">r3</requirement>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
5 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
6 <requirement type="package" version="9.07">ghostscript</requirement>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
7 <requirement type="package" version="2.12">biocbasics</requirement>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
8 </requirements>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
9
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
10 <command interpreter="python">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
11 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
12 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
13 </command>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
14 <inputs>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
15 <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
16 help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
17 <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
18 help="Supply a meaningful name here to remind you what the outputs contain">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
19 <sanitizer invalid_char="">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
20 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
21 </sanitizer>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
22 </param>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
23 <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
24 <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
25 multiple="true" use_header_names="true" size="120" display="checkboxes">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
26 <validator type="no_options" message="Please select at least one column."/>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
27 <sanitizer invalid_char="">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
28 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
29 </sanitizer>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
30 </param>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
31 <param name="control_name" type="text" value="Control" size="50" label="Control Name"/>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
32 <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
33 multiple="true" use_header_names="true" size="120" display="checkboxes" optional="true">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
34 <validator type="no_options" message="Please select at least one column."/>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
35 <sanitizer invalid_char="">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
36 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
37 </sanitizer> <validator type="no_options" message="Please select at least one column."/>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
38 <sanitizer invalid_char="">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
39 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
40 </sanitizer>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
41
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
42 </param>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
43 <param name="subjectids" type="text" optional="true" size="120" value = ""
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
44 label="IF SUBJECTS NOT ALL INDEPENDENT! Enter comma separated strings to indicate sample labels for (eg) pairing - must be one for every column in input"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
45 help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter 'A99,C21,A99,C21'">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
46 <sanitizer>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
47 <valid initial="string.letters,string.digits"><add value="," /> </valid>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
48 </sanitizer>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
49 </param>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
50 <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
51 help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
52 <param name="useNDF" type="boolean" truevalue="T" falsevalue="F" checked="false" size="1"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
53 label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
54 help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
55
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
56 <conditional name="edgeR">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
57 <param name="doedgeR" type="select"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
58 label="Run this model using edgeR"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
59 help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
60 <option value="F">Do not run edgeR</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
61 <option value="T" selected="true">Run edgeR</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
62 </param>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
63 <when value="T">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
64 <param name="edgeR_priordf" type="integer" value="20" size="3"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
65 label="prior.df for tagwise dispersion - lower value = more emphasis on each tag's variance. Replaces prior.n and prior.df = prior.n * residual.df"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
66 help="0 = Use edgeR default. Use a small value to 'smooth' small samples. See edgeR docs and note below"/>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
67 </when>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
68 <when value="F"></when>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
69 </conditional>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
70 <conditional name="DESeq2">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
71 <param name="doDESeq2" type="select"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
72 label="Run the same model with DESeq2 and compare findings"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
73 help="DESeq2 is an update to the DESeq package. It uses different assumptions and methods to edgeR">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
74 <option value="F" selected="true">Do not run DESeq2</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
75 <option value="T">Run DESeq2</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
76 </param>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
77 <when value="T">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
78 <param name="DESeq_fitType" type="select">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
79 <option value="parametric" selected="true">Parametric (default) fit for dispersions</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
80 <option value="local">Local fit - this will automagically be used if parametric fit fails</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
81 <option value="mean">Mean dispersion fit- use this if you really understand what you're doing - read the fine manual linked below in the documentation</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
82 </param>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
83 </when>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
84 <when value="F"> </when>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
85 </conditional>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
86 <param name="doVoom" type="select"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
87 label="Run the same model with Voom/limma and compare findings"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
88 help="Voom uses counts per million and a precise transformation of variance so count data can be analysed using limma">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
89 <option value="F" selected="true">Do not run VOOM</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
90 <option value="T">Run VOOM</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
91 </param>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
92 <!--
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
93 <conditional name="camera">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
94 <param name="doCamera" type="select" label="Run the edgeR implementation of Camera GSEA for up/down gene sets"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
95 help="If yes, you can choose a set of genesets to test and/or supply a gmt format geneset collection from your history">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
96 <option value="F" selected="true">Do not run GSEA tests with the Camera algorithm</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
97 <option value="T">Run GSEA tests with the Camera algorithm</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
98 </param>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
99 <when value="T">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
100 <conditional name="gmtSource">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
101 <param name="refgmtSource" type="select"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
102 label="Use a gene set (.gmt) from your history and/or use a built-in (MSigDB etc) gene set">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
103 <option value="indexed" selected="true">Use a built-in gene set</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
104 <option value="history">Use a gene set from my history</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
105 <option value="both">Add a gene set from my history to a built in gene set</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
106 </param>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
107 <when value="indexed">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
108 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
109 <options from_data_table="gseaGMT_3.1">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
110 <filter type="sort_by" column="2" />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
111 <validator type="no_options" message="No GMT v3.1 files are available - please install them"/>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
112 </options>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
113 </param>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
114 </when>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
115 <when value="history">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
116 <param name="ownGMT" type="data" format="gmt" label="Select a Gene Set from your history" />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
117 </when>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
118 <when value="both">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
119 <param name="ownGMT" type="data" format="gseagmt" label="Select a Gene Set from your history" />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
120 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
121 <options from_data_table="gseaGMT_4">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
122 <filter type="sort_by" column="2" />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
123 <validator type="no_options" message="No GMT v4 files are available - please fix tool_data_table and loc files"/>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
124 </options>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
125 </param>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
126 </when>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
127 </conditional>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
128 </when>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
129 <when value="F">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
130 </when>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
131 </conditional>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
132 -->
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
133 <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for family wise error rate control"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
134 help="Conventional default value of 0.05 recommended"/>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
135 <param name="fdrtype" type="select" label="FDR (Type I error) control method"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
136 help="Use fdr or bh typically to control for the number of tests in a reliable way">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
137 <option value="fdr" selected="true">fdr</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
138 <option value="BH">Benjamini Hochberg</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
139 <option value="BY">Benjamini Yekutieli</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
140 <option value="bonferroni">Bonferroni</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
141 <option value="hochberg">Hochberg</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
142 <option value="holm">Holm</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
143 <option value="hommel">Hommel</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
144 <option value="none">no control for multiple tests</option>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
145 </param>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
146 </inputs>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
147 <outputs>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
148 <data format="tabular" name="out_edgeR" label="${title}_topTable_edgeR.xls">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
149 <filter>edgeR['doedgeR'] == "T"</filter>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
150 </data>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
151 <data format="tabular" name="out_DESeq2" label="${title}_topTable_DESeq2.xls">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
152 <filter>DESeq2['doDESeq2'] == "T"</filter>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
153 </data>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
154 <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
155 <filter>doVoom == "T"</filter>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
156 </data>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
157 <data format="html" name="html_file" label="${title}.html"/>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
158 </outputs>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
159 <stdio>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
160 <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix" />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
161 </stdio>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
162 <tests>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
163 <test>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
164 <param name='input1' value='test_bams2mx.xls' ftype='tabular' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
165 <param name='treatment_name' value='liver' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
166 <param name='title' value='edgeRtest' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
167 <param name='useNDF' value='' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
168 <param name='doedgeR' value='T' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
169 <param name='doVoom' value='T' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
170 <param name='doDESeq2' value='T' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
171 <param name='fdrtype' value='fdr' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
172 <param name='edgeR_priordf' value="8" />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
173 <param name='fdrthresh' value="0.05" />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
174 <param name='control_name' value='heart' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
175 <param name='subjectids' value='' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
176 <param name='Control_cols' value='3,4,5,9' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
177 <param name='Treat_cols' value='2,6,7,8' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
178 <output name='out_edgeR' file='edgeRtest1out.xls' compare='diff' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
179 <output name='html_file' file='edgeRtest1out.html' compare='diff' lines_diff='20' />
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
180 </test>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
181 </tests>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
182
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
183 <configfiles>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
184 <configfile name="runme">
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
185 <![CDATA[
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
186 #
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
187 # edgeR.Rscript
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
188 # updated nov 2011 for R 2.14.0 and edgeR 2.4.0 by ross
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
189 # Performs DGE on a count table containing n replicates of two conditions
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
190 #
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
191 # Parameters
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
192 #
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
193 # 1 - Output Dir
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
194
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
195 # Original edgeR code by: S.Lunke and A.Kaspi
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
196 reallybig = log10(.Machine\$double.xmax)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
197 reallysmall = log10(.Machine\$double.xmin)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
198 library('stringr')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
199 library('gplots')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
200 library('edgeR')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
201 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here')  # Writes a heatmap.2 plot of at most the first nsamp rows of cmat to the PDF outpdfname; TName is not referenced in the body
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
202 {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
203 # Perform clustering for significant pvalues after controlling FWER
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
204 samples = colnames(cmat)  # assigned but not referenced again in this function
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
205 gu = unique(group)  # distinct group labels
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
206 gn = rownames(cmat)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
207 if (length(gu) == 2) {  # exactly two groups: red for the first label, blue for the other
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
208 col.map = function(g) {if (g==gu[1]) "#FF0000" else "#0000FF"}
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
209 pcols = unlist(lapply(group,col.map))  # one colour per entry of group
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
210 } else {  # any other number of groups: one rainbow colour per distinct label
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
211 colours = rainbow(length(gu),start=0,end=4/6)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
212 pcols = colours[match(group,gu)] }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
213 dm = cmat[(! is.na(gn)),]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
214 # remove unlabelled hm rows
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
215 nprobes = nrow(dm)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
216 # sub = paste('Showing',nprobes,'contigs ranked for evidence of differential abundance')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
217 if (nprobes > nsamp) {  # keep only the first nsamp rows, in the order supplied
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
218 dm =dm[1:nsamp,]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
219 #sub = paste('Showing',nsamp,'contigs ranked for evidence for differential abundance out of',nprobes,'total')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
220 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
221 newcolnames = substr(colnames(dm),1,20)  # truncate column names to 20 characters for the plot labels
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
222 colnames(dm) = newcolnames
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
223 pdf(outpdfname)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
224 heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none',
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
225 Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5)  # Rowv=F with dendrogram="col": rows are not reordered, only a column dendrogram is drawn
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
226 dev.off()  # close the PDF device opened above
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
227 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
228
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
229 hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here")  # Writes a base heatmap() of at most the first nsamp rows of cmat to the PDF outpdfname; nmeans and TName are not referenced in live code
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
230 {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
231 # for 2 groups only was
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
232 #col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"}
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
233 #pcols = unlist(lapply(group,col.map))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
234 gu = unique(group)  # distinct group labels
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
235 colours = rainbow(length(gu),start=0.3,end=0.6)  # one colour per distinct label
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
236 pcols = colours[match(group,gu)]  # colour for each entry of group
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
237 nrows = nrow(cmat)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
238 mtitle = paste(myTitle,'Heatmap: n contigs =',nrows)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
239 if (nrows > nsamp) {  # too many rows: keep the first nsamp; the replacement title below does not include myTitle
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
240 cmat = cmat[c(1:nsamp),]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
241 mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
242 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
243 newcolnames = substr(colnames(cmat),1,20)  # truncate column names to 20 characters for the plot labels
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
244 colnames(cmat) = newcolnames
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
245 pdf(outpdfname)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
246 heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols)  # Rowv=NA: rows are not reordered
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
247 dev.off()  # close the PDF device opened above
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
248 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
249
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
250 qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...)  # Writes a plot of observed against expected -log10 p values to the PDF outpdf; descr is the title and extra arguments are passed to plot()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
251 # stolen from https://gist.github.com/703512
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
252 {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
253 o = -log10(sort(pvector,decreasing=F))  # observed values, smallest p first
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
254 e = -log10( 1:length(o)/length(o) )  # expected values: -log10(i/n) for i = 1..n
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
255 o[o==-Inf] = reallysmall  # reallysmall and reallybig are script-level values set before this function (log10 of the .Machine double limits)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
256 o[o==Inf] = reallybig  # replace infinite values so max(o) below stays finite
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
257 maint = descr
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
258 pdf(outpdf)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
259 plot(e,o,pch=19,cex=1, main=maint, ...,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
260 xlab=expression(Expected~~-log[10](italic(p))),
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
261 ylab=expression(Observed~~-log[10](italic(p))),
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
262 xlim=c(0,max(e)), ylim=c(0,max(o)))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
263 lines(e,e,col="red")  # red identity line: where observed equals expected
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
264 grid(col = "lightgray", lty = "dotted")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
265 dev.off()  # close the PDF device opened above
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
266 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
267
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
268 smearPlot = function(DGEList,deTags, outSmear, outMain)  # Writes a plotSmear() plot of DGEList to the PDF outSmear, titled outMain, with deTags passed as de.tags
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
269 {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
270 pdf(outSmear)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
271 plotSmear(DGEList,de.tags=deTags,main=outMain)  # NOTE(review): plotSmear is presumably the edgeR function (edgeR is loaded at the top of the script) - confirm
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
272 grid(col="lightgray", lty="dotted")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
273 dev.off()  # close the PDF device opened above
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
274 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
275
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
276 boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname)  # Writes side-by-side boxplots of rawrs and cleanrs to the PDF pdfname, then one histogram per column of rawrs to sample_counts_histogram.pdf; myTitle is not referenced in the body
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
277 { #
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
278 nc = ncol(rawrs)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
279 #### for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA}
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
280 fullnames = colnames(rawrs)  # untruncated names, used later as histogram subtitles
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
281 newcolnames = substr(colnames(rawrs),1,20)  # truncate column names to 20 characters for axis labels
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
282 colnames(rawrs) = newcolnames
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
283 newcolnames = substr(colnames(cleanrs),1,20)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
284 colnames(cleanrs) = newcolnames
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
285 defpar = par(no.readonly=T)  # saved so the settings can be restored by par(defpar) at the end
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
286 print.noquote('raw contig counts by sample:')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
287 print.noquote(summary(rawrs))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
288 print.noquote('normalised contig counts by sample:')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
289 print.noquote(summary(cleanrs))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
290 pdf(pdfname)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
291 par(mfrow=c(1,2))  # two panels side by side: raw on the left, cleaned on the right
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
292 boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('Raw:',maint))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
293 grid(col="lightgray",lty="dotted")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
294 boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('After ',maint))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
295 grid(col="lightgray",lty="dotted")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
296 dev.off()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
297 pdfname = "sample_counts_histogram.pdf"  # the pdfname argument is overwritten with a fixed name for the second output file
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
298 nc = ncol(rawrs)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
299 print.noquote(paste('Using ncol rawrs=',nc))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
300 ncroot = round(sqrt(nc))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
301 if (ncroot*ncroot < nc) { ncroot = ncroot + 1 }  # bump up so the ncroot x ncroot grid has at least nc panels
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
302 m = c()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
303 for (i in c(1:nc)) {  # first pass without plotting: collect the tallest bin count of each column
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
304 rhist = hist(rawrs[,i],breaks=100,plot=F)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
305 m = append(m,max(rhist\$counts))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
306 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
307 ymax = max(m)  # shared y-axis upper limit for every histogram panel
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
308 ncols = length(fullnames)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
309 if (ncols > 20)  # more than 20 columns: scale the page size in proportion to the column count
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
310 {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
311 scale = 7*ncols/20
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
312 pdf(pdfname,width=scale,height=scale)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
313 } else {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
314 pdf(pdfname)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
315 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
316 par(mfrow=c(ncroot,ncroot))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
317 for (i in c(1:nc)) {  # second pass: draw one histogram per column
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
318 hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw count', col="maroon",
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
319 breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
320 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
321 dev.off()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
322 par(defpar)  # restore the par settings saved at the top
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
323
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
324 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
325
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
cumPlot = function(rawrs,cleanrs,maint,myTitle)
{
    # Draws two stacked histograms of log10 row sums, before and after
    # filtering, into Filtering_rowsum_bar_charts.pdf in the working directory.
    #   rawrs    row sums before filtering
    #   cleanrs  row sums after filtering
    #   maint    text appended to the Before: and After: panel titles
    #   myTitle  text used as the subtitle of both panels
    # Both panels share an x axis running from 0 to the largest log10 raw row sum.
    savedpar = par(no.readonly=T)
    lograw = log(rawrs,10)
    xtop = max(lograw)
    # one histogram plus a dotted grid; called once per panel
    onePanel = function(logsums,when) {
        hist(logsums,breaks=100,main=paste(when,maint),xlab="# Reads (log)",
            ylab="Count",col="maroon",sub=myTitle, xlim=c(0,xtop),las=1)
        grid(col="lightgray", lty="dotted")
    }
    pdf("Filtering_rowsum_bar_charts.pdf")
    par(mfrow=c(2,1))
    onePanel(lograw,'Before:')
    onePanel(log(cleanrs,10),'After:')
    dev.off()
    par(savedpar) # settings captured before the pdf device was opened
}
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
344
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
cumPlot1 = function(rawrs,cleanrs,maint,myTitle)
{
    # Plots the empirical cumulative distribution of row sums before and after
    # filtering as two stacked panels on a log x axis.
    #   rawrs    row sums before filtering
    #   cleanrs  row sums after filtering
    #   maint    text appended to the Before and After panel titles
    #   myTitle  subtitle of both panels; with spaces removed it also prefixes
    #            the output file name, which ends in _RowsumCum.pdf
    # Both panels use the same x range, 1 to the largest raw row sum.
    pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_')
    pdf(pdfname)
    par(mfrow=c(2,1))
    lastx = max(rawrs)
    rawecdf = ecdf(rawrs)
    cleanecdf = ecdf(cleanrs)
    rawe = knots(rawecdf)
    cleane = knots(cleanecdf)
    # Evaluate each ecdf at its own knots. The earlier 1:k/k spacing gave every
    # distinct row sum the same step height, which is wrong when row sums tie.
    ry = rawecdf(rawe)
    cy = cleanecdf(cleane)
    plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads",
        ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
    grid(col="blue")
    plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads",
        ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
    grid(col="blue")
    dev.off()
}
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
363
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
364
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
365
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
doGSEA = function(y=NULL,design=NULL,histgmt="",
    bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
    ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
{
    # Runs camera() once per gene set read from up to two GMT files, then writes
    # the Up and the remaining (down) results, each sorted by PValue, to tab
    # separated files.
    #   y, design   passed unchanged to camera(); rownames(y) are upper cased
    #               and matched against the upper cased gene symbols of each set
    #   histgmt     path of an optional extra GMT file; "" means none
    #   bigmt       path of the default GMT file; "" means none
    #   ntest       ignored - the loop always covers every gene set read
    #   myTitle     text used in the header line written to outfname
    #   outfname    header file name; also the suffix of the camera_up_ and
    #               camera_down_ table file names
    #   minnin, maxnin   a set is tested only when minnin < matched rows < maxnin
    #   fdrthresh, fdrtype   threshold and p.adjust method for adjusted p values
    # All printed output is diverted to Camera.log while the function runs.
    sink('Camera.log')
    on.exit(sink()) # undo the diversion even when a file read or camera() fails
    genesets = c()
    if (nzchar(bigmt)) { genesets = readLines(bigmt) }
    if (nzchar(histgmt)) {
        # c() appends the history sets. rbind() made a two row matrix that
        # recycled the shorter file and interleaved the two sources.
        genesets = c(genesets,readLines(histgmt))
    }
    print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
    # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
    # as.character keeps strsplit happy when neither GMT file was supplied
    genesets = strsplit(as.character(genesets),'\t')
    write(paste(myTitle,'edgeR GSEA'),file=outfname,append=F)
    urownames = toupper(rownames(y))
    upcam = c()
    downcam = c()
    # looping over the list itself skips cleanly when no gene sets were read
    for (gs in genesets) {
        g = gs[1] # geneset_id
        u = gs[2]
        # a line with no second field gives NA, which would break a bare test
        if (!is.na(u) && nzchar(u)) { u = paste("<a href=\'",u,"\'>",u,"</a>",sep="") }
        glist = toupper(gs[-(1:2)]) # member gene symbols
        inglist = urownames %in% glist
        nin = sum(inglist)
        if ((nin > minnin) && (nin < maxnin)) {
            camres = camera(y=y,index=inglist,design=design)
            if (! is.null(camres)) {
                rownames(camres) = g # gene set name
                camres = cbind(GeneSet=g,URL=u,camres)
                if (camres\$Direction == "Up") {
                    upcam = rbind(upcam,camres)
                } else {
                    downcam = rbind(downcam,camres)
                }
            }
        }
    }
    uscam = upcam[order(upcam\$PValue),]
    uscam\$adjPValue = p.adjust(uscam\$PValue,method=fdrtype)
    nup = max(10,sum((uscam\$adjPValue < fdrthresh))) # always list at least 10
    dscam = downcam[order(downcam\$PValue),]
    dscam\$adjPValue = p.adjust(dscam\$PValue,method=fdrtype)
    ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
    write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
    write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
    write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
}
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
434
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
435
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
436
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
437
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
doGSEAatonce = function(y=NULL,design=NULL,histgmt="",
    bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
    ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
{
    # Variant of doGSEA that collects every eligible gene set first and makes a
    # single camera() call with a named list of index vectors. It then writes
    # the Up and the remaining (down) results, each sorted by PValue, to tab
    # separated files.
    #   y, design   passed unchanged to camera(); rownames(y) are upper cased
    #               and matched against the upper cased gene symbols of each set
    #   histgmt     path of an optional extra GMT file; "" means none
    #   bigmt       path of the default GMT file; "" means none
    #   ntest       ignored - every gene set read is considered
    #   myTitle     text used in the header line written to outfname
    #   outfname    header file name; also the suffix of the camera_up_ and
    #               camera_down_ table file names
    #   minnin, maxnin   a set is tested only when minnin < matched rows < maxnin
    #   fdrthresh, fdrtype   threshold and p.adjust method for adjusted p values
    # All printed output is diverted to Camera.log while the function runs.
    sink('Camera.log')
    on.exit(sink()) # undo the diversion even when a file read or camera() fails
    genesets = c()
    if (nzchar(bigmt)) { genesets = readLines(bigmt) }
    if (nzchar(histgmt)) {
        # c() appends the history sets. rbind() made a two row matrix that
        # recycled the shorter file and interleaved the two sources.
        genesets = c(genesets,readLines(histgmt))
    }
    print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
    # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
    genesets = strsplit(as.character(genesets),'\t')
    write(paste(myTitle,'edgeR GSEA'),file=outfname,append=F)
    urownames = toupper(rownames(y))
    incam = list() # one logical row index per eligible gene set
    urls = c()
    gsids = c()
    for (gs in genesets) {
        gsid = gs[1] # geneset_id
        url = gs[2]
        # a line with no second field gives NA, which would break a bare test
        if (!is.na(url) && nzchar(url)) { url = paste("<a href=\'",url,"\'>",url,"</a>",sep="") }
        inglist = urownames %in% toupper(gs[-(1:2)]) # member gene symbols
        nin = sum(inglist)
        if ((nin > minnin) && (nin < maxnin)) {
            # append as a list element; c() would flatten every index vector
            # into one long logical vector
            incam[[length(incam)+1]] = inglist
            gsids = c(gsids,gsid)
            urls = c(urls,url)
        }
    }
    if (length(incam) == 0) {
        print.noquote('@@@@@ No gene sets passed the size filter - camera not run')
        return(invisible(NULL))
    }
    names(incam) = gsids
    allcam = camera(y=y,index=incam,design=design)
    # camera may hand back the sets in a different order than supplied, so ids
    # and urls are looked up by result row name instead of bound positionally
    hit = match(rownames(allcam),gsids)
    allcamres = cbind(geneset=gsids[hit],allcam,URL=urls[hit])
    isup = (allcamres\$Direction == "Up")
    upcam = allcamres[isup,]
    downcam = allcamres[!isup,]
    uscam = upcam[order(upcam\$PValue),]
    uscam\$adjPValue = p.adjust(uscam\$PValue,method=fdrtype)
    nup = max(10,sum((uscam\$adjPValue < fdrthresh))) # always list at least 10
    dscam = downcam[order(downcam\$PValue),]
    dscam\$adjPValue = p.adjust(dscam\$PValue,method=fdrtype)
    ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
    write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
    write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
    write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
}
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
518
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
519
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
520 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
521 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
522 filterquantile=0.2, subjects=c(),mydesign=NULL,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
523 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
524 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
525 doCook=F,DESeq_fitType="parameteric")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
526 {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
527 # Error handling
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
528 if (length(unique(group))!=2){
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
529 print("Number of conditions identified in experiment does not equal 2")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
530 q()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
531 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
532 require(edgeR)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
533 options(width = 512)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
534 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
535 allN = nrow(Count_Matrix)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
536 nscut = round(ncol(Count_Matrix)/2)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
537 colTotmillionreads = colSums(Count_Matrix)/1e6
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
538 counts.dataframe = as.data.frame(c())
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
539 rawrs = rowSums(Count_Matrix)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
540 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
541 nzN = nrow(nonzerod)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
542 nzrs = rowSums(nonzerod)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
543 zN = allN - nzN
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
544 print('# Quantiles for non-zero row counts:',quote=F)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
545 print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
546 if (useNDF == T)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
547 {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
548 gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
549 lo = colSums(Count_Matrix[!gt1rpin3,])
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
550 workCM = Count_Matrix[gt1rpin3,]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
551 cleanrs = rowSums(workCM)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
552 cleanN = length(cleanrs)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
553 meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
554 print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
555 maint = paste('Filter >=1/million reads in >=',nscut,'samples')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
556 } else {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
557 useme = (nzrs > quantile(nzrs,filterquantile))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
558 workCM = nonzerod[useme,]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
559 lo = colSums(nonzerod[!useme,])
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
560 cleanrs = rowSums(workCM)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
561 cleanN = length(cleanrs)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
562 meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
563 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
564 maint = paste('Filter below',filterquantile,'quantile')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
565 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
566 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
567 allgenes = rownames(workCM)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
568 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
569 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
570 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
571 testreg = str_match(allgenes,reg)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
572 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
573 {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
574 print("@@ using ucsc substitution for urls")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
575 contigurls = paste0(ucsc,"&amp;position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
576 } else {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
577 print.noquote("@@ using genecards substitution for urls")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
578 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
579 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
580 print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
581 cmrowsums = rowSums(workCM)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
582 TName=unique(group)[1]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
583 CName=unique(group)[2]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
584 if (is.null(mydesign)) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
585 if (length(subjects) == 0)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
586 {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
587 mydesign = model.matrix(~group)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
588 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
589 else {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
590 subjf = factor(subjects)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
591 mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
592 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
593 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
594 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
595 print.noquote('Using design matrix:')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
596 print.noquote(mydesign)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
597 if (doedgeR) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
598 sink('edgeR.log')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
599 #### Setup DGEList object
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
600 DGEList = DGEList(counts=workCM, group = group)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
601 DGEList = calcNormFactors(DGEList)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
602
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
603 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
604 comdisp = DGEList\$common.dispersion
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
605 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
606 if (edgeR_priordf > 0) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
607 print.noquote(paste("prior.df =",edgeR_priordf))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
608 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
609 } else {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
610 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
611 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
612 DGLM = glmFit(DGEList,design=mydesign)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
613 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
614 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
615 normData = (1e+06*DGEList\$counts/efflib)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
616 uoutput = cbind(
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
617 Name=as.character(rownames(DGEList\$counts)),
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
618 DE\$table,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
619 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
620 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
621 DGEList\$counts
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
622 )
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
623 soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
624 goodness = gof(DGLM, pcutoff=fdrthresh)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
625 if (sum(goodness\$outlier) > 0) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
626 print.noquote('GLM outliers:')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
627 print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
628 } else {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
629 print('No GLM fit outlier genes found\n')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
630 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
631 z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
632 pdf("edgeR_GoodnessofFit.pdf")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
633 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
634 abline(0,1,lwd=3)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
635 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
636 dev.off()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
637 estpriorn = getPriorN(DGEList)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
638 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
639 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
640 normData = (1e+06*DGEList\$counts)/efflib
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
641 lnormData = log(normData + 1e-6,10)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
642 uniqueg = unique(group)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
643 #### Plot MDS
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
644 sample_colors = match(group,levels(group))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
645 sampleTypes = levels(factor(group))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
646 print.noquote(sampleTypes)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
647 pdf("edgeR_MDSplot.pdf")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
648 plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
649 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
650 grid(col="blue")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
651 dev.off()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
652 colnames(normData) = paste( colnames(normData),'N',sep="_")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
653 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
654 nzd = data.frame(log(nonzerod + 1e-2,10))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
655 try( boxPlot(rawrs=nzd,cleanrs=lnormData,maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") )
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
656 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
657 tt = cbind(
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
658 Name=as.character(rownames(DGEList\$counts)),
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
659 DE\$table,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
660 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
661 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
662 )
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
663 print.noquote("# edgeR Top tags\n")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
664 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
665 tt = tt[order(DE\$table\$PValue),]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
666 print.noquote(tt[1:50,])
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
667 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
668 nsig = length(deTags)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
669 print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
670 deColours = ifelse(deTags,'red','black')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
671 pdf("edgeR_BCV_vs_abundance.pdf")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
672 plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance",col.tagwise=deColours)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
673 dev.off()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
674 dg = DGEList[order(DE\$table\$PValue),]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
675 #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
676 efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
677 normData = (1e+06*dg\$counts/efflib)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
678 outpdfname="edgeR_top_100_heatmap.pdf"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
679 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
680 outSmear = "edgeR_smearplot.pdf"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
681 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
682 smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
683 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
684 norm.factor = DGEList\$samples\$norm.factors
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
685 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
686 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
687 edgeRcounts = rep(0, length(allgenes))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
688 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
689 sink()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
690 } ### doedgeR
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
691 if (doDESeq2 == T)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
692 {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
693 sink("DESeq2.log")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
694 # DESeq2
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
695 require('DESeq2')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
696 library('RColorBrewer')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
697 if (length(subjects) == 0)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
698 {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
699 pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
700 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
701 } else {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
702 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
703 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
704 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
705 #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
706 #rDESeq = results(DESeq2)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
707 #newCountDataSet(workCM, group)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
708 deSeqDatsizefac = estimateSizeFactors(deSEQds)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
709 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
710 resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
711 rDESeq = as.data.frame(results(resDESeq))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
712 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
713 srDESeq = rDESeq[order(rDESeq\$pvalue),]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
714 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
715 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
716 cat("# DESeq top 50\n")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
717 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
718 srDESeq = rDESeq[order(rDESeq\$pvalue),]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
719 print.noquote(srDESeq[1:50,])
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
720 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
721 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
722 DESeqcounts = rep(0, length(allgenes))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
723 DESeqcounts[DESeqcountsindex] = 1
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
724 pdf("DESeq2_dispersion_estimates.pdf")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
725 plotDispEsts(resDESeq)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
726 dev.off()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
727 ysmall = abs(min(rDESeq\$log2FoldChange))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
728 ybig = abs(max(rDESeq\$log2FoldChange))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
729 ylimit = min(4,ysmall,ybig)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
730 pdf("DESeq2_MA_plot.pdf")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
731 plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
732 dev.off()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
733 rlogres = rlogTransformation(resDESeq)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
734 sampledists = dist( t( assay(rlogres) ) )
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
735 sdmat = as.matrix(sampledists)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
736 pdf("DESeq2_sample_distance_plot.pdf")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
737 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
738 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
739 dev.off()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
740 ###outpdfname="DESeq2_top50_heatmap.pdf"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
741 ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
742 sink()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
743 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
744 if ("try-error" %in% class(result)) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
745 print.noquote('DESeq2 plotPCA failed.')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
746 } else {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
747 pdf("DESeq2_PCA_plot.pdf")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
748 #### wtf - print? Seems needed to get this to work
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
749 print(ppca)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
750 dev.off()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
751 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
752 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
753
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
754 if (doVoom == T) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
755 sink('Voom.log')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
756 if (doedgeR == F) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
757 #### Setup DGEList object
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
758 DGEList = DGEList(counts=workCM, group = group)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
759 DGEList = calcNormFactors(DGEList)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
760 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
761 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
762 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
763 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
764 norm.factor = DGEList\$samples\$norm.factors
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
765 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
766 pdf("Voom_mean_variance_plot.pdf")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
767 dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
768 dev.off()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
769 # Use limma to fit data
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
770 fit = lmFit(dat.voomed, mydesign)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
771 fit = eBayes(fit)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
772 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
773 qqPlot(descr=paste(myTitle,'Voom-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='Voom_qqplot.pdf')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
774 rownames(rvoom) = rownames(workCM)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
775 rvoom = cbind(rvoom,NReads=cmrowsums)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
776 srvoom = rvoom[order(rvoom\$P.Value),]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
777 write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
778 rvoom = cbind(rvoom,URL=contigurls)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
779 deTags = rownames(rvoom[rvoom\$adj.p.value < fdrthresh,])
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
780 nsig = length(deTags)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
781 srvoom = rvoom[order(rvoom\$P.Value),]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
782 cat("# Voom top 50\n")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
783 print(srvoom[1:50,])
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
784 normData = srvoom\$E
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
785 outpdfname="VOOM_top_100_heatmap.pdf"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
786 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('VOOM Heatmap',myTitle))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
787 outSmear = "VOOM_smearplot.pdf"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
788 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
789 smearPlot(DGEList=rvoom,deTags=deTags, outSmear=outSmear, outMain = outMain)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
790 qqPlot(descr=paste(myTitle,'VOOM adj p QQ plot'),pvector=srvoom\$adj.P.Val,outpdf='VOOM_qqplot.pdf')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
791 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
792 topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
793 voomcountsindex = which(allgenes %in% topresults.voom\$ID)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
794 voomcounts = rep(0, length(allgenes))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
795 voomcounts[voomcountsindex] = 1
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
796 sink()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
797 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
798
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
799 if (doCamera) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
800 doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
801 outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
802 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
803
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
804 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
805 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
806 vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
807 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
808 VOOM_limma = voomcounts, row.names = allgenes)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
809 } else if ((doDESeq2==T) && (doedgeR==T)) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
810 vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
811 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
812 } else if ((doVoom==T) && (doedgeR==T)) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
813 vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
814 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
815 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
816
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
817 if (nrow(counts.dataframe > 1)) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
818 counts.venn = vennCounts(counts.dataframe)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
819 vennf = "Venn_significant_genes_overlap.pdf"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
820 pdf(vennf)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
821 vennDiagram(counts.venn,main=vennmain,col="maroon")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
822 dev.off()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
823 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
824 } #### doDESeq2 or doVoom
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
825
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
826 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
827 #### Done
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
828
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
829 ###sink(stdout(),append=T,type="message")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
830 builtin_gmt = ""
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
831 history_gmt = ""
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
832 history_gmt_name = ""
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
833 out_edgeR = F
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
834 out_DESeq2 = F
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
835 out_VOOM = "$out_VOOM"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
836 doDESeq2 = $DESeq2.doDESeq2 # make these T or F
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
837 doVoom = $doVoom
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
838 doCamera = F
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
839 doedgeR = $edgeR.doedgeR
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
840 edgeR_priordf = 0
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
841
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
842
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
843 #if $doVoom == "T":
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
844 out_VOOM = "$out_VOOM"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
845 #end if
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
846
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
847 #if $DESeq2.doDESeq2 == "T":
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
848 out_DESeq2 = "$out_DESeq2"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
849 DESeq_fitType = "$DESeq2.DESeq_fitType"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
850 #end if
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
851
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
852 #if $edgeR.doedgeR == "T":
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
853 out_edgeR = "$out_edgeR"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
854 edgeR_priordf = $edgeR.edgeR_priordf
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
855 #end if
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
856
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
857
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
858 if (sum(c(doedgeR,doVoom,doDESeq2)) == 0)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
859 {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
860 write("No methods chosen - nothing to do! Please try again after choosing one or more methods", stderr())
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
861 quit(save="no",status=2)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
862 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
863
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
864 Out_Dir = "$html_file.files_path"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
865 Input = "$input1"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
866 TreatmentName = "$treatment_name"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
867 TreatmentCols = "$Treat_cols"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
868 ControlName = "$control_name"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
869 ControlCols= "$Control_cols"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
870 org = "$input1.dbkey"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
871 if (org == "") { org = "hg19"}
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
872 fdrtype = "$fdrtype"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
873 fdrthresh = $fdrthresh
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
874 useNDF = $useNDF
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
875 fQ = $fQ # non-differential centile cutoff
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
876 myTitle = "$title"
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
877 sids = strsplit("$subjectids",',')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
878 subjects = unlist(sids)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
879 nsubj = length(subjects)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
880 TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
881 CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
882 cat('Got TCols=')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
883 cat(TCols)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
884 cat('; CCols=')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
885 cat(CCols)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
886 cat('\n')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
887 useCols = c(TCols,CCols)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
888 if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
889 Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') #Load tab file assume header
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
890 snames = colnames(Count_Matrix)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
891 nsamples = length(snames)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
892 if (nsubj > 0 & nsubj != nsamples) {
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
893 options("show.error.messages"=T)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
894 mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','),
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
895 'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=','))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
896 write(mess, stderr())
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
897 quit(save="no",status=4)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
898 }
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
899 if (length(subjects) != 0) {subjects = subjects[useCols]}
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
900 Count_Matrix = Count_Matrix[,useCols] ### reorder columns
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
901 rn = rownames(Count_Matrix)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
902 islib = rn %in% c('librarySize','NotInBedRegions')
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
903 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
904 Count_Matrix = Count_Matrix[subset(rn,! islib),]
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
905 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
906 group = factor(group, levels=c(ControlName,TreatmentName))
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
907 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relable columns
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
908 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
909 fdrtype=fdrtype,mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', # forward the p-value adjustment method chosen on the tool form instead of a hard-coded 'BH'
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
910 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects, # NOTE(review): useNDF is read from the form earlier in this script but is not forwarded here - confirm this is intended
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
911 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
912 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
913 sessionInfo()
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
914 ]]>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
915 </configfile>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
916 </configfiles>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
917 <help>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
918
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
919 **What it does**
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
920
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
921 Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
922 Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
923
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
924 **Input**
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
925
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
926 Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
927 and your fave gene model to generate inputs. Each row is a genomic feature (gene or exon eg) and each column the
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
928 non-negative integer count of reads from one sample overlapping the feature.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
929 The matrix must have a header row uniquely identifying the source samples, and unique row names in
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
930 the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
931
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
932 **Specifying comparisons**
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
933
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
934 This is basically dumbed down for a single factor with two levels - case vs control.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
935
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
936 More complex interfaces are possible but painful at present.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
937 Probably need to specify a phenotype file to do this better.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
938 Work in progress. Send code.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
939
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
940 If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
941 put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number or other blocking factor.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
942 Supply a list of integers, one for each sample, or an empty string if samples are all independent.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
943 If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
944 Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
945
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
946 So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
947 eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects, you might use
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
948 8,9,1,1,2,2
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
949 as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
950
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
951 **Methods available**
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
952
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
953 You can run 3 popular Bioconductor packages available for count data.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
954
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
955 edgeR - see edgeR_ for details
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
956
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
957 VOOM/limma - see limma_VOOM_ for details
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
958
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
959 DESeq2 - see DESeq2_ for details
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
960
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
961 and optionally camera in edgeR which works better if MSigDB is installed.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
962
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
963 **Outputs**
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
964
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
965 Some helpful plots and analysis results. Note that most of these are produced using R code
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
966 suggested by the excellent documentation and vignettes for the Bioconductor
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
967 packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
968
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
969 **Note on Voom**
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
970
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
971 The voom from limma version 3.16.6 help in R includes this from the authors - but you should read the paper to interpret this method.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
972
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
973 This function is intended to process RNA-Seq or ChIP-Seq data prior to linear modelling in limma.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
974
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
975 voom is an acronym for mean-variance modelling at the observational level.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
976 The key concern is to estimate the mean-variance relationship in the data, then use this to compute appropriate weights for each observation.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
977 Count data almost always show non-trivial mean-variance relationships. Raw counts show increasing variance with increasing count size, while log-counts typically show a decreasing mean-variance trend.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
978 This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
979 The weights are then used in the linear modelling process to adjust for heteroscedasticity.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
980
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
981 In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using lowess.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
982 The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
983 The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
984 Tags with zero counts across all samples are not included in the lowess fit. Optional normalization is performed using normalizeBetweenArrays.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
985 Using fitted values of log2 counts from a linear model fit by lmFit, variances from the mean-variance trend were interpolated for each observation.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
986 This was carried out by approxfun. Inverse variance weights can be used to correct for mean-variance trend in the count data.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
987
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
988
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
989 Author(s)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
990
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
991 Charity Law and Gordon Smyth
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
992
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
993 References
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
994
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
995 Law, CW (2013). Precision weights for gene expression analysis. PhD Thesis. University of Melbourne, Australia.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
996
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
997 Law, CW, Chen, Y, Shi, W, Smyth, GK (2013). Voom! Precision weights unlock linear model analysis tools for RNA-seq read counts.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
998 Technical Report 1 May 2013, Bioinformatics Division, Walter and Eliza Hall Institute of Medical Research, Melbourne, Australia.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
999 http://www.statsci.org/smyth/pubs/VoomPreprint.pdf
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1000
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1001 See Also
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1002
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1003 A voom case study is given in the edgeR User's Guide.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1004
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1005 vooma is a similar function but for microarrays instead of RNA-seq.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1006
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1007
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1008 ***old rant on changes to Bioconductor package variable names between versions***
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1009
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1010 The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1011 breaking this and all other code that assumed the old name for this variable,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1012 between edgeR 2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing).
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1013 This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1014 to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1015 when their old scripts break. This tool currently works with 2.4.6.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1016
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1017 **Note on prior.n**
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1018
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1019 http://seqanswers.com/forums/showthread.php?t=5591 says:
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1020
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1021 *prior.n*
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1022
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1023 The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1024 You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1025 in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1026 tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1027 common likelihood the weight of one observation.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1028
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1029 In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1030 or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1031 you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1032 (squeezing) of the tagwise dispersions. How many samples do you have in your experiment?
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1033 What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1034 If you have more samples, then the tagwise dispersion estimates will be more reliable,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1035 so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1036
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1037
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1038 From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1039
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1040 Dear Dorota,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1041
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1042 The important settings are prior.df and trend.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1043
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1044 prior.n and prior.df are related through prior.df = prior.n * residual.df,
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1045 and your experiment has residual.df = 36 - 12 = 24. So the old setting of
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1046 prior.n=10 is equivalent for your data to prior.df = 240, a very large
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1047 value. Going the other way, the new setting of prior.df=10 is equivalent
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1048 to prior.n=10/24.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1049
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1050 To recover old results with the current software you would use
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1051
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1052 estimateTagwiseDisp(object, prior.df=240, trend="none")
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1053
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1054 To get the new default from old software you would use
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1055
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1056 estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1057
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1058 Actually the old trend method is equivalent to trend="loess" in the new
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1059 software. You should use plotBCV(object) to see whether a trend is
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1060 required.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1061
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1062 Note you could also use
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1063
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1064 prior.n = getPriorN(object, prior.df=10)
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1065
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1066 to map between prior.df and prior.n.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1067
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1068 ----
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1069
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1070 **Attributions**
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1071
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1072 edgeR - edgeR_
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1073
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1074 VOOM/limma - limma_VOOM_
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1075
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1076 DESeq2 - DESeq2_ for details
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1077
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1078 See above for Bioconductor package documentation for packages exposed in Galaxy by this tool and app store package.
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1079
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1080 Galaxy_ (that's what you are using right now!) for gluing everything together
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1081
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1082 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1083 licensed to you under the LGPL_ like other rgenetics artefacts
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1084
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1085 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1086 .. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1087 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1088 .. _DESeq2: http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1089 .. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1090 .. _Galaxy: http://getgalaxy.org
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1091 </help>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1092
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1093 </tool>
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1094
c446f7f0ebbe Uploaded
fubar
parents:
diff changeset
1095