annotate rgedgeRpaired_nocamera.xml @ 77:4a2e7a9725b2 draft

Uploaded
author fubar
date Tue, 25 Feb 2014 23:54:59 -0500
parents 151bf55e018a
children 340d5460f3ff
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
77
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
1 <tool id="rgDifferentialCount" name="Differential_Count" version="0.22">
61
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
2 <description>models using BioConductor packages</description>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
3 <requirements>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
4 <requirement type="package" version="2.14">biocbasics</requirement>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
5 <requirement type="package" version="3.0.2">r302</requirement>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
6 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
77
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
7 <requirement type="package" version="9.07">ghostscript</requirement>
61
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
8 </requirements>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
9
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
10 <command interpreter="python">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
11 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
12 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
13 </command>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
14 <inputs>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
15 <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
16 help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
17 <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
18 help="Supply a meaningful name here to remind you what the outputs contain">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
19 <sanitizer invalid_char="">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
20 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
21 </sanitizer>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
22 </param>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
23 <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
24 <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
25 multiple="true" use_header_names="true" size="120" display="checkboxes">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
26 <validator type="no_options" message="Please select at least one column."/>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
27 </param>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
28 <param name="control_name" type="text" value="Control" size="50" label="Control Name"/>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
29 <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
30 multiple="true" use_header_names="true" size="120" display="checkboxes" optional="true">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
31 </param>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
32 <param name="subjectids" type="text" optional="true" size="120" value = ""
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
33 label="IF SUBJECTS NOT ALL INDEPENDENT! Enter comma separated strings to indicate sample labels for (eg) pairing - must be one for every column in input"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
34 help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter 'A99,C21,A99,C21'">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
35 <sanitizer>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
36 <valid initial="string.letters,string.digits"><add value="," /> </valid>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
37 </sanitizer>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
38 </param>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
39 <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
40 help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
41 <param name="useNDF" type="boolean" truevalue="T" falsevalue="F" checked="false" size="1"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
42 label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
43 help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
44
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
45 <conditional name="edgeR">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
46 <param name="doedgeR" type="select"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
47 label="Run this model using edgeR"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
48 help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
49 <option value="F">Do not run edgeR</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
50 <option value="T" selected="true">Run edgeR</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
51 </param>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
52 <when value="T">
77
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
53 <param name="edgeR_priordf" type="integer" value="10" size="3"
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
54 label="prior.df for tagwise dispersion - larger value = more squeezing of tag dispersions to common dispersion. Replaces prior.n and prior.df = prior.n * residual.df"
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
55 help="10 = edgeR default. Use a larger value to 'smooth' small samples. See edgeR docs and note below"/>
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
56 <param name="edgeR_robust" type="select"
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
57 label="Use robust dispersion method"
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
58 help="Use ordinary, anscombe or deviance robust deviance estimates">
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
59 <option value="ordinary" selected="true">Use ordinary deviance estimates</option>
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
60 <option value="deviance">Use robust deviance estimates</option>
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
61 <option value="anscombe">use Anscombe robust deviance estimates</option>
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
62 </param>
61
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
63 </when>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
64 <when value="F"></when>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
65 </conditional>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
66 <conditional name="DESeq2">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
67 <param name="doDESeq2" type="select"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
68 label="Run the same model with DESeq2 and compare findings"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
69 help="DESeq2 is an update to the DESeq package. It uses different assumptions and methods to edgeR">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
70 <option value="F" selected="true">Do not run DESeq2</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
71 <option value="T">Run DESeq2</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
72 </param>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
73 <when value="T">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
74 <param name="DESeq_fitType" type="select">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
75 <option value="parametric" selected="true">Parametric (default) fit for dispersions</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
76 <option value="local">Local fit - this will automagically be used if parametric fit fails</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
77 <option value="mean">Mean dispersion fit- use this if you really understand what you're doing - read the fine manual linked below in the documentation</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
78 </param>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
79 </when>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
80 <when value="F"> </when>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
81 </conditional>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
82 <param name="doVoom" type="select"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
83 label="Run the same model with Voom/limma and compare findings"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
84 help="Voom uses counts per million and a precise transformation of variance so count data can be analysed using limma">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
85 <option value="F" selected="true">Do not run VOOM</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
86 <option value="T">Run VOOM</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
87 </param>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
88 <!--
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
89 <conditional name="camera">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
90 <param name="doCamera" type="select" label="Run the edgeR implementation of Camera GSEA for up/down gene sets"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
91 help="If yes, you can choose a set of genesets to test and/or supply a gmt format geneset collection from your history">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
92 <option value="F" selected="true">Do not run GSEA tests with the Camera algorithm</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
93 <option value="T">Run GSEA tests with the Camera algorithm</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
94 </param>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
95 <when value="T">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
96 <conditional name="gmtSource">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
97 <param name="refgmtSource" type="select"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
98 label="Use a gene set (.gmt) from your history and/or use a built-in (MSigDB etc) gene set">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
99 <option value="indexed" selected="true">Use a built-in gene set</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
100 <option value="history">Use a gene set from my history</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
101 <option value="both">Add a gene set from my history to a built in gene set</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
102 </param>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
103 <when value="indexed">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
104 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
105 <options from_data_table="gseaGMT_3.1">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
106 <filter type="sort_by" column="2" />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
107 <validator type="no_options" message="No GMT v3.1 files are available - please install them"/>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
108 </options>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
109 </param>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
110 </when>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
111 <when value="history">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
112 <param name="ownGMT" type="data" format="gmt" label="Select a Gene Set from your history" />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
113 </when>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
114 <when value="both">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
115 <param name="ownGMT" type="data" format="gseagmt" label="Select a Gene Set from your history" />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
116 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
117 <options from_data_table="gseaGMT_4">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
118 <filter type="sort_by" column="2" />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
119 <validator type="no_options" message="No GMT v4 files are available - please fix tool_data_table and loc files"/>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
120 </options>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
121 </param>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
122 </when>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
123 </conditional>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
124 </when>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
125 <when value="F">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
126 </when>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
127 </conditional>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
128 -->
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
129 <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for family wise error rate control"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
130 help="Conventional default value of 0.05 recommended"/>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
131 <param name="fdrtype" type="select" label="FDR (Type I error) control method"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
132 help="Use fdr or bh typically to control for the number of tests in a reliable way">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
133 <option value="fdr" selected="true">fdr</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
134 <option value="BH">Benjamini Hochberg</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
135 <option value="BY">Benjamini Yekutieli</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
136 <option value="bonferroni">Bonferroni</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
137 <option value="hochberg">Hochberg</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
138 <option value="holm">Holm</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
139 <option value="hommel">Hommel</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
140 <option value="none">no control for multiple tests</option>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
141 </param>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
142 </inputs>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
143 <outputs>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
144 <data format="tabular" name="out_edgeR" label="${title}_topTable_edgeR.xls">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
145 <filter>edgeR['doedgeR'] == "T"</filter>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
146 </data>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
147 <data format="tabular" name="out_DESeq2" label="${title}_topTable_DESeq2.xls">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
148 <filter>DESeq2['doDESeq2'] == "T"</filter>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
149 </data>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
150 <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
151 <filter>doVoom == "T"</filter>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
152 </data>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
153 <data format="html" name="html_file" label="${title}.html"/>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
154 </outputs>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
155 <stdio>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
156 <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix" />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
157 </stdio>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
158 <tests>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
159 <test>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
160 <param name='input1' value='test_bams2mx.xls' ftype='tabular' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
161 <param name='treatment_name' value='liver' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
162 <param name='title' value='edgeRtest' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
163 <param name='useNDF' value='' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
164 <param name='doedgeR' value='T' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
165 <param name='doVoom' value='T' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
166 <param name='doDESeq2' value='T' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
167 <param name='fdrtype' value='fdr' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
168 <param name='edgeR_priordf' value="8" />
77
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
169 <param name='edgeR_robust' value="ordinary" />
61
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
170 <param name='fdrthresh' value="0.05" />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
171 <param name='control_name' value='heart' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
172 <param name='subjectids' value='' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
173 <param name='Control_cols' value='3,4,5,9' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
174 <param name='Treat_cols' value='2,6,7,8' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
175 <output name='out_edgeR' file='edgeRtest1out.xls' compare='diff' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
176 <output name='html_file' file='edgeRtest1out.html' compare='diff' lines_diff='20' />
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
177 </test>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
178 </tests>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
179
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
180 <configfiles>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
181 <configfile name="runme">
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
182 <![CDATA[
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
183 #
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
184 # edgeR.Rscript
77
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
185 # updated feb 2014 adding outlier-robust deviance estimate options by ross for R 3.0.2/bioc 2.13
61
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
186 # updated nov 2011 for R 2.14.0 and edgeR 2.4.0 by ross
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
187 # Performs DGE on a count table containing n replicates of two conditions
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
188 #
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
189 # Parameters
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
190 #
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
191 # 1 - Output Dir
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
192
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
193 # Original edgeR code by: S.Lunke and A.Kaspi
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
194 reallybig = log10(.Machine\$double.xmax)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
195 reallysmall = log10(.Machine\$double.xmin)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
196 library('stringr')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
197 library('gplots')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
198 library('edgeR')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
199 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
200 {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
201 # Perform clustering for significant pvalues after controlling FWER
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
202 samples = colnames(cmat)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
203 gu = unique(group)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
204 gn = rownames(cmat)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
205 if (length(gu) == 2) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
206 col.map = function(g) {if (g==gu[1]) "#FF0000" else "#0000FF"}
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
207 pcols = unlist(lapply(group,col.map))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
208 } else {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
209 colours = rainbow(length(gu),start=0,end=4/6)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
210 pcols = colours[match(group,gu)] }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
211 dm = cmat[(! is.na(gn)),]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
212 # remove unlabelled hm rows
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
213 nprobes = nrow(dm)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
214 # sub = paste('Showing',nprobes,'contigs ranked for evidence of differential abundance')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
215 if (nprobes > nsamp) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
216 dm =dm[1:nsamp,]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
217 #sub = paste('Showing',nsamp,'contigs ranked for evidence for differential abundance out of',nprobes,'total')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
218 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
219 newcolnames = substr(colnames(dm),1,20)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
220 colnames(dm) = newcolnames
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
221 pdf(outpdfname)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
222 heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none',
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
223 Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
224 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
225 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
226
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
227 hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
228 {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
229 # for 2 groups only was
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
230 #col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"}
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
231 #pcols = unlist(lapply(group,col.map))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
232 gu = unique(group)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
233 colours = rainbow(length(gu),start=0.3,end=0.6)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
234 pcols = colours[match(group,gu)]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
235 nrows = nrow(cmat)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
236 mtitle = paste(myTitle,'Heatmap: n contigs =',nrows)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
237 if (nrows > nsamp) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
238 cmat = cmat[c(1:nsamp),]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
239 mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
240 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
241 newcolnames = substr(colnames(cmat),1,20)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
242 colnames(cmat) = newcolnames
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
243 pdf(outpdfname)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
244 heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
245 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
246 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
247
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
248 qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
249 # stolen from https://gist.github.com/703512
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
250 {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
251 o = -log10(sort(pvector,decreasing=F))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
252 e = -log10( 1:length(o)/length(o) )
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
253 o[o==-Inf] = reallysmall
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
254 o[o==Inf] = reallybig
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
255 maint = descr
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
256 pdf(outpdf)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
257 plot(e,o,pch=19,cex=1, main=maint, ...,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
258 xlab=expression(Expected~~-log[10](italic(p))),
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
259 ylab=expression(Observed~~-log[10](italic(p))),
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
260 xlim=c(0,max(e)), ylim=c(0,max(o)))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
261 lines(e,e,col="red")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
262 grid(col = "lightgray", lty = "dotted")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
263 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
264 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
265
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
266 smearPlot = function(DGEList,deTags, outSmear, outMain)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
267 {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
268 pdf(outSmear)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
269 plotSmear(DGEList,de.tags=deTags,main=outMain)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
270 grid(col="lightgray", lty="dotted")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
271 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
272 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
273
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
274 boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
275 { #
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
276 nc = ncol(rawrs)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
277 for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA}
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
278 fullnames = colnames(rawrs)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
279 newcolnames = substr(colnames(rawrs),1,20)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
280 colnames(rawrs) = newcolnames
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
281 newcolnames = substr(colnames(cleanrs),1,20)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
282 colnames(cleanrs) = newcolnames
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
283 defpar = par(no.readonly=T)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
284 print.noquote('raw contig counts by sample:')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
285 print.noquote(summary(rawrs))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
286 print.noquote('normalised contig counts by sample:')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
287 print.noquote(summary(cleanrs))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
288 pdf(pdfname)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
289 par(mfrow=c(1,2))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
290 boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('Raw:',maint))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
291 grid(col="lightgray",lty="dotted")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
292 boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('After ',maint))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
293 grid(col="lightgray",lty="dotted")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
294 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
295 pdfname = "sample_counts_histogram.pdf"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
296 nc = ncol(rawrs)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
297 print.noquote(paste('Using ncol rawrs=',nc))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
298 ncroot = round(sqrt(nc))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
299 if (ncroot*ncroot < nc) { ncroot = ncroot + 1 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
300 m = c()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
301 for (i in c(1:nc)) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
302 rhist = hist(rawrs[,i],breaks=100,plot=F)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
303 m = append(m,max(rhist\$counts))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
304 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
305 ymax = max(m)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
306 ncols = length(fullnames)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
307 if (ncols > 20)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
308 {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
309 scale = 7*ncols/20
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
310 pdf(pdfname,width=scale,height=scale)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
311 } else {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
312 pdf(pdfname)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
313 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
314 par(mfrow=c(ncroot,ncroot))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
315 for (i in c(1:nc)) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
316 hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw count', col="maroon",
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
317 breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
318 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
319 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
320 par(defpar)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
321
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
322 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
323
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
cumPlot = function(rawrs,cleanrs,maint,myTitle)
{
    # Draw before/after filtering histograms of log10 row sums as a two panel pdf.
    #   rawrs   - numeric row sums before filtering
    #   cleanrs - numeric row sums after filtering
    #   maint   - filter description appended to each panel title
    #   myTitle - text used as the subtitle of both panels
    # Both panels use the x range 0 to max(log10(rawrs)) so they are directly comparable.
    # Side effect: writes Filtering_rowsum_bar_charts.pdf in the working directory.
    pdfname = "Filtering_rowsum_bar_charts.pdf"
    lrs = log(rawrs,10)
    lim = max(lrs)
    pdf(pdfname)
    pdfdev = dev.cur()
    # Always close the pdf device, even when a plotting call fails part way through.
    on.exit(dev.off(pdfdev))
    # Graphics parameters belong to the pdf device and disappear when it closes, so
    # nothing is saved or restored here. Querying par with no device open would
    # start the default device and leave a stray Rplots.pdf in the output directory.
    par(mfrow=c(2,1))
    hist(lrs,breaks=100,main=paste('Before:',maint),xlab="# Reads (log)",
        ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1)
    grid(col="lightgray", lty="dotted")
    lrs = log(cleanrs,10)
    hist(lrs,breaks=100,main=paste('After:',maint),xlab="# Reads (log)",
        ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1)
    grid(col="lightgray", lty="dotted")
    invisible(NULL)
}
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
342
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
cumPlot1 = function(rawrs,cleanrs,maint,myTitle)
{
    # Plot the empirical cumulative distribution of row sums before and after
    # filtering as a two panel pdf with a log scaled x axis.
    #   rawrs   - numeric row sums before filtering
    #   cleanrs - numeric row sums after filtering
    #   maint   - filter description appended to each panel title
    #   myTitle - subtitle text, also used (spaces removed) to build the pdf name
    # Side effect: writes (myTitle without spaces)_RowsumCum.pdf in the working directory.
    pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_')
    lastx = max(rawrs)
    rawecdf = ecdf(rawrs)
    cleanecdf = ecdf(cleanrs)
    # knots gives the sorted distinct values; the step function evaluated there gives
    # the proportion of rows at or below each value. Spacing y evenly over the
    # distinct values would understate the curve wherever row sums are tied.
    rawe = knots(rawecdf)
    cleane = knots(cleanecdf)
    ry = rawecdf(rawe)
    cy = cleanecdf(cleane)
    pdf(pdfname)
    pdfdev = dev.cur()
    # Always close the pdf device, even when a plotting call fails part way through.
    on.exit(dev.off(pdfdev))
    par(mfrow=c(2,1))
    plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads",
        ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
    grid(col="blue")
    plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads",
        ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
    grid(col="blue")
    invisible(NULL)
}
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
361
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
362
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
363
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
doGSEAold = function(y=NULL,design=NULL,histgmt="",
    bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
    ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
{
    # Older gene set test driver: runs camera once per gene set.
    #   y, design - handed unchanged to camera; rownames(y) are matched, upper cased,
    #               against the gene symbols of each set
    #   histgmt   - path of a gmt file from the history, "" for none
    #   bigmt     - path of the built in gmt file, "" for none
    #   ntest     - ignored; replaced by the number of gene set lines read
    #   myTitle   - text for the header line written to outfname
    #   outfname  - header file name and suffix of the camera_up / camera_down tables
    #   minnin, maxnin - a set is tested only when strictly more than minnin and
    #               strictly fewer than maxnin of its genes are found in y
    #   fdrthresh - adjusted p value cutoff used when counting sets to print
    #   fdrtype   - method handed to p.adjust
    # Side effects: console output is diverted to Camera.log while this runs; writes
    # outfname, camera_up_(outfname) and camera_down_(outfname).
    sink('Camera.log')
    # Restore console output on every exit path, including errors.
    on.exit(sink())
    genesets = character(0)
    if (bigmt > "")
    {
        genesets = readLines(bigmt)
    }
    if (histgmt > "")
    {
        # Append the history lines. rbind would build a two row matrix, recycling
        # the shorter file and interleaving the two, so sets were duplicated.
        genesets = c(genesets,readLines(histgmt))
    }
    print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
    genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
    titleline = paste(myTitle,'edgeR GSEA')
    write(titleline,file=outfname,append=F)
    ntest = length(genesets)
    urownames = toupper(rownames(y))
    upcam = c()
    downcam = c()
    for (i in seq_len(ntest)) {
        gs = unlist(genesets[i])
        # A usable gmt line needs an id, a url field and at least one gene.
        if (length(gs) < 3) { next }
        g = gs[1] # geneset_id
        u = gs[2]
        if (u > "") { u = paste("<a href=\'",u,"\'>",u,"</a>",sep="") }
        glist = toupper(gs[3:length(gs)]) # member gene symbols
        inglist = urownames %in% glist
        nin = sum(inglist)
        if ((nin > minnin) && (nin < maxnin)) {
            camres = camera(y=y,index=inglist,design=design)
            if (! is.null(camres)) {
                rownames(camres) = g # gene set name
                camres = cbind(GeneSet=g,URL=u,camres)
                if (camres\$Direction == "Up")
                {
                    upcam = rbind(upcam,camres)
                } else {
                    downcam = rbind(downcam,camres)
                }
            }
        }
    }
    uscam = upcam[order(upcam\$PValue),]
    uscam\$adjPValue = p.adjust(uscam\$PValue,method=fdrtype)
    # Show at least ten sets even when fewer pass the threshold.
    nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
    dscam = downcam[order(downcam\$PValue),]
    dscam\$adjPValue = p.adjust(dscam\$PValue,method=fdrtype)
    ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
    write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
    write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
    write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
    invisible(NULL)
}
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
432
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
433
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
434
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
435
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
doGSEA = function(y=NULL,design=NULL,histgmt="",
    bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
    ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
{
    # Gene set test driver: collects every eligible gene set and runs camera once
    # on the whole collection.
    #   y, design - handed unchanged to camera; rownames(y) are matched, upper cased,
    #               against the gene symbols of each set
    #   histgmt   - path of a gmt file from the history, "" for none
    #   bigmt     - path of the built in gmt file, "" for none
    #   ntest     - ignored; replaced by the number of gene set lines read
    #   myTitle   - text for the header line written to outfname
    #   outfname  - header file name and suffix of the camera_up / camera_down tables
    #   minnin, maxnin - a set is tested only when strictly more than minnin and
    #               strictly fewer than maxnin of its genes are found in y
    #   fdrthresh - adjusted p value cutoff used when counting sets to print
    #   fdrtype   - method handed to p.adjust
    # Side effects: console output is diverted to Camera.log while this runs; writes
    # outfname, camera_up_(outfname) and camera_down_(outfname).
    sink('Camera.log')
    # Restore console output on every exit path, including errors.
    on.exit(sink())
    genesets = character(0)
    if (bigmt > "")
    {
        genesets = readLines(bigmt)
    }
    if (histgmt > "")
    {
        # Append the history lines. rbind would build a two row matrix, recycling
        # the shorter file and interleaving the two, so sets were duplicated.
        genesets = c(genesets,readLines(histgmt))
    }
    print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
    genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
    titleline = paste(myTitle,'edgeR GSEA')
    write(titleline,file=outfname,append=F)
    ntest = length(genesets)
    urownames = toupper(rownames(y))
    incam = list()
    urls = c()
    gsids = c()
    for (i in seq_len(ntest)) {
        gs = unlist(genesets[i])
        # A usable gmt line needs an id, a url field and at least one gene.
        if (length(gs) < 3) { next }
        gsid = gs[1] # geneset_id
        url = gs[2]
        if (url > "") { url = paste("<a href=\'",url,"\'>",url,"</a>",sep="") }
        glist = toupper(gs[3:length(gs)]) # member gene symbols
        inglist = urownames %in% glist
        nin = sum(inglist)
        # Keep one logical index vector per gene set. Ids must be unique because
        # results are mapped back by name below, so a repeated id is kept only once.
        if ((nin > minnin) && (nin < maxnin) && !(gsid %in% gsids)) {
            incam[[length(incam) + 1]] = inglist
            gsids = c(gsids,gsid)
            urls = c(urls,url)
        }
    }
    if (length(incam) == 0) {
        print.noquote(paste('@@@ no gene sets had more than',minnin,'and fewer than',maxnin,'genes in the data so camera was not run'))
        return(invisible(NULL))
    }
    names(incam) = gsids
    allcam = camera(y=y,index=incam,design=design)
    # NOTE(review): per the limma documentation camera labels result rows with the
    # names of index and may sort them by p value, so ids and urls are looked up by
    # row name instead of assuming input order - confirm for the installed limma.
    hit = match(rownames(allcam),gsids)
    allcamres = cbind(geneset=gsids[hit],allcam,URL=urls[hit])
    isup = (allcamres\$Direction == "Up")
    upcam = allcamres[isup,]
    downcam = allcamres[!isup,]
    uscam = upcam[order(upcam\$PValue),]
    uscam\$adjPValue = p.adjust(uscam\$PValue,method=fdrtype)
    # Show at least ten sets even when fewer pass the threshold.
    nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
    dscam = downcam[order(downcam\$PValue),]
    dscam\$adjPValue = p.adjust(dscam\$PValue,method=fdrtype)
    ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
    write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
    write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
    write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
    invisible(NULL)
}
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
516
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
517
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
518 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
519 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
520 filterquantile=0.2, subjects=c(),mydesign=NULL,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
521 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
522 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
77
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
523 doCook=F,DESeq_fitType="parameteric",robustmeth='ordinary')
61
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
524 {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
525 # Error handling
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
526 if (length(unique(group))!=2){
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
527 print("Number of conditions identified in experiment does not equal 2")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
528 q()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
529 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
530 require(edgeR)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
531 options(width = 512)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
532 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
533 allN = nrow(Count_Matrix)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
534 nscut = round(ncol(Count_Matrix)/2)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
535 colTotmillionreads = colSums(Count_Matrix)/1e6
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
536 counts.dataframe = as.data.frame(c())
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
537 rawrs = rowSums(Count_Matrix)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
538 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
539 nzN = nrow(nonzerod)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
540 nzrs = rowSums(nonzerod)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
541 zN = allN - nzN
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
542 print('# Quantiles for non-zero row counts:',quote=F)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
543 print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
544 if (useNDF == T)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
545 {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
546 gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
547 lo = colSums(Count_Matrix[!gt1rpin3,])
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
548 workCM = Count_Matrix[gt1rpin3,]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
549 cleanrs = rowSums(workCM)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
550 cleanN = length(cleanrs)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
551 meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
552 print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
553 maint = paste('Filter >=1/million reads in >=',nscut,'samples')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
554 } else {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
555 useme = (nzrs > quantile(nzrs,filterquantile))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
556 workCM = nonzerod[useme,]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
557 lo = colSums(nonzerod[!useme,])
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
558 cleanrs = rowSums(workCM)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
559 cleanN = length(cleanrs)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
560 meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
561 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
562 maint = paste('Filter below',filterquantile,'quantile')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
563 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
564 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
565 allgenes = rownames(workCM)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
566 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
567 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
568 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
569 testreg = str_match(allgenes,reg)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
570 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
571 {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
572 print("@@ using ucsc substitution for urls")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
573 contigurls = paste0(ucsc,"&amp;position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
574 } else {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
575 print("@@ using genecards substitution for urls")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
576 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
577 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
578 print.noquote("# urls")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
579 print.noquote(head(contigurls))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
580 print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
581 cmrowsums = rowSums(workCM)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
582 TName=unique(group)[1]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
583 CName=unique(group)[2]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
584 if (is.null(mydesign)) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
585 if (length(subjects) == 0)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
586 {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
587 mydesign = model.matrix(~group)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
588 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
589 else {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
590 subjf = factor(subjects)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
591 mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
592 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
593 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
594 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
595 print.noquote('Using design matrix:')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
596 print.noquote(mydesign)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
597 if (doedgeR) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
598 sink('edgeR.log')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
599 #### Setup DGEList object
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
600 DGEList = DGEList(counts=workCM, group = group)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
601 DGEList = calcNormFactors(DGEList)
77
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
602 if (robust_meth == 'ordinary') {
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
603 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
604 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
605 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)
61
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
606
77
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
607 comdisp = DGEList\$common.dispersion
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
608 estpriorn = getPriorN(DGEList)
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
609 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
610 } else {
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
611 DGEList = estimateGLMRobustDisp(DGEList,design=mydesign, prior.df = edgeR_priordf, maxit = 6, residual.type = robust_meth)
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
612 }
61
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
613 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
614 DGLM = glmFit(DGEList,design=mydesign)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
615 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
616 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
617 normData = (1e+06*DGEList\$counts/efflib)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
618 uoutput = cbind(
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
619 Name=as.character(rownames(DGEList\$counts)),
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
620 DE\$table,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
621 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
622 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
623 DGEList\$counts
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
624 )
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
625 soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
626 goodness = gof(DGLM, pcutoff=fdrthresh)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
627 if (sum(goodness\$outlier) > 0) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
628 print.noquote('GLM outliers:')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
629 print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
630 } else {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
631 print('No GLM fit outlier genes found\n')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
632 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
633 z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
634 pdf("edgeR_GoodnessofFit.pdf")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
635 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
636 abline(0,1,lwd=3)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
637 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
638 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
639 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
640 normData = (1e+06*DGEList\$counts/efflib)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
641 uniqueg = unique(group)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
642 #### Plot MDS
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
643 sample_colors = match(group,levels(group))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
644 sampleTypes = levels(factor(group))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
645 print.noquote(sampleTypes)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
646 pdf("edgeR_MDSplot.pdf")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
647 plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
648 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
649 grid(col="blue")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
650 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
651 colnames(normData) = paste( colnames(normData),'N',sep="_")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
652 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
653 nzd = data.frame(log(nonzerod + 1e-2,10))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
654 try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") )
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
655 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
656 tt = cbind(
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
657 Name=as.character(rownames(DGEList\$counts)),
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
658 DE\$table,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
659 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
660 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
661 )
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
662 print.noquote("# edgeR Top tags\n")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
663 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
664 tt = tt[order(DE\$table\$PValue),]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
665 print.noquote(tt[1:50,])
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
666 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
667 nsig = length(deTags)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
668 print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
669 deColours = ifelse(deTags,'red','black')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
670 pdf("edgeR_BCV_vs_abundance.pdf")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
671 plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
672 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
673 dg = DGEList[order(DE\$table\$PValue),]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
674 #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
675 efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
676 normData = (1e+06*dg\$counts/efflib)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
677 outpdfname="edgeR_top_100_heatmap.pdf"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
678 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
679 outSmear = "edgeR_smearplot.pdf"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
680 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
681 smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
682 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
683 norm.factor = DGEList\$samples\$norm.factors
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
684 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
685 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
686 edgeRcounts = rep(0, length(allgenes))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
687 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
688 sink()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
689 } ### doedgeR
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
690 if (doDESeq2 == T)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
691 {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
692 sink("DESeq2.log")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
693 # DESeq2
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
694 require('DESeq2')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
695 library('RColorBrewer')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
696 if (length(subjects) == 0)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
697 {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
698 pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
699 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
700 } else {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
701 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
702 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
703 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
704 #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
705 #rDESeq = results(DESeq2)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
706 #newCountDataSet(workCM, group)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
707 deSeqDatsizefac = estimateSizeFactors(deSEQds)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
708 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
77
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
709 resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
61
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
710 rDESeq = as.data.frame(results(resDESeq))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
711 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
712 srDESeq = rDESeq[order(rDESeq\$pvalue),]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
713 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
714 cat("# DESeq top 50\n")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
715 print.noquote(srDESeq[1:50,])
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
716 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
717 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
718 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
719 DESeqcounts = rep(0, length(allgenes))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
720 DESeqcounts[DESeqcountsindex] = 1
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
721 pdf("DESeq2_dispersion_estimates.pdf")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
722 plotDispEsts(resDESeq)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
723 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
724 ysmall = abs(min(rDESeq\$log2FoldChange))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
725 ybig = abs(max(rDESeq\$log2FoldChange))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
726 ylimit = min(4,ysmall,ybig)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
727 pdf("DESeq2_MA_plot.pdf")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
728 plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
729 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
730 rlogres = rlogTransformation(resDESeq)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
731 sampledists = dist( t( assay(rlogres) ) )
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
732 sdmat = as.matrix(sampledists)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
733 pdf("DESeq2_sample_distance_plot.pdf")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
734 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
735 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
736 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
737 ###outpdfname="DESeq2_top50_heatmap.pdf"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
738 ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
739 sink()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
740 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
741 if ("try-error" %in% class(result)) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
742 print.noquote('DESeq2 plotPCA failed.')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
743 } else {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
744 pdf("DESeq2_PCA_plot.pdf")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
745 #### wtf - print? Seems needed to get this to work
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
746 print(ppca)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
747 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
748 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
749 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
750
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
751 if (doVoom == T) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
752 sink('VOOM.log')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
753 if (doedgeR == F) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
754 #### Setup DGEList object
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
755 DGEList = DGEList(counts=workCM, group = group)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
756 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
757 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
758 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
759 }
77
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
760 norm.factor = calcNormFactors(DGEList)
61
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
761 pdf("VOOM_mean_variance_plot.pdf")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
762 dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
763 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
764 # Use limma to fit data
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
765 fit = lmFit(dat.voomed, mydesign)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
766 fit = eBayes(fit)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
767 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
768 qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='VOOM_qqplot.pdf')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
769 rownames(rvoom) = rownames(workCM)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
770 rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
771 srvoom = rvoom[order(rvoom\$P.Value),]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
772 cat("# VOOM top 50\n")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
773 print(srvoom[1:50,])
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
774 write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
775 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
776 topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
777 voomcountsindex = which(allgenes %in% topresults.voom\$ID)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
778 voomcounts = rep(0, length(allgenes))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
779 voomcounts[voomcountsindex] = 1
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
780 sink()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
781 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
782
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
783 if (doCamera) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
784 doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
785 outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
786 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
787
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
788 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
789 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
790 vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
791 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
792 VOOM_limma = voomcounts, row.names = allgenes)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
793 } else if ((doDESeq2==T) && (doedgeR==T)) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
794 vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
795 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
796 } else if ((doVoom==T) && (doedgeR==T)) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
797 vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
798 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
799 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
800
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
801 if (nrow(counts.dataframe > 1)) {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
802 counts.venn = vennCounts(counts.dataframe)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
803 vennf = "Venn_significant_genes_overlap.pdf"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
804 pdf(vennf)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
805 vennDiagram(counts.venn,main=vennmain,col="maroon")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
806 dev.off()
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
807 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
808 } #### doDESeq2 or doVoom
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
809
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
810 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
811 #### Done
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
812
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
813 ###sink(stdout(),append=T,type="message")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
814 builtin_gmt = "" ### no gene set files are supplied; passed to edgeIt as bigmt
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
815 history_gmt = "" ### passed to edgeIt as histgmt
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
816 history_gmt_name = ""
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
817 out_edgeR = F ### default; replaced by the output path below only when edgeR is selected
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
818 out_DESeq2 = F ### default; replaced by the output path below only when DESeq2 is selected
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
819 out_VOOM = "$out_VOOM" ### NOTE(review): unlike the two defaults above this is always the output path - confirm intended
77
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
820 edgeR_robust_meth = $edgeR_robust # control robust deviance options
61
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
821 doDESeq2 = $DESeq2.doDESeq2 # make these T or F
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
822 doVoom = $doVoom
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
823 doCamera = F ### camera gene set testing is switched off in this script
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
824 doedgeR = $edgeR.doedgeR
77
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
825 edgeR_priordf = 10 ### default; overridden below when edgeR is selected
61
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
826
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
827
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
828 #if $doVoom == "T":
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
829 out_VOOM = "$out_VOOM" ### emitted into the R script only when the template test above is true
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
830 #end if
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
831
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
832 #if $DESeq2.doDESeq2 == "T":
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
833 out_DESeq2 = "$out_DESeq2"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
834 DESeq_fitType = "$DESeq2.DESeq_fitType" ### NOTE(review): only defined when DESeq2 is selected, yet always named in the edgeIt call - confirm it is never evaluated otherwise
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
835 #end if
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
836
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
837 #if $edgeR.doedgeR == "T":
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
838 out_edgeR = "$out_edgeR"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
839 edgeR_priordf = $edgeR.edgeR_priordf ### replaces the default of 10 set above
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
840 #end if
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
841
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
842
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
843 if (0 == sum(doedgeR,doVoom,doDESeq2)) ### none of the three methods was selected
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
844 {
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
845 write("No methods chosen - nothing to do! Please try again after choosing one or more methods", file=stderr())
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
846 quit(status=2,save="no") ### exit with status 2 without saving the workspace
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
847 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
848
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
849 Out_Dir = "$html_file.files_path" ### directory for the files that accompany the HTML report
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
850 Input = "$input1" ### path of the tabular count matrix
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
851 TreatmentName = "$treatment_name"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
852 TreatmentCols = "$Treat_cols" ### comma separated column numbers, parsed into TCols below
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
853 ControlName = "$control_name"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
854 ControlCols= "$Control_cols" ### comma separated column numbers, parsed into CCols below
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
855 org = "$input1.dbkey"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
856 if (org == "") { org = "hg19"} ### fall back to hg19 when the input carries no dbkey
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
857 fdrtype = "$fdrtype"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
858 fdrthresh = $fdrthresh
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
859 useNDF = $useNDF
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
860 fQ = $fQ # non-differential centile cutoff
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
861 myTitle = "$title"
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
862 sids = strsplit("$subjectids",',') ### optional comma separated subject ids
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
863 subjects = unlist(sids)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
864 nsubj = length(subjects) ### checked against the number of sample columns further down
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
865 TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1 ### presumably minus 1 because read.table(row.names=1) below consumes the first file column - TODO confirm
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
866 CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1 ### same offset as TCols
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
867 cat('Got TCols=')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
868 cat(TCols)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
869 cat('; CCols=')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
870 cat(CCols)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
871 cat('\n')
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
872 useCols = c(TCols,CCols) ### treatment columns first, then control columns
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
873 if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
874 Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') #Load tab separated file; first row is the header, first column the row names
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
875 snames = colnames(Count_Matrix)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
876 nsamples = length(snames) ### number of sample columns in the whole input matrix
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
877 if ((nsubj != nsamples) && (nsubj > 0)) { ### subject ids were given but do not match the number of sample columns
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
878 options(show.error.messages=TRUE)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
879 errmsg = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','),'has length',nsubj,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
880 'but there are',nsamples,'samples',paste(snames,collapse=','))
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
881 write(errmsg, file=stderr())
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
882 quit(status=4,save="no") ### exit with status 4 without saving the workspace
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
883 }
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
884 if (length(subjects) != 0) {subjects = subjects[useCols]} ### keep subject ids for the selected columns only, in the same order
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
885 Count_Matrix = Count_Matrix[,useCols] ### keep only the selected columns, treatment first then control
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
886 rn = rownames(Count_Matrix)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
887 islib = rn %in% c('librarySize','NotInBedRegions') ### flag rows carrying either of these two names
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
888 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
889 Count_Matrix = Count_Matrix[subset(rn,! islib),] ### drop the flagged rows from the count matrix
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
890 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor matching the column order
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
891 group = factor(group, levels=c(ControlName,TreatmentName)) ### control is made the first factor level
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
892 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relabel columns as group_samplename
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
893 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2, ### run all selected methods on the prepared count matrix
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
894 fdrtype=fdrtype,mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', ### use the adjustment method chosen on the form instead of a fixed 'BH'
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
895 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects, ### NOTE(review): useNDF is passed as a literal F although a useNDF value is read from the form - confirm intended
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
896 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
77
4a2e7a9725b2 Uploaded
fubar
parents: 74
diff changeset
897 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robustmeth=edgeR_robust_meth)
61
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
898 sessionInfo() ### record the R and package versions in the job output
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
899 ]]>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
900 </configfile>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
901 </configfiles>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
902 <help>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
903
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
904 **What it does**
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
905
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
906 Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
907 Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
908
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
909 **Input**
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
910
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
911 Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
912 and your favourite gene model to generate inputs. Each row is a genomic feature (e.g. a gene or exon) and each column the
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
913 non-negative integer count of reads from one sample overlapping the feature.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
914 The matrix must have a header row uniquely identifying the source samples, and unique row names in
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
915 the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
916
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
917 **Specifying comparisons**
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
918
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
919 This is deliberately simplified to a single factor with two levels - case vs control.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
920
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
921 More complex interfaces are possible but painful at present.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
922 Probably need to specify a phenotype file to do this better.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
923 Work in progress. Send code.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
924
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
925 If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
926 put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
927 Supply a list of integers, one for each sample, or an empty string if samples are all independent.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
928 If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
929 Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
930
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
931 So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
932 eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects, you might use
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
933 8,9,1,1,2,2
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
934 as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
935
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
936 **Methods available**
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
937
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
938 You can run 3 popular Bioconductor packages available for count data.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
939
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
940 edgeR - see edgeR_ for details
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
941
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
942 VOOM/limma - see limma_VOOM_ for details
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
943
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
944 DESeq2 - see DESeq2_ for details
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
945
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
946 and optionally camera in edgeR which works better if MSigDB is installed.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
947
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
948 **Outputs**
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
949
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
950 Some helpful plots and analysis results. Note that most of these are produced using R code
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
951 suggested by the excellent documentation and vignettes for the Bioconductor
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
952 packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
953
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
954 **Note on Voom**
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
955
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
956 The R help page for voom in limma version 3.16.6 includes the following text from the authors - but you should read the paper to interpret this method.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
957
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
958 This function is intended to process RNA-Seq or ChIP-Seq data prior to linear modelling in limma.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
959
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
960 voom is an acronym for mean-variance modelling at the observational level.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
961 The key concern is to estimate the mean-variance relationship in the data, then use this to compute appropriate weights for each observation.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
962 Count data almost always show non-trivial mean-variance relationships. Raw counts show increasing variance with increasing count size, while log-counts typically show a decreasing mean-variance trend.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
963 This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
964 The weights are then used in the linear modelling process to adjust for heteroscedasticity.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
965
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
966 In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using lowess.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
967 The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
968 The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
969 Tags with zero counts across all samples are not included in the lowess fit. Optional normalization is performed using normalizeBetweenArrays.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
970 Using fitted values of log2 counts from a linear model fit by lmFit, variances from the mean-variance trend were interpolated for each observation.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
971 This was carried out by approxfun. Inverse variance weights can be used to correct for mean-variance trend in the count data.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
972
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
973
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
974 Author(s)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
975
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
976 Charity Law and Gordon Smyth
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
977
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
978 References
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
979
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
980 Law, CW (2013). Precision weights for gene expression analysis. PhD Thesis. University of Melbourne, Australia.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
981
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
982 Law, CW, Chen, Y, Shi, W, Smyth, GK (2013). Voom! Precision weights unlock linear model analysis tools for RNA-seq read counts.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
983 Technical Report 1 May 2013, Bioinformatics Division, Walter and Eliza Hall Institute of Medical Research, Melbourne, Australia.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
984 http://www.statsci.org/smyth/pubs/VoomPreprint.pdf
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
985
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
986 See Also
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
987
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
988 A voom case study is given in the edgeR User's Guide.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
989
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
990 vooma is a similar function but for microarrays instead of RNA-seq.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
991
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
992
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
993 ***old rant on changes to Bioconductor package variable names between versions***
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
994
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
995 The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
996 breaking this and all other code that assumed the old name for this variable,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
997 between edgeR 2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing).
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
998 This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
999 to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1000 when their old scripts break. This tool currently works with 2.4.6.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1001
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1002 **Note on prior.n**
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1003
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1004 http://seqanswers.com/forums/showthread.php?t=5591 says:
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1005
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1006 *prior.n*
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1007
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1008 The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1009 You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1010 in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1011 tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1012 common likelihood the weight of one observation.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1013
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1014 In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1015 or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1016 you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1017 (squeezing) of the tagwise dispersions. How many samples do you have in your experiment?
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1018 What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1019 If you have more samples, then the tagwise dispersion estimates will be more reliable,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1020 so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1021
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1022
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1023 From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1024
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1025 Dear Dorota,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1026
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1027 The important settings are prior.df and trend.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1028
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1029 prior.n and prior.df are related through prior.df = prior.n * residual.df,
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1030 and your experiment has residual.df = 36 - 12 = 24. So the old setting of
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1031 prior.n=10 is equivalent for your data to prior.df = 240, a very large
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1032 value. Going the other way, the new setting of prior.df=10 is equivalent
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1033 to prior.n=10/24.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1034
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1035 To recover old results with the current software you would use
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1036
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1037 estimateTagwiseDisp(object, prior.df=240, trend="none")
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1038
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1039 To get the new default from old software you would use
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1040
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1041 estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1042
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1043 Actually the old trend method is equivalent to trend="loess" in the new
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1044 software. You should use plotBCV(object) to see whether a trend is
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1045 required.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1046
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1047 Note you could also use
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1048
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1049 prior.n = getPriorN(object, prior.df=10)
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1050
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1051 to map between prior.df and prior.n.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1052
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1053 ----
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1054
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1055 **Attributions**
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1056
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1057 edgeR - edgeR_
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1058
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1059 VOOM/limma - limma_VOOM_
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1060
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1061 DESeq2 - DESeq2_ for details
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1062
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1063 See above for Bioconductor package documentation for packages exposed in Galaxy by this tool and app store package.
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1064
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1065 Galaxy_ (that's what you are using right now!) for gluing everything together
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1066
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1067 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1068 licensed to you under the LGPL_ like other rgenetics artefacts
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1069
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1070 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1071 .. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1072 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1073 .. _DESeq2: http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1074 .. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1075 .. _Galaxy: http://getgalaxy.org
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1076 </help>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1077
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1078 </tool>
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1079
dfc1046c8806 Uploaded
fubar
parents:
diff changeset
1080