annotate rgedgeRpaired_nocamera.xml @ 46:ee47fea51b40 draft

Uploaded
author fubar
date Mon, 23 Dec 2013 16:43:27 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
46
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1 <tool id="rgDifferentialCount" name="Differential_Count" version="0.22">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
2 <description>models using BioConductor packages</description>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
3 <requirements>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
4 <requirement type="package" version="3.11.11">atlas</requirement>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
5 <requirement type="package" version="3.0.1">r3</requirement>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
6 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
7 <requirement type="package" version="9.07">ghostscript</requirement>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
8 <requirement type="package" version="2.12">biocbasics</requirement>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
9 </requirements>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
10
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
11 <command interpreter="python">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
12 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
13 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
14 </command>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
15 <inputs>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
16 <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
17 help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
18 <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
19 help="Supply a meaningful name here to remind you what the outputs contain">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
20 <sanitizer invalid_char="">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
21 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
22 </sanitizer>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
23 </param>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
24 <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
25 <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
26 multiple="true" use_header_names="true" size="120" display="checkboxes">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
27 <validator type="no_options" message="Please select at least one column."/>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
28 <sanitizer invalid_char="">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
29 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
30 </sanitizer>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
31 </param>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
32 <param name="control_name" type="text" value="Control" size="50" label="Control Name"/>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
33 <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
34 multiple="true" use_header_names="true" size="120" display="checkboxes" optional="true">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
35 <validator type="no_options" message="Please select at least one column."/>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
36 <sanitizer invalid_char="">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
37 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
38 </sanitizer> <validator type="no_options" message="Please select at least one column."/>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
39 <sanitizer invalid_char="">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
40 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
41 </sanitizer>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
42
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
43 </param>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
44 <param name="subjectids" type="text" optional="true" size="120" value = ""
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
45 label="IF SUBJECTS NOT ALL INDEPENDENT! Enter comma separated strings to indicate sample labels for (eg) pairing - must be one for every column in input"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
46 help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter 'A99,C21,A99,C21'">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
47 <sanitizer>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
48 <valid initial="string.letters,string.digits"><add value="," /> </valid>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
49 </sanitizer>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
50 </param>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
51 <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
52 help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
53 <param name="useNDF" type="boolean" truevalue="T" falsevalue="F" checked="false" size="1"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
54 label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
55 help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
56
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
57 <conditional name="edgeR">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
58 <param name="doedgeR" type="select"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
59 label="Run this model using edgeR"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
60 help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
61 <option value="F">Do not run edgeR</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
62 <option value="T" selected="true">Run edgeR</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
63 </param>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
64 <when value="T">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
65 <param name="edgeR_priordf" type="integer" value="20" size="3"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
66 label="prior.df for tagwise dispersion - lower value = more emphasis on each tag's variance. Replaces prior.n and prior.df = prior.n * residual.df"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
67 help="0 = Use edgeR default. Use a small value to 'smooth' small samples. See edgeR docs and note below"/>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
68 </when>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
69 <when value="F"></when>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
70 </conditional>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
71 <conditional name="DESeq2">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
72 <param name="doDESeq2" type="select"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
73 label="Run the same model with DESeq2 and compare findings"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
74 help="DESeq2 is an update to the DESeq package. It uses different assumptions and methods to edgeR">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
75 <option value="F" selected="true">Do not run DESeq2</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
76 <option value="T">Run DESeq2</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
77 </param>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
78 <when value="T">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
79 <param name="DESeq_fitType" type="select">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
80 <option value="parametric" selected="true">Parametric (default) fit for dispersions</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
81 <option value="local">Local fit - this will automagically be used if parametric fit fails</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
82 <option value="mean">Mean dispersion fit- use this if you really understand what you're doing - read the fine manual linked below in the documentation</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
83 </param>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
84 </when>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
85 <when value="F"> </when>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
86 </conditional>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
87 <param name="doVoom" type="select"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
88 label="Run the same model with Voom/limma and compare findings"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
89 help="Voom uses counts per million and a precise transformation of variance so count data can be analysed using limma">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
90 <option value="F" selected="true">Do not run VOOM</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
91 <option value="T">Run VOOM</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
92 </param>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
93 <!--
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
94 <conditional name="camera">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
95 <param name="doCamera" type="select" label="Run the edgeR implementation of Camera GSEA for up/down gene sets"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
96 help="If yes, you can choose a set of genesets to test and/or supply a gmt format geneset collection from your history">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
97 <option value="F" selected="true">Do not run GSEA tests with the Camera algorithm</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
98 <option value="T">Run GSEA tests with the Camera algorithm</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
99 </param>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
100 <when value="T">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
101 <conditional name="gmtSource">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
102 <param name="refgmtSource" type="select"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
103 label="Use a gene set (.gmt) from your history and/or use a built-in (MSigDB etc) gene set">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
104 <option value="indexed" selected="true">Use a built-in gene set</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
105 <option value="history">Use a gene set from my history</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
106 <option value="both">Add a gene set from my history to a built in gene set</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
107 </param>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
108 <when value="indexed">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
109 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
110 <options from_data_table="gseaGMT_3.1">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
111 <filter type="sort_by" column="2" />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
112 <validator type="no_options" message="No GMT v3.1 files are available - please install them"/>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
113 </options>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
114 </param>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
115 </when>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
116 <when value="history">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
117 <param name="ownGMT" type="data" format="gmt" label="Select a Gene Set from your history" />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
118 </when>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
119 <when value="both">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
120 <param name="ownGMT" type="data" format="gseagmt" label="Select a Gene Set from your history" />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
121 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
122 <options from_data_table="gseaGMT_4">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
123 <filter type="sort_by" column="2" />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
124 <validator type="no_options" message="No GMT v4 files are available - please fix tool_data_table and loc files"/>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
125 </options>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
126 </param>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
127 </when>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
128 </conditional>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
129 </when>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
130 <when value="F">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
131 </when>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
132 </conditional>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
133 -->
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
134 <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for family wise error rate control"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
135 help="Conventional default value of 0.05 recommended"/>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
136 <param name="fdrtype" type="select" label="FDR (Type I error) control method"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
137 help="Use fdr or bh typically to control for the number of tests in a reliable way">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
138 <option value="fdr" selected="true">fdr</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
139 <option value="BH">Benjamini Hochberg</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
140 <option value="BY">Benjamini Yekutieli</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
141 <option value="bonferroni">Bonferroni</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
142 <option value="hochberg">Hochberg</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
143 <option value="holm">Holm</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
144 <option value="hommel">Hommel</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
145 <option value="none">no control for multiple tests</option>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
146 </param>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
147 </inputs>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
148 <outputs>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
149 <data format="tabular" name="out_edgeR" label="${title}_topTable_edgeR.xls">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
150 <filter>edgeR['doedgeR'] == "T"</filter>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
151 </data>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
152 <data format="tabular" name="out_DESeq2" label="${title}_topTable_DESeq2.xls">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
153 <filter>DESeq2['doDESeq2'] == "T"</filter>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
154 </data>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
155 <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
156 <filter>doVoom == "T"</filter>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
157 </data>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
158 <data format="html" name="html_file" label="${title}.html"/>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
159 </outputs>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
160 <stdio>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
161 <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix" />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
162 </stdio>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
163 <tests>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
164 <test>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
165 <param name='input1' value='test_bams2mx.xls' ftype='tabular' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
166 <param name='treatment_name' value='liver' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
167 <param name='title' value='edgeRtest' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
168 <param name='useNDF' value='' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
169 <param name='doedgeR' value='T' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
170 <param name='doVoom' value='T' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
171 <param name='doDESeq2' value='T' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
172 <param name='fdrtype' value='fdr' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
173 <param name='edgeR_priordf' value="8" />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
174 <param name='fdrthresh' value="0.05" />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
175 <param name='control_name' value='heart' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
176 <param name='subjectids' value='' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
177 <param name='Control_cols' value='3,4,5,9' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
178 <param name='Treat_cols' value='2,6,7,8' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
179 <output name='out_edgeR' file='edgeRtest1out.xls' compare='diff' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
180 <output name='html_file' file='edgeRtest1out.html' compare='diff' lines_diff='20' />
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
181 </test>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
182 </tests>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
183
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
184 <configfiles>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
185 <configfile name="runme">
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
186 <![CDATA[
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
187 #
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
188 # edgeR.Rscript
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
189 # updated nov 2011 for R 2.14.0 and edgeR 2.4.0 by ross
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
190 # Performs DGE on a count table containing n replicates of two conditions
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
191 #
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
192 # Parameters
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
193 #
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
194 # 1 - Output Dir
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
195
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
196 # Original edgeR code by: S.Lunke and A.Kaspi
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
197 reallybig = log10(.Machine\$double.xmax)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
198 reallysmall = log10(.Machine\$double.xmin)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
199 library('stringr')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
200 library('gplots')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
201 library('edgeR')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
202 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
203 {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
204 # Perform clustering for significant pvalues after controlling FWER
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
205 samples = colnames(cmat)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
206 gu = unique(group)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
207 gn = rownames(cmat)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
208 if (length(gu) == 2) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
209 col.map = function(g) {if (g==gu[1]) "#FF0000" else "#0000FF"}
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
210 pcols = unlist(lapply(group,col.map))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
211 } else {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
212 colours = rainbow(length(gu),start=0,end=4/6)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
213 pcols = colours[match(group,gu)] }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
214 dm = cmat[(! is.na(gn)),]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
215 # remove unlabelled hm rows
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
216 nprobes = nrow(dm)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
217 # sub = paste('Showing',nprobes,'contigs ranked for evidence of differential abundance')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
218 if (nprobes > nsamp) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
219 dm =dm[1:nsamp,]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
220 #sub = paste('Showing',nsamp,'contigs ranked for evidence for differential abundance out of',nprobes,'total')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
221 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
222 newcolnames = substr(colnames(dm),1,20)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
223 colnames(dm) = newcolnames
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
224 pdf(outpdfname)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
225 heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none',
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
226 Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
227 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
228 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
229
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
230 hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
231 {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
232 # for 2 groups only was
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
233 #col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"}
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
234 #pcols = unlist(lapply(group,col.map))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
235 gu = unique(group)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
236 colours = rainbow(length(gu),start=0.3,end=0.6)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
237 pcols = colours[match(group,gu)]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
238 nrows = nrow(cmat)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
239 mtitle = paste(myTitle,'Heatmap: n contigs =',nrows)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
240 if (nrows > nsamp) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
241 cmat = cmat[c(1:nsamp),]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
242 mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
243 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
244 newcolnames = substr(colnames(cmat),1,20)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
245 colnames(cmat) = newcolnames
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
246 pdf(outpdfname)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
247 heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
248 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
249 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
250
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
251 qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
252 # stolen from https://gist.github.com/703512
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
253 {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
254 o = -log10(sort(pvector,decreasing=F))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
255 e = -log10( 1:length(o)/length(o) )
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
256 o[o==-Inf] = reallysmall
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
257 o[o==Inf] = reallybig
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
258 maint = descr
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
259 pdf(outpdf)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
260 plot(e,o,pch=19,cex=1, main=maint, ...,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
261 xlab=expression(Expected~~-log[10](italic(p))),
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
262 ylab=expression(Observed~~-log[10](italic(p))),
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
263 xlim=c(0,max(e)), ylim=c(0,max(o)))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
264 lines(e,e,col="red")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
265 grid(col = "lightgray", lty = "dotted")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
266 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
267 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
268
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
269 smearPlot = function(DGEList,deTags, outSmear, outMain)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
270 {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
271 pdf(outSmear)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
272 plotSmear(DGEList,de.tags=deTags,main=outMain)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
273 grid(col="lightgray", lty="dotted")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
274 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
275 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
276
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
277 boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
278 { #
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
279 nc = ncol(rawrs)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
280 #### for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA}
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
281 fullnames = colnames(rawrs)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
282 newcolnames = substr(colnames(rawrs),1,20)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
283 colnames(rawrs) = newcolnames
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
284 newcolnames = substr(colnames(cleanrs),1,20)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
285 colnames(cleanrs) = newcolnames
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
286 defpar = par(no.readonly=T)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
287 print.noquote('raw contig counts by sample:')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
288 print.noquote(summary(rawrs))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
289 print.noquote('normalised contig counts by sample:')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
290 print.noquote(summary(cleanrs))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
291 pdf(pdfname)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
292 par(mfrow=c(1,2))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
293 boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('Raw:',maint))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
294 grid(col="lightgray",lty="dotted")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
295 boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('After ',maint))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
296 grid(col="lightgray",lty="dotted")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
297 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
298 pdfname = "sample_counts_histogram.pdf"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
299 nc = ncol(rawrs)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
300 print.noquote(paste('Using ncol rawrs=',nc))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
301 ncroot = round(sqrt(nc))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
302 if (ncroot*ncroot < nc) { ncroot = ncroot + 1 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
303 m = c()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
304 for (i in c(1:nc)) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
305 rhist = hist(rawrs[,i],breaks=100,plot=F)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
306 m = append(m,max(rhist\$counts))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
307 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
308 ymax = max(m)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
309 ncols = length(fullnames)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
310 if (ncols > 20)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
311 {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
312 scale = 7*ncols/20
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
313 pdf(pdfname,width=scale,height=scale)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
314 } else {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
315 pdf(pdfname)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
316 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
317 par(mfrow=c(ncroot,ncroot))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
318 for (i in c(1:nc)) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
319 hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw count', col="maroon",
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
320 breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
321 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
322 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
323 par(defpar)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
324
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
325 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
326
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
# cumPlot: write a two-panel PDF ("Filtering_rowsum_bar_charts.pdf") with
# histograms of log10 row sums before and after filtering.
#   rawrs   - numeric vector of row sums before filtering
#   cleanrs - numeric vector of row sums after filtering
#   maint   - text appended to the 'Before:' / 'After:' panel titles
#   myTitle - text used as the subtitle of both panels
# Both panels share the x range 0 .. max(log10(rawrs)) so they can be compared.
# Returns NULL invisibly; the PDF file is the only product.
cumPlot = function(rawrs,cleanrs,maint,myTitle)
{
  pdfname = "Filtering_rowsum_bar_charts.pdf"
  lrs = log(rawrs,10)
  lim = max(lrs)
  pdf(pdfname)
  # Close exactly the device opened above, even if a plotting call fails.
  # No par() save/restore is done: calling par() while no device is open
  # makes R open a default device (Rplots.pdf under Rscript), and the
  # mfrow setting below dies with the PDF device anyway.
  thisdev = dev.cur()
  on.exit(dev.off(which=thisdev), add=TRUE)
  par(mfrow=c(2,1))
  hist(lrs,breaks=100,main=paste('Before:',maint),xlab="# Reads (log)",
       ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1)
  grid(col="lightgray", lty="dotted")
  lrs = log(cleanrs,10)
  hist(lrs,breaks=100,main=paste('After:',maint),xlab="# Reads (log)",
       ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1)
  grid(col="lightgray", lty="dotted")
  invisible(NULL)
}
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
345
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
# cumPlot1: write a two-panel PDF of empirical cumulative distributions of
# row sums before and after filtering, on a log x axis.
#   rawrs   - numeric vector of row sums before filtering
#   cleanrs - numeric vector of row sums after filtering
#   maint   - text appended to the 'Before' / 'After' panel titles
#   myTitle - subtitle; with spaces removed it also prefixes the file name
#             (<myTitle>_RowsumCum.pdf)
# The y value at each distinct row sum is the ECDF evaluated there, so tied
# row sums contribute their full weight to the cumulative proportion.
# Returns NULL invisibly; the PDF file is the only product.
cumPlot1 = function(rawrs,cleanrs,maint,myTitle)
{
  pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_')
  lastx = max(rawrs)
  # Build the step functions before opening the device so bad input fails
  # without leaving a device open.
  rawf = ecdf(rawrs)
  cleanf = ecdf(cleanrs)
  rawe = knots(rawf)      # distinct sorted row sums
  cleane = knots(cleanf)
  ry = rawf(rawe)         # cumulative proportion at each distinct value
  cy = cleanf(cleane)
  pdf(pdfname)
  thisdev = dev.cur()
  on.exit(dev.off(which=thisdev), add=TRUE)  # close the PDF even on error
  par(mfrow=c(2,1))
  plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads",
       ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
  grid(col="blue")
  plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads",
       ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
  grid(col="blue")
  invisible(NULL)
}
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
364
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
365
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
366
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
# doGSEA: run camera() once per gene set and report up and down regulated
# sets separately. Console output is diverted to 'Camera.log' for the
# duration of the call and restored on exit, including on error.
#   y, design  - passed straight through to camera()
#   histgmt    - path to a user supplied gmt file, "" to skip
#   bigmt      - path to the built-in gmt file, "" to skip
#   ntest      - ignored; kept only for interface compatibility
#   myTitle    - text written as the first line of outfname
#   outfname   - header file name, also the suffix of the result files
#                camera_up_<outfname> and camera_down_<outfname>
#   minnin, maxnin - a set is tested only if minnin < matched rows < maxnin
#   fdrthresh  - adjusted p value cutoff used to size the printed tables
#   fdrtype    - p.adjust() method
# gmt lines are tab separated: set id, URL or description, then gene symbols.
# Symbols are matched to rownames(y) case-insensitively.
# A result file is written only for a direction that has at least one set.
doGSEA = function(y=NULL,design=NULL,histgmt="",
                  bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
                  ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
{
  sink('Camera.log')
  on.exit(sink(), add=TRUE)
  genesets = character(0)
  if (bigmt > "")
  {
    genesets = readLines(bigmt)
  }
  if (histgmt > "")
  {
    # concatenate the lines; rbind() here would build a recycled 2 row matrix
    genesets = c(genesets,readLines(histgmt))
  }
  print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
  genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
  write(paste(myTitle,'edgeR GSEA'),file=outfname,append=F)
  urownames = toupper(rownames(y))
  uplist = list()
  downlist = list()
  for (gs in genesets) {
    if (length(gs) < 3) { next } # blank or malformed line: no member genes
    g = gs[1] # geneset_id
    u = gs[2]
    if (!is.na(u) && (u > "")) { u = paste("<a href=\'",u,"\'>",u,"</a>",sep="") }
    glist = toupper(gs[3:length(gs)]) # member gene symbols
    inglist = urownames %in% glist
    nin = sum(inglist)
    if ((nin > minnin) && (nin < maxnin)) {
      camres = camera(y=y,index=inglist,design=design)
      if (! is.null(camres)) {
        rownames(camres) = g # gene set name
        camres = cbind(GeneSet=g,URL=u,camres)
        if (camres\$Direction == "Up")
        {
          uplist[[length(uplist)+1]] = camres
        } else {
          downlist[[length(downlist)+1]] = camres
        }
      }
    }
  }
  # Bind one direction's results, sort by p value and add adjusted p values.
  # Returns NULL when nothing was collected for that direction.
  rankCam = function(camlist) {
    if (length(camlist) == 0) { return(NULL) }
    cam = do.call(rbind,camlist)
    cam = cam[order(cam\$PValue),]
    cam\$adjPValue = p.adjust(cam\$PValue,method=fdrtype)
    cam
  }
  uscam = rankCam(uplist)
  dscam = rankCam(downlist)
  if (!is.null(uscam)) {
    write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
  }
  if (!is.null(dscam)) {
    write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
  }
  if (is.null(uscam)) {
    print.noquote('@@@@@ Camera found no up gene sets to report')
  } else {
    nup = max(10,sum((uscam\$adjPValue < fdrthresh))) # always show at least 10
    print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
    write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
  }
  if (is.null(dscam)) {
    print.noquote('@@@@@ Camera found no down gene sets to report')
  } else {
    ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
    print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
    write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
  }
  invisible(NULL)
}
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
435
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
436
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
437
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
438
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
# doGSEAatonce: same reporting as doGSEA but with a single camera() call
# over all eligible gene sets. Console output is diverted to 'Camera.log'
# for the duration of the call and restored on exit, including on error.
#   y, design  - passed straight through to camera()
#   histgmt    - path to a user supplied gmt file, "" to skip
#   bigmt      - path to the built-in gmt file, "" to skip
#   ntest      - ignored; kept only for interface compatibility
#   myTitle    - text written as the first line of outfname
#   outfname   - header file name, also the suffix of the result files
#                camera_up_<outfname> and camera_down_<outfname>
#   minnin, maxnin - a set is tested only if minnin < matched rows < maxnin
#   fdrthresh  - adjusted p value cutoff used to size the printed tables
#   fdrtype    - p.adjust() method
# gmt lines are tab separated: set id, URL or description, then gene symbols.
# Symbols are matched to rownames(y) case-insensitively.
doGSEAatonce = function(y=NULL,design=NULL,histgmt="",
                        bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
                        ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
{
  sink('Camera.log')
  on.exit(sink(), add=TRUE)
  genesets = character(0)
  if (bigmt > "")
  {
    genesets = readLines(bigmt)
  }
  if (histgmt > "")
  {
    # concatenate the lines; rbind() here would build a recycled 2 row matrix
    genesets = c(genesets,readLines(histgmt))
  }
  print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
  genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
  write(paste(myTitle,'edgeR GSEA'),file=outfname,append=F)
  urownames = toupper(rownames(y))
  incam = list()  # one logical row index per eligible gene set
  urls = c()
  gsids = c()
  for (gs in genesets) {
    if (length(gs) < 3) { next } # blank or malformed line: no member genes
    gsid = gs[1] # geneset_id
    url = gs[2]
    if (!is.na(url) && (url > "")) { url = paste("<a href=\'",url,"\'>",url,"</a>",sep="") }
    glist = toupper(gs[3:length(gs)]) # member gene symbols
    inglist = urownames %in% glist
    nin = sum(inglist)
    if ((nin > minnin) && (nin < maxnin)) {
      # append as a list element; c() would flatten every index into one vector
      incam[[length(incam)+1]] = inglist
      gsids = c(gsids,gsid)
      urls = c(urls,url)
    }
  }
  if (length(incam) == 0) {
    print.noquote('@@@@@ Camera found no gene sets within the size limits to test')
    return(invisible(NULL))
  }
  names(incam) = gsids
  allcam = camera(y=y,index=incam,design=design)
  # camera() may return rows in a different order from the input list, so
  # labels and URLs are looked up by row name rather than bound by position.
  hit = match(rownames(allcam),gsids)
  allcamres = cbind(geneset=gsids[hit],allcam,URL=urls[hit])
  isup = (allcamres\$Direction == "Up")
  upcam = allcamres[isup,]
  downcam = allcamres[!isup,]
  uscam = upcam[order(upcam\$PValue),]
  uscam\$adjPValue = p.adjust(uscam\$PValue,method=fdrtype)
  nup = max(10,sum((uscam\$adjPValue < fdrthresh))) # always show at least 10
  dscam = downcam[order(downcam\$PValue),]
  dscam\$adjPValue = p.adjust(dscam\$PValue,method=fdrtype)
  ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
  write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
  write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
  print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
  write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
  print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
  write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
  invisible(NULL)
}
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
519
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
520
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
521 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
522 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
523 filterquantile=0.2, subjects=c(),mydesign=NULL,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
524 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
525 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
526 doCook=F,DESeq_fitType="parameteric")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
527 {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
528 # Error handling
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
529 if (length(unique(group))!=2){
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
530 print("Number of conditions identified in experiment does not equal 2")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
531 q()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
532 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
533 require(edgeR)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
534 options(width = 512)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
535 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
536 allN = nrow(Count_Matrix)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
537 nscut = round(ncol(Count_Matrix)/2)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
538 colTotmillionreads = colSums(Count_Matrix)/1e6
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
539 counts.dataframe = as.data.frame(c())
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
540 rawrs = rowSums(Count_Matrix)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
541 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
542 nzN = nrow(nonzerod)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
543 nzrs = rowSums(nonzerod)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
544 zN = allN - nzN
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
545 print('# Quantiles for non-zero row counts:',quote=F)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
546 print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
547 if (useNDF == T)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
548 {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
549 gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
550 lo = colSums(Count_Matrix[!gt1rpin3,])
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
551 workCM = Count_Matrix[gt1rpin3,]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
552 cleanrs = rowSums(workCM)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
553 cleanN = length(cleanrs)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
554 meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
555 print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
556 maint = paste('Filter >=1/million reads in >=',nscut,'samples')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
557 } else {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
558 useme = (nzrs > quantile(nzrs,filterquantile))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
559 workCM = nonzerod[useme,]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
560 lo = colSums(nonzerod[!useme,])
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
561 cleanrs = rowSums(workCM)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
562 cleanN = length(cleanrs)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
563 meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
564 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
565 maint = paste('Filter below',filterquantile,'quantile')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
566 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
567 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
568 allgenes = rownames(workCM)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
569 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
570 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
571 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
572 testreg = str_match(allgenes,reg)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
573 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
574 {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
575 print("@@ using ucsc substitution for urls")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
576 contigurls = paste0(ucsc,"&amp;position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
577 } else {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
578 print.noquote("@@ using genecards substitution for urls")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
579 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
580 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
581 print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
582 cmrowsums = rowSums(workCM)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
583 TName=unique(group)[1]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
584 CName=unique(group)[2]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
585 if (is.null(mydesign)) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
586 if (length(subjects) == 0)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
587 {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
588 mydesign = model.matrix(~group)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
589 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
590 else {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
591 subjf = factor(subjects)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
592 mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
593 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
594 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
595 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
596 print.noquote('Using design matrix:')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
597 print.noquote(mydesign)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
598 if (doedgeR) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
599 sink('edgeR.log')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
600 #### Setup DGEList object
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
601 DGEList = DGEList(counts=workCM, group = group)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
602 DGEList = calcNormFactors(DGEList)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
603
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
604 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
605 comdisp = DGEList\$common.dispersion
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
606 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
607 if (edgeR_priordf > 0) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
608 print.noquote(paste("prior.df =",edgeR_priordf))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
609 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
610 } else {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
611 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
612 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
613 DGLM = glmFit(DGEList,design=mydesign)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
614 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
615 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
616 normData = (1e+06*DGEList\$counts/efflib)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
617 uoutput = cbind(
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
618 Name=as.character(rownames(DGEList\$counts)),
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
619 DE\$table,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
620 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
621 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
622 DGEList\$counts
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
623 )
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
624 soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
625 goodness = gof(DGLM, pcutoff=fdrthresh)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
626 if (sum(goodness\$outlier) > 0) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
627 print.noquote('GLM outliers:')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
628 print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
629 } else {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
630 print('No GLM fit outlier genes found\n')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
631 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
632 z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
633 pdf("edgeR_GoodnessofFit.pdf")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
634 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
635 abline(0,1,lwd=3)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
636 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
637 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
638 estpriorn = getPriorN(DGEList)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
639 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
640 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
641 normData = (1e+06*DGEList\$counts)/efflib
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
642 lnormData = log(normData + 1e-6,10)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
643 uniqueg = unique(group)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
644 #### Plot MDS
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
645 sample_colors = match(group,levels(group))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
646 sampleTypes = levels(factor(group))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
647 print.noquote(sampleTypes)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
648 pdf("edgeR_MDSplot.pdf")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
649 plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
650 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
651 grid(col="blue")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
652 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
653 colnames(normData) = paste( colnames(normData),'N',sep="_")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
654 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
655 nzd = data.frame(log(nonzerod + 1e-2,10))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
656 try( boxPlot(rawrs=nzd,cleanrs=lnormData,maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") )
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
657 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
658 tt = cbind(
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
659 Name=as.character(rownames(DGEList\$counts)),
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
660 DE\$table,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
661 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
662 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
663 )
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
664 print.noquote("# edgeR Top tags\n")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
665 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
666 tt = tt[order(DE\$table\$PValue),]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
667 print.noquote(tt[1:50,])
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
668 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
669 nsig = length(deTags)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
670 print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
671 deColours = ifelse(deTags,'red','black')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
672 pdf("edgeR_BCV_vs_abundance.pdf")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
673 plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance",col.tagwise=deColours)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
674 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
675 dg = DGEList[order(DE\$table\$PValue),]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
676 #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
677 efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
678 normData = (1e+06*dg\$counts/efflib)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
679 outpdfname="edgeR_top_100_heatmap.pdf"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
680 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
681 outSmear = "edgeR_smearplot.pdf"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
682 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
683 smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
684 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
685 norm.factor = DGEList\$samples\$norm.factors
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
686 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
687 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
688 edgeRcounts = rep(0, length(allgenes))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
689 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
690 sink()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
691 } ### doedgeR
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
692 if (doDESeq2 == T)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
693 {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
694 sink("DESeq2.log")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
695 # DESeq2
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
696 require('DESeq2')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
697 library('RColorBrewer')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
698 if (length(subjects) == 0)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
699 {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
700 pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
701 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
702 } else {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
703 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
704 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
705 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
706 #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
707 #rDESeq = results(DESeq2)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
708 #newCountDataSet(workCM, group)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
709 deSeqDatsizefac = estimateSizeFactors(deSEQds)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
710 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
711 resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
712 rDESeq = as.data.frame(results(resDESeq))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
713 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
714 srDESeq = rDESeq[order(rDESeq\$pvalue),]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
715 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
716 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
717 cat("# DESeq top 50\n")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
718 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
719 srDESeq = rDESeq[order(rDESeq\$pvalue),]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
720 print.noquote(srDESeq[1:50,])
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
721 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
722 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
723 DESeqcounts = rep(0, length(allgenes))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
724 DESeqcounts[DESeqcountsindex] = 1
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
725 pdf("DESeq2_dispersion_estimates.pdf")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
726 plotDispEsts(resDESeq)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
727 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
728 ysmall = abs(min(rDESeq\$log2FoldChange))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
729 ybig = abs(max(rDESeq\$log2FoldChange))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
730 ylimit = min(4,ysmall,ybig)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
731 pdf("DESeq2_MA_plot.pdf")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
732 plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
733 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
734 rlogres = rlogTransformation(resDESeq)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
735 sampledists = dist( t( assay(rlogres) ) )
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
736 sdmat = as.matrix(sampledists)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
737 pdf("DESeq2_sample_distance_plot.pdf")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
738 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
739 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
740 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
741 ###outpdfname="DESeq2_top50_heatmap.pdf"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
742 ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
743 sink()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
744 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
745 if ("try-error" %in% class(result)) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
746 print.noquote('DESeq2 plotPCA failed.')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
747 } else {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
748 pdf("DESeq2_PCA_plot.pdf")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
749 #### wtf - print? Seems needed to get this to work
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
750 print(ppca)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
751 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
752 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
753 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
754
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
755 if (doVoom == T) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
756 sink('Voom.log')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
757 if (doedgeR == F) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
758 #### Setup DGEList object
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
759 DGEList = DGEList(counts=workCM, group = group)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
760 DGEList = calcNormFactors(DGEList)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
761 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
762 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
763 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
764 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
765 norm.factor = DGEList\$samples\$norm.factors
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
766 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
767 pdf("Voom_mean_variance_plot.pdf")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
768 dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
769 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
770 # Use limma to fit data
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
771 fit = lmFit(dat.voomed, mydesign)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
772 fit = eBayes(fit)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
773 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
774 qqPlot(descr=paste(myTitle,'Voom-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='Voom_qqplot.pdf')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
775 rownames(rvoom) = rownames(workCM)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
776 rvoom = cbind(rvoom,NReads=cmrowsums)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
777 srvoom = rvoom[order(rvoom\$P.Value),]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
778 write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
779 rvoom = cbind(rvoom,URL=contigurls)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
780 deTags = rownames(rvoom[rvoom\$adj.p.value < fdrthresh,])
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
781 nsig = length(deTags)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
782 cat("# Voom top 50\n")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
783 print(srvoom[1:50,])
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
784 normData = srvoom\$E
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
785 outpdfname="Voom_top_100_heatmap.pdf"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
786 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('VOOM Heatmap',myTitle))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
787 outSmear = "Voom_smearplot.pdf"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
788 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
789 smearPlot(DGEList=rvoom,deTags=deTags, outSmear=outSmear, outMain = outMain)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
790 qqPlot(descr=paste(myTitle,'VOOM adj p QQ plot'),pvector=srvoom\$adj.P.Val,outpdf='Voom_qqplot.pdf')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
791 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
792 topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
793 voomcountsindex = which(allgenes %in% topresults.voom\$ID)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
794 voomcounts = rep(0, length(allgenes))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
795 voomcounts[voomcountsindex] = 1
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
796 sink()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
797 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
798
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
799 if (doCamera) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
800 doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
801 outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
802 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
803
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
804 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
805 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
806 vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
807 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
808 VOOM_limma = voomcounts, row.names = allgenes)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
809 } else if ((doDESeq2==T) && (doedgeR==T)) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
810 vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
811 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
812 } else if ((doVoom==T) && (doedgeR==T)) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
813 vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
814 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
815 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
816
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
817 if (nrow(counts.dataframe > 1)) {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
818 counts.venn = vennCounts(counts.dataframe)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
819 vennf = "Venn_significant_genes_overlap.pdf"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
820 pdf(vennf)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
821 vennDiagram(counts.venn,main=vennmain,col="maroon")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
822 dev.off()
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
823 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
824 } #### doDESeq2 or doVoom
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
825
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
826 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
827 #### Done
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
828
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
829 ###sink(stdout(),append=T,type="message")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
830 builtin_gmt = ""
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
831 history_gmt = ""
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
832 history_gmt_name = ""
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
833 out_edgeR = F # every output defaults to F; the conditional sections that follow replace it only for a selected method
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
834 out_DESeq2 = F
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
835 out_VOOM = F
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
836 doDESeq2 = $DESeq2.doDESeq2 # make these T or F
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
837 doVoom = $doVoom
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
838 doCamera = F # gates the doGSEA call inside edgeIt; fixed at F in this script
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
839 doedgeR = $edgeR.doedgeR
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
840 edgeR_priordf = 0
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
841
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
842
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
843 #if $doVoom == "T":
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
844 out_VOOM = "$out_VOOM" # file that edgeIt writes the sorted Voom table to
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
845 #end if
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
846
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
847 #if $DESeq2.doDESeq2 == "T":
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
848 out_DESeq2 = "$out_DESeq2" # file that edgeIt writes the sorted DESeq2 table to
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
849 DESeq_fitType = "$DESeq2.DESeq_fitType" # forwarded by edgeIt as fitType to estimateDispersions
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
850 #end if
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
851
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
852 #if $edgeR.doedgeR == "T":
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
853 out_edgeR = "$out_edgeR" # file that edgeIt writes the edgeR table to
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
854 edgeR_priordf = $edgeR.edgeR_priordf # handed to edgeIt as priordf
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
855 #end if
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
856
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
857
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
858 if (sum(c(doedgeR,doVoom,doDESeq2)) == 0) # true only when none of the three method flags is set
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
859 {
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
860 write("No methods chosen - nothing to do! Please try again after choosing one or more methods", stderr())
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
861 quit(save="no",status=2) # stop without saving the workspace, exit status 2
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
862 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
863
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
864 Out_Dir = "$html_file.files_path"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
865 Input = "$input1"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
866 TreatmentName = "$treatment_name"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
867 TreatmentCols = "$Treat_cols"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
868 ControlName = "$control_name"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
869 ControlCols= "$Control_cols"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
870 org = "$input1.dbkey"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
871 if (org == "") { org = "hg19"} # fall back to hg19 when the input has no dbkey
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
872 fdrtype = "$fdrtype"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
873 fdrthresh = $fdrthresh
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
874 useNDF = $useNDF
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
875 fQ = $fQ # non-differential centile cutoff
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
876 myTitle = "$title"
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
877 sids = strsplit("$subjectids",',') # comma separated subject ids; an empty string yields no ids
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
878 subjects = unlist(sids)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
879 nsubj = length(subjects)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
880 TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1 # presumably 1-based file column numbers; the -1 matches read.table(row.names=1) consuming column 1 - confirm
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
881 CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1 # same offset as TCols
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
882 cat('Got TCols=')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
883 cat(TCols)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
884 cat('; CCols=')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
885 cat(CCols)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
886 cat('\n')
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
887 useCols = c(TCols,CCols) # treatment columns first, then control columns
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
888 if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
889 Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') #Load tab file assume header
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
890 snames = colnames(Count_Matrix)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
891 nsamples = length(snames)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
892 if (nsubj > 0 & nsubj != nsamples) { # a non-empty subject list must have one id per sample column
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
893 options("show.error.messages"=T)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
894 mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','),
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
895 'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=','))
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
896 write(mess, stderr())
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
897 quit(save="no",status=4) # stop without saving the workspace, exit status 4
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
898 }
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
899 if (length(subjects) != 0) {subjects = subjects[useCols]} # keep subject ids aligned with the selected columns
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
900 Count_Matrix = Count_Matrix[,useCols] ### reorder columns
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
901 rn = rownames(Count_Matrix)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
902 islib = rn %in% c('librarySize','NotInBedRegions') # presumably bookkeeping rows rather than feature counts - confirm
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
903 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
904 Count_Matrix = Count_Matrix[subset(rn,! islib),] # drop those rows before modelling
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
905 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
906 group = factor(group, levels=c(ControlName,TreatmentName)) # control is the first factor level
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
907 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relabel columns
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
908 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
909 fdrtype=fdrtype,mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', # use the adjustment method selected on the tool form
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
910 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects, # NOTE(review): useNDF is fixed at F although a form value is read earlier - confirm intent
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
911 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
912 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
913 sessionInfo() # report the R session and package versions in the job output
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
914 ]]>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
915 </configfile>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
916 </configfiles>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
917 <help>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
918
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
919 **What it does**
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
920
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
921 Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
922 Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
923
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
924 **Input**
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
925
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
926 Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
927 and your favourite gene model to generate inputs. Each row is a genomic feature (e.g. a gene or exon) and each column the
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
928 non-negative integer count of reads from one sample overlapping the feature.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
929 The matrix must have a header row uniquely identifying the source samples, and unique row names in
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
930 the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
931
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
932 **Specifying comparisons**
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
933
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
934 This is basically dumbed down for two factors - case vs control.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
935
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
936 More complex interfaces are possible but painful at present.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
937 Probably need to specify a phenotype file to do this better.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
938 Work in progress. Send code.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
939
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
940 If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
941 put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
942 Supply a list of integers, one for each sample, or an empty string if samples are all independent.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
943 If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
944 Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
945
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
946 So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
947 eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects, you might use
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
948 8,9,1,1,2,2
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
949 as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
950
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
951 **Methods available**
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
952
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
953 You can run 3 popular Bioconductor packages available for count data.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
954
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
955 edgeR - see edgeR_ for details
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
956
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
957 VOOM/limma - see limma_VOOM_ for details
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
958
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
959 DESeq2 - see DESeq2_ for details
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
960
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
961 and optionally camera in edgeR which works better if MSigDB is installed.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
962
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
963 **Outputs**
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
964
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
965 Some helpful plots and analysis results. Note that most of these are produced using R code
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
966 suggested by the excellent documentation and vignettes for the Bioconductor
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
967 packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
968
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
969 **Note on Voom**
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
970
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
971 The voom from limma version 3.16.6 help in R includes this from the authors - but you should read the paper to interpret this method.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
972
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
973 This function is intended to process RNA-Seq or ChIP-Seq data prior to linear modelling in limma.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
974
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
975 voom is an acronym for mean-variance modelling at the observational level.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
976 The key concern is to estimate the mean-variance relationship in the data, then use this to compute appropriate weights for each observation.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
977 Count data almost always show non-trivial mean-variance relationships. Raw counts show increasing variance with increasing count size, while log-counts typically show a decreasing mean-variance trend.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
978 This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
979 The weights are then used in the linear modelling process to adjust for heteroscedasticity.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
980
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
981 In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using lowess.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
982 The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
983 The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
984 Tags with zero counts across all samples are not included in the lowess fit. Optional normalization is performed using normalizeBetweenArrays.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
985 Using fitted values of log2 counts from a linear model fit by lmFit, variances from the mean-variance trend were interpolated for each observation.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
986 This was carried out by approxfun. Inverse variance weights can be used to correct for mean-variance trend in the count data.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
987
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
988
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
989 Author(s)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
990
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
991 Charity Law and Gordon Smyth
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
992
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
993 References
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
994
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
995 Law, CW (2013). Precision weights for gene expression analysis. PhD Thesis. University of Melbourne, Australia.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
996
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
997 Law, CW, Chen, Y, Shi, W, Smyth, GK (2013). Voom! Precision weights unlock linear model analysis tools for RNA-seq read counts.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
998 Technical Report 1 May 2013, Bioinformatics Division, Walter and Eliza Hall Institute of Medical Research, Melbourne, Australia.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
999 http://www.statsci.org/smyth/pubs/VoomPreprint.pdf
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1000
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1001 See Also
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1002
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1003 A voom case study is given in the edgeR User's Guide.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1004
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1005 vooma is a similar function but for microarrays instead of RNA-seq.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1006
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1007
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1008 ***old rant on changes to Bioconductor package variable names between versions***
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1009
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1010 The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1011 breaking this and all other code that assumed the old name for this variable,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1012 between edgeR 2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing).
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1013 This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1014 to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1015 when their old scripts break. This tool currently works with 2.4.6.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1016
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1017 **Note on prior.n**
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1018
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1019 http://seqanswers.com/forums/showthread.php?t=5591 says:
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1020
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1021 *prior.n*
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1022
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1023 The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1024 You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1025 in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1026 tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1027 common likelihood the weight of one observation.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1028
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1029 In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1030 or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1031 you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1032 (squeezing) of the tagwise dispersions. How many samples do you have in your experiment?
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1033 What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1034 If you have more samples, then the tagwise dispersion estimates will be more reliable,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1035 so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1036
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1037
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1038 From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1039
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1040 Dear Dorota,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1041
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1042 The important settings are prior.df and trend.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1043
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1044 prior.n and prior.df are related through prior.df = prior.n * residual.df,
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1045 and your experiment has residual.df = 36 - 12 = 24. So the old setting of
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1046 prior.n=10 is equivalent for your data to prior.df = 240, a very large
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1047 value. Going the other way, the new setting of prior.df=10 is equivalent
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1048 to prior.n=10/24.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1049
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1050 To recover old results with the current software you would use
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1051
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1052 estimateTagwiseDisp(object, prior.df=240, trend="none")
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1053
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1054 To get the new default from old software you would use
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1055
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1056 estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1057
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1058 Actually the old trend method is equivalent to trend="loess" in the new
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1059 software. You should use plotBCV(object) to see whether a trend is
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1060 required.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1061
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1062 Note you could also use
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1063
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1064 prior.n = getPriorN(object, prior.df=10)
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1065
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1066 to map between prior.df and prior.n.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1067
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1068 ----
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1069
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1070 **Attributions**
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1071
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1072 edgeR - edgeR_
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1073
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1074 VOOM/limma - limma_VOOM_
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1075
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1076 DESeq2 - DESeq2_ for details
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1077
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1078 See above for Bioconductor package documentation for packages exposed in Galaxy by this tool and app store package.
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1079
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1080 Galaxy_ (that's what you are using right now!) for gluing everything together
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1081
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1082 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1083 licensed to you under the LGPL_ like other rgenetics artefacts
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1084
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1085 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1086 .. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1087 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1088 .. _DESeq2: http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1089 .. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1090 .. _Galaxy: http://getgalaxy.org
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1091 </help>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1092
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1093 </tool>
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1094
ee47fea51b40 Uploaded
fubar
parents:
diff changeset
1095