annotate rgedgeRpaired_nocamera.xml~ @ 39:571b5a524d6b draft

Uploaded
author fubar
date Sun, 22 Dec 2013 05:53:33 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
39
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1 <tool id="rgDifferentialCount" name="Differential_Count" version="0.22">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
2 <description>models using BioConductor packages</description>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
3 <requirements>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
4 <requirement type="package" version="3.0.1">r3</requirement>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
5 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
6 <requirement type="package" version="9.07">ghostscript</requirement>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
7 <requirement type="package" version="2.12">biocbasics</requirement>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
8 </requirements>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
9
571b5a524d6b Uploaded
fubar
parents:
diff changeset
10 <command interpreter="python">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
11 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
12 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
13 </command>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
14 <inputs>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
15 <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
16 help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
17 <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
18 help="Supply a meaningful name here to remind you what the outputs contain">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
19 <sanitizer invalid_char="">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
20 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
21 </sanitizer>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
22 </param>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
23 <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
24 <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
25 multiple="true" use_header_names="true" size="120" display="checkboxes">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
26 <validator type="no_options" message="Please select at least one column."/>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
27 <sanitizer invalid_char="">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
28 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
29 </sanitizer>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
30 </param>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
31 <param name="control_name" type="text" value="Control" size="50" label="Control Name"/>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
32 <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
33 multiple="true" use_header_names="true" size="120" display="checkboxes" optional="true">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
34 <validator type="no_options" message="Please select at least one column."/>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
35 <sanitizer invalid_char="">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
36 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
37 </sanitizer> <validator type="no_options" message="Please select at least one column."/>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
38 <sanitizer invalid_char="">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
39 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
40 </sanitizer>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
41
571b5a524d6b Uploaded
fubar
parents:
diff changeset
42 </param>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
43 <param name="subjectids" type="text" optional="true" size="120" value = ""
571b5a524d6b Uploaded
fubar
parents:
diff changeset
44 label="IF SUBJECTS NOT ALL INDEPENDENT! Enter comma separated strings to indicate sample labels for (eg) pairing - must be one for every column in input"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
45 help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter 'A99,C21,A99,C21'">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
46 <sanitizer>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
47 <valid initial="string.letters,string.digits"><add value="," /> </valid>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
48 </sanitizer>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
49 </param>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
50 <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
51 help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
52 <param name="useNDF" type="boolean" truevalue="T" falsevalue="F" checked="false" size="1"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
53 label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
54 help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
55
571b5a524d6b Uploaded
fubar
parents:
diff changeset
56 <conditional name="edgeR">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
57 <param name="doedgeR" type="select"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
58 label="Run this model using edgeR"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
59 help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
60 <option value="F">Do not run edgeR</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
61 <option value="T" selected="true">Run edgeR</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
62 </param>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
63 <when value="T">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
64 <param name="edgeR_priordf" type="integer" value="20" size="3"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
65 label="prior.df for tagwise dispersion - lower value = more emphasis on each tag's variance. Replaces prior.n and prior.df = prior.n * residual.df"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
66 help="0 = Use edgeR default. Use a small value to 'smooth' small samples. See edgeR docs and note below"/>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
67 </when>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
68 <when value="F"></when>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
69 </conditional>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
70 <conditional name="DESeq2">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
71 <param name="doDESeq2" type="select"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
72 label="Run the same model with DESeq2 and compare findings"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
73 help="DESeq2 is an update to the DESeq package. It uses different assumptions and methods to edgeR">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
74 <option value="F" selected="true">Do not run DESeq2</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
75 <option value="T">Run DESeq2</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
76 </param>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
77 <when value="T">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
78 <param name="DESeq_fitType" type="select">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
79 <option value="parametric" selected="true">Parametric (default) fit for dispersions</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
80 <option value="local">Local fit - this will automagically be used if parametric fit fails</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
81 <option value="mean">Mean dispersion fit- use this if you really understand what you're doing - read the fine manual linked below in the documentation</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
82 </param>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
83 </when>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
84 <when value="F"> </when>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
85 </conditional>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
86 <param name="doVoom" type="select"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
87 label="Run the same model with Voom/limma and compare findings"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
88 help="Voom uses counts per million and a precise transformation of variance so count data can be analysed using limma">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
89 <option value="F" selected="true">Do not run VOOM</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
90 <option value="T">Run VOOM</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
91 </param>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
92 <!--
571b5a524d6b Uploaded
fubar
parents:
diff changeset
93 <conditional name="camera">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
94 <param name="doCamera" type="select" label="Run the edgeR implementation of Camera GSEA for up/down gene sets"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
95 help="If yes, you can choose a set of genesets to test and/or supply a gmt format geneset collection from your history">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
96 <option value="F" selected="true">Do not run GSEA tests with the Camera algorithm</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
97 <option value="T">Run GSEA tests with the Camera algorithm</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
98 </param>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
99 <when value="T">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
100 <conditional name="gmtSource">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
101 <param name="refgmtSource" type="select"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
102 label="Use a gene set (.gmt) from your history and/or use a built-in (MSigDB etc) gene set">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
103 <option value="indexed" selected="true">Use a built-in gene set</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
104 <option value="history">Use a gene set from my history</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
105 <option value="both">Add a gene set from my history to a built in gene set</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
106 </param>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
107 <when value="indexed">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
108 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
109 <options from_data_table="gseaGMT_3.1">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
110 <filter type="sort_by" column="2" />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
111 <validator type="no_options" message="No GMT v3.1 files are available - please install them"/>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
112 </options>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
113 </param>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
114 </when>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
115 <when value="history">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
116 <param name="ownGMT" type="data" format="gmt" label="Select a Gene Set from your history" />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
117 </when>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
118 <when value="both">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
119 <param name="ownGMT" type="data" format="gseagmt" label="Select a Gene Set from your history" />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
120 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
121 <options from_data_table="gseaGMT_4">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
122 <filter type="sort_by" column="2" />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
123 <validator type="no_options" message="No GMT v4 files are available - please fix tool_data_table and loc files"/>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
124 </options>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
125 </param>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
126 </when>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
127 </conditional>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
128 </when>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
129 <when value="F">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
130 </when>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
131 </conditional>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
132 -->
571b5a524d6b Uploaded
fubar
parents:
diff changeset
133 <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for amily wise error rate control"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
134 help="Conventional default value of 0.05 recommended"/>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
135 <param name="fdrtype" type="select" label="FDR (Type II error) control method"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
136 help="Use fdr or bh typically to control for the number of tests in a reliable way">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
137 <option value="fdr" selected="true">fdr</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
138 <option value="BH">Benjamini Hochberg</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
139 <option value="BY">Benjamini Yukateli</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
140 <option value="bonferroni">Bonferroni</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
141 <option value="hochberg">Hochberg</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
142 <option value="holm">Holm</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
143 <option value="hommel">Hommel</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
144 <option value="none">no control for multiple tests</option>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
145 </param>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
146 </inputs>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
147 <outputs>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
148 <data format="tabular" name="out_edgeR" label="${title}_topTable_edgeR.xls">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
149 <filter>edgeR['doedgeR'] == "T"</filter>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
150 </data>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
151 <data format="tabular" name="out_DESeq2" label="${title}_topTable_DESeq2.xls">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
152 <filter>DESeq2['doDESeq2'] == "T"</filter>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
153 </data>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
154 <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
155 <filter>doVoom == "T"</filter>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
156 </data>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
157 <data format="html" name="html_file" label="${title}.html"/>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
158 </outputs>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
159 <stdio>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
160 <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix" />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
161 </stdio>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
162 <tests>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
163 <test>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
164 <param name='input1' value='test_bams2mx.xls' ftype='tabular' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
165 <param name='treatment_name' value='liver' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
166 <param name='title' value='edgeRtest' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
167 <param name='useNDF' value='' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
168 <param name='doedgeR' value='T' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
169 <param name='doVoom' value='T' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
170 <param name='doDESeq2' value='T' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
171 <param name='fdrtype' value='fdr' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
172 <param name='edgeR_priordf' value="8" />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
173 <param name='fdrthresh' value="0.05" />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
174 <param name='control_name' value='heart' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
175 <param name='subjectids' value='' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
176 <param name='Control_cols' value='3,4,5,9' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
177 <param name='Treat_cols' value='2,6,7,8' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
178 <output name='out_edgeR' file='edgeRtest1out.xls' compare='diff' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
179 <output name='html_file' file='edgeRtest1out.html' compare='diff' lines_diff='20' />
571b5a524d6b Uploaded
fubar
parents:
diff changeset
180 </test>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
181 </tests>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
182
571b5a524d6b Uploaded
fubar
parents:
diff changeset
183 <configfiles>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
184 <configfile name="runme">
571b5a524d6b Uploaded
fubar
parents:
diff changeset
185 <![CDATA[
571b5a524d6b Uploaded
fubar
parents:
diff changeset
186 #
571b5a524d6b Uploaded
fubar
parents:
diff changeset
187 # edgeR.Rscript
571b5a524d6b Uploaded
fubar
parents:
diff changeset
188 # updated npv 2011 for R 2.14.0 and edgeR 2.4.0 by ross
571b5a524d6b Uploaded
fubar
parents:
diff changeset
189 # Performs DGE on a count table containing n replicates of two conditions
571b5a524d6b Uploaded
fubar
parents:
diff changeset
190 #
571b5a524d6b Uploaded
fubar
parents:
diff changeset
191 # Parameters
571b5a524d6b Uploaded
fubar
parents:
diff changeset
192 #
571b5a524d6b Uploaded
fubar
parents:
diff changeset
193 # 1 - Output Dir
571b5a524d6b Uploaded
fubar
parents:
diff changeset
194
571b5a524d6b Uploaded
fubar
parents:
diff changeset
195 # Original edgeR code by: S.Lunke and A.Kaspi
571b5a524d6b Uploaded
fubar
parents:
diff changeset
196 reallybig = log10(.Machine\$double.xmax)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
197 reallysmall = log10(.Machine\$double.xmin)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
198 library('stringr')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
199 library('gplots')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
200 library('edgeR')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
201 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
202 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
203 # Perform clustering for significant pvalues after controlling FWER
571b5a524d6b Uploaded
fubar
parents:
diff changeset
204 samples = colnames(cmat)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
205 gu = unique(group)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
206 gn = rownames(cmat)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
207 if (length(gu) == 2) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
208 col.map = function(g) {if (g==gu[1]) "#FF0000" else "#0000FF"}
571b5a524d6b Uploaded
fubar
parents:
diff changeset
209 pcols = unlist(lapply(group,col.map))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
210 } else {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
211 colours = rainbow(length(gu),start=0,end=4/6)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
212 pcols = colours[match(group,gu)] }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
213 dm = cmat[(! is.na(gn)),]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
214 # remove unlabelled hm rows
571b5a524d6b Uploaded
fubar
parents:
diff changeset
215 nprobes = nrow(dm)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
216 # sub = paste('Showing',nprobes,'contigs ranked for evidence of differential abundance')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
217 if (nprobes > nsamp) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
218 dm =dm[1:nsamp,]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
219 #sub = paste('Showing',nsamp,'contigs ranked for evidence for differential abundance out of',nprobes,'total')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
220 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
221 newcolnames = substr(colnames(dm),1,20)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
222 colnames(dm) = newcolnames
571b5a524d6b Uploaded
fubar
parents:
diff changeset
223 pdf(outpdfname)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
224 heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none',
571b5a524d6b Uploaded
fubar
parents:
diff changeset
225 Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
226 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
227 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
228
571b5a524d6b Uploaded
fubar
parents:
diff changeset
229 hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
230 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
231 # for 2 groups only was
571b5a524d6b Uploaded
fubar
parents:
diff changeset
232 #col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"}
571b5a524d6b Uploaded
fubar
parents:
diff changeset
233 #pcols = unlist(lapply(group,col.map))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
234 gu = unique(group)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
235 colours = rainbow(length(gu),start=0.3,end=0.6)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
236 pcols = colours[match(group,gu)]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
237 nrows = nrow(cmat)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
238 mtitle = paste(myTitle,'Heatmap: n contigs =',nrows)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
239 if (nrows > nsamp) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
240 cmat = cmat[c(1:nsamp),]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
241 mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
242 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
243 newcolnames = substr(colnames(cmat),1,20)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
244 colnames(cmat) = newcolnames
571b5a524d6b Uploaded
fubar
parents:
diff changeset
245 pdf(outpdfname)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
246 heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
247 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
248 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
249
571b5a524d6b Uploaded
fubar
parents:
diff changeset
250 qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
251 # stolen from https://gist.github.com/703512
571b5a524d6b Uploaded
fubar
parents:
diff changeset
252 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
253 o = -log10(sort(pvector,decreasing=F))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
254 e = -log10( 1:length(o)/length(o) )
571b5a524d6b Uploaded
fubar
parents:
diff changeset
255 o[o==-Inf] = reallysmall
571b5a524d6b Uploaded
fubar
parents:
diff changeset
256 o[o==Inf] = reallybig
571b5a524d6b Uploaded
fubar
parents:
diff changeset
257 maint = descr
571b5a524d6b Uploaded
fubar
parents:
diff changeset
258 pdf(outpdf)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
259 plot(e,o,pch=19,cex=1, main=maint, ...,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
260 xlab=expression(Expected~~-log[10](italic(p))),
571b5a524d6b Uploaded
fubar
parents:
diff changeset
261 ylab=expression(Observed~~-log[10](italic(p))),
571b5a524d6b Uploaded
fubar
parents:
diff changeset
262 xlim=c(0,max(e)), ylim=c(0,max(o)))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
263 lines(e,e,col="red")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
264 grid(col = "lightgray", lty = "dotted")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
265 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
266 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
267
571b5a524d6b Uploaded
fubar
parents:
diff changeset
268 smearPlot = function(DGEList,deTags, outSmear, outMain)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
269 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
270 pdf(outSmear)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
271 plotSmear(DGEList,de.tags=deTags,main=outMain)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
272 grid(col="lightgray", lty="dotted")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
273 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
274 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
275
571b5a524d6b Uploaded
fubar
parents:
diff changeset
276 boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
277 { #
571b5a524d6b Uploaded
fubar
parents:
diff changeset
278 nc = ncol(rawrs)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
279 #### for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA}
571b5a524d6b Uploaded
fubar
parents:
diff changeset
280 fullnames = colnames(rawrs)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
281 newcolnames = substr(colnames(rawrs),1,20)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
282 colnames(rawrs) = newcolnames
571b5a524d6b Uploaded
fubar
parents:
diff changeset
283 newcolnames = substr(colnames(cleanrs),1,20)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
284 colnames(cleanrs) = newcolnames
571b5a524d6b Uploaded
fubar
parents:
diff changeset
285 defpar = par(no.readonly=T)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
286 print.noquote('raw contig counts by sample:')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
287 print.noquote(summary(rawrs))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
288 print.noquote('normalised contig counts by sample:')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
289 print.noquote(summary(cleanrs))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
290 pdf(pdfname)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
291 par(mfrow=c(1,2))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
292 boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('Raw:',maint))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
293 grid(col="lightgray",lty="dotted")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
294 boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('After ',maint))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
295 grid(col="lightgray",lty="dotted")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
296 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
297 pdfname = "sample_counts_histogram.pdf"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
298 nc = ncol(rawrs)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
299 print.noquote(paste('Using ncol rawrs=',nc))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
300 ncroot = round(sqrt(nc))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
301 if (ncroot*ncroot < nc) { ncroot = ncroot + 1 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
302 m = c()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
303 for (i in c(1:nc)) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
304 rhist = hist(rawrs[,i],breaks=100,plot=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
305 m = append(m,max(rhist\$counts))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
306 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
307 ymax = max(m)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
308 ncols = length(fullnames)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
309 if (ncols > 20)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
310 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
311 scale = 7*ncols/20
571b5a524d6b Uploaded
fubar
parents:
diff changeset
312 pdf(pdfname,width=scale,height=scale)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
313 } else {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
314 pdf(pdfname)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
315 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
316 par(mfrow=c(ncroot,ncroot))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
317 for (i in c(1:nc)) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
318 hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw count', col="maroon",
571b5a524d6b Uploaded
fubar
parents:
diff changeset
319 breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
320 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
321 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
322 par(defpar)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
323
571b5a524d6b Uploaded
fubar
parents:
diff changeset
324 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
325
571b5a524d6b Uploaded
fubar
parents:
diff changeset
326 cumPlot = function(rawrs,cleanrs,maint,myTitle)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
327 { # updated to use ecdf
571b5a524d6b Uploaded
fubar
parents:
diff changeset
328 pdfname = "Filtering_rowsum_bar_charts.pdf"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
329 defpar = par(no.readonly=T)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
330 lrs = log(rawrs,10)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
331 lim = max(lrs)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
332 pdf(pdfname)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
333 par(mfrow=c(2,1))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
334 hist(lrs,breaks=100,main=paste('Before:',maint),xlab="# Reads (log)",
571b5a524d6b Uploaded
fubar
parents:
diff changeset
335 ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
336 grid(col="lightgray", lty="dotted")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
337 lrs = log(cleanrs,10)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
338 hist(lrs,breaks=100,main=paste('After:',maint),xlab="# Reads (log)",
571b5a524d6b Uploaded
fubar
parents:
diff changeset
339 ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
340 grid(col="lightgray", lty="dotted")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
341 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
342 par(defpar)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
343 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
344
571b5a524d6b Uploaded
fubar
parents:
diff changeset
345 cumPlot1 = function(rawrs,cleanrs,maint,myTitle)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
346 { # updated to use ecdf
571b5a524d6b Uploaded
fubar
parents:
diff changeset
347 pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
348 pdf(pdfname)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
349 par(mfrow=c(2,1))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
350 lastx = max(rawrs)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
351 rawe = knots(ecdf(rawrs))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
352 cleane = knots(ecdf(cleanrs))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
353 cy = 1:length(cleane)/length(cleane)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
354 ry = 1:length(rawe)/length(rawe)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
355 plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads",
571b5a524d6b Uploaded
fubar
parents:
diff changeset
356 ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
357 grid(col="blue")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
358 plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads",
571b5a524d6b Uploaded
fubar
parents:
diff changeset
359 ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
360 grid(col="blue")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
361 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
362 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
363
571b5a524d6b Uploaded
fubar
parents:
diff changeset
364
571b5a524d6b Uploaded
fubar
parents:
diff changeset
365
571b5a524d6b Uploaded
fubar
parents:
diff changeset
366 doGSEA = function(y=NULL,design=NULL,histgmt="",
571b5a524d6b Uploaded
fubar
parents:
diff changeset
367 bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
571b5a524d6b Uploaded
fubar
parents:
diff changeset
368 ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
369 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
370 sink('Camera.log')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
371 genesets = c()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
372 if (bigmt > "")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
373 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
374 bigenesets = readLines(bigmt)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
375 genesets = bigenesets
571b5a524d6b Uploaded
fubar
parents:
diff changeset
376 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
377 if (histgmt > "")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
378 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
379 hgenesets = readLines(histgmt)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
380 if (bigmt > "") {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
381 genesets = rbind(genesets,hgenesets)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
382 } else {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
383 genesets = hgenesets
571b5a524d6b Uploaded
fubar
parents:
diff changeset
384 } # use only history if no bi
571b5a524d6b Uploaded
fubar
parents:
diff changeset
385 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
386 print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
387 genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
571b5a524d6b Uploaded
fubar
parents:
diff changeset
388 outf = outfname
571b5a524d6b Uploaded
fubar
parents:
diff changeset
389 head=paste(myTitle,'edgeR GSEA')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
390 write(head,file=outfname,append=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
391 ntest=length(genesets)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
392 urownames = toupper(rownames(y))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
393 upcam = c()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
394 downcam = c()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
395 for (i in 1:ntest) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
396 gs = unlist(genesets[i])
571b5a524d6b Uploaded
fubar
parents:
diff changeset
397 g = gs[1] # geneset_id
571b5a524d6b Uploaded
fubar
parents:
diff changeset
398 u = gs[2]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
399 if (u > "") { u = paste("<a href=\'",u,"\'>",u,"</a>",sep="") }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
400 glist = gs[3:length(gs)] # member gene symbols
571b5a524d6b Uploaded
fubar
parents:
diff changeset
401 glist = toupper(glist)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
402 inglist = urownames %in% glist
571b5a524d6b Uploaded
fubar
parents:
diff changeset
403 nin = sum(inglist)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
404 if ((nin > minnin) && (nin < maxnin)) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
405 ### print(paste('@@found',sum(inglist),'genes in glist'))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
406 camres = camera(y=y,index=inglist,design=design)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
407 if (! is.null(camres)) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
408 rownames(camres) = g # gene set name
571b5a524d6b Uploaded
fubar
parents:
diff changeset
409 camres = cbind(GeneSet=g,URL=u,camres)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
410 if (camres\$Direction == "Up")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
411 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
412 upcam = rbind(upcam,camres) } else {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
413 downcam = rbind(downcam,camres)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
414 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
415 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
416 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
417 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
418 uscam = upcam[order(upcam\$PValue),]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
419 unadjp = uscam\$PValue
571b5a524d6b Uploaded
fubar
parents:
diff changeset
420 uscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
421 nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
422 dscam = downcam[order(downcam\$PValue),]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
423 unadjp = dscam\$PValue
571b5a524d6b Uploaded
fubar
parents:
diff changeset
424 dscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
425 ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
426 write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
427 write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
428 print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
429 write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
430 print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
431 write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
432 sink()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
433 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
434
571b5a524d6b Uploaded
fubar
parents:
diff changeset
435
571b5a524d6b Uploaded
fubar
parents:
diff changeset
436
571b5a524d6b Uploaded
fubar
parents:
diff changeset
437
571b5a524d6b Uploaded
fubar
parents:
diff changeset
438 doGSEAatonce = function(y=NULL,design=NULL,histgmt="",
571b5a524d6b Uploaded
fubar
parents:
diff changeset
439 bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
571b5a524d6b Uploaded
fubar
parents:
diff changeset
440 ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
441 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
442 sink('Camera.log')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
443 genesets = c()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
444 if (bigmt > "")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
445 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
446 bigenesets = readLines(bigmt)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
447 genesets = bigenesets
571b5a524d6b Uploaded
fubar
parents:
diff changeset
448 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
449 if (histgmt > "")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
450 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
451 hgenesets = readLines(histgmt)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
452 if (bigmt > "") {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
453 genesets = rbind(genesets,hgenesets)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
454 } else {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
455 genesets = hgenesets
571b5a524d6b Uploaded
fubar
parents:
diff changeset
456 } # use only history if no bi
571b5a524d6b Uploaded
fubar
parents:
diff changeset
457 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
458 print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
459 genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
571b5a524d6b Uploaded
fubar
parents:
diff changeset
460 outf = outfname
571b5a524d6b Uploaded
fubar
parents:
diff changeset
461 head=paste(myTitle,'edgeR GSEA')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
462 write(head,file=outfname,append=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
463 ntest=length(genesets)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
464 urownames = toupper(rownames(y))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
465 upcam = c()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
466 downcam = c()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
467 incam = c()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
468 urls = c()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
469 gsids = c()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
470 for (i in 1:ntest) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
471 gs = unlist(genesets[i])
571b5a524d6b Uploaded
fubar
parents:
diff changeset
472 gsid = gs[1] # geneset_id
571b5a524d6b Uploaded
fubar
parents:
diff changeset
473 url = gs[2]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
474 if (url > "") { url = paste("<a href=\'",url,"\'>",url,"</a>",sep="") }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
475 glist = gs[3:length(gs)] # member gene symbols
571b5a524d6b Uploaded
fubar
parents:
diff changeset
476 glist = toupper(glist)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
477 inglist = urownames %in% glist
571b5a524d6b Uploaded
fubar
parents:
diff changeset
478 nin = sum(inglist)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
479 if ((nin > minnin) && (nin < maxnin)) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
480 incam = c(incam,inglist)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
481 gsids = c(gsids,gsid)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
482 urls = c(urls,url)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
483 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
484 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
485 incam = as.list(incam)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
486 names(incam) = gsids
571b5a524d6b Uploaded
fubar
parents:
diff changeset
487 allcam = camera(y=y,index=incam,design=design)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
488 allcamres = cbind(geneset=gsids,allcam,URL=urls)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
489 for (i in 1:ntest) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
490 camres = allcamres[i]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
491 res = try(test = (camres\$Direction == "Up"))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
492 if ("try-error" %in% class(res)) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
493 cat("test failed, camres = :")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
494 print.noquote(camres)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
495 } else { if (camres\$Direction == "Up")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
496 { upcam = rbind(upcam,camres)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
497 } else { downcam = rbind(downcam,camres)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
498 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
499
571b5a524d6b Uploaded
fubar
parents:
diff changeset
500 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
501 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
502 uscam = upcam[order(upcam\$PValue),]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
503 unadjp = uscam\$PValue
571b5a524d6b Uploaded
fubar
parents:
diff changeset
504 uscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
505 nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
506 dscam = downcam[order(downcam\$PValue),]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
507 unadjp = dscam\$PValue
571b5a524d6b Uploaded
fubar
parents:
diff changeset
508 dscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
509 ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
510 write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
511 write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
512 print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
513 write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
514 print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
515 write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
516 sink()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
517 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
518
571b5a524d6b Uploaded
fubar
parents:
diff changeset
519
571b5a524d6b Uploaded
fubar
parents:
diff changeset
520 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
521 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
522 filterquantile=0.2, subjects=c(),mydesign=NULL,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
523 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
571b5a524d6b Uploaded
fubar
parents:
diff changeset
524 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
571b5a524d6b Uploaded
fubar
parents:
diff changeset
525 doCook=F,DESeq_fitType="parameteric")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
526 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
527 # Error handling
571b5a524d6b Uploaded
fubar
parents:
diff changeset
528 if (length(unique(group))!=2){
571b5a524d6b Uploaded
fubar
parents:
diff changeset
529 print("Number of conditions identified in experiment does not equal 2")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
530 q()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
531 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
532 require(edgeR)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
533 options(width = 512)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
534 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
535 allN = nrow(Count_Matrix)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
536 nscut = round(ncol(Count_Matrix)/2)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
537 colTotmillionreads = colSums(Count_Matrix)/1e6
571b5a524d6b Uploaded
fubar
parents:
diff changeset
538 counts.dataframe = as.data.frame(c())
571b5a524d6b Uploaded
fubar
parents:
diff changeset
539 rawrs = rowSums(Count_Matrix)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
540 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
571b5a524d6b Uploaded
fubar
parents:
diff changeset
541 nzN = nrow(nonzerod)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
542 nzrs = rowSums(nonzerod)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
543 zN = allN - nzN
571b5a524d6b Uploaded
fubar
parents:
diff changeset
544 print('# Quantiles for non-zero row counts:',quote=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
545 print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
546 if (useNDF == T)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
547 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
548 gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut
571b5a524d6b Uploaded
fubar
parents:
diff changeset
549 lo = colSums(Count_Matrix[!gt1rpin3,])
571b5a524d6b Uploaded
fubar
parents:
diff changeset
550 workCM = Count_Matrix[gt1rpin3,]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
551 cleanrs = rowSums(workCM)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
552 cleanN = length(cleanrs)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
553 meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
554 print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
555 maint = paste('Filter >=1/million reads in >=',nscut,'samples')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
556 } else {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
557 useme = (nzrs > quantile(nzrs,filterquantile))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
558 workCM = nonzerod[useme,]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
559 lo = colSums(nonzerod[!useme,])
571b5a524d6b Uploaded
fubar
parents:
diff changeset
560 cleanrs = rowSums(workCM)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
561 cleanN = length(cleanrs)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
562 meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
563 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
564 maint = paste('Filter below',filterquantile,'quantile')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
565 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
566 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
567 allgenes = rownames(workCM)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
568 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
569 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
570 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
571 testreg = str_match(allgenes,reg)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
572 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string
571b5a524d6b Uploaded
fubar
parents:
diff changeset
573 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
574 print("@@ using ucsc substitution for urls")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
575 contigurls = paste0(ucsc,"&amp;position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
576 } else {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
577 print.noquote("@@ using genecards substitution for urls")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
578 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
579 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
580 print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
581 cmrowsums = rowSums(workCM)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
582 TName=unique(group)[1]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
583 CName=unique(group)[2]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
584 if (is.null(mydesign)) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
585 if (length(subjects) == 0)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
586 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
587 mydesign = model.matrix(~group)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
588 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
589 else {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
590 subjf = factor(subjects)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
591 mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it
571b5a524d6b Uploaded
fubar
parents:
diff changeset
592 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
593 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
594 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
595 print.noquote('Using design matrix:')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
596 print.noquote(mydesign)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
597 if (doedgeR) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
598 sink('edgeR.log')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
599 #### Setup DGEList object
571b5a524d6b Uploaded
fubar
parents:
diff changeset
600 DGEList = DGEList(counts=workCM, group = group)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
601 DGEList = calcNormFactors(DGEList)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
602
571b5a524d6b Uploaded
fubar
parents:
diff changeset
603 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
604 comdisp = DGEList\$common.dispersion
571b5a524d6b Uploaded
fubar
parents:
diff changeset
605 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
606 if (edgeR_priordf > 0) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
607 print.noquote(paste("prior.df =",edgeR_priordf))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
608 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
609 } else {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
610 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
611 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
612 DGLM = glmFit(DGEList,design=mydesign)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
613 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
571b5a524d6b Uploaded
fubar
parents:
diff changeset
614 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
571b5a524d6b Uploaded
fubar
parents:
diff changeset
615 normData = (1e+06*DGEList\$counts/efflib)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
616 uoutput = cbind(
571b5a524d6b Uploaded
fubar
parents:
diff changeset
617 Name=as.character(rownames(DGEList\$counts)),
571b5a524d6b Uploaded
fubar
parents:
diff changeset
618 DE\$table,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
619 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
571b5a524d6b Uploaded
fubar
parents:
diff changeset
620 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
621 DGEList\$counts
571b5a524d6b Uploaded
fubar
parents:
diff changeset
622 )
571b5a524d6b Uploaded
fubar
parents:
diff changeset
623 soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
571b5a524d6b Uploaded
fubar
parents:
diff changeset
624 goodness = gof(DGLM, pcutoff=fdrthresh)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
625 if (sum(goodness\$outlier) > 0) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
626 print.noquote('GLM outliers:')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
627 print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
628 } else {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
629 print('No GLM fit outlier genes found\n')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
630 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
631 z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
632 pdf("edgeR_GoodnessofFit.pdf")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
633 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
634 abline(0,1,lwd=3)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
635 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
636 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
637 estpriorn = getPriorN(DGEList)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
638 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
639 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
571b5a524d6b Uploaded
fubar
parents:
diff changeset
640 normData = ((1e+06*DGEList\$counts)+1e-7)/efflib)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
641 lnormData = log(normData,10)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
642 uniqueg = unique(group)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
643 #### Plot MDS
571b5a524d6b Uploaded
fubar
parents:
diff changeset
644 sample_colors = match(group,levels(group))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
645 sampleTypes = levels(factor(group))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
646 print.noquote(sampleTypes)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
647 pdf("edgeR_MDSplot.pdf")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
648 plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
649 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
650 grid(col="blue")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
651 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
652 colnames(normData) = paste( colnames(normData),'N',sep="_")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
653 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
654 nzd = data.frame(log(nonzerod + 1e-2,10))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
655 try( boxPlot(rawrs=nzd,cleanrs=lnormData,maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") )
571b5a524d6b Uploaded
fubar
parents:
diff changeset
656 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
657 tt = cbind(
571b5a524d6b Uploaded
fubar
parents:
diff changeset
658 Name=as.character(rownames(DGEList\$counts)),
571b5a524d6b Uploaded
fubar
parents:
diff changeset
659 DE\$table,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
660 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
571b5a524d6b Uploaded
fubar
parents:
diff changeset
661 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums
571b5a524d6b Uploaded
fubar
parents:
diff changeset
662 )
571b5a524d6b Uploaded
fubar
parents:
diff changeset
663 print.noquote("# edgeR Top tags\n")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
664 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
571b5a524d6b Uploaded
fubar
parents:
diff changeset
665 tt = tt[order(DE\$table\$PValue),]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
666 print.noquote(tt[1:50,])
571b5a524d6b Uploaded
fubar
parents:
diff changeset
667 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
571b5a524d6b Uploaded
fubar
parents:
diff changeset
668 nsig = length(deTags)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
669 print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
670 deColours = ifelse(deTags,'red','black')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
671 pdf("edgeR_BCV_vs_abundance.pdf")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
672 plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance",col.tagwise=deColours)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
673 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
674 dg = DGEList[order(DE\$table\$PValue),]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
675 #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
676 efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors
571b5a524d6b Uploaded
fubar
parents:
diff changeset
677 normData = (1e+06*dg\$counts/efflib)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
678 outpdfname="edgeR_top_100_heatmap.pdf"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
679 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
680 outSmear = "edgeR_smearplot.pdf"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
681 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
682 smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
683 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
684 norm.factor = DGEList\$samples\$norm.factors
571b5a524d6b Uploaded
fubar
parents:
diff changeset
685 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
686 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
687 edgeRcounts = rep(0, length(allgenes))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
688 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits
571b5a524d6b Uploaded
fubar
parents:
diff changeset
689 sink()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
690 } ### doedgeR
571b5a524d6b Uploaded
fubar
parents:
diff changeset
691 if (doDESeq2 == T)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
692 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
693 sink("DESeq2.log")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
694 # DESeq2
571b5a524d6b Uploaded
fubar
parents:
diff changeset
695 require('DESeq2')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
696 library('RColorBrewer')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
697 if (length(subjects) == 0)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
698 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
699 pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
700 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
701 } else {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
702 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
703 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
704 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
705 #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
706 #rDESeq = results(DESeq2)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
707 #newCountDataSet(workCM, group)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
708 deSeqDatsizefac = estimateSizeFactors(deSEQds)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
709 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
710 resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
711 rDESeq = as.data.frame(results(resDESeq))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
712 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
713 srDESeq = rDESeq[order(rDESeq\$pvalue),]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
714 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
715 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
716 cat("# DESeq top 50\n")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
717 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
718 srDESeq = rDESeq[order(rDESeq\$pvalue),]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
719 print.noquote(srDESeq[1:50,])
571b5a524d6b Uploaded
fubar
parents:
diff changeset
720 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
721 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
722 DESeqcounts = rep(0, length(allgenes))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
723 DESeqcounts[DESeqcountsindex] = 1
571b5a524d6b Uploaded
fubar
parents:
diff changeset
724 pdf("DESeq2_dispersion_estimates.pdf")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
725 plotDispEsts(resDESeq)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
726 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
727 ysmall = abs(min(rDESeq\$log2FoldChange))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
728 ybig = abs(max(rDESeq\$log2FoldChange))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
729 ylimit = min(4,ysmall,ybig)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
730 pdf("DESeq2_MA_plot.pdf")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
731 plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
732 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
733 rlogres = rlogTransformation(resDESeq)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
734 sampledists = dist( t( assay(rlogres) ) )
571b5a524d6b Uploaded
fubar
parents:
diff changeset
735 sdmat = as.matrix(sampledists)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
736 pdf("DESeq2_sample_distance_plot.pdf")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
737 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
571b5a524d6b Uploaded
fubar
parents:
diff changeset
738 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
739 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
740 ###outpdfname="DESeq2_top50_heatmap.pdf"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
741 ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
742 sink()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
743 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
571b5a524d6b Uploaded
fubar
parents:
diff changeset
744 if ("try-error" %in% class(result)) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
745 print.noquote('DESeq2 plotPCA failed.')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
746 } else {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
747 pdf("DESeq2_PCA_plot.pdf")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
748 #### wtf - print? Seems needed to get this to work
571b5a524d6b Uploaded
fubar
parents:
diff changeset
749 print(ppca)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
750 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
751 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
752 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
753
571b5a524d6b Uploaded
fubar
parents:
diff changeset
754 if (doVoom == T) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
755 sink('Voom.log')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
756 if (doedgeR == F) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
757 #### Setup DGEList object
571b5a524d6b Uploaded
fubar
parents:
diff changeset
758 DGEList = DGEList(counts=workCM, group = group)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
759 DGEList = calcNormFactors(DGEList)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
760 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
761 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
762 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
763 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
764 norm.factor = DGEList\$samples\$norm.factors
571b5a524d6b Uploaded
fubar
parents:
diff changeset
765 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
766 pdf("Voom_mean_variance_plot.pdf")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
767 dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
768 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
769 # Use limma to fit data
571b5a524d6b Uploaded
fubar
parents:
diff changeset
770 fit = lmFit(dat.voomed, mydesign)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
771 fit = eBayes(fit)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
772 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
773 qqPlot(descr=paste(myTitle,'Voom-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='Voom_qqplot.pdf')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
774 rownames(rvoom) = rownames(workCM)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
775 rvoom = cbind(rvoom,NReads=cmrowsums)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
776 srvoom = rvoom[order(rvoom\$P.Value),]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
777 write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
778 rvoom = cbind(rvoom,URL=contigurls)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
779 deTags = rownames(rvoom[rvoom\$adj.p.value < fdrthresh,])
571b5a524d6b Uploaded
fubar
parents:
diff changeset
780 srvoom = rvoom[order(rvoom\$P.Value),]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
781 cat("# Voom top 50\n")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
782 print(srvoom[1:50,])
571b5a524d6b Uploaded
fubar
parents:
diff changeset
783 normData = srvoom\$E
571b5a524d6b Uploaded
fubar
parents:
diff changeset
784 outpdfname="VOOM_top_100_heatmap.pdf"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
785 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('VOOM Heatmap',myTitle))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
786 outSmear = "VOOM_smearplot.pdf"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
787 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
788 smearPlot(DGEList=rvoom,deTags=deTags, outSmear=outSmear, outMain = outMain)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
789 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='VOOM_qqplot.pdf')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
790 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
571b5a524d6b Uploaded
fubar
parents:
diff changeset
791 topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
792 voomcountsindex = which(allgenes %in% topresults.voom\$ID)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
793 voomcounts = rep(0, length(allgenes))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
794 voomcounts[voomcountsindex] = 1
571b5a524d6b Uploaded
fubar
parents:
diff changeset
795 sink()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
796 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
797
571b5a524d6b Uploaded
fubar
parents:
diff changeset
798 if (doCamera) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
799 doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
800 outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
801 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
802
571b5a524d6b Uploaded
fubar
parents:
diff changeset
803 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
804 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
805 vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
806 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
807 VOOM_limma = voomcounts, row.names = allgenes)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
808 } else if ((doDESeq2==T) && (doedgeR==T)) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
809 vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
810 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
811 } else if ((doVoom==T) && (doedgeR==T)) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
812 vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
813 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
814 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
815
571b5a524d6b Uploaded
fubar
parents:
diff changeset
816 if (nrow(counts.dataframe > 1)) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
817 counts.venn = vennCounts(counts.dataframe)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
818 vennf = "Venn_significant_genes_overlap.pdf"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
819 pdf(vennf)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
820 vennDiagram(counts.venn,main=vennmain,col="maroon")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
821 dev.off()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
822 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
823 } #### doDESeq2 or doVoom
571b5a524d6b Uploaded
fubar
parents:
diff changeset
824
571b5a524d6b Uploaded
fubar
parents:
diff changeset
825 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
826 #### Done
571b5a524d6b Uploaded
fubar
parents:
diff changeset
827
571b5a524d6b Uploaded
fubar
parents:
diff changeset
828 ###sink(stdout(),append=T,type="message")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
829 builtin_gmt = ""
571b5a524d6b Uploaded
fubar
parents:
diff changeset
830 history_gmt = ""
571b5a524d6b Uploaded
fubar
parents:
diff changeset
831 history_gmt_name = ""
571b5a524d6b Uploaded
fubar
parents:
diff changeset
832 out_edgeR = F
571b5a524d6b Uploaded
fubar
parents:
diff changeset
833 out_DESeq2 = F
571b5a524d6b Uploaded
fubar
parents:
diff changeset
834 out_VOOM = "$out_VOOM"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
835 doDESeq2 = $DESeq2.doDESeq2 # make these T or F
571b5a524d6b Uploaded
fubar
parents:
diff changeset
836 doVoom = $doVoom
571b5a524d6b Uploaded
fubar
parents:
diff changeset
837 doCamera = F
571b5a524d6b Uploaded
fubar
parents:
diff changeset
838 doedgeR = $edgeR.doedgeR
571b5a524d6b Uploaded
fubar
parents:
diff changeset
839 edgeR_priordf = 0
571b5a524d6b Uploaded
fubar
parents:
diff changeset
840
571b5a524d6b Uploaded
fubar
parents:
diff changeset
841
571b5a524d6b Uploaded
fubar
parents:
diff changeset
842 #if $doVoom == "T":
571b5a524d6b Uploaded
fubar
parents:
diff changeset
843 out_VOOM = "$out_VOOM"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
844 #end if
571b5a524d6b Uploaded
fubar
parents:
diff changeset
845
571b5a524d6b Uploaded
fubar
parents:
diff changeset
846 #if $DESeq2.doDESeq2 == "T":
571b5a524d6b Uploaded
fubar
parents:
diff changeset
847 out_DESeq2 = "$out_DESeq2"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
848 DESeq_fitType = "$DESeq2.DESeq_fitType"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
849 #end if
571b5a524d6b Uploaded
fubar
parents:
diff changeset
850
571b5a524d6b Uploaded
fubar
parents:
diff changeset
851 #if $edgeR.doedgeR == "T":
571b5a524d6b Uploaded
fubar
parents:
diff changeset
852 out_edgeR = "$out_edgeR"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
853 edgeR_priordf = $edgeR.edgeR_priordf
571b5a524d6b Uploaded
fubar
parents:
diff changeset
854 #end if
571b5a524d6b Uploaded
fubar
parents:
diff changeset
855
571b5a524d6b Uploaded
fubar
parents:
diff changeset
856
571b5a524d6b Uploaded
fubar
parents:
diff changeset
857 if (sum(c(doedgeR,doVoom,doDESeq2)) == 0)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
858 {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
859 write("No methods chosen - nothing to do! Please try again after choosing one or more methods", stderr())
571b5a524d6b Uploaded
fubar
parents:
diff changeset
860 quit(save="no",status=2)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
861 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
862
571b5a524d6b Uploaded
fubar
parents:
diff changeset
863 Out_Dir = "$html_file.files_path"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
864 Input = "$input1"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
865 TreatmentName = "$treatment_name"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
866 TreatmentCols = "$Treat_cols"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
867 ControlName = "$control_name"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
868 ControlCols= "$Control_cols"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
869 org = "$input1.dbkey"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
870 if (org == "") { org = "hg19"}
571b5a524d6b Uploaded
fubar
parents:
diff changeset
871 fdrtype = "$fdrtype"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
872 fdrthresh = $fdrthresh
571b5a524d6b Uploaded
fubar
parents:
diff changeset
873 useNDF = $useNDF
571b5a524d6b Uploaded
fubar
parents:
diff changeset
874 fQ = $fQ # non-differential centile cutoff
571b5a524d6b Uploaded
fubar
parents:
diff changeset
875 myTitle = "$title"
571b5a524d6b Uploaded
fubar
parents:
diff changeset
876 sids = strsplit("$subjectids",',')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
877 subjects = unlist(sids)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
878 nsubj = length(subjects)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
879 TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1
571b5a524d6b Uploaded
fubar
parents:
diff changeset
880 CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1
571b5a524d6b Uploaded
fubar
parents:
diff changeset
881 cat('Got TCols=')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
882 cat(TCols)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
883 cat('; CCols=')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
884 cat(CCols)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
885 cat('\n')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
886 useCols = c(TCols,CCols)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
887 if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
888 Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') #Load tab file assume header
571b5a524d6b Uploaded
fubar
parents:
diff changeset
889 snames = colnames(Count_Matrix)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
890 nsamples = length(snames)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
891 if (nsubj > 0 & nsubj != nsamples) {
571b5a524d6b Uploaded
fubar
parents:
diff changeset
892 options("show.error.messages"=T)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
893 mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','),
571b5a524d6b Uploaded
fubar
parents:
diff changeset
894 'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=','))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
895 write(mess, stderr())
571b5a524d6b Uploaded
fubar
parents:
diff changeset
896 quit(save="no",status=4)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
897 }
571b5a524d6b Uploaded
fubar
parents:
diff changeset
898 if (length(subjects) != 0) {subjects = subjects[useCols]}
571b5a524d6b Uploaded
fubar
parents:
diff changeset
899 Count_Matrix = Count_Matrix[,useCols] ### reorder columns
571b5a524d6b Uploaded
fubar
parents:
diff changeset
900 rn = rownames(Count_Matrix)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
901 islib = rn %in% c('librarySize','NotInBedRegions')
571b5a524d6b Uploaded
fubar
parents:
diff changeset
902 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
571b5a524d6b Uploaded
fubar
parents:
diff changeset
903 Count_Matrix = Count_Matrix[subset(rn,! islib),]
571b5a524d6b Uploaded
fubar
parents:
diff changeset
904 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor
571b5a524d6b Uploaded
fubar
parents:
diff changeset
905 group = factor(group, levels=c(ControlName,TreatmentName))
571b5a524d6b Uploaded
fubar
parents:
diff changeset
906 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relable columns
571b5a524d6b Uploaded
fubar
parents:
diff changeset
907 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
908 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.',
571b5a524d6b Uploaded
fubar
parents:
diff changeset
909 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
910 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
911 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
912 sessionInfo()
571b5a524d6b Uploaded
fubar
parents:
diff changeset
913 ]]>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
914 </configfile>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
915 </configfiles>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
916 <help>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
917
571b5a524d6b Uploaded
fubar
parents:
diff changeset
918 **What it does**
571b5a524d6b Uploaded
fubar
parents:
diff changeset
919
571b5a524d6b Uploaded
fubar
parents:
diff changeset
920 Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
921 Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
922
571b5a524d6b Uploaded
fubar
parents:
diff changeset
923 **Input**
571b5a524d6b Uploaded
fubar
parents:
diff changeset
924
571b5a524d6b Uploaded
fubar
parents:
diff changeset
925 Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper
571b5a524d6b Uploaded
fubar
parents:
diff changeset
926 and your fave gene model to generate inputs. Each row is a genomic feature (gene or exon eg) and each column the
571b5a524d6b Uploaded
fubar
parents:
diff changeset
927 non-negative integer count of reads from one sample overlapping the feature.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
928 The matrix must have a header row uniquely identifying the source samples, and unique row names in
571b5a524d6b Uploaded
fubar
parents:
diff changeset
929 the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
930
571b5a524d6b Uploaded
fubar
parents:
diff changeset
931 **Specifying comparisons**
571b5a524d6b Uploaded
fubar
parents:
diff changeset
932
571b5a524d6b Uploaded
fubar
parents:
diff changeset
933 This is basically dumbed down for two factors - case vs control.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
934
571b5a524d6b Uploaded
fubar
parents:
diff changeset
935 More complex interfaces are possible but painful at present.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
936 Probably need to specify a phenotype file to do this better.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
937 Work in progress. Send code.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
938
571b5a524d6b Uploaded
fubar
parents:
diff changeset
939 If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),
571b5a524d6b Uploaded
fubar
parents:
diff changeset
940 put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number or
571b5a524d6b Uploaded
fubar
parents:
diff changeset
941 A list of integers, one for each subject or an empty string if samples are all independent.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
942 If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
943 Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
944
571b5a524d6b Uploaded
fubar
parents:
diff changeset
945 So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones
571b5a524d6b Uploaded
fubar
parents:
diff changeset
946 eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects. you might use
571b5a524d6b Uploaded
fubar
parents:
diff changeset
947 8,9,1,1,2,2
571b5a524d6b Uploaded
fubar
parents:
diff changeset
948 as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6
571b5a524d6b Uploaded
fubar
parents:
diff changeset
949
571b5a524d6b Uploaded
fubar
parents:
diff changeset
950 **Methods available**
571b5a524d6b Uploaded
fubar
parents:
diff changeset
951
571b5a524d6b Uploaded
fubar
parents:
diff changeset
952 You can run 3 popular Bioconductor packages available for count data.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
953
571b5a524d6b Uploaded
fubar
parents:
diff changeset
954 edgeR - see edgeR_ for details
571b5a524d6b Uploaded
fubar
parents:
diff changeset
955
571b5a524d6b Uploaded
fubar
parents:
diff changeset
956 VOOM/limma - see limma_VOOM_ for details
571b5a524d6b Uploaded
fubar
parents:
diff changeset
957
571b5a524d6b Uploaded
fubar
parents:
diff changeset
958 DESeq2 - see DESeq2_ for details
571b5a524d6b Uploaded
fubar
parents:
diff changeset
959
571b5a524d6b Uploaded
fubar
parents:
diff changeset
960 and optionally camera in edgeR which works better if MSigDB is installed.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
961
571b5a524d6b Uploaded
fubar
parents:
diff changeset
962 **Outputs**
571b5a524d6b Uploaded
fubar
parents:
diff changeset
963
571b5a524d6b Uploaded
fubar
parents:
diff changeset
964 Some helpful plots and analysis results. Note that most of these are produced using R code
571b5a524d6b Uploaded
fubar
parents:
diff changeset
965 suggested by the excellent documentation and vignettes for the Bioconductor
571b5a524d6b Uploaded
fubar
parents:
diff changeset
966 packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
967
571b5a524d6b Uploaded
fubar
parents:
diff changeset
968 **Note on Voom**
571b5a524d6b Uploaded
fubar
parents:
diff changeset
969
571b5a524d6b Uploaded
fubar
parents:
diff changeset
970 The voom from limma version 3.16.6 help in R includes this from the authors - but you should read the paper to interpret this method.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
971
571b5a524d6b Uploaded
fubar
parents:
diff changeset
972 This function is intended to process RNA-Seq or ChIP-Seq data prior to linear modelling in limma.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
973
571b5a524d6b Uploaded
fubar
parents:
diff changeset
974 voom is an acronym for mean-variance modelling at the observational level.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
975 The key concern is to estimate the mean-variance relationship in the data, then use this to compute appropriate weights for each observation.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
976 Count data almost show non-trivial mean-variance relationships. Raw counts show increasing variance with increasing count size, while log-counts typically show a decreasing mean-variance trend.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
977 This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
978 The weights are then used in the linear modelling process to adjust for heteroscedasticity.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
979
571b5a524d6b Uploaded
fubar
parents:
diff changeset
980 In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using lowess.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
981 The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
982 The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
983 Tags with zero counts across all samples are not included in the lowess fit. Optional normalization is performed using normalizeBetweenArrays.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
984 Using fitted values of log2 counts from a linear model fit by lmFit, variances from the mean-variance trend were interpolated for each observation.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
985 This was carried out by approxfun. Inverse variance weights can be used to correct for mean-variance trend in the count data.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
986
571b5a524d6b Uploaded
fubar
parents:
diff changeset
987
571b5a524d6b Uploaded
fubar
parents:
diff changeset
988 Author(s)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
989
571b5a524d6b Uploaded
fubar
parents:
diff changeset
990 Charity Law and Gordon Smyth
571b5a524d6b Uploaded
fubar
parents:
diff changeset
991
571b5a524d6b Uploaded
fubar
parents:
diff changeset
992 References
571b5a524d6b Uploaded
fubar
parents:
diff changeset
993
571b5a524d6b Uploaded
fubar
parents:
diff changeset
994 Law, CW (2013). Precision weights for gene expression analysis. PhD Thesis. University of Melbourne, Australia.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
995
571b5a524d6b Uploaded
fubar
parents:
diff changeset
996 Law, CW, Chen, Y, Shi, W, Smyth, GK (2013). Voom! Precision weights unlock linear model analysis tools for RNA-seq read counts.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
997 Technical Report 1 May 2013, Bioinformatics Division, Walter and Eliza Hall Institute of Medical Reseach, Melbourne, Australia.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
998 http://www.statsci.org/smyth/pubs/VoomPreprint.pdf
571b5a524d6b Uploaded
fubar
parents:
diff changeset
999
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1000 See Also
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1001
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1002 A voom case study is given in the edgeR User's Guide.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1003
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1004 vooma is a similar function but for microarrays instead of RNA-seq.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1005
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1006
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1007 ***old rant on changes to Bioconductor package variable names between versions***
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1008
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1009 The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1010 breaking this and all other code that assumed the old name for this variable,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1011 between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing).
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1012 This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1013 to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1014 when their old scripts break. This tool currently now works with 2.4.6.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1015
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1016 **Note on prior.N**
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1017
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1018 http://seqanswers.com/forums/showthread.php?t=5591 says:
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1019
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1020 *prior.n*
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1021
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1022 The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1023 You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1024 in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1025 tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1026 common likelihood the weight of one observation.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1027
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1028 In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1029 or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1030 you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1031 (squeezing) of the tagwise dispersions. How many samples do you have in your experiment?
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1032 What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1033 If you have more samples, then the tagwise dispersion estimates will be more reliable,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1034 so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1035
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1036
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1037 From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1038
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1039 Dear Dorota,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1040
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1041 The important settings are prior.df and trend.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1042
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1043 prior.n and prior.df are related through prior.df = prior.n * residual.df,
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1044 and your experiment has residual.df = 36 - 12 = 24. So the old setting of
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1045 prior.n=10 is equivalent for your data to prior.df = 240, a very large
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1046 value. Going the other way, the new setting of prior.df=10 is equivalent
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1047 to prior.n=10/24.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1048
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1049 To recover old results with the current software you would use
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1050
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1051 estimateTagwiseDisp(object, prior.df=240, trend="none")
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1052
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1053 To get the new default from old software you would use
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1054
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1055 estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1056
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1057 Actually the old trend method is equivalent to trend="loess" in the new
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1058 software. You should use plotBCV(object) to see whether a trend is
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1059 required.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1060
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1061 Note you could also use
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1062
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1063 prior.n = getPriorN(object, prior.df=10)
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1064
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1065 to map between prior.df and prior.n.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1066
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1067 ----
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1068
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1069 **Attributions**
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1070
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1071 edgeR - edgeR_
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1072
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1073 VOOM/limma - limma_VOOM_
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1074
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1075 DESeq2 - DESeq2_ for details
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1076
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1077 See above for Bioconductor package documentation for packages exposed in Galaxy by this tool and app store package.
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1078
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1079 Galaxy_ (that's what you are using right now!) for gluing everything together
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1080
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1081 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1082 licensed to you under the LGPL_ like other rgenetics artefacts
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1083
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1084 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1085 .. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1086 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1087 .. _DESeq2: http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1088 .. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1089 .. _Galaxy: http://getgalaxy.org
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1090 </help>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1091
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1092 </tool>
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1093
571b5a524d6b Uploaded
fubar
parents:
diff changeset
1094