annotate rgedgeRpaired_nocamera.xml @ 28:c4ee2e69d691 draft

Uploaded
author fubar
date Wed, 07 Aug 2013 02:41:40 -0400
parents c0fa3dde02d9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
25
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1 <tool id="rgDifferentialCount" name="Differential_Count" version="0.20">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
2 <description>models using BioConductor packages</description>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
3 <requirements>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
4 <requirement type="package" version="2.12">biocbasics</requirement>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
5 <requirement type="package" version="3.0.1">r3</requirement>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
6 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
7 <requirement type="package" version="9.07">ghostscript</requirement>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
8 </requirements>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
9
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
10 <command interpreter="python">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
11 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
12 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
13 </command>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
14 <inputs>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
15 <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
16 help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
17 <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
18 help="Supply a meaningful name here to remind you what the outputs contain">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
19 <sanitizer invalid_char="">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
20 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
21 </sanitizer>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
22 </param>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
23 <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
24 <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
25 multiple="true" use_header_names="true" size="120" display="checkboxes">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
26 <validator type="no_options" message="Please select at least one column."/>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
27 </param>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
28 <param name="control_name" type="text" value="Control" size="50" label="Control Name"/>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
29 <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
30 multiple="true" use_header_names="true" size="120" display="checkboxes" optional="true">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
31 </param>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
32 <param name="subjectids" type="text" optional="true" size="120" value = ""
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
33 label="IF SUBJECTS NOT ALL INDEPENDENT! Enter comma separated strings to indicate sample labels for (eg) pairing - must be one for every column in input"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
34 help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter 'A99,C21,A99,C21'">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
35 <sanitizer>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
36 <valid initial="string.letters,string.digits"><add value="," /> </valid>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
37 </sanitizer>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
38 </param>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
39 <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
40 help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
41 <param name="useNDF" type="boolean" truevalue="T" falsevalue="F" checked="false" size="1"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
42 label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
43 help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
44
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
45 <conditional name="edgeR">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
46 <param name="doedgeR" type="select"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
47 label="Run this model using edgeR"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
48 help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
49 <option value="F">Do not run edgeR</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
50 <option value="T" selected="true">Run edgeR</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
51 </param>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
52 <when value="T">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
53 <param name="edgeR_priordf" type="integer" value="20" size="3"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
54 label="prior.df for tagwise dispersion - lower value = more emphasis on each tag's variance. Replaces prior.n and prior.df = prior.n * residual.df"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
55 help="0 = Use edgeR default. Use a small value to 'smooth' small samples. See edgeR docs and note below"/>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
56 </when>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
57 <when value="F"></when>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
58 </conditional>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
59 <conditional name="DESeq2">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
60 <param name="doDESeq2" type="select"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
61 label="Run the same model with DESeq2 and compare findings"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
62 help="DESeq2 is an update to the DESeq package. It uses different assumptions and methods to edgeR">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
63 <option value="F" selected="true">Do not run DESeq2</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
64 <option value="T">Run DESeq2</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
65 </param>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
66 <when value="T">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
67 <param name="DESeq_fitType" type="select">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
68 <option value="parametric" selected="true">Parametric (default) fit for dispersions</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
69 <option value="local">Local fit - this will automagically be used if parametric fit fails</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
70 <option value="mean">Mean dispersion fit- use this if you really understand what you're doing - read the fine manual linked below in the documentation</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
71 </param>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
72 </when>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
73 <when value="F"> </when>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
74 </conditional>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
75 <param name="doVoom" type="select"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
76 label="Run the same model with Voom/limma and compare findings"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
77 help="Voom uses counts per million and a precise transformation of variance so count data can be analysed using limma">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
78 <option value="F" selected="true">Do not run VOOM</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
79 <option value="T">Run VOOM</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
80 </param>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
81 <!--
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
82 <conditional name="camera">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
83 <param name="doCamera" type="select" label="Run the edgeR implementation of Camera GSEA for up/down gene sets"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
84 help="If yes, you can choose a set of genesets to test and/or supply a gmt format geneset collection from your history">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
85 <option value="F" selected="true">Do not run GSEA tests with the Camera algorithm</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
86 <option value="T">Run GSEA tests with the Camera algorithm</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
87 </param>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
88 <when value="T">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
89 <conditional name="gmtSource">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
90 <param name="refgmtSource" type="select"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
91 label="Use a gene set (.gmt) from your history and/or use a built-in (MSigDB etc) gene set">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
92 <option value="indexed" selected="true">Use a built-in gene set</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
93 <option value="history">Use a gene set from my history</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
94 <option value="both">Add a gene set from my history to a built in gene set</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
95 </param>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
96 <when value="indexed">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
97 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
98 <options from_data_table="gseaGMT_3.1">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
99 <filter type="sort_by" column="2" />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
100 <validator type="no_options" message="No GMT v3.1 files are available - please install them"/>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
101 </options>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
102 </param>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
103 </when>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
104 <when value="history">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
105 <param name="ownGMT" type="data" format="gmt" label="Select a Gene Set from your history" />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
106 </when>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
107 <when value="both">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
108 <param name="ownGMT" type="data" format="gseagmt" label="Select a Gene Set from your history" />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
109 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
110 <options from_data_table="gseaGMT_4">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
111 <filter type="sort_by" column="2" />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
112 <validator type="no_options" message="No GMT v4 files are available - please fix tool_data_table and loc files"/>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
113 </options>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
114 </param>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
115 </when>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
116 </conditional>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
117 </when>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
118 <when value="F">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
119 </when>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
120 </conditional>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
121 -->
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
122 <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for family wise error rate control"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
123 help="Conventional default value of 0.05 recommended"/>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
124 <param name="fdrtype" type="select" label="FDR (Type I error) control method"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
125 help="Use fdr or bh typically to control for the number of tests in a reliable way">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
126 <option value="fdr" selected="true">fdr</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
127 <option value="BH">Benjamini Hochberg</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
128 <option value="BY">Benjamini Yekutieli</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
129 <option value="bonferroni">Bonferroni</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
130 <option value="hochberg">Hochberg</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
131 <option value="holm">Holm</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
132 <option value="hommel">Hommel</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
133 <option value="none">no control for multiple tests</option>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
134 </param>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
135 </inputs>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
136 <outputs>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
137 <data format="tabular" name="out_edgeR" label="${title}_topTable_edgeR.xls">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
138 <filter>edgeR['doedgeR'] == "T"</filter>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
139 </data>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
140 <data format="tabular" name="out_DESeq2" label="${title}_topTable_DESeq2.xls">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
141 <filter>DESeq2['doDESeq2'] == "T"</filter>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
142 </data>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
143 <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
144 <filter>doVoom == "T"</filter>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
145 </data>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
146 <data format="html" name="html_file" label="${title}.html"/>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
147 </outputs>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
148 <stdio>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
149 <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix" />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
150 </stdio>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
151 <tests>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
152 <test>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
153 <param name='input1' value='test_bams2mx.xls' ftype='tabular' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
154 <param name='treatment_name' value='liver' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
155 <param name='title' value='edgeRtest' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
156 <param name='useNDF' value='' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
157 <param name='doedgeR' value='T' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
158 <param name='doVoom' value='T' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
159 <param name='doDESeq2' value='T' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
160 <param name='fdrtype' value='fdr' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
161 <param name='edgeR_priordf' value="8" />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
162 <param name='fdrthresh' value="0.05" />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
163 <param name='control_name' value='heart' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
164 <param name='subjectids' value='' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
165 <param name='Control_cols' value='3,4,5,9' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
166 <param name='Treat_cols' value='2,6,7,8' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
167 <output name='out_edgeR' file='edgeRtest1out.xls' compare='diff' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
168 <output name='html_file' file='edgeRtest1out.html' compare='diff' lines_diff='20' />
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
169 </test>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
170 </tests>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
171
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
172 <configfiles>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
173 <configfile name="runme">
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
174 <![CDATA[
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
175 #
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
176 # edgeR.Rscript
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
177 # updated nov 2011 for R 2.14.0 and edgeR 2.4.0 by ross
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
178 # Performs DGE on a count table containing n replicates of two conditions
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
179 #
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
180 # Parameters
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
181 #
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
182 # 1 - Output Dir
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
183
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
184 # Original edgeR code by: S.Lunke and A.Kaspi
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
185 reallybig = log10(.Machine\$double.xmax)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
186 reallysmall = log10(.Machine\$double.xmin)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
187 library('stringr')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
188 library('gplots')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
189 library('edgeR')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
# Draw a column-clustered heatmap (gplots heatmap.2) of the leading rows of cmat into a PDF.
#
# cmat       - matrix to plot, one column per sample; rows are plotted in the order supplied
#              (presumably already ranked by the caller - confirm against the call site)
# nsamp      - maximum number of rows to plot; only the first nsamp rows are kept
# outpdfname - path of the PDF file to create
# TName      - not used by this function; kept so existing callers keep working
# group      - one group label per column of cmat, used to colour the column side bar
# myTitle    - main title of the plot
hmap2 <- function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here')
{
    groupLevels <- unique(group)
    if (length(groupLevels) == 2) {
        # exactly two groups: the first level seen is red, the other is blue
        sideCols <- ifelse(group == groupLevels[1], "#FF0000", "#0000FF")
    } else {
        rainbowCols <- rainbow(length(groupLevels),start=0,end=4/6)
        sideCols <- rainbowCols[match(group,groupLevels)]
    }
    # Remove unlabelled rows. A matrix with no rownames at all is kept whole:
    # is.na(NULL) is zero-length and would otherwise silently drop every row.
    rowLabels <- rownames(cmat)
    if (is.null(rowLabels)) {
        dm <- cmat
    } else {
        # drop=FALSE keeps dm a matrix even when a single row survives
        dm <- cmat[!is.na(rowLabels), , drop=FALSE]
    }
    if (nrow(dm) > nsamp) {
        dm <- dm[1:nsamp, , drop=FALSE]
    }
    # long sample names are truncated so the column labels fit the margin
    colnames(dm) <- substr(colnames(dm),1,20)
    pdf(outpdfname)
    heatmap.2(dm,main=myTitle,ColSideColors=sideCols,col=topo.colors(100),dendrogram="col",key=TRUE,density.info='none',
        Rowv=FALSE,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5)
    dev.off()
}
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
217
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
# Draw a base-graphics heatmap of the leading rows of cmat into a PDF.
#
# cmat       - matrix to plot, one column per sample; rows are plotted in the order supplied
# nmeans     - not used by this function; kept for caller compatibility
# outpdfname - path of the PDF file to create
# nsamp      - maximum number of rows to plot; when cmat has more, only the first nsamp are kept
#              and the title reports how many were available
# TName      - not used by this function; kept for caller compatibility
# group      - one group label per column of cmat, used to colour the column side bar
# myTitle    - prefix for the plot title (only used when no rows are dropped)
hmap <- function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here")
{
    groupLevels <- unique(group)
    sideCols <- rainbow(length(groupLevels),start=0.3,end=0.6)[match(group,groupLevels)]
    totalRows <- nrow(cmat)
    if (totalRows > nsamp) {
        cmat <- cmat[1:nsamp,]
        plotTitle <- paste0('Heatmap: Top ',nsamp,' DE contigs (of ',totalRows,')')
    } else {
        plotTitle <- paste(myTitle,'Heatmap: n contigs =',totalRows)
    }
    # long sample names are truncated so the column labels stay readable
    colnames(cmat) <- substr(colnames(cmat),1,20)
    pdf(outpdfname)
    heatmap(cmat,scale='row',main=plotTitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=sideCols)
    dev.off()
}
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
238
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
# Write a quantile-quantile plot of observed against expected -log10(p) values to a PDF.
# Adapted from https://gist.github.com/703512
#
# descr   - main title of the plot
# pvector - vector of p values
# outpdf  - path of the PDF file to create
# ...     - further arguments passed on to plot()
#
# Infinite -log10(p) values are replaced by the script-level constants
# reallybig and reallysmall so the axis limits stay finite.
qqPlot <- function(descr='qqplot',pvector, outpdf='qqplot.pdf',...)
{
    observed <- -log10(sort(pvector,decreasing=FALSE))
    npvals <- length(observed)
    # expected values under the null: evenly spaced ranks divided by the count
    expected <- -log10((1:npvals)/npvals)
    observed[observed == -Inf] <- reallysmall
    observed[observed == Inf] <- reallybig
    pdf(outpdf)
    plot(expected,observed,pch=19,cex=1, main=descr, ...,
        xlab=expression(Expected~~-log[10](italic(p))),
        ylab=expression(Observed~~-log[10](italic(p))),
        xlim=c(0,max(expected)), ylim=c(0,max(observed)))
    # identity line: where the points would fall if observed matched expected
    lines(expected,expected,col="red")
    grid(col = "lightgray", lty = "dotted")
    dev.off()
}
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
256
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
# Write a plotSmear plot with a dotted background grid to a PDF.
#
# DGEList  - object handed to plotSmear as its first argument
# deTags   - passed to plotSmear as de.tags
# outSmear - path of the PDF file to create
# outMain  - main title of the plot
smearPlot <- function(DGEList,deTags, outSmear, outMain)
{
    pdf(file=outSmear)
    plotSmear(DGEList, main=outMain, de.tags=deTags)
    grid(lty="dotted", col="lightgray")
    dev.off()
}
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
264
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
# Print per-sample summaries and write two PDFs describing the count distributions.
#
# rawrs   - per-sample values before normalisation, one column per sample;
#           negative entries are replaced by NA before anything is summarised or plotted
# cleanrs - the corresponding values after normalisation
# maint   - text appended to the panel titles of the box plots
# myTitle - not used by this function; kept for caller compatibility
# pdfname - path of the box plot PDF (raw and normalised side by side)
#
# A second PDF with one histogram per raw sample is always written to the
# fixed name "sample_counts_histogram.pdf", independent of pdfname.
boxPlot <- function(rawrs,cleanrs,maint,myTitle,pdfname)
{
    nc <- ncol(rawrs)
    for (j in 1:nc) {
        negative <- rawrs[,j] < 0
        rawrs[negative,j] <- NA
    }
    # remember the untruncated names for the histogram subtitles
    fullnames <- colnames(rawrs)
    colnames(rawrs) <- substr(fullnames,1,20)
    colnames(cleanrs) <- substr(colnames(cleanrs),1,20)
    defpar <- par(no.readonly=TRUE)
    print.noquote('raw contig counts by sample:')
    print.noquote(summary(rawrs))
    print.noquote('normalised contig counts by sample:')
    print.noquote(summary(cleanrs))

    # first PDF: raw and normalised box plots side by side
    pdf(pdfname)
    par(mfrow=c(1,2))
    boxplot(rawrs,varwidth=TRUE,notch=TRUE,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('Raw:',maint))
    grid(col="lightgray",lty="dotted")
    boxplot(cleanrs,varwidth=TRUE,notch=TRUE,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('After ',maint))
    grid(col="lightgray",lty="dotted")
    dev.off()

    # second PDF: one histogram per sample on a square grid
    histname <- "sample_counts_histogram.pdf"
    print.noquote(paste('Using ncol rawrs=',nc))
    # smallest square grid that holds nc panels
    gridside <- ceiling(sqrt(nc))
    # tallest bin over all samples so every panel shares one y axis
    peaks <- sapply(1:nc, function(j) max(hist(rawrs[,j],breaks=100,plot=FALSE)[["counts"]]))
    ymax <- max(peaks)
    nnames <- length(fullnames)
    if (nnames > 20) {
        # enlarge the page in proportion to the number of samples
        pagesize <- 7*nnames/20
        pdf(histname,width=pagesize,height=pagesize)
    } else {
        pdf(histname)
    }
    par(mfrow=c(gridside,gridside))
    for (j in 1:nc) {
        hist(rawrs[,j], main=paste("Contig logcount",j), xlab='log raw count', col="maroon",
            breaks=100,sub=fullnames[j],cex=0.8,ylim=c(0,ymax))
    }
    dev.off()
    par(defpar)
}
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
314
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
# Write stacked before and after histograms of log10 values to the fixed file
# "Filtering_rowsum_bar_charts.pdf".
#
# rawrs   - values before filtering (log base 10 is taken here)
# cleanrs - values after filtering (log base 10 is taken here)
# maint   - text appended to the 'Before:' and 'After:' panel titles
# myTitle - sub-title shown under both panels
#
# Both panels share an x axis running from 0 to the largest log10 raw value.
cumPlot <- function(rawrs,cleanrs,maint,myTitle)
{
    defpar <- par(no.readonly=TRUE)
    lograw <- log(rawrs,10)
    upper <- max(lograw)
    # one histogram panel followed by its background grid
    panel <- function(values,prefix) {
        hist(values,breaks=100,main=paste(prefix,maint),xlab="# Reads (log)",
            ylab="Count",col="maroon",sub=myTitle,xlim=c(0,upper),las=1)
        grid(col="lightgray", lty="dotted")
    }
    pdf("Filtering_rowsum_bar_charts.pdf")
    par(mfrow=c(2,1))
    panel(lograw,'Before:')
    panel(log(cleanrs,10),'After:')
    dev.off()
    par(defpar)
}
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
333
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
cumPlot1 = function(rawrs,cleanrs,maint,myTitle)
{
  # Write "<myTitle with spaces removed>_RowsumCum.pdf" in the working directory:
  # two stacked empirical cumulative distribution curves of row totals on a log
  # x axis, before (rawrs) and after (cleanrs) filtering.
  #   rawrs    per-contig read totals before filtering
  #   cleanrs  per-contig read totals after filtering
  #   maint    filter description appended to each panel title
  #   myTitle  sub-title of both panels; also the stem of the file name
  # Called for its side effect only; returns NULL invisibly.
  pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_')
  lastx = max(rawrs)   # shared upper x limit for both panels
  rawcdf = ecdf(rawrs)
  cleancdf = ecdf(cleanrs)
  rawe = knots(rawcdf)
  cleane = knots(cleancdf)
  # Evaluate each ECDF at its own knots. Knots are the distinct observed
  # values, so spacing them evenly as (1..k)/k misstates the cumulative
  # proportion whenever several contigs share the same total.
  ry = rawcdf(rawe)
  cy = cleancdf(cleane)
  pdf(pdfname)
  # Close the device even when a plotting call fails.
  on.exit(dev.off())
  par(mfrow=c(2,1))
  plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads",
       ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
  grid(col="blue")
  plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads",
       ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
  grid(col="blue")
  invisible(NULL)
}
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
352
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
353
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
354
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
doGSEAold = function(y=NULL,design=NULL,histgmt="",
  bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
  ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
{
  # Run camera() once per gene set and write the up and down results as
  # tab-separated files. All console output goes to 'Camera.log' while it runs.
  #   y          object with row names; passed unchanged to camera() as y
  #   design     passed unchanged to camera() as design
  #   histgmt    path of a GMT file to read, or "" for none
  #   bigmt      path of a second GMT file to read, or "" for none
  #   ntest      ignored; always replaced by the number of gene sets read
  #   myTitle    title text written as the first line of outfname
  #   outfname   output file name; results go to camera_up_<outfname> and
  #              camera_down_<outfname>
  #   minnin     a set is tested only when more than this many rows match
  #   maxnin     a set is tested only when fewer than this many rows match
  #   fdrthresh  adjusted p value cut-off used to size the logged top tables
  #   fdrtype    method name handed to p.adjust()
  sink('Camera.log')
  # Always give the console back, even when camera() or a file read fails.
  on.exit(sink())
  genesets = c()
  if (bigmt > "")
  {
    genesets = readLines(bigmt)
  }
  if (histgmt > "")
  {
    # Append with c(); rbind() on two character vectors builds a two-row
    # matrix and recycles the shorter file instead of concatenating.
    genesets = c(genesets,readLines(histgmt))
  }
  print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
  # One GMT line per set: genesetid, URL or description, then the member genes.
  genesets = strsplit(as.character(genesets),'\t')
  banner = paste(myTitle,'edgeR GSEA')
  write(banner,file=outfname,append=F)
  ntest = length(genesets)
  urownames = toupper(rownames(y))   # match gene symbols without regard to case
  upcam = c()
  downcam = c()
  for (i in seq_len(ntest)) {
    gs = unlist(genesets[i])
    # A usable line needs an id, a url field and at least one gene.
    if (length(gs) < 3) { next }
    g = gs[1]
    u = gs[2]
    if (u > "") { u = paste("<a href=\'",u,"\'>",u,"</a>",sep="") }
    glist = toupper(gs[3:length(gs)])
    inglist = urownames %in% glist
    nin = sum(inglist)
    if ((nin > minnin) && (nin < maxnin)) {
      camres = camera(y=y,index=inglist,design=design)
      if (! is.null(camres)) {
        rownames(camres) = g
        camres = cbind(GeneSet=g,URL=u,camres)
        if (camres\$Direction == "Up")
        {
          upcam = rbind(upcam,camres)
        } else {
          downcam = rbind(downcam,camres)
        }
      }
    }
  }
  # Rank each direction by raw p value, then adjust within that direction.
  uscam = upcam[order(upcam\$PValue),]
  uscam\$adjPValue = p.adjust(uscam\$PValue,method=fdrtype)
  # Log at least ten sets per direction even when fewer pass the threshold.
  nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
  dscam = downcam[order(downcam\$PValue),]
  dscam\$adjPValue = p.adjust(dscam\$PValue,method=fdrtype)
  ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
  write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
  write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
  print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
  write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
  print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
  write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
}
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
423
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
424
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
425
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
426
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
doGSEA = function(y=NULL,design=NULL,histgmt="",
  bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
  ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
{
  # Run camera() once over every eligible gene set and write the up and down
  # results as tab-separated files. All console output goes to 'Camera.log'.
  #   y          object with row names; passed unchanged to camera() as y
  #   design     passed unchanged to camera() as design
  #   histgmt    path of a GMT file to read, or "" for none
  #   bigmt      path of a second GMT file to read, or "" for none
  #   ntest      ignored; always replaced by the number of gene sets read
  #   myTitle    title text written as the first line of outfname
  #   outfname   output file name; results go to camera_up_<outfname> and
  #              camera_down_<outfname>
  #   minnin     a set is tested only when more than this many rows match
  #   maxnin     a set is tested only when fewer than this many rows match
  #   fdrthresh  adjusted p value cut-off used to size the logged top tables
  #   fdrtype    method name handed to p.adjust()
  # Returns NULL invisibly, without writing the camera_* files, when no gene
  # set is eligible.
  sink('Camera.log')
  # Always give the console back, even when camera() or a file read fails.
  on.exit(sink())
  genesets = c()
  if (bigmt > "")
  {
    genesets = readLines(bigmt)
  }
  if (histgmt > "")
  {
    # Append with c(); rbind() on two character vectors builds a two-row
    # matrix and recycles the shorter file instead of concatenating.
    genesets = c(genesets,readLines(histgmt))
  }
  print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
  # One GMT line per set: genesetid, URL or description, then the member genes.
  genesets = strsplit(as.character(genesets),'\t')
  banner = paste(myTitle,'edgeR GSEA')
  write(banner,file=outfname,append=F)
  ntest = length(genesets)
  urownames = toupper(rownames(y))   # match gene symbols without regard to case
  incam = list()   # one logical row index per eligible gene set
  urls = c()
  gsids = c()
  for (i in seq_len(ntest)) {
    gs = unlist(genesets[i])
    # A usable line needs an id, a url field and at least one gene.
    if (length(gs) < 3) { next }
    gsid = gs[1]
    url = gs[2]
    if (url > "") { url = paste("<a href=\'",url,"\'>",url,"</a>",sep="") }
    glist = toupper(gs[3:length(gs)])
    inglist = urownames %in% glist
    nin = sum(inglist)
    if ((nin > minnin) && (nin < maxnin)) {
      # Keep each index vector as its own list element. Concatenating them
      # with c() produced one long logical vector that no longer lined up
      # with gsids.
      incam[[length(incam)+1]] = inglist
      gsids = c(gsids,gsid)
      urls = c(urls,url)
    }
  }
  if (length(incam) == 0) {
    print.noquote(paste("@@@ no gene sets with more than",minnin,"and fewer than",maxnin,"matching rows - camera not run"))
    return(invisible(NULL))
  }
  # The two GMT files may repeat an id; list names must be unique to be
  # usable as row names of the result.
  setnames = make.unique(gsids)
  names(incam) = setnames
  allcam = camera(y=y,index=incam,design=design)
  # Align ids and urls by row name instead of by position, since camera()
  # may return its rows ordered by p value.
  # NOTE(review): assumes the result's row names are names(index) - confirm
  # against the installed limma version.
  hit = match(rownames(allcam),setnames)
  allcamres = cbind(geneset=gsids[hit],allcam,URL=urls[hit])
  # Split rows by direction in one step; the previous loop indexed columns
  # with allcamres[i] and ran over every set read, not every set tested.
  isup = (allcamres\$Direction == "Up")
  upcam = allcamres[isup,]
  downcam = allcamres[!isup,]
  # Rank each direction by raw p value, then adjust within that direction.
  uscam = upcam[order(upcam\$PValue),]
  uscam\$adjPValue = p.adjust(uscam\$PValue,method=fdrtype)
  # Log at least ten sets per direction even when fewer pass the threshold.
  nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
  dscam = downcam[order(downcam\$PValue),]
  dscam\$adjPValue = p.adjust(dscam\$PValue,method=fdrtype)
  ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
  write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
  write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
  print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
  write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
  print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
  write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
}
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
507
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
508
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
509 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
510 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
511 filterquantile=0.2, subjects=c(),mydesign=NULL,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
512 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
513 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
514 doCook=F,DESeq_fitType="parameteric")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
515 {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
516 # Error handling
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
517 if (length(unique(group))!=2){
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
518 print("Number of conditions identified in experiment does not equal 2")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
519 q()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
520 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
521 require(edgeR)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
522 options(width = 512)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
523 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
524 allN = nrow(Count_Matrix)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
525 nscut = round(ncol(Count_Matrix)/2)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
526 colTotmillionreads = colSums(Count_Matrix)/1e6
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
527 counts.dataframe = as.data.frame(c())
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
528 rawrs = rowSums(Count_Matrix)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
529 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
530 nzN = nrow(nonzerod)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
531 nzrs = rowSums(nonzerod)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
532 zN = allN - nzN
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
533 print('# Quantiles for non-zero row counts:',quote=F)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
534 print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
535 if (useNDF == T)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
536 {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
537 gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
538 lo = colSums(Count_Matrix[!gt1rpin3,])
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
539 workCM = Count_Matrix[gt1rpin3,]
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
540 cleanrs = rowSums(workCM)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
541 cleanN = length(cleanrs)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
542 meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
543 print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
544 maint = paste('Filter >=1/million reads in >=',nscut,'samples')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
545 } else {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
546 useme = (nzrs > quantile(nzrs,filterquantile))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
547 workCM = nonzerod[useme,]
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
548 lo = colSums(nonzerod[!useme,])
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
549 cleanrs = rowSums(workCM)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
550 cleanN = length(cleanrs)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
551 meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
552 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
553 maint = paste('Filter below',filterquantile,'quantile')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
554 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
555 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
556 allgenes = rownames(workCM)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
557 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
558 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
559 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
560 testreg = str_match(allgenes,reg)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
561 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
562 {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
563 print("@@ using ucsc substitution for urls")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
564 contigurls = paste0(ucsc,"&amp;position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
565 } else {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
566 print("@@ using genecards substitution for urls")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
567 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
568 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
569 print.noquote("# urls")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
570 print.noquote(head(contigurls))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
571 print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
572 cmrowsums = rowSums(workCM)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
573 TName=unique(group)[1]
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
574 CName=unique(group)[2]
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
575 if (is.null(mydesign)) {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
576 if (length(subjects) == 0)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
577 {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
578 mydesign = model.matrix(~group)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
579 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
580 else {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
581 subjf = factor(subjects)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
582 mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
583 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
584 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
585 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
586 print.noquote('Using design matrix:')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
587 print.noquote(mydesign)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
588 if (doedgeR) {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
589 sink('edgeR.log')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
590 #### Setup DGEList object
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
591 DGEList = DGEList(counts=workCM, group = group)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
592 DGEList = calcNormFactors(DGEList)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
593
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
594 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
595 comdisp = DGEList\$common.dispersion
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
596 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
597 if (edgeR_priordf > 0) {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
598 print.noquote(paste("prior.df =",edgeR_priordf))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
599 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
600 } else {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
601 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
602 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
603 DGLM = glmFit(DGEList,design=mydesign)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
604 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
605 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
606 normData = (1e+06*DGEList\$counts/efflib)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
607 uoutput = cbind(
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
608 Name=as.character(rownames(DGEList\$counts)),
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
609 DE\$table,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
610 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
611 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
612 DGEList\$counts
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
613 )
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
614 soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
615 goodness = gof(DGLM, pcutoff=fdrthresh)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
616 if (sum(goodness\$outlier) > 0) {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
617 print.noquote('GLM outliers:')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
618 print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
619 } else {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
620 print('No GLM fit outlier genes found\n')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
621 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
622 z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
623 pdf("edgeR_GoodnessofFit.pdf")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
624 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
625 abline(0,1,lwd=3)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
626 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
627 dev.off()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
628 estpriorn = getPriorN(DGEList)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
629 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
630 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
631 normData = (1e+06*DGEList\$counts/efflib)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
632 uniqueg = unique(group)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
633 #### Plot MDS
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
634 sample_colors = match(group,levels(group))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
635 sampleTypes = levels(factor(group))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
636 print.noquote(sampleTypes)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
637 pdf("edgeR_MDSplot.pdf")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
638 plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
639 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
640 grid(col="blue")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
641 dev.off()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
642 colnames(normData) = paste( colnames(normData),'N',sep="_")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
643 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
644 nzd = data.frame(log(nonzerod + 1e-2,10))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
645 try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") )
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
646 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
647 tt = cbind(
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
648 Name=as.character(rownames(DGEList\$counts)),
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
649 DE\$table,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
650 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
651 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
652 )
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
653 print.noquote("# edgeR Top tags\n")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
654 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
655 tt = tt[order(DE\$table\$PValue),]
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
656 print.noquote(tt[1:50,])
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
657 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
658 nsig = length(deTags)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
659 print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
660 deColours = ifelse(deTags,'red','black')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
661 pdf("edgeR_BCV_vs_abundance.pdf")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
662 plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
663 dev.off()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
664 dg = DGEList[order(DE\$table\$PValue),]
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
665 #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
666 efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
667 normData = (1e+06*dg\$counts/efflib)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
668 outpdfname="edgeR_top_100_heatmap.pdf"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
669 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
670 outSmear = "edgeR_smearplot.pdf"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
671 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
672 smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
673 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
674 norm.factor = DGEList\$samples\$norm.factors
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
675 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
676 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
677 edgeRcounts = rep(0, length(allgenes))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
678 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
679 sink()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
680 } ### doedgeR
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
681 if (doDESeq2 == T)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
682 {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
683 sink("DESeq2.log")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
684 # DESeq2
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
685 require('DESeq2')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
686 library('RColorBrewer')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
687 if (length(subjects) == 0)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
688 {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
689 pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
690 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
691 } else {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
692 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
693 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
694 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
695 #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
696 #rDESeq = results(DESeq2)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
697 #newCountDataSet(workCM, group)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
698 deSeqDatsizefac = estimateSizeFactors(deSEQds)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
699 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
700 resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
701 rDESeq = as.data.frame(results(resDESeq))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
702 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
703 srDESeq = rDESeq[order(rDESeq\$pvalue),]
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
704 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
705 cat("# DESeq top 50\n")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
706 print.noquote(srDESeq[1:50,])
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
707 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
708 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
709 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
710 DESeqcounts = rep(0, length(allgenes))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
711 DESeqcounts[DESeqcountsindex] = 1
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
712 pdf("DESeq2_dispersion_estimates.pdf")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
713 plotDispEsts(resDESeq)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
714 dev.off()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
715 ysmall = abs(min(rDESeq\$log2FoldChange))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
716 ybig = abs(max(rDESeq\$log2FoldChange))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
717 ylimit = min(4,ysmall,ybig)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
718 pdf("DESeq2_MA_plot.pdf")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
719 plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
720 dev.off()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
721 rlogres = rlogTransformation(resDESeq)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
722 sampledists = dist( t( assay(rlogres) ) )
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
723 sdmat = as.matrix(sampledists)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
724 pdf("DESeq2_sample_distance_plot.pdf")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
725 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
726 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
727 dev.off()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
728 ###outpdfname="DESeq2_top50_heatmap.pdf"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
729 ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
730 sink()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
731 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
732 if ("try-error" %in% class(result)) {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
733 print.noquote('DESeq2 plotPCA failed.')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
734 } else {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
735 pdf("DESeq2_PCA_plot.pdf")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
736 #### wtf - print? Seems needed to get this to work
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
737 print(ppca)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
738 dev.off()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
739 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
740 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
741
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
742 if (doVoom == T) {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
743 sink('VOOM.log')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
744 if (doedgeR == F) {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
745 #### Setup DGEList object
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
746 DGEList = DGEList(counts=workCM, group = group)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
747 DGEList = calcNormFactors(DGEList)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
748 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
749 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
750 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
751 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
752 norm.factor = DGEList\$samples\$norm.factors
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
753 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
754 pdf("VOOM_mean_variance_plot.pdf")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
755 dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
756 dev.off()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
757 # Use limma to fit data
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
758 fit = lmFit(dat.voomed, mydesign)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
759 fit = eBayes(fit)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
760 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
761 qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='VOOM_qqplot.pdf')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
762 rownames(rvoom) = rownames(workCM)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
763 rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
764 srvoom = rvoom[order(rvoom\$P.Value),]
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
765 cat("# VOOM top 50\n")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
766 print(srvoom[1:50,])
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
767 write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
768 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
769 topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
770 voomcountsindex = which(allgenes %in% topresults.voom\$ID)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
771 voomcounts = rep(0, length(allgenes))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
772 voomcounts[voomcountsindex] = 1
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
773 sink()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
774 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
775
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
776 if (doCamera) {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
777 doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
778 outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
779 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
780
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
781 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
782 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
783 vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
784 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
785 VOOM_limma = voomcounts, row.names = allgenes)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
786 } else if ((doDESeq2==T) && (doedgeR==T)) {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
787 vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
788 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
789 } else if ((doVoom==T) && (doedgeR==T)) {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
790 vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
791 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
792 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
793
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
794 if (nrow(counts.dataframe > 1)) {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
795 counts.venn = vennCounts(counts.dataframe)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
796 vennf = "Venn_significant_genes_overlap.pdf"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
797 pdf(vennf)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
798 vennDiagram(counts.venn,main=vennmain,col="maroon")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
799 dev.off()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
800 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
801 } #### doDESeq2 or doVoom
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
802
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
803 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
804 #### Done
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
805
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
806 ###sink(stdout(),append=T,type="message")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
807 builtin_gmt = "" # run defaults start here; this one is forwarded to edgeIt as bigmt
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
808 history_gmt = "" # forwarded to edgeIt as histgmt
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
809 history_gmt_name = ""
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
810 out_edgeR = F # default; replaced by the output path below when edgeR is selected
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
811 out_DESeq2 = F # default; replaced by the output path below when DESeq2 is selected
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
812 out_VOOM = "$out_VOOM"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
813 doDESeq2 = $DESeq2.doDESeq2 # make these T or F
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
814 doVoom = $doVoom
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
815 doCamera = F # camera gene set testing is switched off in this script
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
816 doedgeR = $edgeR.doedgeR
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
817 edgeR_priordf = 0 # default; replaced by the form value below when edgeR is selected
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
818
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
819
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
820 #if $doVoom == "T":
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
821 out_VOOM = "$out_VOOM"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
822 #end if
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
823
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
824 #if $DESeq2.doDESeq2 == "T":
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
825 out_DESeq2 = "$out_DESeq2"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
826 DESeq_fitType = "$DESeq2.DESeq_fitType" # NOTE(review): only assigned when DESeq2 is selected, there is no default above
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
827 #end if
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
828
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
829 #if $edgeR.doedgeR == "T":
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
830 out_edgeR = "$out_edgeR"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
831 edgeR_priordf = $edgeR.edgeR_priordf
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
832 #end if
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
833
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
834
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
835 if (sum(c(doedgeR,doVoom,doDESeq2)) == 0) # abort when none of the three methods was requested
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
836 {
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
837 write("No methods chosen - nothing to do! Please try again after choosing one or more methods", stderr())
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
838 quit(save="no",status=2) # leave R with exit status 2 without saving the workspace
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
839 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
840
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
841 Out_Dir = "$html_file.files_path" # form values are read first; this directory is created further down when missing
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
842 Input = "$input1"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
843 TreatmentName = "$treatment_name"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
844 TreatmentCols = "$Treat_cols"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
845 ControlName = "$control_name"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
846 ControlCols= "$Control_cols"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
847 org = "$input1.dbkey"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
848 if (org == "") { org = "hg19"} # fall back to hg19 when the input carries no dbkey
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
849 fdrtype = "$fdrtype"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
850 fdrthresh = $fdrthresh
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
851 useNDF = $useNDF # NOTE(review): the edgeIt call below passes useNDF=F rather than this value
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
852 fQ = $fQ # non-differential centile cutoff
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
853 myTitle = "$title"
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
854 sids = strsplit("$subjectids",',')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
855 subjects = unlist(sids)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
856 nsubj = length(subjects)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
857 TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1 # presumably the form counts the row-name column, which read.table consumes below - confirm
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
858 CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
859 cat('Got TCols=')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
860 cat(TCols)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
861 cat('; CCols=')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
862 cat(CCols)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
863 cat('\n')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
864 useCols = c(TCols,CCols) # treatment columns first, then control columns
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
865 if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
866 Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') #Load tab file assume header
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
867 snames = colnames(Count_Matrix)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
868 nsamples = length(snames)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
869 if (nsubj > 0 & nsubj != nsamples) { # a subject list, when given, needs one entry per sample column
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
870 options("show.error.messages"=T)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
871 mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','),
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
872 'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=','))
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
873 write(mess, stderr())
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
874 quit(save="no",status=4) # leave R with exit status 4 without saving the workspace
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
875 }
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
876 if (length(subjects) != 0) {subjects = subjects[useCols]} # keep subject ids in the same order as the selected columns
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
877 Count_Matrix = Count_Matrix[,useCols] ### reorder columns
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
878 rn = rownames(Count_Matrix)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
879 islib = rn %in% c('librarySize','NotInBedRegions')
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
880 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first; NOTE(review): LibSizes is not referenced again in the visible script
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
881 Count_Matrix = Count_Matrix[subset(rn,! islib),] # drop the librarySize and NotInBedRegions rows from the counts
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
882 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
883 group = factor(group, levels=c(ControlName,TreatmentName)) # control is the first factor level
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
884 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relabel columns
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
885 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2, # run the selected methods on the prepared matrix
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
886 fdrtype=fdrtype,mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', # honour the adjustment method chosen on the form instead of a fixed 'BH'
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
887 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects, # NOTE(review): useNDF from the form is still ignored here - confirm this is intended
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
888 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
889 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
890 sessionInfo()
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
891 ]]>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
892 </configfile>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
893 </configfiles>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
894 <help>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
895
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
896 **What it does**
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
897
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
898 Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
899 Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
900
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
901 **Input**
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
902
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
903 Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
904 and your fave gene model to generate inputs. Each row is a genomic feature (gene or exon eg) and each column the
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
905 non-negative integer count of reads from one sample overlapping the feature.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
906 The matrix must have a header row uniquely identifying the source samples, and unique row names in
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
907 the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
908
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
909 **Specifying comparisons**
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
910
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
911 This is basically dumbed down for two factors - case vs control.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
912
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
913 More complex interfaces are possible but painful at present.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
914 Probably need to specify a phenotype file to do this better.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
915 Work in progress. Send code.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
916
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
917 If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
918 put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number or other blocking factor.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
919 Supply a list of integers, one for each sample, or an empty string if samples are all independent.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
920 If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
921 Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
922
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
923 So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
924 eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects, you might use
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
925 8,9,1,1,2,2
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
926 as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
927
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
928 **Methods available**
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
929
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
930 You can run 3 popular Bioconductor packages available for count data.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
931
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
932 edgeR - see edgeR_ for details
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
933
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
934 VOOM/limma - see limma_VOOM_ for details
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
935
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
936 DESeq2 - see DESeq2_ for details
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
937
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
938 and optionally camera in edgeR which works better if MSigDB is installed.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
939
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
940 **Outputs**
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
941
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
942 Some helpful plots and analysis results. Note that most of these are produced using R code
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
943 suggested by the excellent documentation and vignettes for the Bioconductor
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
944 packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
945
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
946 **Note on Voom**
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
947
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
948 The voom from limma version 3.16.6 help in R includes this from the authors - but you should read the paper to interpret this method.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
949
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
950 This function is intended to process RNA-Seq or ChIP-Seq data prior to linear modelling in limma.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
951
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
952 voom is an acronym for mean-variance modelling at the observational level.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
953 The key concern is to estimate the mean-variance relationship in the data, then use this to compute appropriate weights for each observation.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
954 Count data almost always show non-trivial mean-variance relationships. Raw counts show increasing variance with increasing count size, while log-counts typically show a decreasing mean-variance trend.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
955 This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
956 The weights are then used in the linear modelling process to adjust for heteroscedasticity.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
957
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
958 In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using lowess.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
959 The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
960 The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
961 Tags with zero counts across all samples are not included in the lowess fit. Optional normalization is performed using normalizeBetweenArrays.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
962 Using fitted values of log2 counts from a linear model fit by lmFit, variances from the mean-variance trend were interpolated for each observation.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
963 This was carried out by approxfun. Inverse variance weights can be used to correct for mean-variance trend in the count data.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
964
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
965
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
966 Author(s)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
967
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
968 Charity Law and Gordon Smyth
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
969
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
970 References
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
971
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
972 Law, CW (2013). Precision weights for gene expression analysis. PhD Thesis. University of Melbourne, Australia.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
973
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
974 Law, CW, Chen, Y, Shi, W, Smyth, GK (2013). Voom! Precision weights unlock linear model analysis tools for RNA-seq read counts.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
975 Technical Report 1 May 2013, Bioinformatics Division, Walter and Eliza Hall Institute of Medical Research, Melbourne, Australia.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
976 http://www.statsci.org/smyth/pubs/VoomPreprint.pdf
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
977
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
978 See Also
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
979
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
980 A voom case study is given in the edgeR User's Guide.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
981
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
982 vooma is a similar function but for microarrays instead of RNA-seq.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
983
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
984
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
985 ***old rant on changes to Bioconductor package variable names between versions***
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
986
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
987 The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
988 breaking this and all other code that assumed the old name for this variable,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
989 between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing).
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
990 This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
991 to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
992 when their old scripts break. This tool currently works with 2.4.6.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
993
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
994 **Note on prior.N**
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
995
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
996 http://seqanswers.com/forums/showthread.php?t=5591 says:
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
997
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
998 *prior.n*
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
999
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1000 The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1001 You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1002 in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1003 tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1004 common likelihood the weight of one observation.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1005
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1006 In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1007 or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1008 you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1009 (squeezing) of the tagwise dispersions. How many samples do you have in your experiment?
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1010 What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1011 If you have more samples, then the tagwise dispersion estimates will be more reliable,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1012 so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1013
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1014
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1015 From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1016
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1017 Dear Dorota,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1018
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1019 The important settings are prior.df and trend.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1020
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1021 prior.n and prior.df are related through prior.df = prior.n * residual.df,
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1022 and your experiment has residual.df = 36 - 12 = 24. So the old setting of
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1023 prior.n=10 is equivalent for your data to prior.df = 240, a very large
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1024 value. Going the other way, the new setting of prior.df=10 is equivalent
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1025 to prior.n=10/24.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1026
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1027 To recover old results with the current software you would use
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1028
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1029 estimateTagwiseDisp(object, prior.df=240, trend="none")
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1030
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1031 To get the new default from old software you would use
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1032
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1033 estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1034
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1035 Actually the old trend method is equivalent to trend="loess" in the new
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1036 software. You should use plotBCV(object) to see whether a trend is
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1037 required.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1038
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1039 Note you could also use
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1040
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1041 prior.n = getPriorN(object, prior.df=10)
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1042
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1043 to map between prior.df and prior.n.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1044
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1045 ----
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1046
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1047 **Attributions**
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1048
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1049 edgeR - edgeR_
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1050
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1051 VOOM/limma - limma_VOOM_
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1052
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1053 DESeq2 - DESeq2_ for details
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1054
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1055 See above for the Bioconductor documentation of the packages exposed in Galaxy by this tool and app store package.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1056
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1057 Galaxy_ (that's what you are using right now!) for gluing everything together
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1058
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1059 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1060 licensed to you under the LGPL_ like other rgenetics artefacts.
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1061
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1062 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1063 .. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1064 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1065 .. _DESeq2: http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1066 .. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1067 .. _Galaxy: http://getgalaxy.org
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1068 </help>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1069
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1070 </tool>
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1071
c0fa3dde02d9 Uploaded
fubar
parents:
diff changeset
1072