annotate rgedgeRpaired_nocamera.xml~ @ 35:375269b30baf draft

Uploaded
author fubar
date Sun, 22 Dec 2013 01:46:48 -0500
parents 8051d14cf819
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
32
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1 <tool id="rgDifferentialCount" name="Differential_Count" version="0.22">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
2 <description>models using BioConductor packages</description>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
3 <requirements>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
4 <requirement type="package" version="3.0.1">r3</requirement>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
5 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
6 <requirement type="package" version="9.07">ghostscript</requirement>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
7 <requirement type="package" version="2.12">biocbasics</requirement>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
8 </requirements>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
9
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
10 <command interpreter="python">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
11 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
12 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
13 </command>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
14 <inputs>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
15 <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
16 help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
17 <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
18 help="Supply a meaningful name here to remind you what the outputs contain">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
19 <sanitizer invalid_char="">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
20 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
21 </sanitizer>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
22 </param>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
23 <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
24 <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
25 multiple="true" use_header_names="true" size="120" display="checkboxes">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
26 <validator type="no_options" message="Please select at least one column."/>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
27 <sanitizer invalid_char="">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
28 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
29 </sanitizer>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
30 </param>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
31 <param name="control_name" type="text" value="Control" size="50" label="Control Name"/>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
32 <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
33 multiple="true" use_header_names="true" size="120" display="checkboxes" optional="true">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
34 <validator type="no_options" message="Please select at least one column."/>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
35 <sanitizer invalid_char="">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
36 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
37 </sanitizer>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
38 </param>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
39 <param name="subjectids" type="text" optional="true" size="120" value = ""
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
40 label="IF SUBJECTS NOT ALL INDEPENDENT! Enter comma separated strings to indicate sample labels for (eg) pairing - must be one for every column in input"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
41 help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter 'A99,C21,A99,C21'">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
42 <sanitizer>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
43 <valid initial="string.letters,string.digits"><add value="," /> </valid>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
44 </sanitizer>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
45 </param>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
46 <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
47 help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
48 <param name="useNDF" type="boolean" truevalue="T" falsevalue="F" checked="false" size="1"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
49 label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
50 help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
51
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
52 <conditional name="edgeR">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
53 <param name="doedgeR" type="select"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
54 label="Run this model using edgeR"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
55 help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
56 <option value="F">Do not run edgeR</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
57 <option value="T" selected="true">Run edgeR</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
58 </param>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
59 <when value="T">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
60 <param name="edgeR_priordf" type="integer" value="20" size="3"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
61 label="prior.df for tagwise dispersion - lower value = more emphasis on each tag's variance. Replaces prior.n and prior.df = prior.n * residual.df"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
62 help="0 = Use edgeR default. Use a small value to 'smooth' small samples. See edgeR docs and note below"/>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
63 </when>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
64 <when value="F"></when>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
65 </conditional>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
66 <conditional name="DESeq2">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
67 <param name="doDESeq2" type="select"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
68 label="Run the same model with DESeq2 and compare findings"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
69 help="DESeq2 is an update to the DESeq package. It uses different assumptions and methods to edgeR">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
70 <option value="F" selected="true">Do not run DESeq2</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
71 <option value="T">Run DESeq2</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
72 </param>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
73 <when value="T">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
74 <param name="DESeq_fitType" type="select">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
75 <option value="parametric" selected="true">Parametric (default) fit for dispersions</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
76 <option value="local">Local fit - this will automagically be used if parametric fit fails</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
77 <option value="mean">Mean dispersion fit- use this if you really understand what you're doing - read the fine manual linked below in the documentation</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
78 </param>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
79 </when>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
80 <when value="F"> </when>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
81 </conditional>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
82 <param name="doVoom" type="select"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
83 label="Run the same model with Voom/limma and compare findings"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
84 help="Voom uses counts per million and a precise transformation of variance so count data can be analysed using limma">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
85 <option value="F" selected="true">Do not run VOOM</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
86 <option value="T">Run VOOM</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
87 </param>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
88 <!--
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
89 <conditional name="camera">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
90 <param name="doCamera" type="select" label="Run the edgeR implementation of Camera GSEA for up/down gene sets"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
91 help="If yes, you can choose a set of genesets to test and/or supply a gmt format geneset collection from your history">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
92 <option value="F" selected="true">Do not run GSEA tests with the Camera algorithm</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
93 <option value="T">Run GSEA tests with the Camera algorithm</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
94 </param>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
95 <when value="T">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
96 <conditional name="gmtSource">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
97 <param name="refgmtSource" type="select"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
98 label="Use a gene set (.gmt) from your history and/or use a built-in (MSigDB etc) gene set">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
99 <option value="indexed" selected="true">Use a built-in gene set</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
100 <option value="history">Use a gene set from my history</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
101 <option value="both">Add a gene set from my history to a built in gene set</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
102 </param>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
103 <when value="indexed">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
104 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
105 <options from_data_table="gseaGMT_3.1">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
106 <filter type="sort_by" column="2" />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
107 <validator type="no_options" message="No GMT v3.1 files are available - please install them"/>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
108 </options>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
109 </param>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
110 </when>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
111 <when value="history">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
112 <param name="ownGMT" type="data" format="gmt" label="Select a Gene Set from your history" />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
113 </when>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
114 <when value="both">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
115 <param name="ownGMT" type="data" format="gseagmt" label="Select a Gene Set from your history" />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
116 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
117 <options from_data_table="gseaGMT_4">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
118 <filter type="sort_by" column="2" />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
119 <validator type="no_options" message="No GMT v4 files are available - please fix tool_data_table and loc files"/>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
120 </options>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
121 </param>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
122 </when>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
123 </conditional>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
124 </when>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
125 <when value="F">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
126 </when>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
127 </conditional>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
128 -->
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
129 <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for amily wise error rate control"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
130 help="Conventional default value of 0.05 recommended"/>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
131 <param name="fdrtype" type="select" label="FDR (Type II error) control method"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
132 help="Use fdr or bh typically to control for the number of tests in a reliable way">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
133 <option value="fdr" selected="true">fdr</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
134 <option value="BH">Benjamini Hochberg</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
135 <option value="BY">Benjamini Yukateli</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
136 <option value="bonferroni">Bonferroni</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
137 <option value="hochberg">Hochberg</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
138 <option value="holm">Holm</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
139 <option value="hommel">Hommel</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
140 <option value="none">no control for multiple tests</option>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
141 </param>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
142 </inputs>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
143 <outputs>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
144 <data format="tabular" name="out_edgeR" label="${title}_topTable_edgeR.xls">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
145 <filter>edgeR['doedgeR'] == "T"</filter>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
146 </data>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
147 <data format="tabular" name="out_DESeq2" label="${title}_topTable_DESeq2.xls">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
148 <filter>DESeq2['doDESeq2'] == "T"</filter>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
149 </data>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
150 <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
151 <filter>doVoom == "T"</filter>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
152 </data>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
153 <data format="html" name="html_file" label="${title}.html"/>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
154 </outputs>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
155 <stdio>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
156 <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix" />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
157 </stdio>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
158 <tests>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
159 <test>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
160 <param name='input1' value='test_bams2mx.xls' ftype='tabular' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
161 <param name='treatment_name' value='liver' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
162 <param name='title' value='edgeRtest' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
163 <param name='useNDF' value='' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
164 <param name='doedgeR' value='T' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
165 <param name='doVoom' value='T' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
166 <param name='doDESeq2' value='T' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
167 <param name='fdrtype' value='fdr' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
168 <param name='edgeR_priordf' value="8" />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
169 <param name='fdrthresh' value="0.05" />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
170 <param name='control_name' value='heart' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
171 <param name='subjectids' value='' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
172 <param name='Control_cols' value='3,4,5,9' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
173 <param name='Treat_cols' value='2,6,7,8' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
174 <output name='out_edgeR' file='edgeRtest1out.xls' compare='diff' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
175 <output name='html_file' file='edgeRtest1out.html' compare='diff' lines_diff='20' />
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
176 </test>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
177 </tests>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
178
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
179 <configfiles>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
180 <configfile name="runme">
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
181 <![CDATA[
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
182 #
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
183 # edgeR.Rscript
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
184 # updated npv 2011 for R 2.14.0 and edgeR 2.4.0 by ross
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
185 # Performs DGE on a count table containing n replicates of two conditions
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
186 #
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
187 # Parameters
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
188 #
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
189 # 1 - Output Dir
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
190
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
191 # Original edgeR code by: S.Lunke and A.Kaspi
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
192 reallybig = log10(.Machine\$double.xmax)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
193 reallysmall = log10(.Machine\$double.xmin)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
194 library('stringr')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
195 library('gplots')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
196 library('edgeR')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
197 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
198 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
199 # Perform clustering for significant pvalues after controlling FWER
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
200 samples = colnames(cmat)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
201 gu = unique(group)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
202 gn = rownames(cmat)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
203 if (length(gu) == 2) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
204 col.map = function(g) {if (g==gu[1]) "#FF0000" else "#0000FF"}
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
205 pcols = unlist(lapply(group,col.map))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
206 } else {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
207 colours = rainbow(length(gu),start=0,end=4/6)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
208 pcols = colours[match(group,gu)] }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
209 dm = cmat[(! is.na(gn)),]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
210 # remove unlabelled hm rows
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
211 nprobes = nrow(dm)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
212 # sub = paste('Showing',nprobes,'contigs ranked for evidence of differential abundance')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
213 if (nprobes > nsamp) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
214 dm =dm[1:nsamp,]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
215 #sub = paste('Showing',nsamp,'contigs ranked for evidence for differential abundance out of',nprobes,'total')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
216 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
217 newcolnames = substr(colnames(dm),1,20)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
218 colnames(dm) = newcolnames
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
219 pdf(outpdfname)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
220 heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none',
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
221 Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
222 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
223 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
224
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
225 hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
226 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
227 # for 2 groups only was
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
228 #col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"}
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
229 #pcols = unlist(lapply(group,col.map))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
230 gu = unique(group)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
231 colours = rainbow(length(gu),start=0.3,end=0.6)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
232 pcols = colours[match(group,gu)]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
233 nrows = nrow(cmat)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
234 mtitle = paste(myTitle,'Heatmap: n contigs =',nrows)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
235 if (nrows > nsamp) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
236 cmat = cmat[c(1:nsamp),]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
237 mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
238 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
239 newcolnames = substr(colnames(cmat),1,20)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
240 colnames(cmat) = newcolnames
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
241 pdf(outpdfname)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
242 heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
243 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
244 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
245
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
246 qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
247 # stolen from https://gist.github.com/703512
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
248 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
249 o = -log10(sort(pvector,decreasing=F))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
250 e = -log10( 1:length(o)/length(o) )
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
251 o[o==-Inf] = reallysmall
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
252 o[o==Inf] = reallybig
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
253 maint = descr
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
254 pdf(outpdf)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
255 plot(e,o,pch=19,cex=1, main=maint, ...,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
256 xlab=expression(Expected~~-log[10](italic(p))),
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
257 ylab=expression(Observed~~-log[10](italic(p))),
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
258 xlim=c(0,max(e)), ylim=c(0,max(o)))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
259 lines(e,e,col="red")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
260 grid(col = "lightgray", lty = "dotted")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
261 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
262 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
263
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
264 smearPlot = function(DGEList,deTags, outSmear, outMain)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
265 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
266 pdf(outSmear)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
267 plotSmear(DGEList,de.tags=deTags,main=outMain)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
268 grid(col="lightgray", lty="dotted")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
269 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
270 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
271
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
272 boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
273 { #
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
274 nc = ncol(rawrs)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
275 for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA}
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
276 fullnames = colnames(rawrs)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
277 newcolnames = substr(colnames(rawrs),1,20)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
278 colnames(rawrs) = newcolnames
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
279 newcolnames = substr(colnames(cleanrs),1,20)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
280 colnames(cleanrs) = newcolnames
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
281 defpar = par(no.readonly=T)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
282 print.noquote('raw contig counts by sample:')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
283 print.noquote(summary(rawrs))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
284 print.noquote('normalised contig counts by sample:')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
285 print.noquote(summary(cleanrs))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
286 pdf(pdfname)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
287 par(mfrow=c(1,2))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
288 boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('Raw:',maint))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
289 grid(col="lightgray",lty="dotted")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
290 boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('After ',maint))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
291 grid(col="lightgray",lty="dotted")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
292 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
293 pdfname = "sample_counts_histogram.pdf"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
294 nc = ncol(rawrs)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
295 print.noquote(paste('Using ncol rawrs=',nc))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
296 ncroot = round(sqrt(nc))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
297 if (ncroot*ncroot < nc) { ncroot = ncroot + 1 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
298 m = c()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
299 for (i in c(1:nc)) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
300 rhist = hist(rawrs[,i],breaks=100,plot=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
301 m = append(m,max(rhist\$counts))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
302 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
303 ymax = max(m)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
304 ncols = length(fullnames)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
305 if (ncols > 20)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
306 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
307 scale = 7*ncols/20
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
308 pdf(pdfname,width=scale,height=scale)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
309 } else {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
310 pdf(pdfname)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
311 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
312 par(mfrow=c(ncroot,ncroot))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
313 for (i in c(1:nc)) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
314 hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw count', col="maroon",
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
315 breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
316 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
317 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
318 par(defpar)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
319
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
320 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
321
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
322 cumPlot = function(rawrs,cleanrs,maint,myTitle)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
323 { # updated to use ecdf
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
324 pdfname = "Filtering_rowsum_bar_charts.pdf"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
325 defpar = par(no.readonly=T)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
326 lrs = log(rawrs,10)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
327 lim = max(lrs)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
328 pdf(pdfname)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
329 par(mfrow=c(2,1))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
330 hist(lrs,breaks=100,main=paste('Before:',maint),xlab="# Reads (log)",
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
331 ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
332 grid(col="lightgray", lty="dotted")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
333 lrs = log(cleanrs,10)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
334 hist(lrs,breaks=100,main=paste('After:',maint),xlab="# Reads (log)",
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
335 ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
336 grid(col="lightgray", lty="dotted")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
337 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
338 par(defpar)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
339 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
340
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
341 cumPlot1 = function(rawrs,cleanrs,maint,myTitle)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
342 { # updated to use ecdf
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
343 pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
344 pdf(pdfname)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
345 par(mfrow=c(2,1))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
346 lastx = max(rawrs)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
347 rawe = knots(ecdf(rawrs))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
348 cleane = knots(ecdf(cleanrs))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
349 cy = 1:length(cleane)/length(cleane)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
350 ry = 1:length(rawe)/length(rawe)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
351 plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads",
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
352 ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
353 grid(col="blue")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
354 plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads",
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
355 ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
356 grid(col="blue")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
357 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
358 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
359
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
360
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
361
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
362 doGSEA = function(y=NULL,design=NULL,histgmt="",
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
363 bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
364 ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
365 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
366 sink('Camera.log')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
367 genesets = c()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
368 if (bigmt > "")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
369 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
370 bigenesets = readLines(bigmt)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
371 genesets = bigenesets
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
372 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
373 if (histgmt > "")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
374 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
375 hgenesets = readLines(histgmt)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
376 if (bigmt > "") {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
377 genesets = rbind(genesets,hgenesets)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
378 } else {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
379 genesets = hgenesets
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
380 } # use only history if no bi
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
381 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
382 print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
383 genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
384 outf = outfname
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
385 head=paste(myTitle,'edgeR GSEA')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
386 write(head,file=outfname,append=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
387 ntest=length(genesets)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
388 urownames = toupper(rownames(y))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
389 upcam = c()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
390 downcam = c()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
391 for (i in 1:ntest) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
392 gs = unlist(genesets[i])
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
393 g = gs[1] # geneset_id
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
394 u = gs[2]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
395 if (u > "") { u = paste("<a href=\'",u,"\'>",u,"</a>",sep="") }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
396 glist = gs[3:length(gs)] # member gene symbols
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
397 glist = toupper(glist)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
398 inglist = urownames %in% glist
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
399 nin = sum(inglist)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
400 if ((nin > minnin) && (nin < maxnin)) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
401 ### print(paste('@@found',sum(inglist),'genes in glist'))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
402 camres = camera(y=y,index=inglist,design=design)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
403 if (! is.null(camres)) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
404 rownames(camres) = g # gene set name
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
405 camres = cbind(GeneSet=g,URL=u,camres)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
406 if (camres\$Direction == "Up")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
407 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
408 upcam = rbind(upcam,camres) } else {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
409 downcam = rbind(downcam,camres)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
410 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
411 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
412 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
413 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
414 uscam = upcam[order(upcam\$PValue),]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
415 unadjp = uscam\$PValue
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
416 uscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
417 nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
418 dscam = downcam[order(downcam\$PValue),]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
419 unadjp = dscam\$PValue
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
420 dscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
421 ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
422 write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
423 write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
424 print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
425 write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
426 print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
427 write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
428 sink()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
429 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
430
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
431
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
432
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
433
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
434 doGSEAatonce = function(y=NULL,design=NULL,histgmt="",
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
435 bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
436 ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
437 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
438 sink('Camera.log')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
439 genesets = c()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
440 if (bigmt > "")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
441 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
442 bigenesets = readLines(bigmt)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
443 genesets = bigenesets
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
444 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
445 if (histgmt > "")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
446 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
447 hgenesets = readLines(histgmt)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
448 if (bigmt > "") {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
449 genesets = rbind(genesets,hgenesets)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
450 } else {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
451 genesets = hgenesets
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
452 } # use only history if no bi
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
453 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
454 print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
455 genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
456 outf = outfname
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
457 head=paste(myTitle,'edgeR GSEA')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
458 write(head,file=outfname,append=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
459 ntest=length(genesets)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
460 urownames = toupper(rownames(y))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
461 upcam = c()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
462 downcam = c()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
463 incam = c()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
464 urls = c()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
465 gsids = c()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
466 for (i in 1:ntest) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
467 gs = unlist(genesets[i])
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
468 gsid = gs[1] # geneset_id
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
469 url = gs[2]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
470 if (url > "") { url = paste("<a href=\'",url,"\'>",url,"</a>",sep="") }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
471 glist = gs[3:length(gs)] # member gene symbols
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
472 glist = toupper(glist)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
473 inglist = urownames %in% glist
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
474 nin = sum(inglist)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
475 if ((nin > minnin) && (nin < maxnin)) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
476 incam = c(incam,inglist)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
477 gsids = c(gsids,gsid)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
478 urls = c(urls,url)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
479 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
480 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
481 incam = as.list(incam)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
482 names(incam) = gsids
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
483 allcam = camera(y=y,index=incam,design=design)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
484 allcamres = cbind(geneset=gsids,allcam,URL=urls)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
485 for (i in 1:ntest) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
486 camres = allcamres[i]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
487 res = try(test = (camres\$Direction == "Up"))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
488 if ("try-error" %in% class(res)) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
489 cat("test failed, camres = :")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
490 print.noquote(camres)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
491 } else { if (camres\$Direction == "Up")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
492 { upcam = rbind(upcam,camres)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
493 } else { downcam = rbind(downcam,camres)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
494 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
495
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
496 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
497 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
498 uscam = upcam[order(upcam\$PValue),]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
499 unadjp = uscam\$PValue
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
500 uscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
501 nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
502 dscam = downcam[order(downcam\$PValue),]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
503 unadjp = dscam\$PValue
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
504 dscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
505 ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
506 write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
507 write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
508 print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
509 write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
510 print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
511 write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
512 sink()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
513 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
514
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
515
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
516 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
517 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
518 filterquantile=0.2, subjects=c(),mydesign=NULL,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
519 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
520 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
521 doCook=F,DESeq_fitType="parameteric")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
522 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
523 # Error handling
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
524 if (length(unique(group))!=2){
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
525 print("Number of conditions identified in experiment does not equal 2")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
526 q()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
527 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
528 require(edgeR)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
529 options(width = 512)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
530 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
531 allN = nrow(Count_Matrix)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
532 nscut = round(ncol(Count_Matrix)/2)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
533 colTotmillionreads = colSums(Count_Matrix)/1e6
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
534 counts.dataframe = as.data.frame(c())
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
535 rawrs = rowSums(Count_Matrix)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
536 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
537 nzN = nrow(nonzerod)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
538 nzrs = rowSums(nonzerod)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
539 zN = allN - nzN
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
540 print('# Quantiles for non-zero row counts:',quote=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
541 print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
542 if (useNDF == T)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
543 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
544 gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
545 lo = colSums(Count_Matrix[!gt1rpin3,])
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
546 workCM = Count_Matrix[gt1rpin3,]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
547 cleanrs = rowSums(workCM)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
548 cleanN = length(cleanrs)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
549 meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
550 print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
551 maint = paste('Filter >=1/million reads in >=',nscut,'samples')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
552 } else {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
553 useme = (nzrs > quantile(nzrs,filterquantile))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
554 workCM = nonzerod[useme,]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
555 lo = colSums(nonzerod[!useme,])
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
556 cleanrs = rowSums(workCM)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
557 cleanN = length(cleanrs)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
558 meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
559 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
560 maint = paste('Filter below',filterquantile,'quantile')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
561 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
562 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
563 allgenes = rownames(workCM)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
564 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
565 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
566 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
567 testreg = str_match(allgenes,reg)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
568 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
569 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
570 print("@@ using ucsc substitution for urls")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
571 contigurls = paste0(ucsc,"&amp;position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
572 } else {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
573 print.noquote("@@ using genecards substitution for urls")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
574 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
575 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
576 print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
577 cmrowsums = rowSums(workCM)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
578 TName=unique(group)[1]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
579 CName=unique(group)[2]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
580 if (is.null(mydesign)) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
581 if (length(subjects) == 0)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
582 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
583 mydesign = model.matrix(~group)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
584 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
585 else {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
586 subjf = factor(subjects)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
587 mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
588 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
589 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
590 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
591 print.noquote('Using design matrix:')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
592 print.noquote(mydesign)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
593 if (doedgeR) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
594 sink('edgeR.log')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
595 #### Setup DGEList object
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
596 DGEList = DGEList(counts=workCM, group = group)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
597 DGEList = calcNormFactors(DGEList)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
598
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
599 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
600 comdisp = DGEList\$common.dispersion
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
601 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
602 if (edgeR_priordf > 0) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
603 print.noquote(paste("prior.df =",edgeR_priordf))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
604 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
605 } else {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
606 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
607 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
608 DGLM = glmFit(DGEList,design=mydesign)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
609 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
610 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
611 normData = (1e+06*DGEList\$counts/efflib)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
612 uoutput = cbind(
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
613 Name=as.character(rownames(DGEList\$counts)),
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
614 DE\$table,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
615 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
616 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
617 DGEList\$counts
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
618 )
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
619 soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
620 goodness = gof(DGLM, pcutoff=fdrthresh)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
621 if (sum(goodness\$outlier) > 0) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
622 print.noquote('GLM outliers:')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
623 print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
624 } else {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
625 print('No GLM fit outlier genes found\n')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
626 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
627 z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
628 pdf("edgeR_GoodnessofFit.pdf")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
629 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
630 abline(0,1,lwd=3)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
631 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
632 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
633 estpriorn = getPriorN(DGEList)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
634 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
635 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
636 normData = (1e+06*DGEList\$counts/efflib)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
637 uniqueg = unique(group)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
638 #### Plot MDS
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
639 sample_colors = match(group,levels(group))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
640 sampleTypes = levels(factor(group))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
641 print.noquote(sampleTypes)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
642 pdf("edgeR_MDSplot.pdf")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
643 plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
644 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
645 grid(col="blue")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
646 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
647 colnames(normData) = paste( colnames(normData),'N',sep="_")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
648 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
649 nzd = data.frame(log(nonzerod + 1e-2,10))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
650 try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") )
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
651 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
652 tt = cbind(
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
653 Name=as.character(rownames(DGEList\$counts)),
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
654 DE\$table,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
655 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
656 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
657 )
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
658 print.noquote("# edgeR Top tags\n")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
659 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
660 tt = tt[order(DE\$table\$PValue),]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
661 print.noquote(tt[1:50,])
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
662 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
663 nsig = length(deTags)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
664 print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
665 deColours = ifelse(deTags,'red','black')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
666 pdf("edgeR_BCV_vs_abundance.pdf")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
667 plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
668 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
669 dg = DGEList[order(DE\$table\$PValue),]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
670 #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
671 efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
672 normData = (1e+06*dg\$counts/efflib)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
673 outpdfname="edgeR_top_100_heatmap.pdf"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
674 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
675 outSmear = "edgeR_smearplot.pdf"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
676 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
677 smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
678 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
679 norm.factor = DGEList\$samples\$norm.factors
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
680 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
681 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
682 edgeRcounts = rep(0, length(allgenes))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
683 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
684 sink()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
685 } ### doedgeR
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
686 if (doDESeq2 == T)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
687 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
688 sink("DESeq2.log")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
689 # DESeq2
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
690 require('DESeq2')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
691 library('RColorBrewer')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
692 if (length(subjects) == 0)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
693 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
694 pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
695 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
696 } else {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
697 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
698 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
699 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
700 #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
701 #rDESeq = results(DESeq2)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
702 #newCountDataSet(workCM, group)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
703 deSeqDatsizefac = estimateSizeFactors(deSEQds)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
704 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
705 resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
706 rDESeq = as.data.frame(results(resDESeq))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
707 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
708 srDESeq = rDESeq[order(rDESeq\$pvalue),]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
709 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
710 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
711 cat("# DESeq top 50\n")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
712 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
713 srDESeq = rDESeq[order(rDESeq\$pvalue),]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
714 print.noquote(srDESeq[1:50,])
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
715 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
716 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
717 DESeqcounts = rep(0, length(allgenes))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
718 DESeqcounts[DESeqcountsindex] = 1
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
719 pdf("DESeq2_dispersion_estimates.pdf")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
720 plotDispEsts(resDESeq)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
721 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
722 ysmall = abs(min(rDESeq\$log2FoldChange))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
723 ybig = abs(max(rDESeq\$log2FoldChange))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
724 ylimit = min(4,ysmall,ybig)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
725 pdf("DESeq2_MA_plot.pdf")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
726 plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
727 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
728 rlogres = rlogTransformation(resDESeq)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
729 sampledists = dist( t( assay(rlogres) ) )
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
730 sdmat = as.matrix(sampledists)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
731 pdf("DESeq2_sample_distance_plot.pdf")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
732 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
733 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
734 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
735 ###outpdfname="DESeq2_top50_heatmap.pdf"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
736 ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
737 sink()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
738 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
739 if ("try-error" %in% class(result)) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
740 print.noquote('DESeq2 plotPCA failed.')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
741 } else {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
742 pdf("DESeq2_PCA_plot.pdf")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
743 #### wtf - print? Seems needed to get this to work
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
744 print(ppca)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
745 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
746 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
747 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
748
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
749 if (doVoom == T) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
750 sink('Voom.log')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
751 if (doedgeR == F) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
752 #### Setup DGEList object
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
753 DGEList = DGEList(counts=workCM, group = group)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
754 DGEList = calcNormFactors(DGEList)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
755 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
756 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
757 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
758 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
759 norm.factor = DGEList\$samples\$norm.factors
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
760 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
761 pdf("Voom_mean_variance_plot.pdf")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
762 dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
763 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
764 # Use limma to fit data
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
765 fit = lmFit(dat.voomed, mydesign)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
766 fit = eBayes(fit)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
767 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
768 qqPlot(descr=paste(myTitle,'Voom-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='Voom_qqplot.pdf')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
769 rownames(rvoom) = rownames(workCM)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
770 rvoom = cbind(rvoom,NReads=cmrowsums)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
771 srvoom = rvoom[order(rvoom\$P.Value),]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
772 write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
773 rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
774 srvoom = rvoom[order(rvoom\$P.Value),]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
775 cat("# Voom top 50\n")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
776 print(srvoom[1:50,])
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
777 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
778 topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
779 voomcountsindex = which(allgenes %in% topresults.voom\$ID)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
780 voomcounts = rep(0, length(allgenes))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
781 voomcounts[voomcountsindex] = 1
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
782 sink()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
783 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
784
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
785 if (doCamera) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
786 doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
787 outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
788 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
789
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
790 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
791 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
792 vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
793 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
794 VOOM_limma = voomcounts, row.names = allgenes)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
795 } else if ((doDESeq2==T) && (doedgeR==T)) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
796 vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
797 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
798 } else if ((doVoom==T) && (doedgeR==T)) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
799 vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
800 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
801 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
802
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
803 if (nrow(counts.dataframe > 1)) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
804 counts.venn = vennCounts(counts.dataframe)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
805 vennf = "Venn_significant_genes_overlap.pdf"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
806 pdf(vennf)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
807 vennDiagram(counts.venn,main=vennmain,col="maroon")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
808 dev.off()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
809 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
810 } #### doDESeq2 or doVoom
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
811
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
812 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
813 #### Done
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
814
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
815 ###sink(stdout(),append=T,type="message")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
816 builtin_gmt = ""
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
817 history_gmt = ""
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
818 history_gmt_name = ""
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
819 out_edgeR = F
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
820 out_DESeq2 = F
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
821 out_VOOM = "$out_VOOM"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
822 doDESeq2 = $DESeq2.doDESeq2 # make these T or F
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
823 doVoom = $doVoom
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
824 doCamera = F
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
825 doedgeR = $edgeR.doedgeR
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
826 edgeR_priordf = 0
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
827
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
828
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
829 #if $doVoom == "T":
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
830 out_VOOM = "$out_VOOM"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
831 #end if
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
832
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
833 #if $DESeq2.doDESeq2 == "T":
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
834 out_DESeq2 = "$out_DESeq2"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
835 DESeq_fitType = "$DESeq2.DESeq_fitType"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
836 #end if
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
837
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
838 #if $edgeR.doedgeR == "T":
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
839 out_edgeR = "$out_edgeR"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
840 edgeR_priordf = $edgeR.edgeR_priordf
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
841 #end if
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
842
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
843
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
844 if (sum(c(doedgeR,doVoom,doDESeq2)) == 0)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
845 {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
846 write("No methods chosen - nothing to do! Please try again after choosing one or more methods", stderr())
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
847 quit(save="no",status=2)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
848 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
849
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
850 Out_Dir = "$html_file.files_path"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
851 Input = "$input1"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
852 TreatmentName = "$treatment_name"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
853 TreatmentCols = "$Treat_cols"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
854 ControlName = "$control_name"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
855 ControlCols= "$Control_cols"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
856 org = "$input1.dbkey"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
857 if (org == "") { org = "hg19"}
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
858 fdrtype = "$fdrtype"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
859 fdrthresh = $fdrthresh
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
860 useNDF = $useNDF
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
861 fQ = $fQ # non-differential centile cutoff
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
862 myTitle = "$title"
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
863 sids = strsplit("$subjectids",',')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
864 subjects = unlist(sids)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
865 nsubj = length(subjects)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
866 TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
867 CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
868 cat('Got TCols=')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
869 cat(TCols)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
870 cat('; CCols=')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
871 cat(CCols)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
872 cat('\n')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
873 useCols = c(TCols,CCols)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
874 if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
875 Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') #Load tab file assume header
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
876 snames = colnames(Count_Matrix)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
877 nsamples = length(snames)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
878 if (nsubj > 0 & nsubj != nsamples) {
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
879 options("show.error.messages"=T)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
880 mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','),
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
881 'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=','))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
882 write(mess, stderr())
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
883 quit(save="no",status=4)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
884 }
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
885 if (length(subjects) != 0) {subjects = subjects[useCols]}
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
886 Count_Matrix = Count_Matrix[,useCols] ### reorder columns
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
887 rn = rownames(Count_Matrix)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
888 islib = rn %in% c('librarySize','NotInBedRegions')
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
889 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
890 Count_Matrix = Count_Matrix[subset(rn,! islib),]
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
891 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
892 group = factor(group, levels=c(ControlName,TreatmentName))
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
893 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relable columns
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
894 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
895 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.',
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
896 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
897 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
898 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
899 sessionInfo()
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
900 ]]>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
901 </configfile>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
902 </configfiles>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
903 <help>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
904
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
905 **What it does**
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
906
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
907 Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
908 Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
909
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
910 **Input**
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
911
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
912 Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
913 and your fave gene model to generate inputs. Each row is a genomic feature (gene or exon eg) and each column the
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
914 non-negative integer count of reads from one sample overlapping the feature.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
915 The matrix must have a header row uniquely identifying the source samples, and unique row names in
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
916 the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
917
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
918 **Specifying comparisons**
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
919
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
920 This is basically dumbed down for two factors - case vs control.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
921
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
922 More complex interfaces are possible but painful at present.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
923 Probably need to specify a phenotype file to do this better.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
924 Work in progress. Send code.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
925
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
926 If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
927 put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number or
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
928 A list of integers, one for each subject or an empty string if samples are all independent.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
929 If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
930 Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
931
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
932 So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
933 eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects. you might use
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
934 8,9,1,1,2,2
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
935 as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
936
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
937 **Methods available**
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
938
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
939 You can run 3 popular Bioconductor packages available for count data.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
940
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
941 edgeR - see edgeR_ for details
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
942
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
943 VOOM/limma - see limma_VOOM_ for details
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
944
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
945 DESeq2 - see DESeq2_ for details
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
946
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
947 and optionally camera in edgeR which works better if MSigDB is installed.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
948
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
949 **Outputs**
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
950
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
951 Some helpful plots and analysis results. Note that most of these are produced using R code
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
952 suggested by the excellent documentation and vignettes for the Bioconductor
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
953 packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
954
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
955 **Note on Voom**
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
956
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
957 The voom from limma version 3.16.6 help in R includes this from the authors - but you should read the paper to interpret this method.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
958
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
959 This function is intended to process RNA-Seq or ChIP-Seq data prior to linear modelling in limma.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
960
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
961 voom is an acronym for mean-variance modelling at the observational level.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
962 The key concern is to estimate the mean-variance relationship in the data, then use this to compute appropriate weights for each observation.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
963 Count data almost show non-trivial mean-variance relationships. Raw counts show increasing variance with increasing count size, while log-counts typically show a decreasing mean-variance trend.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
964 This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
965 The weights are then used in the linear modelling process to adjust for heteroscedasticity.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
966
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
967 In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using lowess.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
968 The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
969 The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
970 Tags with zero counts across all samples are not included in the lowess fit. Optional normalization is performed using normalizeBetweenArrays.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
971 Using fitted values of log2 counts from a linear model fit by lmFit, variances from the mean-variance trend were interpolated for each observation.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
972 This was carried out by approxfun. Inverse variance weights can be used to correct for mean-variance trend in the count data.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
973
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
974
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
975 Author(s)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
976
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
977 Charity Law and Gordon Smyth
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
978
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
979 References
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
980
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
981 Law, CW (2013). Precision weights for gene expression analysis. PhD Thesis. University of Melbourne, Australia.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
982
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
983 Law, CW, Chen, Y, Shi, W, Smyth, GK (2013). Voom! Precision weights unlock linear model analysis tools for RNA-seq read counts.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
984 Technical Report 1 May 2013, Bioinformatics Division, Walter and Eliza Hall Institute of Medical Reseach, Melbourne, Australia.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
985 http://www.statsci.org/smyth/pubs/VoomPreprint.pdf
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
986
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
987 See Also
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
988
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
989 A voom case study is given in the edgeR User's Guide.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
990
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
991 vooma is a similar function but for microarrays instead of RNA-seq.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
992
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
993
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
994 ***old rant on changes to Bioconductor package variable names between versions***
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
995
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
996 The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
997 breaking this and all other code that assumed the old name for this variable,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
998 between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing).
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
999 This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1000 to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1001 when their old scripts break. This tool currently now works with 2.4.6.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1002
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1003 **Note on prior.N**
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1004
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1005 http://seqanswers.com/forums/showthread.php?t=5591 says:
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1006
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1007 *prior.n*
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1008
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1009 The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1010 You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1011 in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1012 tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1013 common likelihood the weight of one observation.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1014
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1015 In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1016 or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1017 you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1018 (squeezing) of the tagwise dispersions. How many samples do you have in your experiment?
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1019 What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1020 If you have more samples, then the tagwise dispersion estimates will be more reliable,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1021 so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1022
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1023
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1024 From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1025
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1026 Dear Dorota,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1027
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1028 The important settings are prior.df and trend.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1029
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1030 prior.n and prior.df are related through prior.df = prior.n * residual.df,
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1031 and your experiment has residual.df = 36 - 12 = 24. So the old setting of
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1032 prior.n=10 is equivalent for your data to prior.df = 240, a very large
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1033 value. Going the other way, the new setting of prior.df=10 is equivalent
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1034 to prior.n=10/24.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1035
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1036 To recover old results with the current software you would use
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1037
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1038 estimateTagwiseDisp(object, prior.df=240, trend="none")
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1039
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1040 To get the new default from old software you would use
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1041
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1042 estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1043
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1044 Actually the old trend method is equivalent to trend="loess" in the new
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1045 software. You should use plotBCV(object) to see whether a trend is
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1046 required.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1047
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1048 Note you could also use
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1049
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1050 prior.n = getPriorN(object, prior.df=10)
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1051
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1052 to map between prior.df and prior.n.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1053
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1054 ----
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1055
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1056 **Attributions**
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1057
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1058 edgeR - edgeR_
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1059
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1060 VOOM/limma - limma_VOOM_
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1061
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1062 DESeq2 - DESeq2_ for details
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1063
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1064 See above for Bioconductor package documentation for packages exposed in Galaxy by this tool and app store package.
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1065
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1066 Galaxy_ (that's what you are using right now!) for gluing everything together
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1067
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1068 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1069 licensed to you under the LGPL_ like other rgenetics artefacts
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1070
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1071 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1072 .. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1073 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1074 .. _DESeq2: http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1075 .. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1076 .. _Galaxy: http://getgalaxy.org
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1077 </help>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1078
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1079 </tool>
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1080
8051d14cf819 update with better filtering
fubar
parents:
diff changeset
1081