annotate rgedgeRpaired.xml.camera @ 75:8596080a8326 draft

Uploaded
author fubar
date Tue, 18 Feb 2014 05:48:23 -0500
parents ccc5a33bb358
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
51
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1 <tool id="rgDifferentialCount" name="Differential_Count" version="0.30">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
2 <description>models using BioConductor packages</description>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
3 <requirements>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
4 <requirement type="package" version="2.14">biocbasics</requirement>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
5 <requirement type="package" version="3.0.2">r302</requirement>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
6 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
7 <requirement type="package" version="9.10">ghostscript</requirement>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
8 </requirements>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
9
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
10 <command interpreter="python">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
11 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
12 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
13 </command>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
14 <inputs>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
15 <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
16 help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
17 <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
18 help="Supply a meaningful name here to remind you what the outputs contain">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
19 <sanitizer invalid_char="">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
20 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
21 </sanitizer>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
22 </param>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
23 <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
24 <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
25 multiple="true" use_header_names="true" size="120" display="checkboxes">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
26 <validator type="no_options" message="Please select at least one column."/>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
27 </param>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
28 <param name="control_name" type="text" value="Control" size="50" label="Control Name"/>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
29 <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
30 multiple="true" use_header_names="true" size="120" display="checkboxes" optional="true">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
31 </param>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
32 <param name="subjectids" type="text" optional="true" size="120" value = ""
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
33 label="IF SUBJECTS NOT ALL INDEPENDENT! Enter comma separated strings to indicate sample labels for (eg) pairing - must be one for every column in input"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
34 help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter 'A99,C21,A99,C21'">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
35 <sanitizer>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
36 <valid initial="string.letters,string.digits"><add value="," /> </valid>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
37 </sanitizer>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
38 </param>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
39 <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
40 help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
41 <param name="useNDF" type="boolean" truevalue="T" falsevalue="F" checked="false" size="1"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
42 label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
43 help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
44
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
45 <conditional name="edgeR">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
46 <param name="doedgeR" type="select"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
47 label="Run this model using edgeR"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
48 help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
49 <option value="F">Do not run edgeR</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
50 <option value="T" selected="true">Run edgeR</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
51 </param>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
52 <when value="T">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
53 <param name="edgeR_priordf" type="integer" value="20" size="3"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
54 label="prior.df for tagwise dispersion - lower value = more emphasis on each tag's variance. Replaces prior.n and prior.df = prior.n * residual.df"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
55 help="0 = Use edgeR default. Use a small value to 'smooth' small samples. See edgeR docs and note below"/>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
56 </when>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
57 <when value="F"></when>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
58 </conditional>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
59 <conditional name="DESeq2">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
60 <param name="doDESeq2" type="select"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
61 label="Run the same model with DESeq2 and compare findings"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
62 help="DESeq2 is an update to the DESeq package. It uses different assumptions and methods to edgeR">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
63 <option value="F" selected="true">Do not run DESeq2</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
64 <option value="T">Run DESeq2</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
65 </param>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
66 <when value="T">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
67 <param name="DESeq_fitType" type="select">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
68 <option value="parametric" selected="true">Parametric (default) fit for dispersions</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
69 <option value="local">Local fit - this will automagically be used if parametric fit fails</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
70 <option value="mean">Mean dispersion fit - use this if you really understand what you're doing - read the fine manual linked below in the documentation</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
71 </param>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
72 </when>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
73 <when value="F"> </when>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
74 </conditional>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
75 <param name="doVoom" type="select"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
76 label="Run the same model with Voom/limma and compare findings"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
77 help="Voom uses counts per million and a precise transformation of variance so count data can be analysed using limma">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
78 <option value="F" selected="true">Do not run VOOM</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
79 <option value="T">Run VOOM</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
80 </param>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
81 <!--
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
82 <conditional name="camera">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
83 <param name="doCamera" type="select" label="Run the edgeR implementation of Camera GSEA for up/down gene sets"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
84 help="If yes, you can choose a set of genesets to test and/or supply a gmt format geneset collection from your history">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
85 <option value="F" selected="true">Do not run GSEA tests with the Camera algorithm</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
86 <option value="T">Run GSEA tests with the Camera algorithm</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
87 </param>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
88 <when value="T">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
89 <conditional name="gmtSource">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
90 <param name="refgmtSource" type="select"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
91 label="Use a gene set (.gmt) from your history and/or use a built-in (MSigDB etc) gene set">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
92 <option value="indexed" selected="true">Use a built-in gene set</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
93 <option value="history">Use a gene set from my history</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
94 <option value="both">Add a gene set from my history to a built in gene set</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
95 </param>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
96 <when value="indexed">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
97 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
98 <options from_data_table="gseaGMT_3.1">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
99 <filter type="sort_by" column="2" />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
100 <validator type="no_options" message="No GMT v3.1 files are available - please install them"/>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
101 </options>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
102 </param>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
103 </when>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
104 <when value="history">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
105 <param name="ownGMT" type="data" format="gmt" label="Select a Gene Set from your history" />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
106 </when>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
107 <when value="both">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
108 <param name="ownGMT" type="data" format="gseagmt" label="Select a Gene Set from your history" />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
109 <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
110 <options from_data_table="gseaGMT_4">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
111 <filter type="sort_by" column="2" />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
112 <validator type="no_options" message="No GMT v4 files are available - please fix tool_data_table and loc files"/>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
113 </options>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
114 </param>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
115 </when>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
116 </conditional>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
117 </when>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
118 <when value="F">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
119 </when>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
120 </conditional>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
121 -->
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
122 <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for family wise error rate control"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
123 help="Conventional default value of 0.05 recommended"/>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
124 <param name="fdrtype" type="select" label="FDR (Type I error) control method"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
125 help="Use fdr or bh typically to control for the number of tests in a reliable way">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
126 <option value="fdr" selected="true">fdr</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
127 <option value="BH">Benjamini Hochberg</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
128 <option value="BY">Benjamini Yekutieli</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
129 <option value="bonferroni">Bonferroni</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
130 <option value="hochberg">Hochberg</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
131 <option value="holm">Holm</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
132 <option value="hommel">Hommel</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
133 <option value="none">no control for multiple tests</option>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
134 </param>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
135 </inputs>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
136 <outputs>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
137 <data format="tabular" name="out_edgeR" label="${title}_topTable_edgeR.xls">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
138 <filter>edgeR['doedgeR'] == "T"</filter>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
139 </data>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
140 <data format="tabular" name="out_DESeq2" label="${title}_topTable_DESeq2.xls">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
141 <filter>DESeq2['doDESeq2'] == "T"</filter>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
142 </data>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
143 <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
144 <filter>doVoom == "T"</filter>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
145 </data>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
146 <data format="html" name="html_file" label="${title}.html"/>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
147 </outputs>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
148 <stdio>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
149 <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix" />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
150 </stdio>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
151 <tests>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
152 <test>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
153 <param name='input1' value='test_bams2mx.xls' ftype='tabular' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
154 <param name='treatment_name' value='liver' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
155 <param name='title' value='edgeRtest' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
156 <param name='useNDF' value='' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
157 <param name='doedgeR' value='T' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
158 <param name='doVoom' value='T' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
159 <param name='doDESeq2' value='T' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
160 <param name='fdrtype' value='fdr' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
161 <param name='edgeR_priordf' value="8" />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
162 <param name='fdrthresh' value="0.05" />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
163 <param name='control_name' value='heart' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
164 <param name='subjectids' value='' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
165 <param name='Control_cols' value='3,4,5,9' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
166 <param name='Treat_cols' value='2,6,7,8' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
167 <output name='out_edgeR' file='edgeRtest1out.xls' compare='diff' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
168 <output name='html_file' file='edgeRtest1out.html' compare='diff' lines_diff='20' />
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
169 </test>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
170 </tests>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
171
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
172 <configfiles>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
173 <configfile name="runme">
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
174 <![CDATA[
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
175 #
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
176 # edgeR.Rscript
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
177 # updated nov 2011 for R 2.14.0 and edgeR 2.4.0 by ross
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
178 # Performs DGE on a count table containing n replicates of two conditions
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
179 #
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
180 # Parameters
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
181 #
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
182 # 1 - Output Dir
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
183
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
184 # Original edgeR code by: S.Lunke and A.Kaspi
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
185 reallybig = log10(.Machine\$double.xmax)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
186 reallysmall = log10(.Machine\$double.xmin)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
187 library('stringr')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
188 library('gplots')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
189 library('edgeR')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
190 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
191 {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
192 # Perform clustering for significant pvalues after controlling FWER
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
193 samples = colnames(cmat)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
194 gu = unique(group)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
195 gn = rownames(cmat)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
196 if (length(gu) == 2) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
197 col.map = function(g) {if (g==gu[1]) "#FF0000" else "#0000FF"}
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
198 pcols = unlist(lapply(group,col.map))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
199 } else {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
200 colours = rainbow(length(gu),start=0,end=4/6)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
201 pcols = colours[match(group,gu)] }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
202 dm = cmat[(! is.na(gn)),]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
203 # remove unlabelled hm rows
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
204 nprobes = nrow(dm)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
205 # sub = paste('Showing',nprobes,'contigs ranked for evidence of differential abundance')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
206 if (nprobes > nsamp) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
207 dm =dm[1:nsamp,]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
208 #sub = paste('Showing',nsamp,'contigs ranked for evidence for differential abundance out of',nprobes,'total')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
209 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
210 newcolnames = substr(colnames(dm),1,20)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
211 colnames(dm) = newcolnames
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
212 pdf(outpdfname)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
213 heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none',
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
214 Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
215 dev.off()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
216 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
217
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
218 hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
219 {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
220 # for 2 groups only was
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
221 #col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"}
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
222 #pcols = unlist(lapply(group,col.map))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
223 gu = unique(group)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
224 colours = rainbow(length(gu),start=0.3,end=0.6)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
225 pcols = colours[match(group,gu)]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
226 nrows = nrow(cmat)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
227 mtitle = paste(myTitle,'Heatmap: n contigs =',nrows)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
228 if (nrows > nsamp) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
229 cmat = cmat[c(1:nsamp),]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
230 mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
231 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
232 newcolnames = substr(colnames(cmat),1,20)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
233 colnames(cmat) = newcolnames
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
234 pdf(outpdfname)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
235 heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
236 dev.off()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
237 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
238
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...)
# Quantile-quantile plot of observed against expected -log10 p values, written to outpdf.
# Adapted from https://gist.github.com/703512
# descr becomes the plot title and anything in ... is handed on to plot().
# reallysmall and reallybig are not defined here and are taken from the enclosing script.
{
    observed = -log10(sort(pvector,decreasing=FALSE))
    npoints = length(observed)
    expected = -log10( (1:npoints)/npoints )
    # infinite values are swapped for the finite stand-ins so the axis limits stay usable
    observed[observed==-Inf] = reallysmall
    observed[observed==Inf] = reallybig
    pdf(outpdf)
    plot(expected,observed,pch=19,cex=1, main=descr, ...,
        xlab=expression(Expected~~-log[10](italic(p))),
        ylab=expression(Observed~~-log[10](italic(p))),
        xlim=c(0,max(expected)), ylim=c(0,max(observed)))
    # red reference line where observed equals expected
    lines(expected,expected,col="red")
    grid(col = "lightgray", lty = "dotted")
    dev.off()
}
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
256
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
smearPlot = function(DGEList,deTags, outSmear, outMain)
# Write a plotSmear() figure for DGEList to the pdf file outSmear.
# deTags is handed to plotSmear as de.tags and outMain is used as the plot title.
{
    pdf(file=outSmear)
    plotSmear(DGEList, main=outMain, de.tags=deTags)
    grid(lty="dotted", col="lightgray")
    dev.off()
}
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
264
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname)
# Draw side by side box plots of rawrs and cleanrs into pdfname, then write one histogram
# per column of rawrs into sample_counts_histogram.pdf in the working directory.
# rawrs and cleanrs are indexed by column and must carry column names.
# maint is appended to the panel titles. myTitle is accepted but not used.
# Column summaries of both inputs are printed before plotting.
{
    nc = ncol(rawrs)
    # negative values are treated as missing
    for (i in seq_len(nc)) { rawrs[(rawrs[,i] < 0),i] = NA }
    fullnames = colnames(rawrs)
    # axis labels are cut to 20 characters; the full names are kept for the histogram subtitles
    colnames(rawrs) = substr(fullnames,1,20)
    colnames(cleanrs) = substr(colnames(cleanrs),1,20)
    print.noquote('raw contig counts by sample:')
    print.noquote(summary(rawrs))
    print.noquote('normalised contig counts by sample:')
    print.noquote(summary(cleanrs))
    pdf(pdfname)
    # Graphics parameters belong to the open device, so mfrow is set after pdf() and needs no
    # restoring once the device is closed. Calling par() while no device is open starts a
    # default device, which leaves a stray Rplots.pdf behind in a non-interactive run.
    par(mfrow=c(1,2))
    boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('Raw:',maint))
    grid(col="lightgray",lty="dotted")
    boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('After ',maint))
    grid(col="lightgray",lty="dotted")
    dev.off()
    histpdf = "sample_counts_histogram.pdf"
    print.noquote(paste('Using ncol rawrs=',nc))
    # smallest square grid that holds one panel per column
    ncroot = ceiling(sqrt(nc))
    # tallest bin over all columns, so every panel shares one y axis
    ymax = 0
    for (i in seq_len(nc)) {
        rhist = hist(rawrs[,i],breaks=100,plot=F)
        ymax = max(ymax,rhist[["counts"]])
    }
    ncols = length(fullnames)
    if (ncols > 20)
    {
        # grow the page beyond the default size when there are many samples
        scale = 7*ncols/20
        pdf(histpdf,width=scale,height=scale)
    } else {
        pdf(histpdf)
    }
    par(mfrow=c(ncroot,ncroot))
    for (i in seq_len(nc)) {
        hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw count', col="maroon",
            breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax))
    }
    dev.off()
}
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
314
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
cumPlot = function(rawrs,cleanrs,maint,myTitle)
# Write two stacked histograms of log10 values to Filtering_rowsum_bar_charts.pdf in the
# working directory: rawrs on top (titled Before) and cleanrs below (titled After).
# Both panels share the x range from 0 to the largest log10 value in rawrs.
# maint extends the panel titles and myTitle is printed as the subtitle.
{
    pdfname = "Filtering_rowsum_bar_charts.pdf"
    lrs = log(rawrs,10)
    lim = max(lrs)
    pdf(pdfname)
    # Graphics parameters belong to the open device, so mfrow is set after pdf() and needs no
    # restoring once the device is closed. Calling par() while no device is open starts a
    # default device, which leaves a stray Rplots.pdf behind in a non-interactive run.
    par(mfrow=c(2,1))
    hist(lrs,breaks=100,main=paste('Before:',maint),xlab="# Reads (log)",
        ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1)
    grid(col="lightgray", lty="dotted")
    lrs = log(cleanrs,10)
    hist(lrs,breaks=100,main=paste('After:',maint),xlab="# Reads (log)",
        ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1)
    grid(col="lightgray", lty="dotted")
    dev.off()
}
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
333
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
cumPlot1 = function(rawrs,cleanrs,maint,myTitle)
# Empirical cumulative distribution curves, built with ecdf(), for rawrs (top panel) and
# cleanrs (bottom panel) on a log x axis running from 1 to max(rawrs).
# The pdf is named after myTitle with spaces removed, followed by _RowsumCum.pdf.
{
    outname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_')
    pdf(outname)
    par(mfrow=c(2,1))
    xmax = max(rawrs)
    rawknots = knots(ecdf(rawrs))
    cleanknots = knots(ecdf(cleanrs))
    nclean = length(cleanknots)
    nraw = length(rawknots)
    # one equal step per knot, ending at 1
    cleanprop = (1:nclean)/nclean
    rawprop = (1:nraw)/nraw
    plot(rawknots,rawprop,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads",
        ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,xmax),sub=myTitle)
    grid(col="blue")
    plot(cleanknots,cleanprop,type='l',main=paste('After',maint),xlab="Log Contig Total Reads",
        ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,xmax),sub=myTitle)
    grid(col="blue")
    dev.off()
}
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
352
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
353
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
354
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
doGSEAold = function(y=NULL,design=NULL,histgmt="",
    bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
    ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
# Older gene set test that calls camera() once for every gene set.
# Gene sets are read from the tab separated files bigmt and histgmt (either may be the empty
# string), one set per line as id, url, gene_1 .. gene_n. Gene names are matched to
# rownames(y) without regard to case. Only sets with more than minnin and fewer than maxnin
# matching rows are tested.
# Results are split by Direction, ordered by PValue, given an adjPValue column using fdrtype
# and written to camera_up_<outfname> and camera_down_<outfname>. A title line is written to
# outfname itself. Console output is diverted to Camera.log while the function runs.
# ntest is kept for compatibility with existing callers and is not used.
{
    sink('Camera.log')
    # restore console output even when a later step fails
    on.exit(sink(), add=TRUE)
    genesets = c()
    if (bigmt > "") { genesets = readLines(bigmt) }
    # c() appends the history sets. rbind() would build a two row matrix, recycling the
    # shorter file and interleaving the lines of both.
    if (histgmt > "") { genesets = c(genesets,readLines(histgmt)) }
    print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
    genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
    header = paste(myTitle,'edgeR GSEA')
    write(header,file=outfname,append=F)
    urownames = toupper(rownames(y))
    upcam = c()
    downcam = c()
    for (gs in genesets) {
        # a usable line needs an id, a url field and at least one gene
        if (length(gs) < 3) { next }
        g = gs[1] # geneset_id
        u = gs[2]
        if (u > "") { u = paste("<a href=\'",u,"\'>",u,"</a>",sep="") }
        glist = toupper(gs[3:length(gs)]) # member gene symbols
        inglist = urownames %in% glist
        nin = sum(inglist)
        if ((nin > minnin) && (nin < maxnin)) {
            camres = camera(y=y,index=inglist,design=design)
            if (! is.null(camres)) {
                rownames(camres) = g # gene set name
                camres = cbind(GeneSet=g,URL=u,camres)
                if (camres[["Direction"]] == "Up") {
                    upcam = rbind(upcam,camres)
                } else {
                    downcam = rbind(downcam,camres)
                }
            }
        }
    }
    # order one direction by p value and add the adjusted p values
    rankSets = function(cam) {
        if (is.null(cam)) { return(data.frame()) }
        cam = cam[order(cam[["PValue"]]),]
        cam[["adjPValue"]] = p.adjust(cam[["PValue"]],method=fdrtype)
        cam
    }
    uscam = rankSets(upcam)
    dscam = rankSets(downcam)
    # echo at least ten sets per direction, or more when more pass the threshold
    nup = max(10,sum((uscam[["adjPValue"]] < fdrthresh)))
    ndown = max(10,sum((dscam[["adjPValue"]] < fdrthresh)))
    write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
    write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
    write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
}
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
423
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
424
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
425
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
426
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
doGSEA = function(y=NULL,design=NULL,histgmt="",
    bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
    ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
# Gene set test that hands every eligible gene set to a single camera() call.
# Gene sets are read from the tab separated files bigmt and histgmt (either may be the empty
# string), one set per line as id, url, gene_1 .. gene_n. Gene names are matched to
# rownames(y) without regard to case. Only sets with more than minnin and fewer than maxnin
# matching rows are tested.
# Results are split by Direction, ordered by PValue, given an adjPValue column using fdrtype
# and written to camera_up_<outfname> and camera_down_<outfname>. A title line is written to
# outfname itself. Console output is diverted to Camera.log while the function runs.
# ntest is kept for compatibility with existing callers and is not used.
{
    sink('Camera.log')
    # restore console output even when a later step fails
    on.exit(sink(), add=TRUE)
    genesets = c()
    if (bigmt > "") { genesets = readLines(bigmt) }
    # c() appends the history sets. rbind() would build a two row matrix, recycling the
    # shorter file and interleaving the lines of both.
    if (histgmt > "") { genesets = c(genesets,readLines(histgmt)) }
    print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
    genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
    header = paste(myTitle,'edgeR GSEA')
    write(header,file=outfname,append=F)
    urownames = toupper(rownames(y))
    incam = list()
    urls = character(0)
    gsids = character(0)
    for (gs in genesets) {
        # a usable line needs an id, a url field and at least one gene
        if (length(gs) < 3) { next }
        gsid = gs[1] # geneset_id
        url = gs[2]
        if (url > "") { url = paste("<a href=\'",url,"\'>",url,"</a>",sep="") }
        glist = toupper(gs[3:length(gs)]) # member gene symbols
        inglist = urownames %in% glist
        nin = sum(inglist)
        if ((nin > minnin) && (nin < maxnin)) {
            # keep each index vector as its own list element. c(incam,inglist) would
            # flatten every set into one long logical vector.
            incam[[length(incam)+1]] = inglist
            gsids = c(gsids,gsid)
            urls = c(urls,url)
        }
    }
    if (length(incam) == 0) {
        print.noquote('No gene sets passed the size filter so camera was not run')
        return(invisible(NULL))
    }
    names(incam) = gsids
    allcam = camera(y=y,index=incam,design=design)
    # camera is expected to label its rows with names(index) and may return them ordered by
    # p value, so ids and urls are looked up by row name rather than assumed to be in
    # input order
    hit = match(rownames(allcam),gsids)
    allcamres = cbind(geneset=gsids[hit],allcam,URL=urls[hit])
    isup = (allcamres[["Direction"]] == "Up")
    upcam = allcamres[isup,]
    downcam = allcamres[!isup,]
    uscam = upcam[order(upcam[["PValue"]]),]
    uscam[["adjPValue"]] = p.adjust(uscam[["PValue"]],method=fdrtype)
    # echo at least ten sets per direction, or more when more pass the threshold
    nup = max(10,sum((uscam[["adjPValue"]] < fdrthresh)))
    dscam = downcam[order(downcam[["PValue"]]),]
    dscam[["adjPValue"]] = p.adjust(dscam[["PValue"]],method=fdrtype)
    ndown = max(10,sum((dscam[["adjPValue"]] < fdrthresh)))
    write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
    write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
    print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
    write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
}
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
507
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
508
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
509 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
510 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
511 filterquantile=0.2, subjects=c(),mydesign=NULL,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
512 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
513 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
514 doCook=F,DESeq_fitType="parameteric")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
515 {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
516 # Error handling
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
517 if (length(unique(group))!=2){
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
518 print("Number of conditions identified in experiment does not equal 2")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
519 q()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
520 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
521 require(edgeR)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
522 options(width = 512)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
523 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
524 allN = nrow(Count_Matrix)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
525 nscut = round(ncol(Count_Matrix)/2)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
526 colTotmillionreads = colSums(Count_Matrix)/1e6
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
527 counts.dataframe = as.data.frame(c())
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
528 rawrs = rowSums(Count_Matrix)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
529 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
530 nzN = nrow(nonzerod)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
531 nzrs = rowSums(nonzerod)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
532 zN = allN - nzN
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
533 print('# Quantiles for non-zero row counts:',quote=F)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
534 print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
535 if (useNDF == T)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
536 {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
537 gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
538 lo = colSums(Count_Matrix[!gt1rpin3,])
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
539 workCM = Count_Matrix[gt1rpin3,]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
540 cleanrs = rowSums(workCM)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
541 cleanN = length(cleanrs)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
542 meth = paste("After removing",sum(!gt1rpin3),"contigs with fewer than",nscut,"sample read counts >= 1 per million, there are") # count the contigs failing the filter; length(lo) was the number of samples
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
543 print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
544 maint = paste('Filter >=1/million reads in >=',nscut,'samples')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
545 } else {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
546 useme = (nzrs > quantile(nzrs,filterquantile))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
547 workCM = nonzerod[useme,]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
548 lo = colSums(nonzerod[!useme,,drop=FALSE]) # per-sample read totals of the filtered-out contigs; drop=FALSE keeps a matrix when only one contig is filtered
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
549 cleanrs = rowSums(workCM)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
550 cleanN = length(cleanrs)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
551 meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
552 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
553 maint = paste('Filter below',filterquantile,'quantile')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
554 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
555 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
556 allgenes = rownames(workCM)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
557 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
558 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
559 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
560 testreg = str_match(allgenes,reg)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
561 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
562 {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
563 print("@@ using ucsc substitution for urls")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
564 contigurls = paste0(ucsc,"&amp;position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
565 } else {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
566 print("@@ using genecards substitution for urls")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
567 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
568 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
569 print.noquote("# urls")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
570 print.noquote(head(contigurls))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
571 print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
572 cmrowsums = rowSums(workCM)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
573 TName=unique(group)[1]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
574 CName=unique(group)[2]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
575 if (is.null(mydesign)) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
576 if (length(subjects) == 0)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
577 {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
578 mydesign = model.matrix(~group)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
579 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
580 else {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
581 subjf = factor(subjects)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
582 mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
583 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
584 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
585 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
586 print.noquote('Using design matrix:')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
587 print.noquote(mydesign)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
588 if (doedgeR) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
589 sink('edgeR.log')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
590 #### Setup DGEList object
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
591 DGEList = DGEList(counts=workCM, group = group)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
592 DGEList = calcNormFactors(DGEList)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
593
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
594 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
595 comdisp = DGEList\$common.dispersion
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
596 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
597 if (edgeR_priordf > 0) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
598 print.noquote(paste("prior.df =",edgeR_priordf))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
599 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
600 } else {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
601 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
602 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
603 DGLM = glmFit(DGEList,design=mydesign)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
604 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
605 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
606 normData = (1e+06*DGEList\$counts/efflib)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
607 uoutput = cbind(
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
608 Name=as.character(rownames(DGEList\$counts)),
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
609 DE\$table,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
610 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
611 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
612 DGEList\$counts
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
613 )
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
614 soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
615 goodness = gof(DGLM, pcutoff=fdrthresh)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
616 if (sum(goodness\$outlier) > 0) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
617 print.noquote('GLM outliers:')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
618 print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
619 } else {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
620 print('No GLM fit outlier genes found\n')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
621 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
622 z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
623 pdf("edgeR_GoodnessofFit.pdf")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
624 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
625 abline(0,1,lwd=3)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
626 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
627 dev.off()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
628 estpriorn = getPriorN(DGEList)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
629 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
630 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
631 normData = (1e+06*DGEList\$counts/efflib)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
632 uniqueg = unique(group)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
633 #### Plot MDS
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
634 sample_colors = match(group,levels(group))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
635 sampleTypes = levels(factor(group))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
636 print.noquote(sampleTypes)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
637 pdf("edgeR_MDSplot.pdf")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
638 plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
639 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
640 grid(col="blue")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
641 dev.off()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
642 colnames(normData) = paste( colnames(normData),'N',sep="_")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
643 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
644 nzd = data.frame(log(nonzerod + 1e-2,10))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
645 try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") )
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
646 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
647 tt = cbind(
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
648 Name=as.character(rownames(DGEList\$counts)),
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
649 DE\$table,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
650 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
651 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
652 )
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
653 print.noquote("# edgeR Top tags\n")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
654 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
655 tt = tt[order(DE\$table\$PValue),]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
656 print.noquote(head(tt,50)) # head() avoids NA padding rows when there are fewer than 50 tags
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
657 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
658 nsig = length(deTags)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
659 print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
660 deColours = ifelse(rownames(uoutput) %in% deTags,'red','black') # one colour per tag: red when the tag is in deTags; deTags holds row names so it cannot be the logical test itself
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
661 pdf("edgeR_BCV_vs_abundance.pdf")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
662 plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
663 dev.off()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
664 dg = DGEList[order(DE\$table\$PValue),]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
665 #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
666 efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
667 normData = (1e+06*dg\$counts/efflib)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
668 outpdfname="edgeR_top_100_heatmap.pdf"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
669 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
670 outSmear = "edgeR_smearplot.pdf"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
671 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
672 smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
673 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
674 norm.factor = DGEList\$samples\$norm.factors
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
675 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
676 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
677 edgeRcounts = rep(0, length(allgenes))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
678 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
679 sink()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
680 } ### doedgeR
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
681 if (doDESeq2 == T)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
682 {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
683 sink("DESeq2.log")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
684 # DESeq2
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
685 require('DESeq2')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
686 library('RColorBrewer')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
687 if (length(subjects) == 0)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
688 {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
689 pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
690 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
691 } else {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
692 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
693 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
694 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
695 #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
696 #rDESeq = results(DESeq2)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
697 #newCountDataSet(workCM, group)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
698 deSeqDatsizefac = estimateSizeFactors(deSEQds)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
699 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
700 resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
701 rDESeq = as.data.frame(results(resDESeq))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
702 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
703 srDESeq = rDESeq[order(rDESeq\$pvalue),]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
704 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
705 cat("# DESeq top 50\n")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
706 print.noquote(head(srDESeq,50)) # head() avoids NA padding rows when there are fewer than 50 contigs
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
707 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
708 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
709 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
710 DESeqcounts = rep(0, length(allgenes))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
711 DESeqcounts[DESeqcountsindex] = 1
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
712 pdf("DESeq2_dispersion_estimates.pdf")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
713 plotDispEsts(resDESeq)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
714 dev.off()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
715 ysmall = abs(min(rDESeq\$log2FoldChange))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
716 ybig = abs(max(rDESeq\$log2FoldChange))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
717 ylimit = min(4,ysmall,ybig)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
718 pdf("DESeq2_MA_plot.pdf")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
719 plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
720 dev.off()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
721 rlogres = rlogTransformation(resDESeq)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
722 sampledists = dist( t( assay(rlogres) ) )
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
723 sdmat = as.matrix(sampledists)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
724 pdf("DESeq2_sample_distance_plot.pdf")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
725 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
726 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
727 dev.off()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
728 ###outpdfname="DESeq2_top50_heatmap.pdf"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
729 ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
730 sink()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
731 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
732 if ("try-error" %in% class(result)) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
733 print.noquote('DESeq2 plotPCA failed.')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
734 } else {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
735 pdf("DESeq2_PCA_plot.pdf")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
736 #### wtf - print? Seems needed to get this to work
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
737 print(ppca)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
738 dev.off()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
739 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
740 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
741
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
742 if (doVoom == T) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
743 sink('VOOM.log')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
744 if (doedgeR == F) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
745 #### Setup DGEList object
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
746 DGEList = DGEList(counts=workCM, group = group)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
747 DGEList = calcNormFactors(DGEList)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
748 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
749 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
750 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
751 # tagwise dispersion is already estimated by the identical call on the previous line; the duplicate call was removed
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
752 norm.factor = DGEList\$samples\$norm.factors
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
753 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
754 pdf("VOOM_mean_variance_plot.pdf")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
755 dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
756 dev.off()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
757 # Use limma to fit data
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
758 fit = lmFit(dat.voomed, mydesign)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
759 fit = eBayes(fit)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
760 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
761 qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='VOOM_qqplot.pdf')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
762 rownames(rvoom) = rownames(workCM)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
763 rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
764 srvoom = rvoom[order(rvoom\$P.Value),]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
765 cat("# VOOM top 50\n")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
766 print(head(srvoom,50)) # head() avoids NA padding rows when there are fewer than 50 contigs
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
767 write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
768 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
769 topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
770 voomcountsindex = which(allgenes %in% rownames(topresults.voom)) # match on row names (set from workCM above), as the edgeR and DESeq2 branches do; an ID column may be absent from topTable output
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
771 voomcounts = rep(0, length(allgenes))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
772 voomcounts[voomcountsindex] = 1
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
773 sink()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
774 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
775
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
776 if (doCamera) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
777 doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
778 outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
779 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
780
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
781 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
782 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
783 vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
784 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
785 VOOM_limma = voomcounts, row.names = allgenes)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
786 } else if ((doDESeq2==T) && (doedgeR==T)) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
787 vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
788 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
789 } else if ((doVoom==T) && (doedgeR==T)) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
790 vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
791 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
792 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
793
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
794 if (nrow(counts.dataframe) > 1) { # compare the row count with 1; the comparison was previously inside nrow()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
795 counts.venn = vennCounts(counts.dataframe)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
796 vennf = "Venn_significant_genes_overlap.pdf"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
797 pdf(vennf)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
798 vennDiagram(counts.venn,main=vennmain,col="maroon")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
799 dev.off()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
800 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
801 } #### doDESeq2 or doVoom
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
802
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
803 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
804 #### Done
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
805
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
806 ###sink(stdout(),append=T,type="message")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
807 builtin_gmt = ""
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
808 history_gmt = ""
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
809 history_gmt_name = ""
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
810 out_edgeR = F
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
811 out_DESeq2 = F
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
812 out_VOOM = "$out_VOOM"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
813 doDESeq2 = $DESeq2.doDESeq2 # make these T or F
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
814 doVoom = $doVoom
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
815 doCamera = F
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
816 doedgeR = $edgeR.doedgeR
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
817 edgeR_priordf = 0
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
818
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
819
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
820 #if $doVoom == "T":
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
821 out_VOOM = "$out_VOOM"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
822 #end if
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
823
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
824 #if $DESeq2.doDESeq2 == "T":
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
825 out_DESeq2 = "$out_DESeq2"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
826 DESeq_fitType = "$DESeq2.DESeq_fitType"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
827 #end if
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
828
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
829 #if $edgeR.doedgeR == "T":
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
830 out_edgeR = "$out_edgeR"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
831 edgeR_priordf = $edgeR.edgeR_priordf
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
832 #end if
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
833
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
834 #*
Camera gene-set options are disabled (doCamera is fixed to F above), so this section is commented out.
A Cheetah block comment is used instead of an XML comment: inside the CDATA section an XML comment
is ordinary text, so Cheetah would still expand the directives below and the comment markers
would be written into the generated R script.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
835 #if $camera.doCamera == 'T'
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
836 doCamera = $camera.doCamera
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
837 #if $camera.gmtSource.refgmtSource == "indexed" or $camera.gmtSource.refgmtSource == "both":
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
838 builtin_gmt = "${camera.gmtSource.builtinGMT.fields.path}"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
839 #end if
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
840 #if $camera.gmtSource.refgmtSource == "history" or $camera.gmtSource.refgmtSource == "both":
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
841 history_gmt = "${camera.gmtSource.ownGMT}"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
842 history_gmt_name = "${camera.gmtSource.ownGMT.name}"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
843 #end if
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
844 #end if
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
845 *#
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
846
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
847 if (sum(c(doedgeR,doVoom,doDESeq2)) == 0)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
848 {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
849 write("No methods chosen - nothing to do! Please try again after choosing one or more methods", stderr())
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
850 quit(save="no",status=2)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
851 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
852
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
853 Out_Dir = "$html_file.files_path"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
854 Input = "$input1"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
855 TreatmentName = "$treatment_name"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
856 TreatmentCols = "$Treat_cols"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
857 ControlName = "$control_name"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
858 ControlCols= "$Control_cols"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
859 org = "$input1.dbkey"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
860 if (org == "") { org = "hg19"}
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
861 fdrtype = "$fdrtype"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
862 fdrthresh = $fdrthresh
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
863 useNDF = $useNDF
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
864 fQ = $fQ # non-differential centile cutoff
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
865 myTitle = "$title"
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
866 sids = strsplit("$subjectids",',')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
867 subjects = unlist(sids)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
868 nsubj = length(subjects)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
869 TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
870 CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
871 cat('Got TCols=')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
872 cat(TCols)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
873 cat('; CCols=')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
874 cat(CCols)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
875 cat('\n')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
876 useCols = c(TCols,CCols)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
877 if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
878 Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') #Load tab file assume header
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
879 snames = colnames(Count_Matrix)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
880 nsamples = length(snames)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
# Abort with exit status 4 when a subject id list was supplied but its length differs from
# the number of sample columns read from the count matrix.
# The condition is scalar, so the short-circuit && is used rather than the vectorised &.
881 if (nsubj > 0 && nsubj != nsamples) {
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
882 options("show.error.messages"=T)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
883 mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','),
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
884 'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=','))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
885 write(mess, stderr())
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
886 quit(save="no",status=4)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
887 }
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
888 if (length(subjects) != 0) {subjects = subjects[useCols]}
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
889 Count_Matrix = Count_Matrix[,useCols] ### reorder columns
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
890 rn = rownames(Count_Matrix)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
891 islib = rn %in% c('librarySize','NotInBedRegions')
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
892 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
893 Count_Matrix = Count_Matrix[subset(rn,! islib),]
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
894 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
895 group = factor(group, levels=c(ControlName,TreatmentName))
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
896 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relable columns
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
# DESeq_fitType is only assigned when DESeq2 is selected; fall back to the DESeq2 default
# fit type so the argument passed below always refers to a defined value.
if (!exists("DESeq_fitType")) { DESeq_fitType = "parametric" }
# Run the analysis. fdrtype and useNDF are forwarded from the values read from the tool form
# instead of the fixed literals 'BH' and F, so the user's selections are honoured.
# NOTE(review): edgeIt is defined elsewhere in this file; confirm it accepts every fdrtype option offered.
897 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
898 fdrtype=fdrtype,mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.',
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
899 myTitle=myTitle,useNDF=useNDF,libSize=c(),filterquantile=fQ,subjects=subjects,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
900 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
901 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
902 sessionInfo()
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
903 ]]>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
904 </configfile>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
905 </configfiles>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
906 <help>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
907
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
908 **What it does**
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
909
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
910 Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
911 Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
912
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
913 **Input**
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
914
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
915 Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
916 and your favourite gene model to generate inputs. Each row is a genomic feature (e.g. a gene or exon) and each column the
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
917 non-negative integer count of reads from one sample overlapping the feature.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
918 The matrix must have a header row uniquely identifying the source samples, and unique row names in
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
919 the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
920
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
921 **Specifying comparisons**
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
922
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
923 This is deliberately kept simple: a single factor with two levels - case vs control.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
924
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
925 More complex interfaces are possible but painful at present.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
926 Probably need to specify a phenotype file to do this better.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
927 Work in progress. Send code.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
928
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
929 If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
930 put a comma separated list of indicators for every sample (whether modelled or not!) indicating (e.g.) the subject number.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
931 Supply a list of integers, one for each sample, or an empty string if samples are all independent.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
932 If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
933 Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
934
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
935 So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
936 e.g. if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects, you might use
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
937 8,9,1,1,2,2
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
938 as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
939
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
940 **Methods available**
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
941
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
942 You can run 3 popular Bioconductor packages available for count data.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
943
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
944 edgeR - see edgeR_ for details
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
945
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
946 VOOM/limma - see limma_VOOM_ for details
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
947
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
948 DESeq2 - see DESeq2_ for details
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
949
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
950 and optionally camera in edgeR which works better if MSigDB is installed.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
951
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
952 **Outputs**
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
953
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
954 Some helpful plots and analysis results. Note that most of these are produced using R code
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
955 suggested by the excellent documentation and vignettes for the Bioconductor
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
956 packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
957
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
958 **Note on Voom**
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
959
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
960 The R help for voom in limma version 3.16.6 includes the following text from the authors - but you should read the paper to interpret this method.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
961
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
962 This function is intended to process RNA-Seq or ChIP-Seq data prior to linear modelling in limma.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
963
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
964 voom is an acronym for mean-variance modelling at the observational level.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
965 The key concern is to estimate the mean-variance relationship in the data, then use this to compute appropriate weights for each observation.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
966 Count data almost always show non-trivial mean-variance relationships. Raw counts show increasing variance with increasing count size, while log-counts typically show a decreasing mean-variance trend.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
967 This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
968 The weights are then used in the linear modelling process to adjust for heteroscedasticity.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
969
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
970 In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using lowess.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
971 The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
972 The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
973 Tags with zero counts across all samples are not included in the lowess fit. Optional normalization is performed using normalizeBetweenArrays.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
974 Using fitted values of log2 counts from a linear model fit by lmFit, variances from the mean-variance trend were interpolated for each observation.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
975 This was carried out by approxfun. Inverse variance weights can be used to correct for mean-variance trend in the count data.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
976
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
977
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
978 Author(s)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
979
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
980 Charity Law and Gordon Smyth
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
981
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
982 References
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
983
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
984 Law, CW (2013). Precision weights for gene expression analysis. PhD Thesis. University of Melbourne, Australia.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
985
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
986 Law, CW, Chen, Y, Shi, W, Smyth, GK (2013). Voom! Precision weights unlock linear model analysis tools for RNA-seq read counts.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
987 Technical Report 1 May 2013, Bioinformatics Division, Walter and Eliza Hall Institute of Medical Research, Melbourne, Australia.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
988 http://www.statsci.org/smyth/pubs/VoomPreprint.pdf
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
989
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
990 See Also
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
991
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
992 A voom case study is given in the edgeR User's Guide.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
993
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
994 vooma is a similar function but for microarrays instead of RNA-seq.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
995
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
996
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
997 ***old rant on changes to Bioconductor package variable names between versions***
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
998
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
999 The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1000 breaking this and all other code that assumed the old name for this variable,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1001 between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing).
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1002 This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1003 to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1004 when their old scripts break. This tool now works with 2.4.6.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1005
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1006 **Note on prior.N**
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1007
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1008 http://seqanswers.com/forums/showthread.php?t=5591 says:
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1009
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1010 *prior.n*
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1011
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1012 The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1013 You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1014 in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1015 tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1016 common likelihood the weight of one observation.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1017
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1018 In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1019 or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1020 you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1021 (squeezing) of the tagwise dispersions. How many samples do you have in your experiment?
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1022 What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1023 If you have more samples, then the tagwise dispersion estimates will be more reliable,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1024 so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1025
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1026
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1027 From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1028
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1029 Dear Dorota,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1030
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1031 The important settings are prior.df and trend.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1032
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1033 prior.n and prior.df are related through prior.df = prior.n * residual.df,
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1034 and your experiment has residual.df = 36 - 12 = 24. So the old setting of
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1035 prior.n=10 is equivalent for your data to prior.df = 240, a very large
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1036 value. Going the other way, the new setting of prior.df=10 is equivalent
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1037 to prior.n=10/24.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1038
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1039 To recover old results with the current software you would use
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1040
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1041 estimateTagwiseDisp(object, prior.df=240, trend="none")
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1042
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1043 To get the new default from old software you would use
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1044
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1045 estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1046
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1047 Actually the old trend method is equivalent to trend="loess" in the new
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1048 software. You should use plotBCV(object) to see whether a trend is
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1049 required.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1050
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1051 Note you could also use
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1052
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1053 prior.n = getPriorN(object, prior.df=10)
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1054
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1055 to map between prior.df and prior.n.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1056
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1057 ----
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1058
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1059 **Attributions**
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1060
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1061 edgeR - edgeR_
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1062
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1063 VOOM/limma - limma_VOOM_
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1064
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1065 DESeq2 - DESeq2_ for details
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1066
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1067 See the links above for the Bioconductor documentation of the packages exposed in Galaxy by this tool and app store package.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1068
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1069 Galaxy_ (that's what you are using right now!) for gluing everything together
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1070
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1071 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1072 licensed to you under the LGPL_, like other rgenetics artefacts.
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1073
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1074 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1075 .. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1076 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1077 .. _DESeq2: http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1078 .. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1079 .. _Galaxy: http://getgalaxy.org
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1080 </help>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1081
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1082 </tool>
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1083
ccc5a33bb358 updating to r 3.0.2 for bioc 2.14
fubar
parents:
diff changeset
1084