annotate dexseq.xml @ 2:28f42c7e6b12 draft

Uploaded
author pavanvidem
date Fri, 28 Aug 2015 08:43:24 -0400
parents bc7eab5753a8
children defd31925739
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
28f42c7e6b12 Uploaded
pavanvidem
parents: 1
diff changeset
1 <tool id="dexseq" name="DEXSeq" version="1.0">
1
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
2 <description>Determines differentially expressed features from count tables</description>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
3 <requirements>
<!-- Dependencies declared for this tool: R 3.1.2, the Rscript binary,
     the deseq2 package 1.6.1 and the DESeq2 R module. -->
<!-- NOTE(review): the command section runs dexseq.R, yet only DESeq2 is
     declared here; confirm whether a DEXSeq requirement is missing. -->
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
4 <requirement type="package" version="3.1.2">R</requirement>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
5 <requirement type="binary">Rscript</requirement>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
6 <requirement type="package" version="1.6.1">deseq2</requirement>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
7 <requirement type="R-module">DESeq2</requirement>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
8 </requirements>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
9 <code file="single_helper.py" />
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
10 <command>
## Builds the Rscript command line for dexseq.R.
## The nested #for loops collect, per factor, a list of
## {level name: [count file paths]} mappings; the whole structure is
## serialised with json.dumps and passed as the -f argument.
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
11 #import json
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
12 Rscript /usr/local/galaxy/galaxy-dist/tools/test/deseq2/dexseq.R
## NOTE(review): the script path above is absolute and installation-specific;
## confirm it exists on the target Galaxy server.
## The tabular output is declared as "dexseq_out" in the outputs section, so the
## template must reference $dexseq_out; "$deseq_out" is not defined anywhere.
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
13 #set $reportdir = $dexseq_out.files_path
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
14 -o "$dexseq_out"
## NOTE(review): -p is passed twice (12 here, "$reportdir" further down);
## confirm which option dexseq.R expects for the report directory.
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
15 -p 12
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
16 #set $temp_factor_names = list()
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
17 #for $factor in $rep_factorName:
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
18 #set $temp_factor = list()
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
19 #for $level in $factor.rep_factorLevel:
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
20 #set $count_files = list()
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
21 #for $file in $level.countsFile:
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
22 $count_files.append(str($file))
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
23 #end for
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
24 $temp_factor.append( {str($level.factorLevel): $count_files} )
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
25 #end for
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
26 $temp_factor_names.append([str($factor.factorName), $temp_factor])
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
27 #end for
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
28 -f '#echo json.dumps(temp_factor_names)#'
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
29 -a $gtf
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
30 #if $report:
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
31 -p "$reportdir"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
32 #end if
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
33 -c $fdr_cutoff
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
34 </command>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
35 <stdio>
<!-- Error detection: each regex is matched against both stdout and stderr
     (source="both") and marks the job as failed (level="fatal") on a hit.
     Patterns: R's "Execution halted", the "Input-Error 01" marker, and
     any "Error in" text. -->
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
36 <regex match="Execution halted"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
37 source="both"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
38 level="fatal"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
39 description="Execution halted." />
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
40 <regex match="Input-Error 01"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
41 source="both"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
42 level="fatal"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
43 description="Error in your input parameters: Make sure you only apply factors to selected samples." />
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
44 <regex match="Error in"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
45 source="both"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
46 level="fatal"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
47 description="An undefined error occurred, please check your input carefully and contact your administrator." />
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
48 </stdio>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
49 <inputs>
<!-- User-facing parameters.
     gtf            : dataset passed to the script via -a.
     rep_factorName : one or more factors (min="1"); each factor has a name and
                      exactly two levels (min="2" max="2"), and each level has a
                      name plus one or more counts datasets (multiple="true").
                      Factor and level names are sanitized to letters, digits
                      and underscores.
     report         : boolean, checked by default; guards the report option in
                      the command template.
     fdr_cutoff     : float between 0 and 1, default 0.05; passed via -c. -->
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
50 <param name="gtf" type="data" label="GTF file created from dexseq_count tool"/>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
51 <repeat name="rep_factorName" title="Factor" min="1">
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
52 <param name="factorName" type="text" value="FactorName" label="Specify a factor name"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
53 help="Only letters, numbers and underscores will be retained in this field">
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
54 <sanitizer>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
55 <valid initial="string.letters,string.digits"><add value="_" /></valid>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
56 </sanitizer>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
57 </param>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
58 <repeat name="rep_factorLevel" title="Factor level" min="2" max="2" default="2">
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
59 <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
60 help="Only letters, numbers and underscores will be retained in this field">
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
61 <sanitizer>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
62 <valid initial="string.letters,string.digits"><add value="_" /></valid>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
63 </sanitizer>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
64 </param>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
65 <param name="countsFile" type="data" multiple="true" label="Counts file"/>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
66 </repeat>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
67 </repeat>
<!-- NOTE(review): truevalue and falsevalue of "report" are both empty strings,
     so the two states are indistinguishable when rendered as text; confirm the
     #if test in the command template evaluates the boolean as intended. -->
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
68 <param name="report" type="boolean" truevalue="" falsevalue="" checked="true"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
69 label="Visualise the analysis results?"
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
70 help="output an additional html file" />
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
71 <param name="fdr_cutoff" type="float" min="0" max="1" value="0.05" label="All the genes under this FDR threshold will be shown in the html report."/>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
72 </inputs>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
73
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
74 <outputs>
<!-- dexseq_out : tabular result file, always created.
     plots      : PDF output, created only when the "report" checkbox is set.
     The filter previously tested "pdf", which is not a declared parameter of
     this tool; it now tests the "report" boolean input instead. -->
<!-- NOTE(review): the command template never references $plots; confirm that
     dexseq.R actually writes this dataset. -->
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
75 <data format="tabular" name="dexseq_out" label="DESeq2 result file on ${on_string}"/>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
76 <data format="pdf" name="plots" label="DESeq2 plots on ${on_string}">
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
77 <filter>report == True</filter>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
78 </data>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
79 </outputs>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
80 <code file="dexseq_helper.py" />
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
81 <help>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
82
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
83 .. class:: infomark
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
84
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
85 **What it does**
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
86
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
87 Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
88
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
89
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
90 **Inputs**
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
91
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
92 DESeq2_ takes count tables generated by htseq-count as input. Count tables must be generated for each sample individually. DESeq2 is capable of handling multiple factors that affect your experiment. The first factor you input is considered the primary factor that affects gene expression. You can also input several secondary factors that might influence your experiment, but the final output will be the changes in genes due to the primary factor in the presence of the secondary factors. Each factor has two levels/states. You need to select the appropriate count tables from your history for each factor level.
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
93
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
94 The following table gives some examples of factors and their levels:
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
95
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
96 ========= ============== ===============
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
97 Factor Factor level 1 Factor level 2
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
98 --------- -------------- ---------------
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
99 Treatment Treated Untreated
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
100 --------- -------------- ---------------
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
101 Condition Knockdown Wildtype
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
102 --------- -------------- ---------------
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
103 TimePoint Day4 Day1
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
104 --------- -------------- ---------------
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
105 SeqType SingleEnd PairedEnd
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
106 --------- -------------- ---------------
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
107 Gender Female Male
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
108 ========= ============== ===============
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
109
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
110 *Note*: Output log2 fold changes are based on primary factor level 1 vs. factor level 2. Here the order of factor levels is important. For example, for the factor 'Treatment' given in the table above, DESeq2 computes fold changes of 'Treated' samples against 'Untreated', i.e. the values correspond to up- or down-regulation of genes in Treated samples.
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
111
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
112 **Output**
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
113
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
114 DESeq2_ generates a tabular file containing the following columns and, optionally, visualized results as a PDF.
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
115
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
116 ====== ==========================================================
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
117 Column Description
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
118 ------ ----------------------------------------------------------
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
119 1 Gene Identifiers
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
120 2 mean normalised counts, averaged over all samples from both conditions
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
121 3 the logarithm (to basis 2) of the fold change (See the note in inputs section)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
122 4 standard error estimate for the log2 fold change estimate
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
123 5 Wald statistic
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
124 6 p value for the statistical significance of this change
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
125 7 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
126 which controls false discovery rate (FDR)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
127 ====== ==========================================================
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
128
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
129
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
130
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
131
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
132
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
133 .. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
134
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
135
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
136 </help>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
137 <citations>
<!-- Citation for this tool, identified by DOI. -->
<!-- NOTE(review): the help text and this DOI appear to describe DESeq2 while
     the tool id is "dexseq"; confirm this is the intended reference. -->
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
138 <citation type="doi">10.1101/002832</citation>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
139 </citations>
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
140 </tool>