comparison cuffcompare_wrapper.xml @ 3:5aac9b9d6f2a draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/cufflinks/cuffcompare commit 82ee6fc860c52c531b7a57bbb346ab1a67a434a5
author devteam
date Sun, 19 Feb 2017 12:11:14 -0500
parents a5674ddf2ad7
children 806c27c97df7
comparison
equal deleted inserted replaced
2:a5674ddf2ad7 3:5aac9b9d6f2a
1 <tool id="cuffcompare" name="Cuffcompare" version="2.2.1.0"> 1 <tool id="cuffcompare" name="Cuffcompare" version="@VERSION@.1">
2 <description>compare assembled transcripts to a reference annotation and track Cufflinks transcripts across multiple experiments</description> 2 <description>compare assembled transcripts to a reference annotation and track Cufflinks transcripts across multiple experiments</description>
3 <macros> 3 <macros>
4 <import>cuff_macros.xml</import> 4 <import>cuff_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
7 <expand macro="stdio" />
8 <version_command>cuffcompare 2>&amp;1 | head -n 1</version_command> 7 <version_command>cuffcompare 2>&amp;1 | head -n 1</version_command>
9 <command> 8 <command detect_errors="aggressive"><![CDATA[
10 python '$__tool_directory__/cuffcompare_wrapper.py' 9 python '$__tool_directory__/cuffcompare_wrapper.py'
11 ## Use annotation reference? 10 ## Use annotation reference?
12 #if $annotation.use_ref_annotation == "Yes": 11 #if $annotation.use_ref_annotation == "Yes":
13 -r '$annotation.reference_annotation' 12 -r '$annotation.reference_annotation'
14 #if $annotation.ignore_nonoverlapping_reference: 13 #if $annotation.ignore_nonoverlapping_reference:
17 #if $annotation.ignore_nonoverlapping_transfrags: 16 #if $annotation.ignore_nonoverlapping_transfrags:
18 -Q 17 -Q
19 #end if 18 #end if
20 19
21 #end if 20 #end if
22 21
23 ## Use sequence data? 22 ## Use sequence data?
24 #if $seq_data.use_seq_data == "Yes": 23 #if $seq_data.use_seq_data == "Yes":
25 -s 24 -s
26 #if $seq_data.seq_source.index_source == "history": 25 #if $seq_data.seq_source.index_source == "history":
27 --ref_file '$seq_data.seq_source.ref_file' 26 --ref_file '$seq_data.seq_source.ref_file'
28 #else: 27 #else:
29 --index '${seq_data.seq_source.index.fields.path}' 28 --index '${seq_data.seq_source.index.fields.path}'
30 #end if 29 #end if
31 #end if 30 #end if
32 31
33 $discard_single_exon 32 $discard_single_exon
34 33
35 -e $max_dist_exon 34 -e $max_dist_exon
36 -d $max_dist_group 35 -d $max_dist_group
37 36
38 #if $discard_intron_redundant_transfrags: 37 #if $discard_intron_redundant_transfrags:
39 -F 38 -F
40 #end if 39 #end if
41 40
42 ## Outputs. 41 ## Outputs.
43 --combined-transcripts '${transcripts_combined}' 42 --combined-transcripts '${transcripts_combined}'
44 43
45 @CUFFLINKS_GTF_INPUTS@ 44 @CUFFLINKS_GTF_INPUTS@
46 </command> 45 ]]></command>
47 <inputs> 46 <inputs>
48 <expand macro="cufflinks_gtf_inputs" /> 47 <expand macro="cufflinks_gtf_inputs" />
49 <conditional name="annotation"> 48 <conditional name="annotation">
50 <param name="use_ref_annotation" type="select" label="Use Reference Annotation"> 49 <param name="use_ref_annotation" type="select" label="Use Reference Annotation">
51 <option value="No">No</option> 50 <option value="No">No</option>
58 </when> 57 </when>
59 <when value="No"> 58 <when value="No">
60 </when> 59 </when>
61 </conditional> 60 </conditional>
62 <conditional name="seq_data"> 61 <conditional name="seq_data">
63 <param name="use_seq_data" type="select" label="Use Sequence Data" 62 <param name="use_seq_data" type="select" label="Use Sequence Data"
64 help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff."> 63 help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff.">
65 <option value="Yes">Yes</option> 64 <option value="Yes">Yes</option>
66 <option value="No">No</option> 65 <option value="No">No</option>
67 </param> 66 </param>
68 <when value="No"></when> 67 <when value="No"></when>
89 <param type="select" name="discard_single_exon" label="discard (ignore) single-exon transcripts"> 88 <param type="select" name="discard_single_exon" label="discard (ignore) single-exon transcripts">
90 <option value="" selected="True">No</option> 89 <option value="" selected="True">No</option>
91 <option value="-M">Discard single-exon transfrags and reference transcripts</option> 90 <option value="-M">Discard single-exon transfrags and reference transcripts</option>
92 <option value="-N">Discard single-exon reference transcripts</option> 91 <option value="-N">Discard single-exon reference transcripts</option>
93 </param> 92 </param>
94 <param type="integer" name="max_dist_exon" value="100" label="Max. Distance for assessing exon accuracy" 93 <param type="integer" name="max_dist_exon" value="100" label="Max. Distance for assessing exon accuracy"
95 help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100" /> 94 help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100" />
96 <param type="integer" name="max_dist_group" value="100" label="Max.Distance for transcript grouping" 95 <param type="integer" name="max_dist_group" value="100" label="Max.Distance for transcript grouping"
97 help="max. distance (range) for grouping transcript start sites. Default: 100" /> 96 help="max. distance (range) for grouping transcript start sites. Default: 100" />
98 <param type="boolean" name="discard_intron_redundant_transfrags" label="discard intron-redundant transfrags sharing 5'" 97 <param type="boolean" name="discard_intron_redundant_transfrags" label="discard intron-redundant transfrags sharing 5'"
99 help="Discard intron-redundant transfrags if they share the 5' end (if they differ only at the 3' end)" /> 98 help="Discard intron-redundant transfrags if they share the 5' end (if they differ only at the 3' end)" />
100 </inputs> 99 </inputs>
101 100
102 <outputs> 101 <outputs>
103 <data format="txt" name="transcripts_accuracy" label="${tool.name} on ${on_string}: transcript accuracy" 102 <data format="txt" name="transcripts_accuracy" label="${tool.name} on ${on_string}: transcript accuracy"
104 from_work_dir="cc_output.stats" /> 103 from_work_dir="cc_output.stats" />
105 <data format="tabular" name="input1_tmap" label="${tool.name} on ${on_string}: data ${inputs[0].hid} tmap file" 104 <data format="tabular" name="input1_tmap" label="${tool.name} on ${on_string}: data ${inputs[0].hid} tmap file"
106 from_work_dir="cc_output.input1.tmap" /> 105 from_work_dir="cc_output.input1.tmap" />
107 <data format="tabular" name="input1_refmap" 106 <data format="tabular" name="input1_refmap"
108 label="${tool.name} on ${on_string}: data ${inputs[0].hid} refmap file" 107 label="${tool.name} on ${on_string}: data ${inputs[0].hid} refmap file"
109 from_work_dir="cc_output.input1.refmap"> 108 from_work_dir="cc_output.input1.refmap">
110 <filter>annotation['use_ref_annotation'] == 'Yes'</filter> 109 <filter>annotation['use_ref_annotation'] == 'Yes'</filter>
111 </data> 110 </data>
112 <data format="tabular" name="input2_tmap" label="${tool.name} on ${on_string}: data ${inputs[1].hid} tmap file" from_work_dir="cc_output.input2.tmap"> 111 <data format="tabular" name="input2_tmap" label="${tool.name} on ${on_string}: data ${inputs[1].hid} tmap file" from_work_dir="cc_output.input2.tmap">
113 <filter>@HAS_MULTIPLE_INPUTS@</filter> 112 <filter>@HAS_MULTIPLE_INPUTS@</filter>
114 </data> 113 </data>
115 <data format="tabular" name="input2_refmap" 114 <data format="tabular" name="input2_refmap"
116 label="${tool.name} on ${on_string}: data ${inputs[1].hid} refmap file" 115 label="${tool.name} on ${on_string}: data ${inputs[1].hid} refmap file"
117 from_work_dir="cc_output.input2.refmap"> 116 from_work_dir="cc_output.input2.refmap">
118 <filter>annotation['use_ref_annotation'] == 'Yes' and @HAS_MULTIPLE_INPUTS@</filter> 117 <filter>annotation['use_ref_annotation'] == 'Yes' and @HAS_MULTIPLE_INPUTS@</filter>
119 </data> 118 </data>
120 <data format="tabular" name="transcripts_tracking" label="${tool.name} on ${on_string}: transcript tracking" from_work_dir="cc_output.tracking"> 119 <data format="tabular" name="transcripts_tracking" label="${tool.name} on ${on_string}: transcript tracking" from_work_dir="cc_output.tracking">
121 <filter>@HAS_MULTIPLE_INPUTS@</filter> 120 <filter>@HAS_MULTIPLE_INPUTS@</filter>
122 </data> 121 </data>
123 <data format="gtf" name="transcripts_combined" label="${tool.name} on ${on_string}: combined transcripts"/> 122 <data format="gtf" name="transcripts_combined" label="${tool.name} on ${on_string}: combined transcripts"/>
124 </outputs> 123 </outputs>
125 124
126 <tests> 125 <tests>
127 <!-- 126 <!--
128 cuffcompare -r cuffcompare_in3.gtf -R cuffcompare_in1.gtf cuffcompare_in2.gtf 127 cuffcompare -r cuffcompare_in3.gtf -R cuffcompare_in1.gtf cuffcompare_in2.gtf
129 --> 128 -->
130 <test> 129 <test>
131 <param name="inputs" value="cuffcompare_in1.gtf,cuffcompare_in2.gtf" ftype="gtf"/> 130 <param name="inputs" value="cuffcompare_in1.gtf,cuffcompare_in2.gtf" ftype="gtf"/>
132 <param name="use_ref_annotation" value="Yes"/> 131 <param name="use_ref_annotation" value="Yes"/>
171 170
172 **Input format** 171 **Input format**
173 172
174 Cuffcompare takes Cufflinks' GTF output as input, and optionally can take a "reference" annotation (such as from Ensembl_) 173 Cuffcompare takes Cufflinks' GTF output as input, and optionally can take a "reference" annotation (such as from Ensembl_)
175 174
176 .. _Ensembl: http://www.ensembl.org 175 .. _Ensembl: http://www.ensembl.org
177 176
178 ------ 177 ------
179 178
180 **Outputs** 179 **Outputs**
181 180
185 184
186 Cuffcompare reports various statistics related to the "accuracy" of the transcripts in each sample when compared to the reference annotation data. The typical gene finding measures of "sensitivity" and "specificity" (as defined in Burset, M., Guigó, R. : Evaluation of gene structure prediction programs (1996) Genomics, 34 (3), pp. 353-367. doi: 10.1006/geno.1996.0298) are calculated at various levels (nucleotide, exon, intron, transcript, gene) for each input file and reported in this file. The Sn and Sp columns show sensitivity and specificity values at each level, while the fSn and fSp columns are "fuzzy" variants of these same accuracy calculations, allowing for a very small variation in exon boundaries to still be counted as a "match". 185 Cuffcompare reports various statistics related to the "accuracy" of the transcripts in each sample when compared to the reference annotation data. The typical gene finding measures of "sensitivity" and "specificity" (as defined in Burset, M., Guigó, R. : Evaluation of gene structure prediction programs (1996) Genomics, 34 (3), pp. 353-367. doi: 10.1006/geno.1996.0298) are calculated at various levels (nucleotide, exon, intron, transcript, gene) for each input file and reported in this file. The Sn and Sp columns show sensitivity and specificity values at each level, while the fSn and fSp columns are "fuzzy" variants of these same accuracy calculations, allowing for a very small variation in exon boundaries to still be counted as a "match".
187 186
188 Transcripts Combined File: 187 Transcripts Combined File:
189 188
190 Cuffcompare reports a GTF file containing the "union" of all transfrags in each sample. If a transfrag is present in both samples, it is thus reported once in the combined gtf. 189 Cuffcompare reports a GTF file containing the "union" of all transfrags in each sample. If a transfrag is present in both samples, it is thus reported once in the combined gtf.
191 190
192 Transcripts Tracking File: 191 Transcripts Tracking File:
193 192
194 This file matches transcripts up between samples. Each row contains a transcript structure that is present in one or more input GTF files. Because the transcripts will generally have different IDs (unless you assembled your RNA-Seq reads against a reference transcriptome), cuffcompare examines the structure of each of the transcripts, matching transcripts that agree on the coordinates and order of all of their introns, as well as strand. Matching transcripts are allowed to differ on the length of the first and last exons, since these lengths will naturally vary from sample to sample due to the random nature of sequencing. 193 This file matches transcripts up between samples. Each row contains a transcript structure that is present in one or more input GTF files. Because the transcripts will generally have different IDs (unless you assembled your RNA-Seq reads against a reference transcriptome), cuffcompare examines the structure of each of the transcripts, matching transcripts that agree on the coordinates and order of all of their introns, as well as strand. Matching transcripts are allowed to differ on the length of the first and last exons, since these lengths will naturally vary from sample to sample due to the random nature of sequencing.
195 If you ran cuffcompare with the -r option, the first and second columns contain the closest matching reference transcript to the one described by each row. 194 If you ran cuffcompare with the -r option, the first and second columns contain the closest matching reference transcript to the one described by each row.
207 1 Cufflinks transfrag id TCONS_00000045 A unique internal id for the transfrag 206 1 Cufflinks transfrag id TCONS_00000045 A unique internal id for the transfrag
208 2 Cufflinks locus id XLOC_000023 A unique internal id for the locus 207 2 Cufflinks locus id XLOC_000023 A unique internal id for the locus
209 3 Reference gene id Tcea The gene_name attribute of the reference GTF record for this transcript, or '-' if no reference transcript overlaps this Cufflinks transcript 208 3 Reference gene id Tcea The gene_name attribute of the reference GTF record for this transcript, or '-' if no reference transcript overlaps this Cufflinks transcript
210 4 Reference transcript id uc007afj.1 The transcript_id attribute of the reference GTF record for this transcript, or '-' if no reference transcript overlaps this Cufflinks transcript 209 4 Reference transcript id uc007afj.1 The transcript_id attribute of the reference GTF record for this transcript, or '-' if no reference transcript overlaps this Cufflinks transcript
211 5 Class code c The type of match between the Cufflinks transcripts in column 6 and the reference transcript. See class codes 210 5 Class code c The type of match between the Cufflinks transcripts in column 6 and the reference transcript. See class codes
212 211
213 Each of the columns after the fifth has the following format: 212 Each of the columns after the fifth has the following format:
214 qJ:gene_id|transcript_id|FMI|FPKM|conf_lo|conf_hi 213 qJ:gene_id|transcript_id|FMI|FPKM|conf_lo|conf_hi
215 214
216 A transcript need not be present in all samples to be reported in the tracking file. A sample not containing a transcript will have a "-" in its entry in the row for that transcript. 215 A transcript need not be present in all samples to be reported in the tracking file. A sample not containing a transcript will have a "-" in its entry in the row for that transcript.
217 216
220 If you ran cuffcompare with the -r option, tracking rows will contain the following values. If you did not use -r, the rows will all contain "-" in their class code column:: 219 If you ran cuffcompare with the -r option, tracking rows will contain the following values. If you did not use -r, the rows will all contain "-" in their class code column::
221 220
222 Priority Code Description 221 Priority Code Description
223 --------------------------------- 222 ---------------------------------
224 1 = Match 223 1 = Match
225 2 c Contained 224 2 c Contained
226 3 j New isoform 225 3 j New isoform
227 4 e A single exon transcript overlapping a reference exon and at least 10 bp of a reference intron, indicating a possible pre-mRNA fragment. 226 4 e A single exon transcript overlapping a reference exon and at least 10 bp of a reference intron, indicating a possible pre-mRNA fragment.
228 5 i A single exon transcript falling entirely within a reference intron 227 5 i A single exon transcript falling entirely within a reference intron
229 6 o Generic exonic overlap with a reference transcript 228 6 o Generic exonic overlap with a reference transcript
230 7 p Possible polymerase run-on fragment 229 7 p Possible polymerase run-on fragment
231 8 r Repeat. Currently determined by looking at the soft-masked reference sequence and applied to transcripts where at least 50% of the bases are lower case 230 8 r Repeat. Currently determined by looking at the soft-masked reference sequence and applied to transcripts where at least 50% of the bases are lower case
232 9 u Unknown, intergenic transcript 231 9 u Unknown, intergenic transcript
233 10 x Exonic overlap with reference on the opposite strand 232 10 x Exonic overlap with reference on the opposite strand
234 11 s An intron of the transfrag overlaps a reference intron on the opposite strand (likely due to read mapping errors) 233 11 s An intron of the transfrag overlaps a reference intron on the opposite strand (likely due to read mapping errors)
235 12 . (.tracking file only, indicates multiple classifications) 234 12 . (.tracking file only, indicates multiple classifications)
236 235
237 ------- 236 -------
238 237
239 **Settings** 238 **Settings**
240 239
241 All of the options have a default value. You can change any of them. Most of the options in Cuffcompare have been implemented here. 240 All of the options have a default value. You can change any of them. Most of the options in Cuffcompare have been implemented here.