annotate cuffcompare_wrapper.xml @ 1:6d8ab54229a0 draft

planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author devteam
date Tue, 13 Oct 2015 12:37:24 -0400
parents d0d26169cc2a
children a5674ddf2ad7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
1 <tool id="cuffcompare" name="Cuffcompare" version="2.2.1.0">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
2 <description>compare assembled transcripts to a reference annotation and track Cufflinks transcripts across multiple experiments</description>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
3 <expand macro="requirements" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
4 <expand macro="stdio" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
5 <macros>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
6 <import>cuff_macros.xml</import>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
7 </macros>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
8 <version_command>cuffcompare 2>&amp;1 | head -n 1</version_command>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
9 <command interpreter="python">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
10 cuffcompare_wrapper.py
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
11 ## Use annotation reference?
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
12 #if $annotation.use_ref_annotation == "Yes":
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
13 -r $annotation.reference_annotation
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
14 #if $annotation.ignore_nonoverlapping_reference:
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
15 -R
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
16 #end if
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
17 #if $annotation.ignore_nonoverlapping_transfrags:
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
18 -Q
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
19 #end if
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
20
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
21 #end if
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
22
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
23 ## Use sequence data?
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
24 #if $seq_data.use_seq_data == "Yes":
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
25 -s
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
26 #if $seq_data.seq_source.index_source == "history":
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
27 --ref_file=$seq_data.seq_source.ref_file
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
28 #else:
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
29 --index=${seq_data.seq_source.index.fields.path}
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
30 #end if
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
31 #end if
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
32
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
33 $discard_single_exon
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
34
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
35 -e $max_dist_exon
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
36 -d $max_dist_group
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
37
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
38 #if $discard_intron_redundant_transfrags:
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
39 -F
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
40 #end if
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
41
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
42 ## Outputs.
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
43 --combined-transcripts=${transcripts_combined}
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
44
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
45 @CUFFLINKS_GTF_INPUTS@
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
46 </command>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
47 <inputs>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
48 <expand macro="cufflinks_gtf_inputs" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
49 <conditional name="annotation">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
50 <param name="use_ref_annotation" type="select" label="Use Reference Annotation">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
51 <option value="No">No</option>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
52 <option value="Yes">Yes</option>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
53 </param>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
54 <when value="Yes">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
55 <param format="gff3,gtf" name="reference_annotation" type="data" label="Reference Annotation" help="Requires an annotation file in GFF3 or GTF format."/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
56 <param name="ignore_nonoverlapping_reference" type="boolean" label="Ignore reference transcripts that are not overlapped by any input transfrags" help="consider only the reference transcripts that overlap any of the input transfrags (Sn correction)" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
57 <param name="ignore_nonoverlapping_transfrags" type="boolean" label="Ignore input transcripts that are not overlapped by any reference transcripts" help="consider only the input transcripts that overlap any of the reference transcripts (Sp correction). Warning: this will discard all 'novel' loci!" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
58 </when>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
59 <when value="No">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
60 </when>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
61 </conditional>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
62 <conditional name="seq_data">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
63 <param name="use_seq_data" type="select" label="Use Sequence Data"
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
64 help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff.">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
65 <option value="Yes">Yes</option>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
66 <option value="No">No</option>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
67 </param>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
68 <when value="No"></when>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
69 <when value="Yes">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
70 <conditional name="seq_source">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
71 <param name="index_source" type="select" label="Choose the source for the reference list">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
72 <option value="cached">Locally cached</option>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
73 <option value="history">History</option>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
74 </param>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
75 <when value="cached">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
76 <param name="index" type="select" label="Using reference genome">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
77 <options from_data_table="fasta_indexes">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
78 <filter type="data_meta" ref="inputs" key="dbkey" column="1" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
79 <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
80 </options>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
81 </param>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
82 </when>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
83 <when value="history">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
84 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
85 </when>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
86 </conditional>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
87 </when>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
88 </conditional>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
89 <param type="select" name="discard_single_exon" label="discard (ignore) single-exon transcripts">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
90 <option value="" selected="True">No</option>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
91 <option value="-M">Discard single-exon transfrags and reference transcripts</option>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
92 <option value="-N">Discard single-exon reference transcripts</option>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
93 </param>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
94 <param type="integer" name="max_dist_exon" value="100" label="Max. Distance for assessing exon accuracy"
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
95 help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
96 <param type="integer" name="max_dist_group" value="100" label="Max.Distance for transcript grouping"
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
97 help="max. distance (range) for grouping transcript start sites. Default: 100" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
98 <param type="boolean" name="discard_intron_redundant_transfrags" label="discard intron-redundant transfrags sharing 5'"
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
99 help="Discard intron-redundant transfrags if they share the 5' end (if they differ only at the 3' end)" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
100 </inputs>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
101
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
102 <outputs>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
103 <data format="txt" name="transcripts_accuracy" label="${tool.name} on ${on_string}: transcript accuracy"
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
104 from_work_dir="cc_output.stats" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
105 <data format="tabular" name="input1_tmap" label="${tool.name} on ${on_string}: data ${inputs[0].hid} tmap file"
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
106 from_work_dir="cc_output.input1.tmap" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
107 <data format="tabular" name="input1_refmap"
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
108 label="${tool.name} on ${on_string}: data ${inputs[0].hid} refmap file"
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
109 from_work_dir="cc_output.input1.refmap">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
110 <filter>annotation['use_ref_annotation'] == 'Yes'</filter>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
111 </data>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
112 <data format="tabular" name="input2_tmap" label="${tool.name} on ${on_string}: data ${inputs[1].hid} tmap file" from_work_dir="cc_output.input2.tmap">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
113 <filter>@HAS_MULTIPLE_INPUTS@</filter>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
114 </data>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
115 <data format="tabular" name="input2_refmap"
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
116 label="${tool.name} on ${on_string}: data ${inputs[1].hid} refmap file"
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
117 from_work_dir="cc_output.input2.refmap">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
118 <filter>annotation['use_ref_annotation'] == 'Yes' and @HAS_MULTIPLE_INPUTS@</filter>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
119 </data>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
120 <data format="tabular" name="transcripts_tracking" label="${tool.name} on ${on_string}: transcript tracking" from_work_dir="cc_output.tracking">
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
121 <filter>@HAS_MULTIPLE_INPUTS@</filter>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
122 </data>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
123 <data format="gtf" name="transcripts_combined" label="${tool.name} on ${on_string}: combined transcripts"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
124 </outputs>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
125
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
126 <tests>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
127 <!--
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
128 cuffcompare -r cuffcompare_in3.gtf -R cuffcompare_in1.gtf cuffcompare_in2.gtf
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
129 -->
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
130 <test>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
131 <param name="inputs" value="cuffcompare_in1.gtf,cuffcompare_in2.gtf" ftype="gtf"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
132 <param name="use_ref_annotation" value="Yes"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
133 <param name="reference_annotation" value="cuffcompare_in3.gtf" ftype="gtf"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
134 <param name="ignore_nonoverlapping_reference" value="Yes"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
135 <param name="ignore_nonoverlapping_transfrags" value="No"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
136 <param name="use_seq_data" value="No"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
137 <param name="discard_single_exon" value="" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
138 <param name="max_dist_exon" value="100" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
139 <param name="max_dist_group" value="100" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
140 <param name="discard_intron_redundant_transfrags" value="No" />
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
141 <!-- Line diffs are the result of different locations for input files; this cannot be fixed as cuffcompare outputs
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
142 full input path for each input. -->
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
143 <output name="transcripts_accuracy" file="cuffcompare_out7.txt" lines_diff="2"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
144 <output name="input1_tmap" file="cuffcompare_out1.tmap"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
145 <output name="input1_refmap" file="cuffcompare_out2.refmap"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
146 <output name="input2_tmap" file="cuffcompare_out3.tmap"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
147 <output name="input2_refmap" file="cuffcompare_out4.refmap"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
148 <output name="transcripts_tracking" file="cuffcompare_out6.tracking"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
149 <output name="transcripts_combined" file="cuffcompare_out5.gtf"/>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
150 </test>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
151 </tests>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
152
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
153 <help>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
154 **Cuffcompare Overview**
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
155
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
156 Cuffcompare is part of Cufflinks_. Cuffcompare helps you: (a) compare your assembled transcripts to a reference annotation and (b) track Cufflinks transcripts across multiple experiments (e.g. across a time course). Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
157
1
6d8ab54229a0 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
158 .. _Cufflinks: http://cole-trapnell-lab.github.io/cufflinks/
6d8ab54229a0 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
159
0
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
160 ------
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
161
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
162 **Know what you are doing**
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
163
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
164 .. class:: warningmark
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
165
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
166 There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
167
1
6d8ab54229a0 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
168 .. __: http://cole-trapnell-lab.github.io/cufflinks/cuffcompare/
0
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
169
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
170 ------
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
171
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
172 **Input format**
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
173
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
174 Cuffcompare takes Cufflinks' GTF output as input, and optionally can take a "reference" annotation (such as from Ensembl_)
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
175
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
176 .. _Ensembl: http://www.ensembl.org
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
177
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
178 ------
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
179
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
180 **Outputs**
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
181
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
182 Cuffcompare produces the following output files:
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
183
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
184 Transcripts Accuracy File:
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
185
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
186 Cuffcompare reports various statistics related to the "accuracy" of the transcripts in each sample when compared to the reference annotation data. The typical gene finding measures of "sensitivity" and "specificity" (as defined in Burset, M., Guigó, R. : Evaluation of gene structure prediction programs (1996) Genomics, 34 (3), pp. 353-367. doi: 10.1006/geno.1996.0298) are calculated at various levels (nucleotide, exon, intron, transcript, gene) for each input file and reported in this file. The Sn and Sp columns show sensitivity and specificity values, respectively, at each level, while the fSn and fSp columns are "fuzzy" variants of these same accuracy calculations, allowing for a very small variation in exon boundaries to still be counted as a "match".
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
187
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
188 Transcripts Combined File:
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
189
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
190 Cuffcompare reports a GTF file containing the "union" of all transfrags in each sample. If a transfrag is present in both samples, it is thus reported once in the combined gtf.
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
191
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
192 Transcripts Tracking File:
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
193
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
194 This file matches transcripts up between samples. Each row contains a transcript structure that is present in one or more input GTF files. Because the transcripts will generally have different IDs (unless you assembled your RNA-Seq reads against a reference transcriptome), cuffcompare examines the structure of each of the transcripts, matching transcripts that agree on the coordinates and order of all of their introns, as well as strand. Matching transcripts are allowed to differ on the length of the first and last exons, since these lengths will naturally vary from sample to sample due to the random nature of sequencing.
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
195 If you ran cuffcompare with the -r option, the reference gene id and reference transcript id columns (see the table below) identify the closest matching reference transcript to the one described by each row.
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
196
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
197 Here's an example of a line from the tracking file::
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
198
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
199 TCONS_00000045 XLOC_000023 Tcea|uc007afj.1 j \
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
200 q1:exp.115|exp.115.0|100|3.061355|0.350242|0.350207 \
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
201 q2:60hr.292|60hr.292.0|100|4.094084|0.000000|0.000000
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
202
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
203 In this example, a transcript present in the two input files, called exp.115.0 in the first and 60hr.292.0 in the second, doesn't match any reference transcript exactly, but shares exons with uc007afj.1, an isoform of the gene Tcea, as indicated by the class code j. The first five columns are as follows::
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
204
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
205 Column number Column name Example Description
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
206 -----------------------------------------------------------------------
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
207 1 Cufflinks transfrag id TCONS_00000045 A unique internal id for the transfrag
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
208 2 Cufflinks locus id XLOC_000023 A unique internal id for the locus
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
209 3 Reference gene id Tcea The gene_name attribute of the reference GTF record for this transcript, or '-' if no reference transcript overlaps this Cufflinks transcript
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
210 4 Reference transcript id uc007afj.1 The transcript_id attribute of the reference GTF record for this transcript, or '-' if no reference transcript overlaps this Cufflinks transcript
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
211 5 Class code j The type of match between the Cufflinks transcripts in column 6 and the reference transcript. See class codes
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
212
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
213 Each of the columns after the fifth have the following format:
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
214 qJ:gene_id|transcript_id|FMI|FPKM|conf_lo|conf_hi
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
215
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
216 A transcript need not be present in all samples to be reported in the tracking file. A sample not containing a transcript will have a "-" in its entry in the row for that transcript.
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
217
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
218 Class Codes
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
219
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
220 If you ran cuffcompare with the -r option, tracking rows will contain the following values. If you did not use -r, the rows will all contain "-" in their class code column::
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
221
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
222 Priority Code Description
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
223 ---------------------------------
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
224 1 = Match
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
225 2 c Contained
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
226 3 j New isoform
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
227 4 e A single exon transcript overlapping a reference exon and at least 10 bp of a reference intron, indicating a possible pre-mRNA fragment.
1
6d8ab54229a0 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
228 5 i A single exon transcript falling entirely within a reference intron
6d8ab54229a0 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
229 6 o Generic exonic overlap with a reference transcript
6d8ab54229a0 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
230 7 p Possible polymerase run-on fragment
6d8ab54229a0 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
231 8 r Repeat. Currently determined by looking at the soft-masked reference sequence and applied to transcripts where at least 50% of the bases are lower case
6d8ab54229a0 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
232 9 u Unknown, intergenic transcript
6d8ab54229a0 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
233 10 x Exonic overlap with reference on the opposite strand
6d8ab54229a0 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
234 11 s An intron of the transfrag overlaps a reference intron on the opposite strand (likely due to read mapping errors)
6d8ab54229a0 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
235 12 . (.tracking file only, indicates multiple classifications)
0
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
236
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
237 -------
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
238
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
239 **Settings**
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
240
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
241 All of the options have a default value. You can change any of them. Most of the options in Cuffcompare have been implemented here.
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
242
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
243 ------
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
244
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
245 **Cuffcompare parameter list**
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
246
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
247 This is a list of implemented Cuffcompare options::
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
248
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
249 -r An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below.
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
250 -R If -r was specified, this option causes cuffcompare to ignore reference transcripts that are not overlapped by any transcript in one of cuff1.gtf,...,cuffN.gtf. Useful for ignoring annotated transcripts that are not present in your RNA-Seq samples and thus adjusting the "sensitivity" calculation in the accuracy report written in the transcripts_accuracy file
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
251 </help>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
252 <citations>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
253 <citation type="doi">10.1038/nbt.1621</citation>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
254 </citations>
d0d26169cc2a Uploaded
devteam
parents:
diff changeset
255 </tool>