Mercurial > repos > iuc > gffcompare
comparison gffcompare.xml @ 0:e52ca9ad69ca draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/packages/gffcompare commit 33ef7ef2f829bf46a6fde7637715d974c17f898a
| author | iuc |
|---|---|
| date | Fri, 07 Apr 2017 16:01:52 -0400 |
| parents | |
| children | 77c22296fb8e |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e52ca9ad69ca |
|---|---|
| 1 <tool id="gffcompare" name="GffCompare" version="0.9.8"> | |
| 2 <description>compare assembled transcripts to a reference annotation</description> | |
| 3 <requirements> | |
| 4 <requirement type="package" version="0.9.8">gffcompare</requirement> <!-- same 0.9.8 version string as the tool's own version attribute --> | |
| 5 </requirements> | |
| 6 <stdio> <!-- Problem detection: the exit-code ranges and output patterns listed below --> | |
| 7 <exit_code range="1:" /> <!-- exit codes 1 and above --> | |
| 8 <exit_code range=":-1" /> <!-- exit codes -1 and below --> | |
| 9 <regex match="Error" /> | |
| 10 <regex match="Exception" /> | |
| 11 </stdio> | |
| 12 <version_command>gffcompare -v | awk '{print $2}'</version_command> <!-- keeps only the second whitespace-separated field of the "gffcompare -v" output --> | |
| 13 <command> | |
| 14 <![CDATA[ | |
| 15 #set $input_gtf = "' '".join(str($inputs).split(',')) | |
| 16 #if $seq_data.use_seq_data == "Yes": | |
| 17 #if $seq_data.seq_source.index_source == "history": | |
| 18 ln -s '$seq_data.seq_source.ref_file' ref_seq.fa && | |
| 19 #else: | |
| 20 ln -s '${seq_data.seq_source.index.fields.path}' ref_seq.fa && | |
| 21 #end if | |
| 22 #end if | |
| 23 gffcompare | |
| 24 ## Reference annotation (-r) plus the optional -R / -Q switches; emitted only when a reference was requested | |
| 25 #if $annotation.use_ref_annotation == "Yes": | |
| 26 -r '$annotation.reference_annotation' $annotation.ignore_nonoverlapping_reference $annotation.ignore_nonoverlapping_transfrags | |
| 27 #end if | |
| 28 | |
| 29 ## Genome sequence (-s): ref_seq.fa is the symlink created before the gffcompare call (history file or cached index path) | |
| 30 | |
| 31 #if $seq_data.use_seq_data == "Yes": | |
| 32 -s ref_seq.fa | |
| 33 #end if | |
| 34 ## Filtering and distance options; the select and boolean parameters expand to their flag or to an empty string | |
| 35 $discard_single_exon -e $max_dist_exon -d $max_dist_group $discard_intron_redundant_transfrags | |
| 36 ## input_gtf was built at the top by splitting str(inputs) on commas and re-joining with "' '", so the outer quotes below give one quoted argument per input | |
| 37 '$input_gtf' | |
| 38 ]]> | |
| 39 </command> | |
| 40 <inputs> | |
| 41 <param format="gtf" name="inputs" type="data" label="GTF inputs for comparison" help="" multiple="true" /> <!-- one or more query files; the command passes each one as its own quoted argument --> | |
| 42 <conditional name="annotation"> <!-- optional reference annotation; the Yes branch supplies -r and the -R / -Q switches --> | |
| 43 <param label="Use Reference Annotation" name="use_ref_annotation" type="select"> | |
| 44 <option value="No">No</option> | |
| 45 <option value="Yes">Yes</option> | |
| 46 </param> | |
| 47 <when value="Yes"> | |
| 48 <param argument="-r" format="gff3,gtf" help="Requires an annotation file in GFF3 or GTF format." label="Reference Annotation" name="reference_annotation" type="data" /> | |
| 49 <param argument="-R" falsevalue="" help="consider only the reference transcripts that overlap any of the input transfrags (Sn correction)" label="Ignore reference transcripts that are not overlapped by any input transfrags" name="ignore_nonoverlapping_reference" truevalue="-R" type="boolean" /> | |
| 50 <param argument="-Q" falsevalue="" help="consider only the input transcripts that overlap any of the reference transcripts (Sp correction). Warning: this will discard all 'novel' loci!" label="Ignore input transcripts that are not overlapped by any reference transcripts" name="ignore_nonoverlapping_transfrags" truevalue="-Q" type="boolean" /> | |
| 51 </when> | |
| 52 <when value="No"> | |
| 53 </when> | |
| 54 </conditional> | |
| 55 <conditional name="seq_data"> <!-- optional genome sequence for -s. NOTE(review): Yes is listed first below, which presumably makes it the default; confirm this is intended --> | |
| 56 <param help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff." label="Use Sequence Data" name="use_seq_data" type="select"> | |
| 57 <option value="Yes">Yes</option> | |
| 58 <option value="No">No</option> | |
| 59 </param> | |
| 60 <when value="No" /> | |
| 61 <when value="Yes"> | |
| 62 <conditional name="seq_source"> <!-- genome comes either from the fasta_indexes data table or from a FASTA dataset in the history --> | |
| 63 <param label="Choose the source for the reference list" name="index_source" type="select"> | |
| 64 <option value="cached">Locally cached</option> | |
| 65 <option value="history">History</option> | |
| 66 </param> | |
| 67 <when value="cached"> | |
| 68 <param argument="-s" label="Using reference genome" name="index" type="select"> | |
| 69 <options from_data_table="fasta_indexes"> | |
| 70 <filter column="1" key="dbkey" ref="inputs" type="data_meta" /> <!-- limits the choices to table rows whose column 1 matches the dbkey metadata of the selected inputs --> | |
| 71 <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" /> | |
| 72 </options> | |
| 73 </param> | |
| 74 </when> | |
| 75 <when value="history"> | |
| 76 <param argument="-s" format="fasta" label="Using reference file" name="ref_file" type="data" /> | |
| 77 </when> | |
| 78 </conditional> | |
| 79 </when> | |
| 80 </conditional> | |
| 81 <param argument="-M/-N" label="discard (ignore) single-exon transcripts" name="discard_single_exon" type="select"> <!-- the selected value is inserted verbatim into the command; the empty-valued option adds nothing --> | |
| 82 <option selected="True" value="">No</option> | |
| 83 <option value="-M">Discard single-exon transfrags and reference transcripts</option> | |
| 84 <option value="-N">Discard single-exon reference transcripts</option> | |
| 85 </param> | |
| 86 <param argument="-e" help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100" label="Max. Distance for assessing exon accuracy" name="max_dist_exon" type="integer" value="100" /> | |
| 87 <param argument="-d" help="max. distance (range) for grouping transcript start sites. Default: 100" label="Max distance for transcript grouping" name="max_dist_group" type="integer" value="100" /> | |
| 88 <param argument="-F" help="Discard intron-redundant transfrags if they share the 5' end (if they differ only at the 3' end)" truevalue="-F" falsevalue="" label="discard intron-redundant transfrags sharing 5'" name="discard_intron_redundant_transfrags" type="boolean" /> | |
| 89 </inputs> | |
| 90 <outputs> <!-- stats, loci and tracking are always collected; exactly one of the two GTF outputs below is kept per run --> | |
| 91 <data format="txt" from_work_dir="gffcmp.stats" label="${tool.name} on ${on_string}: transcript accuracy" name="transcripts_stats" /> | |
| 92 <data format="tabular" from_work_dir="gffcmp.loci" label="${tool.name} on ${on_string}: loci" name="transcripts_loci" /> | |
| 93 <data format="tabular" from_work_dir="gffcmp.tracking" label="${tool.name} on ${on_string}: data ${inputs[0].hid} tracking file" name="transcripts_tracking" /> | |
| 94 <data format="gtf" from_work_dir="gffcmp.combined.gtf" label="${tool.name} on ${on_string}: combined transcripts" name="transcripts_combined"> <!-- combined GTF: kept whenever gffcompare is NOT in annotation mode (several query files, or no reference annotation) --> | |
| 95 <filter>(type(inputs) is list and len(inputs) > 1) or annotation['use_ref_annotation'] == 'No'</filter> | |
| 96 </data> | |
| 97 <data format="gtf" from_work_dir="gffcmp.annotated.gtf" label="${tool.name} on ${on_string}: annotated transcripts" name="transcripts_annotated"> <!-- annotated GTF: annotation mode only, i.e. a single query file together with a reference annotation (-r); the nested select must be addressed through its conditional --> | |
| 98 <filter>(type(inputs) is not list or len(inputs) == 1) and annotation['use_ref_annotation'] == 'Yes'</filter> | |
| 99 </data> | |
| 100 </outputs> | |
| 101 <tests> | |
| 102 <test> <!-- two query GTFs compared against a reference annotation with -R; checks stats, loci, tracking and the combined GTF --> | |
| 103 <param ftype="gtf" name="inputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" /> | |
| 104 <param name="use_ref_annotation" value="Yes" /> | |
| 105 <param ftype="gtf" name="reference_annotation" value="gffcompare_in3.gtf" /> | |
| 106 <param name="ignore_nonoverlapping_reference" value="Yes" /> | |
| 107 <param name="ignore_nonoverlapping_transfrags" value="No" /> | |
| 108 <param name="use_seq_data" value="No" /> | |
| 109 <param name="discard_single_exon" value="" /> | |
| 110 <param name="max_dist_exon" value="100" /> | |
| 111 <param name="max_dist_group" value="100" /> | |
| 112 <param name="discard_intron_redundant_transfrags" value="No" /> | |
| 113 <output file="gffcompare_out1.stats" name="transcripts_stats" lines_diff="6" /> | |
| 114 <output file="gffcompare_out1.loci" name="transcripts_loci" /> | |
| 115 <output file="gffcompare_out1.tracking" name="transcripts_tracking" /> | |
| 116 <output file="gffcompare_out1.gtf" name="transcripts_combined" /> | |
| 117 </test> | |
| 118 <test> <!-- a single query GTF with a reference annotation and -R; checks stats, loci, tracking and the annotated GTF --> | |
| 119 <param ftype="gtf" name="inputs" value="gffcompare_in4.gtf" /> | |
| 120 <param name="use_ref_annotation" value="Yes" /> | |
| 121 <param ftype="gtf" name="reference_annotation" value="gffcompare_in5.gtf" /> | |
| 122 <param name="ignore_nonoverlapping_reference" value="Yes" /> | |
| 123 <param name="ignore_nonoverlapping_transfrags" value="No" /> | |
| 124 <param name="use_seq_data" value="No" /> | |
| 125 <param name="discard_single_exon" value="" /> | |
| 126 <param name="max_dist_exon" value="100" /> | |
| 127 <param name="max_dist_group" value="100" /> | |
| 128 <param name="discard_intron_redundant_transfrags" value="No" /> | |
| 129 <output file="gffcompare_out2.stats" name="transcripts_stats" lines_diff="6" /> | |
| 130 <output file="gffcompare_out2.loci" name="transcripts_loci" lines_diff="2" /> | |
| 131 <output file="gffcompare_out2.tracking" name="transcripts_tracking" /> | |
| 132 <output file="gffcompare_out2.gtf" name="transcripts_annotated" /> | |
| 133 </test> | |
| 134 </tests> | |
| 135 <help> | |
| 136 <![CDATA[ | |
| 137 **GffCompare Overview** | |
| 138 | |
| 139 ## GffCompare | |
| 140 * compare and evaluate the accuracy of RNA-Seq transcript assemblers (Cufflinks, StringTie). | |
| 141 * collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g. resulting from the assembly of different samples) | |
| 142 * classify transcripts from one or multiple GTF/GFF3 files as they relate to reference transcripts provided in a | |
| 143 annotation file (also in GTF/GFF3 format) | |
| 144 | |
| 145 The original form of this program is also distributed as part of the Cufflinks suite, under the name "CuffCompare" | |
| 146 (see manual: http://cole-trapnell-lab.github.io/cufflinks/cuffcompare/). Most of the options and parameters of CuffCompare | |
| 147 are supported by GffCompare, while new features will likely be added to GffCompare in the future. | |
| 148 | |
| 149 A notable difference from CuffCompare is that when a single query GTF/GFF file is given as input, along with a reference annotation (-r option), | |
| 150 gffcompare switches into "annotation mode" and it generates a .annotated.gtf file instead of the .merged.gtf produced by CuffCompare with the | |
| 151 same parameters. This file has the same general format as CuffCompare's .merged.gtf file (with "class codes" assigned to transcripts as per | |
| 152 their relationship with the matching/overlapping reference transcript), but the original transcript IDs are preserved, so gffcompare can thus | |
| 153 be used as a simple way of annotating a set of transcripts. | |
| 154 | |
| 155 Another important difference is that the input transcripts are no longer discarded when they are found to be "intron redundant", i.e. | |
| 156 contained within other, longer isoforms. CuffCompare had the -G option to prevent collapsing of such intron redundant isoforms into | |
| 157 their longer "containers", but GffCompare has made this the default mode of operation (hence the -G option is no longer needed | |
| 158 and is simply ignored when given). | |
| 159 ]]> | |
| 160 </help> | |
| 161 <citations> <!-- NOTE(review): the DOI below appears to belong to the Cufflinks paper rather than a GffCompare-specific reference; confirm it is the intended citation --> | |
| 162 <citation type="doi">10.1038/nbt.1621</citation> | |
| 163 </citations> | |
| 164 </tool> |
