comparison star_fusion.xml @ 0:5ff7593a7220 draft

Uploaded
author jjohnson
date Wed, 04 Oct 2017 15:23:36 -0400
parents
children 5748e43a73e0
comparison
equal deleted inserted replaced
-1:000000000000 0:5ff7593a7220
1 <tool id="star_fusion" name="STAR-Fusion" version="1.1.0" profile="17.01">
2 <description>detect fusion genes in RNA-Seq data</description>
3 <requirements>
4 <!-- Tool dependency, resolved through Bioconda -->
5 <requirement version="1.1.0" type="package">star-fusion</requirement>
6 </requirements>
7
8 <stdio> <!-- Job state rules: fatal matches fail the job, warning matches only annotate it. -->
9 <regex match="command not found" source="stderr" level="fatal"/>
10 <regex match="EXITING because of INPUT ERROR" source="stderr" level="fatal"/>
11 <regex match="FATAL ERROR" source="stderr" level="fatal"/>
12 <exit_code range="1:" level="fatal"/> <!-- An explicit stdio block replaces the default exit code check, so any non-zero exit status has to be declared fatal here. -->
13 <regex match="Warning:" source="stderr" level="warning"/>
14 <regex match="CMD:" source="stderr" level="warning"/>
15 <!-- Progress messages written to stderr, matched at warning level. -->
16 <regex match="-done creating index file:" source="stderr" level="warning"/>
17 <regex match="-parsing GTF file:" source="stderr" level="warning"/>
18 <regex match="-building interval tree" source="stderr" level="warning"/>
19 <regex match="-parsing fusion evidence:" source="stderr" level="warning"/>
20 <regex match="-mapping reads to genes" source="stderr" level="warning"/>
21 <regex match="-outputting fusion candidates to file:" source="stderr" level="warning"/>
22
23 <regex match="Process complete" source="stderr" level="warning"/>
24 </stdio>
25
26 <version_command>STAR-Fusion --version 2>&amp;1 | grep version | grep -o -E "software version.*?"</version_command> <!-- Keeps only the "software version ..." text from the tool's version output; stderr is merged into stdout before filtering. -->
27
28 <command><![CDATA[
29 #if str($reference.reference_type_selector) == 'cached':
30 ln -s '$reference.ctat_resource_lib' tmp_star_fusion_genome_dir
31 #else
32 ## 1. make sure a gzip-compressed copy of the blastn pairs file is available
33 if file --mime-type '${reference.blast_pairs}' | grep -q /gzip\$; then
34 blast_pairs_file='${reference.blast_pairs}' ;
35 else
36 ## Older versions of gzip do not support the -k option to keep the original
37 ## file, so compress through stdout - this should be a universal solution.
38 ## The copy is written to the job working directory, not next to the input dataset.
39 gzip -1 -c -- '${reference.blast_pairs}' > blast_pairs.gz &&
40 blast_pairs_file="\$(pwd)/blast_pairs.gz" ;
41 fi &&
42
43 ## 2. create reference index - using \$(pwd) is necessary, probably because the perl script changes work directory
44 ## - @todo once write a decent STAR and STAR Fusion data manager
45 prep_genome_lib.pl
46 --genome_fa '${reference.fasta_type.ownFile}'
47 --gtf '${reference.geneModel}'
48 --blast_pairs "\$blast_pairs_file"
49 --CPU \${GALAXY_SLOTS:-1}
50 --output_dir "\$(pwd)/tmp_star_fusion_genome_dir"
51 #end if
52 &&
53
54 ## Link in fastq files so they have appropriate extensions (only needed when STAR-Fusion runs STAR itself)
55 #if str($input_params.input_source) != "use_chimeric":
56 #if $input_params.left_fq.is_of_type("fastq.gz"):
57 #set read1 = 'input_1.fastq.gz'
58 #else:
59 #set read1 = 'input_1.fastq'
60 #end if
61 ln -f -s '${input_params.left_fq}' ${read1} &&
62
63 #if $input_params.right_fq:
64 #if $input_params.right_fq.is_of_type("fastq.gz"):
65 #set read2 = 'input_2.fastq.gz'
66 #else:
67 #set read2 = 'input_2.fastq'
68 #end if
69 ln -f -s '${input_params.right_fq}' ${read2} &&
70 #end if
71 #end if
72
73 ## 3. Run STAR-Fusion, writing results into the job working directory (where the from_work_dir outputs are collected) and passing any selected optional output flags as separate arguments
74 STAR-Fusion
75 #if str($input_params.input_source) == "use_chimeric":
76 --chimeric_junction '${input_params.chimeric_junction}'
77 #else:
78 --left_fq ${read1}
79 #if $input_params.right_fq:
80 --right_fq ${read2}
81 #end if
82 #end if
83 --output_dir "\$(pwd)"
84 --genome_lib_dir "\$(pwd)/tmp_star_fusion_genome_dir"
85 #if str($input_params.optional_outputs) != 'None':
86 ${str($input_params.optional_outputs).replace(',', ' ')}
87 #end if
88 #if str($params.settingsType) == "full":
89 --min_junction_reads $params.min_junction_reads
90 --min_sum_frags $params.min_sum_frags
91 --max_promiscuity $params.max_promiscuity
92 --min_novel_junction_support $params.min_novel_junction_support
93 --min_alt_pct_junction $params.min_alt_pct_junction
94 --aggregate_novel_junction_dist $params.aggregate_novel_junction_dist
95 --E $params.E
96 #end if
97 --CPU \${GALAXY_SLOTS:-1}
98 ]]></command>
99
100 <inputs>
101 <conditional name="reference"> <!-- Genome library source: a prebuilt entry from the ctat_resource data table, or one built from history datasets. -->
102 <param name="reference_type_selector" type="select" label="Star Fusion Genome Reference Source">
103 <option value="cached">Locally Cached sequences</option>
104 <option value="build" selected="true">Sequences from your history</option>
105 </param>
106 <when value="cached"> <!-- The selected table value is symlinked as the genome library directory by the command. -->
107 <param name="ctat_resource_lib" type="select" label="Genome to search">
108 <options from_data_table="ctat_resource">
109 <column name="dbkey" index="1"/>
110 <column name="name" index="2"/>
111 <column name="value" index="3"/>
112 </options>
113 </param>
114 </when>
115 <when value="build">
116 <!-- Genome source: FASTA, gene model and BLAST pairs that the command hands to prep_genome_lib.pl. -->
117 <conditional name="fasta_type">
118 <param name="fasta_type_selector" type="select" label="Source for sequence to search">
119 <option value="cached">Locally Cached sequences</option>
120 <option value="history" selected="true">Sequences from your history</option>
121 </param>
122 <when value="cached"> <!-- FASTA chosen from the all_fasta data table. -->
123 <param name="ownFile"
124 type="select" label="Genome to search">
125 <options from_data_table="all_fasta">
126 <column name="dbkey" index="1"/>
127 <column name="name" index="2"/>
128 <column name="value" index="3"/>
129 </options>
130 </param>
131 </when>
132 <when value="history"> <!-- FASTA dataset taken from the user's history. -->
133 <param name="ownFile"
134 type="data"
135 format="fasta"
136 label="Select the reference genome (FASTA file)"/>
137 </when>
138 </conditional>
139 <param name="geneModel"
140 type="data"
141 format="gff3,gtf"
142 label="Gene model (gff3,gtf) file for splice junctions and fusion gene detection"/>
143 <param name="blast_pairs"
144 type="data"
145 format="tabular"
146 label="Result of BLAST+-blastn of the reference fasta sequence with itself"
147 help="Run blastn with '-outputfmt 6' or choose 'Tabular (standard 12 columns)' in the Galaxy wrapper. For human data it is recommended to obtain it from here: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/"/>
148 </when>
149 </conditional>
150
151
152 <conditional name="input_params"> <!-- Input mode: a chimeric junction file from an earlier STAR run, or FASTQ reads. -->
153 <param name="input_source"
154 type="select"
155 label="Use output from earlier STAR run or let STAR Fusion control running STAR">
156 <option value="use_chimeric">Use output from earlier STAR</option>
157 <option value="use_fastq">Let STAR Fusion control running STAR</option>
158 </param>
159 <when value="use_chimeric">
160 <param name="chimeric_junction"
161 type="data"
162 format="interval"
163 argument="--chimeric_junction"
164 label="Chimeric junction file from STAR (with STAR-Fusion settings)"/>
165 <param name="optional_outputs" type="select" multiple="true" label="Additional Outputs"> <!-- Option values are STAR-Fusion flag names; the output filters test for them. -->
166 <option value="--annotate">--annotate</option>
167 <option value="--examine_coding_effect">--examine_coding_effect</option>
168 </param>
169 </when>
170 <when value="use_fastq">
171 <param name="left_fq"
172 type="data"
173 format="fastqsanger,fastqsanger.gz"
174 argument="--left_fq"
175 label="left.fq file"/>
176 <param name="right_fq"
177 type="data"
178 format="fastqsanger,fastqsanger.gz"
179 optional="true"
180 argument="--right_fq"
181 label="right.fq file (actually optional, but highly recommended)"/>
182 <param name="optional_outputs" type="select" multiple="true" label="Additional Outputs"> <!-- Same options as the chimeric mode plus fusion read extraction, which is offered only in this mode. -->
183 <option value="--annotate">--annotate</option>
184 <option value="--examine_coding_effect">--examine_coding_effect</option>
185 <option value="--extract_fusion_reads">--extract_fusion_reads</option>
186 </param>
187 </when>
188 </conditional>
189
190 <conditional name="params"> <!-- Filtering thresholds; the command adds them only when settingsType is full. -->
191 <param name="settingsType" type="select" label="Settings to use" help="You can use the default settings or set custom values for any STAR Fusion parameter.">
192 <option value="default" selected="true">Use Defaults</option>
193 <option value="full">Full parameter list</option>
194 </param>
195 <when value="default" />
196 <when value="full"><!-- Full/advanced params: each one maps to the STAR-Fusion option named in its argument attribute. -->
197 <param name="min_junction_reads"
198 type="integer" value="1"
199 label="minimum number of junction-spanning reads required."
200 argument="--min_junction_reads"/>
201 <param name="min_sum_frags"
202 type="integer"
203 value="2"
204 label="minimum fusion support = (#junction_reads + #spanning_frags)"
205 argument="--min_sum_frags"/>
206 <param name="max_promiscuity"
207 type="integer"
208 value="3"
209 label="maximum number of partners allowed for a given fusion"
210 argument="--max_promiscuity"/>
211 <param name="min_novel_junction_support"
212 type="integer"
213 value="3"
214 label="minimum of 3 junction reads required if breakpoint lacks involvement of only reference junctions"
215 argument="--min_novel_junction_support"/>
216 <param name="min_alt_pct_junction"
217 type="float"
218 value="10"
219 label="10% of the dominant isoform junction support"
220 argument="--min_alt_pct_junction"/>
221 <param name="aggregate_novel_junction_dist"
222 type="integer"
223 value="5"
224 label="non-ref junctions within 5 are merged into single calls"
225 argument="--aggregate_novel_junction_dist"/>
226 <param name="E"
227 type="float"
228 value="0.001"
229 label="E-value threshold for blast searches"
230 argument="-E"/>
231 </when>
232 </conditional>
233 </inputs>
234
235 <outputs> <!-- Datasets collected from the job working directory; every output carries a unique name, and filters decide which optional ones are created. -->
236 <data format="bam" name="out_bam" label="${tool.name} on ${on_string}: Aligned.sortedByCoord.out.bam" from_work_dir="star-fusion.fusion_candidates.final"/> <!-- NOTE(review): declared as bam and labelled as the STAR alignment, yet collected from star-fusion.fusion_candidates.final; confirm the intended file and format. -->
237 <data format="fastqsanger" name="output_reads_1" label="${tool.name} on ${on_string}: fusion_evidence_reads_1" from_work_dir="star-fusion.fusion_evidence_reads_1.fq">
238 <filter>input_params['input_source'] == 'use_fastq' and 'extract_fusion_reads' in str(input_params['optional_outputs'])</filter>
239 </data>
240 <data format="fastqsanger" name="output_reads_2" label="${tool.name} on ${on_string}: fusion_evidence_reads_2" from_work_dir="star-fusion.fusion_evidence_reads_2.fq">
241 <filter>input_params['input_source'] == 'use_fastq' and 'extract_fusion_reads' in str(input_params['optional_outputs'])</filter>
242 </data>
243 <data format="tabular" name="output_final" label="${tool.name} on ${on_string}: fusion_predictions.tsv" from_work_dir="star-fusion.fusion_predictions.tsv"/>
244 <data format="tabular" name="output_final_abridged" label="${tool.name} on ${on_string}: fusion_predictions.abridged.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.tsv"/>
245 <data format="tabular" name="output_final_annotated" label="${tool.name} on ${on_string}: fusion_predictions.abridged.annotated.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.annotated.tsv">
246 <filter>'annotate' in str(input_params['optional_outputs']) and 'examine_coding_effect' not in str(input_params['optional_outputs'])</filter>
247 </data>
248 <data format="tabular" name="output_final_coding_effect" label="${tool.name} on ${on_string}: fusion_predictions.abridged.coding_effect.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.coding_effect.tsv">
249 <filter>'examine_coding_effect' in str(input_params['optional_outputs']) and 'annotate' not in str(input_params['optional_outputs'])</filter>
250 </data>
251 <data format="tabular" name="output_final_annotated_coding_effect" label="${tool.name} on ${on_string}: fusion_predictions.abridged.annotated.coding_effect.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.annotated.coding_effect.tsv">
252 <filter>'annotate' in str(input_params['optional_outputs']) and 'examine_coding_effect' in str(input_params['optional_outputs'])</filter>
253 </data>
254 </outputs>
255
256 <tests>
257 <test>
258 <param name="input_source" value="use_chimeric"/>
259 <param name="chimeric_junction" value="test1.tabular" ftype="interval"/>
260 <param name="fasta_type_selector" value="history"/>
261 <param name="ownFile" value="test1.fa" ftype="fasta"/>
262 <param name="geneModel" value="test1.gtf" ftype="gtf"/>
263 <param name="blast_pairs" value="test1-test1.blastn.tabular" ftype="tabular"/>
264 <param name="settingsType" value="default"/>
265
266 <!-- The final column is written in a non-deterministic order, so only the header and the leading columns are asserted -->
267 <output name="output_final">
268 <assert_contents>
269 <has_line line="#fusion_name&#009;JunctionReads&#009;SpanningFrags&#009;Splice_type&#009;LeftGene&#009;LeftBreakpoint&#009;RightGene&#009;RightBreakpoint&#009;JunctionReads&#009;SpanningFrags" />
270 <has_text text="GENE1--GENE2&#009;24&#009;0&#009;INCL_NON_REF_SPLICE&#009;GENE1^GENE1&#009;chr1:240:+&#009;GENE2^GENE2&#009;chr2:241:+" />
271 </assert_contents>
272 </output>
273 </test>
274 <test>
275 <param name="input_source" value="use_fastq"/>
276 <param name="left_fq" value="test1.fastqsanger" ftype="fastqsanger"/>
277 <param name="fasta_type_selector" value="history"/>
278 <param name="ownFile" value="test1.fa" ftype="fasta"/>
279 <param name="geneModel" value="test1.gtf" ftype="gtf"/>
280 <param name="blast_pairs" value="test1-test1.blastn.tabular" ftype="tabular"/>
281 <param name="settingsType" value="default"/>
282
283 <!-- The final column is written in a non-deterministic order, so only the header and the leading columns are asserted -->
284 <output name="output_final">
285 <assert_contents>
286 <has_line line="#fusion_name&#009;JunctionReads&#009;SpanningFrags&#009;Splice_type&#009;LeftGene&#009;LeftBreakpoint&#009;RightGene&#009;RightBreakpoint&#009;JunctionReads&#009;SpanningFrags" />
287 <has_text text="GENE1--GENE2&#009;24&#009;0&#009;INCL_NON_REF_SPLICE&#009;GENE1^GENE1&#009;chr1:240:+&#009;GENE2^GENE2&#009;chr2:241:+" />
288 </assert_contents>
289 </output>
290 </test>
291 <test>
292 <param name="input_source" value="use_fastq"/>
293 <param name="left_fq" value="test1.fastqsanger.gz" ftype="fastqsanger.gz"/>
294 <param name="fasta_type_selector" value="history"/>
295 <param name="ownFile" value="test1.fa" ftype="fasta"/>
296 <param name="geneModel" value="test1.gtf" ftype="gtf"/>
297 <param name="blast_pairs" value="test1-test1.blastn.tabular" ftype="tabular"/>
298 <param name="settingsType" value="default"/>
299
300 <!-- The final column is written in a non-deterministic order, so only the header and the leading columns are asserted -->
301 <output name="output_final">
302 <assert_contents>
303 <has_line line="#fusion_name&#009;JunctionReads&#009;SpanningFrags&#009;Splice_type&#009;LeftGene&#009;LeftBreakpoint&#009;RightGene&#009;RightBreakpoint&#009;JunctionReads&#009;SpanningFrags" />
304 <has_text text="GENE1--GENE2&#009;24&#009;0&#009;INCL_NON_REF_SPLICE&#009;GENE1^GENE1&#009;chr1:240:+&#009;GENE2^GENE2&#009;chr2:241:+" />
305 </assert_contents>
306 </output>
307 </test>
308 </tests>
309 <help>
310 **What it does**
311
312 STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set.
313
314 **Input: files required to run STAR-Fusion**
315 - A genome reference sequence (FASTA-format)
316 - A corresponding protein-coding gene annotation set (GTF/GFF Format)
317 - A blast-matching gene pairs file (the reference sequences compared against themselves with *blastn*) - in Galaxy you can create such files with the *ncbi_blast_plus* tool suite containing *blastn*: https://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
318 - A STAR chimeric/junction output file - this is optional as STAR Fusion can control running STAR as well.
319
320 The authors of STAR Fusion have made some of these files available at: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/. The gene annotations in each case are restricted to the protein-coding and lincRNA transcripts.
321 More info: https://github.com/STAR-Fusion/STAR-Fusion/wiki
322
323 </help>
324
325 <citations> <!-- Single BibTeX entry of type unpublished that points at the upstream source repository. -->
326 <citation type="bibtex">
327 @unpublished{star_fusion,
328 author = {Brian Haas and Nicolas Stransky and Daniel Nicorici},
329 title = {STAR-Fusion},
330 url = {https://github.com/STAR-Fusion/STAR-Fusion}
331 }
332 </citation>
333 </citations>
334 </tool>