1 <tool id="star_fusion" name="STAR-Fusion" version="1.1.0" profile="17.01">
2 <description>detect fusion genes in RNA-Seq data</description>
<requirements>
    <!-- Dependency resolved from Bioconda -->
    <requirement version="1.1.0" type="package">star-fusion</requirement>
</requirements>
<stdio>
    <!-- stderr patterns treated as fatal: missing Perl modules, missing
         executables, and errors reported by the underlying programs. -->
    <regex source="stderr" level="fatal" match="Can't locate "/>
    <regex source="stderr" level="fatal" match="command not found"/>
    <regex source="stderr" level="fatal" match="EXITING because of INPUT ERROR"/>
    <regex source="stderr" level="fatal" match="FATAL ERROR"/>

    <!-- Generic warnings and echoed commands, reported at warning level. -->
    <regex source="stderr" level="warning" match="Warning:"/>
    <regex source="stderr" level="warning" match="CMD:"/>

    <!-- Progress messages written to stderr, reported at warning level. -->
    <regex source="stderr" level="warning" match="-done creating index file:"/>
    <regex source="stderr" level="warning" match="-parsing GTF file:"/>
    <regex source="stderr" level="warning" match="-building interval tree"/>
    <regex source="stderr" level="warning" match="-parsing fusion evidence:"/>
    <regex source="stderr" level="warning" match="-mapping reads to genes"/>
    <regex source="stderr" level="warning" match="-outputting fusion candidates to file:"/>

    <!-- Completion message, reported at warning level. -->
    <regex source="stderr" level="warning" match="Process complete"/>
</stdio>
<!-- Reports the installed STAR-Fusion version. The shell redirection contains
     a raw ampersand, which is not allowed in XML text, so the command is
     wrapped in CDATA to keep the document well-formed. -->
<version_command><![CDATA[STAR-Fusion --version 2>&1 | grep version | grep -o -E "software version.*?"]]></version_command>
<command><![CDATA[
    ## Overview of the generated shell command:
    ##   a. make the genome resource library available as
    ##      ./tmp_star_fusion_genome_dir (symlink a cached one, or build it)
    ##   b. symlink the FASTQ inputs under names with the expected extensions
    ##   c. run STAR-Fusion with the selected options

    #if str($reference.reference_type_selector) == 'cached':
        ln -s '$reference.ctat_resource_lib' tmp_star_fusion_genome_dir
    #else
        ## 1. ensure the blastn file is provided as *.gz
        if file --mime-type '${reference.blast_pairs}' | grep -q /gzip\$; then
            gzip_suffix='' ;
        else
            ## Older versions of gzip do not support the -k option to keep
            ## the original file - this should be a universal solution
            gzip -1 -c -- '${reference.blast_pairs}' > '${reference.blast_pairs}.gz' &&
            gzip_suffix='.gz' ;
        fi &&

        ## 2. create reference index - using \$(pwd) is necessary, probably because the perl script changes work directory
        ##    - @todo once write a decent STAR and STAR Fusion data manager
        prep_genome_lib.pl
            --genome_fa '${reference.fasta_type.ownFile}'
            --gtf '${reference.geneModel}'
            --blast_pairs "${reference.blast_pairs}\$gzip_suffix"
            --CPU \${GALAXY_SLOTS:-1}
            --output_dir "\$(pwd)/tmp_star_fusion_genome_dir"
    #end if
    &&

    ## Link in fastq files so they have appropriate extensions
    #if str($input_params.input_source) != "use_chimeric":
        #if $input_params.left_fq.is_of_type("fastq.gz"):
            #set read1 = 'input_1.fastq.gz'
        #else:
            #set read1 = 'input_1.fastq'
        #end if
        ln -f -s '${input_params.left_fq}' ${read1} &&

        #if $input_params.right_fq:
            #if $input_params.right_fq.is_of_type("fastq.gz"):
                #set read2 = 'input_2.fastq.gz'
            #else:
                #set read2 = 'input_2.fastq'
            #end if
            ln -f -s '${input_params.right_fq}' ${read2} &&
        #end if
    #end if

    ## 3. Run STAR-Fusion
    STAR-Fusion
        #if str($input_params.input_source) == "use_chimeric":
            --chimeric_junction '${input_params.chimeric_junction}'
        #else:
            --left_fq ${read1}
            #if $input_params.right_fq:
                --right_fq ${read2}
            #end if
        #end if

        --genome_lib_dir "\$(pwd)/tmp_star_fusion_genome_dir"
        --output_dir "\$(pwd)"

        ## The multi-select renders as the string "None" when nothing is ticked;
        ## skip it in that case so "None" is not passed as a stray argument.
        ## Otherwise the comma-separated flags are turned into separate arguments.
        #if str($input_params.optional_outputs) != 'None':
            #echo str($input_params.optional_outputs).replace(',',' ')
        #end if

        #if str($params.settingsType) == "full":
            --min_junction_reads $params.min_junction_reads
            --min_sum_frags $params.min_sum_frags
            --max_promiscuity $params.max_promiscuity
            --min_novel_junction_support $params.min_novel_junction_support
            --min_alt_pct_junction $params.min_alt_pct_junction
            --aggregate_novel_junction_dist $params.aggregate_novel_junction_dist
            --E $params.E
        #end if
        --CPU \${GALAXY_SLOTS:-1}
]]></command>
102 <inputs>
<!-- Where the STAR-Fusion genome resource library comes from: either a
     pre-built one listed in the ctat_resource data table, or one built on the
     fly from a FASTA, a gene model and a blastn self-alignment. -->
<conditional name="reference">
    <param name="reference_type_selector" type="select" label="Star Fusion Genome Reference Source">
        <option value="cached">Locally Cached Star Fusion Genome Reference</option>
        <option value="build">Build a Star Fusion Genome Reference</option>
    </param>
    <when value="cached">
        <param name="ctat_resource_lib" type="select" label="Star Fusion Genome to search">
            <options from_data_table="ctat_resource">
                <column name="dbkey" index="1"/>
                <column name="name" index="2"/>
                <column name="value" index="3"/>
            </options>
        </param>
    </when>
    <when value="build">
        <!-- Genome source. -->
        <conditional name="fasta_type">
            <param name="fasta_type_selector" type="select" label="Source for sequence to search">
                <option value="cached">Locally Cached sequences</option>
                <option value="history" selected="true">Sequences from your history</option>
            </param>
            <when value="cached">
                <param name="ownFile"
                       type="select" label="Genome to search">
                    <options from_data_table="all_fasta">
                        <column name="dbkey" index="1"/>
                        <column name="name" index="2"/>
                        <column name="value" index="3"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="ownFile"
                       type="data"
                       format="fasta"
                       label="Select the reference genome (FASTA file)"/>
            </when>
        </conditional>
        <param name="geneModel"
               type="data"
               format="gff3,gtf"
               label="Gene model (gff3,gtf) file for splice junctions and fusion gene detection"/>
        <!-- The blastn flag for tabular output is "-outfmt 6". -->
        <param name="blast_pairs"
               type="data"
               format="tabular"
               label="Result of BLAST+-blastn of the reference fasta sequence with itself"
               help="Run blastn with '-outfmt 6' or choose 'Tabular (standard 12 columns)' in the Galaxy wrapper. For human data it is recommended to obtain it from here: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/"/>
    </when>
</conditional>
<!-- Read input: either a chimeric junction file from a previous STAR run, or
     FASTQ files that STAR-Fusion aligns itself. Each branch carries its own
     list of optional output flags. -->
<conditional name="input_params">
    <param name="input_source" type="select"
           label="Use output from earlier STAR run or let STAR Fusion control running STAR">
        <option value="use_chimeric">Use output from earlier STAR</option>
        <option value="use_fastq">Let STAR Fusion control running STAR</option>
    </param>

    <when value="use_chimeric">
        <param argument="--chimeric_junction" name="chimeric_junction"
               type="data" format="interval"
               label="Chimeric junction file from STAR (with STAR-Fusion settings)"/>
        <param name="optional_outputs" type="select" multiple="true" display="checkboxes"
               label="Additional Outputs">
            <option value="--annotate">--annotate</option>
            <option value="--examine_coding_effect">--examine_coding_effect</option>
        </param>
    </when>

    <when value="use_fastq">
        <param argument="--left_fq" name="left_fq"
               type="data" format="fastqsanger,fastqsanger.gz"
               label="left.fq file"/>
        <param argument="--right_fq" name="right_fq"
               type="data" format="fastqsanger,fastqsanger.gz" optional="true"
               label="right.fq file (actually optional, but highly recommended)"/>
        <!-- Extraction of fusion reads is only offered in this branch. -->
        <param name="optional_outputs" type="select" multiple="true" display="checkboxes"
               label="Additional Outputs">
            <option value="--annotate">--annotate</option>
            <option value="--examine_coding_effect">--examine_coding_effect</option>
            <option value="--extract_fusion_reads">--extract_fusion_reads</option>
        </param>
    </when>
</conditional>
<!-- Filtering thresholds: with "default" nothing is exposed; with "full" each
     STAR-Fusion threshold below can be set individually. -->
<conditional name="params">
    <param name="settingsType" type="select" label="Settings to use"
           help="You can use the default settings or set custom values for any STAR Fusion parameter.">
        <option value="default" selected="true">Use Defaults</option>
        <option value="full">Full parameter list</option>
    </param>

    <when value="default"/>

    <!-- Full/advanced params. -->
    <when value="full">
        <param argument="--min_junction_reads" name="min_junction_reads"
               type="integer" value="1"
               label="minimum number of junction-spanning reads required."/>
        <param argument="--min_sum_frags" name="min_sum_frags"
               type="integer" value="2"
               label="minimum fusion support = (#junction_reads + #spanning_frags)"/>
        <param argument="--max_promiscuity" name="max_promiscuity"
               type="integer" value="3"
               label="maximum number of partners allowed for a given fusion"/>
        <param argument="--min_novel_junction_support" name="min_novel_junction_support"
               type="integer" value="3"
               label="minimum of 3 junction reads required if breakpoint lacks involvement of only reference junctions"/>
        <param argument="--min_alt_pct_junction" name="min_alt_pct_junction"
               type="float" value="10"
               label="10% of the dominant isoform junction support"/>
        <param argument="--aggregate_novel_junction_dist" name="aggregate_novel_junction_dist"
               type="integer" value="5"
               label="non-ref junctions within 5 are merged into single calls"/>
        <param argument="-E" name="E"
               type="float" value="0.001"
               label="E-value threshold for blast searches"/>
    </when>
</conditional>
235 </inputs>
<!-- Output datasets. Every dataset has its own name (output names must be
     unique within a tool). Optional datasets are switched on by testing for
     the exact command-line flag in the "Additional Outputs" selection. -->
<outputs>
    <!-- STAR is only run in FASTQ mode; with a chimeric junction file as
         input no alignment is produced. -->
    <data format="bam" name="out_bam" label="${tool.name} on ${on_string}: Aligned.sortedByCoord.out.bam" from_work_dir="Aligned.sortedByCoord.out.bam">
        <filter>input_params['input_source'] == 'use_fastq'</filter>
    </data>

    <!-- Reads supporting the fusions, only when extraction was requested. -->
    <data format="fastqsanger" name="output_reads_1" label="${tool.name} on ${on_string}: fusion_evidence_reads_1" from_work_dir="star-fusion.fusion_evidence_reads_1.fq">
        <filter>input_params['input_source'] == 'use_fastq' and '--extract_fusion_reads' in str(input_params['optional_outputs'])</filter>
    </data>
    <data format="fastqsanger" name="output_reads_2" label="${tool.name} on ${on_string}: fusion_evidence_reads_2" from_work_dir="star-fusion.fusion_evidence_reads_2.fq">
        <filter>input_params['input_source'] == 'use_fastq' and '--extract_fusion_reads' in str(input_params['optional_outputs'])</filter>
    </data>

    <!-- Prediction tables that are always collected. -->
    <data format="tabular" name="output_final" label="${tool.name} on ${on_string}: fusion_predictions.tsv" from_work_dir="star-fusion.fusion_predictions.tsv"/>
    <data format="tabular" name="output_final_abridged" label="${tool.name} on ${on_string}: fusion_predictions.abridged.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.tsv"/>

    <!-- Annotated variants of the abridged table; the file name depends on
         which of the two annotation options were selected. -->
    <data format="tabular" name="output_final_abridged_annotated" label="${tool.name} on ${on_string}: fusion_predictions.abridged.annotated.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.annotated.tsv">
        <filter>'--annotate' in str(input_params['optional_outputs']) and '--examine_coding_effect' not in str(input_params['optional_outputs'])</filter>
    </data>
    <data format="tabular" name="output_final_abridged_coding_effect" label="${tool.name} on ${on_string}: fusion_predictions.abridged.coding_effect.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.coding_effect.tsv">
        <filter>'--examine_coding_effect' in str(input_params['optional_outputs']) and '--annotate' not in str(input_params['optional_outputs'])</filter>
    </data>
    <data format="tabular" name="output_final_abridged_annotated_coding_effect" label="${tool.name} on ${on_string}: fusion_predictions.abridged.annotated.coding_effect.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.annotated.coding_effect.tsv">
        <filter>'--annotate' in str(input_params['optional_outputs']) and '--examine_coding_effect' in str(input_params['optional_outputs'])</filter>
    </data>
</outputs>
<!-- Functional tests. Each test explicitly selects the "build" reference
     branch, because the selector otherwise defaults to its first option
     ("cached") and the FASTA / gene model / blast pairs parameters would not
     belong to the active branch. Tabs in the expected lines are written as
     character references (&#9;): literal tabs inside attribute values are
     normalised to spaces by XML parsers and would never match. -->
<tests>
    <!-- Chimeric junction file from an earlier STAR run as input -->
    <test>
        <param name="reference_type_selector" value="build" />
        <param name="input_source" value="use_chimeric" />
        <param name="chimeric_junction" ftype="interval" value="test1.tabular" />
        <param name="fasta_type_selector" value="history" />
        <param name="ownFile" ftype="fasta" value="test1.fa" />
        <param name="geneModel" ftype="gtf" value="test1.gtf" />
        <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
        <param name="settingsType" value="default" />

        <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
        <output name="output_final">
            <assert_contents>
                <has_line line="#fusion_name&#9;JunctionReads&#9;SpanningFrags&#9;Splice_type&#9;LeftGene&#9;LeftBreakpoint&#9;RightGene&#9;RightBreakpoint&#9;JunctionReads&#9;SpanningFrags" />
                <has_text text="GENE1--GENE2&#9;24&#9;0&#9;INCL_NON_REF_SPLICE&#9;GENE1^GENE1&#9;chr1:240:+&#9;GENE2^GENE2&#9;chr2:241:+" />
            </assert_contents>
        </output>
    </test>
    <!-- Uncompressed single-end FASTQ as input -->
    <test>
        <param name="reference_type_selector" value="build" />
        <param name="input_source" value="use_fastq" />
        <param name="left_fq" ftype="fastqsanger" value="test1.fastqsanger"/>
        <param name="fasta_type_selector" value="history" />
        <param name="ownFile" ftype="fasta" value="test1.fa" />
        <param name="geneModel" ftype="gtf" value="test1.gtf" />
        <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
        <param name="settingsType" value="default" />

        <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
        <output name="output_final">
            <assert_contents>
                <has_line line="#fusion_name&#9;JunctionReads&#9;SpanningFrags&#9;Splice_type&#9;LeftGene&#9;LeftBreakpoint&#9;RightGene&#9;RightBreakpoint&#9;JunctionReads&#9;SpanningFrags" />
                <has_text text="GENE1--GENE2&#9;24&#9;0&#9;INCL_NON_REF_SPLICE&#9;GENE1^GENE1&#9;chr1:240:+&#9;GENE2^GENE2&#9;chr2:241:+" />
            </assert_contents>
        </output>
    </test>
    <!-- Gzip-compressed single-end FASTQ as input -->
    <test>
        <param name="reference_type_selector" value="build" />
        <param name="input_source" value="use_fastq" />
        <param name="left_fq" ftype="fastqsanger.gz" value="test1.fastqsanger.gz"/>
        <param name="fasta_type_selector" value="history" />
        <param name="ownFile" ftype="fasta" value="test1.fa" />
        <param name="geneModel" ftype="gtf" value="test1.gtf" />
        <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
        <param name="settingsType" value="default" />

        <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
        <output name="output_final">
            <assert_contents>
                <has_line line="#fusion_name&#9;JunctionReads&#9;SpanningFrags&#9;Splice_type&#9;LeftGene&#9;LeftBreakpoint&#9;RightGene&#9;RightBreakpoint&#9;JunctionReads&#9;SpanningFrags" />
                <has_text text="GENE1--GENE2&#9;24&#9;0&#9;INCL_NON_REF_SPLICE&#9;GENE1^GENE1&#9;chr1:240:+&#9;GENE2^GENE2&#9;chr2:241:+" />
            </assert_contents>
        </output>
    </test>
</tests>
311 <help>
312 **What it does**
314 STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set.
316 **Input: files required to run STAR-Fusion**
317 - A genome reference sequence (FASTA-format)
318 - A corresponding protein-coding gene annotation set (GTF/GFF Format)
319 - A BLAST-matching gene pairs file - in Galaxy you can create such files with the *ncbi_blast_plus* tool suite containing *blastn*: https://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
320 - A STAR chimeric/junction output file - this is optional as STAR Fusion can control running STAR as well.
322 The authors of STAR Fusion have made some of these files available at: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/. The gene annotations in each case are restricted to the protein-coding and lincRNA transcripts.
323 More info: https://github.com/STAR-Fusion/STAR-Fusion/wiki
325 </help>
<!-- No published reference is given for the tool; the project repository is
     cited as an unpublished BibTeX entry. -->
<citations>
    <citation type="bibtex">
        @unpublished{star_fusion,
            author = {Brian Haas and Nicolas Stransky and Daniel Nicorici},
            title = {STAR-Fusion},
            url = {https://github.com/STAR-Fusion/STAR-Fusion}
        }
    </citation>
</citations>
336 </tool>