0
|
1 <tool id="star_fusion" name="STAR-Fusion" version="1.1.0" profile="17.01">
|
|
2 <description>detect fusion genes in RNA-Seq data</description>
|
|
<!-- Software dependencies resolved by Galaxy; the star-fusion package is taken from Bioconda. -->
<requirements>
    <requirement version="1.1.0" type="package">star-fusion</requirement>
</requirements>
|
|
7
|
|
<!-- Job state detection based on patterns found in stderr. -->
<stdio>
    <!-- Patterns that mark the job as failed. -->
    <regex source="stderr" level="fatal" match="Can't locate "/>
    <regex source="stderr" level="fatal" match="command not found"/>
    <regex source="stderr" level="fatal" match="EXITING because of INPUT ERROR"/>
    <regex source="stderr" level="fatal" match="FATAL ERROR"/>

    <!-- Generic warning and command-echo lines. -->
    <regex source="stderr" level="warning" match="Warning:"/>
    <regex source="stderr" level="warning" match="CMD:"/>

    <!-- Progress messages written to stderr; reported as warnings, not failures. -->
    <regex source="stderr" level="warning" match="-done creating index file:"/>
    <regex source="stderr" level="warning" match="-parsing GTF file:"/>
    <regex source="stderr" level="warning" match="-building interval tree"/>
    <regex source="stderr" level="warning" match="-parsing fusion evidence:"/>
    <regex source="stderr" level="warning" match="-mapping reads to genes"/>
    <regex source="stderr" level="warning" match="-outputting fusion candidates to file:"/>

    <regex source="stderr" level="warning" match="Process complete"/>
</stdio>
|
|
26
|
|
<!-- Reports the STAR-Fusion software version. The shell text is wrapped in CDATA
     because a raw '&' (as in 2>&1) is not allowed in XML character data. -->
<version_command><![CDATA[STAR-Fusion --version 2>&1 | grep version | grep -o -E "software version.*?"]]></version_command>
|
|
28
|
|
<command><![CDATA[
    ## Overview of the generated shell command:
    ##   1. make the genome library available as ./tmp_star_fusion_genome_dir
    ##      (symlink to a cached library, or build one with prep_genome_lib.pl)
    ##   2. symlink the FASTQ inputs so that they carry a recognisable extension
    ##   3. run STAR-Fusion in the job working directory

    #if $reference.reference_type_selector == 'cached':
        ln -s '$reference.ctat_resource_lib' tmp_star_fusion_genome_dir
    #else
        ## 1a. ensure the blastn file is provided gzip-compressed.
        ##     An uncompressed input is compressed into the job working
        ##     directory (never next to the input dataset, which may be
        ##     read-only and must not be polluted).
        if file --mime-type '${reference.blast_pairs}' | grep -q /gzip\$; then
            blast_pairs_gz='${reference.blast_pairs}' ;
        else
            ## Older versions of gzip do not support the -k option to keep
            ## the original file - redirecting -c is a universal solution
            gzip -1 -c -- '${reference.blast_pairs}' > blast_pairs.gz &&
            blast_pairs_gz="\$(pwd)/blast_pairs.gz" ;
        fi &&

        ## 1b. create reference index - using \$(pwd) is necessary, probably because the perl script changes work directory
        ##     - @todo once write a decent STAR and STAR Fusion data manager
        prep_genome_lib.pl
            --genome_fa '${reference.fasta_type.ownFile}'
            --gtf '${reference.geneModel}'
            --blast_pairs "\$blast_pairs_gz"
            --CPU \${GALAXY_SLOTS:-1}
            --output_dir "\$(pwd)/tmp_star_fusion_genome_dir"
    #end if
    &&

    ## 2. Link in fastq files so they have appropriate extensions
    #if str($input_params.input_source) != "use_chimeric":
        #if $input_params.left_fq.is_of_type("fastq.gz"):
            #set read1 = 'input_1.fastq.gz'
        #else:
            #set read1 = 'input_1.fastq'
        #end if
        ln -f -s '${input_params.left_fq}' ${read1} &&

        #if $input_params.right_fq:
            #if $input_params.right_fq.is_of_type("fastq.gz"):
                #set read2 = 'input_2.fastq.gz'
            #else:
                #set read2 = 'input_2.fastq'
            #end if
            ln -f -s '${input_params.right_fq}' ${read2} &&
        #end if
    #end if

    ## 3. Run STAR-Fusion
    STAR-Fusion
        #if str($input_params.input_source) == "use_chimeric":
            --chimeric_junction '${input_params.chimeric_junction}'
        #else:
            --left_fq ${read1}
            #if $input_params.right_fq:
                --right_fq ${read2}
            #end if
        #end if

        --genome_lib_dir "\$(pwd)/tmp_star_fusion_genome_dir"
        --output_dir "\$(pwd)"

        ## The multi-select renders as a comma separated list of flags, or as
        ## 'None' when nothing is ticked - in which case no flag may be emitted.
        #set optional_flags = str($input_params.optional_outputs)
        #if $optional_flags != 'None' and $optional_flags != '':
            #echo $optional_flags.replace(',', ' ')
        #end if

        #if str($params.settingsType) == "full":
            --min_junction_reads $params.min_junction_reads
            --min_sum_frags $params.min_sum_frags
            --max_promiscuity $params.max_promiscuity
            --min_novel_junction_support $params.min_novel_junction_support
            --min_alt_pct_junction $params.min_alt_pct_junction
            --aggregate_novel_junction_dist $params.aggregate_novel_junction_dist
            --E $params.E
        #end if
        --CPU \${GALAXY_SLOTS:-1}
]]></command>
|
|
101
|
|
102 <inputs>
|
|
<!-- Genome library selection: use a pre-built (cached) CTAT resource library,
     or build one for this job from a FASTA, a gene model and blastn pairs. -->
<conditional name="reference">
    <param name="reference_type_selector" type="select" label="Star Fusion Genome Reference Source">
        <option value="cached">Locally Cached Star Fusion Genome Reference</option>
        <option value="build">Build a Star Fusion Genome Reference</option>
    </param>
    <when value="cached">
        <param name="ctat_resource_lib" type="select" label="Star Fusion Genome to search">
            <options from_data_table="ctat_resource">
                <column name="dbkey" index="1"/>
                <column name="name" index="2"/>
                <column name="value" index="3"/>
            </options>
        </param>
    </when>
    <when value="build">
        <!-- Genome source. Both branches expose the parameter as "ownFile" so the
             command template can reference it without caring about the source. -->
        <conditional name="fasta_type">
            <param name="fasta_type_selector" type="select" label="Source for sequence to search">
                <option value="cached">Locally Cached sequences</option>
                <option value="history" selected="true">Sequences from your history</option>
            </param>
            <when value="cached">
                <param name="ownFile"
                       type="select" label="Genome to search">
                    <options from_data_table="all_fasta">
                        <column name="dbkey" index="1"/>
                        <column name="name" index="2"/>
                        <column name="value" index="3"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="ownFile"
                       type="data"
                       format="fasta"
                       label="Select the reference genome (FASTA file)"/>
            </when>
        </conditional>
        <param name="geneModel"
               type="data"
               format="gff3,gtf"
               label="Gene model (gff3,gtf) file for splice junctions and fusion gene detection"/>
        <!-- The blastn option for tabular output is "-outfmt 6". -->
        <param name="blast_pairs"
               type="data"
               format="tabular"
               label="Result of BLAST+-blastn of the reference fasta sequence with itself"
               help="Run blastn with '-outfmt 6' or choose 'Tabular (standard 12 columns)' in the Galaxy wrapper. For human data it is recommended to obtain it from here: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/"/>
    </when>
</conditional>
|
|
152
|
|
153
|
|
<!-- Read input selection: either a chimeric junction file from a previous STAR
     run, or raw FASTQ reads. Each branch carries its own "optional_outputs"
     multi-select whose option values are the STAR-Fusion flags themselves. -->
<conditional name="input_params">
    <param name="input_source" type="select" label="Use output from earlier STAR run or let STAR Fusion control running STAR">
        <option value="use_chimeric">Use output from earlier STAR</option>
        <option value="use_fastq">Let STAR Fusion control running STAR</option>
    </param>

    <when value="use_chimeric">
        <param name="chimeric_junction" argument="--chimeric_junction" type="data" format="interval" label="Chimeric junction file from STAR (with STAR-Fusion settings)"/>
        <param name="optional_outputs" type="select" multiple="true" display="checkboxes" label="Additional Outputs">
            <option value="--annotate">--annotate</option>
            <option value="--examine_coding_effect">--examine_coding_effect</option>
        </param>
    </when>

    <when value="use_fastq">
        <param name="left_fq" argument="--left_fq" type="data" format="fastqsanger,fastqsanger.gz" label="left.fq file"/>
        <param name="right_fq" argument="--right_fq" type="data" format="fastqsanger,fastqsanger.gz" optional="true" label="right.fq file (actually optional, but highly recommended)"/>
        <!-- This branch additionally offers extraction of the fusion evidence reads. -->
        <param name="optional_outputs" type="select" multiple="true" display="checkboxes" label="Additional Outputs">
            <option value="--annotate">--annotate</option>
            <option value="--examine_coding_effect">--examine_coding_effect</option>
            <option value="--extract_fusion_reads">--extract_fusion_reads</option>
        </param>
    </when>
</conditional>
|
|
191
|
|
<!-- Filtering thresholds: "default" adds nothing to the command line, "full"
     passes every parameter below to STAR-Fusion explicitly. -->
<conditional name="params">
    <param name="settingsType" type="select" label="Settings to use" help="You can use the default settings or set custom values for any STAR Fusion parameter.">
        <option value="default" selected="true">Use Defaults</option>
        <option value="full">Full parameter list</option>
    </param>
    <when value="default"/>
    <!-- Full/advanced params. -->
    <when value="full">
        <param name="min_junction_reads" argument="--min_junction_reads" type="integer" value="1" label="minimum number of junction-spanning reads required."/>
        <param name="min_sum_frags" argument="--min_sum_frags" type="integer" value="2" label="minimum fusion support = (#junction_reads + #spanning_frags)"/>
        <param name="max_promiscuity" argument="--max_promiscuity" type="integer" value="3" label="maximum number of partners allowed for a given fusion"/>
        <param name="min_novel_junction_support" argument="--min_novel_junction_support" type="integer" value="3" label="minimum of 3 junction reads required if breakpoint lacks involvement of only reference junctions"/>
        <param name="min_alt_pct_junction" argument="--min_alt_pct_junction" type="float" value="10" label="10% of the dominant isoform junction support"/>
        <param name="aggregate_novel_junction_dist" argument="--aggregate_novel_junction_dist" type="integer" value="5" label="non-ref junctions within 5 are merged into single calls"/>
        <param name="E" argument="-E" type="float" value="0.001" label="E-value threshold for blast searches"/>
    </when>
</conditional>
|
|
235 </inputs>
|
|
236
|
|
<!-- Output datasets collected from the job working directory.
     Every output carries a unique name; "output_final" is kept for the full
     fusion_predictions.tsv table. Filters test for the selected flag with "in",
     which also holds when nothing is selected (the value then renders as 'None'). -->
<outputs>
    <!-- The alignment is only declared when STAR-Fusion runs STAR itself. -->
    <data format="bam" name="out_bam" label="${tool.name} on ${on_string}: Aligned.sortedByCoord.out.bam" from_work_dir="Aligned.sortedByCoord.out.bam">
        <filter>input_params['input_source'] == 'use_fastq'</filter>
    </data>

    <!-- Fusion evidence reads: only with FASTQ input and --extract_fusion_reads. -->
    <data format="fastqsanger" name="output_reads_1" label="${tool.name} on ${on_string}: fusion_evidence_reads_1" from_work_dir="star-fusion.fusion_evidence_reads_1.fq">
        <filter>input_params['input_source'] == 'use_fastq' and '--extract_fusion_reads' in str(input_params['optional_outputs'])</filter>
    </data>
    <data format="fastqsanger" name="output_reads_2" label="${tool.name} on ${on_string}: fusion_evidence_reads_2" from_work_dir="star-fusion.fusion_evidence_reads_2.fq">
        <filter>input_params['input_source'] == 'use_fastq' and '--extract_fusion_reads' in str(input_params['optional_outputs'])</filter>
    </data>

    <!-- Prediction tables that are always declared. -->
    <data format="tabular" name="output_final" label="${tool.name} on ${on_string}: fusion_predictions.tsv" from_work_dir="star-fusion.fusion_predictions.tsv"/>
    <data format="tabular" name="output_abridged" label="${tool.name} on ${on_string}: fusion_predictions.abridged.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.tsv"/>

    <!-- Exactly one of the following three is declared, depending on which of
         --annotate / --examine_coding_effect were selected. -->
    <data format="tabular" name="output_annotated" label="${tool.name} on ${on_string}: fusion_predictions.abridged.annotated.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.annotated.tsv">
        <filter>'--annotate' in str(input_params['optional_outputs']) and '--examine_coding_effect' not in str(input_params['optional_outputs'])</filter>
    </data>
    <data format="tabular" name="output_coding_effect" label="${tool.name} on ${on_string}: fusion_predictions.abridged.coding_effect.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.coding_effect.tsv">
        <filter>'--examine_coding_effect' in str(input_params['optional_outputs']) and '--annotate' not in str(input_params['optional_outputs'])</filter>
    </data>
    <data format="tabular" name="output_annotated_coding_effect" label="${tool.name} on ${on_string}: fusion_predictions.abridged.annotated.coding_effect.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.annotated.coding_effect.tsv">
        <filter>'--annotate' in str(input_params['optional_outputs']) and '--examine_coding_effect' in str(input_params['optional_outputs'])</filter>
    </data>
</outputs>
|
|
257
|
|
<!-- Functional tests. All of them build the genome library from history
     datasets, so the "reference" conditional is switched to "build" explicitly
     (its default is the first option, "cached").
     Tabs inside attribute values are written as &#9; because literal tab
     characters in attributes are normalised to spaces by XML parsers. -->
<tests>
    <!-- Chimeric junction file from an earlier STAR run. -->
    <test>
        <param name="reference_type_selector" value="build" />
        <param name="input_source" value="use_chimeric" />
        <param name="chimeric_junction" ftype="interval" value="test1.tabular" />
        <param name="fasta_type_selector" value="history" />
        <param name="ownFile" ftype="fasta" value="test1.fa" />
        <param name="geneModel" ftype="gtf" value="test1.gtf" />
        <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
        <param name="settingsType" value="default" />

        <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
        <output name="output_final">
            <assert_contents>
                <has_line line="#fusion_name&#9;JunctionReads&#9;SpanningFrags&#9;Splice_type&#9;LeftGene&#9;LeftBreakpoint&#9;RightGene&#9;RightBreakpoint&#9;JunctionReads&#9;SpanningFrags" />
                <has_text text="GENE1--GENE2&#9;24&#9;0&#9;INCL_NON_REF_SPLICE&#9;GENE1^GENE1&#9;chr1:240:+&#9;GENE2^GENE2&#9;chr2:241:+" />
            </assert_contents>
        </output>
    </test>
    <!-- Uncompressed single-end FASTQ input; STAR-Fusion runs STAR itself. -->
    <test>
        <param name="reference_type_selector" value="build" />
        <param name="input_source" value="use_fastq" />
        <param name="left_fq" ftype="fastqsanger" value="test1.fastqsanger"/>
        <param name="fasta_type_selector" value="history" />
        <param name="ownFile" ftype="fasta" value="test1.fa" />
        <param name="geneModel" ftype="gtf" value="test1.gtf" />
        <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
        <param name="settingsType" value="default" />

        <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
        <output name="output_final">
            <assert_contents>
                <has_line line="#fusion_name&#9;JunctionReads&#9;SpanningFrags&#9;Splice_type&#9;LeftGene&#9;LeftBreakpoint&#9;RightGene&#9;RightBreakpoint&#9;JunctionReads&#9;SpanningFrags" />
                <has_text text="GENE1--GENE2&#9;24&#9;0&#9;INCL_NON_REF_SPLICE&#9;GENE1^GENE1&#9;chr1:240:+&#9;GENE2^GENE2&#9;chr2:241:+" />
            </assert_contents>
        </output>
    </test>
    <!-- Gzip-compressed single-end FASTQ input. -->
    <test>
        <param name="reference_type_selector" value="build" />
        <param name="input_source" value="use_fastq" />
        <param name="left_fq" ftype="fastqsanger.gz" value="test1.fastqsanger.gz"/>
        <param name="fasta_type_selector" value="history" />
        <param name="ownFile" ftype="fasta" value="test1.fa" />
        <param name="geneModel" ftype="gtf" value="test1.gtf" />
        <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
        <param name="settingsType" value="default" />

        <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
        <output name="output_final">
            <assert_contents>
                <has_line line="#fusion_name&#9;JunctionReads&#9;SpanningFrags&#9;Splice_type&#9;LeftGene&#9;LeftBreakpoint&#9;RightGene&#9;RightBreakpoint&#9;JunctionReads&#9;SpanningFrags" />
                <has_text text="GENE1--GENE2&#9;24&#9;0&#9;INCL_NON_REF_SPLICE&#9;GENE1^GENE1&#9;chr1:240:+&#9;GENE2^GENE2&#9;chr2:241:+" />
            </assert_contents>
        </output>
    </test>
</tests>
|
|
311 <help>
|
|
312 **What it does**
|
|
313
|
|
314 STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set.
|
|
315
|
|
316 **Input: files required to run STAR-Fusion**
|
|
317 - A genome reference sequence (FASTA-format)
|
|
318 - A corresponding protein-coding gene annotation set (GTF/GFF Format)
|
|
319 - A last-matching gene pairs file - in Galaxy you can create such files with the *ncbi_blast_plus* tool suite containing *blastn*: https://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
|
|
320 - A STAR chimeric/junction output file - this is optional as STAR Fusion can control running STAR as well.
|
|
321
|
|
322 The authors of STAR Fusion have made some of these files available at: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/. The gene annotations in each case are restricted to the protein-coding and lincRNA transcripts.
|
|
323 More info: https://github.com/STAR-Fusion/STAR-Fusion/wiki
|
|
324
|
|
325 </help>
|
|
326
|
|
<!-- Reference shown to users of the tool; given as BibTeX pointing at the project repository. -->
<citations>
    <citation type="bibtex">
@unpublished{star_fusion,
author = {Brian Haas and Nicolas Stransky and Daniel Nicorici},
title = {STAR-Fusion},
url = {https://github.com/STAR-Fusion/STAR-Fusion}
}
    </citation>
</citations>
|
|
336 </tool>
|