0
|
1 <tool id="star_fusion" name="STAR-Fusion" version="1.1.0" profile="17.01">
|
|
2 <description>detect fusion genes in RNA-Seq data</description>
|
|
<requirements>
    <!-- Dependency resolved from the Bioconda channel; the pinned version matches the wrapper version. -->
    <requirement version="1.1.0" type="package">star-fusion</requirement>
</requirements>
|
|
7
|
|
<stdio>
    <!-- stderr patterns that mark the job as failed. -->
    <regex source="stderr" level="fatal" match="command not found"/>
    <regex source="stderr" level="fatal" match="EXITING because of INPUT ERROR"/>
    <regex source="stderr" level="fatal" match="FATAL ERROR"/>

    <!-- Generic warning lines and echoed command lines: reported as warnings only. -->
    <regex source="stderr" level="warning" match="Warning:"/>
    <regex source="stderr" level="warning" match="CMD:"/>

    <!-- Progress-style messages written to stderr: reported as warnings so they do not fail the job. -->
    <regex source="stderr" level="warning" match="-done creating index file:"/>
    <regex source="stderr" level="warning" match="-parsing GTF file:"/>
    <regex source="stderr" level="warning" match="-building interval tree"/>
    <regex source="stderr" level="warning" match="-parsing fusion evidence:"/>
    <regex source="stderr" level="warning" match="-mapping reads to genes"/>
    <regex source="stderr" level="warning" match="-outputting fusion candidates to file:"/>

    <regex source="stderr" level="warning" match="Process complete"/>
</stdio>
|
|
25
|
|
<!-- Prints the installed STAR-Fusion version. The shell pipeline is wrapped in CDATA because the
     redirection "2>&1" contains a raw ampersand, which is not allowed in plain element text. -->
<version_command><![CDATA[STAR-Fusion --version 2>&1 | grep version | grep -o -E "software version.*?"]]></version_command>
|
|
27
|
|
<command><![CDATA[
        ## ------------------------------------------------------------------
        ## Step 1: provide the genome library as ./tmp_star_fusion_genome_dir,
        ## either by linking a cached library or by building one from the
        ## datasets selected in the history.
        ## ------------------------------------------------------------------
        #if str($reference.reference_type_selector) == 'cached':
            ln -s '$reference.ctat_resource_lib' tmp_star_fusion_genome_dir
        #else
            ## 1a. The BLAST pairs file is handed to prep_genome_lib.pl under a *.gz name.
            ##     The compressed copy (or a symlink, when the dataset is already gzipped)
            ##     is created in the job working directory, so nothing is written next to
            ##     the input dataset.
            if file --mime-type '${reference.blast_pairs}' | grep -q /gzip\$; then
                ln -f -s '${reference.blast_pairs}' blast_pairs.gz ;
            else
                ## Older versions of gzip do not support the -k option to keep the
                ## original file; "gzip -c > target" is the universal solution.
                gzip -1 -c -- '${reference.blast_pairs}' > blast_pairs.gz ;
            fi &&

            ## 1b. Create the reference index. Absolute paths via \$(pwd) are necessary,
            ##     probably because the Perl script changes its working directory.
            ##     TODO: replace this with a proper STAR / STAR-Fusion data manager.
            prep_genome_lib.pl
                --genome_fa '${reference.fasta_type.ownFile}'
                --gtf '${reference.geneModel}'
                --blast_pairs "\$(pwd)/blast_pairs.gz"
                --CPU \${GALAXY_SLOTS:-1}
                --output_dir "\$(pwd)/tmp_star_fusion_genome_dir"
        #end if
        &&

        ## ------------------------------------------------------------------
        ## Step 2: link the FASTQ inputs so that they carry an appropriate
        ## file extension (only when STAR-Fusion runs STAR itself).
        ## ------------------------------------------------------------------
        #if str($input_params.input_source) != "use_chimeric":
            #if $input_params.left_fq.is_of_type("fastq.gz"):
                #set read1 = 'input_1.fastq.gz'
            #else:
                #set read1 = 'input_1.fastq'
            #end if
            ln -f -s '${input_params.left_fq}' ${read1} &&

            #if $input_params.right_fq:
                #if $input_params.right_fq.is_of_type("fastq.gz"):
                    #set read2 = 'input_2.fastq.gz'
                #else:
                    #set read2 = 'input_2.fastq'
                #end if
                ln -f -s '${input_params.right_fq}' ${read2} &&
            #end if
        #end if

        ## ------------------------------------------------------------------
        ## Step 3: run STAR-Fusion.
        ## ------------------------------------------------------------------
        STAR-Fusion
            #if str($input_params.input_source) == "use_chimeric":
                --chimeric_junction '${input_params.chimeric_junction}'
            #else:
                --left_fq ${read1}
                #if $input_params.right_fq:
                    --right_fq ${read2}
                #end if
            #end if

            --genome_lib_dir "\$(pwd)/tmp_star_fusion_genome_dir"

            ## The multi-select renders as a comma separated list of flags, or as
            ## "None" when nothing is selected; emit the flags space separated.
            #set $extra_outputs = str($input_params.optional_outputs)
            #if $extra_outputs and $extra_outputs != 'None':
                ${extra_outputs.replace(',', ' ')}
            #end if

            #if str($params.settingsType) == "full":
                --min_junction_reads $params.min_junction_reads
                --min_sum_frags $params.min_sum_frags
                --max_promiscuity $params.max_promiscuity
                --min_novel_junction_support $params.min_novel_junction_support
                --min_alt_pct_junction $params.min_alt_pct_junction
                --aggregate_novel_junction_dist $params.aggregate_novel_junction_dist
                --E $params.E
            #end if
            --CPU \${GALAXY_SLOTS:-1}
]]></command>
|
|
99
|
|
100 <inputs>
|
|
<!-- Genome library source: a cached, pre-built library, or one built on the fly from history datasets. -->
<conditional name="reference">
    <param name="reference_type_selector" type="select" label="Star Fusion Genome Reference Source">
        <option value="cached">Locally Cached sequences</option>
        <option value="build" selected="true">Sequences from your history</option>
    </param>
    <when value="cached">
        <param name="ctat_resource_lib" type="select" label="Genome to search">
            <options from_data_table="ctat_resource">
                <column name="dbkey" index="1"/>
                <column name="name" index="2"/>
                <column name="value" index="3"/>
            </options>
        </param>
    </when>
    <when value="build">
        <!-- Genome source: cached FASTA from the all_fasta data table or a FASTA dataset from the history. -->
        <conditional name="fasta_type">
            <param name="fasta_type_selector" type="select" label="Source for sequence to search">
                <option value="cached">Locally Cached sequences</option>
                <option value="history" selected="true">Sequences from your history</option>
            </param>
            <when value="cached">
                <param name="ownFile"
                       type="select"
                       label="Genome to search">
                    <options from_data_table="all_fasta">
                        <column name="dbkey" index="1"/>
                        <column name="name" index="2"/>
                        <column name="value" index="3"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="ownFile"
                       type="data"
                       format="fasta"
                       label="Select the reference genome (FASTA file)"/>
            </when>
        </conditional>
        <param name="geneModel"
               type="data"
               format="gff3,gtf"
               label="Gene model (gff3,gtf) file for splice junctions and fusion gene detection"/>
        <!-- The blastn option for tabular output is "-outfmt 6". -->
        <param name="blast_pairs"
               type="data"
               format="tabular"
               label="Result of BLAST+-blastn of the reference fasta sequence with itself"
               help="Run blastn with '-outfmt 6' or choose 'Tabular (standard 12 columns)' in the Galaxy wrapper. For human data it is recommended to obtain it from here: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/"/>
    </when>
</conditional>
|
|
150
|
|
151
|
|
<!-- Read input: either a chimeric junction file from a previous STAR run, or FASTQ reads. -->
<conditional name="input_params">
    <param name="input_source" type="select" label="Use output from earlier STAR run or let STAR Fusion control running STAR">
        <option value="use_chimeric">Use output from earlier STAR</option>
        <option value="use_fastq">Let STAR Fusion control running STAR</option>
    </param>

    <when value="use_chimeric">
        <param argument="--chimeric_junction" name="chimeric_junction" type="data" format="interval"
               label="Chimeric junction file from STAR (with STAR-Fusion settings)"/>
        <!-- Option values are the literal command line flags. -->
        <param name="optional_outputs" type="select" multiple="true" label="Additional Outputs">
            <option value="--annotate">--annotate</option>
            <option value="--examine_coding_effect">--examine_coding_effect</option>
        </param>
    </when>

    <when value="use_fastq">
        <param argument="--left_fq" name="left_fq" type="data" format="fastqsanger,fastqsanger.gz"
               label="left.fq file"/>
        <param argument="--right_fq" name="right_fq" type="data" format="fastqsanger,fastqsanger.gz" optional="true"
               label="right.fq file (actually optional, but highly recommended)"/>
        <!-- Same flags as above, plus read extraction, which is offered only for FASTQ input. -->
        <param name="optional_outputs" type="select" multiple="true" label="Additional Outputs">
            <option value="--annotate">--annotate</option>
            <option value="--examine_coding_effect">--examine_coding_effect</option>
            <option value="--extract_fusion_reads">--extract_fusion_reads</option>
        </param>
    </when>
</conditional>
|
|
189
|
|
<!-- Filtering thresholds: keep the tool defaults or expose every parameter. -->
<conditional name="params">
    <param name="settingsType" type="select" label="Settings to use"
           help="You can use the default settings or set custom values for any STAR Fusion parameter.">
        <option value="default" selected="true">Use Defaults</option>
        <option value="full">Full parameter list</option>
    </param>
    <when value="default"/>
    <!-- Full/advanced parameters; each one is passed to STAR-Fusion under the same name. -->
    <when value="full">
        <param argument="--min_junction_reads" name="min_junction_reads" type="integer" value="1"
               label="minimum number of junction-spanning reads required."/>
        <param argument="--min_sum_frags" name="min_sum_frags" type="integer" value="2"
               label="minimum fusion support = (#junction_reads + #spanning_frags)"/>
        <param argument="--max_promiscuity" name="max_promiscuity" type="integer" value="3"
               label="maximum number of partners allowed for a given fusion"/>
        <param argument="--min_novel_junction_support" name="min_novel_junction_support" type="integer" value="3"
               label="minimum of 3 junction reads required if breakpoint lacks involvement of only reference junctions"/>
        <param argument="--min_alt_pct_junction" name="min_alt_pct_junction" type="float" value="10"
               label="10% of the dominant isoform junction support"/>
        <param argument="--aggregate_novel_junction_dist" name="aggregate_novel_junction_dist" type="integer" value="5"
               label="non-ref junctions within 5 are merged into single calls"/>
        <param argument="-E" name="E" type="float" value="0.001"
               label="E-value threshold for blast searches"/>
    </when>
</conditional>
|
|
233 </inputs>
|
|
234
|
|
<outputs>
    <!-- NOTE(review): this output is declared as BAM and labelled as the sorted STAR alignment, yet it is
         collected from "star-fusion.fusion_candidates.final". It is left unchanged so that the existing
         "out_bam" output keeps its name; please confirm which file was intended. -->
    <data format="bam" name="out_bam" label="${tool.name} on ${on_string}: Aligned.sortedByCoord.out.bam" from_work_dir="star-fusion.fusion_candidates.final"/>

    <!-- Fusion evidence reads: only produced for FASTQ input when read extraction was requested. -->
    <data format="fastqsanger" name="output_reads_1" label="${tool.name} on ${on_string}: fusion_evidence_reads_1" from_work_dir="star-fusion.fusion_evidence_reads_1.fq">
        <filter>input_params['input_source'] == 'use_fastq' and 'extract_fusion_reads' in str(input_params['optional_outputs'])</filter>
    </data>
    <data format="fastqsanger" name="output_reads_2" label="${tool.name} on ${on_string}: fusion_evidence_reads_2" from_work_dir="star-fusion.fusion_evidence_reads_2.fq">
        <filter>input_params['input_source'] == 'use_fastq' and 'extract_fusion_reads' in str(input_params['optional_outputs'])</filter>
    </data>

    <!-- Prediction tables. Output names must be unique within a tool, so only the full prediction table
         keeps the name "output_final" (the one checked by the tests); each variant has its own name. -->
    <data format="tabular" name="output_final" label="${tool.name} on ${on_string}: fusion_predictions.tsv" from_work_dir="star-fusion.fusion_predictions.tsv"/>
    <data format="tabular" name="output_final_abridged" label="${tool.name} on ${on_string}: fusion_predictions.abridged.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.tsv"/>
    <data format="tabular" name="output_final_annotated" label="${tool.name} on ${on_string}: fusion_predictions.abridged.annotated.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.annotated.tsv">
        <filter>'annotate' in str(input_params['optional_outputs']) and 'examine_coding_effect' not in str(input_params['optional_outputs'])</filter>
    </data>
    <data format="tabular" name="output_final_coding_effect" label="${tool.name} on ${on_string}: fusion_predictions.abridged.coding_effect.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.coding_effect.tsv">
        <filter>'examine_coding_effect' in str(input_params['optional_outputs']) and 'annotate' not in str(input_params['optional_outputs'])</filter>
    </data>
    <data format="tabular" name="output_final_annotated_coding_effect" label="${tool.name} on ${on_string}: fusion_predictions.abridged.annotated.coding_effect.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.annotated.coding_effect.tsv">
        <filter>'annotate' in str(input_params['optional_outputs']) and 'examine_coding_effect' in str(input_params['optional_outputs'])</filter>
    </data>
</outputs>
|
|
255
|
|
<tests>
    <!-- Column separators in the expected lines are written as the character reference &#9; because
         XML parsers replace literal tab characters inside attribute values with spaces. -->

    <!-- Test 1: start from an existing chimeric junction file. -->
    <test>
        <param name="input_source" value="use_chimeric" />
        <param name="chimeric_junction" ftype="interval" value="test1.tabular" />
        <param name="fasta_type_selector" value="history" />
        <param name="ownFile" ftype="fasta" value="test1.fa" />
        <param name="geneModel" ftype="gtf" value="test1.gtf" />
        <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
        <param name="settingsType" value="default" />

        <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
        <output name="output_final">
            <assert_contents>
                <has_line line="#fusion_name&#9;JunctionReads&#9;SpanningFrags&#9;Splice_type&#9;LeftGene&#9;LeftBreakpoint&#9;RightGene&#9;RightBreakpoint&#9;JunctionReads&#9;SpanningFrags" />
                <has_text text="GENE1--GENE2&#9;24&#9;0&#9;INCL_NON_REF_SPLICE&#9;GENE1^GENE1&#9;chr1:240:+&#9;GENE2^GENE2&#9;chr2:241:+" />
            </assert_contents>
        </output>
    </test>

    <!-- Test 2: single-end, uncompressed FASTQ input. -->
    <test>
        <param name="input_source" value="use_fastq" />
        <param name="left_fq" ftype="fastqsanger" value="test1.fastqsanger"/>
        <param name="fasta_type_selector" value="history" />
        <param name="ownFile" ftype="fasta" value="test1.fa" />
        <param name="geneModel" ftype="gtf" value="test1.gtf" />
        <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
        <param name="settingsType" value="default" />

        <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
        <output name="output_final">
            <assert_contents>
                <has_line line="#fusion_name&#9;JunctionReads&#9;SpanningFrags&#9;Splice_type&#9;LeftGene&#9;LeftBreakpoint&#9;RightGene&#9;RightBreakpoint&#9;JunctionReads&#9;SpanningFrags" />
                <has_text text="GENE1--GENE2&#9;24&#9;0&#9;INCL_NON_REF_SPLICE&#9;GENE1^GENE1&#9;chr1:240:+&#9;GENE2^GENE2&#9;chr2:241:+" />
            </assert_contents>
        </output>
    </test>

    <!-- Test 3: single-end, gzip-compressed FASTQ input. -->
    <test>
        <param name="input_source" value="use_fastq" />
        <param name="left_fq" ftype="fastqsanger.gz" value="test1.fastqsanger.gz"/>
        <param name="fasta_type_selector" value="history" />
        <param name="ownFile" ftype="fasta" value="test1.fa" />
        <param name="geneModel" ftype="gtf" value="test1.gtf" />
        <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
        <param name="settingsType" value="default" />

        <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
        <output name="output_final">
            <assert_contents>
                <has_line line="#fusion_name&#9;JunctionReads&#9;SpanningFrags&#9;Splice_type&#9;LeftGene&#9;LeftBreakpoint&#9;RightGene&#9;RightBreakpoint&#9;JunctionReads&#9;SpanningFrags" />
                <has_text text="GENE1--GENE2&#9;24&#9;0&#9;INCL_NON_REF_SPLICE&#9;GENE1^GENE1&#9;chr1:240:+&#9;GENE2^GENE2&#9;chr2:241:+" />
            </assert_contents>
        </output>
    </test>
</tests>
|
|
<!-- Help text is reStructuredText; a blank line is required before the bullet list for it to render as a list. -->
<help><![CDATA[
**What it does**

STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set.

**Input: files required to run STAR-Fusion**

- A genome reference sequence (FASTA-format)
- A corresponding protein-coding gene annotation set (GTF/GFF Format)
- A BLAST-matching gene pairs file - in Galaxy you can create such files with the *ncbi_blast_plus* tool suite containing *blastn*: https://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
- A STAR chimeric/junction output file - this is optional as STAR Fusion can control running STAR as well.

The authors of STAR Fusion have made some of these files available at: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/. The gene annotations in each case are restricted to the protein-coding and lincRNA transcripts.

More info: https://github.com/STAR-Fusion/STAR-Fusion/wiki
]]></help>
|
|
324
|
|
<citations>
    <!-- The project repository is cited through an unpublished BibTeX entry. -->
    <citation type="bibtex">
@unpublished{star_fusion,
    author = {Brian Haas and Nicolas Stransky and Daniel Nicorici},
    title = {STAR-Fusion},
    url = {https://github.com/STAR-Fusion/STAR-Fusion}
}
    </citation>
</citations>
|
|
334 </tool>
|