Mercurial > repos > jjohnson > star_fusion
comparison star_fusion.xml @ 0:5ff7593a7220 draft
Uploaded
author | jjohnson |
---|---|
date | Wed, 04 Oct 2017 15:23:36 -0400 |
parents | |
children | 5748e43a73e0 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5ff7593a7220 |
---|---|
1 <tool id="star_fusion" name="STAR-Fusion" version="1.1.0" profile="17.01"> | |
2 <description>detect fusion genes in RNA-Seq data</description> | |
3 <requirements> | |
4 <!-- Bioconda dependency; the pinned version equals the version attribute of this tool (1.1.0) --> | |
5 <requirement version="1.1.0" type="package">star-fusion</requirement> | |
6 </requirements> | |
7 | |
8 <stdio> <!-- Error detection: fatal stderr patterns, explicit exit-code check, then warning-level stderr patterns --> | |
9 <regex match="command not found" source="stderr" level="fatal"/> | |
10 <regex match="EXITING because of INPUT ERROR" source="stderr" level="fatal"/> | |
11 <regex match="FATAL ERROR" source="stderr" level="fatal"/> | |
12 <exit_code range="1:" level="fatal" description="Tool exited with a non-zero status"/> <!-- explicit, so a failure without a matching stderr pattern is still reported --> | |
13 <regex match="Warning:" source="stderr" level="warning"/> | |
14 <regex match="CMD:" source="stderr" level="warning"/> | |
15 <!-- The patterns below are matched on stderr at warning level only, so they never fail the job --> | |
16 <regex match="-done creating index file:" source="stderr" level="warning"/> | |
17 <regex match="-parsing GTF file:" source="stderr" level="warning"/> | |
18 <regex match="-building interval tree" source="stderr" level="warning"/> | |
19 <regex match="-parsing fusion evidence:" source="stderr" level="warning"/> | |
20 <regex match="-mapping reads to genes" source="stderr" level="warning"/> | |
21 <regex match="-outputting fusion candidates to file:" source="stderr" level="warning"/> | |
22 | |
23 <regex match="Process complete" source="stderr" level="warning"/> | |
24 </stdio> | |
25 | |
26 <version_command><![CDATA[STAR-Fusion --version 2>&1 | grep version | grep -o -E "software version.*?"]]></version_command> <!-- CDATA keeps the shell redirection ampersand well-formed XML --> | |
27 | |
28 <command><![CDATA[ | |
29 #if str($reference.reference_type_selector) == 'cached': | |
30 ln -s '$reference.ctat_resource_lib' tmp_star_fusion_genome_dir | |
31 #else: | |
32 ## 1. ensure the blastn file is provided as *.gz | |
33 if file --mime-type '${reference.blast_pairs}' | grep -q /gzip\$; then | |
34 blast_pairs_gz='${reference.blast_pairs}' ; | |
35 else | |
36 ## Older versions of gzip do not support the -k option to keep the original | |
37 ## file, so compress through stdout instead (a universal solution). The copy | |
38 ## goes into the job working directory, never next to the input dataset. | |
39 gzip -1 -c -- '${reference.blast_pairs}' > blast_pairs.gz && | |
40 blast_pairs_gz="\$(pwd)/blast_pairs.gz" ; | |
41 fi && | |
42 | |
43 ## 2. create reference index - using \$(pwd) is necessary, probably because the perl script changes work directory | |
44 ## - @todo once write a decent STAR and STAR Fusion data manager | |
45 prep_genome_lib.pl | |
46 --genome_fa '${reference.fasta_type.ownFile}' | |
47 --gtf '${reference.geneModel}' | |
48 --blast_pairs "\$blast_pairs_gz" | |
49 --CPU \${GALAXY_SLOTS:-1} | |
50 --output_dir "\$(pwd)/tmp_star_fusion_genome_dir" | |
51 #end if | |
52 && | |
53 | |
54 ## Link in fastq files so they have appropriate extensions | |
55 #if str($input_params.input_source) != "use_chimeric": | |
56 #if $input_params.left_fq.is_of_type("fastq.gz"): | |
57 #set read1 = 'input_1.fastq.gz' | |
58 #else: | |
59 #set read1 = 'input_1.fastq' | |
60 #end if | |
61 ln -f -s '${input_params.left_fq}' ${read1} && | |
62 | |
63 #if $input_params.right_fq: | |
64 #if $input_params.right_fq.is_of_type("fastq.gz"): | |
65 #set read2 = 'input_2.fastq.gz' | |
66 #else: | |
67 #set read2 = 'input_2.fastq' | |
68 #end if | |
69 ln -f -s '${input_params.right_fq}' ${read2} && | |
70 #end if | |
71 #end if | |
72 | |
73 ## 3. Run STAR-Fusion | |
74 STAR-Fusion | |
75 #if str($input_params.input_source) == "use_chimeric": | |
76 --chimeric_junction '${input_params.chimeric_junction}' | |
77 #else: | |
78 --left_fq ${read1} | |
79 #if $input_params.right_fq: | |
80 --right_fq ${read2} | |
81 #end if | |
82 #end if | |
83 | |
84 --genome_lib_dir "\$(pwd)/tmp_star_fusion_genome_dir" | |
85 #if $input_params.optional_outputs: | |
86 ${str($input_params.optional_outputs).replace(',', ' ')} | |
87 #end if | |
88 #if str($params.settingsType) == "full": | |
89 --min_junction_reads $params.min_junction_reads | |
90 --min_sum_frags $params.min_sum_frags | |
91 --max_promiscuity $params.max_promiscuity | |
92 --min_novel_junction_support $params.min_novel_junction_support | |
93 --min_alt_pct_junction $params.min_alt_pct_junction | |
94 --aggregate_novel_junction_dist $params.aggregate_novel_junction_dist | |
95 --E $params.E | |
96 #end if | |
97 --CPU \${GALAXY_SLOTS:-1} | |
98 ]]></command> | |
99 | |
100 <inputs> | |
101 <conditional name="reference"> <!-- Where the STAR-Fusion genome library comes from: a cached library or one built from history datasets --> | |
102 <param name="reference_type_selector" type="select" label="Star Fusion Genome Reference Source"> | |
103 <option value="cached">Locally Cached sequences</option> | |
104 <option value="build" selected="true">Sequences from your history</option> | |
105 </param> | |
106 <when value="cached"> <!-- entry from the ctat_resource data table; the command links it in as the genome lib dir --> | |
107 <param name="ctat_resource_lib" type="select" label="Genome to search"> | |
108 <options from_data_table="ctat_resource"> | |
109 <column name="dbkey" index="1"/> | |
110 <column name="name" index="2"/> | |
111 <column name="value" index="3"/> | |
112 </options> | |
113 </param> | |
114 </when> | |
115 <when value="build"> <!-- FASTA + gene model + BLAST pairs are passed to prep_genome_lib.pl by the command --> | |
116 <!-- Genome source. --> | |
117 <conditional name="fasta_type"> | |
118 <param name="fasta_type_selector" type="select" label="Source for sequence to search"> | |
119 <option value="cached">Locally Cached sequences</option> | |
120 <option value="history" selected="true">Sequences from your history</option> | |
121 </param> | |
122 <when value="cached"> | |
123 <param name="ownFile" | |
124 type="select" label="Genome to search"> | |
125 <options from_data_table="all_fasta"> | |
126 <column name="dbkey" index="1"/> | |
127 <column name="name" index="2"/> | |
128 <column name="value" index="3"/> | |
129 </options> | |
130 </param> | |
131 </when> | |
132 <when value="history"> | |
133 <param name="ownFile" | |
134 type="data" | |
135 format="fasta" | |
136 label="Select the reference genome (FASTA file)"/> | |
137 </when> | |
138 </conditional> | |
139 <param name="geneModel" | |
140 type="data" | |
141 format="gff3,gtf" | |
142 label="Gene model (gff3,gtf) file for splice junctions and fusion gene detection"/> | |
143 <param name="blast_pairs" | |
144 type="data" | |
145 format="tabular" | |
146 label="Result of BLAST+-blastn of the reference fasta sequence with itself" | |
147 help="Run blastn with '-outfmt 6' or choose 'Tabular (standard 12 columns)' in the Galaxy wrapper. For human data it is recommended to obtain it from here: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/"/> | |
148 </when> | |
149 </conditional> | |
150 | |
151 | |
152 <conditional name="input_params"> <!-- Read input: an existing STAR chimeric junction file, or FASTQ reads --> | |
153 <param type="select" | |
154 name="input_source" | |
155 label="Use output from earlier STAR run or let STAR Fusion control running STAR"> | |
156 <option value="use_chimeric">Use output from earlier STAR</option> | |
157 <option value="use_fastq">Let STAR Fusion control running STAR</option> | |
158 </param> | |
159 <when value="use_chimeric"> <!-- junction file plus the output flags offered for this mode --> | |
160 <param argument="--chimeric_junction" | |
161 name="chimeric_junction" | |
162 type="data" | |
163 format="interval" | |
164 label="Chimeric junction file from STAR (with STAR-Fusion settings)"/> | |
165 <param label="Additional Outputs" name="optional_outputs" type="select" multiple="true"> | |
166 <option value="--annotate">--annotate</option> | |
167 <option value="--examine_coding_effect">--examine_coding_effect</option> | |
168 </param> | |
169 </when> | |
170 <when value="use_fastq"> <!-- left reads required, right reads optional; adds the --extract_fusion_reads flag --> | |
171 <param argument="--left_fq" | |
172 name="left_fq" | |
173 type="data" | |
174 format="fastqsanger,fastqsanger.gz" | |
175 label="left.fq file"/> | |
176 <param argument="--right_fq" | |
177 name="right_fq" | |
178 type="data" | |
179 format="fastqsanger,fastqsanger.gz" | |
180 optional="true" | |
181 label="right.fq file (actually optional, but highly recommended)"/> | |
182 <param label="Additional Outputs" name="optional_outputs" type="select" multiple="true"> | |
183 <option value="--annotate">--annotate</option> | |
184 <option value="--examine_coding_effect">--examine_coding_effect</option> | |
185 <option value="--extract_fusion_reads">--extract_fusion_reads</option> | |
186 </param> | |
187 </when> | |
188 </conditional> | |
189 | |
190 <conditional name="params"> <!-- Filtering thresholds; only added to the command line when "full" is chosen --> | |
191 <param label="Settings to use" name="settingsType" type="select" help="You can use the default settings or set custom values for any STAR Fusion parameter."> | |
192 <option value="default" selected="true">Use Defaults</option> | |
193 <option value="full">Full parameter list</option> | |
194 </param> | |
195 <when value="default"/> | |
196 <when value="full"><!-- Every parameter below is passed explicitly to STAR-Fusion. --> | |
197 <param argument="--min_junction_reads" | |
198 name="min_junction_reads" | |
199 type="integer" value="1" | |
200 label="minimum number of junction-spanning reads required."/> | |
201 <param argument="--min_sum_frags" | |
202 name="min_sum_frags" | |
203 type="integer" | |
204 value="2" | |
205 label="minimum fusion support = (#junction_reads + #spanning_frags)"/> | |
206 <param argument="--max_promiscuity" | |
207 name="max_promiscuity" | |
208 type="integer" | |
209 value="3" | |
210 label="maximum number of partners allowed for a given fusion"/> | |
211 <param argument="--min_novel_junction_support" | |
212 name="min_novel_junction_support" | |
213 type="integer" | |
214 value="3" | |
215 label="minimum of 3 junction reads required if breakpoint lacks involvement of only reference junctions"/> | |
216 <param argument="--min_alt_pct_junction" | |
217 name="min_alt_pct_junction" | |
218 type="float" | |
219 value="10" | |
220 label="10% of the dominant isoform junction support"/> | |
221 <param argument="--aggregate_novel_junction_dist" | |
222 name="aggregate_novel_junction_dist" | |
223 type="integer" | |
224 value="5" | |
225 label="non-ref junctions within 5 are merged into single calls"/> | |
226 <param argument="-E" | |
227 name="E" | |
228 type="float" | |
229 value="0.001" | |
230 label="E-value threshold for blast searches"/> | |
231 </when> | |
232 </conditional> | |
233 </inputs> | |
234 | |
235 <outputs> <!-- Every output needs a unique name; optional outputs are filtered on the selected "Additional Outputs" flags --> | |
236 <data format="bam" name="out_bam" label="${tool.name} on ${on_string}: Aligned.sortedByCoord.out.bam" from_work_dir="star-fusion.fusion_candidates.final"/> <!-- NOTE(review): format and label say BAM, but the collected file is star-fusion.fusion_candidates.final; confirm the intended source file --> | |
237 <data format="fastqsanger" name="output_reads_1" label="${tool.name} on ${on_string}: fusion_evidence_reads_1" from_work_dir="star-fusion.fusion_evidence_reads_1.fq"> | |
238 <filter>input_params['input_source'] == 'use_fastq' and 'extract_fusion_reads' in str(input_params['optional_outputs'])</filter> | |
239 </data> | |
240 <data format="fastqsanger" name="output_reads_2" label="${tool.name} on ${on_string}: fusion_evidence_reads_2" from_work_dir="star-fusion.fusion_evidence_reads_2.fq"> | |
241 <filter>input_params['input_source'] == 'use_fastq' and 'extract_fusion_reads' in str(input_params['optional_outputs'])</filter> | |
242 </data> | |
243 <data format="tabular" name="output_final" label="${tool.name} on ${on_string}: fusion_predictions.tsv" from_work_dir="star-fusion.fusion_predictions.tsv"/> | |
244 <data format="tabular" name="output_final_abridged" label="${tool.name} on ${on_string}: fusion_predictions.abridged.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.tsv"/> | |
245 <data format="tabular" name="output_final_annotated" label="${tool.name} on ${on_string}: fusion_predictions.abridged.annotated.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.annotated.tsv"> | |
246 <filter>'annotate' in str(input_params['optional_outputs']) and 'examine_coding_effect' not in str(input_params['optional_outputs'])</filter> | |
247 </data> | |
248 <data format="tabular" name="output_final_coding_effect" label="${tool.name} on ${on_string}: fusion_predictions.abridged.coding_effect.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.coding_effect.tsv"> | |
249 <filter>'examine_coding_effect' in str(input_params['optional_outputs']) and 'annotate' not in str(input_params['optional_outputs'])</filter> | |
250 </data> | |
251 <data format="tabular" name="output_final_annotated_coding_effect" label="${tool.name} on ${on_string}: fusion_predictions.abridged.annotated.coding_effect.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.annotated.coding_effect.tsv"> | |
252 <filter>'annotate' in str(input_params['optional_outputs']) and 'examine_coding_effect' in str(input_params['optional_outputs'])</filter> | |
253 </data> | |
254 </outputs> | |
255 | |
256 <tests> | |
257 <test> <!-- Chimeric-junction input; reference built from history FASTA, GTF and BLAST pairs; default settings --> | |
258 <param name="input_source" value="use_chimeric" /> | |
259 <param name="chimeric_junction" ftype="interval" value="test1.tabular" /> | |
260 <param name="fasta_type_selector" value="history" /> | |
261 <param name="ownFile" ftype="fasta" value="test1.fa" /> | |
262 <param name="geneModel" ftype="gtf" value="test1.gtf" /> | |
263 <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" /> | |
264 <param name="settingsType" value="default" /> | |
265 | |
266 <!-- Last column of the results contains data in a random order so exact matching is not feasible; only the header line and the leading columns of the fusion row are asserted --> | |
267 <output name="output_final"> | |
268 <assert_contents> | |
269 <has_line line="#fusion_name	JunctionReads	SpanningFrags	Splice_type	LeftGene	LeftBreakpoint	RightGene	RightBreakpoint	JunctionReads	SpanningFrags" /> | |
270 <has_text text="GENE1--GENE2	24	0	INCL_NON_REF_SPLICE	GENE1^GENE1	chr1:240:+	GENE2^GENE2	chr2:241:+" /> | |
271 </assert_contents> | |
272 </output> | |
273 </test> | |
274 <test> <!-- FASTQ input with only left reads (uncompressed fastqsanger); reference built from history files; default settings --> | |
275 <param name="input_source" value="use_fastq" /> | |
276 <param name="left_fq" ftype="fastqsanger" value="test1.fastqsanger"/> | |
277 <param name="fasta_type_selector" value="history" /> | |
278 <param name="ownFile" ftype="fasta" value="test1.fa" /> | |
279 <param name="geneModel" ftype="gtf" value="test1.gtf" /> | |
280 <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" /> | |
281 <param name="settingsType" value="default" /> | |
282 | |
283 <!-- Last column of the results contains data in a random order so exact matching is not feasible; only the header line and the leading columns of the fusion row are asserted --> | |
284 <output name="output_final"> | |
285 <assert_contents> | |
286 <has_line line="#fusion_name	JunctionReads	SpanningFrags	Splice_type	LeftGene	LeftBreakpoint	RightGene	RightBreakpoint	JunctionReads	SpanningFrags" /> | |
287 <has_text text="GENE1--GENE2	24	0	INCL_NON_REF_SPLICE	GENE1^GENE1	chr1:240:+	GENE2^GENE2	chr2:241:+" /> | |
288 </assert_contents> | |
289 </output> | |
290 </test> | |
291 <test> <!-- Same as the uncompressed FASTQ test, but the left reads are supplied as fastqsanger.gz --> | |
292 <param name="input_source" value="use_fastq" /> | |
293 <param name="left_fq" ftype="fastqsanger.gz" value="test1.fastqsanger.gz"/> | |
294 <param name="fasta_type_selector" value="history" /> | |
295 <param name="ownFile" ftype="fasta" value="test1.fa" /> | |
296 <param name="geneModel" ftype="gtf" value="test1.gtf" /> | |
297 <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" /> | |
298 <param name="settingsType" value="default" /> | |
299 | |
300 <!-- Last column of the results contains data in a random order so exact matching is not feasible; only the header line and the leading columns of the fusion row are asserted --> | |
301 <output name="output_final"> | |
302 <assert_contents> | |
303 <has_line line="#fusion_name	JunctionReads	SpanningFrags	Splice_type	LeftGene	LeftBreakpoint	RightGene	RightBreakpoint	JunctionReads	SpanningFrags" /> | |
304 <has_text text="GENE1--GENE2	24	0	INCL_NON_REF_SPLICE	GENE1^GENE1	chr1:240:+	GENE2^GENE2	chr2:241:+" /> | |
305 </assert_contents> | |
306 </output> | |
307 </test> | |
308 </tests> | |
309 <help> | |
310 **What it does** | |
311 | |
312 STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set. | |
313 | |
314 **Input: files required to run STAR-Fusion** | |
315 - A genome reference sequence (FASTA-format) | |
316 - A corresponding protein-coding gene annotation set (GTF/GFF Format) | |
317 - A BLAST-matching gene pairs file - in Galaxy you can create such files with the *ncbi_blast_plus* tool suite containing *blastn*: https://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus | |
318 - A STAR chimeric/junction output file - this is optional as STAR Fusion can control running STAR as well. | |
319 | |
320 The authors of STAR Fusion have made some of these files available at: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/. The gene annotations in each case are restricted to the protein-coding and lincRNA transcripts. | |
321 More info: https://github.com/STAR-Fusion/STAR-Fusion/wiki | |
322 | |
323 </help> | |
324 | |
325 <citations> <!-- Single BibTeX entry of type @unpublished whose url field points at the STAR-Fusion GitHub repository --> | |
326 <citation type="bibtex"> | |
327 @unpublished{star_fusion, | |
328 author = {Brian Haas and Nicolas Stransky and Daniel Nicorici}, | |
329 title = {STAR-Fusion}, | |
330 url = {https://github.com/STAR-Fusion/STAR-Fusion} | |
331 } | |
332 </citation> | |
333 </citations> | |
334 </tool> |