Mercurial > repos > jjohnson > star_fusion
diff star_fusion.xml @ 0:5ff7593a7220 draft
Uploaded
author | jjohnson |
---|---|
date | Wed, 04 Oct 2017 15:23:36 -0400 |
parents | |
children | 5748e43a73e0 |
line wrap: on
line diff
<tool id="star_fusion" name="STAR-Fusion" version="1.1.0" profile="17.01">
    <!--
        Galaxy wrapper for STAR-Fusion.
        The command either links a cached CTAT resource library or builds one
        with prep_genome_lib.pl from history datasets, then runs STAR-Fusion on
        a STAR chimeric junction file or on FASTQ reads.
    -->
    <description>detect fusion genes in RNA-Seq data</description>
    <requirements>
        <!-- Bio-conda -->
        <requirement type="package" version="1.1.0">star-fusion</requirement>
    </requirements>

    <stdio>
        <!-- Messages that must fail the job. -->
        <regex match="command not found" source="stderr" level="fatal"/>
        <regex match="EXITING because of INPUT ERROR" source="stderr" level="fatal"/>
        <regex match="FATAL ERROR" source="stderr" level="fatal"/>

        <!-- Progress chatter written to stderr: report as warnings, not errors. -->
        <regex match="Warning:" source="stderr" level="warning"/>
        <regex match="CMD:" source="stderr" level="warning"/>

        <regex match="-done creating index file:" source="stderr" level="warning"/>
        <regex match="-parsing GTF file:" source="stderr" level="warning"/>
        <regex match="-building interval tree" source="stderr" level="warning"/>
        <regex match="-parsing fusion evidence:" source="stderr" level="warning"/>
        <regex match="-mapping reads to genes" source="stderr" level="warning"/>
        <regex match="-outputting fusion candidates to file:" source="stderr" level="warning"/>

        <regex match="Process complete" source="stderr" level="warning"/>
    </stdio>

    <!-- The ampersand must be escaped outside of CDATA for the file to be well-formed. -->
    <version_command>STAR-Fusion --version 2>&amp;1 | grep version | grep -o -E "software version.*?"</version_command>

    <command><![CDATA[
        ## Provide the genome library as ./tmp_star_fusion_genome_dir, either by
        ## linking a cached library or by building one from history datasets.
        #if str($reference.reference_type_selector) == 'cached':
            ln -s '$reference.ctat_resource_lib' tmp_star_fusion_genome_dir
        #else:
            ## 1. ensure the blastn file is provided as *.gz
            ## NOTE(review): the compressed copy is written next to the input
            ## dataset; confirm that location is writable on the target server.
            if file --mime-type '${reference.blast_pairs}' | grep -q /gzip\$; then
                gzip_suffix='' ;
            else
                ## Older versions of gzip do not support the -k option to keep
                ## the original file - this should be a universal solution

                gzip -1 -c -- '${reference.blast_pairs}' > '${reference.blast_pairs}.gz' &&
                gzip_suffix='.gz' ;
            fi &&

            ## 2. create reference index - using \$(pwd) is necessary, probably because the perl script changes work directory
            ##    - @todo once write a decent STAR and STAR Fusion data manager
            prep_genome_lib.pl
                --genome_fa '${reference.fasta_type.ownFile}'
                --gtf '${reference.geneModel}'
                --blast_pairs "${reference.blast_pairs}\$gzip_suffix"
                --CPU \${GALAXY_SLOTS:-1}
                --output_dir "\$(pwd)/tmp_star_fusion_genome_dir"
        #end if
        &&

        ## Link in fastq files so they have appropriate extensions
        #if str($input_params.input_source) != "use_chimeric":
            #if $input_params.left_fq.is_of_type("fastq.gz"):
                #set read1 = 'input_1.fastq.gz'
            #else:
                #set read1 = 'input_1.fastq'
            #end if
            ln -f -s '${input_params.left_fq}' ${read1} &&

            #if $input_params.right_fq:
                #if $input_params.right_fq.is_of_type("fastq.gz"):
                    #set read2 = 'input_2.fastq.gz'
                #else:
                    #set read2 = 'input_2.fastq'
                #end if
                ln -f -s '${input_params.right_fq}' ${read2} &&
            #end if
        #end if

        ## 3. Run STAR-Fusion
        STAR-Fusion
            #if str($input_params.input_source) == "use_chimeric":
                --chimeric_junction '${input_params.chimeric_junction}'
            #else:
                --left_fq ${read1}
                #if $input_params.right_fq:
                    --right_fq ${read2}
                #end if
            #end if

            --genome_lib_dir "\$(pwd)/tmp_star_fusion_genome_dir"

            ## The multi-select renders as a comma separated list of flags, or as
            ## 'None' when nothing is selected. It has to be evaluated through a
            ## placeholder; a bare Python expression would be copied verbatim
            ## into the command line.
            #set extra_flags = str($input_params.optional_outputs).replace(',', ' ')
            #if $extra_flags and $extra_flags != 'None':
                $extra_flags
            #end if

            #if str($params.settingsType) == "full":
                --min_junction_reads $params.min_junction_reads
                --min_sum_frags $params.min_sum_frags
                --max_promiscuity $params.max_promiscuity
                --min_novel_junction_support $params.min_novel_junction_support
                --min_alt_pct_junction $params.min_alt_pct_junction
                --aggregate_novel_junction_dist $params.aggregate_novel_junction_dist
                --E $params.E
            #end if
            --CPU \${GALAXY_SLOTS:-1}
    ]]></command>

    <inputs>
        <!-- Where the STAR-Fusion genome library comes from. -->
        <conditional name="reference">
            <param name="reference_type_selector" type="select" label="Star Fusion Genome Reference Source">
                <option value="cached">Locally Cached sequences</option>
                <option value="build" selected="true">Sequences from your history</option>
            </param>
            <when value="cached">
                <param name="ctat_resource_lib" type="select" label="Genome to search">
                    <options from_data_table="ctat_resource">
                        <column name="dbkey" index="1"/>
                        <column name="name" index="2"/>
                        <column name="value" index="3"/>
                    </options>
                </param>
            </when>
            <when value="build">
                <!-- Genome source. -->
                <conditional name="fasta_type">
                    <param name="fasta_type_selector" type="select" label="Source for sequence to search">
                        <option value="cached">Locally Cached sequences</option>
                        <option value="history" selected="true">Sequences from your history</option>
                    </param>
                    <when value="cached">
                        <param name="ownFile"
                               type="select"
                               label="Genome to search">
                            <options from_data_table="all_fasta">
                                <column name="dbkey" index="1"/>
                                <column name="name" index="2"/>
                                <column name="value" index="3"/>
                            </options>
                        </param>
                    </when>
                    <when value="history">
                        <param name="ownFile"
                               type="data"
                               format="fasta"
                               label="Select the reference genome (FASTA file)"/>
                    </when>
                </conditional>
                <param name="geneModel"
                       type="data"
                       format="gff3,gtf"
                       label="Gene model (gff3,gtf) file for splice junctions and fusion gene detection"/>
                <param name="blast_pairs"
                       type="data"
                       format="tabular"
                       label="Result of BLAST+-blastn of the reference fasta sequence with itself"
                       help="Run blastn with '-outfmt 6' or choose 'Tabular (standard 12 columns)' in the Galaxy wrapper. For human data it is recommended to obtain it from here: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/"/>
            </when>
        </conditional>


        <!-- Either reuse a chimeric junction file or let STAR-Fusion run STAR on reads. -->
        <conditional name="input_params">
            <param name="input_source"
                   type="select"
                   label="Use output from earlier STAR run or let STAR Fusion control running STAR">
                <option value="use_chimeric">Use output from earlier STAR</option>
                <option value="use_fastq">Let STAR Fusion control running STAR</option>
            </param>
            <when value="use_chimeric">
                <param name="chimeric_junction"
                       type="data"
                       format="interval"
                       argument="--chimeric_junction"
                       label="Chimeric junction file from STAR (with STAR-Fusion settings)"/>
                <param name="optional_outputs" type="select" multiple="true" label="Additional Outputs">
                    <option value="--annotate">--annotate</option>
                    <option value="--examine_coding_effect">--examine_coding_effect</option>
                </param>
            </when>
            <when value="use_fastq">
                <param name="left_fq"
                       type="data"
                       format="fastqsanger,fastqsanger.gz"
                       argument="--left_fq"
                       label="left.fq file"/>
                <param name="right_fq"
                       type="data"
                       format="fastqsanger,fastqsanger.gz"
                       optional="true"
                       argument="--right_fq"
                       label="right.fq file (actually optional, but highly recommended)"/>
                <param name="optional_outputs" type="select" multiple="true" label="Additional Outputs">
                    <option value="--annotate">--annotate</option>
                    <option value="--examine_coding_effect">--examine_coding_effect</option>
                    <option value="--extract_fusion_reads">--extract_fusion_reads</option>
                </param>
            </when>
        </conditional>

        <conditional name="params">
            <param name="settingsType" type="select" label="Settings to use" help="You can use the default settings or set custom values for any STAR Fusion parameter.">
                <option value="default" selected="true">Use Defaults</option>
                <option value="full">Full parameter list</option>
            </param>
            <when value="default" />
            <when value="full"><!-- Full/advanced params. -->
                <param name="min_junction_reads"
                       type="integer"
                       value="1"
                       label="minimum number of junction-spanning reads required."
                       argument="--min_junction_reads"/>
                <param name="min_sum_frags"
                       type="integer"
                       value="2"
                       label="minimum fusion support = (#junction_reads + #spanning_frags)"
                       argument="--min_sum_frags"/>
                <param name="max_promiscuity"
                       type="integer"
                       value="3"
                       label="maximum number of partners allowed for a given fusion"
                       argument="--max_promiscuity"/>
                <param name="min_novel_junction_support"
                       type="integer"
                       value="3"
                       label="minimum of 3 junction reads required if breakpoint lacks involvement of only reference junctions"
                       argument="--min_novel_junction_support"/>
                <param name="min_alt_pct_junction"
                       type="float"
                       value="10"
                       label="10% of the dominant isoform junction support"
                       argument="--min_alt_pct_junction"/>
                <param name="aggregate_novel_junction_dist"
                       type="integer"
                       value="5"
                       label="non-ref junctions within 5 are merged into single calls"
                       argument="--aggregate_novel_junction_dist"/>
                <param name="E"
                       type="float"
                       value="0.001"
                       label="E-value threshold for blast searches"
                       argument="-E"/>
            </when>
        </conditional>
    </inputs>

    <outputs>
        <!--
            Every output needs a unique name; the first prediction table keeps
            the name output_final because the tests below refer to it.
            NOTE(review): the from_work_dir paths assume STAR-Fusion writes its
            results into the job working directory; confirm against the
            default output directory of the packaged version.
        -->
        <!-- NOTE(review): declared as BAM and labelled as the STAR alignment, but collected from star-fusion.fusion_candidates.final; confirm the intended file. -->
        <data format="bam" name="out_bam" label="${tool.name} on ${on_string}: Aligned.sortedByCoord.out.bam" from_work_dir="star-fusion.fusion_candidates.final"/>
        <data format="fastqsanger" name="output_reads_1" label="${tool.name} on ${on_string}: fusion_evidence_reads_1" from_work_dir="star-fusion.fusion_evidence_reads_1.fq">
            <filter>input_params['input_source'] == 'use_fastq' and 'extract_fusion_reads' in str(input_params['optional_outputs'])</filter>
        </data>
        <data format="fastqsanger" name="output_reads_2" label="${tool.name} on ${on_string}: fusion_evidence_reads_2" from_work_dir="star-fusion.fusion_evidence_reads_2.fq">
            <filter>input_params['input_source'] == 'use_fastq' and 'extract_fusion_reads' in str(input_params['optional_outputs'])</filter>
        </data>
        <data format="tabular" name="output_final" label="${tool.name} on ${on_string}: fusion_predictions.tsv" from_work_dir="star-fusion.fusion_predictions.tsv"/>
        <data format="tabular" name="output_abridged" label="${tool.name} on ${on_string}: fusion_predictions.abridged.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.tsv"/>
        <!-- The three tables below are mutually exclusive: which one exists depends on the selected flags. -->
        <data format="tabular" name="output_annotated" label="${tool.name} on ${on_string}: fusion_predictions.abridged.annotated.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.annotated.tsv">
            <filter>'annotate' in str(input_params['optional_outputs']) and 'examine_coding_effect' not in str(input_params['optional_outputs'])</filter>
        </data>
        <data format="tabular" name="output_coding_effect" label="${tool.name} on ${on_string}: fusion_predictions.abridged.coding_effect.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.coding_effect.tsv">
            <filter>'examine_coding_effect' in str(input_params['optional_outputs']) and 'annotate' not in str(input_params['optional_outputs'])</filter>
        </data>
        <data format="tabular" name="output_annotated_coding_effect" label="${tool.name} on ${on_string}: fusion_predictions.abridged.annotated.coding_effect.tsv" from_work_dir="star-fusion.fusion_predictions.abridged.annotated.coding_effect.tsv">
            <filter>'annotate' in str(input_params['optional_outputs']) and 'examine_coding_effect' in str(input_params['optional_outputs'])</filter>
        </data>
    </outputs>

    <tests>
        <!--
            Tabs inside the assertion attributes are written as character
            references: a literal tab in an attribute value is normalised to a
            space by the XML parser and would never match tab separated output.
        -->
        <test>
            <param name="input_source" value="use_chimeric" />
            <param name="chimeric_junction" ftype="interval" value="test1.tabular" />
            <param name="fasta_type_selector" value="history" />
            <param name="ownFile" ftype="fasta" value="test1.fa" />
            <param name="geneModel" ftype="gtf" value="test1.gtf" />
            <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
            <param name="settingsType" value="default" />

            <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
            <output name="output_final">
                <assert_contents>
                    <has_line line="#fusion_name&#009;JunctionReads&#009;SpanningFrags&#009;Splice_type&#009;LeftGene&#009;LeftBreakpoint&#009;RightGene&#009;RightBreakpoint&#009;JunctionReads&#009;SpanningFrags" />
                    <has_text text="GENE1--GENE2&#009;24&#009;0&#009;INCL_NON_REF_SPLICE&#009;GENE1^GENE1&#009;chr1:240:+&#009;GENE2^GENE2&#009;chr2:241:+" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input_source" value="use_fastq" />
            <param name="left_fq" ftype="fastqsanger" value="test1.fastqsanger"/>
            <param name="fasta_type_selector" value="history" />
            <param name="ownFile" ftype="fasta" value="test1.fa" />
            <param name="geneModel" ftype="gtf" value="test1.gtf" />
            <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
            <param name="settingsType" value="default" />

            <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
            <output name="output_final">
                <assert_contents>
                    <has_line line="#fusion_name&#009;JunctionReads&#009;SpanningFrags&#009;Splice_type&#009;LeftGene&#009;LeftBreakpoint&#009;RightGene&#009;RightBreakpoint&#009;JunctionReads&#009;SpanningFrags" />
                    <has_text text="GENE1--GENE2&#009;24&#009;0&#009;INCL_NON_REF_SPLICE&#009;GENE1^GENE1&#009;chr1:240:+&#009;GENE2^GENE2&#009;chr2:241:+" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input_source" value="use_fastq" />
            <param name="left_fq" ftype="fastqsanger.gz" value="test1.fastqsanger.gz"/>
            <param name="fasta_type_selector" value="history" />
            <param name="ownFile" ftype="fasta" value="test1.fa" />
            <param name="geneModel" ftype="gtf" value="test1.gtf" />
            <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
            <param name="settingsType" value="default" />

            <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
            <output name="output_final">
                <assert_contents>
                    <has_line line="#fusion_name&#009;JunctionReads&#009;SpanningFrags&#009;Splice_type&#009;LeftGene&#009;LeftBreakpoint&#009;RightGene&#009;RightBreakpoint&#009;JunctionReads&#009;SpanningFrags" />
                    <has_text text="GENE1--GENE2&#009;24&#009;0&#009;INCL_NON_REF_SPLICE&#009;GENE1^GENE1&#009;chr1:240:+&#009;GENE2^GENE2&#009;chr2:241:+" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
**What it does**

STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set.

**Input: files required to run STAR-Fusion**

- A genome reference sequence (FASTA-format)
- A corresponding protein-coding gene annotation set (GTF/GFF Format)
- A blast-matching gene pairs file - in Galaxy you can create such files with the *ncbi_blast_plus* tool suite containing *blastn*: https://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
- A STAR chimeric/junction output file - this is optional as STAR Fusion can control running STAR as well.

The authors of STAR Fusion have made some of these files available at: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/. The gene annotations in each case are restricted to the protein-coding and lincRNA transcripts.
More info: https://github.com/STAR-Fusion/STAR-Fusion/wiki

    </help>

    <citations>
        <citation type="bibtex">
            @unpublished{star_fusion,
                author = {Brian Haas and Nicolas Stransky and Daniel Nicorici},
                title = {STAR-Fusion},
                url = {https://github.com/STAR-Fusion/STAR-Fusion}
            }
        </citation>
    </citations>
</tool>