Mercurial > repos > trinity_ctat > ctat_star_fusion
changeset 0:8484a9930003 draft
Making sure tools are up to date.
author | trinity_ctat |
---|---|
date | Thu, 12 Apr 2018 10:48:19 -0400 |
parents | |
children | dc3454ad7e09 |
files | ctat_star_fusion.xml test-data/StarFusion/reads_1.fq.gz test-data/StarFusion/reads_2.fq.gz tool-data/ctat_genome_ref_libs.loc.sample tool_data_table_conf.xml.sample |
diffstat | 5 files changed, 133 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ctat_star_fusion.xml Thu Apr 12 10:48:19 2018 -0400
@@ -0,0 +1,108 @@
+<tool id="ctat_star_fusion" name="ctat_star_fusion" version="1.0.0" profile="17.05">
+    <description>Fusion-finding Pipeline Using the STAR Aligner</description>
+    <requirements>
+        <requirement type="package" version="1.3.2">star-fusion</requirement>
+        <!--
+          FIX - the following may get added to the Star-Fusion recipe, but
+          are not in it at the current time.
+        -->
+        <requirement type="package">perl-carp</requirement>
+        <requirement type="package">perl-json-xs</requirement>
+        <requirement type="package">perl-io-gzip</requirement>
+        <requirement type="package">samtools</requirement>
+        <!--
+          FIX - If samtools is really a requirement, shouldn't it be a star-fusion dependency?
+          Also, the current version of samtools is up to 1.8.3. 1.3.0 is quite old. Why that version?
+          I am trying it without pinning to a particular version.
+          <requirement type="package" version="1.3">samtools</requirement>
+        -->
+    </requirements>
+    <command detect_errors="default">
+        <![CDATA[
+        STAR-Fusion
+            --genome_lib_dir "${genome_ref_lib.fields.path}"
+            --left_fq "${left_input}"
+            --right_fq "${right_input}"
+            --output_dir subdir
+        ]]>
+    </command>
+    <stdio> <!-- Error rules: both exit_code and regex must be children of stdio to be recognised. -->
+        <exit_code range="1:" level="fatal" description="Error returned from pipeline" />
+        <regex match="Must investigate error above."
+               source="stderr"
+               level="fatal"
+               description="Unknown error encountered" />
+    </stdio>
+    <inputs>
+        <param format="fastq" name="left_input" type="data" label="Left/Forward strand reads" help=""/>
+        <param format="fastq" name="right_input" type="data" label="Right/Reverse strand reads" help=""/>
+        <param name="genome_ref_lib" type="select" label="Select a reference genome">
+            <options from_data_table="ctat_genome_ref_libs">
+                <filter type="sort_by" column="2" />
+                <validator type="no_options" message="No indexes are available" />
+            </options>
+        </param>
+
+    </inputs>
+    <outputs>
+        <data format="txt" name="starfusion_log" label="${tool.name} on ${on_string}: log" from_work_dir="subdir/Log.out"/>
+        <data format="bam" name="aligned_bam" label="${tool.name} on ${on_string}: Aligned Bam" from_work_dir="subdir/Aligned.sortedByCoord.out.bam"/>
+        <data format="tabular" name="candidates" label="${tool.name} on ${on_string}: Fusion candidates" from_work_dir="subdir/star-fusion.fusion_candidates.final.abridged"/>
+        <data format="tabular" name="chimeric_junc" label="${tool.name} on ${on_string}: Chimeric.out.junction" from_work_dir="subdir/Chimeric.out.junction"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="left_input" value="StarFusion/reads_1.fq.gz" />
+            <param name="right_input" value="StarFusion/reads_2.fq.gz" />
+            <!--
+            <param name="left_input" value="reads.left.simPE.fq" />
+            <param name="right_input" value="reads.right.simPE.fq" />
+            -->
+            <!-- FIX - now that we added the CTAT ref lib path as a parameter, how do we find it for testing?
+            <param name="genome_ref_lib.fields.path" value="?????" />
+            -->
+            <!--
+            <output name="aligned_bam" file="SF_out_aligned.bam" />
+            <output name="candidates" file="SF_out_fusion_candidates.dat" />
+            <output name="chimeric_junc" file="SF_out_chimeric.junction" />
+            <output name="starfusion_log" file="SF_out.log" />
+            -->
+            <output name="starfusion_log">
+                <assert_contents>
+                    <has_line_matching expression=".+" />
+                    <has_line line="ALL DONE!" />
+                </assert_contents>
+            </output>
+            <output name="aligned_bam">
+                <assert_contents>
+                    <has_line_matching expression=".+" />
+                    <!-- The following checks for the magic number at the start of the bam file -->
+                    <!-- At first I thought "\x8B\x1F" was the number, but it turns out the file
+                         produced by the StarFusion test had that sequence in the file somewhere else.
+                    -->
+                    <has_text_matching expression="\x1F\x8B" />
+                </assert_contents>
+            </output>
+            <output name="candidates"> <!-- Column separators are written as &#9; because literal tabs in an attribute value are normalized to spaces by XML parsers. -->
+                <assert_contents>
+                    <has_line_matching expression=".+" />
+                    <has_line line="#fusion_name&#9;JunctionReads&#9;SpanningFrags&#9;Splice_type&#9;LeftGene&#9;LeftBreakpoint&#9;RightGene&#9;RightBreakpoint&#9;Long_double_anchor_support" />
+                </assert_contents>
+            </output>
+            <output name="chimeric_junc">
+                <assert_contents>
+                    <has_line_matching expression=".+" />
+                    <has_line_matching expression="^chr20.*" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+.. class:: infomark
+
+STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set. Please read more here_.
+
+.. _here: https://github.com/STAR-Fusion/STAR-Fusion/wiki
+
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/ctat_genome_ref_libs.loc.sample Thu Apr 12 10:48:19 2018 -0400 @@ -0,0 +1,15 @@ +# This file lists the locations of CTAT Genome Reference Libraries +# Usually there will only be one library, but it is conceivable +# that there could be multiple libraries. +# This file format is as follows +# (white space characters are TAB characters): +# +#<value> <name> <path> +# value is a unique id +# name is the display name +# path is the directory where the genome lib files are stored +# +#ctat_genome_ref_libs.loc could look like: +# +#GRCh38_v27_CTAT_lib_Feb092018 CTAT_GenomeRefLib_GRCh38_v27_CTAT_lib_Feb092018 /ctat/genome/reference/lib/path +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Apr 12 10:48:19 2018 -0400 @@ -0,0 +1,10 @@ +<tables> <!-- Declares data tables; each has value, name and path columns and is read from a .loc file under tool-data/. --> + <table name="ctat_genome_ref_libs" comment_char="#" allow_duplicate_entries="False"> <!-- Table referenced by the genome_ref_lib select in ctat_star_fusion.xml. --> + <columns>value, name, path</columns> + <file path="tool-data/ctat_genome_ref_libs.loc" /> + </table> + <table name="ctat_centrifuge_indexes" comment_char="#" allow_duplicate_entries="False"> <!-- NOTE(review): ctat_star_fusion.xml does not reference this table and this changeset ships no ctat_centrifuge_indexes.loc.sample; confirm it is needed here. --> + <columns>value, name, path</columns> + <file path="tool-data/ctat_centrifuge_indexes.loc" /> + </table> +</tables>