# HG changeset patch # User jjohnson # Date 1507145016 14400 # Node ID 5ff7593a7220b06e1f5a0bf048f2de9c1856a694 Uploaded diff -r 000000000000 -r 5ff7593a7220 star_fusion.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/star_fusion.xml Wed Oct 04 15:23:36 2017 -0400 @@ -0,0 +1,334 @@ + + detect fusion genes in RNA-Seq data + + + star-fusion + + + + + + + + + + + + + + + + + + + + + STAR-Fusion --version 2>&1 | grep version | grep -o -E "software version.*?" + + '${reference.blast_pairs}.gz' && + gzip_suffix='.gz' ; + fi && + + ## 2. create reference index - using \$(pwd) is necessary, probably because the perl script changes work directory + ## - @todo once write a decent STAR and STAR Fusion data manager + prep_genome_lib.pl + --genome_fa '${reference.fasta_type.ownFile}' + --gtf '${reference.geneModel}' + --blast_pairs "${reference.blast_pairs}\$gzip_suffix" + --CPU \${GALAXY_SLOTS:-1} + --output_dir "\$(pwd)/tmp_star_fusion_genome_dir" + #end if + && + + ## Link in fastq files so they have appropriate extensions + #if str($input_params.input_source) != "use_chimeric": + #if $input_params.left_fq.is_of_type("fastq.gz"): + #set read1 = 'input_1.fastq.gz' + #else: + #set read1 = 'input_1.fastq' + #end if + ln -f -s '${input_params.left_fq}' ${read1} && + + #if $input_params.right_fq: + #if $input_params.right_fq.is_of_type("fastq.gz"): + #set read2 = 'input_2.fastq.gz' + #else: + #set read2 = 'input_2.fastq' + #end if + ln -f -s '${input_params.right_fq}' ${read2} && + #end if + #end if + + ## 3. 
Run STAR-Fusion + STAR-Fusion + #if str($input_params.input_source) == "use_chimeric": + --chimeric_junction '${input_params.chimeric_junction}' + #else: + --left_fq ${read1} + #if $input_params.right_fq: + --right_fq ${read2} + #end if + #end if + + --genome_lib_dir "\$(pwd)/tmp_star_fusion_genome_dir" + + str($input_params.optional_outputs).replace(',',' ') + + #if str($params.settingsType) == "full": + --min_junction_reads $params.min_junction_reads + --min_sum_frags $params.min_sum_frags + --max_promiscuity $params.max_promiscuity + --min_novel_junction_support $params.min_novel_junction_support + --min_alt_pct_junction $params.min_alt_pct_junction + --aggregate_novel_junction_dist $params.aggregate_novel_junction_dist + --E $params.E + #end if + --CPU \${GALAXY_SLOTS:-1} + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + input_params['input_source'] == 'use_fastq' and str(input_params['optional_outputs']).find('extract_fusion_reads') > 0 + + + input_params['input_source'] == 'use_fastq' and str(input_params['optional_outputs']).find('extract_fusion_reads') > 0 + + + + + str(input_params['optional_outputs']).find('annotate') > 0 and not str(input_params['optional_outputs']).find('examine_coding_effect') > 0 + + + str(input_params['optional_outputs']).find('examine_coding_effect') > 0 and not str(input_params['optional_outputs']).find('annotate') > 0 + + + str(input_params['optional_outputs']).find('annotate') > 0 and str(input_params['optional_outputs']).find('examine_coding_effect') > 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. 
STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set. + +**Input: files required to run STAR-Fusion** + - A genome reference sequence (FASTA-format) + - A corresponding protein-coding gene annotation set (GTF/GFF Format) + - A blast-matching gene pairs file - in Galaxy you can create such files with the *ncbi_blast_plus* tool suite containing *blastn*: https://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus + - A STAR chimeric/junction output file - this is optional as STAR Fusion can control running STAR as well. + +The authors of STAR Fusion have made some of these files available at: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/. The gene annotations in each case are restricted to the protein-coding and lincRNA transcripts. +More info: https://github.com/STAR-Fusion/STAR-Fusion/wiki + + + + + + @unpublished{star_fusion, + author = {Brian Haas and Nicolas Stransky and Daniel Nicorici}, + title = {STAR-Fusion}, + url = {https://github.com/STAR-Fusion/STAR-Fusion} + } + + + diff -r 000000000000 -r 5ff7593a7220 test-data/test1-test1.blastn.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1-test1.blastn.tabular Wed Oct 04 15:23:36 2017 -0400 @@ -0,0 +1,2 @@ +chr1 chr1 100.00 480 0 0 1 480 1 480 0.0 866 +chr2 chr2 100.00 480 0 0 1 480 1 480 0.0 866 diff -r 000000000000 -r 5ff7593a7220 test-data/test1.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1.fa Wed Oct 04 15:23:36 2017 -0400 @@ -0,0 +1,18 @@ +>chr1 +GACGGACGTATTCCTCTGGCCTCAACGGTTCCTGCTTTCGCTGGGATCCAAGATTGGCAG +CTGAAACCGCCTTTCCAAAGTGAGTCCTTCGTCTGTGACTAACTGTGCCAAATCGTCTTG +CAAACTCCTGATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATC +ATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCG +GTGCAGCCGATTAGGACCATCTAATGCACTTGTTACAAGACTTCTTTTAAATACTTTCTT +CCTGCCCAGTAGCGGATGATAATGGTTGTTGCCAGCCGGTGTGGAAGGTAACAGCACCGG 
+TGCGAGCCTAATGTGCCGTCTCCACCAACACAAGGCTATCCGGTCGTATAATAGGATTCC +GCAATGGGGTTAGCAAATGGCAGCCTAAACGATATCGGGGACTTGCGATGTACATGCTTT +>chr2 +TCAACAATAAGCGCTTTTTGTAGGCAGGGGCACCCCCTATCAGTGGCTGCGCCAAAACAT +CTTCGGATCCCCTTGTCCAATCAAATTGATCGAATTCTTTCATTTAAGACCCTAATATGA +CATCATTAGTGATTAAATGCCACTCCCAAAATTCTGCCTAGAAATGTTTAAGTTCGCTCC +ACTAAAGTTGTTTAAAACGACTACTAAATCCGCGTGATAGGGGATTTCATATTTAATCTT +TTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCG +CGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCGATTCTATAAGA +TTGCACATTGCGTCTACTTATAAGATGTCTCAACGGCATGCGCAACTTGTGAAGTGCCTA +CTATCCTTAAACGCATATCTCGCACAGTAACTCCCCAATATGTGAGCATCTGATGTTGCC diff -r 000000000000 -r 5ff7593a7220 test-data/test1.fastqsanger --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1.fastqsanger Wed Oct 04 15:23:36 2017 -0400 @@ -0,0 +1,332 @@ +@test_chimeric_mRNA_0 +CAAACTCCTGATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_2 +AACTCCTGATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_4 +CTCCTGATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_6 +CCTGATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_8 +TGATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_10 +ATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_12 
+CCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_14 +AGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_16 +TTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_18 +TAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_20 +ACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_22 +TCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_24 +ACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_26 +CAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_28 +AATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_30 +TTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_32 +ATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_34 
+AGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_36 +CCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_38 +ATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_40 +ACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_42 +AGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_44 +ACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_46 +CCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_48 +AAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_50 +ATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_52 +TTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_54 +TAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_56 
+AATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_58 +TCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_60 +ATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_62 +ATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_64 +CACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_66 +CGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_68 +CGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_70 +ACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_72 +TAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_74 +GCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_76 +CTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_78 
+CTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_80 +GCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_82 +TTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_84 +AATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_86 +TTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_88 +TCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_90 +TGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_92 +TGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_94 +CTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_96 +CAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_98 +AGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_100 
+GGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_102 +TTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_104 +TTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_106 +GGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_108 +TCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_110 +CGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_112 +CCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_114 +CGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_116 +AGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_118 +CGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_120 +TTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII 
+@test_chimeric_mRNA_122 +ATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_124 +CGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_126 +TAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_128 +AGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_130 +GAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_132 +ACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_134 +AGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCAC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_136 +CCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_138 +GATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTAC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_140 +TCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACAC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_142 +TTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTA ++ 
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_144 +AATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_146 +TGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_148 +GATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_150 +TGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_152 +GCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_154 +CGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_156 +CAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCGAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_158 +GGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCGATTC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_160 +TGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCGATTCTA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_162 +GTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCGATTCTATA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@test_chimeric_mRNA_164 +ATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCGATTCTATAAG 
++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII diff -r 000000000000 -r 5ff7593a7220 test-data/test1.fastqsanger.gz Binary file test-data/test1.fastqsanger.gz has changed diff -r 000000000000 -r 5ff7593a7220 test-data/test1.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1.gtf Wed Oct 04 15:23:36 2017 -0400 @@ -0,0 +1,8 @@ +chr1 test gene 140 363 . + . gene_id "GENE1"; gene_name "GENE1"; transcript_id "GENE1_t1"; +chr1 test transcript 150 353 . + . gene_id "GENE1"; gene_name "GENE1"; transcript_id "GENE1_t1"; +chr1 test exon 150 353 . + . gene_id "GENE1"; transcript_id "GENE1_t1"; exon_number "1"; gene_name "GENE1"; +chr1 test CDS 153 350 . + . gene_id "GENE1"; transcript_id "GENE1_t1"; exon_number "1"; gene_name "GENE1"; +chr2 test gene 140 363 . + . gene_id "GENE2"; gene_name "GENE2"; transcript_id "GENE2_t1"; +chr2 test transcript 150 353 . + . gene_id "GENE2"; gene_name "GENE2"; transcript_id "GENE2_t1"; +chr2 test exon 150 353 . + . gene_id "GENE2"; transcript_id "GENE2_t1"; exon_number "1"; gene_name "GENE2"; +chr2 test CDS 153 350 . + . 
gene_id "GENE2"; transcript_id "GENE2_t1"; exon_number "1"; gene_name "GENE2"; diff -r 000000000000 -r 5ff7593a7220 test-data/test1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1.tabular Wed Oct 04 15:23:36 2017 -0400 @@ -0,0 +1,24 @@ +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_60 181 60M15S 241 60S15M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_62 183 58M17S 241 58S17M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_64 185 56M19S 241 56S19M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_66 187 54M21S 241 54S21M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_68 189 52M23S 241 52S23M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_70 191 50M25S 241 50S25M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_72 193 48M27S 241 48S27M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_74 195 46M29S 241 46S29M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_76 197 44M31S 241 44S31M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_78 199 42M33S 241 42S33M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_80 201 40M35S 241 40S35M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_82 203 38M37S 241 38S37M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_84 205 36M39S 241 36S39M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_86 207 34M41S 241 34S41M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_88 209 32M43S 241 32S43M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_90 211 30M45S 241 30S45M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_92 213 28M47S 241 28S47M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_94 215 26M49S 241 26S49M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_96 217 24M51S 241 24S51M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_98 219 22M53S 241 22S53M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_100 221 20M55S 241 20S55M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_102 223 18M57S 241 18S57M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_104 225 16M59S 241 16S59M +chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_106 
227 14M61S 241 14S61M diff -r 000000000000 -r 5ff7593a7220 tool-data/all_fasta.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Wed Oct 04 15:23:36 2017 -0400 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +# +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. 
+# diff -r 000000000000 -r 5ff7593a7220 tool-data/ctat_resource.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/ctat_resource.loc.sample Wed Oct 04 15:23:36 2017 -0400 @@ -0,0 +1,9 @@ +#This file lists the locations and dbkeys of all the ctat_reference_lib directories +#Prebuilt plug-n-play downloads: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/ +# For example: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_gencode_v26_CTAT_lib_July192017.plug-n-play.tar.gz +# tar zxf GRCh38_gencode_v26_CTAT_lib_July192017.plug-n-play.tar.gz +# +#This ctat_resource.loc file has the format (white space characters are TAB characters): +# +# +#GRCh38_gencode_v26_CTAT_lib_July192017 GRCh38 GRCh38_gencode_v26_CTAT_lib_July192017 /depot/GRCh38_gencode_v26_CTAT_lib_July192017/ctat_genome_lib_build_dir diff -r 000000000000 -r 5ff7593a7220 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Oct 04 15:23:36 2017 -0400 @@ -0,0 +1,11 @@ + + + + value, dbkey, name, path + +
+ + value, dbkey, name, path + +
+