# HG changeset patch
# User jjohnson
# Date 1507145016 14400
# Node ID 5ff7593a7220b06e1f5a0bf048f2de9c1856a694
Uploaded
diff -r 000000000000 -r 5ff7593a7220 star_fusion.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/star_fusion.xml Wed Oct 04 15:23:36 2017 -0400
@@ -0,0 +1,334 @@
+
+ detect fusion genes in RNA-Seq data
+
+
+ star-fusion
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ STAR-Fusion --version 2>&1 | grep version | grep -o -E "software version.*?"
+
+ '${reference.blast_pairs}.gz' &&
+ gzip_suffix='.gz' ;
+ fi &&
+
+ ## 2. create reference index - using \$(pwd) is necessary, probably because the perl script changes work directory
+ ## - @todo: write a proper data manager for STAR and STAR-Fusion
+ prep_genome_lib.pl
+ --genome_fa '${reference.fasta_type.ownFile}'
+ --gtf '${reference.geneModel}'
+ --blast_pairs "${reference.blast_pairs}\$gzip_suffix"
+ --CPU \${GALAXY_SLOTS:-1}
+ --output_dir "\$(pwd)/tmp_star_fusion_genome_dir"
+ #end if
+ &&
+
+ ## Link in fastq files so they have appropriate extensions
+ #if str($input_params.input_source) != "use_chimeric":
+ #if $input_params.left_fq.is_of_type("fastq.gz"):
+ #set read1 = 'input_1.fastq.gz'
+ #else:
+ #set read1 = 'input_1.fastq'
+ #end if
+ ln -f -s '${input_params.left_fq}' ${read1} &&
+
+ #if $input_params.right_fq:
+ #if $input_params.right_fq.is_of_type("fastq.gz"):
+ #set read2 = 'input_2.fastq.gz'
+ #else:
+ #set read2 = 'input_2.fastq'
+ #end if
+ ln -f -s '${input_params.right_fq}' ${read2} &&
+ #end if
+ #end if
+
+ ## 3. Run STAR-Fusion
+ STAR-Fusion
+ #if str($input_params.input_source) == "use_chimeric":
+ --chimeric_junction '${input_params.chimeric_junction}'
+ #else:
+ --left_fq ${read1}
+ #if $input_params.right_fq:
+ --right_fq ${read2}
+ #end if
+ #end if
+
+ --genome_lib_dir "\$(pwd)/tmp_star_fusion_genome_dir"
+
+ str($input_params.optional_outputs).replace(',',' ')
+
+ #if str($params.settingsType) == "full":
+ --min_junction_reads $params.min_junction_reads
+ --min_sum_frags $params.min_sum_frags
+ --max_promiscuity $params.max_promiscuity
+ --min_novel_junction_support $params.min_novel_junction_support
+ --min_alt_pct_junction $params.min_alt_pct_junction
+ --aggregate_novel_junction_dist $params.aggregate_novel_junction_dist
+ --E $params.E
+ #end if
+ --CPU \${GALAXY_SLOTS:-1}
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ input_params['input_source'] == 'use_fastq' and str(input_params['optional_outputs']).find('extract_fusion_reads') > 0
+
+
+ input_params['input_source'] == 'use_fastq' and str(input_params['optional_outputs']).find('extract_fusion_reads') > 0
+
+
+
+
+ str(input_params['optional_outputs']).find('annotate') > 0 and not str(input_params['optional_outputs']).find('examine_coding_effect') > 0
+
+
+ str(input_params['optional_outputs']).find('examine_coding_effect') > 0 and not str(input_params['optional_outputs']).find('annotate') > 0
+
+
+ str(input_params['optional_outputs']).find('annotate') > 0 and str(input_params['optional_outputs']).find('examine_coding_effect') > 0
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set.
+
+**Input: files required to run STAR-Fusion**
+ - A genome reference sequence (FASTA-format)
+ - A corresponding protein-coding gene annotation set (GTF/GFF Format)
+ - A BLAST-matching gene pairs file - in Galaxy you can create such files with the *ncbi_blast_plus* tool suite containing *blastn*: https://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+ - A STAR chimeric/junction output file - this is optional, as STAR-Fusion can also run STAR itself.
+
+The authors of STAR-Fusion have made some of these files available at: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/. The gene annotations in each case are restricted to the protein-coding and lincRNA transcripts.
+More info: https://github.com/STAR-Fusion/STAR-Fusion/wiki
+
+
+
+
+
+ @unpublished{star_fusion,
+ author = {Brian Haas and Nicolas Stransky and Daniel Nicorici},
+ title = {STAR-Fusion},
+ url = {https://github.com/STAR-Fusion/STAR-Fusion}
+ }
+
+
+
diff -r 000000000000 -r 5ff7593a7220 test-data/test1-test1.blastn.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1-test1.blastn.tabular Wed Oct 04 15:23:36 2017 -0400
@@ -0,0 +1,2 @@
+chr1 chr1 100.00 480 0 0 1 480 1 480 0.0 866
+chr2 chr2 100.00 480 0 0 1 480 1 480 0.0 866
diff -r 000000000000 -r 5ff7593a7220 test-data/test1.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1.fa Wed Oct 04 15:23:36 2017 -0400
@@ -0,0 +1,18 @@
+>chr1
+GACGGACGTATTCCTCTGGCCTCAACGGTTCCTGCTTTCGCTGGGATCCAAGATTGGCAG
+CTGAAACCGCCTTTCCAAAGTGAGTCCTTCGTCTGTGACTAACTGTGCCAAATCGTCTTG
+CAAACTCCTGATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATC
+ATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCG
+GTGCAGCCGATTAGGACCATCTAATGCACTTGTTACAAGACTTCTTTTAAATACTTTCTT
+CCTGCCCAGTAGCGGATGATAATGGTTGTTGCCAGCCGGTGTGGAAGGTAACAGCACCGG
+TGCGAGCCTAATGTGCCGTCTCCACCAACACAAGGCTATCCGGTCGTATAATAGGATTCC
+GCAATGGGGTTAGCAAATGGCAGCCTAAACGATATCGGGGACTTGCGATGTACATGCTTT
+>chr2
+TCAACAATAAGCGCTTTTTGTAGGCAGGGGCACCCCCTATCAGTGGCTGCGCCAAAACAT
+CTTCGGATCCCCTTGTCCAATCAAATTGATCGAATTCTTTCATTTAAGACCCTAATATGA
+CATCATTAGTGATTAAATGCCACTCCCAAAATTCTGCCTAGAAATGTTTAAGTTCGCTCC
+ACTAAAGTTGTTTAAAACGACTACTAAATCCGCGTGATAGGGGATTTCATATTTAATCTT
+TTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCG
+CGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCGATTCTATAAGA
+TTGCACATTGCGTCTACTTATAAGATGTCTCAACGGCATGCGCAACTTGTGAAGTGCCTA
+CTATCCTTAAACGCATATCTCGCACAGTAACTCCCCAATATGTGAGCATCTGATGTTGCC
diff -r 000000000000 -r 5ff7593a7220 test-data/test1.fastqsanger
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1.fastqsanger Wed Oct 04 15:23:36 2017 -0400
@@ -0,0 +1,332 @@
+@test_chimeric_mRNA_0
+CAAACTCCTGATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_2
+AACTCCTGATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_4
+CTCCTGATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_6
+CCTGATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_8
+TGATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_10
+ATCCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_12
+CCAGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_14
+AGTTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_16
+TTTAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_18
+TAACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_20
+ACTCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_22
+TCACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_24
+ACCAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_26
+CAAATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_28
+AATTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_30
+TTATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_32
+ATAGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_34
+AGCCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_36
+CCATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_38
+ATACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_40
+ACAGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_42
+AGACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_44
+ACCCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_46
+CCAAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_48
+AAATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_50
+ATTTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_52
+TTTAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_54
+TAAATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_56
+AATCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_58
+TCATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_60
+ATATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_62
+ATCACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_64
+CACGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_66
+CGCGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_68
+CGACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_70
+ACTAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_72
+TAGCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_74
+GCCTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_76
+CTCTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_78
+CTGCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_80
+GCTTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_82
+TTAATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_84
+AATTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_86
+TTTCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_88
+TCTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_90
+TGTGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_92
+TGCTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_94
+CTCAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_96
+CAAGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_98
+AGGGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_100
+GGTTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_102
+TTTTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_104
+TTGGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_106
+GGTCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_108
+TCCGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_110
+CGCCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_112
+CCCGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_114
+CGAGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_116
+AGCGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_118
+CGTTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_120
+TTATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_122
+ATCGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_124
+CGTAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_126
+TAAGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_128
+AGGAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_130
+GAACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_132
+ACAGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_134
+AGCCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_136
+CCGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_138
+GATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_140
+TCTTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_142
+TTAATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_144
+AATGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_146
+TGGATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_148
+GATGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_150
+TGGCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_152
+GCCGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_154
+CGCAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_156
+CAGGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_158
+GGTGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCGATTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_160
+TGGTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCGATTCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_162
+GTATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCGATTCTATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@test_chimeric_mRNA_164
+ATGGAAGCTATAAGCGCGGGTGAGAGGGTAATTAGGCGTGTTCACCTACACTACGCTAACGGGCGATTCTATAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff -r 000000000000 -r 5ff7593a7220 test-data/test1.fastqsanger.gz
Binary file test-data/test1.fastqsanger.gz has changed
diff -r 000000000000 -r 5ff7593a7220 test-data/test1.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1.gtf Wed Oct 04 15:23:36 2017 -0400
@@ -0,0 +1,8 @@
+chr1 test gene 140 363 . + . gene_id "GENE1"; gene_name "GENE1"; transcript_id "GENE1_t1";
+chr1 test transcript 150 353 . + . gene_id "GENE1"; gene_name "GENE1"; transcript_id "GENE1_t1";
+chr1 test exon 150 353 . + . gene_id "GENE1"; transcript_id "GENE1_t1"; exon_number "1"; gene_name "GENE1";
+chr1 test CDS 153 350 . + . gene_id "GENE1"; transcript_id "GENE1_t1"; exon_number "1"; gene_name "GENE1";
+chr2 test gene 140 363 . + . gene_id "GENE2"; gene_name "GENE2"; transcript_id "GENE2_t1";
+chr2 test transcript 150 353 . + . gene_id "GENE2"; gene_name "GENE2"; transcript_id "GENE2_t1";
+chr2 test exon 150 353 . + . gene_id "GENE2"; transcript_id "GENE2_t1"; exon_number "1"; gene_name "GENE2";
+chr2 test CDS 153 350 . + . gene_id "GENE2"; transcript_id "GENE2_t1"; exon_number "1"; gene_name "GENE2";
diff -r 000000000000 -r 5ff7593a7220 test-data/test1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1.tabular Wed Oct 04 15:23:36 2017 -0400
@@ -0,0 +1,24 @@
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_60 181 60M15S 241 60S15M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_62 183 58M17S 241 58S17M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_64 185 56M19S 241 56S19M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_66 187 54M21S 241 54S21M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_68 189 52M23S 241 52S23M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_70 191 50M25S 241 50S25M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_72 193 48M27S 241 48S27M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_74 195 46M29S 241 46S29M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_76 197 44M31S 241 44S31M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_78 199 42M33S 241 42S33M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_80 201 40M35S 241 40S35M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_82 203 38M37S 241 38S37M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_84 205 36M39S 241 36S39M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_86 207 34M41S 241 34S41M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_88 209 32M43S 241 32S43M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_90 211 30M45S 241 30S45M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_92 213 28M47S 241 28S47M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_94 215 26M49S 241 26S49M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_96 217 24M51S 241 24S51M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_98 219 22M53S 241 22S53M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_100 221 20M55S 241 20S55M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_102 223 18M57S 241 18S57M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_104 225 16M59S 241 16S59M
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_106 227 14M61S 241 14S61M
diff -r 000000000000 -r 5ff7593a7220 tool-data/all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample Wed Oct 04 15:23:36 2017 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
diff -r 000000000000 -r 5ff7593a7220 tool-data/ctat_resource.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ctat_resource.loc.sample Wed Oct 04 15:23:36 2017 -0400
@@ -0,0 +1,9 @@
+#This file lists the locations and dbkeys of all the ctat_reference_lib directories
+#Prebuilt plug-n-play downloads: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/
+# For example: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_gencode_v26_CTAT_lib_July192017.plug-n-play.tar.gz
+# tar zxf GRCh38_gencode_v26_CTAT_lib_July192017.plug-n-play.tar.gz
+#
+#This ctat_resource.loc file has the format (white space characters are TAB characters):
+#
+#<value>	<dbkey>	<name>	<path>
+#GRCh38_gencode_v26_CTAT_lib_July192017 GRCh38 GRCh38_gencode_v26_CTAT_lib_July192017 /depot/GRCh38_gencode_v26_CTAT_lib_July192017/ctat_genome_lib_build_dir
diff -r 000000000000 -r 5ff7593a7220 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Oct 04 15:23:36 2017 -0400
@@ -0,0 +1,11 @@
+
+
+
+ value, dbkey, name, path
+
+
+
+ value, dbkey, name, path
+
+
+