# HG changeset patch # User Makoto Nasuno # Date 1456464202 -32400 # Node ID 589de4bdaca48af51eac443cd5ab748e5e7089eb for Apache Aurora jobrunner diff -r 000000000000 -r 589de4bdaca4 tophat2_aurora.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tophat2_aurora.xml Fri Feb 26 14:23:22 2016 +0900 @@ -0,0 +1,410 @@ + + + Gapped-read mapper for RNA-seq data + + + nasuno/tophat2:2.0.9 + + + + ## + ## Set path to index, building the reference if necessary. + ## + + #set index_path = '' + #if $refGenomeSource.genomeSource == "history": + bowtie2-build "$refGenomeSource.ownFile" genome ; ln -s "$refGenomeSource.ownFile" genome.fa ; + #set index_path = 'genome' + #else: + #set index_path = $refGenomeSource.index.fields.path + #end if + + ## + ## Run tophat. + ## + + tophat2 + + ## Change this to accommodate the number of threads you have available. + --num-threads \${GALAXY_SLOTS:-4} + + --output-dir ./tophat_out + + ## Set params. + #if $params.settingsType == "full": + --read-mismatches $params.read_mismatches + #if str($params.bowtie_n) == "Yes": + --bowtie-n + #end if + + --read-edit-dist $params.read_edit_dist + --read-realign-edit-dist $params.read_realign_edit_dist + -a $params.anchor_length + -m $params.splice_mismatches + -i $params.min_intron_length + -I $params.max_intron_length + -g $params.max_multihits + --min-segment-intron $params.min_segment_intron + --max-segment-intron $params.max_segment_intron + --segment-mismatches $params.seg_mismatches + --segment-length $params.seg_length + --library-type $params.library_type + + ## Indel search. + #if $params.indel_search.allow_indel_search == "Yes": + ## --allow-indels + --max-insertion-length $params.indel_search.max_insertion_length + --max-deletion-length $params.indel_search.max_deletion_length + #else: + --no-novel-indels + #end if + + ## Supplying junctions parameters. 
+ #if $params.own_junctions.use_junctions == "Yes": + #if $params.own_junctions.gene_model_ann.use_annotations == "Yes": + -G $params.own_junctions.gene_model_ann.gene_annotation_model + #end if + #if $params.own_junctions.raw_juncs.use_juncs == "Yes": + -j $params.own_junctions.raw_juncs.raw_juncs + #end if + #if str($params.own_junctions.no_novel_juncs) == "Yes": + --no-novel-juncs + #end if + #end if + + #if $params.coverage_search.use_search == "Yes": + --coverage-search + --min-coverage-intron $params.coverage_search.min_coverage_intron + --max-coverage-intron $params.coverage_search.max_coverage_intron + #else: + --no-coverage-search + #end if + + #if str($params.microexon_search) == "Yes": + --microexon-search + #end if + + #if $params.fusion_search.do_search == "Yes": + --fusion-search + --fusion-anchor-length $params.fusion_search.anchor_len + --fusion-min-dist $params.fusion_search.min_dist + --fusion-read-mismatches $params.fusion_search.read_mismatches + --fusion-multireads $params.fusion_search.multireads + --fusion-multipairs $params.fusion_search.multipairs + --fusion-ignore-chromosomes "$params.fusion_search.ignore_chromosomes" + #end if + + #if $params.bowtie2_settings.b2_settings == "Yes": + #if $params.bowtie2_settings.preset.b2_preset == "Yes": + --b2-$params.bowtie2_settings.preset.b2_preset_select + #end if + #end if + + #end if + + ## Read group information. + #if $readGroup.specReadGroup == "yes" + --rg-id "$readGroup.rgid" + --rg-library "$readGroup.rglb" + --rg-platform "$readGroup.rgpl" + --rg-sample "$readGroup.rgsm" + #end if + + ## Set index path, inputs and parameters specific to paired data. 
+ #if $singlePaired.sPaired != "single" + -r $singlePaired.mate_inner_distance + --mate-std-dev=$singlePaired.mate_std_dev + + #if str($singlePaired.report_discordant_pairs) == "No": + --no-discordant + #end if + + #if $singlePaired.sPaired == "paired" + ${index_path} "$singlePaired.input1" "$singlePaired.input2" + #else + ${index_path} "$singlePaired.input.forward" "$singlePaired.input.reverse" + #end if + #else + ${index_path} "$singlePaired.input1" + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" /> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + tophat_macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + +**Tophat Overview** + +TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie(2), and then analyzes the mapping results to identify splice junctions between exons. Please cite: Kim D, Pertea G, Trapnell C, Pimentel H, Kelley R, and Salzberg SL. TopHat2: accurate alignment +of transcriptomes in the presence of insertions, deletions and gene fusions. Genome Biol 14:R36, 2013. + +.. _Tophat: http://ccb.jhu.edu/software/tophat/ + +------ + +**Know what you are doing** + +.. class:: warningmark + +There is no such thing (yet) as an automated gearshift in splice junction identification. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. + +.. 
__: http://ccb.jhu.edu/software/tophat/manual.shtml + +------ + +**Input formats** + +Tophat accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files. + +------ + +**Outputs** + +Tophat produces two output files: + +- junctions -- A UCSC BED_ track of junctions reported by TopHat. Each junction consists of two connected BED blocks, where each block is as long as the maximal overhang of any read spanning the junction. The score is the number of alignments spanning the junction. +- accepted_hits -- A list of read alignments in BAM_ format. + +.. _BED: http://genome.ucsc.edu/FAQ/FAQformat.html#format1 +.. _BAM: http://samtools.sourceforge.net/ + +Two other possible outputs, depending on the options you choose, are insertions and deletions, both of which are in BED format. + +------- + +**Tophat settings** + +All of the options have a default value. You can change any of them. Some of the options in Tophat have been implemented here. + +------ + +**Tophat parameter list** + +This is a list of implemented Tophat options:: + + -r This is the expected (mean) inner distance between mate pairs. For example, for paired end runs with fragments + selected at 300bp, where each end is 50bp, you should set -r to be 200. There is no default, and this parameter + is required for paired end runs. + --mate-std-dev INT The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp. + -a/--min-anchor-length INT The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction. Note that individual spliced + alignments may span a junction with fewer than this many bases on one side. However, every junction involved in spliced alignments is supported by at least one + read with this many bases on each side. This must be at least 3 and the default is 8. 
+ -m/--splice-mismatches INT The maximum number of mismatches that may appear in the "anchor" region of a spliced alignment. The default is 0. + -i/--min-intron-length INT The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart. The default is 70. + -I/--max-intron-length INT The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read. The default is 500000. + -g/--max-multihits INT Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many + alignments. The default is 40. + -G/--GTF [GTF 2.2 file] Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping. + -j/--raw-juncs [juncs file] Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive. + --no-novel-juncs Only look for junctions indicated in the supplied GFF file. (ignored without -G) + --no-coverage-search Disables the coverage based search for junctions. + --coverage-search Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity. + --microexon-search With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer. 
+ --segment-mismatches Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2. + --segment-length Each read is cut up into segments, each at least this long. These segments are mapped independently. The default is 25. + --min-coverage-intron The minimum intron length that may be found during coverage search. The default is 50. + --max-coverage-intron The maximum intron length that may be found during coverage search. The default is 20000. + --min-segment-intron The minimum intron length that may be found during split-segment search. The default is 50. + --max-segment-intron The maximum intron length that may be found during split-segment search. The default is 500000. + + + 10.1186/gb-2013-14-4-r36 + + diff -r 000000000000 -r 589de4bdaca4 tophat_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tophat_macros.xml Fri Feb 26 14:23:22 2016 +0900 @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +