Mercurial > repos > devteam > tophat_fusion_post
changeset 0:47e483967b48
Upload
author | devteam@galaxyproject.org |
---|---|
date | Mon, 27 Jan 2014 13:04:32 -0500 |
parents | |
children | ae4ed992d98a |
files | tool-data/bowtie2_indices.loc.sample tool-data/tophat_fusion_blastdb.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml tophat_fusion_post.xml |
diffstat | 5 files changed, 143 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/bowtie2_indices.loc.sample Mon Jan 27 13:04:32 2014 -0500 @@ -0,0 +1,37 @@ +# bowtie2_indices.loc.sample +# This is a *.loc.sample file distributed with Galaxy that enables tools +# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2. +# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup +# First create these data files and save them in your own data directory structure. +# Then, create a bowtie2_indices.loc file to use those indexes with tools. +# Copy this file, save it with the same name (minus the .sample), +# follow the format examples, and store the result in this directory. +# The file should include a one-line entry for each index set. +# The path points to the "basename" for the set, not a specific file. +# It has four text columns separated by TABS. +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +# So, for example, if you had hg19 indexes stored in: +# +# /depot/data2/galaxy/hg19/bowtie2/ +# +# containing the hg19 genome and hg19canon.*.bt2 files, such as: +# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.fa +# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.1.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 18:56 hg19canon.2.bt2 +# -rw-rw-r-- 1 james james 3.3K Feb 10 16:54 hg19canon.3.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 16:54 hg19canon.4.bt2 +# -rw-rw-r-- 1 james james 914M Feb 10 20:45 hg19canon.rev.1.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 20:45 hg19canon.rev.2.bt2 +# +# then the bowtie2_indices.loc entry could look like this: +# +#hg19 hg19 Human (hg19) /depot/data2/galaxy/hg19/bowtie2/hg19canon +# +#More examples: +# +#mm10 mm10 Mouse (mm10) /depot/data2/galaxy/mm10/bowtie2/mm10 +#dm3 dm3 D. melanogaster (dm3) /depot/data2/galaxy/dm3/bowtie2/dm3 +# +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/tophat_fusion_blastdb.loc.sample Mon Jan 27 13:04:32 2014 -0500 @@ -0,0 +1,37 @@ +#This is a sample file distributed with Galaxy that is used to define a +#list of nucleotide BLAST databases, using three columns tab separated +#(longer runs of whitespace are TAB characters): +# +#<unique_id> <database_caption> <base_name_path> +# +#The captions typically contain spaces and might end with the build date. +#It is important that the actual database name does not have a space in +#it, and that there are only two tabs on each line. +# +#So, for example, if your database is nt and the path to your base name +#is /depot/data2/galaxy/blastdb/nt/nt.chunk, then the blastdb.loc entry +#would look like this: +# +#nt_02_Dec_2009 nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk +# +#and your /depot/data2/galaxy/blastdb/nt directory would contain all of +#your "base names" (e.g.): +# +#-rw-r--r-- 1 wychung galaxy 23437408 2008-04-09 11:26 nt.chunk.00.nhr +#-rw-r--r-- 1 wychung galaxy 3689920 2008-04-09 11:26 nt.chunk.00.nin +#-rw-r--r-- 1 wychung galaxy 251215198 2008-04-09 11:26 nt.chunk.00.nsq +#...etc... +# +#Your blastdb.loc file should include an entry per line for each "base name" +#you have stored. For example: +# +#nt_02_Dec_2009 nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk +#wgs_30_Nov_2009 wgs 30 Nov 2009 /depot/data2/galaxy/blastdb/wgs/wgs.chunk +#test_20_Sep_2008 test 20 Sep 2008 /depot/data2/galaxy/blastdb/test/test +#...etc... +# +#You can download the NCBI-provided nucleotide databases like NT from here: +#ftp://ftp.ncbi.nlm.nih.gov/blast/db/ +# +#See also blastdb_p.loc which is for any protein BLAST database, and +#blastdb_d.loc which is for any protein domains databases (like CDD). \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Jan 27 13:04:32 2014 -0500
@@ -0,0 +1,12 @@
+<tables>
+    <!-- Bowtie2 index locations; the tool looks rows up by dbkey and reads the path column. -->
+    <table name="bowtie2_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie2_indices.loc" />
+    </table>
+    <!-- BLAST database locations for tophat-fusion-post; backed by its own .loc file, not the Bowtie2 one. -->
+    <table name="tophat_fusion_blastdb_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/tophat_fusion_blastdb.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Jan 27 13:04:32 2014 -0500
@@ -0,0 +1,10 @@
+<?xml version="1.0"?>
+<!-- Package dependencies for this repository. -->
+<tool_dependency>
+    <!-- blast+ 2.2.28, supplied by the package repository referenced below. -->
+    <package version="2.2.28" name="blast+">
+        <repository
+            owner="iuc"
+            name="package_blast_plus_2_2_28"/>
+    </package>
+</tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tophat_fusion_post.xml Mon Jan 27 13:04:32 2014 -0500
@@ -0,0 +1,58 @@
+<tool id="tophat_fusion_post" name="Tophat Fusion Post" version="0.1">
+    <description>post-processing to identify fusion genes</description>
+    <version_command>tophat-fusion-post --version</version_command>
+    <!-- blast+ 2.2.28 is the package declared in tool_dependencies.xml; it must be listed here to be put on the job's environment. -->
+    <requirements>
+        <requirement type="package">tophat2</requirement>
+        <requirement type="package" version="2.2.28">blast+</requirement>
+    </requirements>
+    <command>
+        ## Set up tophat_out structure.
+        mkdir tophat_out; ln -s "$accepted_hits" tophat_out/accepted_hits.bam; ln -s "$fusions" tophat_out/fusions.out;
+
+        ## Set up blast directory structure.
+        #set blast_db_path = ${__get_data_table_entry__('tophat_fusion_blastdb_indexes', 'dbkey', $accepted_hits.dbkey, 'path')}
+        ln -s "${blast_db_path}/" blast;
+
+        ## Set up additional files:
+        ln -s "${blast_db_path}/refGene.txt" .;
+        ln -s "${blast_db_path}/ensGene.txt" .;
+        ln -s "${blast_db_path}/mcl" .;
+
+        ## Run tophat-fusion-post with the number of job slots Galaxy allocated (4 if unset).
+        tophat-fusion-post -p \${GALAXY_SLOTS:-4}
+            --num-fusion-reads $num_fusion_reads --num-fusion-pairs $num_fusion_pairs --num-fusion-both $num_fusion_both --fusion-read-mismatches $fusion_read_mismatches --fusion-multireads $fusion_multireads
+        #if str($is_human) == 'No':
+            --non-human
+        #end if
+
+        ## Bowtie2 indices.
+        ${__get_data_table_entry__('bowtie2_indexes', 'dbkey', $accepted_hits.dbkey, 'path')}
+    </command>
+
+    <inputs>
+        <param format="bam" name="accepted_hits" type="data" label="BAM file of aligned RNA-Seq reads" help=""/>
+        <param format="tabular" name="fusions" type="data" label="Tabular file of potential fusions" help=""/>
+        <param name="num_fusion_reads" type="integer" value="3" label="Num Fusion Reads" help="Fusions with at least this many supporting reads will be reported."/>
+        <param name="num_fusion_pairs" type="integer" value="2" label="Num Fusion Pairs" help="Fusions with at least this many supporting pairs will be reported."/>
+        <param name="num_fusion_both" type="integer" value="0" label="Num Fusion Reads + Pairs" help="The sum of supporting reads and pairs is at least this number for a fusion to be reported."/>
+        <param name="fusion_read_mismatches" type="integer" value="2" label="Fusion Read Mismatches" help="Reads support fusions if they map across fusion with at most this many mismatches."/>
+        <param name="fusion_multireads" type="integer" value="2" label="Fusion Multireads" help="Reads that map to more than this many places will be ignored."/>
+        <param name="is_human" type="select" label="Is your data from humans?">
+            <option value="Yes" selected="True">Yes</option>
+            <option value="No">No</option>
+        </param>
+    </inputs>
+
+    <stdio>
+        <!-- A non-zero exit code fails the job; all stdout/stderr text is recorded as progress log only. -->
+        <exit_code range="1:" level="fatal" description="tophat-fusion-post exited with an error"/>
+        <regex match=".*" source="both" level="log" description="tool progress"/>
+    </stdio>
+
+    <outputs>
+        <!-- Both outputs are collected from the tophatfusion_out directory in the job working directory. -->
+        <data format="txt" name="results_txt" label="${tool.name} on ${on_string}: text results" from_work_dir="tophatfusion_out/result.txt"/>
+        <data format="html" name="results_html" label="${tool.name} on ${on_string}: html results" from_work_dir="tophatfusion_out/result.html"/>
+    </outputs>
+</tool>