Mercurial > repos > devteam > data_manager_gatk_picard_index_builder
changeset 2:e09681f51de4 draft
Uploaded updated data manager tool.
author | devteam |
---|---|
date | Fri, 04 Apr 2014 15:43:20 -0400 (2014-04-04) |
parents | f6052387284e |
children | bb4185c65cc7 |
files | README data_manager/data_manager_gatk_picard_index_builder.py data_manager/data_manager_gatk_picard_index_builder.xml tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml |
diffstat | 5 files changed, 82 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/data_manager_gatk_picard_index_builder.py Tue Apr 01 10:40:49 2014 -0400 +++ b/data_manager/data_manager_gatk_picard_index_builder.py Fri Apr 04 15:43:20 2014 -0400 @@ -27,16 +27,27 @@ sequence_name = dbkey return sequence_id, sequence_name -def build_picard_index( data_manager_dict, fasta_filename, target_directory, dbkey, sequence_id, sequence_name, jar, data_table_name=DEFAULT_DATA_TABLE_NAME ): +def build_picard_index( data_manager_dict, fasta_filename, target_directory, dbkey, sequence_id, + sequence_name, jar, data_table_name=DEFAULT_DATA_TABLE_NAME, sort_fasta=False, + sam_index_path=None ): fasta_base_name = os.path.split( fasta_filename )[-1] - gatk_sorted_fasta_filename = os.path.join( target_directory, fasta_base_name ) - shutil.copy( fasta_filename, gatk_sorted_fasta_filename ) - _sort_fasta_gatk( gatk_sorted_fasta_filename ) - sam_index_filename = '%s.fai' % gatk_sorted_fasta_filename - if not os.path.exists( sam_index_filename ): - sam_command = [ 'samtools', 'faidx', gatk_sorted_fasta_filename ] + fasta_to_index = os.path.join( target_directory, fasta_base_name ) + if sort_fasta: + shutil.copy( fasta_filename, fasta_to_index ) + _sort_fasta_gatk( fasta_to_index ) + else: + os.symlink(fasta_filename, fasta_to_index) + sam_index_filename = '%s.fai' % fasta_base_name + if sam_index_path is not None: + sam_index_basepath = os.path.split( sam_index_path )[0] + else: + sam_index_basepath = os.path.join( target_directory, sam_index_filename ) + if sam_index_path is not None and os.path.exists( sam_index_path ) and os.path.exists( os.path.join( sam_index_basepath, sam_index_filename ) ): + os.symlink( sam_index_path + '.fai', os.path.join( target_directory, sam_index_filename ) ) + else: + sam_command = [ 'samtools', 'faidx', fasta_to_index ] _run_command( sam_command, target_directory ) - args = [ 'java', '-jar', jar, 'R=%s' % gatk_sorted_fasta_filename, 'O=%s.dict' % sequence_id ] + args = [ 'java', '-jar', jar, 'R=%s' % fasta_to_index, 'O=%s.dict' % sequence_id ] _run_command( args, target_directory ) data_table_entry = dict( value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_name ) _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ) @@ -149,6 +160,8 @@ parser.add_option( '-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta_description' ) parser.add_option( '-n', '--data_table_name', dest='data_table_name', action='store', type="string", default=None, help='data_table_name' ) parser.add_option( '-j', '--jar', dest='jar', action='store', type="string", default=None, help='GATK .jar file' ) + parser.add_option( '-s', '--sort_fasta', dest='sort_fasta', action='store', type="string", default=False, help='Sort input FASTA' ) + parser.add_option( '-i', '--sam_index', dest='sam_index', action='store', type="string", default=None, help='SAM index of input FASTA' ) (options, args) = parser.parse_args() filename = args[0] @@ -170,7 +183,9 @@ options.fasta_dbkey, sequence_id, sequence_name, - options.jar, + options.jar, + sort_fasta=options.sort_fasta, + sam_index_path=options.sam_index, data_table_name=options.data_table_name or DEFAULT_DATA_TABLE_NAME ) #save info to json file
--- a/data_manager/data_manager_gatk_picard_index_builder.xml Tue Apr 01 10:40:49 2014 -0400 +++ b/data_manager/data_manager_gatk_picard_index_builder.xml Fri Apr 04 15:43:20 2014 -0400 @@ -1,5 +1,5 @@ -<tool id="gatk_picard_index_builder" name="Generate GATK-sorted Picard indexes" tool_type="manage_data" version="0.0.1"> - <description>builder</description> +<tool id="gatk_picard_index_builder" name="Generate Picard indexes" tool_type="manage_data" version="0.0.2"> + <description>sorted for GATK</description> <requirements> <requirement type="package" version="0.1.18">samtools</requirement> <requirement type="package" version="1.56.0">picard</requirement> @@ -10,19 +10,37 @@ --fasta_filename "${all_fasta_source.fields.path}" \ --fasta_dbkey "${all_fasta_source.fields.dbkey}" \ --fasta_description "${all_fasta_source.fields.name}" \ + #if $input_fasta_sorting.sort_input_fasta == 'sort_fasta': + --sort_fasta \ + #else: + --sam_index "${input_fasta_sorting.sam_index_source.fields.path}" \ + #end if --data_table_name "gatk_picard_indexes" </command> <inputs> <param name="all_fasta_source" type="select" label="Source FASTA Sequence"> - <options from_data_table="all_fasta"/> + <options from_data_table="all_fasta" /> </param> + <conditional name="input_fasta_sorting"> + <param name="sort_input_fasta" type="select" label="Input fasta sorting"> + <option value="input_order" selected="True">As-is</option> + <option value="sort_fasta">Sort for GATK</option> + </param> + <when value="input_order"> + <param name="sam_index_source" type="select" label="SAM index of FASTA"> + <options from_data_table="fasta_indexes"> + <filter type="param_value" column="0" key="value" ref="all_fasta_source" /> + <validator type="no_options" message="This FASTA file does not have SAM indexes."/> + </options> + </param> + </when> + </conditional> <param type="text" name="sequence_name" value="" label="Name of sequence" /> <param type="text" name="sequence_id" value="" label="ID for sequence" /> </inputs> <outputs> <data name="out_file" format="data_manager_json"/> </outputs> - <help> .. class:: infomark
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Fri Apr 04 15:43:20 2014 -0400 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- a/tool_data_table_conf.xml.sample Tue Apr 01 10:40:49 2014 -0400 +++ b/tool_data_table_conf.xml.sample Fri Apr 04 15:43:20 2014 -0400 @@ -10,4 +10,9 @@ <columns>value, dbkey, name, path</columns> <file path="tool-data/gatk_sorted_picard_index.loc" /> </table> + <!-- Location of SAMTools indexes and other files --> + <table name="fasta_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/fasta_indexes.loc" /> + </table> </tables>
--- a/tool_dependencies.xml Tue Apr 01 10:40:49 2014 -0400 +++ b/tool_dependencies.xml Fri Apr 04 15:43:20 2014 -0400 @@ -1,9 +1,9 @@ <?xml version="1.0"?> <tool_dependency> <package name="picard" version="1.56.0"> - <repository changeset_revision="7206dbf34dcd" name="package_picard_1_56_0" owner="devteam" prior_installation_required="False" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="7206dbf34dcd" name="package_picard_1_56_0" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> </package> <package name="samtools" version="0.1.18"> - <repository changeset_revision="c0f72bdba484" name="package_samtools_0_1_18" owner="devteam" prior_installation_required="False" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="c0f72bdba484" name="package_samtools_0_1_18" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency>