Mercurial > repos > devteam > data_manager_gatk_picard_index_builder
changeset 3:bb4185c65cc7 draft
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author | devteam |
---|---|
date | Tue, 13 Oct 2015 10:18:31 -0400 (2015-10-13) |
parents | e09681f51de4 |
children | b7e58560ac6a |
files | data_manager/data_manager_gatk_picard_index_builder.py data_manager/data_manager_gatk_picard_index_builder.xml tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml |
diffstat | 5 files changed, 20 insertions(+), 87 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/data_manager_gatk_picard_index_builder.py Fri Apr 04 15:43:20 2014 -0400 +++ b/data_manager/data_manager_gatk_picard_index_builder.py Tue Oct 13 10:18:31 2015 -0400 @@ -27,27 +27,16 @@ sequence_name = dbkey return sequence_id, sequence_name -def build_picard_index( data_manager_dict, fasta_filename, target_directory, dbkey, sequence_id, - sequence_name, jar, data_table_name=DEFAULT_DATA_TABLE_NAME, sort_fasta=False, - sam_index_path=None ): +def build_picard_index( data_manager_dict, fasta_filename, target_directory, dbkey, sequence_id, sequence_name, jar, data_table_name=DEFAULT_DATA_TABLE_NAME ): fasta_base_name = os.path.split( fasta_filename )[-1] - fasta_to_index = os.path.join( target_directory, fasta_base_name ) - if sort_fasta: - shutil.copy( fasta_filename, fasta_to_index ) - _sort_fasta_gatk( fasta_to_index ) - else: - os.symlink(fasta_filename, fasta_to_index) - sam_index_filename = '%s.fai' % fasta_base_name - if sam_index_path is not None: - sam_index_basepath = os.path.split( sam_index_path )[0] - else: - sam_index_basepath = os.path.join( target_directory, sam_index_filename ) - if sam_index_path is not None and os.path.exists( sam_index_path ) and os.path.exists( os.path.join( sam_index_basepath, sam_index_filename ) ): - os.symlink( sam_index_path + '.fai', os.path.join( target_directory, sam_index_filename ) ) - else: - sam_command = [ 'samtools', 'faidx', fasta_to_index ] + gatk_sorted_fasta_filename = os.path.join( target_directory, fasta_base_name ) + shutil.copy( fasta_filename, gatk_sorted_fasta_filename ) + _sort_fasta_gatk( gatk_sorted_fasta_filename ) + sam_index_filename = '%s.fai' % gatk_sorted_fasta_filename + if not os.path.exists( sam_index_filename ): + sam_command = [ 'samtools', 'faidx', gatk_sorted_fasta_filename ] _run_command( sam_command, target_directory ) - args = [ 'java', '-jar', jar, 'R=%s' % fasta_to_index, 'O=%s.dict' % sequence_id ] + args = [ 'java', '-jar', jar, 'R=%s' % gatk_sorted_fasta_filename, 'O=%s.dict' % sequence_id ] _run_command( args, target_directory ) data_table_entry = dict( value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_name ) _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ) @@ -160,8 +149,6 @@ parser.add_option( '-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta_description' ) parser.add_option( '-n', '--data_table_name', dest='data_table_name', action='store', type="string", default=None, help='data_table_name' ) parser.add_option( '-j', '--jar', dest='jar', action='store', type="string", default=None, help='GATK .jar file' ) - parser.add_option( '-s', '--sort_fasta', dest='sort_fasta', action='store', type="string", default=False, help='Sort input FASTA' ) - parser.add_option( '-i', '--sam_index', dest='sam_index', action='store', type="string", default=None, help='SAM index of input FASTA' ) (options, args) = parser.parse_args() filename = args[0] @@ -183,9 +170,7 @@ options.fasta_dbkey, sequence_id, sequence_name, - options.jar, - sort_fasta=options.sort_fasta, - sam_index_path=options.sam_index, + options.jar, data_table_name=options.data_table_name or DEFAULT_DATA_TABLE_NAME ) #save info to json file
--- a/data_manager/data_manager_gatk_picard_index_builder.xml Fri Apr 04 15:43:20 2014 -0400 +++ b/data_manager/data_manager_gatk_picard_index_builder.xml Tue Oct 13 10:18:31 2015 -0400 @@ -1,46 +1,28 @@ -<tool id="gatk_picard_index_builder" name="Generate Picard indexes" tool_type="manage_data" version="0.0.2"> - <description>sorted for GATK</description> +<tool id="gatk_picard_index_builder" name="Generate GATK-sorted Picard indexes" tool_type="manage_data" version="0.0.1"> + <description>builder</description> <requirements> <requirement type="package" version="0.1.18">samtools</requirement> <requirement type="package" version="1.56.0">picard</requirement> </requirements> <command interpreter="python"> - data_manager_gatk_picard_index_builder.py "${out_file}" \ - --jar "\$JAVA_JAR_PATH/CreateSequenceDictionary.jar" \ - --fasta_filename "${all_fasta_source.fields.path}" \ - --fasta_dbkey "${all_fasta_source.fields.dbkey}" \ - --fasta_description "${all_fasta_source.fields.name}" \ - #if $input_fasta_sorting.sort_input_fasta == 'sort_fasta': - --sort_fasta \ - #else: - --sam_index "${input_fasta_sorting.sam_index_source.fields.path}" \ - #end if + data_manager_gatk_picard_index_builder.py "${out_file}" + --jar "\$JAVA_JAR_PATH/CreateSequenceDictionary.jar" + --fasta_filename "${all_fasta_source.fields.path}" + --fasta_dbkey "${all_fasta_source.fields.dbkey}" + --fasta_description "${all_fasta_source.fields.name}" --data_table_name "gatk_picard_indexes" </command> <inputs> <param name="all_fasta_source" type="select" label="Source FASTA Sequence"> - <options from_data_table="all_fasta" /> + <options from_data_table="all_fasta"/> </param> - <conditional name="input_fasta_sorting"> - <param name="sort_input_fasta" type="select" label="Input fasta sorting"> - <option value="input_order" selected="True">As-is</option> - <option value="sort_fasta">Sort for GATK</option> - </param> - <when value="input_order"> - <param name="sam_index_source" type="select" label="SAM index of FASTA"> - <options from_data_table="fasta_indexes"> - <filter type="param_value" column="0" key="value" ref="all_fasta_source" /> - <validator type="no_options" message="This FASTA file does not have SAM indexes."/> - </options> - </param> - </when> - </conditional> <param type="text" name="sequence_name" value="" label="Name of sequence" /> <param type="text" name="sequence_id" value="" label="ID for sequence" /> </inputs> <outputs> <data name="out_file" format="data_manager_json"/> </outputs> + <help> .. class:: infomark
--- a/tool-data/fasta_indexes.loc.sample Fri Apr 04 15:43:20 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,29 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of Samtools indexed sequences data files. You will need -#to create these data files and then create a fasta_indexes.loc file -#similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The fasta_indexes.loc -#file has this format (white space characters are TAB characters): -# -# <unique_build_id> <dbkey> <display_name> <file_base_path> -# -#So, for example, if you had hg19 Canonical indexed stored in -# -# /depot/data2/galaxy/hg19/sam/, -# -#then the fasta_indexes.loc entry would look like this: -# -#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa -# -#and your /depot/data2/galaxy/hg19/sam/ directory -#would contain hg19canon.fa and hg19canon.fa.fai files. -# -#Your fasta_indexes.loc file should include an entry per line for -#each index set you have stored. The file in the path does actually -#exist, but it should never be directly used. Instead, the name serves -#as a prefix for the index file. For example: -# -#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa -#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa -#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa -#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- a/tool_data_table_conf.xml.sample Fri Apr 04 15:43:20 2014 -0400 +++ b/tool_data_table_conf.xml.sample Tue Oct 13 10:18:31 2015 -0400 @@ -10,9 +10,4 @@ <columns>value, dbkey, name, path</columns> <file path="tool-data/gatk_sorted_picard_index.loc" /> </table> - <!-- Location of SAMTools indexes and other files --> - <table name="fasta_indexes" comment_char="#"> - <columns>value, dbkey, name, path</columns> - <file path="tool-data/fasta_indexes.loc" /> - </table> </tables>
--- a/tool_dependencies.xml Fri Apr 04 15:43:20 2014 -0400 +++ b/tool_dependencies.xml Tue Oct 13 10:18:31 2015 -0400 @@ -1,9 +1,9 @@ <?xml version="1.0"?> <tool_dependency> <package name="picard" version="1.56.0"> - <repository changeset_revision="7206dbf34dcd" name="package_picard_1_56_0" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="7206dbf34dcd" name="package_picard_1_56_0" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <package name="samtools" version="0.1.18"> - <repository changeset_revision="c0f72bdba484" name="package_samtools_0_1_18" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="c0f72bdba484" name="package_samtools_0_1_18" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency>