# HG changeset patch # User devteam # Date 1396640600 14400 # Node ID e09681f51de478c83b7f07de4a59e4d948d6f8d7 # Parent f6052387284e423562bfc21a6c4cc06563db3272 Uploaded updated data manager tool. diff -r f6052387284e -r e09681f51de4 README diff -r f6052387284e -r e09681f51de4 data_manager/data_manager_gatk_picard_index_builder.py --- a/data_manager/data_manager_gatk_picard_index_builder.py Tue Apr 01 10:40:49 2014 -0400 +++ b/data_manager/data_manager_gatk_picard_index_builder.py Fri Apr 04 15:43:20 2014 -0400 @@ -27,16 +27,27 @@ sequence_name = dbkey return sequence_id, sequence_name -def build_picard_index( data_manager_dict, fasta_filename, target_directory, dbkey, sequence_id, sequence_name, jar, data_table_name=DEFAULT_DATA_TABLE_NAME ): +def build_picard_index( data_manager_dict, fasta_filename, target_directory, dbkey, sequence_id, + sequence_name, jar, data_table_name=DEFAULT_DATA_TABLE_NAME, sort_fasta=False, + sam_index_path=None ): fasta_base_name = os.path.split( fasta_filename )[-1] - gatk_sorted_fasta_filename = os.path.join( target_directory, fasta_base_name ) - shutil.copy( fasta_filename, gatk_sorted_fasta_filename ) - _sort_fasta_gatk( gatk_sorted_fasta_filename ) - sam_index_filename = '%s.fai' % gatk_sorted_fasta_filename - if not os.path.exists( sam_index_filename ): - sam_command = [ 'samtools', 'faidx', gatk_sorted_fasta_filename ] + fasta_to_index = os.path.join( target_directory, fasta_base_name ) + if sort_fasta: + shutil.copy( fasta_filename, fasta_to_index ) + _sort_fasta_gatk( fasta_to_index ) + else: + os.symlink(fasta_filename, fasta_to_index) + sam_index_filename = '%s.fai' % fasta_base_name + if sam_index_path is not None: + sam_index_basepath = os.path.split( sam_index_path )[0] + else: + sam_index_basepath = os.path.join( target_directory, sam_index_filename ) + if sam_index_path is not None and os.path.exists( sam_index_path ) and os.path.exists( os.path.join( sam_index_basepath, sam_index_filename ) ): + os.symlink( sam_index_path + '.fai', os.path.join( target_directory, sam_index_filename ) ) + else: + sam_command = [ 'samtools', 'faidx', fasta_to_index ] _run_command( sam_command, target_directory ) - args = [ 'java', '-jar', jar, 'R=%s' % gatk_sorted_fasta_filename, 'O=%s.dict' % sequence_id ] + args = [ 'java', '-jar', jar, 'R=%s' % fasta_to_index, 'O=%s.dict' % sequence_id ] _run_command( args, target_directory ) data_table_entry = dict( value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_name ) _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ) @@ -149,6 +160,8 @@ parser.add_option( '-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta_description' ) parser.add_option( '-n', '--data_table_name', dest='data_table_name', action='store', type="string", default=None, help='data_table_name' ) parser.add_option( '-j', '--jar', dest='jar', action='store', type="string", default=None, help='GATK .jar file' ) + parser.add_option( '-s', '--sort_fasta', dest='sort_fasta', action='store', type="string", default=False, help='Sort input FASTA' ) + parser.add_option( '-i', '--sam_index', dest='sam_index', action='store', type="string", default=None, help='SAM index of input FASTA' ) (options, args) = parser.parse_args() filename = args[0] @@ -170,7 +183,9 @@ options.fasta_dbkey, sequence_id, sequence_name, - options.jar, + options.jar, + sort_fasta=options.sort_fasta, + sam_index_path=options.sam_index, data_table_name=options.data_table_name or DEFAULT_DATA_TABLE_NAME ) #save info to json file diff -r f6052387284e -r e09681f51de4 data_manager/data_manager_gatk_picard_index_builder.xml --- a/data_manager/data_manager_gatk_picard_index_builder.xml Tue Apr 01 10:40:49 2014 -0400 +++ b/data_manager/data_manager_gatk_picard_index_builder.xml Fri Apr 04 15:43:20 2014 -0400 @@ -1,5 +1,5 @@ - - builder + + sorted for GATK samtools picard @@ -10,19 +10,37 @@ --fasta_filename "${all_fasta_source.fields.path}" \ --fasta_dbkey "${all_fasta_source.fields.dbkey}" \ --fasta_description "${all_fasta_source.fields.name}" \ + #if $input_fasta_sorting.sort_input_fasta == 'sort_fasta': + --sort_fasta \ + #else: + --sam_index "${input_fasta_sorting.sam_index_source.fields.path}" \ + #end if --data_table_name "gatk_picard_indexes" - + + + + + + + + + + + + + + + - .. class:: infomark diff -r f6052387284e -r e09681f51de4 tool-data/fasta_indexes.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Fri Apr 04 15:43:20 2014 -0400 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa diff -r f6052387284e -r e09681f51de4 tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Tue Apr 01 10:40:49 2014 -0400 +++ b/tool_data_table_conf.xml.sample Fri Apr 04 15:43:20 2014 -0400 @@ -10,4 +10,9 @@ value, dbkey, name, path + + + value, dbkey, name, path + +
diff -r f6052387284e -r e09681f51de4 tool_dependencies.xml --- a/tool_dependencies.xml Tue Apr 01 10:40:49 2014 -0400 +++ b/tool_dependencies.xml Fri Apr 04 15:43:20 2014 -0400 @@ -1,9 +1,9 @@ - + - +