# HG changeset patch # User Dave Bouvier # Date 1396363249 14400 # Node ID f6052387284e423562bfc21a6c4cc06563db3272 # Parent dfa522822967b8295713cc1744fe552b7760c9d4 Uploaded correct files. diff -r dfa522822967 -r f6052387284e data_manager/data_manager_gatk_picard_index_builder.py --- a/data_manager/data_manager_gatk_picard_index_builder.py Tue Apr 01 09:11:41 2014 -0400 +++ b/data_manager/data_manager_gatk_picard_index_builder.py Tue Apr 01 10:40:49 2014 -0400 @@ -113,7 +113,6 @@ def _sort_fasta_gatk( fasta_filename ): ( unsorted_filename, fasta_offsets, current_order ) = _move_and_index_fasta_for_sorting( fasta_filename ) sorted_names = map( str, range( 1, 100 ) ) + map( _int_to_roman, range( 1, 100 ) ) + [ 'X', 'Y', 'M' ] - file( '/afs/bx.psu.edu/user/d/dave/gatpciard.log', 'w' ).write( str( sorted_names ) ) #detect if we have chrN, or just N has_chr = False for chrom in sorted_names: @@ -132,7 +131,6 @@ # Append each chromosome only once. if name in current_order and name not in existing_sorted_names: existing_sorted_names.append( name ) - file( '/afs/bx.psu.edu/user/d/dave/gatpciard.log', 'w' ).write( str( existing_sorted_names ) ) for name in current_order: #TODO: confirm that non-canonical names do not need to be sorted specially if name not in existing_sorted_names: diff -r dfa522822967 -r f6052387284e data_manager/data_manager_gatk_picard_index_builder.xml --- a/data_manager/data_manager_gatk_picard_index_builder.xml Tue Apr 01 09:11:41 2014 -0400 +++ b/data_manager/data_manager_gatk_picard_index_builder.xml Tue Apr 01 10:40:49 2014 -0400 @@ -1,18 +1,22 @@ builder + samtools picard - samtools - - export ; - echo ${input} + + data_manager_gatk_picard_index_builder.py "${out_file}" \ + --jar "\$JAVA_JAR_PATH/CreateSequenceDictionary.jar" \ + --fasta_filename "${all_fasta_source.fields.path}" \ + --fasta_dbkey "${all_fasta_source.fields.dbkey}" \ + --fasta_description "${all_fasta_source.fields.name}" \ + --data_table_name "gatk_picard_indexes" - + diff -r 
dfa522822967 -r f6052387284e tool-data/all_fasta.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Tue Apr 01 10:40:49 2014 -0400 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# diff -r dfa522822967 -r f6052387284e tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Tue Apr 01 09:11:41 2014 -0400 +++ b/tool_data_table_conf.xml.sample Tue Apr 01 10:40:49 2014 -0400 @@ -1,5 +1,10 @@ + + + value, dbkey, name, path + +

value, dbkey, name, path