Mercurial > repos > devteam > data_manager_gatk_picard_index_builder
changeset 1:f6052387284e
Uploaded correct files.
author | Dave Bouvier <dave@bx.psu.edu> |
---|---|
date | Tue, 01 Apr 2014 10:40:49 -0400 |
parents | dfa522822967 |
children | e09681f51de4 |
files | data_manager/data_manager_gatk_picard_index_builder.py data_manager/data_manager_gatk_picard_index_builder.xml tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample |
diffstat | 4 files changed, 32 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/data_manager_gatk_picard_index_builder.py Tue Apr 01 09:11:41 2014 -0400 +++ b/data_manager/data_manager_gatk_picard_index_builder.py Tue Apr 01 10:40:49 2014 -0400 @@ -113,7 +113,6 @@ def _sort_fasta_gatk( fasta_filename ): ( unsorted_filename, fasta_offsets, current_order ) = _move_and_index_fasta_for_sorting( fasta_filename ) sorted_names = map( str, range( 1, 100 ) ) + map( _int_to_roman, range( 1, 100 ) ) + [ 'X', 'Y', 'M' ] - file( '/afs/bx.psu.edu/user/d/dave/gatpciard.log', 'w' ).write( str( sorted_names ) ) #detect if we have chrN, or just N has_chr = False for chrom in sorted_names: @@ -132,7 +131,6 @@ # Append each chromosome only once. if name in current_order and name not in existing_sorted_names: existing_sorted_names.append( name ) - file( '/afs/bx.psu.edu/user/d/dave/gatpciard.log', 'w' ).write( str( existing_sorted_names ) ) for name in current_order: #TODO: confirm that non-canonical names do not need to be sorted specially if name not in existing_sorted_names:
--- a/data_manager/data_manager_gatk_picard_index_builder.xml Tue Apr 01 09:11:41 2014 -0400
+++ b/data_manager/data_manager_gatk_picard_index_builder.xml Tue Apr 01 10:40:49 2014 -0400
@@ -1,18 +1,22 @@
 <tool id="gatk_picard_index_builder" name="Generate GATK-sorted Picard indexes" tool_type="manage_data" version="0.0.1">
     <description>builder</description>
     <requirements>
+        <requirement type="package" version="0.1.18">samtools</requirement>
         <requirement type="package" version="1.56.0">picard</requirement>
-        <requirement type="package" version="0.1.18">samtools</requirement>
     </requirements>
-    <command>
-        export ;
-        echo ${input}
+    <command interpreter="python">
+        data_manager_gatk_picard_index_builder.py "${out_file}" \
+        --jar "\$JAVA_JAR_PATH/CreateSequenceDictionary.jar" \
+        --fasta_filename "${all_fasta_source.fields.path}" \
+        --fasta_dbkey "${all_fasta_source.fields.dbkey}" \
+        --fasta_description "${all_fasta_source.fields.name}" \
+        --data_table_name "gatk_picard_indexes"
     </command>
     <inputs>
         <param name="all_fasta_source" type="select" label="Source FASTA Sequence">
             <options from_data_table="all_fasta"/>
         </param>
-        <param type="text" name="input" value="" label="text to pe" />
+        <param type="text" name="sequence_name" value="" label="Name of sequence" />
         <param type="text" name="sequence_id" value="" label="ID for sequence" />
     </inputs>
     <outputs>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample Tue Apr 01 10:40:49 2014 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- a/tool_data_table_conf.xml.sample Tue Apr 01 09:11:41 2014 -0400
+++ b/tool_data_table_conf.xml.sample Tue Apr 01 10:40:49 2014 -0400
@@ -1,5 +1,10 @@
 <!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
 <tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
     <!-- Location of Picard dict files valid for GATK -->
     <table name="gatk_picard_indexes" comment_char="#">
         <columns>value, dbkey, name, path</columns>