Mercurial > repos > devteam > data_manager_gatk_picard_index_builder

--- a/data_manager/data_manager_gatk_picard_index_builder.py	Tue Apr 01 09:11:41 2014 -0400
+++ b/data_manager/data_manager_gatk_picard_index_builder.py	Tue Apr 01 10:40:49 2014 -0400
@@ -113,7 +113,6 @@
 def _sort_fasta_gatk( fasta_filename ):
     ( unsorted_filename, fasta_offsets, current_order ) = _move_and_index_fasta_for_sorting( fasta_filename )
     sorted_names = map( str, range( 1, 100 ) ) + map( _int_to_roman, range( 1, 100 ) ) + [ 'X', 'Y', 'M' ]
-    file( '/afs/bx.psu.edu/user/d/dave/gatpciard.log', 'w' ).write( str( sorted_names ) )
     #detect if we have chrN, or just N
     has_chr = False
     for chrom in sorted_names:
@@ -132,7 +131,6 @@
         # Append each chromosome only once.
         if name in current_order and name not in existing_sorted_names:
             existing_sorted_names.append( name )
-    file( '/afs/bx.psu.edu/user/d/dave/gatpciard.log', 'w' ).write( str( existing_sorted_names ) )
     for name in current_order:
         #TODO: confirm that non-canonical names do not need to be sorted specially
         if name not in existing_sorted_names:
--- a/data_manager/data_manager_gatk_picard_index_builder.xml	Tue Apr 01 09:11:41 2014 -0400
+++ b/data_manager/data_manager_gatk_picard_index_builder.xml	Tue Apr 01 10:40:49 2014 -0400
@@ -1,18 +1,22 @@
 <tool id="gatk_picard_index_builder" name="Generate GATK-sorted Picard indexes" tool_type="manage_data" version="0.0.1">
     <description>builder</description>
     <requirements>
+        <requirement type="package" version="0.1.18">samtools</requirement>
         <requirement type="package" version="1.56.0">picard</requirement>
-        <requirement type="package" version="0.1.18">samtools</requirement>
     </requirements>
-    <command>
-        export ;
-        echo ${input}
+    <command interpreter="python">
+        data_manager_gatk_picard_index_builder.py "${out_file}" \
+            --jar "\$JAVA_JAR_PATH/CreateSequenceDictionary.jar" \
+            --fasta_filename "${all_fasta_source.fields.path}" \
+            --fasta_dbkey "${all_fasta_source.fields.dbkey}" \
+            --fasta_description "${all_fasta_source.fields.name}" \
+            --data_table_name "gatk_picard_indexes"
     </command>
     <inputs>
         <param name="all_fasta_source" type="select" label="Source FASTA Sequence">
             <options from_data_table="all_fasta"/>
         </param>
-        <param type="text" name="input" value="" label="text to pe" />
+        <param type="text" name="sequence_name" value="" label="Name of sequence" />
         <param type="text" name="sequence_id" value="" label="ID for sequence" />
     </inputs>
     <outputs>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Tue Apr 01 10:40:49 2014 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the FASTA files
+#under the "genome" directory (a directory that contains one
+#subdirectory per build). The script extract_fasta.py generates the
+#file all_fasta.loc. Each line has the following format (fields are
+#separated by TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3	/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain one entry for each individual
+#FASTA file, so a single build may have multiple entries, as with
+#hg19 above.
+#
--- a/tool_data_table_conf.xml.sample	Tue Apr 01 09:11:41 2014 -0400
+++ b/tool_data_table_conf.xml.sample	Tue Apr 01 10:40:49 2014 -0400
@@ -1,5 +1,10 @@
 <!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
 <tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
     <!-- Location of Picard dict files valid for GATK -->
     <table name="gatk_picard_indexes" comment_char="#">
         <columns>value, dbkey, name, path</columns>