diff print_reads.xml @ 0:6fc6b56b342e draft

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 09:11:51 -0400
parents
children 64d5adc286bb
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/print_reads.xml	Tue Apr 01 09:11:51 2014 -0400
@@ -0,0 +1,150 @@
+<tool id="gatk_print_reads" name="Print Reads" version="0.0.1">
+  <description>from BAM files</description>
+  <requirements>
+      <requirement type="package" version="1.4">gatk</requirement>
+      <requirement type="package" version="0.1.18">samtools</requirement>
+  </requirements>
+  <macros>
+    <import>gatk_macros.xml</import>
+  </macros>
+  <command interpreter="python">gatk_wrapper.py
+   --max_jvm_heap_fraction "1"
+   --stdout "${output_log}"
+   #for $i, $input_bam in enumerate( $reference_source.input_bams ):
+       -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}"
+       #if str( $input_bam.input_bam.metadata.bam_index ) != "None":
+           -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index
+       #end if
+   #end for
+   -p 'java 
+    -jar "\$JAVA_JAR_PATH/GenomeAnalysisTK.jar"
+    -T "PrintReads"
+    ##--num_threads 4 ##hard coded, for now
+    --out "${output_bam}"
+    -et "NO_ET" ##ET no phone home
+    #if $reference_source.reference_source_selector != "history":
+        -R "${reference_source.ref_file.fields.path}"
+    #end if
+    --number "${number}"
+    #if $platform:
+        --platform "${platform}"
+    #end if
+    #if $read_group:
+        --readGroup "${read_group}"
+    #end if
+    #for $sample_file in $sample_file_repeat:
+        --sample_file "${sample_file.input_sample_file}"
+    #end for
+    #for $sample_name in $sample_name_repeat:
+        --sample_name "${sample_name.sample_name}"
+    #end for
+   '
+   
+    #include source=$standard_gatk_options#
+    
+  </command>
+  <inputs>
+    <conditional name="reference_source">
+      <expand macro="reference_source_selector_param" />
+      <when value="cached">
+        <repeat name="input_bams" title="BAM file" min="1" help="-I,--input_file &amp;lt;input_file&amp;gt;">
+            <param name="input_bam" type="data" format="bam" label="BAM file">
+              <validator type="unspecified_build" />
+              <validator type="dataset_metadata_in_data_table" table_name="gatk_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
+            </param>
+        </repeat>
+        <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
+          <options from_data_table="gatk_picard_indexes">
+            <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...--> 
+          </options>
+          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+        </param>
+      </when>
+      <when value="history"> <!-- FIX ME!!!! -->
+        <repeat name="input_bams" title="BAM file" min="1" help="-I,--input_file &amp;lt;input_file&amp;gt;">
+            <param name="input_bam" type="data" format="bam" label="BAM file" >
+            </param>
+        </repeat>
+        <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
+      </when>
+    </conditional>
+    
+    <param name="number" type="integer" value="-1" label="Print the first n reads from the file, discarding the rest" help="-n,--number &amp;lt;number&amp;gt;" />
+    <param name="platform" type="text" value="" label="Exclude all reads with this platform from the output" help="-platform,--platform &amp;lt;platform&amp;gt;" />
+    <param name="read_group" type="text" value="" label="Exclude all reads with this read group from the output" help="-readGroup,--readGroup &amp;lt;readGroup&amp;gt;" />
+    <repeat name="sample_file_repeat" title="File containing a list of samples to include" help="-sf,--sample_file &amp;lt;sample_file&amp;gt;">
+        <param name="input_sample_file" type="data" format="text" label="Sample file" />
+    </repeat>
+    <repeat name="sample_name_repeat" title="Sample name to be included in the analysis" help="-sn,--sample_name &amp;lt;sample_name&amp;gt;">
+        <param name="sample_name" type="text" label="Sample name" />
+    </repeat>
+    
+    <expand macro="gatk_param_type_conditional" />
+    
+  </inputs>
+  <outputs>
+    <data format="bam" name="output_bam" label="${tool.name} on ${on_string} (BAM)" />
+    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
+  </outputs>
+    <param name="number" type="integer" value="-1" label="Print the first n reads from the file, discarding the rest" />
+    <param name="platform" type="text" value="" label="Exclude all reads with this platform from the output" />
+    <param name="read_group" type="text" value="" label="Exclude all reads with this read group from the output" />
+    <repeat name="sample_file_repeat" title="File containing a list of samples to include">
+        <param name="input_sample_file" type="data" format="text" label="Sample file" />
+    </repeat>
+    <repeat name="sample_name_repeat" title="Sample name to be included in the analysis">
+        <param name="sample_name" type="text" label="Sample name" />
+    </repeat>
+  <tests>
+      <test>
+          <param name="reference_source_selector" value="history" />
+          <param name="ref_file" value="phiX.fasta" ftype="fasta" />
+          <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" />
+          <param name="number" value="-1" />
+          <param name="platform" value="" />
+          <param name="read_group" value="" />
+          <param name="sample_file_repeat" value="0" />
+          <param name="sample_name_repeat" value="0" />
+          <param name="gatk_param_type_selector" value="basic" />
+          <output name="output_bam" file="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" compare="contains"/> 
+          <output name="output_log" file="gatk/gatk_print_reads/gatk_print_reads_out_1.log.contains" compare="contains" />
+      </test>
+  </tests>
+  <help>
+**What it does**
+
+PrintReads can dynamically merge the contents of multiple input BAM files, resulting in merged output sorted in coordinate order.
+
+For more information on the GATK Print Reads Walker, see this `tool specific page &lt;http://www.broadinstitute.org/gsa/gatkdocs/release/org_broadinstitute_sting_gatk_walkers_PrintReadsWalker.html&gt;`_.
+
+To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3&gt;`_.
+
+If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions&gt;`_.
+
+------
+
+**Inputs**
+
+GenomeAnalysisTK: PrintReads accepts one or more BAM or SAM input files.
+
+
+**Outputs**
+
+The output is in BAM format.
+
+
+Go `here &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK&gt;`_ for details on GATK file formats.
+
+-------
+
+**Settings**::
+
+ number         int     -1     Print the first n reads from the file, discarding the rest
+ platform         String     NA     Exclude all reads with this platform from the output
+ readGroup         String     NA     Exclude all reads with this read group from the output
+ sample_file     Set[File]     []     File containing a list of samples (one per line). Can be specified multiple times
+ sample_name     Set[String]     []     Sample name to be included in the analysis. Can be specified multiple times.
+
+@CITATION_SECTION@
+  </help>
+</tool>