view picard_MergeBamAlignment.xml @ 148:b6662c2c8d64 draft

modified version number of downsamplesam to make toolshed aware that it has been modified
author Rayan Chikhi <chikhi@psu.edu>
date Mon, 21 Jul 2014 16:07:50 -0400
parents 9f95141d8f32
children
line wrap: on
line source

<tool id="picard_mergeBAMAlignment" name="Merge BAM Alignment" version="1.106.0">
  <!--
    Galaxy wrapper for Picard MergeBamAlignment.
    Documentation: http://picard.sourceforge.net/command-line-overview.shtml#MergeBamAlignment

    Merges alignment data from a SAM or BAM file with additional data stored in an
    unmapped BAM file and produces a third SAM or BAM file of aligned and unaligned
    reads. NOTE that this program expects to find a sequence dictionary in the same
    directory as REFERENCE_SEQUENCE and expects it to have the same base name as the
    reference fasta except with the extension '.dict'.

    Picard arguments that this wrapper always supplies:
      OUTPUT=File              Merged SAM or BAM file to write to. Required.
      REFERENCE_SEQUENCE=File  Path to the fasta file for the reference sequence. Required.

    NOTE(review): the output path, output format and reference fasta are passed as the
    first three positional arguments of mergebamalignment_wrapper.sh; presumably the
    script maps them to OUTPUT= and REFERENCE_SEQUENCE= (the script is not part of this
    file, confirm there).
  -->
  <description>Merges alignment data from a SAM or BAM file with additional data stored in an unmapped BAM file</description>
  <requirements>
    <requirement type="package" version="1.106.0">picard</requirement>
  </requirements>
  <command interpreter="bash">
    mergebamalignment_wrapper.sh $output1 $outformat $REFERENCE_SEQUENCE UNMAPPED_BAM=$UNMAPPED_BAM PAIRED_RUN=$PAIRED_RUN

    ## Optional dataset inputs. An optional dataset that was left unset renders as the
    ## string "None", so a plain truthiness test on str(...) is not enough.
    #if str( $ALIGNED_BAM ) != "None":
      ALIGNED_BAM=$ALIGNED_BAM
    #end if
    #for $extra in $ALIGNED_BAMs
      ALIGNED_BAM=${extra.ALIGNED_BAM_file}
    #end for

    #if str( $READ1_ALIGNED_BAM ) != "None":
      READ1_ALIGNED_BAM=$READ1_ALIGNED_BAM
    #end if
    #for $extra in $READ1_ALIGNED_BAMs
      READ1_ALIGNED_BAM=${extra.READ1_ALIGNED_BAM_file}
    #end for

    #if str( $READ2_ALIGNED_BAM ) != "None":
      READ2_ALIGNED_BAM=$READ2_ALIGNED_BAM
    #end if
    #for $extra in $READ2_ALIGNED_BAMs
      READ2_ALIGNED_BAM=${extra.READ2_ALIGNED_BAM_file}
    #end for

    ## Free-text inputs are only passed when non-empty, and are double-quoted so that a
    ## value containing spaces reaches the wrapper as a single argument.
    #if str( $ATTRIBUTES_TO_RETAIN ):
      ATTRIBUTES_TO_RETAIN="$ATTRIBUTES_TO_RETAIN"
    #end if
    #for $extra in $ATTRIBUTES_TO_RETAINs
      #if str( $extra.ATTRIBUTES_TO_RETAIN_extra ):
        ATTRIBUTES_TO_RETAIN="${extra.ATTRIBUTES_TO_RETAIN_extra}"
      #end if
    #end for

    #if str( $PROGRAM_RECORD_ID ):
      PROGRAM_RECORD_ID="$PROGRAM_RECORD_ID"
    #end if

    #if str( $PROGRAM_GROUP_VERSION ):
      PROGRAM_GROUP_VERSION="$PROGRAM_GROUP_VERSION"
    #end if

    #if str( $PROGRAM_GROUP_COMMAND_LINE ):
      PROGRAM_GROUP_COMMAND_LINE="$PROGRAM_GROUP_COMMAND_LINE"
    #end if

    #if str( $PROGRAM_GROUP_NAME ):
      PROGRAM_GROUP_NAME="$PROGRAM_GROUP_NAME"
    #end if

    ## Boolean, integer and select inputs always carry a value, so they are passed
    ## unconditionally.
    CLIP_ADAPTERS=$CLIP_ADAPTERS
    IS_BISULFITE_SEQUENCE=$IS_BISULFITE_SEQUENCE
    ALIGNED_READS_ONLY=$ALIGNED_READS_ONLY
    MAX_INSERTIONS_OR_DELETIONS=$MAX_INSERTIONS_OR_DELETIONS
    READ1_TRIM=$READ1_TRIM
    READ2_TRIM=$READ2_TRIM
    EXPECTED_ORIENTATIONS=$EXPECTED_ORIENTATIONS
    SORT_ORDER=$SORT_ORDER
    PRIMARY_ALIGNMENT_STRATEGY=$PRIMARY_ALIGNMENT_STRATEGY
    CLIP_OVERLAPPING_READS=$CLIP_OVERLAPPING_READS
    INCLUDE_SECONDARY_ALIGNMENTS=$INCLUDE_SECONDARY_ALIGNMENTS

    VALIDATION_STRINGENCY=LENIENT TMP_DIR=$__new_file_path__
    ## stderr is captured in the "outlog" output dataset declared in the outputs section.
    2&gt; $outlog
    ##|| echo "Error running Picard MergeBamAlignment" >&amp;2
  </command>
  <inputs>
    <!-- required arguments below -->
    <!-- The default goes in "value"; the output label appends the chosen format as a suffix. -->
    <param name="title"
           type="text"
           value="Merged"
           label="Name for the output merged BAM or SAM file"
           help="This name will appear in your history so use it to remember what the new file in your history contains" />
    <param name="outformat" type="select" label="Output format">
      <option value="bam" selected="True">BAM</option>
      <option value="sam">SAM</option>
    </param>
    <param name="UNMAPPED_BAM"
           type="data"
           format="bam,sam"
           label="Original SAM or BAM file of unmapped reads, which must be in queryname order.  Required." />
    <param name="REFERENCE_SEQUENCE"
           type="data"
           format="fasta"
           label="Path to the fasta file for the reference sequence. Required." />
    <param name="PAIRED_RUN"
           type="boolean"
           value="true"
           checked="true"
           truevalue="true"
           falsevalue="false"
           label="Whether this is a paired-end run.   Required." />

    <!-- optional arguments below -->
    <!-- ALIGNED_BAM and READ1/READ2_ALIGNED_BAM are mutually exclusive, so each must be optional. -->
    <param name="ALIGNED_BAM"
           type="data"
           format="bam,sam"
           optional="true"
           label="SAM or BAM file(s) with alignment data.  This option may be specified 0 or more times. Cannot be used in conjunction with option(s) READ1_ALIGNED_BAM (R1_ALIGNED) READ2_ALIGNED_BAM (R2_ALIGNED)"
           help="Need to add more files? Use controls below." />
    <repeat name="ALIGNED_BAMs" title="More ALIGNED_BAMs">
      <param name="ALIGNED_BAM_file" type="data" format="bam,sam" label="ALIGNED_BAM" />
    </repeat>
    <param name="READ1_ALIGNED_BAM"
           type="data"
           format="bam,sam"
           optional="true"
           label="SAM or BAM file(s) with alignment data from the first read of a pair.  This option may be specified 0 or more times.  Cannot be used in conjunction with option(s) ALIGNED_BAM (ALIGNED)"
           help="Need to add more files? Use controls below." />
    <repeat name="READ1_ALIGNED_BAMs" title="More READ1_ALIGNED_BAMs">
      <param name="READ1_ALIGNED_BAM_file" type="data" format="bam,sam" label="READ1_ALIGNED_BAM" />
    </repeat>
    <param name="READ2_ALIGNED_BAM"
           type="data"
           format="bam,sam"
           optional="true"
           label="SAM or BAM file(s) with alignment data from the second read of a pair.  This option may be specified 0 or more times.  Cannot be used in conjunction with option(s) ALIGNED_BAM (ALIGNED)"
           help="Need to add more files? Use controls below." />
    <repeat name="READ2_ALIGNED_BAMs" title="More READ2_ALIGNED_BAMs">
      <param name="READ2_ALIGNED_BAM_file" type="data" format="bam,sam" label="READ2_ALIGNED_BAM" />
    </repeat>

    <param name="PROGRAM_RECORD_ID" type="text" value=""
           label="The program group ID of the aligner (if not supplied by the aligned file)." />
    <param name="PROGRAM_GROUP_VERSION" type="text" value=""
           label="The version of the program group (if not supplied by the aligned file)." />
    <param name="PROGRAM_GROUP_COMMAND_LINE" type="text" value=""
           label="The command line of the program group (if not supplied by the aligned file)." />
    <param name="PROGRAM_GROUP_NAME" type="text" value=""
           label="The name of the program group (if not supplied by the aligned file)." />

    <param name="CLIP_ADAPTERS"
           type="boolean"
           value="true"
           checked="true"
           truevalue="true"
           falsevalue="false"
           label="Whether to clip adapters where identified.  Default value: true." />
    <!-- "checked" must agree with "value": both of the next two options default to false. -->
    <param name="IS_BISULFITE_SEQUENCE"
           type="boolean"
           value="false"
           checked="false"
           truevalue="true"
           falsevalue="false"
           label="Whether the lane is bisulfite sequence (used when calculating the NM tag)." />
    <param name="ALIGNED_READS_ONLY"
           type="boolean"
           value="false"
           checked="false"
           truevalue="true"
           falsevalue="false"
           label="Whether to output only aligned reads." />
    <param name="MAX_INSERTIONS_OR_DELETIONS"
           type="integer"
           value="1"
           label="The maximum number of insertions or deletions permitted for an alignment to be included."
           help="Alignments with more than this many insertions or deletions will be ignored.  Set to -1 to allow any number of insertions or deletions.  Default value: 1." />

    <param name="ATTRIBUTES_TO_RETAIN" type="text" value=""
           label="Reserved alignment attributes (tags starting with X, Y, or Z) that should be brought over from the alignment data when merging.  This option may be specified 0 or more times." />
    <repeat name="ATTRIBUTES_TO_RETAINs" title="More ATTRIBUTES_TO_RETAIN">
      <param name="ATTRIBUTES_TO_RETAIN_extra" type="text" value="" label="ATTRIBUTES_TO_RETAIN" />
    </repeat>

    <param name="READ1_TRIM" type="integer" value="0"
           label="The number of bases trimmed from the beginning of read 1 prior to alignment" />
    <param name="READ2_TRIM" type="integer" value="0"
           label="The number of bases trimmed from the beginning of read 2 prior to alignment" />
    <param name="EXPECTED_ORIENTATIONS" type="select"
           label="The expected orientation of proper read pairs.">
      <option value="null" selected="True">null</option>
      <option value="FR">FR</option>
      <option value="RF">RF</option>
      <option value="TANDEM">TANDEM</option>
    </param>
    <param name="SORT_ORDER" type="select"
           label="The order in which the merged reads should be output.  Default value: coordinate.">
      <option value="coordinate" selected="True">coordinate</option>
      <option value="queryname">queryname</option>
      <option value="unsorted">unsorted</option>
    </param>
    <param name="PRIMARY_ALIGNMENT_STRATEGY"
           type="select"
           label="Strategy for selecting primary alignment when the aligner has provided more than one alignment for a pair or fragment, and none are marked as primary, more than one is marked as primary, or the primary alignment is filtered out for some reason."
           help="BestMapq expects that multiple alignments will be correlated with HI tag, and prefers the pair of alignments with the largest MAPQ, in the absence of a primary selected by the aligner. EarliestFragment prefers the alignment which maps the earliest base in the read. Note that EarliestFragment may not be used for paired reads. BestEndMapq is appropriate for cases in which the aligner is not pair-aware, and does not output the HI tag. It simply picks the alignment for each end with the highest MAPQ, and makes those alignments primary, regardless of whether the two alignments make sense together. MostDistant is also for a non-pair-aware aligner, and picks the alignment pair with the largest insert size. If all alignments would be chimeric, it picks the alignments for each end with the best MAPQ.  For all algorithms, ties are resolved arbitrarily.  Default value: BestMapq.">
      <option value="BestMapq" selected="True">BestMapq</option>
      <option value="EarliestFragment">EarliestFragment</option>
      <option value="BestEndMapq">BestEndMapq</option>
      <option value="MostDistant">MostDistant</option>
    </param>

    <param name="CLIP_OVERLAPPING_READS"
           type="boolean"
           value="true"
           checked="true"
           truevalue="true"
           falsevalue="false"
           label="For paired reads, soft clip the 3' end of each read if necessary so that it does not extend past the 5' end of its mate." />
    <param name="INCLUDE_SECONDARY_ALIGNMENTS"
           type="boolean"
           value="true"
           checked="true"
           truevalue="true"
           falsevalue="false"
           label="If false, do not write secondary alignments to output." />
  </inputs>

  <outputs>
    <data format="bam" name="output1" label="${title}.${outformat}">
      <change_format>
        <when input="outformat" value="sam" format="sam" />
      </change_format>
    </data>
    <!-- Receives the stderr stream that the command redirects to $outlog. -->
    <data format="txt" name="outlog" label="${title}.log" />
  </outputs>
  <tests>
    <!-- note: this test is untested :) -->
    <test>
      <param name="REFERENCE_SEQUENCE" value="merger.fasta" />
      <param name="UNMAPPED_BAM" value="unmapped.sam" />
      <param name="PAIRED_RUN" value="false" />
      <param name="outformat" value="sam" />
      <output name="output1" file="output_mergebamalignment" lines_diff="1" />
    </test>
  </tests>
  <help>

**What it does**

Merges alignment data from a SAM or BAM file with additional data (metadata such as read groups) stored in an unmapped BAM file and produces a third SAM or BAM file of aligned and unaligned reads.

NOTE that this program expects to find a sequence dictionary in the same directory as REFERENCE_SEQUENCE and expects it to have the same base name as the reference fasta except with the extension '.dict'.

See the Picard_ documentation for the full description of every option.

.. _Picard: http://picard.sourceforge.net/command-line-overview.shtml#MergeBamAlignment


  </help>
</tool>