Mercurial > repos > devteam > picard

<tool name="MergeBamAlignment" id="picard_MergeBamAlignment" version="@TOOL_VERSION@.0">
  <description>merge alignment data with additional info stored in an unmapped BAM dataset</description>
  <macros>
    <import>picard_macros.xml</import>
  </macros>
  <expand macro="requirements" />
  <!--
    Command template (Cheetah).
    1. Resolves the reference FASTA: a history dataset is symlinked to
       localref.fa and a sequence dictionary is built for it; a cached
       reference is taken from the "path" field of the selected data table row.
    2. Runs picard MergeBamAlignment with one KEY=value argument per tool input.
       Repeated inputs (aligned BAMs, attributes, orientations) emit the same
       key once per value.
  -->
  <command detect_errors="exit_code"><![CDATA[
    @java_options@
    #set $picard_dict = "localref.dict"
    #set $ref_fasta = "localref.fa"     ## Picard expects the reference to carry a .fa extension

    #if str( $reference_source.reference_source_selector ) == "history":

      ## Only a history dataset needs the local symlink; linking in cached mode
      ## would leave a dangling link that points at the data table value.
      ln -s "${reference_source.ref_file}" "${ref_fasta}" &&

      picard CreateSequenceDictionary REFERENCE="${ref_fasta}" OUTPUT="${picard_dict}"
      QUIET=true
      VERBOSITY=ERROR

      &&

    #else:

      ## Cached reference: use the FASTA path recorded in the data table directly
      #set $ref_fasta = str( $reference_source.ref_file.fields.path )

    #end if

    picard
    MergeBamAlignment
      UNMAPPED_BAM="${unmapped_bam}"

      PAIRED_RUN=true ##This argument is ignored and will be removed. Required. Possible values: {true, false}

      ## One ALIGNED_BAM / READ1_ALIGNED_BAM / READ2_ALIGNED_BAM argument per dataset in the repeat
      #if str( $aligned_or_read1_and_read2.aligned_or_read1_and_read2_selector ) == "paired_one_file":
        #for $dataset in $aligned_or_read1_and_read2.aligned_bams:
          ALIGNED_BAM="${dataset.aligned_bam}"
        #end for
      #elif str( $aligned_or_read1_and_read2.aligned_or_read1_and_read2_selector ) == "paired_two_files":
        #for $dataset in $aligned_or_read1_and_read2.read1_aligned_bams:
          READ1_ALIGNED_BAM="${dataset.read1_aligned_bam}"
        #end for
        #for $dataset in $aligned_or_read1_and_read2.read2_aligned_bams:
          READ2_ALIGNED_BAM="${dataset.read2_aligned_bam}"
        #end for
      #else:
        ## single_file: single end data is passed as READ1_ALIGNED_BAM only
        #for $dataset in $aligned_or_read1_and_read2.read1_aligned_bams:
          READ1_ALIGNED_BAM="${dataset.read1_aligned_bam}"
        #end for
      #end if

      OUTPUT="${outFile}"
      REFERENCE_SEQUENCE="${ref_fasta}"

      CLIP_ADAPTERS="${clip_adapters}"
      IS_BISULFITE_SEQUENCE="${is_bisulfite_sequence}"
      ALIGNED_READS_ONLY="${aligned_reads_only}"
      MAX_INSERTIONS_OR_DELETIONS="${max_insertions_or_deletions}"

      #for $attribute in $attributes_to_retain:
        ATTRIBUTES_TO_RETAIN="${attribute.attribute}"
      #end for

      #for $attribute in $attributes_to_remove:
        ATTRIBUTES_TO_REMOVE="${attribute.attribute}"
      #end for

      READ1_TRIM="${read1_trim}"
      READ2_TRIM="${read2_trim}"

      ## An unset multi-select renders as "None"; otherwise it is a comma-separated list
      #if str( $orientations ) != "None":
        #for $orientation in str( $orientations ).split(','):   ## See trello card https://trello.com/c/9nW02Zhd
          EXPECTED_ORIENTATIONS="${orientation}"
        #end for
      #end if

      ALIGNER_PROPER_PAIR_FLAGS="${aligner_proper_pair_flags}"
      PRIMARY_ALIGNMENT_STRATEGY="${primary_alignment_strategy}"
      CLIP_OVERLAPPING_READS="${clip_overlapping_reads}"
      INCLUDE_SECONDARY_ALIGNMENTS="${include_secondary_alignments}"
      ADD_MATE_CIGAR="${add_mate_cigar}"

      VALIDATION_STRINGENCY="${validation_stringency}"

      SORT_ORDER=coordinate
      QUIET=true
      VERBOSITY=ERROR

  ]]></command>

  <inputs>

    <!-- Reference genome: either a cached entry from the picard_indexes data table or a FASTA dataset from the history -->
    <conditional name="reference_source">
      <param name="reference_source_selector" type="select" label="Load reference genome from">
        <option value="cached">Local cache</option>
        <option value="history">History</option>
      </param>
      <when value="cached">
        <param name="ref_file" type="select" label="Use dictionary from the list" help="Select genome from the list">
          <options from_data_table="picard_indexes">
            <filter type="sort_by" column="2" />
            <validator type="no_options" message="No indexes are available" />
          </options>
          <validator type="no_options" message="A built-in dictionary is not available for the build associated with the selected input file"/>
        </param>
      </when>
      <when value="history">
        <param name="ref_file" type="data" format="fasta" label="Use the following dataset to create dictionary" help="You can upload a FASTA sequence to the history from which Picard will automatically generate dictionary using CreateSequenceDictionary command" />
      </when>
    </conditional>

    <param format="sam,bam" name="unmapped_bam" type="data" label="Selected unaligned SAM or BAM with original reads" help="UNMAPPED_BAM; This dataset must be sorted in queryname order (use picard_SortSam to do this)" />

    <!-- Aligned data layout: decides which of ALIGNED_BAM / READ1_ALIGNED_BAM / READ2_ALIGNED_BAM the command emits -->
    <conditional name="aligned_or_read1_and_read2">
      <param name="aligned_or_read1_and_read2_selector" type="select" label="What type of aligned data do you have?">
        <option value="paired_one_file">Paired data in a single BAM file (ALIGNED_BAM)</option>
        <option value="paired_two_files">Paired data in separate files (READ1_ALIGNED_BAM and READ2_ALIGNED_BAM)</option>
        <option value="single_file">Single end data (READ1_ALIGNED_BAM)</option>
      </param>
      <when value="paired_one_file">
        <repeat name="aligned_bams" title="Aligned SAM or BAM dataset" min="1" help="You can provide multiple datasets">
          <param name="aligned_bam" type="data" format="sam,bam" label="SAM or BAM dataset(s) with alignment data" help="ALIGNED_BAM"/>
        </repeat>
      </when>
      <when value="paired_two_files">
        <repeat name="read1_aligned_bams" title="Aligned SAM or BAM dataset for Read 1" min="1" help="You can provide multiple datasets">
          <param name="read1_aligned_bam" type="data" format="sam,bam" label="SAM or BAM dataset(s) with alignment data for Read1" help="READ1_ALIGNED_BAM"/>
        </repeat>
        <repeat name="read2_aligned_bams" title="Aligned SAM or BAM dataset for Read 2" min="1" help="You can provide multiple datasets">
          <param name="read2_aligned_bam" type="data" format="sam,bam" label="SAM or BAM dataset(s) with alignment data for Read2" help="READ2_ALIGNED_BAM"/>
        </repeat>
      </when>
      <when value="single_file">
        <repeat name="read1_aligned_bams" title="Aligned SAM or BAM dataset for Single Reads" min="1" help="You can provide multiple datasets">
          <param name="read1_aligned_bam" type="data" format="sam,bam" label="SAM or BAM dataset(s) with alignment data" help="READ1_ALIGNED_BAM"/>
        </repeat>
      </when>
    </conditional>

    <param name="clip_adapters" type="boolean" checked="true" label="Whether to clip adapters where identified" help="CLIP_ADAPTERS; default=True"/>
    <param name="is_bisulfite_sequence" type="boolean" label="Whether the data is from bisulfite sequencing (used when calculating the NM tag)" help="IS_BISULFITE_SEQUENCE; default=False"/>
    <param name="aligned_reads_only" type="boolean" label="Whether to output only aligned reads" help="ALIGNED_READS_ONLY; default=False"/>
    <param name="max_insertions_or_deletions" type="integer" value="1" label="The maximum number of insertions or deletions permitted for an alignment to be included" help="MAX_INSERTIONS_OR_DELETIONS; Alignments with more than this many insertions or deletions will be ignored. Set to -1 to allow any number of insertions or deletions. default=1"/>

    <!-- Each repeat entry becomes one ATTRIBUTES_TO_RETAIN / ATTRIBUTES_TO_REMOVE argument -->
    <repeat name="attributes_to_retain" title="Retain the following alignment attribute" min="0" help="You can provide multiple attributes">
      <param name="attribute" type="text" label="Reserved alignment attributes (tags starting with X, Y, or Z) that should be brought over from the alignment data when merging" help="ATTRIBUTES_TO_RETAIN; example: XA"/>
    </repeat>

    <repeat name="attributes_to_remove" title="Remove the following alignment attribute" min="0" help="You can provide multiple attributes">
      <param name="attribute" type="text" label="Attributes from the alignment record that should be removed when merging." help="ATTRIBUTES_TO_REMOVE; This overrides ATTRIBUTES_TO_RETAIN if they share common tags"/>
    </repeat>

    <param name="read1_trim" type="integer" value="0" label="The number of bases trimmed from the beginning of read 1 prior to alignment" help="READ1_TRIM; default=0"/>
    <param name="read2_trim" type="integer" value="0" label="The number of bases trimmed from the beginning of read 2 prior to alignment" help="READ2_TRIM; default=0"/>

    <!-- Each checked value becomes one EXPECTED_ORIENTATIONS argument; nothing is emitted when none is checked -->
    <param name="orientations" type="select" multiple="true" display="checkboxes" label="The expected orientation of proper read pairs" help="EXPECTED_ORIENTATIONS; multiple orientations can be selected">
      <option value="FR">Forward/Reverse (FR)</option>
      <option value="RF">Reverse/Forward (RF)</option>
      <option value="TANDEM">Tandem</option>
    </param>


    <param name="aligner_proper_pair_flags" type="boolean" label="Use the aligner's idea of what a proper pair is rather than computing in this program" help="ALIGNER_PROPER_PAIR_FLAGS; default=False"/>

    <param name="primary_alignment_strategy" type="select" label="Strategy for selecting primary alignment when the aligner has provided more than one alignment for a pair or fragments" help="PRIMARY_ALIGNMENT_STRATEGY; see help below for more info; default=BestMapq">
      <option value="BestMapq" selected="true">BestMapq</option>
      <option value="EarliestFragment">EarliestFragment</option>
      <option value="BestEndMapq">BestEndMapq</option>
      <option value="MostDistant">MostDistant</option>
    </param>

    <param name="clip_overlapping_reads" type="boolean" checked="true" label="For paired reads, soft clip the 3' end of each read if necessary so that it does not extend past the 5' end of its mate" help="CLIP_OVERLAPPING_READS; default=True"/>
    <param name="include_secondary_alignments" type="boolean" checked="true" label="If false, do not write secondary alignments to output" help="INCLUDE_SECONDARY_ALIGNMENTS; default=True"/>
    <param name="add_mate_cigar" type="boolean" checked="true" label="Adds the mate CIGAR tag (MC) if true, does not if false" help="ADD_MATE_CIGAR; default=True"/>
    <expand macro="VS" />
  </inputs>
  <outputs>
    <!-- Single BAM result; the command template writes it via OUTPUT with SORT_ORDER=coordinate -->
    <data
      name="outFile"
      format="bam"
      label="${tool.name} on ${on_string}: BAM with merged alignments" />
  </outputs>
  <tests>
    <!--
      History reference plus a single paired BAM (ALIGNED_BAM), with every
      option set to the same value as its declared default.
    -->
    <test>
      <param name="reference_source_selector" value="history" />
      <param name="ref_file" value="picard_MergeBamAlignment_ref.fa" ftype="fasta" />
      <param name="unmapped_bam" value="picard_MergeBamAlignment_unaligned.bam" ftype="bam"/>
      <param name="aligned_or_read1_and_read2_selector" value="paired_one_file"/>
      <param name="aligned_bam" value="picard_MergeBamAlignment_aligned.bam" ftype="bam"/>
      <param name="clip_adapters" value="True"/>
      <param name="is_bisulfite_sequence" value="False"/>
      <param name="aligned_reads_only" value="False"/>
      <param name="max_insertions_or_deletions" value="1"/>
      <param name="read1_trim" value="0"/>
      <param name="read2_trim" value="0"/>
      <!-- Must match the declared input name "orientations", otherwise the value is never applied -->
      <param name="orientations" value="FR"/>
      <param name="aligner_proper_pair_flags" value="False"/>
      <param name="primary_alignment_strategy" value="BestMapq"/>
      <param name="clip_overlapping_reads" value="True"/>
      <param name="include_secondary_alignments" value="True"/>
      <param name="add_mate_cigar" value="True"/>
      <output name="outFile" file="picard_MergeBamAlignment_test1.bam" ftype="bam" lines_diff="4"/>
    </test>
  </tests>


  <help>

.. class:: infomark

**Purpose**

Merges alignment data from a SAM or BAM dataset with additional data stored in an unmapped BAM dataset and produces a third SAM or BAM dataset of aligned and unaligned reads.

@dataset_collections@

@description@

  UNMAPPED_BAM=File
  UNMAPPED=File                 Original SAM or BAM file of unmapped reads, which must be in queryname order.  Required.

  ALIGNED_BAM=File
  ALIGNED=File                  SAM or BAM file(s) with alignment data.  This option may be specified 0 or more times.
                                Cannot be used in conjunction with option(s) READ1_ALIGNED_BAM (R1_ALIGNED)
                                READ2_ALIGNED_BAM (R2_ALIGNED)

  READ1_ALIGNED_BAM=File
  R1_ALIGNED=File               SAM or BAM file(s) with alignment data from the first read of a pair.  This option may be
                                specified 0 or more times.  Cannot be used in conjunction with option(s) ALIGNED_BAM
                                (ALIGNED)

  READ2_ALIGNED_BAM=File
  R2_ALIGNED=File               SAM or BAM file(s) with alignment data from the second read of a pair.  This option may
                                be specified 0 or more times.  Cannot be used in conjunction with option(s) ALIGNED_BAM
                                (ALIGNED)

  PAIRED_RUN=Boolean
  PE=Boolean                    This argument is ignored and will be removed.  Required. Possible values: {true, false}

  JUMP_SIZE=Integer
  JUMP=Integer                  The expected jump size (required if this is a jumping library). Deprecated. Use
                                EXPECTED_ORIENTATIONS instead.  Default value: null.  Cannot be used in conjunction with
                                option(s) EXPECTED_ORIENTATIONS (ORIENTATIONS)

  CLIP_ADAPTERS=Boolean         Whether to clip adapters where identified.  Default value: true. Possible values: {true, false}

  IS_BISULFITE_SEQUENCE=Boolean Whether the lane is bisulfite sequence (used when calculating the NM tag).  Default value:
                                false. Possible values: {true, false}

  ALIGNED_READS_ONLY=Boolean    Whether to output only aligned reads. Default value: false. Possible values: {true, false}

  MAX_INSERTIONS_OR_DELETIONS=Integer
  MAX_GAPS=Integer              The maximum number of insertions or deletions permitted for an alignment to be included.
                                Alignments with more than this many insertions or deletions will be ignored. Set to -1 to
                                allow any number of insertions or deletions.  Default value: 1.

  ATTRIBUTES_TO_RETAIN=String   Reserved alignment attributes (tags starting with X, Y, or Z) that should be brought over
                                from the alignment data when merging.  This option may be specified 0 or more times.

  ATTRIBUTES_TO_REMOVE=String   Attributes from the alignment record that should be removed when merging.  This overrides
                                ATTRIBUTES_TO_RETAIN if they share common tags.  This option may be specified 0 or more
                                times.

  READ1_TRIM=Integer
  R1_TRIM=Integer               The number of bases trimmed from the beginning of read 1 prior to alignment  Default
                                value: 0.

  READ2_TRIM=Integer
  R2_TRIM=Integer               The number of bases trimmed from the beginning of read 2 prior to alignment  Default
                                value: 0.

  EXPECTED_ORIENTATIONS=PairOrientation
  ORIENTATIONS=PairOrientation  The expected orientation of proper read pairs. Replaces JUMP_SIZE  Possible values: {FR,
                                RF, TANDEM} This option may be specified 0 or more times.  Cannot be used in conjunction
                                with option(s) JUMP_SIZE (JUMP)

  ALIGNER_PROPER_PAIR_FLAGS=Boolean
                                Use the aligner's idea of what a proper pair is rather than computing in this program.
                                Default value: false. Possible values: {true, false}

  SORT_ORDER=SortOrder
  SO=SortOrder                  The order in which the merged reads should be output.  Default value: coordinate.
                                Possible values: {unsorted, queryname, coordinate}

  PRIMARY_ALIGNMENT_STRATEGY=PrimaryAlignmentStrategy
                                Strategy for selecting primary alignment when the aligner has provided more than one
                                alignment for a pair or fragment, and none are marked as primary, more than one is marked
                                as primary, or the primary alignment is filtered out for some reason. BestMapq expects
                                that multiple alignments will be correlated with HI tag, and prefers the pair of
                                alignments with the largest MAPQ, in the absence of a primary selected by the aligner.
                                EarliestFragment prefers the alignment which maps the earliest base in the read. Note
                                that EarliestFragment may not be used for paired reads. BestEndMapq is appropriate for
                                cases in which the aligner is not pair-aware, and does not output the HI tag. It simply
                                picks the alignment for each end with the highest MAPQ, and makes those alignments
                                primary, regardless of whether the two alignments make sense together. MostDistant is also
                                for a non-pair-aware aligner, and picks the alignment pair with the largest insert size.
                                If all alignments would be chimeric, it picks the alignments for each end with the best
                                MAPQ.  For all algorithms, ties are resolved arbitrarily.  Default value: BestMapq.
                                Possible values: {BestMapq, EarliestFragment, BestEndMapq, MostDistant}

  CLIP_OVERLAPPING_READS=Boolean
                                For paired reads, soft clip the 3' end of each read if necessary so that it does not
                                extend past the 5' end of its mate.  Default value: true. Possible values: {true, false}

  INCLUDE_SECONDARY_ALIGNMENTS=Boolean
                                If false, do not write secondary alignments to output.  Default value: true.
                                Possible values: {true, false}

  ADD_MATE_CIGAR=Boolean
  MC=Boolean                    Adds the mate CIGAR tag (MC) if true, does not if false. Possible values: {true, false}


@more_info@
  </help>
</tool>
author	iuc
date	Wed, 08 Feb 2017 12:41:25 -0500
parents	e417b1d6288d
children	486d7500da69