view crosscontamination_barcode_filter.xml @ 2:e20001675838 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crosscontamination_barcode_filter commit 82a0fd493f5866b3ef65019709ae5c865998f802
author iuc
date Wed, 12 Jun 2019 04:57:52 -0400
parents 78341ccbad0a
children
line wrap: on
line source

<tool id="crosscontamination_barcode_filter" name="Cross-contamination Barcode Filter" version="@VERSION@">
    <description>for use in plate-based barcoded analyses</description>
    <macros>
        <token name="@VERSION@">0.3</token>
        <!-- Sanitizer for batch lists such as "1,4": only digits and commas are
             kept; any other character is replaced by the empty string. -->
        <macro name="sanitize_batch">
            <sanitizer invalid_char="">
                <valid initial="string.digits">
                    <add value=","/>
                </valid>
            </sanitizer>
        </macro>
        <!-- Sanitizer for the regular-expression parameters: ASCII letters, digits
             and the punctuation added below are kept; any other character is
             removed before the value is written into the R configfile.
             string.ascii_letters is used instead of string.letters because the
             latter only exists in Python 2. -->
        <macro name="sanitize_regex">
            <sanitizer invalid_char="">
                <valid initial="string.ascii_letters,string.digits">
                    <add value="!"/>
                    <add value="="/>
                    <add value="-"/>
                    <add value="."/>
                    <add value="*"/>
                    <add value="?"/>
                    <add value="+"/>
                    <add value="\\"/>
                    <add value="_"/>
                    <add value="&#91;"/> <!-- left square bracket, e.g subselecting from vec[1] -->
                    <add value="&#93;"/> <!-- right square bracket -->
                    <add value="&#40;"/> <!-- left parenthesis -->
                    <add value="&#41;"/> <!-- right parenthesis -->
                </valid>
            </sanitizer>
        </macro>
    </macros>
    <!-- R package dependencies resolved for this tool at the pinned versions. -->
    <requirements>
        <requirement type="package" version="3.1.1">r-ggplot2</requirement>
        <requirement type="package" version="1.12.2">r-data.table</requirement>
    </requirements>
    <!-- Reports the tool version: the second space-separated field of the first
         line printed by the script when it is run without arguments. -->
    <version_command><![CDATA[
        Rscript '$__tool_directory__/scripts/crosscontamination_filter.R' | head -n 1 | cut -d' ' -f 2
    ]]></version_command>
    <!-- Runs the R script with the rendered "crossconf" configfile as its only
         argument; failure is detected from the process exit code. -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript '$__tool_directory__/scripts/crosscontamination_filter.R' '$crossconf'
    ]]></command>
    <configfiles>
        <!-- Cheetah template rendered into a file of R assignments (input matrix,
             barcode/plate spec, regexes, sort flag and output paths). The path of
             the rendered file is passed to the R script as its only argument. -->
        <configfile name="crossconf"><![CDATA[
script.dir = '$__tool_directory__/scripts'
input_matrix <- read.table(
    '$input_table',
    stringsAsFactors = FALSE,
    na.strings=c("NA", "-", "?", "."),
    header=TRUE,
    row.names=1
)
input_matrix[is.na(input_matrix)] <- 0
## Built-in protocol: fixed barcode ranges and plate layout.
#if str($inbuilt_spec.select_use) == "mpi_sagar":
spec = list(
    barcodes = '$input_barcodes',
    format = list(
        "1-96"   = c(1,3),
        "97-192" = c(2,4)
    ),
    plates = list(
        "1" = c(1,2),
        "2" = c(3,4)
    )
)
## Custom protocol: one list entry per repeat; a comma is emitted after every entry except the last.
#elif str($inbuilt_spec.select_use) == "custom":
spec = list(
    barcodes = '$input_barcodes',
    format = list(
    #for $i, $s in enumerate($inbuilt_spec.barcode_format)
        "${s.range_start}-${s.range_end}" = c( ${s.batches} )
        #if $i < len(list($inbuilt_spec.barcode_format)) - 1
        ,
        #end if
    #end for
    ),
    plates = list(
    #for $i, $s in enumerate($inbuilt_spec.plate_format)
        "${s.plate}" = c( ${s.batches} )
        #if $i < len(list($inbuilt_spec.plate_format)) - 1
        ,
        #end if
    #end for
    )
)
#end if
regex.extract = '$advanced.regex_extract'
regex.display = '$advanced.regex_display'
sort.cells = as.logical('$advanced.sort_sizes')
out.pdf = '$out_plots'
out.table = '$out_table'
]]>
        </configfile>
    </configfiles>
    <inputs>
        <param name="input_table" type="data" format="tsv,tabular" label="Input Matrix"/>
        <param name="input_barcodes" type="data" format="tsv,tabular,txt" label="Complete Barcodes"/>
        <conditional name="inbuilt_spec">
            <param name="select_use" type="select" label="Plate Protocol">
                <option value="mpi_sagar">CelSeq2 Plate Protocol (Sagar)</option>
                <option value="custom">Custom</option>
            </param>
            <!-- The built-in protocol takes no extra parameters; its layout is fixed in the configfile. -->
            <when value="mpi_sagar"/>
            <when value="custom">
                <!-- min="1": with zero entries the configfile loop would emit an empty R list() for the barcode format. -->
                <repeat name="barcode_format" title="Barcode Format" min="1" help="e.g. Batches 1 and 4 use barcodes 1-100 in the Barcodes file, and Batches 2 and 3 use barcodes 101-200 in the Barcodes file; specify '1' and '100' as Range values, and '1,4' as Batch values, and in the next format specify '101' and '200' as Range values and '2,3' as Batch values">
                    <param name="range_start" type="integer" min="1" value="1" label="Barcode Range: Start"/>
                    <param name="range_end" type="integer" min="2" value="100" label="Barcode Range: End"/>
                    <param name="batches" type="text" value="1,4" label="Batches utilizing this Range">
                        <expand macro="sanitize_batch"/>
                    </param>
                </repeat>
                <!-- min="1": with zero entries the configfile loop would emit an empty R list() for the plate layout. -->
                <repeat name="plate_format" title="Plate Format" min="1" help="e.g. Plate 1 encompasses Batches 1-4, and Plate 2 encompasses Batches 5-8; specify '1' as a Plate value, and '1,2,3,4' as Batch values, and in the next format specify '2' as a Plate value and '5,6,7,8' as Batch values">
                    <param name="plate" type="integer" min="1" value="1" label="Plate Number"/>
                    <param name="batches" type="text" value="1,2,3,4" label="Batches within this Plate Number">
                        <expand macro="sanitize_batch"/>
                    </param>
                </repeat>
            </when>
        </conditional>
        <!-- Optional settings; the regex values are restricted to the characters allowed by the sanitize_regex macro. -->
        <section name="advanced" expanded="false" title="Advanced Parameters">
            <param name="regex_extract" type="text" value=".*P(\\d)_B(\\d)_([ACTG]+)" label="RegEx to extract Plate, Batch, and Barcodes from headers">
                <expand macro="sanitize_regex"/>
            </param>
            <param name="regex_display" type="text" value="P\\1_B\\2_\\3" label="RegEx to replace Plate, Batch, and Barcodes from headers">
                <expand macro="sanitize_regex"/>
            </param>
            <!-- Rendered as the literal TRUE/FALSE that the configfile passes to as.logical(). -->
            <param name="sort_sizes" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Sort Cells by Library Size"/>
        </section>
    </inputs>
    <outputs>
        <!-- Path handed to the R configfile as out.pdf. -->
        <data name="out_plots"
              format="pdf"
              label="${tool.name} on ${on_string}: Plots"/>
        <!-- Path handed to the R configfile as out.table. -->
        <data name="out_table"
              format="tabular"
              label="${tool.name} on ${on_string}: Filtered Table"/>
    </outputs>
    <tests>
        <!-- Inbuilt MPI -->
        <test>
            <param name="input_table" value="test.matrix"/>
            <param name="input_barcodes" value="celseq_barcodes.192.raw"/>
            <conditional name="inbuilt_spec">
                <param name="select_use" value="mpi_sagar"/>
            </conditional>
            <output name="out_plots" value="out.pdf" compare="sim_size"/>
            <output name="out_table" value="out.table"/>
        </test>
        <!-- Plate and Lane test: custom spec with the same ranges and plates as the
             built-in protocol, compared against the same expected files. -->
        <test>
            <param name="input_table" value="test.matrix"/>
            <param name="input_barcodes" value="celseq_barcodes.192.raw"/>
            <conditional name="inbuilt_spec">
                <param name="select_use" value="custom"/>
                <repeat name="barcode_format">
                    <param name="range_start" value="1"/>
                    <param name="range_end" value="96"/>
                    <param name="batches" value="1,3"/>
                </repeat>
                <repeat name="barcode_format">
                    <param name="range_start" value="97"/>
                    <param name="range_end" value="192"/>
                    <param name="batches" value="2,4"/>
                </repeat>
                <repeat name="plate_format">
                    <param name="plate" value="1"/>
                    <param name="batches" value="1,2"/>
                </repeat>
                <repeat name="plate_format">
                    <param name="plate" value="2"/>
                    <param name="batches" value="3,4"/>
                </repeat>
            </conditional>
            <output name="out_plots" value="out.pdf" compare="sim_size"/>
            <output name="out_table" value="out.table"/>
        </test>
        <!-- 12 Batch test: three plates of four batches each. -->
        <test>
            <param name="input_table" value="b12.matrix"/>
            <param name="input_barcodes" value="celseq_barcodes.192.raw"/>
            <conditional name="inbuilt_spec">
                <param name="select_use" value="custom"/>
                <repeat name="barcode_format">
                    <param name="range_start" value="1"/>
                    <param name="range_end" value="96"/>
                    <param name="batches" value="1,3,5,7,9,11"/>
                </repeat>
                <repeat name="barcode_format">
                    <param name="range_start" value="97"/>
                    <param name="range_end" value="192"/>
                    <param name="batches" value="2,4,6,8,10,12"/>
                </repeat>
                <repeat name="plate_format">
                    <param name="plate" value="1"/>
                    <param name="batches" value="1,2,3,4"/>
                </repeat>
                <repeat name="plate_format">
                    <param name="plate" value="2"/>
                    <param name="batches" value="5,6,7,8"/>
                </repeat>
                <repeat name="plate_format">
                    <param name="plate" value="3"/>
                    <param name="batches" value="9,10,11,12"/>
                </repeat>
            </conditional>
            <output name="out_plots" value="b12.pdf" compare="sim_size"/>
            <output name="out_table" value="b12.table"/>
        </test>
    </tests>
    <help><![CDATA[
Cross-contamination Filter Plot
###################################

For a set of barcodes and an experimental setup that uses a subset of these barcodes for each batch, this tool compares each batch against the full range of barcodes in order to determine whether any cross-contamination between batches has occurred.

*Note -- Do not reuse batch numbering across plates!*

If a significant number of transcripts are detected in a batch for cell barcodes that were not designed for that batch, this tool will reveal it. In the plot below, we can see that there is no significant cross-contamination taking place (pre-filter), and so we can filter out the false barcodes (post-filter).

.. image:: $PATH_TO_IMAGES/crosscontam_pretopost.png
   :scale: 50 %


Example
~~~~~~~~

Consider the following experimental setup, with a list of 100 possible barcodes, used over 3 sequencing plates, with each plate containing 4 unique batches, and each plate using a specific subset of the 100 barcodes.

::

 Barcodes

  1 - 10 | AAA AAC AAT AAG ACA AGA ATA CAC GAG TAT
 11 - 20 | CCC CCA CCT CCG CTC CGC TCT GCG TCT CGT
    .
    .
 91 -100 | TTT TAT TCT TGT TTA TTC TTG TCC TGG TAA



 Plate 1  +-------+-------+-------+-------+
          |  B1   |  B2   |  B3   |  B4   |
          +-------+-------+-------+-------+
             1-50   51-100  51-100   1-50

 Plate 2  +-------+-------+-------+-------+
          |  B5   |  B6   |  B7   |  B8   |
          +-------+-------+-------+-------+
             1-40   41-80    1-40   41-80

 Plate 3  +-------+-------+-------+-------+
          |  B9   |  B10  |  B11  |  B12  |
          +-------+-------+-------+-------+
             1-40   41-80    1-40   41-80


****

The above plate and barcoding setup can be represented textually by specifying barcode ranges and plate numbers, each listing the batch numbers it covers, as outlined below:

::

 *Barcodes → Batches*
  1- 50: B1, B4
 51-100: B2, B3
  1- 40: B5, B7, B9 , B11
 41- 80: B6, B8, B10, B12

 *Plates → Batches*
   1: B1, B2 , B3 , B4
   2: B5, B6 , B7 , B8
   3: B9, B10, B11, B12

]]></help>
    <!-- DOI of the publication listed as this tool's citation.
         NOTE(review): confirm this is the intended reference for the protocol. -->
    <citations>
        <citation type="doi">10.1007/978-1-4939-7768-0_15</citation>
    </citations>
</tool>