diff crosscontamination_barcode_filter.xml @ 0:fd938aa845d0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crosscontamination_barcode_filter commit 6f73edc667e61fabdab8b24a7ff40942588fee5b
author iuc
date Thu, 24 Jan 2019 09:52:35 -0500
parents
children 78341ccbad0a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/crosscontamination_barcode_filter.xml	Thu Jan 24 09:52:35 2019 -0500
@@ -0,0 +1,247 @@
+<tool id="crosscontamination_barcode_filter" name="Cross-contamination Barcode Filter" version="@VERSION@">
+    <description>for use in plate-based barcoded analyses</description>
+    <macros>
+        <token name="@VERSION@">0.1</token>
+        <macro name="assert_conts" >
+            <assert_contents>
+                <has_text text="/CreationDate" />
+                <has_text text="/Producer" />
+                <has_line line="startxref" />
+                <has_line line="%%EOF" />
+            </assert_contents>
+        </macro>
+        <macro name="sanitize_batch">
+            <sanitizer invalid_char="">
+                <valid initial="string.digits">
+                    <add value=","/>
+                </valid>
+            </sanitizer>
+        </macro>
+        <macro name="sanitize_regex">
+            <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits">
+                    <add value="!"/>
+                    <add value="="/>
+                    <add value="-"/>
+                    <add value="."/>
+                    <add value="*"/>
+                    <add value="?"/>
+                    <add value="+"/>
+                    <add value="\\"/>
+                    <add value="_"/>
+                    <add value="&#91;"/> <!-- left square bracket, e.g subselecting from vec[1] -->
+                    <add value="&#93;"/> <!-- right square bracket -->
+                    <add value="&#40;"/> <!-- left parenthesis -->
+                    <add value="&#41;"/> <!-- right parenthesis -->
+                </valid>
+            </sanitizer>
+        </macro>        
+    </macros>
+    <requirements>
+        <requirement type="package" version="2.2.1" >r-ggplot2</requirement>
+    </requirements>
+    <version_command><![CDATA[
+        Rscript '$__tool_directory__/scripts/crosscontamination_filter.R' | head -1 | cut -d' ' -f 2
+    ]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        Rscript '$__tool_directory__/scripts/crosscontamination_filter.R' '$crossconf'
+    ]]></command>
+    <configfiles>
+        <configfile name="crossconf"><![CDATA[
+script.dir = '$__tool_directory__/scripts'
+input_matrix <- read.table(
+    '$input_table',
+    stringsAsFactors = F,
+    na.strings=c("NA", "-", "?", "."),
+    header=TRUE,
+    row.names=1
+)
+input_matrix[is.na(input_matrix)] <- 0
+#if str($inbuilt_spec.select_use) == "mpi_sagar":
+spec = list(
+    barcodes = '$input_barcodes',
+    format = list(
+        "1-96"   = c(1,3,5,7),
+        "97-192" = c(2,4,6,8)
+    ),
+    plates = list(
+        "1" = c(1,2,3,4),
+        "2" = c(5,6,7,8)
+    )
+)
+#elif str($inbuilt_spec.select_use) == "custom":
+spec = list(
+    barcodes = '$input_barcodes',
+    format = list(
+    #for $i, $s in enumerate($inbuilt_spec.barcode_format)
+        "${s.range_start}-${s.range_end}" = c( ${s.batches} ) 
+        #if $i < len(list($inbuilt_spec.barcode_format)) - 1
+        ,
+        #end if
+    #end for
+    ),
+    plates = list(
+    #for $i, $s in enumerate($inbuilt_spec.plate_format)
+        "${s.plate}" = c( ${s.batches} )
+        #if $i < len(list($inbuilt_spec.plate_format)) - 1
+        ,
+        #end if
+    #end for
+    )
+)
+#end if
+regex.extract = '$advanced.regex_extract'
+regex.display = '$advanced.regex_display'
+out.pdf = '$out_plots'
+out.table = '$out_table'
+]]>
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="input_table" type="data" format="tsv,tabular" label="Input Matrix" />
+        <param name="input_barcodes" type="data" format="tsv,tabular,txt" label="Complete Barcodes" />
+        <conditional name="inbuilt_spec" >
+            <param name="select_use" type="select" label="Plate Protocol" >
+                <option value="mpi_sagar">CelSeq2 Plate Protocol (Sagar)</option>
+                <option value="custom">Custom</option>
+            </param>
+            <when value="mpi_sagar" />
+            <when value="custom">
+                <repeat name="barcode_format" title="Barcode Format" help="e.g. Batches 1 and 4 use barcodes 1-100 in the Barcodes file, and Batches 2 and 3 use barcodes 101-200 in the Barcodes file; specify '1' and '100' as Range values, and '1,4' as Batch values, and in the next format specify '101' and '200' as Range values and '2,3' as Batch values" >
+                    <param name="range_start" type="integer" min="1" value="1" label="Barcode Range: Start" />
+                    <param name="range_end" type="integer" min="2" value="100" label="Barcode Range: End" />
+                    <param name="batches" type="text" value="1,4" label="Batches utilizing this Range"  >
+                        <expand macro="sanitize_batch" />
+                    </param>
+                </repeat>
+                <repeat name="plate_format" title="Plate Format" help="e.g. Plate 1 encompasses Batches 1-4, and Plate 2 encompasses Batches 5-8; specify '1' as a Plate value, and '1,2,3,4' as Batch values, and in the next format specify '2' as a Plate value and '5,6,7,8' as Batch values">
+                    <param name="plate" type="integer" min="1" value="1" label="Plate Number"  />
+                    <param name="batches" type="text" value="1,2,3,4" label="Batches within this Plate Number" >
+                        <expand macro="sanitize_batch" />
+                    </param>
+                </repeat>
+            </when>
+        </conditional>
+        <section name="advanced" expanded="false" title="RegEx Parameters" >
+            <param name="regex_extract" type="text" value=".*P(\\d)_(\\d)_([ACTG]+)" label="RegEx to extract Plate, Batch, and Barcodes from headers" >
+                <expand macro="sanitize_regex" />
+            </param>
+            <param name="regex_display" type="text" value="P\\1_B\\2_\\3" label="RegEx to replace Plate, Batch, and Barcodes from headers" >
+                <expand macro="sanitize_regex" />
+            </param>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="out_plots" format="pdf" label="${tool.name} on ${on_string}: Plots" />
+        <data name="out_table" format="tabular" label="${tool.name} on ${on_string}: Filtered Table" />
+    </outputs>
+    <tests>
+        <test><!-- Inbuilt MPI -->
+            <param name="input_table" value="out3.subtable" />
+            <param name="input_barcodes" value="celseq_barcodes.192.raw" />
+            <conditional name="inbuilt_spec" >
+                <param name="select_use" value="mpi_sagar" />
+            </conditional>
+            <output name="out_plots" >
+                <expand macro="assert_conts" />
+            </output>
+            <output name="out_table" value="test.table" />
+        </test>
+        <test><!-- Plate and Lane test -->
+            <param name="input_table" value="out3.subtable" />
+            <param name="input_barcodes" value="celseq_barcodes.192.raw" />
+            <conditional name="inbuilt_spec" >
+                <param name="select_use" value="custom" />
+                <repeat name="barcode_format" >
+                    <param name="range_start" value="1"/>
+                    <param name="range_end" value="96" />
+                    <param name="batches" value="1,3,5,7" />
+                </repeat>
+                <repeat name="barcode_format" >
+                    <param name="range_start" value="97"/>
+                    <param name="range_end" value="192" />
+                    <param name="batches" value="2,4,6,8" />
+                </repeat>
+                <repeat name="plate_format" >
+                    <param name="plate" value="1" />
+                    <param name="batches" value="1,2,3,4" />
+                </repeat>
+                <repeat name="plate_format" >
+                    <param name="plate" value="2" />
+                    <param name="batches" value="5,6,7,8" />
+                </repeat>
+            </conditional>
+            <output name="out_plots" >
+                <expand macro="assert_conts" />
+            </output>
+            <output name="out_table" value="test.table" />
+        </test>
+    </tests>
+    <help><![CDATA[
+Cross-contamination Filter Plot
+###################################
+
+For a set of barcodes and an experimental setup that uses a subset of these barcodes for each batch, this tool compares each batch against the full range of barcodes in order to determine whether any cross-contamination between batches has occured.
+
+If a significant number of transcripts are shown in a batch for cell barcodes that were not designed for that batch, then this tool will show that. In the below plot, we can see that there is no significant cross-contamination taking place (pre-filter), and so we can filter out the false barcodes (post-filter).
+
+.. image:: $PATH_TO_IMAGES/crosscontam_pretopost.png
+   :scale: 50 %
+
+
+Example
+~~~~~~~~
+
+Consider the following experimental setup, with a list of 100 possible barcodes, used over 3 sequencing plates, with each plate containing 4 unique batches, and each plate using a specific subset of the 100 barcodes.
+
+:: 
+
+ Barcodes
+    
+  1 - 10 | AAA AAC AAT AAG ACA AGA ATA CAC GAG TAT
+ 11 - 20 | CCC CCA CCT CCG CTC CGC TCT GCG TCT CGT
+    .
+    .
+ 91 -100 | TTT TAT TCT TGT TTA TTC TTG TCC TGG TAA
+
+
+
+ Plate 1  +-------+-------+-------+-------+
+          |  B1   |  B2   |  B3   |  B4   |
+          +-------+-------+-------+-------+
+             1-50   51-100  51-100   1-50  
+
+ Plate 2  +-------+-------+-------+-------+
+          |  B5   |  B6   |  B7   |  B8   |
+          +-------+-------+-------+-------+
+             1-40   41-80    1-40   41-80
+
+ Plate 3  +-------+-------+-------+-------+
+          |  B9   |  B10  |  B11  |  B12  |
+          +-------+-------+-------+-------+
+             1-40   41-80    1-40   41-80 
+
+
+****
+
+The above plate and barcoding setup can be more textually represented by specifying barcode ranges and plate numbers, with each denoting which batch numbers they describe as outlined below:
+
+::
+
+ *Barcodes → Batches*
+  1- 50: B1, B4
+ 51-100: B2, B3
+  1- 40: B5, B7, B9 , B11
+ 41- 80: B6, B8, B10, B12
+
+ *Plates → Batches*
+   1: B1, B2 , B3 , B4
+   2: B5, B6 , B7 , B8
+   3: B9, B10, B11, B12
+
+]]></help>
+    <citations>
+        <citation type="doi">10.1007/978-1-4939-7768-0_15</citation>
+    </citations>
+</tool>
+