changeset 0:119fccb59597 draft

Uploaded
author rnateam
date Thu, 22 Oct 2015 09:52:51 -0400
parents
children ae0f58d3318f
files convert_bc_to_binary_RY.xml coords2clnt.xml extract_aln_ends.xml extract_bcs.xml macros.xml merge_pcr_duplicates.xml remove_tail.xml rm_spurious_events.xml tool_dependencies.xml
diffstat 9 files changed, 386 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/convert_bc_to_binary_RY.xml	Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,39 @@
+<tool id="convert_bc_to_binary_RY.py" name="convert_bc_to_binary_RY.py" version="0.1.0">
+  <description>Convert to binary barcodes.</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements" /> <!-- bctools package requirement from macros.xml -->
+  <expand macro="stdio" /> <!-- exit codes of 1 and above fail the job -->
+  <version_command>python convert_bc_to_binary_RY.py --version</version_command>
+  <command interpreter="python"><![CDATA[
+convert_bc_to_binary_RY.py
+#if $positional_1 and $positional_1 is not None:
+'$positional_1'
+#end if
+> '$default']]></command>
+  <inputs>
+    <param label="Fasta file to convert." name="positional_1" type="data" format="fasta"/>
+  </inputs>
+  <outputs>
+    <data hidden="false" name="default" format="fasta"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="positional_1" value="result.fa"/>
+      <output name="default" file="converted_bcs.fa"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+Convert standard nucleotides to IUPAC nucleotide codes used for binary barcodes.
+
+A and G are converted to nucleotide code R. T, U and C are converted to Y.
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Testing
+]]></help>
+  <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/coords2clnt.xml	Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,45 @@
+<tool id="coords2clnt.py" name="coords2clnt.py" version="1.0">
+  <description>Extract crosslinked nucleotide.</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements" />
+  <expand macro="stdio" /> <!-- sole stdio definition: exit codes of 1 and above fail the job -->
+  <version_command>python coords2clnt.py --version</version_command>
+  <command interpreter="python"><![CDATA[
+coords2clnt.py
+#if $positional_1 and $positional_1 is not None:
+'$positional_1'
+#end if
+
+> '$default']]></command>
+  <inputs>
+    <param area="false" label="Alignments in bed format." name="positional_1" type="data" format="bed"/>
+  </inputs>
+  <outputs>
+    <data hidden="false" name="default" format="bed"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="positional_1" value="merged_pcr_dupes.bed"/>
+      <output name="default" file="merged_pcr_dupes_clnts.bed"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+Given coordinates of the aligned reads, calculate positions of the crosslinked nucleotides.
+Crosslinked nts are assumed to be one nt upstream of the 5'-end of the read.
+
+Input:
+* bed6 file containing coordinates of aligned reads
+
+Output:
+* bed6 file containing coordinates of crosslinking events
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Testing
+]]></help>
+  <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_aln_ends.xml	Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,52 @@
+<tool id="extract_aln_ends.py" name="extract_aln_ends.py" version="0.1.0">
+  <description>Extract alignment ends from sam file.</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements" /> <!-- bctools package requirement from macros.xml -->
+  <expand macro="stdio" /> <!-- exit codes of 1 and above fail the job -->
+  <version_command>python extract_aln_ends.py --version</version_command>
+  <command interpreter="python"><![CDATA[
+extract_aln_ends.py
+#if $positional_1 and $positional_1 is not None:
+'$positional_1'
+#end if
+
+> '$default']]></command>
+  <inputs>
+    <param area="false" label="Sam input." name="positional_1" type="data" format="sam"/>
+  </inputs>
+  <outputs>
+    <data format="bed" hidden="false" name="default"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="positional_1" value="twomates.sam"/>
+      <output name="default" file="tworeads_aln_ends.bed"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+Extract alignment ends from sam file.
+
+The resulting bed file contains the outer coordinates of the alignments. The
+bed name field is set to the read id and the score field is set to the edit
+distance of the alignment. The crosslinked nucleotide is one nt upstream of the
+5'-end of the bed entries.
+
+This tool only reports results for alignments that are properly aligned in FR
+("forward-reverse") direction.
+
+Input:
+* sam file containing alignments (paired-end sequencing)
+
+Output:
+* bed6 file containing outer coordinates (sorted by read id)
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Development
+]]></help>
+  <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_bcs.xml	Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,44 @@
+<tool id="extract_bcs.py" name="extract_bcs.py" version="1.0">
+  <description>Extract barcodes using pattern.</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements" /> <!-- bctools package requirement from macros.xml -->
+  <expand macro="stdio" /> <!-- exit codes of 1 and above fail the job -->
+  <version_command>python extract_bcs.py --version</version_command>
+  <command interpreter="python"><![CDATA[
+extract_bcs.py
+#if $positional_1 and $positional_1 is not None:
+'$positional_1'
+#end if
+
+#if $positional_2 and $positional_2 is not None:
+'$positional_2'
+#end if
+
+> '$default']]></command>
+  <inputs>
+    <param area="false" label="Barcoded sequences." name="positional_1" type="data" format="fastq"/>
+    <param area="false" label="Pattern of barcode nucleotides starting at 5'-end. X positions will be moved to the header, N positions will be kept." name="positional_2" type="text"/>
+  </inputs>
+  <outputs>
+    <data hidden="false" name="default" format="fastq" />
+  </outputs>
+  <tests>
+    <test>
+      <param name="positional_1" value="reads.fastq"/>
+      <param name="positional_2" value="XXXNNXXX"/>
+      <output name="default" file="result.fastq"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+Extract barcodes from a FASTQ file according to a user-specified pattern.
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Testing
+]]></help>
+  <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,17 @@
+<macros>
+    <xml name="requirements"> <!-- package dependency shared by all wrappers -->
+        <requirements>
+            <requirement version="0.1.0" type="package">bctools</requirement>
+        </requirements>
+    </xml>
+    <xml name="stdio"> <!-- exit codes of 1 and above are fatal -->
+        <stdio>
+            <exit_code range="1:" level="fatal" />
+        </stdio>
+    </xml>
+    <xml name="citations"> <!-- DOI reference attached to each wrapper -->
+        <citations>
+            <citation type="doi">10.1016/j.molcel.2013.07.001</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/merge_pcr_duplicates.xml	Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,53 @@
+<tool id="merge_pcr_duplicates.py" name="merge_pcr_duplicates.py" version="1.0">
+  <description>Merge PCR duplicates according to random barcode library.</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements" /> <!-- bctools package requirement from macros.xml -->
+  <expand macro="stdio" /> <!-- exit codes of 1 and above fail the job -->
+  <version_command>python merge_pcr_duplicates.py --version</version_command>
+  <command interpreter="python"><![CDATA[
+merge_pcr_duplicates.py
+#if $positional_1 and $positional_1 is not None:
+'$positional_1'
+#end if
+
+#if $positional_2 and $positional_2 is not None:
+'$positional_2'
+#end if
+
+> '$default']]></command>
+  <inputs>
+    <param area="false" label="bed6 file containing alignments." name="positional_1" type="data" format="bed"/>
+    <param area="false" label="fasta barcode library." name="positional_2" type="data" format="fasta"/>
+  </inputs>
+  <outputs>
+    <data format="bed" hidden="false" name="default"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="positional_1" value="pcr_dupes_sorted_2.bed"/>
+      <param name="positional_2" value="pcr_dupes_randomdict.fa"/>
+      <output name="default" file="merged_pcr_dupes.bed"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+Merge PCR duplicates according to random barcode library.
+
+Barcodes containing uncalled base 'N' are removed.
+
+Input:
+* bed6 file containing alignments with fastq read-id in name field
+* fasta library with fastq read-id as sequence ids
+
+Output:
+* bed6 file with random barcode in name field and number of PCR duplicates as score, sorted by fields chrom, start, stop, strand, name
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Testing
+]]></help>
+  <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/remove_tail.xml	Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,43 @@
+<tool id="remove_tail.py" name="remove_tail.py" version="1.0">
+  <description>Remove nts from 3'-end.</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements" /> <!-- bctools package requirement from macros.xml -->
+  <expand macro="stdio" /> <!-- exit codes of 1 and above fail the job -->
+  <version_command>python remove_tail.py --version</version_command>
+  <command interpreter="python"><![CDATA[remove_tail.py
+#if $positional_1 and $positional_1 is not None:
+'$positional_1'
+#end if
+
+#if str($positional_2) != '':
+'$positional_2'
+#end if
+
+> '$default']]></command>
+  <inputs>
+    <param area="false" label="Fastq file." name="positional_1" type="data" format="fastq"/>
+    <param label="Remove this many nts." name="positional_2" type="integer" value="0"/>
+  </inputs>
+  <outputs>
+    <data format="fastq" hidden="false" name="default"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="positional_1" value="readswithtail.fastq"/>
+      <param name="positional_2" value="7"/>
+      <output name="default" file="readswithtailremoved.fastq"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+Remove a certain number of nucleotides from the 3'-tails of sequences in fastq format.
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Testing
+]]></help>
+  <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rm_spurious_events.xml	Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,54 @@
+<tool id="rm_spurious_events.py" name="rm_spurious_events.py" version="1.0">
+  <description>Remove spurious events.</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements" /> <!-- bctools package requirement from macros.xml -->
+  <expand macro="stdio" /> <!-- exit codes of 1 and above fail the job -->
+  <version_command>python rm_spurious_events.py --version</version_command>
+  <command interpreter="python"><![CDATA[rm_spurious_events.py
+#if $positional_1 and $positional_1 is not None:
+'$positional_1'
+#end if
+
+#if str($threshold) != '':
+--threshold '$threshold'
+#end if
+> '$default']]></command>
+  <inputs>
+    <param area="false" label="bed6 file containing alignments." name="positional_1" type="data" format="bed"/>
+    <param help="(--threshold)" label="Threshold for spurious event removal." name="threshold" optional="true" type="float" value="0.1"/>
+  </inputs>
+  <outputs>
+    <data format="bed" hidden="false" name="default"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="positional_1" value="merged_pcr_dupes_spurious.bed"/>
+      <param name="threshold" value="0.5"/>
+      <output name="default" file="merged_pcr_dupes_spurious_filtered_thresh05.bed"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+Remove spurious events originating from errors in random sequence tags.
+
+This script compares all events sharing the same coordinates. Among each group
+of events the maximum number of PCR duplicates is determined. All events that
+are supported by less than the given threshold (default 0.1, i.e. 10 percent) of this maximum count are removed.
+
+Input:
+* bed6 file containing crosslinking events with score field set to number of PCR
+  duplicates
+
+Output:
+* bed6 file with spurious crosslinking events removed, sorted by fields chrom,
+  start, stop, strand
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Testing
+]]></help>
+  <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,39 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="bctools" version="0.1.0"> <!-- same name and version as the requirement in macros.xml -->
+        <install version="1.0">
+            <actions_group>
+                <actions>
+                    <action target_filename="bctools-0.1.0-alpha1.tar.gz" type="download_by_url">https://github.com/tzk/bctools/archive/v0.1.0-alpha1.tar.gz</action> <!-- source tarball of tag v0.1.0-alpha1 -->
+                    <action type="set_environment_for_install"> <!-- biopython, pandas and pybedtools packages referenced for the install step -->
+                        <repository changeset_revision="b3a791f6e3ba" name="package_biopython_1_65" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu">
+                            <package name="biopython" version="1.65" />
+                        </repository>
+                        <!-- <repository name="package_python_2_7_pandas_0_16" owner="iuc">
+                            <package name="pandas" version="0.16" />
+                        </repository> -->
+                        <repository changeset_revision="045c4645abdf" name="package_pandas_0_14" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu">
+                            <package name="pandas" version="0.14" />
+                        </repository>
+                        <!-- <repository name="package_python_2_7_pybedtools_0_6_9" owner="iuc">
+                            <package name="pybedtools" version="0.6.9" />
+                        </repository> -->
+                        <repository changeset_revision="372c85bed2ca" name="package_pybedtools_0_6_6" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu">
+                            <package name="pybedtools" version="0.6.6" />
+                        </repository>
+                    </action>
+                    <action type="shell_command">
+                        python setup.py install --install-scripts $INSTALL_DIR
+                    </action> <!-- setup.py is told to place the scripts in $INSTALL_DIR -->
+                    <action type="set_environment"> <!-- NOTE(review): only BCTOOLS_ROOT_DIR is exported and PATH is not extended here; confirm how the wrappers locate the installed scripts -->
+                        <environment_variable action="set_to" name="BCTOOLS_ROOT_DIR">$INSTALL_DIR</environment_variable>
+                    </action>
+                </actions>
+            </actions_group>
+        </install>
+        <readme>
+bctools - Set of tools for handling barcodes in NGS data.
+https://github.com/tzk/bctools
+        </readme>
+    </package>
+</tool_dependency>