Mercurial > repos > rnateam > bctools
changeset 0:119fccb59597 draft
Uploaded
author | rnateam |
---|---|
date | Thu, 22 Oct 2015 09:52:51 -0400 |
parents | |
children | ae0f58d3318f |
files | convert_bc_to_binary_RY.xml coords2clnt.xml extract_aln_ends.xml extract_bcs.xml macros.xml merge_pcr_duplicates.xml remove_tail.xml rm_spurious_events.xml tool_dependencies.xml |
diffstat | 9 files changed, 386 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/convert_bc_to_binary_RY.xml Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,44 @@
+<tool id="convert_bc_to_binary_RY.py" name="convert_bc_to_binary_RY.py" version="0.1.0">
+    <!-- Galaxy wrapper for bctools' convert_bc_to_binary_RY.py: reads one FASTA
+         dataset and writes the script's stdout to a FASTA output dataset. -->
+    <description>Convert to binary barcodes.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <!-- Shared requirements and exit-code handling are expanded from macros.xml. -->
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <version_command>python convert_bc_to_binary_RY.py --version</version_command>
+    <!-- The input path is passed as the only positional argument; stdout is
+         redirected into the output dataset. -->
+    <command interpreter="python"><![CDATA[
+convert_bc_to_binary_RY.py
+#if $positional_1 and $positional_1 is not None:
+$positional_1
+#end if
+> $default]]></command>
+    <inputs>
+        <param name="positional_1" type="data" format="fasta" label="Fasta file to convert."/>
+    </inputs>
+    <outputs>
+        <data name="default" format="fasta" hidden="false"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="positional_1" value="result.fa"/>
+            <output name="default" file="converted_bcs.fa"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Convert standard nucleotides to IUPAC nucleotide codes used for binary barcodes.
+
+A and G are converted to nucleotide code R. T, U and C are converted to Y.
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Testing
+]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/coords2clnt.xml Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,50 @@
+<tool id="coords2clnt.py" name="coords2clnt.py" version="1.0">
+    <!-- Galaxy wrapper for bctools' coords2clnt.py: takes a BED dataset of aligned
+         reads and writes the script's stdout to a BED output dataset. -->
+    <description>Extract crosslinked nucleotide.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Exit-code handling comes from the shared "stdio" macro only; the tool must
+         not declare a second <stdio> section of its own. -->
+    <expand macro="stdio" />
+    <version_command>python coords2clnt.py --version</version_command>
+    <!-- The input path is the only positional argument; stdout is redirected
+         into the output dataset. -->
+    <command interpreter="python"><![CDATA[coords2clnt.py
+#if $positional_1 and $positional_1 is not None:
+$positional_1
+#end if
+
+> $default]]></command>
+    <inputs>
+        <param name="positional_1" type="data" format="bed" label="Alignments in bed format."/>
+    </inputs>
+    <outputs>
+        <data name="default" format="bed" hidden="false"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="positional_1" value="merged_pcr_dupes.bed"/>
+            <output name="default" file="merged_pcr_dupes_clnts.bed"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Given coordinates of the aligned reads, calculate positions of the crosslinked nucleotides.
+Crosslinked nts are assumed to be one nt upstream of the 5'-end of the read.
+
+Input:
+* bed6 file containing coordinates of aligned reads
+
+Output:
+* bed6 file containing coordinates of crosslinking events
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Testing
+]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_aln_ends.xml Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,57 @@
+<tool id="extract_aln_ends.py" name="extract_aln_ends.py" version="0.1.0">
+    <!-- Galaxy wrapper for bctools' extract_aln_ends.py: takes a SAM dataset and
+         writes the script's stdout to a BED output dataset. -->
+    <description>Extract alignment ends from sam file.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <!-- Shared requirements and exit-code handling are expanded from macros.xml. -->
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <version_command>python extract_aln_ends.py --version</version_command>
+    <!-- The input path is the only positional argument; stdout is redirected
+         into the output dataset. -->
+    <command interpreter="python"><![CDATA[
+extract_aln_ends.py
+#if $positional_1 and $positional_1 is not None:
+$positional_1
+#end if
+
+> $default]]></command>
+    <inputs>
+        <param name="positional_1" type="data" format="sam" label="Sam input." area="false"/>
+    </inputs>
+    <outputs>
+        <data name="default" format="bed" hidden="false"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="positional_1" value="twomates.sam"/>
+            <output name="default" file="tworeads_aln_ends.bed"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Extract alignment ends from sam file.
+
+The resulting bed file contains the outer coordinates of the alignments. The
+bed name field is set to the read id and the score field is set to the edit
+distance of the alignment. The crosslinked nucleotide is one nt upstream of the
+5'-end of the bed entries.
+
+This tool only reports results for alignments that are properly aligned in FR
+("forward-reverse") direction.
+
+Input:
+* sam file containing alignments (paired-end sequencing)
+
+Output:
+* bed6 file containing outer coordinates (sorted by read id)
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Development
+]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_bcs.xml Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,49 @@
+<tool id="extract_bcs.py" name="extract_bcs.py" version="1.0">
+    <!-- Galaxy wrapper for bctools' extract_bcs.py: takes a FASTQ dataset plus a
+         barcode pattern and writes the script's stdout to a FASTQ output dataset. -->
+    <description>Extract barcodes using pattern.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <version_command>python extract_bcs.py --version</version_command>
+    <!-- Positional arguments: input path, then the pattern. The pattern is free
+         text typed by the user, so it is single-quoted to reach the script as
+         exactly one shell word. -->
+    <command interpreter="python"><![CDATA[
+extract_bcs.py
+#if $positional_1 and $positional_1 is not None:
+$positional_1
+#end if
+
+#if $positional_2 and $positional_2 is not None:
+'$positional_2'
+#end if
+
+> $default]]></command>
+    <inputs>
+        <param name="positional_1" type="data" format="fastq" label="Barcoded sequences."/>
+        <param name="positional_2" type="text" area="false" label="Pattern of barcode nucleotides starting at 5'-end. X positions will be moved to the header, N positions will be kept."/>
+    </inputs>
+    <outputs>
+        <data name="default" format="fastq" hidden="false"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="positional_1" value="reads.fastq"/>
+            <param name="positional_2" value="XXXNNXXX"/>
+            <output name="default" file="result.fastq"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Extract barcodes from a FASTQ file according to a user-specified pattern.
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Testing
+]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,20 @@
+<macros>
+    <!-- Tool dependency shared by every bctools wrapper that expands this macro. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement version="0.1.0" type="package">bctools</requirement>
+        </requirements>
+    </xml>
+    <!-- Any exit code of 1 or above is reported as a fatal error. -->
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" level="fatal"/>
+        </stdio>
+    </xml>
+    <!-- Citation (by DOI) attached to tools that expand this macro. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1016/j.molcel.2013.07.001</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/merge_pcr_duplicates.xml Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,56 @@
+<tool id="merge_pcr_duplicates.py" name="merge_pcr_duplicates.py" version="1.0">
+    <!-- Galaxy wrapper for bctools' merge_pcr_duplicates.py: takes a BED dataset of
+         alignments and a FASTA barcode library and writes the script's stdout to a BED output dataset. -->
+    <description>Merge PCR duplicates according to random barcode library.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <version_command>python merge_pcr_duplicates.py --version</version_command>
+    <!-- Positional arguments: alignments path, then barcode library path;
+         stdout is redirected into the output dataset. -->
+    <command interpreter="python"><![CDATA[merge_pcr_duplicates.py
+#if $positional_1 and $positional_1 is not None:
+$positional_1
+#end if
+
+#if $positional_2 and $positional_2 is not None:
+$positional_2
+#end if
+
+> $default]]></command>
+    <inputs>
+        <param name="positional_1" type="data" format="bed" label="bed6 file containing alignments."/>
+        <param name="positional_2" type="data" format="fasta" label="fasta barcode library."/>
+    </inputs>
+    <outputs>
+        <data name="default" format="bed" hidden="false"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="positional_1" value="pcr_dupes_sorted_2.bed"/>
+            <param name="positional_2" value="pcr_dupes_randomdict.fa"/>
+            <output name="default" file="merged_pcr_dupes.bed"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Merge PCR duplicates according to random barcode library.
+
+Barcodes containing uncalled base 'N' are removed.
+
+Input:
+* bed6 file containing alignments with fastq read-id in name field
+* fasta library with fastq read-id as sequence ids
+
+Output:
+* bed6 file with random barcode in name field and number of PCR duplicates as score, sorted by fields chrom, start, stop, strand, name
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Testing
+]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/remove_tail.xml Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,46 @@
+<tool id="remove_tail.py" name="remove_tail.py" version="1.0">
+    <!-- Galaxy wrapper for bctools' remove_tail.py: takes a FASTQ dataset and a
+         nucleotide count and writes the script's stdout to a FASTQ output dataset. -->
+    <description>Remove nts from 3'-end.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <version_command>python remove_tail.py --version</version_command>
+    <!-- Positional arguments: input path, then the count. The count is passed
+         unconditionally: a truthiness test would treat the default value 0 as
+         "unset" and leave the argument off the command line. -->
+    <command interpreter="python"><![CDATA[remove_tail.py
+#if $positional_1 and $positional_1 is not None:
+$positional_1
+#end if
+
+$positional_2
+
+> $default]]></command>
+    <inputs>
+        <param name="positional_1" type="data" format="fastq" label="Fastq file."/>
+        <param name="positional_2" type="integer" value="0" min="0" label="Remove this many nts."/>
+    </inputs>
+    <outputs>
+        <data name="default" format="fastq" hidden="false"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="positional_1" value="readswithtail.fastq"/>
+            <param name="positional_2" value="7"/>
+            <output name="default" file="readswithtailremoved.fastq"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Remove a certain number of nucleotides from the 3'-tails of sequences in fastq format.
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Testing
+]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rm_spurious_events.xml Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,60 @@
+<tool id="rm_spurious_events.py" name="rm_spurious_events.py" version="1.0">
+    <!-- Galaxy wrapper for bctools' rm_spurious_events.py: takes a BED dataset and
+         an optional threshold and writes the script's stdout to a BED output dataset. -->
+    <description>Remove spurious events.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <version_command>python rm_spurious_events.py --version</version_command>
+    <!-- The threshold option is added whenever the field is non-empty. The test
+         compares the string form so that an explicit 0 is still passed on rather
+         than being mistaken for an empty field. -->
+    <command interpreter="python"><![CDATA[rm_spurious_events.py
+#if $positional_1 and $positional_1 is not None:
+$positional_1
+#end if
+
+#if str($threshold) != '':
+--threshold $threshold
+#end if
+> $default]]></command>
+    <inputs>
+        <param name="positional_1" type="data" format="bed" label="bed6 file containing alignments."/>
+        <param name="threshold" type="float" value="0.1" optional="true" label="Threshold for spurious event removal." help="(--threshold)"/>
+    </inputs>
+    <outputs>
+        <data name="default" format="bed" hidden="false"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="positional_1" value="merged_pcr_dupes_spurious.bed"/>
+            <param name="threshold" value="0.5"/>
+            <output name="default" file="merged_pcr_dupes_spurious_filtered_thresh05.bed"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Remove spurious events originating from errors in random sequence tags.
+
+This script compares all events sharing the same coordinates. Among each group
+of events the maximum number of PCR duplicates is determined. All events that
+are supported by less than a given fraction of this maximum count (the
+threshold, 10 percent by default) are removed.
+
+Input:
+* bed6 file containing crosslinking events with score field set to number of PCR
+  duplicates
+
+Output:
+* bed6 file with spurious crosslinking events removed, sorted by fields chrom,
+  start, stop, strand
+
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Testing
+]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu Oct 22 09:52:51 2015 -0400
@@ -0,0 +1,44 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <!-- Installs bctools 0.1.0 (alpha1 source archive from GitHub) for Galaxy. -->
+    <package name="bctools" version="0.1.0">
+        <install version="1.0">
+            <actions_group>
+                <actions>
+                    <action type="download_by_url" target_filename="bctools-0.1.0-alpha1.tar.gz">https://github.com/tzk/bctools/archive/v0.1.0-alpha1.tar.gz</action>
+                    <!-- Python libraries made available while installing, each pinned
+                         to a tool shed repository revision. -->
+                    <action type="set_environment_for_install">
+                        <repository name="package_biopython_1_65" owner="biopython" changeset_revision="b3a791f6e3ba" toolshed="https://testtoolshed.g2.bx.psu.edu">
+                            <package name="biopython" version="1.65" />
+                        </repository>
+                        <!-- <repository name="package_python_2_7_pandas_0_16" owner="iuc">
+                            <package name="pandas" version="0.16" />
+                        </repository> -->
+                        <repository name="package_pandas_0_14" owner="iuc" changeset_revision="045c4645abdf" toolshed="https://testtoolshed.g2.bx.psu.edu">
+                            <package name="pandas" version="0.14" />
+                        </repository>
+                        <!-- <repository name="package_python_2_7_pybedtools_0_6_9" owner="iuc">
+                            <package name="pybedtools" version="0.6.9" />
+                        </repository> -->
+                        <repository name="package_pybedtools_0_6_6" owner="iuc" changeset_revision="372c85bed2ca" toolshed="https://testtoolshed.g2.bx.psu.edu">
+                            <package name="pybedtools" version="0.6.6" />
+                        </repository>
+                    </action>
+                    <!-- Run the package's setup.py, placing its scripts in $INSTALL_DIR. -->
+                    <action type="shell_command">
+                        python setup.py install --install-scripts $INSTALL_DIR
+                    </action>
+                    <!-- Expose the install location as BCTOOLS_ROOT_DIR. -->
+                    <action type="set_environment">
+                        <environment_variable name="BCTOOLS_ROOT_DIR" action="set_to">$INSTALL_DIR</environment_variable>
+                    </action>
+                </actions>
+            </actions_group>
+        </install>
+        <readme>
+bctools - Set of tools for handling barcodes in NGS data.
+https://github.com/tzk/bctools
+        </readme>
+    </package>
+</tool_dependency>