changeset 0:4dfdf2090b35 draft

planemo upload
author yating-l
date Tue, 16 May 2017 15:10:47 -0400
parents
children 1c9fa1af931b
files test-data/contigs.2bit test-data/contigs_chromInfo.tab test-data/contigs_gaps.bed test-data/contigs_no_Ns_chromInfo.tab tool_dependencies.xml twobit_info.xml ucsc_macros.xml
diffstat 7 files changed, 203 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file test-data/contigs.2bit has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/contigs_chromInfo.tab	Tue May 16 15:10:47 2017 -0400
@@ -0,0 +1,2 @@
+contig12	56019
+contig10	43013
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/contigs_gaps.bed	Tue May 16 15:10:47 2017 -0400
@@ -0,0 +1,4 @@
+contig10	16942	16962	gap_0
+contig12	18710	18730	gap_1
+contig12	29582	29904	gap_2
+contig12	30821	30841	gap_3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/contigs_no_Ns_chromInfo.tab	Tue May 16 15:10:47 2017 -0400
@@ -0,0 +1,2 @@
+contig12	55657
+contig10	42993
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue May 16 15:10:47 2017 -0400
@@ -0,0 +1,57 @@
+<?xml version="1.0" ?>
+<tool_dependency>
+    <package name="ucsc_twobit" version="340">
+        <install version="1.0">
+            <actions_group>
+                <actions os="darwin" architecture="x86_64">
+                    <action type="download_by_url"
+                            sha256sum="aa0df6fc9c5dfe83cb94456e1100a5ecd7a247e7521d91f4e91c46c1b21fc9ab">
+                        http://old-gep.wustl.edu/~wilson/packages/ucsc_twobit/ucsc_twobit_340_macOS_x86_64.tar.gz
+                    </action>
+                    <action type="move_directory_files">
+                        <source_directory>bin</source_directory>
+                        <destination_directory>$INSTALL_DIR/bin</destination_directory>
+                    </action>
+                </actions>
+
+                <actions os="linux" architecture="x86_64">
+                    <action type="download_by_url"
+                            sha256sum="31b18fd6f0f24981815ded0a1790be49ca47a5d2cce40c3af29ffc6f5bea4922">
+                        http://old-gep.wustl.edu/~wilson/packages/ucsc_twobit/ucsc_twobit_340_linux_x86_64.tar.gz
+                    </action>
+                    <action type="move_directory_files">
+                        <source_directory>bin</source_directory>
+                        <destination_directory>$INSTALL_DIR/bin</destination_directory>
+                    </action>
+                </actions>
+
+                <!-- Display error message for unsupported OS and CPU architecture -->
+                <actions>
+                    <action type="shell_command">
+                        echo "ERROR: This package only supports 64-bit systems running macOS or Linux"
+                    </action>
+                    <action type="shell_command">false</action>
+                </actions>
+
+                <!-- update $PATH environment variable -->
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions_group>
+        </install>
+        <readme>
+        <![CDATA[
+            This package contains the utilities for constructing and manipulating
+            `twoBit files <https://genome.ucsc.edu/goldenpath/help/twoBit.html>`_.
+            The utilities were created by the
+            `Genome Bioinformatics Group <https://genome.ucsc.edu/staff.html>`_
+            at the UCSC Genomics Institute.
+
+            The twoBit file format is designed to store genomic sequences and the
+            associated repeat masking information. The twoBit file is used by the
+            UCSC Genome Browser and by tools such as
+            `BLAT <https://genome.ucsc.edu/goldenpath/help/blatSpec.html>`_.
+        ]]>
+        </readme>
+    </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/twobit_info.xml	Tue May 16 15:10:47 2017 -0400
@@ -0,0 +1,94 @@
+<?xml version="1.0"?>
+<tool id="twobit_info" name="twoBitInfo" version="1.0">
+    <description>Get sequence information from a twoBit file</description>
+
+    <macros>
+        <import>ucsc_macros.xml</import>
+    </macros>
+
+    <expand macro="requirements_twobit" />
+
+    <command detect_errors="exit_code">
+<![CDATA[
+    #if str($output_type.output_type_selector) == "Sequence_Lengths":
+        set -o pipefail; twoBitInfo
+            ${output_type.no_Ns} "${twobit_input}" stdout |
+            sort -k 2,2nr > "${output}"
+    #else:
+        set -o pipefail; twoBitInfo -nBed "${twobit_input}" stdout |
+            sort -k 1,1 -k 2,2n |
+            awk 'BEGIN { OFS = "\t" } { print $1, $2, $3, sprintf("gap_%d", idx++ ) }'
+                > "${output}"
+    #end if
+]]>
+    </command>
+
+    <expand macro="environment_LC_COLLATE" />
+
+    <inputs>
+        <param name="twobit_input" type="data" format="twobit" label="twoBit input file" />
+
+        <conditional name="output_type">
+            <!--
+                Because the "when" element in the "data" output does not allow the "label"
+                attribute, the value from the output_type_selector is used as part of the
+                label to help users distinguish the different types of output produced by
+                this tool. The option values are capitalized so that they conform to the
+                naming convention of the History items that are displayed to the user.
+            -->
+            <param name="output_type_selector" type="select"
+                    label="Type of output file">
+                <option value="Sequence_Lengths" selected="true">Sequence lengths</option>
+                <option value="Gaps">BED records of gaps in each sequence</option>
+            </param>
+
+            <when value="Sequence_Lengths">
+                <param name="no_Ns" type="boolean" checked="false"
+                        truevalue="-noNs" falsevalue=""
+                        label="Exclude gaps (N's) from sequence length"
+                        help="-noNs" />
+            </when>
+
+            <when value="Gaps"></when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output" format="len"
+                label="${tool.name} on ${on_string}: ${output_type.output_type_selector}">
+            <change_format>
+                <when input="output_type.output_type_selector" value="Gaps" format="bed" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <!-- Test report sequence length -->
+            <param name="twobit_input" value="contigs.2bit" ftype="twobit" />
+            <param name="output_type_selector" value="Sequence_Lengths" />
+            <output name="output" file="contigs_chromInfo.tab" />
+        </test>
+        <test>
+            <!-- Test report sequence length excluding gaps (no_Ns enabled, i.e. -noNs) -->
+            <param name="twobit_input" value="contigs.2bit" ftype="twobit" />
+            <param name="output_type_selector" value="Sequence_Lengths" />
+            <param name="no_Ns" value="true" />
+            <output name="output" file="contigs_no_Ns_chromInfo.tab" />
+        </test>
+        <test>
+            <!-- Test report gap BED file -->
+            <param name="twobit_input" value="contigs.2bit" ftype="twobit" />
+            <param name="output_type_selector" value="Gaps" />
+            <output name="output" file="contigs_gaps.bed" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+twoBitInfo reports the length of each scaffold and the gap locations
+stored in a twoBit Sequence Archive.
+
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsc_macros.xml	Tue May 16 15:10:47 2017 -0400
@@ -0,0 +1,44 @@
+<macros>
+    <xml name="requirements_twobit">
+        <requirements>
+            <requirement type="package" version="340">ucsc_twobit</requirement>
+            <yield />
+        </requirements>
+    </xml>
+    <xml name="environment_LC_COLLATE">
+        <environment_variables>
+            <!-- Sort uppercase letters before lowercase (required by UCSC tools) -->
+            <environment_variable name="LC_COLLATE">C</environment_variable>
+            <yield />
+        </environment_variables>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+        @article{Kent01062002,
+author = {Kent, W. James and Sugnet, Charles W. and Furey, Terrence S. and Roskin, Krishna M. and Pringle, Tom H. and Zahler, Alan M. and Haussler, David},
+title = {The Human Genome Browser at UCSC},
+volume = {12},
+number = {6},
+pages = {996-1006},
+year = {2002},
+doi = {10.1101/gr.229102},
+URL = {http://genome.cshlp.org/content/12/6/996.abstract},
+eprint = {http://genome.cshlp.org/content/12/6/996.full.pdf+html},
+journal = {Genome Research}
+}
+            </citation>
+            <yield />
+        </citations>
+    </xml>
+
+    <token name="@OPTIONAL_PARAM_FUNC@">
+<![CDATA[
+        #def optional_param($_flag, $_flag_value, $_sep="=")
+            #if str($_flag_value).strip():
+                ${_flag}${_sep}${_flag_value}
+            #end if
+        #end def
+]]>
+    </token>
+</macros>