Mercurial > repos > yating-l > hubarchivecreator

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/BigBed.py	Wed May 17 13:16:57 2017 -0400
@@ -0,0 +1,97 @@
+#!/usr/bin/python
+
+import os
+import shutil
+from subprocess import Popen, PIPE
+import re
+
+# Internal dependencies
+from Datatype import Datatype
+
+class BigBed(Datatype):
+    """
+    Configuration for exposing an existing bigBed file as an evidence track.
+
+    The input file is copied into the track folder as "<name>.bigbed" and a
+    track is registered for it through createTrack().
+
+    NOTE(review): myTrackFolderPath and createTrack() are not defined in this
+    file; presumably they come from Datatype -- confirm their contracts there.
+    """
+
+    def __init__(self, input_bigbed_path, data_bigbed):
+        """
+        Copy the bigBed file into the track folder and register its track.
+
+        :param input_bigbed_path: path to the bigBed file to publish
+        :param data_bigbed: mapping of track settings; the keys "name",
+            "order_index", "track_color", "group_name" and "long_label"
+            are read here
+        """
+        super(BigBed, self).__init__()
+
+        self.track = None
+
+        self.input_bigbed_path = input_bigbed_path
+        self.name_bigbed = data_bigbed["name"]
+        self.priority = data_bigbed["order_index"]
+        self.track_color = data_bigbed["track_color"]
+        self.group_name = data_bigbed["group_name"]
+
+        # An empty long label falls back to the track name
+        self.long_label = data_bigbed["long_label"] or self.name_bigbed
+
+        track_name = self.name_bigbed + ".bigbed"
+        bigbed_file_path = os.path.join(self.myTrackFolderPath, track_name)
+
+        # Inspect the input before copying it, so a file that bigBedSummary
+        # cannot describe fails before anything is written to the track folder
+        track_type = self.determine_track_type(input_bigbed_path)
+
+        shutil.copy(self.input_bigbed_path, bigbed_file_path)
+
+        # Create the Track Object
+        self.createTrack(file_path=track_name,
+                         track_name=track_name,
+                         long_label=self.long_label,
+                         track_type=track_type,
+                         visibility='hide',
+                         priority=self.priority,
+                         track_file=bigbed_file_path,
+                         track_color=self.track_color,
+                         group_name=self.group_name)
+
+        # Parenthesised single-argument form: same output under Python 2,
+        # and no longer a syntax error under Python 3
+        print("- BigBed %s created" % self.name_bigbed)
+
+    def determine_track_type(self, bb_file):
+        """
+        Determine the number of standard and extra fields using bigBedSummary
+
+        :param bb_file: path to a bigBed file
+
+        :returns: the bigBed track type: "bigBed <N> ." when the number of bed
+            definition fields equals the total number of fields, otherwise
+            "bigBed <N> +"
+        :raises RuntimeError: if bigBedSummary does not report the field counts
+        """
+        pattern = r"(\d+) bed definition fields, (\d+) total fields"
+
+        # Read the whole report at once: communicate() drains the pipe, closes
+        # it and waits for the process, so nothing is left dangling
+        cmd_ph = Popen(["bigBedSummary", "-fields", bb_file, "stdout"],
+                       stdout=PIPE, universal_newlines=True)
+        output, _ = cmd_ph.communicate()
+
+        for line in output.splitlines():
+            match = re.match(pattern, line)
+            if match:
+                extra_mark = "." if match.group(1) == match.group(2) else "+"
+                return "bigBed %s %s" % (match.group(1), extra_mark)
+
+        # No matching line used to end in an UnboundLocalError on bed_type;
+        # report what actually went wrong instead
+        raise RuntimeError(
+            "bigBedSummary (exit code %s) did not report the field counts of %s"
+            % (cmd_ph.returncode, bb_file))
--- a/cytoBand.py	Tue May 16 18:09:00 2017 -0400
+++ b/cytoBand.py	Wed May 17 13:16:57 2017 -0400
@@ -48,6 +48,7 @@
                          track_name='cytoBandIdeo',
                          long_label=self.long_label,
                          track_type='bigBed',
+                         visibility='dense',
                          priority=self.priority,
                          track_file=myBigBedFilePath,
                          track_color=self.track_color,
--- a/hubArchiveCreator.py	Tue May 16 18:09:00 2017 -0400
+++ b/hubArchiveCreator.py	Wed May 17 13:16:57 2017 -0400
@@ -30,6 +30,7 @@
 from TrackHub import TrackHub
 from bigPsl import bigPsl
 from BedBlastAlignments import BedBlastAlignments
+from BigBed import BigBed

 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort

@@ -74,6 +75,9 @@
     # Psl Management
     parser.add_argument('--psl', action='append', help='Psl format')

+    # BigBed Management
+    parser.add_argument('--bigbed', action='append', help='BigBed format')
+
     # TODO: Check if the running directory can have issues if we run the tool outside
     parser.add_argument('-d', '--directory',
                         help='Running tool directory, where to find the templates. Default is running directory')
@@ -137,6 +141,7 @@
     array_inputs_psl = args.psl
     array_inputs_bigpsl = args.bigpsl
     array_inputs_bed_blast_alignments = args.bedBlastAlignments
+    array_inputs_bigbed = args.bigbed

     outputFile = args.output

@@ -165,7 +170,8 @@
                         (array_inputs_gtf, Gtf),
                         (array_inputs_psl, Psl),
                         (array_inputs_bigpsl, bigPsl),
-                        (array_inputs_bed_blast_alignments, BedBlastAlignments)]:
+                        (array_inputs_bed_blast_alignments, BedBlastAlignments),
+                        (array_inputs_bigbed, BigBed)]:
         if inputs:
             all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data))
--- a/hubArchiveCreator.xml	Tue May 16 18:09:00 2017 -0400
+++ b/hubArchiveCreator.xml	Wed May 17 13:16:57 2017 -0400
@@ -5,9 +5,10 @@
     </description>

     <requirements>
-        <requirement type="package" version="1.0">ucsc_tools_340</requirement>
+        <requirement type="package" version="340">ucsc_hac</requirement>
         <requirement type="package" version="1.2">samtools</requirement>
         <requirement type="package" version="340">ucsc_bigwig</requirement>
+        <requirement type="package" version="340">ucsc_bigbed</requirement>
     </requirements>

     <stdio>
@@ -127,6 +128,11 @@
                     #silent $prepare_json($f.formatChoice.BIGWIG, $index_track_final,
                                             extra_data_dict)
                 #end if
+                #if $f.formatChoice.format_select == "bigbed"
+                    --bigbed $f.formatChoice.BIGBED
+                    #silent $prepare_json($f.formatChoice.BIGBED, $index_track_final,
+                                            extra_data_dict)
+                #end if
                 #if $f.formatChoice.format_select == "gff3"
                     --gff3 $f.formatChoice.GFF3
                     #silent $prepare_json($f.formatChoice.GFF3, $index_track_final,
@@ -186,6 +192,7 @@
                         <option value="bed">BED</option>
                         <option value="psl">PSL</option>
                         <option value="bigwig">BIGWIG</option>
+                        <option value="bigbed">BIGBED</option>
                         <option value="gff3">GFF3</option>
                         <option value="gtf">GTF</option>
                     </param>
@@ -308,6 +315,22 @@
                             </sanitizer>
                         </param>
                     </when>
+                    <when value="bigbed">
+                        <param
+                                format="bigbed"
+                                name="BIGBED"
+                                type="data"
+                                label="BIGBED File"
+                        />
+                        <param name="longLabel" type="text" size="30" label="Track name" />
+                        <param name="track_color" type="color" label="Track color" value="#000000">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                    </when>
                     <when value="gff3">
                         <param
                                 format="gff3"
--- a/tool_dependencies.xml	Tue May 16 18:09:00 2017 -0400
+++ b/tool_dependencies.xml	Wed May 17 13:16:57 2017 -0400
@@ -1,6 +1,5 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <!-- UCSC Tools in  -->
     <!-- Useful for HAC are:
         - twoBitInfo
         - sort
@@ -16,75 +15,15 @@
     <package name="samtools" version="1.2">
         <repository changeset_revision="5b7172f9b230" name="package_samtools_1_2" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
-
-    <package name="ucsc_tools_340" version="1.0">
-        <install version="1.0">
-            <actions_group>
-                <actions architecture="x86_64" os="linux">
-                    <action type="download_by_url">http://old-gep.wustl.edu/~galaxy/ucsc_tools_340.tar.gz</action>
-                    <action type="move_directory_files">
-                        <source_directory>.</source_directory>
-                        <destination_directory>$INSTALL_DIR/bin</destination_directory>
-                    </action>
-                </actions>
-                <action type="set_environment">
-                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
-                </action>
-            </actions_group>
-         </install>
-         <readme>The well known UCSC tools from Jim Kent.</readme>
+    <package name="ucsc_hac" version="340">
+        <repository changeset_revision="1368b25a0ae0" name="package_ucsc_hac_340" owner="yating-l" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="ucsc_bigbed" version="340">
+        <repository changeset_revision="dd9b230c079d" name="package_ucsc_bigbed_340" owner="yating-l" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>

-    <!-- package ucsc_bigwig is created by Wilson Leung -->
-    <package name="ucsc_bigwig" version="340">
-        <install version="1.0">
-            <actions_group>
-                <actions architecture="x86_64" os="darwin">
-                    <action sha256sum="a34c57a9fb3c36a984b8fa879a99697c994cd981a1277663d372638e4dec8bb2" type="download_by_url">
-                        http://old-gep.wustl.edu/~wilson/packages/ucsc_bigwig/ucsc_bigwig_340_macOS_x86_64.tar.gz
-                    </action>
-                    <action type="move_directory_files">
-                        <source_directory>bin</source_directory>
-                        <destination_directory>$INSTALL_DIR/bin</destination_directory>
-                    </action>
-                </actions>
-
-                <actions architecture="x86_64" os="linux">
-                    <action sha256sum="0d2bd886e312980e0ae58ae912315beeeac612fd1783c959b4eabd62cffd8512" type="download_by_url">
-                        http://old-gep.wustl.edu/~wilson/packages/ucsc_bigwig/ucsc_bigwig_340_linux_x86_64.tar.gz
-                    </action>
-                    <action type="move_directory_files">
-                        <source_directory>bin</source_directory>
-                        <destination_directory>$INSTALL_DIR/bin</destination_directory>
-                    </action>
-                </actions>
+    <package name="ucsc_bigwig" version="340">
+        <repository changeset_revision="06411298fa7d" name="package_ucsc_bigwig_340" owner="yating-l" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>

-                <!-- Display error message for unsupported OS and CPU architecture -->
-                <actions>
-                    <action type="shell_command">
-                        echo "ERROR: This package only supports 64-bit systems running macOS or Linux"
-                    </action>
-                    <action type="shell_command">false</action>
-                </actions>
-
-                <!-- update $PATH environment variable -->
-                <action type="set_environment">
-                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
-                </action>
-            </actions_group>
-        </install>
-        <readme>
-
-            This package contains the utilities for constructing
-            `bigWig files &lt;https://genome.ucsc.edu/goldenpath/help/bigWig.html&gt;`_.
-            The utilities were created by the
-            `Genome Bioinformatics Group &lt;https://genome.ucsc.edu/staff.html&gt;`_
-            at the UCSC Genomics Institute.
-
-            The bigWig file format is designed to store dense continuous datasets
-            and it is compatible with many genome browsers (e.g., UCSC Genome Browser,
-            JBrowse, IGV).
-
-        </readme>
-    </package>
 </tool_dependency>
--- a/util/subtools.py	Tue May 16 18:09:00 2017 -0400
+++ b/util/subtools.py	Wed May 17 13:16:57 2017 -0400
@@ -250,6 +250,8 @@
 # See the "track" Common settings at:
 #https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html#bigPsl_-_Pairwise_Alignments
 def fixName(filename):
+    if filename == 'cytoBandIdeo':
+        return filename
     valid_chars = "_%s%s" % (string.ascii_letters, string.digits)
     sanitize_name = ''.join([c if c in valid_chars else '_' for c in filename])
     sanitize_name = "gonramp_" + sanitize_name