Mercurial > repos > yating-l > hubarchivecreator
changeset 55:ba9997c847dc draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit b2894a39921adaa6a93b12523820b61a78e11f20-dirty
author | yating-l |
---|---|
date | Wed, 17 May 2017 13:16:57 -0400 |
parents | 4a58094b051e |
children | 04cc7c2e7b47 |
files | BigBed.py cytoBand.py hubArchiveCreator.py hubArchiveCreator.xml tool_dependencies.xml util/subtools.py |
diffstat | 6 files changed, 121 insertions(+), 71 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BigBed.py Wed May 17 13:16:57 2017 -0400 @@ -0,0 +1,79 @@ +#!/usr/bin/python + +import os +import shutil +from subprocess import Popen, PIPE +import re + +# Internal dependencies +from Datatype import Datatype + +class BigBed(Datatype): + """ Configurations for creating the bigBed evidence track """ + + def __init__(self, input_bigbed_path, data_bigbed): + super(BigBed, self).__init__() + + self.track = None + + self.input_bigbed_path = input_bigbed_path + self.name_bigbed = data_bigbed["name"] + self.priority = data_bigbed["order_index"] + self.track_color = data_bigbed["track_color"] + self.group_name = data_bigbed["group_name"] + + track_name = "".join((self.name_bigbed, ".bigbed")) + if data_bigbed["long_label"]: + self.long_label = data_bigbed["long_label"] + else: + self.long_label = self.name_bigbed + + bigbed_file_path = os.path.join(self.myTrackFolderPath, track_name) + + track_type = self.determine_track_type(input_bigbed_path) + + shutil.copy(self.input_bigbed_path, bigbed_file_path) + + # Create the Track Object + self.createTrack(file_path=track_name, + track_name=track_name, + long_label=self.long_label, + track_type=track_type, + visibility='hide', + priority=self.priority, + track_file=bigbed_file_path, + track_color=self.track_color, + group_name=self.group_name) + + print "- BigBed %s created" % self.name_bigbed + + + def determine_track_type(self, bb_file): + """ + Determine the number of standard and extra fields using bigBedSummary + + Implementation of reading from stdout is based on a Stackoverflow post: + http://stackoverflow.com/questions/2715847/python-read-streaming-input-from-subprocess-communicate + + :param bb_file: path to a bigBed file + + :returns: the bigBed track type + """ + + cmd_ph = Popen(["bigBedSummary", "-fields", bb_file, "stdout"], + stdout=PIPE, bufsize=1) + + pattern = r"(\d+) bed definition fields, (\d+) total fields" + + with cmd_ph.stdout: + for line in iter(cmd_ph.stdout.readline, b''): + match = re.match(pattern, line) + + if match: + extra_mark = "." if match.group(1) == match.group(2) else "+" + bed_type = "bigBed %s %s" % (match.group(1), extra_mark) + break + + cmd_ph.wait() + + return bed_type
--- a/cytoBand.py Tue May 16 18:09:00 2017 -0400 +++ b/cytoBand.py Wed May 17 13:16:57 2017 -0400 @@ -48,6 +48,7 @@ track_name='cytoBandIdeo', long_label=self.long_label, track_type='bigBed', + visibility='dense', priority=self.priority, track_file=myBigBedFilePath, track_color=self.track_color,
--- a/hubArchiveCreator.py Tue May 16 18:09:00 2017 -0400 +++ b/hubArchiveCreator.py Wed May 17 13:16:57 2017 -0400 @@ -30,6 +30,7 @@ from TrackHub import TrackHub from bigPsl import bigPsl from BedBlastAlignments import BedBlastAlignments +from BigBed import BigBed # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort @@ -74,6 +75,9 @@ # Psl Management parser.add_argument('--psl', action='append', help='Psl format') + # BigBed Management + parser.add_argument('--bigbed', action='append', help='BigBed format') + # TODO: Check if the running directory can have issues if we run the tool outside parser.add_argument('-d', '--directory', help='Running tool directory, where to find the templates. Default is running directory') @@ -137,6 +141,7 @@ array_inputs_psl = args.psl array_inputs_bigpsl = args.bigpsl array_inputs_bed_blast_alignments = args.bedBlastAlignments + array_inputs_bigbed = args.bigbed outputFile = args.output @@ -165,7 +170,8 @@ (array_inputs_gtf, Gtf), (array_inputs_psl, Psl), (array_inputs_bigpsl, bigPsl), - (array_inputs_bed_blast_alignments, BedBlastAlignments)]: + (array_inputs_bed_blast_alignments, BedBlastAlignments), + (array_inputs_bigbed, BigBed)]: if inputs: all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data))
--- a/hubArchiveCreator.xml Tue May 16 18:09:00 2017 -0400 +++ b/hubArchiveCreator.xml Wed May 17 13:16:57 2017 -0400 @@ -5,9 +5,10 @@ </description> <requirements> - <requirement type="package" version="1.0">ucsc_tools_340</requirement> + <requirement type="package" version="340">ucsc_hac</requirement> <requirement type="package" version="1.2">samtools</requirement> <requirement type="package" version="340">ucsc_bigwig</requirement> + <requirement type="package" version="340">ucsc_bigbed</requirement> </requirements> <stdio> @@ -127,6 +128,11 @@ #silent $prepare_json($f.formatChoice.BIGWIG, $index_track_final, extra_data_dict) #end if + #if $f.formatChoice.format_select == "bigbed" + --bigbed $f.formatChoice.BIGBED + #silent $prepare_json($f.formatChoice.BIGBED, $index_track_final, + extra_data_dict) + #end if #if $f.formatChoice.format_select == "gff3" --gff3 $f.formatChoice.GFF3 #silent $prepare_json($f.formatChoice.GFF3, $index_track_final, @@ -186,6 +192,7 @@ <option value="bed">BED</option> <option value="psl">PSL</option> <option value="bigwig">BIGWIG</option> + <option value="bigbed">BIGBED</option> <option value="gff3">GFF3</option> <option value="gtf">GTF</option> </param> @@ -308,6 +315,22 @@ </sanitizer> </param> </when> + <when value="bigbed"> + <param + format="bigbed" + name="BIGBED" + type="data" + label="BIGBED File" + /> + <param name="longLabel" type="text" size="30" label="Track name" /> + <param name="track_color" type="color" label="Track color" value="#000000"> + <sanitizer> + <valid initial="string.letters,string.digits"> + <add value="#"/> + </valid> + </sanitizer> + </param> + </when> <when value="gff3"> <param format="gff3"
--- a/tool_dependencies.xml Tue May 16 18:09:00 2017 -0400 +++ b/tool_dependencies.xml Wed May 17 13:16:57 2017 -0400 @@ -1,6 +1,5 @@ <?xml version="1.0"?> <tool_dependency> - <!-- UCSC Tools in --> <!-- Useful for HAC are: - twoBitInfo - sort @@ -16,75 +15,15 @@ <package name="samtools" version="1.2"> <repository changeset_revision="5b7172f9b230" name="package_samtools_1_2" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> - - <package name="ucsc_tools_340" version="1.0"> - <install version="1.0"> - <actions_group> - <actions architecture="x86_64" os="linux"> - <action type="download_by_url">http://old-gep.wustl.edu/~galaxy/ucsc_tools_340.tar.gz</action> - <action type="move_directory_files"> - <source_directory>.</source_directory> - <destination_directory>$INSTALL_DIR/bin</destination_directory> - </action> - </actions> - <action type="set_environment"> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - </action> - </actions_group> - </install> - <readme>The well known UCSC tools from Jim Kent.</readme> + <package name="ucsc_hac" version="340"> + <repository changeset_revision="1368b25a0ae0" name="package_ucsc_hac_340" owner="yating-l" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> + <package name="ucsc_bigbed" version="340"> + <repository changeset_revision="dd9b230c079d" name="package_ucsc_bigbed_340" owner="yating-l" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> - <!-- package ucsc_bigwig is created by Wilson Leung --> - <package name="ucsc_bigwig" version="340"> - <install version="1.0"> - <actions_group> - <actions architecture="x86_64" os="darwin"> - <action sha256sum="a34c57a9fb3c36a984b8fa879a99697c994cd981a1277663d372638e4dec8bb2" type="download_by_url"> - http://old-gep.wustl.edu/~wilson/packages/ucsc_bigwig/ucsc_bigwig_340_macOS_x86_64.tar.gz - </action> - <action type="move_directory_files"> - <source_directory>bin</source_directory> - <destination_directory>$INSTALL_DIR/bin</destination_directory> - </action> - </actions> - - <actions architecture="x86_64" os="linux"> - <action sha256sum="0d2bd886e312980e0ae58ae912315beeeac612fd1783c959b4eabd62cffd8512" type="download_by_url"> - http://old-gep.wustl.edu/~wilson/packages/ucsc_bigwig/ucsc_bigwig_340_linux_x86_64.tar.gz - </action> - <action type="move_directory_files"> - <source_directory>bin</source_directory> - <destination_directory>$INSTALL_DIR/bin</destination_directory> - </action> - </actions> + <package name="ucsc_bigwig" version="340"> + <repository changeset_revision="06411298fa7d" name="package_ucsc_bigwig_340" owner="yating-l" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> - <!-- Display error message for unsupported OS and CPU architecture --> - <actions> - <action type="shell_command"> - echo "ERROR: This package only supports 64-bit systems running macOS or Linux" - </action> - <action type="shell_command">false</action> - </actions> - - <!-- update $PATH environment variable --> - <action type="set_environment"> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - </action> - </actions_group> - </install> - <readme> - - This package contains the utilities for constructing - `bigWig files <https://genome.ucsc.edu/goldenpath/help/bigWig.html>`_. - The utilities were created by the - `Genome Bioinformatics Group <https://genome.ucsc.edu/staff.html>`_ - at the UCSC Genomics Institute. - - The bigWig file format is designed to store dense continuous datasets - and it is compatible with many genome browsers (e.g., UCSC Genome Browser, - JBrowse, IGV). - - </readme> - </package> </tool_dependency>
--- a/util/subtools.py Tue May 16 18:09:00 2017 -0400 +++ b/util/subtools.py Wed May 17 13:16:57 2017 -0400 @@ -250,6 +250,8 @@ # See the "track" Common settings at: #https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html#bigPsl_-_Pairwise_Alignments def fixName(filename): + if filename == 'cytoBandIdeo': + return filename valid_chars = "_%s%s" % (string.ascii_letters, string.digits) sanitize_name = ''.join([c if c in valid_chars else '_' for c in filename]) sanitize_name = "gonramp_" + sanitize_name