changeset 52:c66803bff0cc draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit adc1ac50269e02570e7ce12c732637bdd3f9a547-dirty
author yating-l
date Thu, 11 May 2017 17:21:15 -0400
parents 364b8db8de17
children 44059c9e7c02
files Bam.py Bam.pyc Bed.py Bed.pyc BedBlastAlignments.py BedBlastAlignments.pyc BedSimpleRepeats.py BedSimpleRepeats.pyc BedSpliceJunctions.py BedSpliceJunctions.pyc BigWig.py BigWig.pyc Datatype.py Datatype.pyc Gff3.py Gff3.pyc Gtf.py Gtf.pyc Psl.py Psl.pyc bigPsl.py bigPsl.pyc hubArchiveCreator.xml templates/trackDb/layout.txt tool_dependencies.xml
diffstat 25 files changed, 183 insertions(+), 61 deletions(-) [+]
line wrap: on
line diff
--- a/Bam.py	Wed Apr 12 16:51:03 2017 -0400
+++ b/Bam.py	Thu May 11 17:21:15 2017 -0400
@@ -37,7 +37,10 @@
 
         # TODO: Think about how to avoid repetition of the group_name everywhere
         self.group_name = self.data_bam["group_name"]
-
+        if self.data_bam["long_label"]:
+            self.long_label = self.data_bam["long_label"]
+        else:
+            self.long_label = self.name_bam
         # First: Add the bam file
         # Second: Add the bam index file, in the same folder (https://genome.ucsc.edu/goldenpath/help/bam.html)
 
@@ -52,7 +55,7 @@
         # Create the Track Object
         self.createTrack(file_path=self.name_bam,
                          track_name=self.name_bam,
-                         long_label=self.name_bam, track_type='bam', visibility='pack', priority=self.priority,
+                         long_label=self.long_label, track_type='bam', visibility='pack', priority=self.priority,
                          track_file=bam_index_file_path,
                          track_color=self.track_color,
                          group_name=self.group_name
Binary file Bam.pyc has changed
--- a/Bed.py	Wed Apr 12 16:51:03 2017 -0400
+++ b/Bed.py	Thu May 11 17:21:15 2017 -0400
@@ -26,7 +26,10 @@
         self.track_color = self.data_bed_generic["track_color"]
         # TODO: Think about how to avoid repetition of the group_name everywhere
         self.group_name = self.data_bed_generic["group_name"]
-
+        if self.data_bed_generic["long_label"]:
+            self.long_label = self.data_bed_generic["long_label"]
+        else:
+            self.long_label = self.name_bed_generic
 
         # Sort processing
         subtools.sort(self.inputBedGeneric, self.sortedBedFile.name)
@@ -44,7 +47,7 @@
         # Create the Track Object
         self.createTrack(file_path=trackName,
                          track_name=trackName,
-                         long_label=self.name_bed_generic, track_type='bigBed', visibility='dense',
+                         long_label=self.long_label, track_type='bigBed', visibility='dense',
                          priority=self.priority,
                          track_file=myBigBedFilePath,
                          track_color=self.track_color,
Binary file Bed.pyc has changed
--- a/BedBlastAlignments.py	Wed Apr 12 16:51:03 2017 -0400
+++ b/BedBlastAlignments.py	Thu May 11 17:21:15 2017 -0400
@@ -21,7 +21,10 @@
         self.track_color = data_bed_blast_alignments["track_color"]
         # TODO: Think about how to avoid repetition of the group_name everywhere
         self.group_name = data_bed_blast_alignments["group_name"]
-
+        if data_bed_blast_alignments["long_label"]:
+            self.long_label = data_bed_blast_alignments["long_label"]
+        else:
+            self.long_label = self.name_bed_blast_alignments
         #sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
 
         # Sort processing
@@ -45,7 +48,7 @@
 
         self.createTrack(file_path=trackName,
                          track_name=trackName,
-                         long_label=self.name_bed_blast_alignments, track_type='bigBed 12 +', visibility='dense',
+                         long_label=self.long_label, track_type='bigBed 12 +', visibility='dense',
                          priority=self.priority,
                          track_file=myBigBedFilePath,
                          track_color=self.track_color,
Binary file BedBlastAlignments.pyc has changed
--- a/BedSimpleRepeats.py	Wed Apr 12 16:51:03 2017 -0400
+++ b/BedSimpleRepeats.py	Thu May 11 17:21:15 2017 -0400
@@ -20,6 +20,10 @@
         self.track_color = data_bed_simple_repeats["track_color"]
         # TODO: Think about how to avoid repetition of the group_name everywhere
         self.group_name = data_bed_simple_repeats["group_name"]
+        if data_bed_simple_repeats["long_label"]:
+            self.long_label = data_bed_simple_repeats["long_label"]
+        else:
+            self.long_label = self.name_bed_simple_repeats
 
         sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
 
@@ -43,7 +47,7 @@
         # Create the Track Object
         self.createTrack(file_path=trackName,
                          track_name=trackName,
-                         long_label=self.name_bed_simple_repeats, track_type='bigBed 4 +', visibility='dense',
+                         long_label=self.long_label, track_type='bigBed 4 +', visibility='dense',
                          priority=self.priority,
                          track_file=myBigBedFilePath,
                          track_color=self.track_color,
Binary file BedSimpleRepeats.pyc has changed
--- a/BedSpliceJunctions.py	Wed Apr 12 16:51:03 2017 -0400
+++ b/BedSpliceJunctions.py	Thu May 11 17:21:15 2017 -0400
@@ -20,7 +20,10 @@
         self.track_color = data_bed_splice_junctions["track_color"]
         # TODO: Think about how to avoid repetition of the group_name everywhere
         self.group_name = data_bed_splice_junctions["group_name"]
-
+        if data_bed_splice_junctions["long_label"]:
+            self.long_label = data_bed_splice_junctions["long_label"]
+        else:
+            self.long_label = self.name_bed_splice_junctions
         sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
 
         # Sort processing
@@ -43,7 +46,7 @@
         # Create the Track Object
         self.createTrack(file_path=trackName,
                          track_name=trackName,
-                         long_label=self.name_bed_splice_junctions, track_type='bigBed 12 +', visibility='dense',
+                         long_label=self.long_label, track_type='bigBed 12 +', visibility='dense',
                          priority=self.priority,
                          track_file=myBigBedFilePath,
                          track_color=self.track_color,
Binary file BedSpliceJunctions.pyc has changed
--- a/BigWig.py	Wed Apr 12 16:51:03 2017 -0400
+++ b/BigWig.py	Thu May 11 17:21:15 2017 -0400
@@ -2,6 +2,8 @@
 
 import os
 import shutil
+from subprocess import Popen, PIPE
+import re
 
 # Internal dependencies
 from Datatype import Datatype
@@ -21,7 +23,10 @@
         self.track_color = data_bigwig["track_color"]
         # TODO: Think about how to avoid repetition of the group_name everywhere
         self.group_name = data_bigwig["group_name"]
-
+        if data_bigwig["long_label"]:
+            self.long_label = data_bigwig["long_label"]
+        else:
+            self.long_label = self.name_bigwig
         #print "Creating TrackHub BigWig from (falsePath: %s; name: %s)" % ( self.input_bigwig_path, self.name_bigwig )
 
         trackName = "".join( ( self.name_bigwig, ".bigwig" ) )
@@ -32,31 +37,38 @@
         # Create the Track Object
         self.createTrack(file_path=trackName,
                          track_name=trackName,
-                         long_label=self.name_bigwig,
-                         track_type='bigWig', visibility='full',
+                         long_label=self.long_label,
+                         track_type=self.determine_track_type(myBigWigFilePath),
+                         visibility='full',
                          priority=self.priority,
                          track_file=myBigWigFilePath,
                          track_color=self.track_color,
                          group_name=self.group_name)
 
-        # dataURL = "tracks/%s" % trackName
-        #
-        # # Return the BigBed track
-        #
-        # trackDb = TrackDb(
-        #     trackName=trackName,
-        #     longLabel=self.name_bigwig,
-        #     shortLabel=self.getShortName( self.name_bigwig ),
-        #     trackDataURL=dataURL,
-        #     trackType='bigWig',
-        #     visibility='full',
-        #     priority=self.priority,
-        # )
-        #
-        # self.track = Track(
-        #     trackFile=myBigWigFilePath,
-        #     trackDb=trackDb,
-        # )
-
         print("- BigWig %s created" % self.name_bigwig)
         #print("- %s created in %s" % (trackName, myBigWigFilePath))
+
+    def determine_track_type(self, bw_file):
+        """
+        bigWig tracks must declare the expected signal range for the data
+        (See https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html).
+        This method determines the range of values for a bigWig file using
+        the bigWigInfo program.
+
+        Implementation of reading from stdout is based on a Stackoverflow post:
+        http://stackoverflow.com/questions/2715847/python-read-streaming-input-from-subprocess-communicate
+
+        :param bw_file: path to a bigWig file
+
+        :returns: the bigWig track type
+        """
+        cmd_ph = Popen(["bigWigInfo", "-minMax", bw_file],
+                       stdout=PIPE, bufsize=1)
+
+        with cmd_ph.stdout:
+            for line in iter(cmd_ph.stdout.readline, b''):
+                bw_type = "bigWig %s" % line.rstrip()
+
+        cmd_ph.wait()
+
+        return bw_type
Binary file BigWig.pyc has changed
--- a/Datatype.py	Wed Apr 12 16:51:03 2017 -0400
+++ b/Datatype.py	Thu May 11 17:21:15 2017 -0400
@@ -66,7 +66,7 @@
     # TODO: Rename for PEP8
     def getShortName( self, name_to_shortify ):
         # Slice to get from Long label the short label
-        short_label_slice = slice(0, 15)
+        short_label_slice = slice(0, 17)
 
         return name_to_shortify[short_label_slice]
 
Binary file Datatype.pyc has changed
--- a/Gff3.py	Wed Apr 12 16:51:03 2017 -0400
+++ b/Gff3.py	Thu May 11 17:21:15 2017 -0400
@@ -22,7 +22,10 @@
         self.track_color = data_gff3["track_color"]
         # TODO: Think about how to avoid repetition of the group_name everywhere
         self.group_name = data_gff3["group_name"]
-
+        if data_gff3["long_label"]:
+            self.long_label = data_gff3["long_label"]
+        else:
+            self.long_label = self.name_gff3
         # TODO: See if we need these temporary files as part of the generated files
         unsorted_genePred_file = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred")
         unsorted_bigGenePred_file = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsorted.bigGenePred")
@@ -57,7 +60,7 @@
         # Create the Track Object
         self.createTrack(file_path=trackName,
                          track_name=trackName,
-                         long_label=self.name_gff3,
+                         long_label=self.long_label,
                          track_type='bigGenePred', visibility='dense',
                          priority=self.priority,
                          track_file=myBigBedFilePath,
Binary file Gff3.pyc has changed
--- a/Gtf.py	Wed Apr 12 16:51:03 2017 -0400
+++ b/Gtf.py	Thu May 11 17:21:15 2017 -0400
@@ -28,7 +28,10 @@
         self.track_color = data_gtf["track_color"]
         # TODO: Think about how to avoid repetition of the group_name everywhere
         self.group_name = data_gtf["group_name"]
-
+        if data_gtf["long_label"]:
+            self.long_label = data_gtf["long_label"]
+        else:
+            self.long_label = self.name_gtf
         #print "Creating TrackHub GTF from (falsePath: %s; name: %s)" % ( self.input_gtf_false_path, self.name_gtf)
 
         # TODO: See if we need these temporary files as part of the generated files
@@ -69,7 +72,7 @@
         # Create the Track Object
         self.createTrack(file_path=trackName,
                          track_name=trackName,
-                         long_label=self.name_gtf, track_type='bigGenePred',
+                         long_label=self.long_label, track_type='bigGenePred',
                          visibility='dense', priority=self.priority,
                          track_file=myBigBedFilePath,
                          track_color=self.track_color,
Binary file Gtf.pyc has changed
--- a/Psl.py	Wed Apr 12 16:51:03 2017 -0400
+++ b/Psl.py	Thu May 11 17:21:15 2017 -0400
@@ -19,7 +19,10 @@
         self.track_color = data_psl["track_color"]
         # TODO: Think about how to avoid repetition of the group_name everywhere
         self.group_name = data_psl["group_name"]
-
+        if data_psl["long_label"]:
+            self.long_label = data_psl["long_label"]
+        else:
+            self.long_label = self.name_psl
         # Temporary files
         unsorted_bed_formatted_psl_file = tempfile.NamedTemporaryFile(suffix='.psl')
         sorted_bed_formatted_psl_file = tempfile.NamedTemporaryFile(suffix='psl')
@@ -50,7 +53,7 @@
         # Create the Track Object
         self.createTrack(file_path=trackName,
                          track_name=trackName,
-                         long_label=self.name_psl,
+                         long_label=self.long_label,
                          track_type='bigPsl', visibility='dense',
                          priority=self.priority,
                          track_file=my_big_psl_file_path,
Binary file Psl.pyc has changed
--- a/bigPsl.py	Wed Apr 12 16:51:03 2017 -0400
+++ b/bigPsl.py	Thu May 11 17:21:15 2017 -0400
@@ -21,7 +21,10 @@
         self.track_color = data_bigpsl["track_color"]
         # TODO: Think about how to avoid repetition of the group_name everywhere
         self.group_name = data_bigpsl["group_name"]
-
+        if data_bigpsl["long_label"]:
+            self.long_label = data_bigpsl["long_label"]
+        else:
+            self.long_label = self.name_bigpsl
         #sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
 
         # Sort processing
@@ -45,7 +48,7 @@
 
         self.createTrack(file_path=trackName,
                          track_name=trackName,
-                         long_label=self.name_bigpsl, track_type='bigPsl', visibility='dense',
+                         long_label=self.long_label, track_type='bigPsl', visibility='dense',
                          priority=self.priority,
                          track_file=myBigBedFilePath,
                          track_color=self.track_color,
Binary file bigPsl.pyc has changed
--- a/hubArchiveCreator.xml	Wed Apr 12 16:51:03 2017 -0400
+++ b/hubArchiveCreator.xml	Thu May 11 17:21:15 2017 -0400
@@ -7,6 +7,7 @@
     <requirements>
         <requirement type="package" version="1.0">ucsc_tools_340</requirement>
         <requirement type="package" version="1.2">samtools</requirement>
+        <requirement type="package" version="340">ucsc_bigwig</requirement>
     </requirements>
 
     <stdio>
@@ -70,9 +71,10 @@
                 ## For each format, we have a few mandatory fields we store in a dict
                 #set track_color = str($f.formatChoice.track_color)
                 #set group_name = str($g.group_name)
-
+                #set longLabel = str($f.formatChoice.longLabel)
                 #set extra_data_dict = {"track_color": $track_color,
-                                        "group_name": $group_name}
+                                        "group_name": $group_name,
+                                        "long_label": $longLabel}
 
                 #if $f.formatChoice.format_select == "bam"
                     --bam $f.formatChoice.BAM
@@ -101,12 +103,12 @@
                     #end if
                     #if $f.formatChoice.bedChoice.bed_select == "bed_blast_alignment_option"
                         --bedBlastAlignments $f.formatChoice.bedChoice.BED_blast_alignment
-                        #silent $prepare_json($f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final,                              
+                        #silent $prepare_json($f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final,
                                              extra_data_dict)
                     #end if
                     #if $f.formatChoice.bedChoice.bed_select == "bed_blat_alignment_option"
                         --bigpsl $f.formatChoice.bedChoice.BED_blat_alignment
-                        #silent $prepare_json($f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final,                              
+                        #silent $prepare_json($f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final,
                                              extra_data_dict)
                     #end if
                 #end if
@@ -190,6 +192,7 @@
                                 type="data"
                                 label="BAM File"
                         />
+                        <param name="longLabel" type="text" size="80" label="Track label" />
                         <!-- TODO: Find a solution to avoid repetition and to generate a new color depending on the others -->
                         <param name="track_color" type="color" label="Track color" value="#000000">
                             <sanitizer>
@@ -198,7 +201,7 @@
                                 </valid>
                             </sanitizer>
                         </param>
-                    </when>     
+                    </when>
                     <when value="bed">
                         <conditional name="bedChoice">
                             <param name="bed_select" type="select" label="Bed Choice">
@@ -250,6 +253,7 @@
                                 />
                             </when>
                         </conditional>
+                        <param name="longLabel" type="text" size="30" label="Track name" />
                         <param name="track_color" type="color" label="Track color" value="#000000">
                             <sanitizer>
                                 <valid initial="string.letters,string.digits">
@@ -265,6 +269,7 @@
                                 type="data"
                                 label="PSL File"
                         />
+                        <param name="longLabel" type="text" size="30" label="Track name" />
                         <param name="track_color" type="color" label="Track color" value="#000000">
                             <sanitizer>
                                 <valid initial="string.letters,string.digits">
@@ -280,6 +285,7 @@
                                 type="data"
                                 label="BIGWIG File"
                         />
+                        <param name="longLabel" type="text" size="30" label="Track name" />
                         <param name="track_color" type="color" label="Track color" value="#000000">
                             <sanitizer>
                                 <valid initial="string.letters,string.digits">
@@ -295,6 +301,7 @@
                                 type="data"
                                 label="GFF3 File"
                         />
+                        <param name="longLabel" type="text" size="30" label="Track name" />
                         <param name="track_color" type="color" label="Track color" value="#000000">
                             <sanitizer>
                                 <valid initial="string.letters,string.digits">
@@ -310,6 +317,7 @@
                                 type="data"
                                 label="GTF File"
                         />
+                        <param name="longLabel" type="text" size="30" label="Track name" />
                         <param name="track_color" type="color" label="Track color" value="#000000">
                             <sanitizer>
                                 <valid initial="string.letters,string.digits">
@@ -467,7 +475,7 @@
                 />
             </output>
         </test>
-        
+
 
         <!-- Test with Psl -->
         <test>
@@ -1022,4 +1030,8 @@
         This Galaxy tool permits to prepare your files to be ready for
         Assembly Hub visualization.
     </help>
+
+    <citations>
+        <citation type="doi">10.7490/f1000research.1112719.1</citation>
+    </citations>
 </tool>
--- a/templates/trackDb/layout.txt	Wed Apr 12 16:51:03 2017 -0400
+++ b/templates/trackDb/layout.txt	Thu May 11 17:21:15 2017 -0400
@@ -1,14 +1,32 @@
 % for trackDb in trackDbs:
-    ## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html
-    track ${trackDb.trackName}
-    longLabel ${trackDb.longLabel}
-    shortLabel ${trackDb.shortLabel}
-    bigDataUrl ${trackDb.trackDataURL}
-    type ${trackDb.trackType}
-    visibility ${trackDb.visibility}
-    thickDrawItem ${trackDb.thickDrawItem}
-    priority ${trackDb.priority}
-    color ${trackDb.track_color}
-    group ${trackDb.group_name.lower().replace(' ', '_')}
+    % if "bigWig" in trackDb.trackType:
+
+track ${trackDb.trackName}
+longLabel ${trackDb.longLabel}
+shortLabel ${trackDb.shortLabel}
+bigDataUrl ${trackDb.trackDataURL}
+visibility ${trackDb.visibility}
+priority ${trackDb.priority}
+color ${trackDb.track_color}
+group ${trackDb.group_name.lower().replace(' ', '_')}
+type ${trackDb.trackType}
+autoScale on
+maxHeightPixels 100:32:8
+windowingFunction mean+whiskers
 
+    % else:
+
+## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html
+track ${trackDb.trackName}
+longLabel ${trackDb.longLabel}
+shortLabel ${trackDb.shortLabel}
+bigDataUrl ${trackDb.trackDataURL}
+type ${trackDb.trackType}
+visibility ${trackDb.visibility}
+thickDrawItem ${trackDb.thickDrawItem}
+priority ${trackDb.priority}
+color ${trackDb.track_color}
+group ${trackDb.group_name.lower().replace(' ', '_')}
+
+    % endif
 % endfor
--- a/tool_dependencies.xml	Wed Apr 12 16:51:03 2017 -0400
+++ b/tool_dependencies.xml	Thu May 11 17:21:15 2017 -0400
@@ -16,16 +16,16 @@
     <package name="samtools" version="1.2">
         <repository changeset_revision="5b7172f9b230" name="package_samtools_1_2" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
-    
+
     <package name="ucsc_tools_340" version="1.0">
         <install version="1.0">
             <actions_group>
                 <actions architecture="x86_64" os="linux">
                     <action type="download_by_url">http://old-gep.wustl.edu/~galaxy/ucsc_tools_340.tar.gz</action>
                     <action type="move_directory_files">
-                        <source_directory>.</source_directory>                       
+                        <source_directory>.</source_directory>
                         <destination_directory>$INSTALL_DIR/bin</destination_directory>
-                    </action>    
+                    </action>
                 </actions>
                 <action type="set_environment">
                     <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
@@ -34,5 +34,57 @@
          </install>
          <readme>The well known UCSC tools from Jim Kent.</readme>
     </package>
-    
+
+    <!-- package ucsc_bigwig is created by Wilson Leung -->
+    <package name="ucsc_bigwig" version="340">
+        <install version="1.0">
+            <actions_group>
+                <actions architecture="x86_64" os="darwin">
+                    <action sha256sum="a34c57a9fb3c36a984b8fa879a99697c994cd981a1277663d372638e4dec8bb2" type="download_by_url">
+                        http://old-gep.wustl.edu/~wilson/packages/ucsc_bigwig/ucsc_bigwig_340_macOS_x86_64.tar.gz
+                    </action>
+                    <action type="move_directory_files">
+                        <source_directory>bin</source_directory>
+                        <destination_directory>$INSTALL_DIR</destination_directory>
+                    </action>
+                </actions>
+
+                <actions architecture="x86_64" os="linux">
+                    <action sha256sum="0d2bd886e312980e0ae58ae912315beeeac612fd1783c959b4eabd62cffd8512" type="download_by_url">
+                        http://old-gep.wustl.edu/~wilson/packages/ucsc_bigwig/ucsc_bigwig_340_linux_x86_64.tar.gz
+                    </action>
+                    <action type="move_directory_files">
+                        <source_directory>bin</source_directory>
+                        <destination_directory>$INSTALL_DIR</destination_directory>
+                    </action>
+                </actions>
+
+                <!-- Display error message for unsupported OS and CPU architecture -->
+                <actions>
+                    <action type="shell_command">
+                        echo "ERROR: This package only supports 64-bit systems running macOS or Linux"
+                    </action>
+                    <action type="shell_command">false</action>
+                </actions>
+
+                <!-- update $PATH environment variable -->
+                <action type="set_environment">
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions_group>
+        </install>
+        <readme>
+        
+            This package contains the utilities for constructing
+            `bigWig files &lt;https://genome.ucsc.edu/goldenpath/help/bigWig.html&gt;`_.
+            The utilities were created by the
+            `Genome Bioinformatics Group &lt;https://genome.ucsc.edu/staff.html&gt;`_
+            at the UCSC Genomics Institute.
+
+            The bigWig file format is designed to store dense continuous datasets
+            and it is compatible with many genome browsers (e.g., UCSC Genome Browser,
+            JBrowse, IGV).
+        
+        </readme>
+    </package>
 </tool_dependency>