Mercurial > repos > yating-l > hubarchivecreator
changeset 52:c66803bff0cc draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit adc1ac50269e02570e7ce12c732637bdd3f9a547-dirty
author | yating-l |
---|---|
date | Thu, 11 May 2017 17:21:15 -0400 |
parents | 364b8db8de17 |
children | 44059c9e7c02 |
files | Bam.py Bam.pyc Bed.py Bed.pyc BedBlastAlignments.py BedBlastAlignments.pyc BedSimpleRepeats.py BedSimpleRepeats.pyc BedSpliceJunctions.py BedSpliceJunctions.pyc BigWig.py BigWig.pyc Datatype.py Datatype.pyc Gff3.py Gff3.pyc Gtf.py Gtf.pyc Psl.py Psl.pyc bigPsl.py bigPsl.pyc hubArchiveCreator.xml templates/trackDb/layout.txt tool_dependencies.xml |
diffstat | 25 files changed, 183 insertions(+), 61 deletions(-) [+] |
line wrap: on
line diff
--- a/Bam.py Wed Apr 12 16:51:03 2017 -0400 +++ b/Bam.py Thu May 11 17:21:15 2017 -0400 @@ -37,7 +37,10 @@ # TODO: Think about how to avoid repetition of the group_name everywhere self.group_name = self.data_bam["group_name"] - + if self.data_bam["long_label"]: + self.long_label = self.data_bam["long_label"] + else: + self.long_label = self.name_bam # First: Add the bam file # Second: Add the bam index file, in the same folder (https://genome.ucsc.edu/goldenpath/help/bam.html) @@ -52,7 +55,7 @@ # Create the Track Object self.createTrack(file_path=self.name_bam, track_name=self.name_bam, - long_label=self.name_bam, track_type='bam', visibility='pack', priority=self.priority, + long_label=self.long_label, track_type='bam', visibility='pack', priority=self.priority, track_file=bam_index_file_path, track_color=self.track_color, group_name=self.group_name
--- a/Bed.py Wed Apr 12 16:51:03 2017 -0400 +++ b/Bed.py Thu May 11 17:21:15 2017 -0400 @@ -26,7 +26,10 @@ self.track_color = self.data_bed_generic["track_color"] # TODO: Think about how to avoid repetition of the group_name everywhere self.group_name = self.data_bed_generic["group_name"] - + if self.data_bed_generic["long_label"]: + self.long_label = self.data_bed_generic["long_label"] + else: + self.long_label = self.name_bed_generic # Sort processing subtools.sort(self.inputBedGeneric, self.sortedBedFile.name) @@ -44,7 +47,7 @@ # Create the Track Object self.createTrack(file_path=trackName, track_name=trackName, - long_label=self.name_bed_generic, track_type='bigBed', visibility='dense', + long_label=self.long_label, track_type='bigBed', visibility='dense', priority=self.priority, track_file=myBigBedFilePath, track_color=self.track_color,
--- a/BedBlastAlignments.py Wed Apr 12 16:51:03 2017 -0400 +++ b/BedBlastAlignments.py Thu May 11 17:21:15 2017 -0400 @@ -21,7 +21,10 @@ self.track_color = data_bed_blast_alignments["track_color"] # TODO: Think about how to avoid repetition of the group_name everywhere self.group_name = data_bed_blast_alignments["group_name"] - + if data_bed_blast_alignments["long_label"]: + self.long_label = data_bed_blast_alignments["long_label"] + else: + self.long_label = self.name_bed_blast_alignments #sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") # Sort processing @@ -45,7 +48,7 @@ self.createTrack(file_path=trackName, track_name=trackName, - long_label=self.name_bed_blast_alignments, track_type='bigBed 12 +', visibility='dense', + long_label=self.long_label, track_type='bigBed 12 +', visibility='dense', priority=self.priority, track_file=myBigBedFilePath, track_color=self.track_color,
--- a/BedSimpleRepeats.py Wed Apr 12 16:51:03 2017 -0400 +++ b/BedSimpleRepeats.py Thu May 11 17:21:15 2017 -0400 @@ -20,6 +20,10 @@ self.track_color = data_bed_simple_repeats["track_color"] # TODO: Think about how to avoid repetition of the group_name everywhere self.group_name = data_bed_simple_repeats["group_name"] + if data_bed_simple_repeats["long_label"]: + self.long_label = data_bed_simple_repeats["long_label"] + else: + self.long_label = self.name_bed_simple_repeats sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") @@ -43,7 +47,7 @@ # Create the Track Object self.createTrack(file_path=trackName, track_name=trackName, - long_label=self.name_bed_simple_repeats, track_type='bigBed 4 +', visibility='dense', + long_label=self.long_label, track_type='bigBed 4 +', visibility='dense', priority=self.priority, track_file=myBigBedFilePath, track_color=self.track_color,
--- a/BedSpliceJunctions.py Wed Apr 12 16:51:03 2017 -0400 +++ b/BedSpliceJunctions.py Thu May 11 17:21:15 2017 -0400 @@ -20,7 +20,10 @@ self.track_color = data_bed_splice_junctions["track_color"] # TODO: Think about how to avoid repetition of the group_name everywhere self.group_name = data_bed_splice_junctions["group_name"] - + if data_bed_splice_junctions["long_label"]: + self.long_label = data_bed_splice_junctions["long_label"] + else: + self.long_label = self.name_bed_splice_junctions sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") # Sort processing @@ -43,7 +46,7 @@ # Create the Track Object self.createTrack(file_path=trackName, track_name=trackName, - long_label=self.name_bed_splice_junctions, track_type='bigBed 12 +', visibility='dense', + long_label=self.long_label, track_type='bigBed 12 +', visibility='dense', priority=self.priority, track_file=myBigBedFilePath, track_color=self.track_color,
--- a/BigWig.py Wed Apr 12 16:51:03 2017 -0400 +++ b/BigWig.py Thu May 11 17:21:15 2017 -0400 @@ -2,6 +2,8 @@ import os import shutil +from subprocess import Popen, PIPE +import re # Internal dependencies from Datatype import Datatype @@ -21,7 +23,10 @@ self.track_color = data_bigwig["track_color"] # TODO: Think about how to avoid repetition of the group_name everywhere self.group_name = data_bigwig["group_name"] - + if data_bigwig["long_label"]: + self.long_label = data_bigwig["long_label"] + else: + self.long_label = self.name_bigwig #print "Creating TrackHub BigWig from (falsePath: %s; name: %s)" % ( self.input_bigwig_path, self.name_bigwig ) trackName = "".join( ( self.name_bigwig, ".bigwig" ) ) @@ -32,31 +37,38 @@ # Create the Track Object self.createTrack(file_path=trackName, track_name=trackName, - long_label=self.name_bigwig, - track_type='bigWig', visibility='full', + long_label=self.long_label, + track_type=self.determine_track_type(myBigWigFilePath), + visibility='full', priority=self.priority, track_file=myBigWigFilePath, track_color=self.track_color, group_name=self.group_name) - # dataURL = "tracks/%s" % trackName - # - # # Return the BigBed track - # - # trackDb = TrackDb( - # trackName=trackName, - # longLabel=self.name_bigwig, - # shortLabel=self.getShortName( self.name_bigwig ), - # trackDataURL=dataURL, - # trackType='bigWig', - # visibility='full', - # priority=self.priority, - # ) - # - # self.track = Track( - # trackFile=myBigWigFilePath, - # trackDb=trackDb, - # ) - print("- BigWig %s created" % self.name_bigwig) #print("- %s created in %s" % (trackName, myBigWigFilePath)) + + def determine_track_type(self, bw_file): + """ + bigWig tracks must declare the expected signal range for the data + (See https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html). + This method determines the range of values for a bigWig file using + the bigWigInfo program. + + Implementation of reading from stdout is based on a Stackoverflow post: + http://stackoverflow.com/questions/2715847/python-read-streaming-input-from-subprocess-communicate + + :param bw_file: path to a bigWig file + + :returns: the bigWig track type + """ + cmd_ph = Popen(["bigWigInfo", "-minMax", bw_file], + stdout=PIPE, bufsize=1) + + with cmd_ph.stdout: + for line in iter(cmd_ph.stdout.readline, b''): + bw_type = "bigWig %s" % line.rstrip() + + cmd_ph.wait() + + return bw_type
--- a/Datatype.py Wed Apr 12 16:51:03 2017 -0400 +++ b/Datatype.py Thu May 11 17:21:15 2017 -0400 @@ -66,7 +66,7 @@ # TODO: Rename for PEP8 def getShortName( self, name_to_shortify ): # Slice to get from Long label the short label - short_label_slice = slice(0, 15) + short_label_slice = slice(0, 17) return name_to_shortify[short_label_slice]
--- a/Gff3.py Wed Apr 12 16:51:03 2017 -0400 +++ b/Gff3.py Thu May 11 17:21:15 2017 -0400 @@ -22,7 +22,10 @@ self.track_color = data_gff3["track_color"] # TODO: Think about how to avoid repetition of the group_name everywhere self.group_name = data_gff3["group_name"] - + if data_gff3["long_label"]: + self.long_label = data_gff3["long_label"] + else: + self.long_label = self.name_gff3 # TODO: See if we need these temporary files as part of the generated files unsorted_genePred_file = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred") unsorted_bigGenePred_file = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsorted.bigGenePred") @@ -57,7 +60,7 @@ # Create the Track Object self.createTrack(file_path=trackName, track_name=trackName, - long_label=self.name_gff3, + long_label=self.long_label, track_type='bigGenePred', visibility='dense', priority=self.priority, track_file=myBigBedFilePath,
--- a/Gtf.py Wed Apr 12 16:51:03 2017 -0400 +++ b/Gtf.py Thu May 11 17:21:15 2017 -0400 @@ -28,7 +28,10 @@ self.track_color = data_gtf["track_color"] # TODO: Think about how to avoid repetition of the group_name everywhere self.group_name = data_gtf["group_name"] - + if data_gtf["long_label"]: + self.long_label = data_gtf["long_label"] + else: + self.long_label = self.name_gtf #print "Creating TrackHub GTF from (falsePath: %s; name: %s)" % ( self.input_gtf_false_path, self.name_gtf) # TODO: See if we need these temporary files as part of the generated files @@ -69,7 +72,7 @@ # Create the Track Object self.createTrack(file_path=trackName, track_name=trackName, - long_label=self.name_gtf, track_type='bigGenePred', + long_label=self.long_label, track_type='bigGenePred', visibility='dense', priority=self.priority, track_file=myBigBedFilePath, track_color=self.track_color,
--- a/Psl.py Wed Apr 12 16:51:03 2017 -0400 +++ b/Psl.py Thu May 11 17:21:15 2017 -0400 @@ -19,7 +19,10 @@ self.track_color = data_psl["track_color"] # TODO: Think about how to avoid repetition of the group_name everywhere self.group_name = data_psl["group_name"] - + if data_psl["long_label"]: + self.long_label = data_psl["long_label"] + else: + self.long_label = self.name_psl # Temporary files unsorted_bed_formatted_psl_file = tempfile.NamedTemporaryFile(suffix='.psl') sorted_bed_formatted_psl_file = tempfile.NamedTemporaryFile(suffix='psl') @@ -50,7 +53,7 @@ # Create the Track Object self.createTrack(file_path=trackName, track_name=trackName, - long_label=self.name_psl, + long_label=self.long_label, track_type='bigPsl', visibility='dense', priority=self.priority, track_file=my_big_psl_file_path,
--- a/bigPsl.py Wed Apr 12 16:51:03 2017 -0400 +++ b/bigPsl.py Thu May 11 17:21:15 2017 -0400 @@ -21,7 +21,10 @@ self.track_color = data_bigpsl["track_color"] # TODO: Think about how to avoid repetition of the group_name everywhere self.group_name = data_bigpsl["group_name"] - + if data_bigpsl["long_label"]: + self.long_label = data_bigpsl["long_label"] + else: + self.long_label = self.name_bigpsl #sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") # Sort processing @@ -45,7 +48,7 @@ self.createTrack(file_path=trackName, track_name=trackName, - long_label=self.name_bigpsl, track_type='bigPsl', visibility='dense', + long_label=self.long_label, track_type='bigPsl', visibility='dense', priority=self.priority, track_file=myBigBedFilePath, track_color=self.track_color,
--- a/hubArchiveCreator.xml Wed Apr 12 16:51:03 2017 -0400 +++ b/hubArchiveCreator.xml Thu May 11 17:21:15 2017 -0400 @@ -7,6 +7,7 @@ <requirements> <requirement type="package" version="1.0">ucsc_tools_340</requirement> <requirement type="package" version="1.2">samtools</requirement> + <requirement type="package" version="340">ucsc_bigwig</requirement> </requirements> <stdio> @@ -70,9 +71,10 @@ ## For each format, we have a few mandatory fields we store in a dict #set track_color = str($f.formatChoice.track_color) #set group_name = str($g.group_name) - + #set longLabel = str($f.formatChoice.longLabel) #set extra_data_dict = {"track_color": $track_color, - "group_name": $group_name} + "group_name": $group_name, + "long_label": $longLabel} #if $f.formatChoice.format_select == "bam" --bam $f.formatChoice.BAM @@ -101,12 +103,12 @@ #end if #if $f.formatChoice.bedChoice.bed_select == "bed_blast_alignment_option" --bedBlastAlignments $f.formatChoice.bedChoice.BED_blast_alignment - #silent $prepare_json($f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final, + #silent $prepare_json($f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final, extra_data_dict) #end if #if $f.formatChoice.bedChoice.bed_select == "bed_blat_alignment_option" --bigpsl $f.formatChoice.bedChoice.BED_blat_alignment - #silent $prepare_json($f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final, + #silent $prepare_json($f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final, extra_data_dict) #end if #end if @@ -190,6 +192,7 @@ type="data" label="BAM File" /> + <param name="longLabel" type="text" size="80" label="Track label" /> <!-- TODO: Find a solution to avoid repetition and to generate a new color depending on the others --> <param name="track_color" type="color" label="Track color" value="#000000"> <sanitizer> @@ -198,7 +201,7 @@ </valid> </sanitizer> </param> - </when> + </when> <when value="bed"> <conditional name="bedChoice"> <param name="bed_select" type="select" label="Bed Choice"> @@ -250,6 +253,7 @@ /> </when> </conditional> + <param name="longLabel" type="text" size="30" label="Track name" /> <param name="track_color" type="color" label="Track color" value="#000000"> <sanitizer> <valid initial="string.letters,string.digits"> @@ -265,6 +269,7 @@ type="data" label="PSL File" /> + <param name="longLabel" type="text" size="30" label="Track name" /> <param name="track_color" type="color" label="Track color" value="#000000"> <sanitizer> <valid initial="string.letters,string.digits"> @@ -280,6 +285,7 @@ type="data" label="BIGWIG File" /> + <param name="longLabel" type="text" size="30" label="Track name" /> <param name="track_color" type="color" label="Track color" value="#000000"> <sanitizer> <valid initial="string.letters,string.digits"> @@ -295,6 +301,7 @@ type="data" label="GFF3 File" /> + <param name="longLabel" type="text" size="30" label="Track name" /> <param name="track_color" type="color" label="Track color" value="#000000"> <sanitizer> <valid initial="string.letters,string.digits"> @@ -310,6 +317,7 @@ type="data" label="GTF File" /> + <param name="longLabel" type="text" size="30" label="Track name" /> <param name="track_color" type="color" label="Track color" value="#000000"> <sanitizer> <valid initial="string.letters,string.digits"> @@ -467,7 +475,7 @@ /> </output> </test> - + <!-- Test with Psl --> <test> @@ -1022,4 +1030,8 @@ This Galaxy tool permits to prepare your files to be ready for Assembly Hub visualization. </help> + + <citations> + <citation type="doi">10.7490/f1000research.1112719.1</citation> + </citations> </tool>
--- a/templates/trackDb/layout.txt Wed Apr 12 16:51:03 2017 -0400 +++ b/templates/trackDb/layout.txt Thu May 11 17:21:15 2017 -0400 @@ -1,14 +1,32 @@ % for trackDb in trackDbs: - ## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html - track ${trackDb.trackName} - longLabel ${trackDb.longLabel} - shortLabel ${trackDb.shortLabel} - bigDataUrl ${trackDb.trackDataURL} - type ${trackDb.trackType} - visibility ${trackDb.visibility} - thickDrawItem ${trackDb.thickDrawItem} - priority ${trackDb.priority} - color ${trackDb.track_color} - group ${trackDb.group_name.lower().replace(' ', '_')} + % if "bigWig" in trackDb.trackType: + +track ${trackDb.trackName} +longLabel ${trackDb.longLabel} +shortLabel ${trackDb.shortLabel} +bigDataUrl ${trackDb.trackDataURL} +visibility ${trackDb.visibility} +priority ${trackDb.priority} +color ${trackDb.track_color} +group ${trackDb.group_name.lower().replace(' ', '_')} +type ${trackDb.trackType} +autoScale on +maxHeightPixels 100:32:8 +windowingFunction mean+whiskers + % else: + +## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html +track ${trackDb.trackName} +longLabel ${trackDb.longLabel} +shortLabel ${trackDb.shortLabel} +bigDataUrl ${trackDb.trackDataURL} +type ${trackDb.trackType} +visibility ${trackDb.visibility} +thickDrawItem ${trackDb.thickDrawItem} +priority ${trackDb.priority} +color ${trackDb.track_color} +group ${trackDb.group_name.lower().replace(' ', '_')} + + % endif % endfor
--- a/tool_dependencies.xml Wed Apr 12 16:51:03 2017 -0400 +++ b/tool_dependencies.xml Thu May 11 17:21:15 2017 -0400 @@ -16,16 +16,16 @@ <package name="samtools" version="1.2"> <repository changeset_revision="5b7172f9b230" name="package_samtools_1_2" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> - + <package name="ucsc_tools_340" version="1.0"> <install version="1.0"> <actions_group> <actions architecture="x86_64" os="linux"> <action type="download_by_url">http://old-gep.wustl.edu/~galaxy/ucsc_tools_340.tar.gz</action> <action type="move_directory_files"> - <source_directory>.</source_directory> + <source_directory>.</source_directory> <destination_directory>$INSTALL_DIR/bin</destination_directory> - </action> + </action> </actions> <action type="set_environment"> <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> @@ -34,5 +34,57 @@ </install> <readme>The well known UCSC tools from Jim Kent.</readme> </package> - + + <!-- package ucsc_bigwig is created by Wilson Leung --> + <package name="ucsc_bigwig" version="340"> + <install version="1.0"> + <actions_group> + <actions architecture="x86_64" os="darwin"> + <action sha256sum="a34c57a9fb3c36a984b8fa879a99697c994cd981a1277663d372638e4dec8bb2" type="download_by_url"> + http://old-gep.wustl.edu/~wilson/packages/ucsc_bigwig/ucsc_bigwig_340_macOS_x86_64.tar.gz + </action> + <action type="move_directory_files"> + <source_directory>bin</source_directory> + <destination_directory>$INSTALL_DIR</destination_directory> + </action> + </actions> + + <actions architecture="x86_64" os="linux"> + <action sha256sum="0d2bd886e312980e0ae58ae912315beeeac612fd1783c959b4eabd62cffd8512" type="download_by_url"> + http://old-gep.wustl.edu/~wilson/packages/ucsc_bigwig/ucsc_bigwig_340_linux_x86_64.tar.gz + </action> + <action type="move_directory_files"> + <source_directory>bin</source_directory> + <destination_directory>$INSTALL_DIR</destination_directory> + </action> + </actions> + + <!-- Display error message for unsupported OS and CPU architecture --> + <actions> + <action type="shell_command"> + echo "ERROR: This package only supports 64-bit systems running macOS or Linux" + </action> + <action type="shell_command">false</action> + </actions> + + <!-- update $PATH environment variable --> + <action type="set_environment"> + <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> + </action> + </actions_group> + </install> + <readme> + + This package contains the utilities for constructing + `bigWig files <https://genome.ucsc.edu/goldenpath/help/bigWig.html>`_. + The utilities were created by the + `Genome Bioinformatics Group <https://genome.ucsc.edu/staff.html>`_ + at the UCSC Genomics Institute. + + The bigWig file format is designed to store dense continuous datasets + and it is compatible with many genome browsers (e.g., UCSC Genome Browser, + JBrowse, IGV). + + </readme> + </package> </tool_dependency>