Mercurial > repos > yating-l > hubarchivecreatortest
changeset 3:fa990284327b draft default tip
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit ac73da32d59853ca563e7939d05016a6f3a6899e-dirty
| author | yating-l |
|---|---|
| date | Mon, 30 Oct 2017 11:58:31 -0400 |
| parents | 7486909dfc22 |
| children | |
| files | datatypes/Datatype.py datatypes/Datatype.pyc datatypes/binary/BigBed.py datatypes/binary/BigBed.pyc datatypes/interval/BigPsl.py datatypes/interval/BigPsl.pyc hubArchiveCreator.xml macros.xml util/index/ExternIndex.py util/index/ExternIndex.pyc util/index/TrixIndex.py util/index/TrixIndex.pyc |
| diffstat | 12 files changed, 40 insertions(+), 239 deletions(-) [+] |
line wrap: on
line diff
--- a/datatypes/Datatype.py Fri Sep 29 14:50:50 2017 -0400 +++ b/datatypes/Datatype.py Mon Oct 30 11:58:31 2017 -0400 @@ -96,110 +96,3 @@ def createTrackDb(self): self.track = TrackDb(self.trackName, self.longLabel, self.shortLabel, self.trackDataURL, self.trackType, self.extraSettings) - - - - - - - ''' - def __init__(self): - not_init_message = "The {0} is not initialized." \ - "Did you use pre_init static method first?" - if Datatype.input_fasta_file is None: - raise TypeError(not_init_message.format('reference genome')) - if Datatype.extra_files_path is None: - raise TypeError(not_init_message.format('track Hub path')) - if Datatype.tool_directory is None: - raise TypeError(not_init_message.format('tool directory')) - self.track = None - self.extra_settings = collections.OrderedDict() - - - @staticmethod - def pre_init(reference_genome, two_bit_path, chrom_sizes_file, - extra_files_path, tool_directory, specie_folder, tracks_folder): - Datatype.extra_files_path = extra_files_path - Datatype.tool_directory = tool_directory - - # TODO: All this should be in TrackHub and not in Datatype - Datatype.mySpecieFolderPath = specie_folder - Datatype.myTrackFolderPath = tracks_folder - - Datatype.input_fasta_file = reference_genome - - # 2bit file creation from input fasta - Datatype.twoBitFile = two_bit_path - Datatype.chromSizesFile = chrom_sizes_file - - @staticmethod - def get_largest_scaffold_name(self): - # We can get the biggest scaffold here, with chromSizesFile - with open(Datatype.chromSizesFile.name, 'r') as chrom_sizes: - # TODO: Check if exists - return chrom_sizes.readline().split()[0] - - - def createTrack(self, trackName, longLabel, shortLabel, trackDataURL, trackType, extra_settings=None): - self.track = TrackDb(trackName, longLabel, shortLabel, trackDataURL, trackType, extra_settings) - - def initRequiredSettings(self, trackSettings, trackDataURL = None, trackType = None): - - #Initialize required fields: trackName, longLabel, shortLable - - self.trackSettings = trackSettings - self.trackName = self.trackSettings["name"] - #self.priority = self.trackSettings["order_index"] - #self.track_color = self.trackSettings["track_color"] - # TODO: Think about how to avoid repetition of the group_name everywhere - #self.group_name = self.trackSettings["group_name"] - #self.database = self.trackSettings["database"] - if self.trackSettings["long_label"]: - self.longLabel = self.trackSettings["long_label"] - else: - self.longLabel = self.trackName - if not "short_label" in self.trackSettings: - self.shortLabel = "" - else: - self.shortLabel = self.trackSettings["short_label"] - self.trackDataURL = trackDataURL - self.trackType = trackType - - def setExtLink(self, database, inputFile, seqType=None, useIframe=True, iframeHeight=None, iframeWidth=None): - if "NCBI" in database: - if not seqType: - self.seqType = int(self.getSeqType(inputFile)) - else: - self.seqType = seqType - if self.seqType < 0: - print self.seqType - raise Exception("Sequence Type is not set for bigPsl. Stopping the application") - if self.seqType == 2: - self.extra_settings["url"] = "https://www.ncbi.nlm.nih.gov/protein/$$" - elif self.seqType == 1: - self.extra_settings["url"] = "https://www.ncbi.nlm.nih.gov/nuccore/$$" - else: - raise Exception("Sequence Type {0} is not valid for bigPsl. Stopping the application".format(self.seqType)) - elif "UniProt" in database: - self.extra_settings["url"] = "http://www.uniprot.org/uniprot/$$" - elif "FlyBase" in database: - self.extra_settings["url"] = "http://flybase.org/reports/$$" - else: - self.extra_settings["url"] = "https://www.ncbi.nlm.nih.gov/gquery/?term=$$" - self.extra_settings["urlLabel"] = database + " Details:" - if useIframe: - self.extra_settings["iframeUrl"] = self.extra_settings["url"] - if not iframeHeight: - iframeHeight = "600" - if not iframeWidth: - iframeWidth = "800" - self.extra_settings["iframeOptions"] = "height= %s width= %s" % (iframeHeight, iframeWidth) - - def getSeqType(self, inputFile): - with open(inputFile, "r") as bigpsl: - sampleSeq = bigpsl.readline().split() - if len(sampleSeq) == 25: - return sampleSeq[-1] - else: - return "-1" - ''' \ No newline at end of file
--- a/datatypes/binary/BigBed.py Fri Sep 29 14:50:50 2017 -0400 +++ b/datatypes/binary/BigBed.py Mon Oct 30 11:58:31 2017 -0400 @@ -36,13 +36,12 @@ self.database_settings = DatabaseIndex(database=self.trackSettings["database"], seqType=self.seqType).setExtLink() self.extraSettings.update(self.database_settings) if "indexIx" in self.trackSettings and "indexIxx" in self.trackSettings: - if not "trix_id" in self.trackSettings or not self.trackSettings["trix_id"]: - logging.error("Didn't specify the search index. To use TRIX index, you need to specify the identifiers") - exit(1) - trix_id = self.trackSettings["trix_id"] - self.trix_settings = TrixIndex(indexIx=self.trackSettings["indexIx"], indexIxx=self.trackSettings["indexIxx"], trackName=self.trackName, mySpecieFolderPath=self.mySpecieFolderPath, trixId=trix_id).setExtLink() + self.trix_id = self.extraSettings["trix_id"] + if not self.trix_id: + logging.info("Didn't specify the ID for Trix index for BigBed file: %s. \n Will use \"name\" as default", self.trackName) + self.trix_settings = TrixIndex(indexIx=self.trackSettings["indexIx"], indexIxx=self.trackSettings["indexIxx"], trackName=self.trackName, mySpecieFolderPath=self.mySpecieFolderPath, trixId=self.trix_id).setExtLink() self.extraSettings.update(self.trix_settings) - + def validateData(self): self.validator = DataValidation(self.inputFile, self.dataType, self.chromSizesFile.name) self.validator.validate()
--- a/datatypes/interval/BigPsl.py Fri Sep 29 14:50:50 2017 -0400 +++ b/datatypes/interval/BigPsl.py Mon Oct 30 11:58:31 2017 -0400 @@ -3,7 +3,6 @@ import os import tempfile import string -import logging from Interval import Interval from util.index.DatabaseIndex import DatabaseIndex @@ -35,18 +34,15 @@ self.extraSettings["group"] = self.trackSettings["group_name"] self.extraSettings["visibility"] = "dense" self.extraSettings["priority"] = self.trackSettings["order_index"] - #self.extraSettings["searchIndex"] = "name" + self.extraSettings["searchIndex"] = "name" if self.seqType is None: self.seqType = self._getSeqType() if "database" in self.trackSettings: self.database_settings = DatabaseIndex(database=self.trackSettings["database"], seqType=self.seqType).setExtLink() self.extraSettings.update(self.database_settings) if "indexIx" in self.trackSettings and "indexIxx" in self.trackSettings: - if not "trix_id" in self.trackSettings or not self.trackSettings["trix_id"]: - logging.error("Didn't specify the search index. To use TRIX index, you need to specify the identifiers") - exit(1) trix_id = self.trackSettings["trix_id"] - self.trix_settings = TrixIndex(indexIx=self.trackSettings["indexIx"], indexIxx=self.trackSettings["indexIxx"], trackName=self.trackName, mySpecieFolderPath=self.mySpecieFolderPath, trixId = trix_id, default_index = "name").setExtLink() + self.trix_settings = TrixIndex(indexIx=self.trackSettings["indexIx"], indexIxx=self.trackSettings["indexIxx"], trackName=self.trackName, mySpecieFolderPath=self.mySpecieFolderPath).setExtLink() self.extraSettings.update(self.trix_settings)
--- a/hubArchiveCreator.xml Fri Sep 29 14:50:50 2017 -0400 +++ b/hubArchiveCreator.xml Mon Oct 30 11:58:31 2017 -0400 @@ -123,15 +123,8 @@ #set database = str($f.formatChoice.bedChoice.database) #silent $extra_data_dict.update({"database": $database}) #if $f.formatChoice.bedChoice.add_trix_index.add_trix_index_selector == "yes" - #set trix_id = str($f.formatChoice.bedChoice.add_trix_index.trix_id) - #silent $extra_data_dict.update({"trix_id": $trix_id}) - #if $f.formatChoice.bedChoice.add_trix_index.index_files.index_files_type == "collection" - #set ix_index = str($f.formatChoice.bedChoice.add_trix_index.index_files.trix_index.index_ix) - #set ixx_index = str($f.formatChoice.bedChoice.add_trix_index.index_files.trix_index.index_ixx) - #else - #set ix_index = str($f.formatChoice.bedChoice.add_trix_index.index_files.index_ix) - #set ixx_index = str($f.formatChoice.bedChoice.add_trix_index.index_files.index_ixx) - #end if + #set ix_index = str($f.formatChoice.bedChoice.add_trix_index.add_trix_file.trix_index.index_ix) + #set ixx_index = str($f.formatChoice.bedChoice.add_trix_index.add_trix_file.trix_index.index_ixx) #silent $extra_data_dict.update({"indexIx": $ix_index, "indexIxx": $ixx_index}) #end if #silent $prepare_json("BedBlastAlignments", $f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final, @@ -141,15 +134,8 @@ #set database = str($f.formatChoice.bedChoice.database) #silent $extra_data_dict.update({"database": $database}) #if $f.formatChoice.bedChoice.add_trix_index.add_trix_index_selector == "yes" - #set trix_id = str($f.formatChoice.bedChoice.add_trix_index.trix_id) - #silent $extra_data_dict.update({"trix_id": $trix_id}) - #if $f.formatChoice.bedChoice.add_trix_index.index_files.index_files_type == "collection" - #set ix_index = str($f.formatChoice.bedChoice.add_trix_index.index_files.trix_index.index_ix) - #set ixx_index = str($f.formatChoice.bedChoice.add_trix_index.index_files.trix_index.index_ixx) - #else - #set ix_index = str($f.formatChoice.bedChoice.add_trix_index.index_files.index_ix) - #set ixx_index = str($f.formatChoice.bedChoice.add_trix_index.index_files.index_ixx) - #end if + #set ix_index = str($f.formatChoice.bedChoice.add_trix_index.add_trix_file.trix_index.index_ix) + #set ixx_index = str($f.formatChoice.bedChoice.add_trix_index.add_trix_file.trix_index.index_ixx) #silent $extra_data_dict.update({"indexIx": $ix_index, "indexIxx": $ixx_index}) #end if #silent $prepare_json("BedBlatAlignments", $f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final, @@ -166,16 +152,10 @@ #end if #if $f.formatChoice.format_select == "bigbed" #if $f.formatChoice.add_trix_index.add_trix_index_selector == "yes" - #set trix_id = str($f.formatChoice.add_trix_index.trix_id) - #silent $extra_data_dict.update({"trix_id": $trix_id}) - #if $f.formatChoice.add_trix_index.index_files.index_files_type == "collection" - #set ix_index = str($f.formatChoice.add_trix_index.index_files.trix_index.index_ix) - #set ixx_index = str($f.formatChoice.add_trix_index.index_files.trix_index.index_ixx) - #else - #set ix_index = str($f.formatChoice.add_trix_index.index_files.index_ix) - #set ixx_index = str($f.formatChoice.add_trix_index.index_files.index_ixx) - #end if - #silent $extra_data_dict.update({"indexIx": $ix_index, "indexIxx": $ixx_index}) + #set ix_index = str($f.formatChoice.add_trix_index.add_trix_file.trix_index.index_ix) + #set ixx_index = str($f.formatChoice.add_trix_index.add_trix_file.trix_index.index_ixx) + #set trix_id = str($f.formatChoice.add_trix_index.add_trix_file.trix_id) + #silent $extra_data_dict.update({"indexIx": $ix_index, "indexIxx": $ixx_index, "trix_id": $trix_id}) #end if #silent $prepare_json("BigBed", $f.formatChoice.BIGBED, $index_track_final, $extra_data_dict) @@ -377,7 +357,13 @@ label="BIGBED File" /> <expand macro="add_trix_file"> - <expand macro="specify_search_index" /> + <param + name="trix_id" + value="name" + type="text" + size="30" + label="Specify Trix identifier" + /> </expand> <param name="longLabel" type="text" size="76" label="Track label" help="It is limited to 76 printable characters, the first 17 printable characters will be used as a short label of the track" /> <param name="track_color" type="color" label="Track color" value="#000000">
--- a/macros.xml Fri Sep 29 14:50:50 2017 -0400 +++ b/macros.xml Mon Oct 30 11:58:31 2017 -0400 @@ -52,50 +52,21 @@ <xml name="add_trix_file"> <conditional name="add_trix_index"> - <param name="add_trix_index_selector" type="select" label="Add TRIX files that maps free text to a set of indices" > + <param name="add_trix_index_selector" type="select" label="Add Trix index that allow for fast look-up of free text associated with a list of identifiers." help="More about Trix index: https://genome.ucsc.edu/goldenpath/help/trix.html"> <option value="no" selected="true">No</option> <option value="yes">Yes</option> </param> <when value="yes"> <param - name="trix_id" - value="name" - type="text" - size="30" - label="Specify Trix identifier" - help="More about Trix index: https://genome.ucsc.edu/goldenpath/help/trix.html" + name="trix_index" + format="txt" + type="data_collection" + collection_type="list" + label="Specify Trix index" + help="Dataset collection containing ix and ixx files generated by UCSC Trix Index Generator" /> - <conditional name="index_files"> - <param name="index_files_type" type="select" label="Single files or an index collection" help="Select between provide individual trix files (ix and ixx) or an trix index collection"> - <option value="collection">Index collection</option> - <option value="single">Single files</option> - </param> - <when value="collection"> - <param - name="trix_index" - format="txt" - type="data_collection" - collection_type="list" - label="Select dataset collection" - help="Specify dataset collection containing ix and ixx files" - /> - </when> - <when value="single"> - <param - format="txt" - name="index_ix" - type="data" - label="Trix index file (ix)" - /> - <param - format="txt" - name="index_ixx" - type="data" - label="Trix index file (ixx)" - /> - </when> - </conditional> + <yield /> </when> <when value="no"> </when>
--- a/util/index/ExternIndex.py Fri Sep 29 14:50:50 2017 -0400 +++ b/util/index/ExternIndex.py Mon Oct 30 11:58:31 2017 -0400 @@ -13,48 +13,4 @@ @abc.abstractmethod def setExtLink(self): """set external link""" - - - - - ''' - @staticmethod - def setExtLink(database, inputFile, extra_settings, seqType=None, useIframe=True, iframeHeight=None, iframeWidth=None): - if "NCBI" in database: - if not seqType: - seqType = int(ExternIndex.getSeqType(inputFile)) - else: - seqType = seqType - if seqType < 0: - print seqType - raise Exception("Sequence Type is not set for bigPsl. Stopping the application") - if seqType == 2: - extra_settings["url"] = "https://www.ncbi.nlm.nih.gov/protein/$$" - elif seqType == 1: - extra_settings["url"] = "https://www.ncbi.nlm.nih.gov/nuccore/$$" - else: - raise Exception("Sequence Type {0} is not valid for bigPsl. Stopping the application".format(seqType)) - elif "UniProt" in database: - extra_settings["url"] = "http://www.uniprot.org/uniprot/$$" - elif "FlyBase" in database: - extra_settings["url"] = "http://flybase.org/reports/$$" - else: - extra_settings["url"] = "https://www.ncbi.nlm.nih.gov/gquery/?term=$$" - extra_settings["urlLabel"] = database + " Details:" - if useIframe: - extra_settings["iframeUrl"] = extra_settings["url"] - if not iframeHeight: - iframeHeight = "600" - if not iframeWidth: - iframeWidth = "800" - extra_settings["iframeOptions"] = "height= %s width= %s" % (iframeHeight, iframeWidth) - - @staticmethod - def getSeqType(inputFile): - with open(inputFile, "r") as bigpsl: - sampleSeq = bigpsl.readline().split() - if len(sampleSeq) == 25: - return sampleSeq[-1] - else: - return "-1" - ''' \ No newline at end of file + \ No newline at end of file
--- a/util/index/TrixIndex.py Fri Sep 29 14:50:50 2017 -0400 +++ b/util/index/TrixIndex.py Mon Oct 30 11:58:31 2017 -0400 @@ -7,20 +7,19 @@ from ExternIndex import ExternIndex class TrixIndex(ExternIndex): - def __init__(self, indexIx, indexIxx, trackName, mySpecieFolderPath, trixId, **args): + def __init__(self, indexIx, indexIxx, trackName, mySpecieFolderPath, **args): + self.logger = logging.getLogger(__name__) self.indexIx = indexIx self.indexIxx = indexIxx self.trackName = trackName self.mySpecieFolderPath = mySpecieFolderPath - self.trixId = trixId self.index_settings = collections.OrderedDict() - if "default_index" in args: - self.default_index = args["default_index"] + if "trixId" in args: + self.trixId = args["trixId"] else: - self.default_index = None + self.trixId = None def setExtLink(self): - self.setSearchIndex() self.moveIndexFile() self.index_settings["searchTrix"] = "trix/%s" % self.indexIxName return self.index_settings @@ -39,13 +38,14 @@ shutil.copyfile(self.indexIxx, self.indexIxxPath) def setSearchIndex(self): - if self.default_index: + if self.trixId: set_index = set() set_index.add(self.trixId) - set_index.add(self.default_index) + set_index.add("name") search_index = ",".join(set_index) else: - search_index = self.trixId + search_index = "name" logging.debug("trixId= %s, searchIndex= %s", self.trixId, search_index) self.index_settings["searchIndex"] = search_index +
