Mercurial > repos > yating-l > hubarchivecreatortest
diff datatypes/Datatype.py @ 1:85195e0d4b71 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit b1ae7349f118a0fe7923d765020dfc684cf84116-dirty
| author | yating-l |
|---|---|
| date | Fri, 29 Sep 2017 13:32:23 -0400 |
| parents | |
| children | fa990284327b |
line wrap: on
line diff
#!/usr/bin/python
# -*- coding: utf8 -*-

"""
Super Class of the managed datatype
"""

import os
import tempfile
import collections
import util
import logging
import abc
from abc import ABCMeta
from TrackDb import TrackDb
from datatypes.validators.DataValidation import DataValidation


class Datatype(object):
    """Abstract base class shared by every managed track datatype.

    Hub-wide configuration is stored once on the class through
    ``pre_init``; instances then carry the per-track state. Concrete
    datatypes must implement ``validateData`` and ``createTrack``;
    ``generateCustomTrack`` runs the whole pipeline.
    """

    # Python 2 style metaclass declaration (this file targets Python 2).
    __metaclass__ = ABCMeta

    # Class-level configuration, filled in by pre_init().
    twoBitFile = None
    chromSizesFile = None
    input_fasta_file = None
    extra_files_path = None
    tool_directory = None

    mySpecieFolderPath = None
    myTrackFolderPath = None

    def __init__(self):
        """Create an empty datatype instance.

        Raises:
            TypeError: if ``pre_init`` has not yet provided the reference
                genome, the track hub path or the tool directory.
        """
        not_init_message = "The {0} is not initialized. " \
                           "Did you use pre_init static method first?"
        # Checked in this order so the first missing item is the one reported.
        required = (
            (Datatype.input_fasta_file, 'reference genome'),
            (Datatype.extra_files_path, 'track Hub path'),
            (Datatype.tool_directory, 'tool directory'),
        )
        for value, label in required:
            if value is None:
                raise TypeError(not_init_message.format(label))

        self.inputFile = None
        self.trackType = None
        self.dataType = None
        self.track = None
        self.trackSettings = dict()
        self.extraSettings = collections.OrderedDict()

    @staticmethod
    def pre_init(reference_genome, two_bit_path, chrom_sizes_file,
                 extra_files_path, tool_directory, specie_folder, tracks_folder):
        """Store the hub-wide configuration on the ``Datatype`` class.

        Must be called before any instance is created, otherwise
        ``__init__`` raises ``TypeError``. Every argument is stored as-is
        in the class attribute of the matching name.
        """
        Datatype.extra_files_path = extra_files_path
        Datatype.tool_directory = tool_directory

        # TODO: All this should be in TrackHub and not in Datatype
        Datatype.mySpecieFolderPath = specie_folder
        Datatype.myTrackFolderPath = tracks_folder

        Datatype.input_fasta_file = reference_genome

        # The 2bit and chrom sizes files are supplied by the caller;
        # they are only recorded here, not generated.
        Datatype.twoBitFile = two_bit_path
        Datatype.chromSizesFile = chrom_sizes_file

    def generateCustomTrack(self):
        """Run the full pipeline: validate, init settings, build the track
        file, then build the TrackDb object."""
        self.validateData()
        self.initSettings()
        # Create the track file
        self.createTrack()
        # Create the TrackDb Object
        self.createTrackDb()
        logging.debug("- %s %s created", self.dataType, self.trackName)

    @abc.abstractmethod
    def validateData(self):
        """validate the input data with DataValidation"""

    def initSettings(self):
        """Initialize the required fields from ``self.trackSettings``.

        Sets ``trackName``, ``longLabel``, ``shortLabel`` and
        ``trackDataURL``. ``"name"`` is mandatory; a missing or empty
        ``"long_label"`` falls back to the track name and a missing
        ``"short_label"`` falls back to the empty string.

        Raises:
            KeyError: if ``self.trackSettings`` has no ``"name"`` entry.
        """
        settings = self.trackSettings
        self.trackName = settings["name"]
        # .get() so an absent long_label is handled like an empty one,
        # consistently with the optional short_label below.
        self.longLabel = settings.get("long_label") or self.trackName
        self.shortLabel = settings.get("short_label", "")
        self.trackDataURL = os.path.join(self.myTrackFolderPath, self.trackName)

    @abc.abstractmethod
    def createTrack(self):
        """Create the final track file"""

    def createTrackDb(self):
        """Build the TrackDb object for this track and store it in
        ``self.track``, using the fields set by ``initSettings``."""
        self.track = TrackDb(self.trackName, self.longLabel, self.shortLabel,
                             self.trackDataURL, self.trackType,
                             self.extraSettings)
