Mercurial > repos > yating-l > hubarchivecreator
comparison datatypes/Datatype.py @ 66:4ca7cbf2d9b8 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 159730602ff500b59aefc7080fb49b726c88d655-dirty
author | yating-l |
---|---|
date | Tue, 26 Sep 2017 16:44:58 -0400 |
parents | |
children | 2440ce10848b |
comparison
equal
deleted
inserted
replaced
65:5a4206366b41 | 66:4ca7cbf2d9b8 |
---|---|
1 #!/usr/bin/python | |
2 # -*- coding: utf8 -*- | |
3 | |
4 """ | |
5 Super Class of the managed datatype | |
6 """ | |
7 | |
8 import os | |
9 import tempfile | |
10 import collections | |
11 import util | |
12 import logging | |
13 import abc | |
14 from abc import ABCMeta | |
15 from TrackDb import TrackDb | |
16 from datatypes.validators.DataValidation import DataValidation | |
17 | |
18 | |
class Datatype(object):
    """Abstract super class of the managed datatypes.

    Hub-wide configuration (reference genome, 2bit file, chrom sizes file,
    output folders, tool directory) is stored on the class itself by
    :meth:`pre_init`, which has to be called before any instance is built.

    Concrete subclasses implement :meth:`validateData` and
    :meth:`createTrack`, and fill ``self.trackSettings`` before calling
    :meth:`generateCustomTrack`, which runs the whole
    validate -> settings -> track file -> TrackDb sequence.
    """
    # Python 2 style metaclass declaration.
    __metaclass__ = ABCMeta

    # Class-level configuration shared by every datatype; set by pre_init().
    twoBitFile = None
    chromSizesFile = None
    input_fasta_file = None
    extra_files_path = None
    tool_directory = None

    mySpecieFolderPath = None
    myTrackFolderPath = None

    def __init__(self):
        """Check that pre_init() was called and reset per-track state.

        Raises:
            TypeError: if the reference genome, the track hub path or the
                tool directory has not been set on the class yet.
        """
        # The two literals are concatenated, so the separating space between
        # the sentences has to be written explicitly.
        not_init_message = "The {0} is not initialized. " \
                           "Did you use pre_init static method first?"
        if Datatype.input_fasta_file is None:
            raise TypeError(not_init_message.format('reference genome'))
        if Datatype.extra_files_path is None:
            raise TypeError(not_init_message.format('track Hub path'))
        if Datatype.tool_directory is None:
            raise TypeError(not_init_message.format('tool directory'))
        self.inputFile = None
        self.trackType = None
        self.dataType = None
        self.track = None
        self.trackSettings = dict()
        self.extraSettings = collections.OrderedDict()

    @staticmethod
    def pre_init(reference_genome, two_bit_path, chrom_sizes_file,
                 extra_files_path, tool_directory, specie_folder, tracks_folder):
        """Store the shared configuration on the Datatype class.

        Every argument is saved as-is in the matching class attribute; no
        validation or file access happens here.
        """
        Datatype.extra_files_path = extra_files_path
        Datatype.tool_directory = tool_directory

        # TODO: All this should be in TrackHub and not in Datatype
        Datatype.mySpecieFolderPath = specie_folder
        Datatype.myTrackFolderPath = tracks_folder

        Datatype.input_fasta_file = reference_genome

        # 2bit file and chrom sizes derived from the input fasta
        Datatype.twoBitFile = two_bit_path
        Datatype.chromSizesFile = chrom_sizes_file

    def generateCustomTrack(self):
        """Run the full track generation sequence for this datatype."""
        self.validateData()
        self.initSettings()
        # Create the track file
        self.createTrack()
        # Create the TrackDb object
        self.createTrackDb()
        logging.debug("- %s %s created", self.dataType, self.trackName)

    @abc.abstractmethod
    def validateData(self):
        """validate the input data with DataValidation"""

    def initSettings(self, trackType=None):
        """Initialize the required fields from ``self.trackSettings``.

        Sets ``trackName``, ``longLabel``, ``shortLabel`` and
        ``trackDataURL`` (the track name joined to ``myTrackFolderPath``).

        Args:
            trackType: when truthy, replaces ``self.trackType``; otherwise
                the current value is kept.

        Raises:
            KeyError: if ``trackSettings`` has no ``"name"`` entry.
        """
        settings = self.trackSettings
        self.trackName = settings["name"]
        # An empty *or* missing long label falls back to the track name, so
        # "long_label" is as optional as "short_label".
        self.longLabel = settings.get("long_label") or self.trackName
        self.shortLabel = settings.get("short_label", "")
        self.trackDataURL = os.path.join(self.myTrackFolderPath, self.trackName)
        if trackType:
            self.trackType = trackType

    @abc.abstractmethod
    def createTrack(self):
        """Create the final track file"""

    def createTrackDb(self):
        """Build ``self.track`` (a TrackDb) from the values set by initSettings()."""
        self.track = TrackDb(self.trackName, self.longLabel, self.shortLabel,
                             self.trackDataURL, self.trackType, self.extraSettings)
99 | |
100 | |
101 | |
102 | |
103 | |
104 | |
105 | |
# NOTE(review): everything from here to the closing ''' is a single
# triple-quoted string literal, so none of it is executed.  It holds an
# alternative set of method definitions (__init__, pre_init,
# get_largest_scaffold_name, createTrack, initRequiredSettings, setExtLink,
# getSeqType) -- presumably the previous implementation kept for reference;
# confirm before deleting.
# Points to check if any of it is revived:
#   * it stores settings in self.extra_settings, while the live class above
#     uses self.extraSettings;
#   * get_largest_scaffold_name is decorated @staticmethod yet declares a
#     `self` parameter;
#   * setExtLink contains a Python 2 `print` statement;
#   * getSeqType returns the last whitespace-separated field of the first
#     line when that line has exactly 25 fields, and "-1" otherwise.
106 ''' | |
107 def __init__(self): | |
108 not_init_message = "The {0} is not initialized." \ | |
109 "Did you use pre_init static method first?" | |
110 if Datatype.input_fasta_file is None: | |
111 raise TypeError(not_init_message.format('reference genome')) | |
112 if Datatype.extra_files_path is None: | |
113 raise TypeError(not_init_message.format('track Hub path')) | |
114 if Datatype.tool_directory is None: | |
115 raise TypeError(not_init_message.format('tool directory')) | |
116 self.track = None | |
117 self.extra_settings = collections.OrderedDict() | |
118 | |
119 | |
120 @staticmethod | |
121 def pre_init(reference_genome, two_bit_path, chrom_sizes_file, | |
122 extra_files_path, tool_directory, specie_folder, tracks_folder): | |
123 Datatype.extra_files_path = extra_files_path | |
124 Datatype.tool_directory = tool_directory | |
125 | |
126 # TODO: All this should be in TrackHub and not in Datatype | |
127 Datatype.mySpecieFolderPath = specie_folder | |
128 Datatype.myTrackFolderPath = tracks_folder | |
129 | |
130 Datatype.input_fasta_file = reference_genome | |
131 | |
132 # 2bit file creation from input fasta | |
133 Datatype.twoBitFile = two_bit_path | |
134 Datatype.chromSizesFile = chrom_sizes_file | |
135 | |
136 @staticmethod | |
137 def get_largest_scaffold_name(self): | |
138 # We can get the biggest scaffold here, with chromSizesFile | |
139 with open(Datatype.chromSizesFile.name, 'r') as chrom_sizes: | |
140 # TODO: Check if exists | |
141 return chrom_sizes.readline().split()[0] | |
142 | |
143 | |
144 def createTrack(self, trackName, longLabel, shortLabel, trackDataURL, trackType, extra_settings=None): | |
145 self.track = TrackDb(trackName, longLabel, shortLabel, trackDataURL, trackType, extra_settings) | |
146 | |
147 def initRequiredSettings(self, trackSettings, trackDataURL = None, trackType = None): | |
148 | |
149 #Initialize required fields: trackName, longLabel, shortLable | |
150 | |
151 self.trackSettings = trackSettings | |
152 self.trackName = self.trackSettings["name"] | |
153 #self.priority = self.trackSettings["order_index"] | |
154 #self.track_color = self.trackSettings["track_color"] | |
155 # TODO: Think about how to avoid repetition of the group_name everywhere | |
156 #self.group_name = self.trackSettings["group_name"] | |
157 #self.database = self.trackSettings["database"] | |
158 if self.trackSettings["long_label"]: | |
159 self.longLabel = self.trackSettings["long_label"] | |
160 else: | |
161 self.longLabel = self.trackName | |
162 if not "short_label" in self.trackSettings: | |
163 self.shortLabel = "" | |
164 else: | |
165 self.shortLabel = self.trackSettings["short_label"] | |
166 self.trackDataURL = trackDataURL | |
167 self.trackType = trackType | |
168 | |
169 def setExtLink(self, database, inputFile, seqType=None, useIframe=True, iframeHeight=None, iframeWidth=None): | |
170 if "NCBI" in database: | |
171 if not seqType: | |
172 self.seqType = int(self.getSeqType(inputFile)) | |
173 else: | |
174 self.seqType = seqType | |
175 if self.seqType < 0: | |
176 print self.seqType | |
177 raise Exception("Sequence Type is not set for bigPsl. Stopping the application") | |
178 if self.seqType == 2: | |
179 self.extra_settings["url"] = "https://www.ncbi.nlm.nih.gov/protein/$$" | |
180 elif self.seqType == 1: | |
181 self.extra_settings["url"] = "https://www.ncbi.nlm.nih.gov/nuccore/$$" | |
182 else: | |
183 raise Exception("Sequence Type {0} is not valid for bigPsl. Stopping the application".format(self.seqType)) | |
184 elif "UniProt" in database: | |
185 self.extra_settings["url"] = "http://www.uniprot.org/uniprot/$$" | |
186 elif "FlyBase" in database: | |
187 self.extra_settings["url"] = "http://flybase.org/reports/$$" | |
188 else: | |
189 self.extra_settings["url"] = "https://www.ncbi.nlm.nih.gov/gquery/?term=$$" | |
190 self.extra_settings["urlLabel"] = database + " Details:" | |
191 if useIframe: | |
192 self.extra_settings["iframeUrl"] = self.extra_settings["url"] | |
193 if not iframeHeight: | |
194 iframeHeight = "600" | |
195 if not iframeWidth: | |
196 iframeWidth = "800" | |
197 self.extra_settings["iframeOptions"] = "height= %s width= %s" % (iframeHeight, iframeWidth) | |
198 | |
199 def getSeqType(self, inputFile): | |
200 with open(inputFile, "r") as bigpsl: | |
201 sampleSeq = bigpsl.readline().split() | |
202 if len(sampleSeq) == 25: | |
203 return sampleSeq[-1] | |
204 else: | |
205 return "-1" | |
206 ''' | |