comparison datatypes/Datatype.py @ 1:85195e0d4b71 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit b1ae7349f118a0fe7923d765020dfc684cf84116-dirty
author yating-l
date Fri, 29 Sep 2017 13:32:23 -0400
parents
children fa990284327b
comparison
equal deleted inserted replaced
0:f9ccc5ad1713 1:85195e0d4b71
1 #!/usr/bin/python
2 # -*- coding: utf8 -*-
3
4 """
5 Super Class of the managed datatype
6 """
7
8 import os
9 import tempfile
10 import collections
11 import util
12 import logging
13 import abc
14 from abc import ABCMeta
15 from TrackDb import TrackDb
16 from datatypes.validators.DataValidation import DataValidation
17
18
class Datatype(object):
    """Abstract base class shared by every managed track datatype.

    Hub-wide configuration (reference genome, 2bit file, chrom sizes file,
    output folders, tool directory) is stored in class attributes by
    :meth:`pre_init`, which must be called before any instance is created.

    Subclasses implement :meth:`validateData` and :meth:`createTrack`, and
    are expected to fill ``trackSettings`` (and usually ``trackType`` /
    ``dataType``) before :meth:`generateCustomTrack` is invoked.
    """

    # Python 2 metaclass declaration. NOTE(review): Python 3 ignores this
    # attribute, so the abstract methods are only enforced under Python 2.
    __metaclass__ = ABCMeta

    # Shared configuration, populated by pre_init().
    twoBitFile = None
    chromSizesFile = None
    input_fasta_file = None
    extra_files_path = None
    tool_directory = None

    mySpecieFolderPath = None
    myTrackFolderPath = None

    def __init__(self):
        """Create an empty datatype after checking the shared configuration.

        Raises:
            TypeError: if the reference genome, the track hub path or the
                tool directory has not been set through :meth:`pre_init`.
        """
        # The trailing space keeps the two sentences apart in the message.
        not_init_message = ("The {0} is not initialized. "
                            "Did you use pre_init static method first?")
        if Datatype.input_fasta_file is None:
            raise TypeError(not_init_message.format('reference genome'))
        if Datatype.extra_files_path is None:
            raise TypeError(not_init_message.format('track Hub path'))
        if Datatype.tool_directory is None:
            raise TypeError(not_init_message.format('tool directory'))
        self.inputFile = None
        self.trackType = None
        self.dataType = None
        self.track = None
        self.trackSettings = dict()
        self.extraSettings = collections.OrderedDict()

    @staticmethod
    def pre_init(reference_genome, two_bit_path, chrom_sizes_file,
                 extra_files_path, tool_directory, specie_folder,
                 tracks_folder):
        """Store the configuration shared by all datatypes on the class.

        Args:
            reference_genome: stored as ``Datatype.input_fasta_file``.
            two_bit_path: stored as ``Datatype.twoBitFile``.
            chrom_sizes_file: stored as ``Datatype.chromSizesFile``.
            extra_files_path: stored as ``Datatype.extra_files_path``.
            tool_directory: stored as ``Datatype.tool_directory``.
            specie_folder: stored as ``Datatype.mySpecieFolderPath``.
            tracks_folder: stored as ``Datatype.myTrackFolderPath``.
        """
        Datatype.extra_files_path = extra_files_path
        Datatype.tool_directory = tool_directory

        # TODO: All this should be in TrackHub and not in Datatype
        Datatype.mySpecieFolderPath = specie_folder
        Datatype.myTrackFolderPath = tracks_folder

        Datatype.input_fasta_file = reference_genome

        # 2bit file and chrom sizes derived from the input fasta
        Datatype.twoBitFile = two_bit_path
        Datatype.chromSizesFile = chrom_sizes_file

    def generateCustomTrack(self):
        """Run the full pipeline: validate, init settings, build the track."""
        self.validateData()
        self.initSettings()
        # Create the track file
        self.createTrack()
        # Create the TrackDb Object
        self.createTrackDb()
        logging.debug("- %s %s created", self.dataType, self.trackName)

    @abc.abstractmethod
    def validateData(self):
        """validate the input data with DataValidation"""

    def initSettings(self):
        """Derive the required track fields from ``self.trackSettings``.

        Sets ``trackName``, ``longLabel``, ``shortLabel`` and
        ``trackDataURL``. ``longLabel`` falls back to the track name when
        "long_label" is missing or empty; ``shortLabel`` falls back to an
        empty string when "short_label" is missing.

        Raises:
            KeyError: if ``trackSettings`` has no "name" entry.
        """
        # Initialize required fields: trackName, longLabel, shortLabel
        settings = self.trackSettings
        self.trackName = settings["name"]
        # .get() so a missing "long_label" is handled like an empty one,
        # consistent with the optional "short_label" below.
        self.longLabel = settings.get("long_label") or self.trackName
        self.shortLabel = settings.get("short_label", "")
        self.trackDataURL = os.path.join(self.myTrackFolderPath,
                                         self.trackName)

    @abc.abstractmethod
    def createTrack(self):
        """Create the final track file"""

    def createTrackDb(self):
        """Build the TrackDb object from the initialized track fields."""
        self.track = TrackDb(self.trackName, self.longLabel, self.shortLabel,
                             self.trackDataURL, self.trackType,
                             self.extraSettings)
98
99
100
101
102
103
104
# NOTE: everything between the two ''' markers below is a bare string
# literal, so none of it is executed or reachable from callers. It appears
# to hold an earlier version of this class: __init__ and pre_init also exist
# as live definitions above, while get_largest_scaffold_name,
# createTrack(trackName, ...), initRequiredSettings, setExtLink and
# getSeqType exist only inside this string.
# NOTE(review): reviving any of it needs changes first -- it uses the
# Python 2 print statement, decorates get_largest_scaffold_name with
# @staticmethod while still declaring a `self` parameter, and writes to
# self.extra_settings whereas the live __init__ creates self.extraSettings.
105 '''
106 def __init__(self):
107 not_init_message = "The {0} is not initialized." \
108 "Did you use pre_init static method first?"
109 if Datatype.input_fasta_file is None:
110 raise TypeError(not_init_message.format('reference genome'))
111 if Datatype.extra_files_path is None:
112 raise TypeError(not_init_message.format('track Hub path'))
113 if Datatype.tool_directory is None:
114 raise TypeError(not_init_message.format('tool directory'))
115 self.track = None
116 self.extra_settings = collections.OrderedDict()
117
118
119 @staticmethod
120 def pre_init(reference_genome, two_bit_path, chrom_sizes_file,
121 extra_files_path, tool_directory, specie_folder, tracks_folder):
122 Datatype.extra_files_path = extra_files_path
123 Datatype.tool_directory = tool_directory
124
125 # TODO: All this should be in TrackHub and not in Datatype
126 Datatype.mySpecieFolderPath = specie_folder
127 Datatype.myTrackFolderPath = tracks_folder
128
129 Datatype.input_fasta_file = reference_genome
130
131 # 2bit file creation from input fasta
132 Datatype.twoBitFile = two_bit_path
133 Datatype.chromSizesFile = chrom_sizes_file
134
135 @staticmethod
136 def get_largest_scaffold_name(self):
137 # We can get the biggest scaffold here, with chromSizesFile
138 with open(Datatype.chromSizesFile.name, 'r') as chrom_sizes:
139 # TODO: Check if exists
140 return chrom_sizes.readline().split()[0]
141
142
143 def createTrack(self, trackName, longLabel, shortLabel, trackDataURL, trackType, extra_settings=None):
144 self.track = TrackDb(trackName, longLabel, shortLabel, trackDataURL, trackType, extra_settings)
145
146 def initRequiredSettings(self, trackSettings, trackDataURL = None, trackType = None):
147
148 #Initialize required fields: trackName, longLabel, shortLable
149
150 self.trackSettings = trackSettings
151 self.trackName = self.trackSettings["name"]
152 #self.priority = self.trackSettings["order_index"]
153 #self.track_color = self.trackSettings["track_color"]
154 # TODO: Think about how to avoid repetition of the group_name everywhere
155 #self.group_name = self.trackSettings["group_name"]
156 #self.database = self.trackSettings["database"]
157 if self.trackSettings["long_label"]:
158 self.longLabel = self.trackSettings["long_label"]
159 else:
160 self.longLabel = self.trackName
161 if not "short_label" in self.trackSettings:
162 self.shortLabel = ""
163 else:
164 self.shortLabel = self.trackSettings["short_label"]
165 self.trackDataURL = trackDataURL
166 self.trackType = trackType
167
168 def setExtLink(self, database, inputFile, seqType=None, useIframe=True, iframeHeight=None, iframeWidth=None):
169 if "NCBI" in database:
170 if not seqType:
171 self.seqType = int(self.getSeqType(inputFile))
172 else:
173 self.seqType = seqType
174 if self.seqType < 0:
175 print self.seqType
176 raise Exception("Sequence Type is not set for bigPsl. Stopping the application")
177 if self.seqType == 2:
178 self.extra_settings["url"] = "https://www.ncbi.nlm.nih.gov/protein/$$"
179 elif self.seqType == 1:
180 self.extra_settings["url"] = "https://www.ncbi.nlm.nih.gov/nuccore/$$"
181 else:
182 raise Exception("Sequence Type {0} is not valid for bigPsl. Stopping the application".format(self.seqType))
183 elif "UniProt" in database:
184 self.extra_settings["url"] = "http://www.uniprot.org/uniprot/$$"
185 elif "FlyBase" in database:
186 self.extra_settings["url"] = "http://flybase.org/reports/$$"
187 else:
188 self.extra_settings["url"] = "https://www.ncbi.nlm.nih.gov/gquery/?term=$$"
189 self.extra_settings["urlLabel"] = database + " Details:"
190 if useIframe:
191 self.extra_settings["iframeUrl"] = self.extra_settings["url"]
192 if not iframeHeight:
193 iframeHeight = "600"
194 if not iframeWidth:
195 iframeWidth = "800"
196 self.extra_settings["iframeOptions"] = "height= %s width= %s" % (iframeHeight, iframeWidth)
197
198 def getSeqType(self, inputFile):
199 with open(inputFile, "r") as bigpsl:
200 sampleSeq = bigpsl.readline().split()
201 if len(sampleSeq) == 25:
202 return sampleSeq[-1]
203 else:
204 return "-1"
205 '''