comparison TrackHub.py @ 1:85195e0d4b71 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit b1ae7349f118a0fe7923d765020dfc684cf84116-dirty
author yating-l
date Fri, 29 Sep 2017 13:32:23 -0400
parents
children
comparison
equal deleted inserted replaced
0:f9ccc5ad1713 1:85195e0d4b71
1 #!/usr/bin/python
2 # -*- coding: utf8 -*-
3
4 import logging
5 import os
6 import tempfile
7 import shutil
8 import zipfile
9 from mako.lookup import TemplateLookup
10
11 # Internal dependencies
12 from datatypes.Datatype import Datatype
13 from util import subtools
14 from util import santitizer
15
16
17
18
class TrackHub(object):
    """Create and populate the folder structure of an Assembly Hub.

    Instantiating the class converts the reference FASTA to 2bit, derives the
    chromosome sizes, creates ``<extra_files_path>/myHub/<genome_name>/`` with
    its ``tracks`` sub-folder and writes ``genomes.txt``, ``hub.txt`` and the
    (currently empty) HTML description files.  Tracks and groups are appended
    afterwards with :meth:`addTrack` / :meth:`addGroup`, and :meth:`terminate`
    renders the listing of everything under ``extra_files_path`` into
    ``outputFile``.
    """

    def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory):
        """Build the hub skeleton on disk and initialise ``Datatype``.

        :param inputFastaFile: reference genome object; its ``assembly_id`` and
            ``false_path`` attributes are read.
        :param user_email: e-mail address rendered into ``hub.txt``.
        :param outputFile: path of the file written by :meth:`terminate`.
        :param extra_files_path: directory under which ``myHub`` is created.
        :param tool_directory: directory containing the ``templates`` folder.
        """
        super(TrackHub, self).__init__()
        self.rootAssemblyHub = None

        self.mySpecieFolderPath = None
        self.myTracksFolderPath = None
        self.tool_directory = tool_directory

        self.reference_genome = inputFastaFile
        # TODO: Add the specie name
        self.genome_name = inputFastaFile.assembly_id
        self.specie_html = self.genome_name + '.html'
        # Filled in by __createAssemblyHub__ from the chromosome sizes file
        self.default_pos = None
        self.user_email = user_email

        # Set containing the groups already added. Updated by addGroup()
        self.groups = set()

        # TODO: Modify according to the files passed in parameter
        # ---- Templates ----
        self.trackDbTemplate = self._get_template('templates/trackDb', "layout.txt")
        self.groupsTemplate = self._get_template('templates/groupsTxt', "layout.txt")
        # ---- End Templates ----

        self.extra_files_path = extra_files_path
        self.outputFile = outputFile

        # Create the structure of the Assembly Hub
        # TODO: Merge the following processing into a function as it is also used in twoBitCreator
        self.twoBitName = None
        self.two_bit_final_path = None
        self.chromSizesFile = None

        # Set all the missing variables of this class, and create physically the folders/files
        self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path)

        # Init the Datatype
        Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile,
                          self.extra_files_path, self.tool_directory,
                          self.mySpecieFolderPath, self.myTracksFolderPath)

    # Disabled zip export, kept for reference:
    # def createZip(self):
    #     for root, dirs, files in os.walk(self.rootAssemblyHub):
    #         # Get all files and construct the dir at the same time
    #         for file in files:
    #             self.outputZip.write(os.path.join(root, file))
    #
    #     self.outputZip.close()

    def _get_template(self, template_subdir, template_name):
        """Return the Mako template ``template_name`` found in
        ``<tool_directory>/<template_subdir>``."""
        lookup = TemplateLookup(directories=[os.path.join(self.tool_directory, template_subdir)],
                                output_encoding='utf-8', encoding_errors='replace')
        return lookup.get_template(template_name)

    def addTrack(self, trackDbObject=None):
        """Append one track to ``trackDb.txt`` and register its group.

        :param trackDbObject: mapping describing the track.  Its ``"group"``
            entry is replaced in place by the sanitized group name; the
            original (unsanitized) name is the one passed to :meth:`addGroup`.
        :raises TypeError: if ``trackDbObject`` is ``None``.
        """
        # Fail before touching the file system: opening with 'a+' below would
        # otherwise create an empty trackDb.txt and only then blow up.
        if trackDbObject is None:
            raise TypeError("addTrack() requires a trackDbObject mapping, got None")

        # Create the trackDb.txt file in the specie folder, if not exists
        # Else append the new track
        # TODO: Get this out of the function
        trackDbTxtFilePath = os.path.join(self.mySpecieFolderPath, 'trackDb.txt')
        with open(trackDbTxtFilePath, 'a+') as trackDbFile:
            group_name = trackDbObject["group"]
            trackDbObject["group"] = santitizer.sanitize_group_name(group_name)
            # TODO: The addGroup does not belong here. Move it when the group becomes more than just a label
            self.addGroup(group_name)

            htmlMakoRendered = self.trackDbTemplate.render(
                trackDbs=[trackDbObject]
            )
            trackDbFile.write(htmlMakoRendered)

        logging.debug("We just added track {0} (in group {1})".format(trackDbObject.get("track"),
                                                                      trackDbObject.get("group").lower().replace(' ', '_')))

    def addGroup(self, group_name="Default"):
        """Append ``group_name`` to ``groups.txt`` unless it was already added.

        :param group_name: label rendered through the groups template and
            remembered in ``self.groups``.
        """
        # TODO: Get this out of the function
        groupsTxtFilePath = os.path.join(self.mySpecieFolderPath, 'groups.txt')

        # If the group is already present, we don't need to add it
        if group_name in self.groups:
            logging.debug("We DON'T add in {0} the group {1}".format(groupsTxtFilePath,
                                                                     group_name))
            return

        # Append the rendered group entry to groups.txt (created if missing)
        with open(groupsTxtFilePath, 'a+') as groupFile:
            htmlMakoRendered = self.groupsTemplate.render(
                label=group_name
            )
            groupFile.write(htmlMakoRendered)
        logging.debug("We just added in {0} the group {1}".format(groupsTxtFilePath,
                                                                  group_name))
        self.groups.add(group_name)

    @staticmethod
    def _insert_path(tree, path_parts):
        """Insert ``path_parts`` into the nested ``tree``.

        Every node is a ``(children_dict, relative_path)`` tuple where
        ``relative_path`` is the '/'-joined path from the top of the tree down
        to that node.  Components that already exist are reused, so inserting
        the same path twice is harmless.
        """
        node = tree
        for depth, part in enumerate(path_parts):
            if part not in node:
                # TODO: Manage also the links of the directories => No link?
                #       => Managed in display.txt, but could also be managed there
                node[part] = ({}, '/'.join(path_parts[:depth + 1]))
            node = node[part][0]

    def _build_walkable_tree(self):
        """Return the tree of every file found under ``self.extra_files_path``.

        Paths are relative to ``extra_files_path``; files located directly in
        it therefore end up under the ``'.'`` key.
        """
        walkable_tree = {}
        for root, _dirs, file_names in os.walk(self.extra_files_path):
            relative_directory = os.path.relpath(root, self.extra_files_path)
            for file_name in file_names:
                relative_file_path = os.path.join(relative_directory, file_name)
                # Split on the separator os.path.join actually used
                self._insert_path(walkable_tree, relative_file_path.split(os.sep))
        return walkable_tree

    def terminate(self):
        """Render ``templates/display.txt`` with the tree of generated files
        and write the result to ``self.outputFile``."""
        # Just a test to output a simple HTML
        # TODO: Create a class to handle the file object
        mytemplate = self._get_template('templates', 'display.txt')
        with open(self.outputFile, 'w') as htmlOutput:
            # TODO: We are basically looping two times: One time with os.walk, Second time
            # with the template. We could improve that if the number of files begins to be really important
            htmlMakoRendered = mytemplate.render(
                walkable_tree=self._build_walkable_tree()
            )
            htmlOutput.write(htmlMakoRendered)

    def __createAssemblyHub__(self, extra_files_path):
        """Create the hub folders and base files; return the hub root path.

        Sets ``chromSizesFile``, ``default_pos``, ``mySpecieFolderPath``,
        ``twoBitName``, ``two_bit_final_path`` and ``myTracksFolderPath``.

        :param extra_files_path: directory in which ``myHub`` is created.
        :returns: path of the ``myHub`` directory.
        :raises IndexError: if the generated chromosome sizes file is empty.
        """
        # 2bit file creation from input fasta.
        # Mode and buffer size are passed positionally because the keyword is
        # ``bufsize`` on Python 2 but ``buffering`` on Python 3.
        twoBitFile = tempfile.NamedTemporaryFile('w+b', 0)
        subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name)

        # Generate the twoBitInfo
        twoBitInfoFile = tempfile.NamedTemporaryFile('w+b', 0)
        subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name)

        # Then we get the output to generate the chromSizes.
        # Kept on self: the temporary file must outlive this method.
        self.chromSizesFile = tempfile.NamedTemporaryFile('w+b', 0, suffix=".chrom.sizes")
        subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name)

        # The first entry of the sorted file is used as default position
        # (presumably the biggest scaffold -- confirm in subtools.sortChromSizes)
        with open(self.chromSizesFile.name, 'r') as chrom_sizes:
            first_entry = chrom_sizes.readline().split()
        if not first_entry:
            # Same exception type the unchecked indexing used to raise,
            # but with a message that says what actually went wrong.
            raise IndexError("The chromosome sizes file generated from the reference "
                             "genome is empty: cannot determine the default position")
        self.default_pos = first_entry[0]

        # TODO: Manage to put every fill Function in a file dedicated for reading reasons
        # Create the root directory
        myHubPath = os.path.join(extra_files_path, "myHub")
        if not os.path.exists(myHubPath):
            os.makedirs(myHubPath)

        # Create the specie folder
        # TODO: Generate the name depending on the specie
        mySpecieFolderPath = os.path.join(myHubPath, self.genome_name)
        if not os.path.exists(mySpecieFolderPath):
            os.makedirs(mySpecieFolderPath)
        self.mySpecieFolderPath = mySpecieFolderPath

        # Copy the 2bit file into the specie folder we just created
        self.twoBitName = self.genome_name + ".2bit"
        self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName)
        shutil.copyfile(twoBitFile.name, self.two_bit_final_path)

        # Add the genomes.txt file
        self.__fillGenomesTxt__(os.path.join(myHubPath, 'genomes.txt'))

        # Add the hub.txt file
        self.__fillHubTxt__(os.path.join(myHubPath, 'hub.txt'))

        # Add the hub.html file
        self.__fillHubHtmlFile__(os.path.join(myHubPath, self.specie_html))

        # Create the description html file in the specie folder
        self.__fillDescriptionHtmlFile__(os.path.join(mySpecieFolderPath, 'description.html'))

        # Create the file groups.txt
        # TODO: If not inputs for this, do no create the file
        # groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt')
        # self.__fillGroupsTxtFile__(groupsTxtFilePath)

        # Create the folder tracks into the specie folder
        tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks")
        if not os.path.exists(tracksFolderPath):
            os.makedirs(tracksFolderPath)
        self.myTracksFolderPath = tracksFolderPath

        return myHubPath

    def __fillGenomesTxt__(self, genomesTxtFilePath):
        """Render ``templates/genomesAssembly/layout.txt`` into
        ``genomesTxtFilePath``; all paths rendered are relative to the hub root."""
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        mytemplate = self._get_template('templates/genomesAssembly', "layout.txt")
        with open(genomesTxtFilePath, 'w') as genomesTxtFile:
            htmlMakoRendered = mytemplate.render(
                genomeName=self.genome_name,
                trackDbPath=os.path.join(self.genome_name, "trackDb.txt"),
                groupsPath=os.path.join(self.genome_name, "groups.txt"),
                genomeDescription=self.genome_name,
                twoBitPath=os.path.join(self.genome_name, self.twoBitName),
                organismName=self.genome_name,
                defaultPosition=self.default_pos,
                orderKey="4500",
                scientificName=self.genome_name,
                pathAssemblyHtmlDescription=os.path.join(self.genome_name, "description.html")
            )
            genomesTxtFile.write(htmlMakoRendered)

    def __fillHubTxt__(self, hubTxtFilePath):
        """Render ``templates/hubTxt/layout.txt`` into ``hubTxtFilePath``."""
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        mytemplate = self._get_template('templates/hubTxt', 'layout.txt')
        with open(hubTxtFilePath, 'w') as hubTxtFile:
            htmlMakoRendered = mytemplate.render(
                hubName=(''.join(['gonramp', self.genome_name.title()])),
                shortLabel=self.genome_name,
                longLabel=self.genome_name,
                genomesFile='genomes.txt',
                email=self.user_email,
                descriptionUrl=self.specie_html
            )
            hubTxtFile.write(htmlMakoRendered)

    def __fillHubHtmlFile__(self, hubHtmlFilePath):
        """Create (or truncate) ``hubHtmlFilePath``.

        The hub description template is rendered with hard-coded placeholder
        values, but writing the result is currently disabled, so the file is
        left empty.
        """
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        mytemplate = self._get_template('templates/hubDescription', "layout.txt")
        with open(hubHtmlFilePath, 'w') as hubHtmlFile:
            htmlMakoRendered = mytemplate.render(
                specie='Dbia',
                toolUsed='Augustus',
                ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499',
                genomeID='3499',
                specieFullName='Drosophila biarmipes'
            )
            # hubHtmlFile.write(htmlMakoRendered)

    def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath):
        """Create (or truncate) ``descriptionHtmlFilePath``.

        The specie description template is rendered with a placeholder text,
        but writing the result is currently disabled, so the file is left
        empty.
        """
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        mytemplate = self._get_template('templates/specieDescription', "layout.txt")
        with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile:
            htmlMakoRendered = mytemplate.render(
                specieDescription='This is the description of the dbia',
            )
            # descriptionHtmlFile.write(htmlMakoRendered)

    def __fillGroupsTxtFile__(self, groupsTxtFilePath):
        """Create (or truncate) ``groupsTxtFilePath`` without writing content.

        Not called anywhere in this class at the moment; :meth:`addGroup` is
        what actually fills ``groups.txt``.
        """
        # TODO: Reenable this function at some point
        mytemplate = self._get_template('templates/groupsTxt', "layout.txt")
        with open(groupsTxtFilePath, 'w') as groupsTxtFile:
            # groupsTxtFile.write('name map')
            # NOTE(review): 'prioriy' looks like a typo for 'priority' -- check
            # the variable name used by the template before renaming it.
            htmlMakoRendered = mytemplate.render(
                mapName='map',
                labelMapping='Mapping',
                prioriy='2',
                isClosed='0'
            )
            # groupsTxtFile.write(htmlMakoRendered)