Mercurial > repos > yating-l > hubarchivecreator_test
comparison hub-archive-creator/TrackHub.py @ 6:9193fe3ee73f draft default tip
Uploaded
| author | yating-l |
|---|---|
| date | Thu, 22 Dec 2016 15:59:24 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 5:48d7b2dddae6 | 6:9193fe3ee73f |
|---|---|
| 1 #!/usr/bin/python | |
| 2 # -*- coding: utf8 -*- | |
| 3 | |
| 4 import logging | |
| 5 import os | |
| 6 import tempfile | |
| 7 import shutil | |
| 8 import zipfile | |
| 9 | |
| 10 # Internal dependencies | |
| 11 from Datatype import Datatype | |
| 12 from util import subtools | |
| 13 | |
| 14 from mako.lookup import TemplateLookup | |
| 15 | |
| 16 | |
class TrackHub(object):
    """Create and populate the folder structure of an Assembly Hub.

    On construction the hub skeleton is written under
    ``<extra_files_path>/myHub``: ``genomes.txt``, ``hub.txt``, an (empty)
    ``<genome_name>.html``, and a ``<genome_name>/`` folder holding the 2bit
    file, an (empty) ``description.html`` and a ``tracks/`` folder.
    Tracks and groups are appended afterwards with :meth:`addTrack` and
    :meth:`addGroup`; :meth:`terminate` writes the HTML listing of every
    file produced.
    """

    def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory):
        """Build the hub skeleton on disk and initialise ``Datatype``.

        :param inputFastaFile: reference genome object; the code reads its
            ``assembly_id`` and ``false_path`` attributes.
        :param user_email: e-mail written into ``hub.txt``.
        :param outputFile: path of the HTML file written by :meth:`terminate`.
        :param extra_files_path: directory under which ``myHub`` is created.
        :param tool_directory: directory containing the ``templates`` folder.
        """
        super(TrackHub, self).__init__()

        self.rootAssemblyHub = None

        self.mySpecieFolderPath = None
        self.myTracksFolderPath = None
        self.tool_directory = tool_directory

        self.reference_genome = inputFastaFile
        # TODO: Add the specie name
        self.genome_name = inputFastaFile.assembly_id
        self.specie_html = self.genome_name + '.html'
        self.default_pos = None
        self.user_email = user_email

        # Archive used by createZip(); created lazily there when left to None
        self.outputZip = None

        # Set containing the groups already added. Updated by addGroup()
        self.groups = set()

        # TODO: Modify according to the files passed in parameter
        # ---- Templates ----
        self.trackDbTemplate = self._get_template('templates/trackDb', "layout.txt")
        self.groupsTemplate = self._get_template('templates/groupsTxt', "layout.txt")
        # ---- End Templates ----

        self.extra_files_path = extra_files_path
        self.outputFile = outputFile

        # Create the structure of the Assembly Hub
        # TODO: Merge the following processing into a function as it is also used in twoBitCreator
        self.twoBitName = None
        self.two_bit_final_path = None
        self.chromSizesFile = None

        # Set all the missing variables of this class, and create physically the folders/files
        self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path)

        # Init the Datatype
        Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile,
                          self.extra_files_path, self.tool_directory,
                          self.mySpecieFolderPath, self.myTracksFolderPath)

    # ---- Private helpers ----

    def _get_template(self, template_dir, template_name):
        """Return the Mako template ``template_name`` found in
        ``<tool_directory>/<template_dir>``.

        Every template is rendered UTF-8 encoded, hence the files receiving
        the rendered output are opened in binary mode throughout this class.
        """
        lookup = TemplateLookup(directories=[os.path.join(self.tool_directory, template_dir)],
                                output_encoding='utf-8', encoding_errors='replace')
        return lookup.get_template(template_name)

    @staticmethod
    def _ensure_dir(path):
        """Create ``path`` (and its parents) unless it already exists."""
        if not os.path.exists(path):
            os.makedirs(path)

    @staticmethod
    def _insert_path(tree, path_parts):
        """Insert one file path, given as a list of components, into ``tree``.

        Every node of the tree is a tuple ``(children_dict, relative_path)``
        where ``relative_path`` is the '/'-joined path leading to that node.
        Nodes already present are reused, missing ones are created.
        """
        node = tree
        for depth, part in enumerate(path_parts):
            if part not in node:
                node[part] = ({}, '/'.join(path_parts[:depth + 1]))
            node = node[part][0]

    @staticmethod
    def _build_file_tree(root_dir):
        """Walk ``root_dir`` and return the tree of all the files it contains.

        Paths are taken relative to ``root_dir``; files located directly in
        ``root_dir`` therefore end up under a ``'.'`` node.
        """
        tree = {}
        for root, dirs, files in os.walk(root_dir):
            relative_directory = os.path.relpath(root, root_dir)
            for file_name in files:
                relative_file_path = os.path.join(relative_directory, file_name)
                TrackHub._insert_path(tree, relative_file_path.split(os.sep))
        return tree

    # ---- Public API ----

    def createZip(self):
        """Archive every file found under ``self.rootAssemblyHub``.

        When no archive was assigned to ``self.outputZip`` beforehand, one is
        created next to the hub folder, as ``<rootAssemblyHub>.zip``.
        Entries are stored relative to the parent of the hub folder (e.g.
        ``myHub/hub.txt``) rather than under their full path on disk.
        The archive is always closed, and ``self.outputZip`` reset to None.
        """
        hub_root = os.path.abspath(self.rootAssemblyHub)
        hub_parent = os.path.dirname(hub_root)

        archive = getattr(self, 'outputZip', None)
        if archive is None:
            archive = zipfile.ZipFile(hub_root + '.zip', 'w')

        try:
            for root, dirs, files in os.walk(hub_root):
                # Get all files and construct the dir at the same time
                for file_name in files:
                    file_path = os.path.join(root, file_name)
                    archive.write(file_path, os.path.relpath(file_path, hub_parent))
        finally:
            archive.close()
            self.outputZip = None

    def addTrack(self, trackDbObject=None):
        """Append the rendering of ``trackDbObject`` to ``trackDb.txt``.

        The file lives in the specie folder and is created when missing.
        The group of the track is registered through :meth:`addGroup`.
        Nothing is written when ``trackDbObject`` is None.

        :param trackDbObject: track description; the code reads its
            ``group_name`` and ``trackName`` attributes.
        """
        if trackDbObject is None:
            logging.warning("addTrack called without any trackDbObject: nothing added")
            return

        # TODO: Get this out of the function
        trackDbTxtFilePath = os.path.join(self.mySpecieFolderPath, 'trackDb.txt')

        # Append to trackDbTxtFilePath the trackDbTemplate populate with the newTrack object
        with open(trackDbTxtFilePath, 'ab') as trackDbFile:
            # TODO: The addGroup does not belong here. Move it when the group becomes more than just a label
            self.addGroup(trackDbObject.group_name)

            trackDbFile.write(self.trackDbTemplate.render(trackDbs=[trackDbObject]))

        logging.debug("We just added track {0} (in group {1})".format(trackDbObject.trackName,
                                                                      trackDbObject.group_name.lower().replace(' ', '_')))

    def addGroup(self, group_name="Default"):
        """Append ``group_name`` to ``groups.txt`` unless it was already added.

        The groups already written are remembered in ``self.groups``.
        """
        # TODO: Get this out of the function
        groupsTxtFilePath = os.path.join(self.mySpecieFolderPath, 'groups.txt')

        # If the group is already present, we don't need to add it
        if group_name in self.groups:
            logging.debug("We DON'T add in {0} the group {1}".format(groupsTxtFilePath,
                                                                     group_name))
            return

        # Append to groupsTxtFilePath the groupsTemplate populated with the group label
        with open(groupsTxtFilePath, 'ab') as groupFile:
            groupFile.write(self.groupsTemplate.render(label=group_name))
            logging.debug("We just added in {0} the group {1}".format(groupsTxtFilePath,
                                                                      group_name))
            self.groups.add(group_name)

    def terminate(self):
        """Write in ``self.outputFile`` the HTML listing of all the files
        found under ``self.extra_files_path``."""
        # TODO: Create a class to handle the file object
        mytemplate = self._get_template('templates', 'display.txt')
        with open(self.outputFile, 'wb') as htmlOutput:
            # TODO: We are basically looping two times: One time with os.walk, Second time
            # with the template. We could improve that if the number of files begins to be really important
            walkable_tree = self._build_file_tree(self.extra_files_path)
            htmlOutput.write(mytemplate.render(walkable_tree=walkable_tree))

    # ---- Hub creation ----

    def __createAssemblyHub__(self, extra_files_path):
        """Create the hub folders and files and return the hub root path.

        Sets ``chromSizesFile``, ``default_pos``, ``mySpecieFolderPath``,
        ``twoBitName``, ``two_bit_final_path`` and ``myTracksFolderPath``.

        :param extra_files_path: directory under which ``myHub`` is created.
        :raises IndexError: when the first line of the chrom sizes file is empty.
        """
        myHubPath = os.path.join(extra_files_path, "myHub")
        mySpecieFolderPath = os.path.join(myHubPath, self.genome_name)
        tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks")

        # mode and buffer size are given positionally: the keyword naming the
        # buffer size differs between Python 2 (bufsize) and Python 3 (buffering)
        twoBitFile = tempfile.NamedTemporaryFile('w+b', 0)
        twoBitInfoFile = tempfile.NamedTemporaryFile('w+b', 0)
        try:
            # 2bit file creation from input fasta
            subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name)

            # Generate the twoBitInfo
            subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name)

            # Then we get the output to generate the chromSizes.
            # This temporary file is kept open on purpose: it is stored on the
            # instance and handed over to Datatype.pre_init() by __init__
            self.chromSizesFile = tempfile.NamedTemporaryFile('w+b', 0, suffix=".chrom.sizes")
            subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name)

            # The default position is the first field of the first line of chromSizesFile
            with open(self.chromSizesFile.name, 'r') as chrom_sizes:
                # TODO: Check if exists
                self.default_pos = chrom_sizes.readline().split()[0]

            # TODO: Manage to put every fill Function in a file dedicated for reading reasons
            # Create the root directory
            self._ensure_dir(myHubPath)

            # Create the specie folder
            # TODO: Generate the name depending on the specie
            self._ensure_dir(mySpecieFolderPath)
            self.mySpecieFolderPath = mySpecieFolderPath

            # We create the 2bit file while we just created the specie folder
            self.twoBitName = self.genome_name + ".2bit"
            self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName)
            shutil.copyfile(twoBitFile.name, self.two_bit_final_path)
        finally:
            # The intermediate files are not needed past this point
            twoBitInfoFile.close()
            twoBitFile.close()

        # Add the genomes.txt file
        self.__fillGenomesTxt__(os.path.join(myHubPath, 'genomes.txt'))

        # Add the hub.txt file
        self.__fillHubTxt__(os.path.join(myHubPath, 'hub.txt'))

        # Add the hub.html file
        self.__fillHubHtmlFile__(os.path.join(myHubPath, self.specie_html))

        # Create the description html file in the specie folder
        self.__fillDescriptionHtmlFile__(os.path.join(mySpecieFolderPath, 'description.html'))

        # TODO: groups.txt is not created here; it is filled by addGroup()

        # Create the folder tracks into the specie folder
        self._ensure_dir(tracksFolderPath)
        self.myTracksFolderPath = tracksFolderPath

        return myHubPath

    def __fillGenomesTxt__(self, genomesTxtFilePath):
        """Write ``genomes.txt``; every path in it is relative to the hub root."""
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        mytemplate = self._get_template('templates/genomesAssembly', "layout.txt")
        with open(genomesTxtFilePath, 'wb') as genomesTxtFile:
            genomesTxtFile.write(mytemplate.render(
                genomeName=self.genome_name,
                trackDbPath=os.path.join(self.genome_name, "trackDb.txt"),
                groupsPath=os.path.join(self.genome_name, "groups.txt"),
                genomeDescription=self.genome_name,
                twoBitPath=os.path.join(self.genome_name, self.twoBitName),
                organismName=self.genome_name,
                defaultPosition=self.default_pos,
                orderKey="4500",
                scientificName=self.genome_name,
                pathAssemblyHtmlDescription=os.path.join(self.genome_name, "description.html")
            ))

    def __fillHubTxt__(self, hubTxtFilePath):
        """Write ``hub.txt`` from the genome name and the user e-mail."""
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        mytemplate = self._get_template('templates/hubTxt', 'layout.txt')
        with open(hubTxtFilePath, 'wb') as hubTxtFile:
            hubTxtFile.write(mytemplate.render(
                hubName='gonramp' + self.genome_name.title(),
                shortLabel=self.genome_name,
                longLabel=self.genome_name,
                genomesFile='genomes.txt',
                email=self.user_email,
                descriptionUrl=self.specie_html
            ))

    def __fillHubHtmlFile__(self, hubHtmlFilePath):
        """Create the hub description file, currently left empty.

        The template is rendered but its output is not written.
        """
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        mytemplate = self._get_template('templates/hubDescription', "layout.txt")
        with open(hubHtmlFilePath, 'wb'):
            # NOTE(review): the write was disabled in the original code, presumably
            # because the values below are hard-coded placeholders -- confirm
            # before writing the rendered output to the file
            mytemplate.render(
                specie='Dbia',
                toolUsed='Augustus',
                ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499',
                genomeID='3499',
                specieFullName='Drosophila biarmipes'
            )

    def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath):
        """Create the specie description file, currently left empty.

        The template is rendered but its output is not written.
        """
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        mytemplate = self._get_template('templates/specieDescription', "layout.txt")
        with open(descriptionHtmlFilePath, 'wb'):
            # NOTE(review): write disabled in the original code, presumably because
            # the description below is a placeholder -- confirm before enabling
            mytemplate.render(
                specieDescription='This is the description of the dbia',
            )

    def __fillGroupsTxtFile__(self, groupsTxtFilePath):
        """Create (truncate) ``groupsTxtFilePath`` without writing any content.

        Not called anywhere in this class: groups.txt is filled by addGroup().
        """
        # TODO: Reenable this function at some point
        mytemplate = self._get_template('templates/groupsTxt', "layout.txt")
        with open(groupsTxtFilePath, 'wb'):
            # NOTE(review): 'prioriy' looks like a typo of 'priority'; left
            # untouched as the name expected by the template is not visible here
            mytemplate.render(
                mapName='map',
                labelMapping='Mapping',
                prioriy='2',
                isClosed='0'
            )
