Mercurial > repos > yating-l > hubarchivecreatortest
comparison TrackHub.py @ 1:85195e0d4b71 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit b1ae7349f118a0fe7923d765020dfc684cf84116-dirty
| author | yating-l |
|---|---|
| date | Fri, 29 Sep 2017 13:32:23 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:f9ccc5ad1713 | 1:85195e0d4b71 |
|---|---|
| 1 #!/usr/bin/python | |
| 2 # -*- coding: utf8 -*- | |
| 3 | |
| 4 import logging | |
| 5 import os | |
| 6 import tempfile | |
| 7 import shutil | |
| 8 import zipfile | |
| 9 from mako.lookup import TemplateLookup | |
| 10 | |
| 11 # Internal dependencies | |
| 12 from datatypes.Datatype import Datatype | |
| 13 from util import subtools | |
| 14 from util import santitizer | |
| 15 | |
| 16 | |
| 17 | |
| 18 | |
class TrackHub(object):
    """Create and populate the directory tree of an Assembly Hub.

    Instantiating the class builds ``<extra_files_path>/myHub`` containing
    ``genomes.txt``, ``hub.txt``, a hub HTML file and one folder named after
    the genome (holding the 2bit file, ``description.html`` and a ``tracks``
    folder).  ``addTrack`` / ``addGroup`` then append entries to
    ``trackDb.txt`` and ``groups.txt`` in the genome folder, and
    ``terminate`` writes a listing of every file found under
    ``extra_files_path`` to ``outputFile``.
    """

    def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory):
        """Build the hub skeleton on disk and initialise ``Datatype``.

        :param inputFastaFile: reference genome object; its ``assembly_id``
            and ``false_path`` attributes are read by this class.
        :param user_email: e-mail address written to ``hub.txt``.
        :param outputFile: path of the file written by ``terminate``.
        :param extra_files_path: directory under which ``myHub`` is created.
        :param tool_directory: directory that contains the ``templates`` folder.
        """
        super(TrackHub, self).__init__()
        self.rootAssemblyHub = None

        self.mySpecieFolderPath = None
        self.myTracksFolderPath = None
        self.tool_directory = tool_directory

        self.reference_genome = inputFastaFile
        # TODO: Add the specie name
        self.genome_name = inputFastaFile.assembly_id
        self.specie_html = self.genome_name + '.html'
        # Name of the first sequence listed in the sorted chrom.sizes file;
        # filled in by __createAssemblyHub__().
        self.default_pos = None
        self.user_email = user_email

        # Set containing the groups already added. Updated by addGroup()
        self.groups = set()

        # TODO: Modify according to the files passed in parameter
        # ---- Templates ----
        self.trackDbTemplate = self._get_template('templates/trackDb')
        self.groupsTemplate = self._get_template('templates/groupsTxt')
        # ---- End Templates ----

        self.extra_files_path = extra_files_path
        self.outputFile = outputFile

        # Create the structure of the Assembly Hub
        # TODO: Merge the following processing into a function as it is also used in twoBitCreator
        self.twoBitName = None
        self.two_bit_final_path = None
        self.chromSizesFile = None

        # Set all the missing variables of this class, and create physically the folders/files
        self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path)

        # Init the Datatype
        Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile,
                          self.extra_files_path, self.tool_directory,
                          self.mySpecieFolderPath, self.myTracksFolderPath)

    # NOTE: a disabled createZip() helper used to live here inside a string
    # literal; it referenced a non-existent self.outputZip and was never callable.

    def _get_template(self, template_dir, template_name="layout.txt"):
        """Return the Mako template ``template_name`` found in ``template_dir``.

        :param template_dir: directory relative to ``self.tool_directory``.
        :param template_name: file name of the template inside that directory.
        """
        lookup = TemplateLookup(directories=[os.path.join(self.tool_directory, template_dir)],
                                output_encoding='utf-8', encoding_errors='replace')
        return lookup.get_template(template_name)

    @staticmethod
    def _make_dirs(path):
        """Create ``path`` (and missing parents) unless it already exists; return ``path``."""
        if not os.path.exists(path):
            os.makedirs(path)
        return path

    @staticmethod
    def _build_walkable_tree(root_path):
        """Return a nested dict describing every file found under ``root_path``.

        Each key is one path component and maps to a tuple
        ``(children_dict, relative_path)`` where ``relative_path`` is the
        '/'-joined path from ``root_path`` down to that component.  Files that
        sit directly in ``root_path`` are stored under a ``'.'`` entry because
        their directory is expressed relative to ``root_path``.  Directories
        that contain no file (directly or indirectly) do not appear.
        """
        walkable_tree = {}
        for root, _dirs, files in os.walk(root_path):
            relative_directory = os.path.relpath(root, root_path)
            for file_name in files:
                parts = os.path.join(relative_directory, file_name).split(os.sep)
                node = walkable_tree
                for depth, name in enumerate(parts):
                    if name not in node:
                        node[name] = ({}, '/'.join(parts[:depth + 1]))
                    # Descend into the children dict of the current component
                    node = node[name][0]
        return walkable_tree

    def addTrack(self, trackDbObject=None):
        """Append one track to ``trackDb.txt`` and register its group.

        :param trackDbObject: mapping describing the track.  It must hold a
            ``"group"`` key; that value is replaced in place by its sanitized
            form.  The ``"track"`` key is only used for logging.
        :raises TypeError: if ``trackDbObject`` is None.
        """
        # Validate before touching the disk so a bad call leaves no empty file behind
        if trackDbObject is None:
            raise TypeError("addTrack() requires a trackDbObject mapping, got None")

        group_name = trackDbObject["group"]
        trackDbObject["group"] = santitizer.sanitize_group_name(group_name)

        # TODO: The addGroup does not belong here. Move it when the group becomes more than just a label
        # The group is registered under its original (unsanitized) label
        self.addGroup(group_name)

        # TODO: Get this out of the function
        trackDbTxtFilePath = os.path.join(self.mySpecieFolderPath, 'trackDb.txt')
        # Append mode creates trackDb.txt on first use and keeps earlier tracks
        with open(trackDbTxtFilePath, 'a') as trackDbFile:
            trackDbFile.write(self.trackDbTemplate.render(trackDbs=[trackDbObject]))

        logging.debug("We just added track {0} (in group {1})".format(
            trackDbObject.get("track"),
            trackDbObject.get("group").lower().replace(' ', '_')))

    def addGroup(self, group_name="Default"):
        """Append ``group_name`` to ``groups.txt`` unless it was already added.

        :param group_name: label passed to the groups template.
        """
        # TODO: Get this out of the function
        groupsTxtFilePath = os.path.join(self.mySpecieFolderPath, 'groups.txt')

        # If the group is already present, we don't need to add it
        if group_name in self.groups:
            logging.debug("We DON'T add in {0} the group {1}".format(groupsTxtFilePath,
                                                                     group_name))
            return

        with open(groupsTxtFilePath, 'a') as groupFile:
            groupFile.write(self.groupsTemplate.render(label=group_name))
        logging.debug("We just added in {0} the group {1}".format(groupsTxtFilePath,
                                                                  group_name))
        self.groups.add(group_name)

    def terminate(self):
        """Write to ``self.outputFile`` a rendered listing of ``self.extra_files_path``."""
        # TODO: Create a class to handle the file object
        mytemplate = self._get_template('templates', 'display.txt')
        with open(self.outputFile, 'w') as htmlOutput:
            # TODO: We are basically looping two times: One time with os.walk, Second time
            # with the template. We could improve that if the number of files begins to be really important
            # TODO: Manage also the links of the directories => Managed in display.txt for now
            walkable_tree = self._build_walkable_tree(self.extra_files_path)
            htmlOutput.write(mytemplate.render(walkable_tree=walkable_tree))

    def __createAssemblyHub__(self, extra_files_path):
        """Create the hub folders and files under ``extra_files_path``.

        Sets ``chromSizesFile``, ``default_pos``, ``mySpecieFolderPath``,
        ``twoBitName``, ``two_bit_final_path`` and ``myTracksFolderPath``.

        :param extra_files_path: directory in which ``myHub`` is created.
        :returns: path of the ``myHub`` directory.
        """
        # NOTE(review): ``bufsize`` is the Python 2 spelling of this argument.
        # The temporary files are only handed to the helpers by name and are
        # never closed explicitly here.
        # 2bit file creation from input fasta
        twoBitFile = tempfile.NamedTemporaryFile(bufsize=0)
        subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name)

        # Generate the twoBitInfo
        twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
        subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name)

        # Then we get the output to generate the chromSizes
        self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
        subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name)

        # The first field of the first line is used as default position
        # (presumably the biggest scaffold, given the sorting step above).
        with open(self.chromSizesFile.name, 'r') as chrom_sizes:
            # TODO: Check if exists -- an empty file makes the [0] below raise IndexError
            self.default_pos = chrom_sizes.readline().split()[0]

        # TODO: Manage to put every fill Function in a file dedicated for reading reasons
        # Create the root directory
        myHubPath = self._make_dirs(os.path.join(extra_files_path, "myHub"))

        # Create the specie folder
        # TODO: Generate the name depending on the specie
        mySpecieFolderPath = self._make_dirs(os.path.join(myHubPath, self.genome_name))
        self.mySpecieFolderPath = mySpecieFolderPath

        # Copy the 2bit file into the specie folder we just created
        self.twoBitName = self.genome_name + ".2bit"
        self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName)
        shutil.copyfile(twoBitFile.name, self.two_bit_final_path)

        # Add the genomes.txt file
        self.__fillGenomesTxt__(os.path.join(myHubPath, 'genomes.txt'))

        # Add the hub.txt file
        self.__fillHubTxt__(os.path.join(myHubPath, 'hub.txt'))

        # Add the hub.html file
        self.__fillHubHtmlFile__(os.path.join(myHubPath, self.specie_html))

        # Create the description html file in the specie folder
        self.__fillDescriptionHtmlFile__(os.path.join(mySpecieFolderPath, 'description.html'))

        # groups.txt is not created here: addGroup() appends to it on demand
        # TODO: If not inputs for this, do no create the file

        # Create the folder tracks into the specie folder
        self.myTracksFolderPath = self._make_dirs(os.path.join(mySpecieFolderPath, "tracks"))

        return myHubPath

    def __fillGenomesTxt__(self, genomesTxtFilePath):
        """Render the genomesAssembly template into ``genomesTxtFilePath``.

        All paths given to the template are relative to the hub root and
        start with the genome folder name.
        """
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        mytemplate = self._get_template('templates/genomesAssembly')
        with open(genomesTxtFilePath, 'w') as genomesTxtFile:
            genomesTxtFile.write(mytemplate.render(
                genomeName=self.genome_name,
                trackDbPath=os.path.join(self.genome_name, "trackDb.txt"),
                groupsPath=os.path.join(self.genome_name, "groups.txt"),
                genomeDescription=self.genome_name,
                twoBitPath=os.path.join(self.genome_name, self.twoBitName),
                organismName=self.genome_name,
                defaultPosition=self.default_pos,
                orderKey="4500",
                scientificName=self.genome_name,
                pathAssemblyHtmlDescription=os.path.join(self.genome_name, "description.html")
            ))

    def __fillHubTxt__(self, hubTxtFilePath):
        """Render the hubTxt template into ``hubTxtFilePath``."""
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        mytemplate = self._get_template('templates/hubTxt')
        with open(hubTxtFilePath, 'w') as hubTxtFile:
            hubTxtFile.write(mytemplate.render(
                hubName=(''.join(['gonramp', self.genome_name.title()])),
                shortLabel=self.genome_name,
                longLabel=self.genome_name,
                genomesFile='genomes.txt',
                email=self.user_email,
                descriptionUrl=self.specie_html
            ))

    def __fillHubHtmlFile__(self, hubHtmlFilePath):
        """Create ``hubHtmlFilePath`` as an empty file.

        The hubDescription template is still looked up and rendered, but the
        result is discarded.
        """
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        mytemplate = self._get_template('templates/hubDescription')
        # Opening in 'w' mode creates (or truncates) the file even though nothing is written
        with open(hubHtmlFilePath, 'w'):
            # NOTE(review): the values below are hard-coded, which is presumably
            # why writing the rendered output was disabled -- confirm before re-enabling.
            mytemplate.render(
                specie='Dbia',
                toolUsed='Augustus',
                ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499',
                genomeID='3499',
                specieFullName='Drosophila biarmipes'
            )

    def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath):
        """Create ``descriptionHtmlFilePath`` as an empty file.

        The specieDescription template is still looked up and rendered, but
        the result is discarded.
        """
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        mytemplate = self._get_template('templates/specieDescription')
        # Opening in 'w' mode creates (or truncates) the file even though nothing is written
        with open(descriptionHtmlFilePath, 'w'):
            # NOTE(review): hard-coded description; writing the output is disabled
            mytemplate.render(
                specieDescription='This is the description of the dbia',
            )

    def __fillGroupsTxtFile__(self, groupsTxtFilePath):
        """Truncate ``groupsTxtFilePath`` and render the groupsTxt template without writing it.

        Not called from within this class; groups are written by ``addGroup``.
        """
        # TODO: Reenable this function at some point
        mytemplate = self._get_template('templates/groupsTxt')
        with open(groupsTxtFilePath, 'w'):
            # NOTE(review): 'prioriy' looks like a typo for 'priority' -- check
            # the template's variable names before renaming it.
            mytemplate.render(
                mapName='map',
                labelMapping='Mapping',
                prioriy='2',
                isClosed='0'
            )
