Mercurial > repos > yating-l > jbrowsearchivecreator
changeset 39:4a69515eed63 draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 91271a6c0d39c923f0d460b2979247baa297286b-dirty
author | yating-l |
---|---|
date | Fri, 13 Apr 2018 18:21:35 -0400 |
parents | d17f629f5486 |
children | b1553f2a4942 |
files | TrackHub.py datatypes/Datatype.py datatypes/interval/Interval.py jbrowseArchiveCreator.xml templates/display.txt tool_dependencies.xml tracks/CanvasFeatures.py tracks/IntervalFeatures.py util/Reader.py util/subtools.py |
diffstat | 10 files changed, 94 insertions(+), 178 deletions(-) [+] |
line wrap: on
line diff
--- a/TrackHub.py Fri Apr 06 13:44:56 2018 -0400 +++ b/TrackHub.py Fri Apr 13 18:21:35 2018 -0400 @@ -17,14 +17,15 @@ class TrackHub: def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory, trackType): - - self.rootAssemblyHub = None self.mySpecieFolderPath = None # Store intermediate files, will be removed if not in debug mode self.myTracksFolderPath = None + # Store interval files and their tabix index + self.myFinalTracksFolderPath = None + # Store binary files: Bam, BigWig self.myBinaryFolderPath = None @@ -38,29 +39,24 @@ # Set all the missing variables of this class, and create physically the folders/files - self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path) + self.__createAssemblyHub__(extra_files_path=extra_files_path) # Init the Datatype Datatype.pre_init(self.reference_genome, self.chromSizesFile, self.extra_files_path, self.tool_directory, - self.mySpecieFolderPath, self.myTracksFolderPath, self.myFinalTracksFolderPath, self.myBinaryFolderPath, self.trackType) + self.mySpecieFolderPath, self.myTracksFolderPath, self.myBinaryFolderPath, self.trackType) self._prepareRefseq() self.trackList = os.path.join(self.mySpecieFolderPath, "trackList.json") self._createTrackList() - - self.myTrackStyle = TrackStyles(self.tool_directory, self.mySpecieFolderPath, self.trackList) - #self.cssFolderPath = os.path.join(self.mySpecieFolderPath, 'css') - #self.cssFilePath = os.path.join(self.cssFolderPath, 'custom_track_styles.css') + if Datatype.trackType == 'HTMLFeatures': + self.myTrackStyle = TrackStyles(self.tool_directory, self.mySpecieFolderPath, self.trackList) self.logger = logging.getLogger(__name__) def addTrack(self, trackDbObject): if trackDbObject['dataType'].lower() == 'bam': - #new_track = subprocess.Popen(['echo', trackDbObject['options']], stdout=subprocess.PIPE) - #subprocess.call(['add-track-json.pl', json_file], stdin=new_track.stdout) subtools.add_track_json(self.trackList, trackDbObject['options']) - #subtools.add_track_json(self.trackList, trackDbObject['track_json']) elif trackDbObject['dataType'].lower() == 'bigwig': subtools.add_track_json(self.trackList, trackDbObject['options']) else: @@ -70,7 +66,7 @@ # Use Tabix index tracks by default for CanvasFeatures # TODO: add support for HTMLFeatures else: - subtools.generate_tabix_indexed_track(trackDbObject['trackDataURL'], trackDbObject['dataType'], self.myFinalTracksFolderPath) + subtools.generate_tabix_indexed_track(trackDbObject['trackDataURL'], trackDbObject['dataType'], trackDbObject['track'], self.myFinalTracksFolderPath) subtools.add_track_json(self.trackList, trackDbObject['options']) def terminate(self, debug=False): @@ -78,7 +74,6 @@ self._indexName() if not debug: self._removeRaw() - #self._makeArchive() self._outHtml() print "Success!\n" @@ -107,14 +102,8 @@ def _prepareRefseq(self): subtools.prepare_refseqs(self.reference_genome.false_path, self.mySpecieFolderPath) - #try: - #print os.path.join(self.tool_dir, 'prepare-refseqs.pl') + ", '--fasta', " + self.reference +", '--out', self.json])" - #subprocess.call(['prepare-refseqs.pl', '--fasta', self.reference_genome.false_path, '--out', self.mySpecieFolderPath]) - #except OSError as e: - #print "Cannot prepare reference error({0}): {1}".format(e.errno, e.strerror) def _indexName(self): - #subprocess.call(['generate-names.pl', '-v', '--out', self.mySpecieFolderPath]) subtools.generate_names(self.mySpecieFolderPath) print "finished name index \n" @@ -125,25 +114,14 @@ with open(self.outputFile, 'w') as htmlfile: htmlMakoRendered = htmlTemplate.render( - species_folder = os.path.relpath(self.mySpecieFolderPath, self.extra_files_path), + jbrowse_hub_name = self.reference_genome.assembly_id, trackList = os.path.relpath(self.trackList, self.extra_files_path) ) htmlfile.write(htmlMakoRendered) - #with open(self.outputFile, 'w') as htmlfile: - # htmlstr = 'The new Organism "%s" is created on Apollo: <br>' % self.genome_name - # jbrowse_hub = '<li><a href = "%s" target="_blank">View JBrowse Hub on Apollo</a></li>' % host_name - # htmlstr += jbrowse_hub - # htmlfile.write(htmlstr) - - def __createAssemblyHub__(self, extra_files_path): # Get all necessaries infos first # 2bit file creation from input fasta - - # baseNameFasta = os.path.basename(fasta_file_name) - # suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) - # nameTwoBit = suffixTwoBit + '.2bit' twoBitFile = tempfile.NamedTemporaryFile(bufsize=0) subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name) @@ -153,21 +131,15 @@ # Then we get the output to generate the chromSizes self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") - subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name) + subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name) # We can get the biggest scaffold here, with chromSizesFile with open(self.chromSizesFile.name, 'r') as chrom_sizes: # TODO: Check if exists self.default_pos = chrom_sizes.readline().split()[0] - # TODO: Manage to put every fill Function in a file dedicated for reading reasons - # Create the root directory - myHubPath = os.path.join(extra_files_path, "myHub") - if not os.path.exists(myHubPath): - os.makedirs(myHubPath) - - # Create the specie folder - mySpecieFolderPath = os.path.join(myHubPath, self.genome_name) + # Create the specie folder, use generic name "myHub", to support display JBrowse with Galaxy display application + mySpecieFolderPath = os.path.join(extra_files_path, "myHub") if not os.path.exists(mySpecieFolderPath): os.makedirs(mySpecieFolderPath) self.mySpecieFolderPath = mySpecieFolderPath @@ -188,5 +160,3 @@ if not os.path.exists(myBinaryFolderPath): os.makedirs(myBinaryFolderPath) self.myBinaryFolderPath = myBinaryFolderPath - - return myHubPath
--- a/datatypes/Datatype.py Fri Apr 06 13:44:56 2018 -0400 +++ b/datatypes/Datatype.py Fri Apr 13 18:21:35 2018 -0400 @@ -53,14 +53,13 @@ @staticmethod def pre_init(reference_genome, chrom_sizes_file, - extra_files_path, tool_directory, specie_folder, tracks_folder, final_tracks_folder, binary_folder, track_type): + extra_files_path, tool_directory, specie_folder, tracks_folder, binary_folder, track_type): Datatype.extra_files_path = extra_files_path Datatype.tool_directory = tool_directory # TODO: All this should be in TrackHub and not in Datatype Datatype.mySpecieFolderPath = specie_folder Datatype.myTrackFolderPath = tracks_folder # temporary raw data files - Datatype.myFinalTrackFolderPath = final_tracks_folder # final tracks folder Datatype.myBinaryFolderPath = binary_folder Datatype.input_fasta_file = reference_genome
--- a/datatypes/interval/Interval.py Fri Apr 06 13:44:56 2018 -0400 +++ b/datatypes/interval/Interval.py Fri Apr 13 18:21:35 2018 -0400 @@ -13,10 +13,7 @@ def __init__(self): super(Interval, self).__init__() - if not Datatype.trackType: - self.trackType = "CanvasFeatures" - else: - self.trackType = Datatype.trackType + self.trackType = Datatype.trackType logging.debug("Set default trackType = %s for feature tracks", self.trackType) self.trackFileType = "gff"
--- a/jbrowseArchiveCreator.xml Fri Apr 06 13:44:56 2018 -0400 +++ b/jbrowseArchiveCreator.xml Fri Apr 13 18:21:35 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="jbrowse_hub" name="JBrowse Archive Creator" version="2.0.1"> +<tool id="jbrowse_hub" name="JBrowse Archive Creator" version="2.1"> <description> This Galaxy tool is used to prepare your files to be ready for displaying on JBrowse with Apollo plugin </description> @@ -10,7 +10,7 @@ <requirement type="package" version="1.9">numpy</requirement> <requirement type="package" version="1.68">biopython</requirement> <requirement type="package" version="340">ucsc_hac</requirement> - <requirement type="package" version="1.12.4">jbrowse_tools</requirement> + <requirement type="package" version="1.13.1">jbrowse_tools</requirement> <requirement type="package" version="1.0">gff3sort</requirement> </requirements> @@ -150,6 +150,8 @@ #silent $data_parameter_dict.update({"debug_mode": str($advanced_options.debug_mode)}) +#silent $data_parameter_dict.update({"feature_tracks_type": str($advanced_options.feature_tracks_type)}) + with open(file_path, 'w') as f: json.dump($data_parameter_dict, f) </configfile> @@ -331,7 +333,16 @@ </param> <!-- TODO: Avoid redundancy here --> <when value="on"> - <param name="debug_mode" type="select" label="Activate debug mode"> + <param name="feature_tracks_type" type="select" label="Choose JBrowse feature tracks type"> + <option value="CanvasFeatures" selected="true">CanvasFeatures</option> + <option value="HTMLFeatures">HTMLFeatures</option> + <help> + Use CanvasFeatures by default. + CanvasFeatures are newer than HTMLFeatures. CanvasFeatures tracks are indexed Tabix GFF3 or BED files. + HTMLFeatures allow some additional features of Apollo: dragging the features and highlighting edges. HTMLFeatures tracks are generated by flat-file-to-json.pl script to convert range-based annotation data (genes, transcripts, etc) to range-indexed sets of static JSON files. + </help> + </param> + <param name="debug_mode" type="select" label="Activate debug mode"> <option value="false" selected="true">No</option> <option value="true">Yes</option> <help> @@ -340,9 +351,8 @@ </param> </when> <when value="off"> - <param name="debug_mode" type="hidden" - value="false"> - </param> + <param name="debug_mode" type="hidden" value="false" /> + <param name="feature_tracks_type" type="hidden" value="CanvasFeatures" /> </when> </conditional> </inputs>
--- a/templates/display.txt Fri Apr 06 13:44:56 2018 -0400 +++ b/templates/display.txt Fri Apr 13 18:21:35 2018 -0400 @@ -4,7 +4,11 @@ The following JBrowse hub has been generated by JBrowse Archive Creator: </p> <ul> - <li> ${species_folder} </li> + <li> JBrowse Hub Name </li> + <ul> + <li> ${jbrowse_hub_name} </li> + </ul> + <li> Track configuration </li> <ul> <li> <a href="${trackList}">Track settings</a> </li> </ul>
--- a/tool_dependencies.xml Fri Apr 06 13:44:56 2018 -0400 +++ b/tool_dependencies.xml Fri Apr 13 18:21:35 2018 -0400 @@ -52,11 +52,11 @@ </install> </package> - <package name="jbrowse_tools" version="1.12.4"> + <package name="jbrowse_tools" version="1.13.1"> <install version="1.0"> <actions_group> <actions architecture="x86_64" os="linux"> - <action type="download_by_url">https://github.com/GMOD/jbrowse/releases/download/1.12.4-release/JBrowse-1.12.4.zip</action> + <action type="download_by_url">https://github.com/GMOD/jbrowse/releases/download/1.13.1-release/JBrowse-1.13.1.zip</action> <action type="make_directory">$INSTALL_DIR/jbrowse</action> <action type="shell_command"> export HOME=$INSTALL_DIR/jbrowse && ./setup.sh
--- a/tracks/CanvasFeatures.py Fri Apr 06 13:44:56 2018 -0400 +++ b/tracks/CanvasFeatures.py Fri Apr 13 18:21:35 2018 -0400 @@ -23,30 +23,13 @@ track = dict() track['type'] = 'JBrowse/View/Track/' + self.trackType track['storeClass'] = 'JBrowse/Store/SeqFeature/GFF3Tabix' - track['urlTemplate'] = os.path.join('tracks', self.trackName) + if self.dataType == 'gff': + track['urlTemplate'] = os.path.join('tracks', self.trackName + '.gff3.gz') + else: + track['urlTemplate'] = os.path.join('tracks', self.trackName) track['label'] = self.trackLabel track['category'] = self.extraSettings['category'] track['style'] = self.extraSettings['style'] extraConfigs = track return extraConfigs - # def prepareExtraSetting(self): - # """ set CanvasFeatures configuration options """ - # extraConfigs = dict() - # self.extraSettings["clientConfig"] = dict() - # self.extraSettings["config"] = dict() - # if 'color' not in self.extraSettings or not self.extraSettings['color']: - # self.extraSettings["clientConfig"]['color'] = "#daa520" - # else: - # self.extraSettings["clientConfig"]['color'] = self.extraSettings['color'] - # if 'category' not in self.extraSettings or not self.extraSettings['category']: - # self.extraSettings["config"]['category'] = "Default group" - # else: - # self.extraSettings["config"]['category'] = self.extraSettings['category'] - # if 'glyph' in self.extraSettings: - # self.extraSettings["config"]['glyph'] = self.extraSettings['glyph'] - # if 'transcriptType' in self.extraSettings: - # self.extraSettings['config']['transcriptType'] = self.extraSettings['transcriptType'] - # extraConfigs["config"] = json.dumps(self.extraSettings["config"]) - # extraConfigs["clientConfig"] = json.dumps(self.extraSettings["clientConfig"]) - # return extraConfigs \ No newline at end of file
--- a/tracks/IntervalFeatures.py Fri Apr 06 13:44:56 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,81 +0,0 @@ -#!/usr/bin/env python -import json -import logging -import collections - -from TrackDb import TrackDb -from util import subtools -from util import santitizer - -class Features(TrackDb): - def __init__(self, trackName, trackLabel, trackDataURL, trackType, dataType, extraSettings=None): - super(Features, self).__init__(trackName, trackLabel, trackDataURL, trackType, dataType, extraSettings) - - def prepareExtraSetting(self): - if self.trackType == 'HTMLFeatures': - self.prepareHTMLExtraSetting() - else: - self.prepareCanvasExtraSetting() - - def prepareHTMLExtraSetting(self): - """ set HTMLFeatures configuration options """ - extraConfigs = dict() - self.extraSettings["clientConfig"] = dict() - self.extraSettings["config"] = dict() - if 'type' in self.extraSettings: - extraConfigs["type"] = self.extraSettings['type'] - if 'color' in self.extraSettings and self.extraSettings['color']: - extraConfigs['feature_color'] = self.extraSettings['color'] - else: - extraConfigs['feature_color'] = "#000000" - #self.extraSettings['clientConfig']['color'] = self.extraSettings['color'] - if 'subfeatureClasses' in self.extraSettings: - subfeature_css_class = santitizer.sanitize_name(self.trackLabel + "_" + self.extraSettings['subfeatureClasses']) - extraConfigs['subfeatureClasses'] = {self.extraSettings['subfeatureClasses']: subfeature_css_class} - - if 'category' not in self.extraSettings or not self.extraSettings['category']: - self.extraSettings['config']['category'] = "Default group" - else: - self.extraSettings['config']['category'] = self.extraSettings['category'] - - extraConfigs['config'] = json.dumps(self.extraSettings["config"]) - extraConfigs['clientConfig'] = json.dumps(self.extraSettings["clientConfig"]) - return extraConfigs - - def prepareCanvasExtraSetting(self): - """ set CanvasFeatures configuration options """ - extraConfigs = dict() - self.extraSettings["clientConfig"] = dict() - self.extraSettings["config"] = dict() - if 'color' not in self.extraSettings or not self.extraSettings['color']: - self.extraSettings["clientConfig"]['color'] = "#daa520" - else: - self.extraSettings["clientConfig"]['color'] = self.extraSettings['color'] - if 'category' not in self.extraSettings or not self.extraSettings['category']: - self.extraSettings["config"]['category'] = "Default group" - else: - self.extraSettings["config"]['category'] = self.extraSettings['category'] - if 'glyph' in self.extraSettings: - self.extraSettings["config"]['glyph'] = self.extraSettings['glyph'] - if 'transcriptType' in self.extraSettings: - self.extraSettings['config']['transcriptType'] = self.extraSettings['transcriptType'] - extraConfigs["config"] = json.dumps(self.extraSettings["config"]) - extraConfigs["clientConfig"] = json.dumps(self.extraSettings["clientConfig"]) - return extraConfigs - - def createTrackDb(self): - self.track_db = collections.OrderedDict([("track",self.trackName), - ("trackLabel",self.trackLabel), - ("trackDataURL",self.trackDataURL), - ("dataType", self.dataType), - ("trackType", self.trackType)] - ) - - - htmlExtraConfigs = self.prepareHTMLExtraSetting() - self.logger.debug("Generate extraConfigs for htmlFeatures = %s", json.dumps(htmlExtraConfigs)) - self.track_db["html"]["options"] = htmlExtraConfigs - canvasExtraConfigs = self.prepareCanvasExtraSetting() - self.logger.debug("Generate extraConfigs for canvasFeatures = %s", json.dumps(canvasExtraConfigs)) - self.track_db["canvas"]["options"] = canvasExtraConfigs - self.logger.debug("TrackDb object is created track_db = %s ", json.dumps(self.track_db)) \ No newline at end of file
--- a/util/Reader.py Fri Apr 06 13:44:56 2018 -0400 +++ b/util/Reader.py Fri Apr 13 18:21:35 2018 -0400 @@ -1,4 +1,6 @@ +import os import json +import shutil import logging import codecs @@ -67,8 +69,11 @@ exit(1) def getTrackType(self): - track_type = self.args.get("track_type") - return track_type + try: + return self.args.get("feature_tracks_type") + except KeyError: + print ("feature tracks type is not defined in the input file!") + exit(1) def getGenomeName(self): genome_name = santitizer.sanitize_name_input(self.args["genome_name"]) @@ -76,12 +81,13 @@ def getRefGenome(self): array_inputs_reference_genome = self.args["fasta"] - # TODO: Replace these with the object Fasta input_fasta_file = array_inputs_reference_genome["false_path"] input_fasta_file_name = santitizer.sanitize_name_input(array_inputs_reference_genome["name"]) - #genome_name = santitizer.sanitize_name_input(self.args["genome_name"]) + # Add "fasta" extension because Apollo needs it to create annotation + refseq_file = os.path.join(os.path.dirname(input_fasta_file), input_fasta_file_name + ".fasta") + shutil.copyfile(input_fasta_file, refseq_file) genome_name = self.getGenomeName() - reference_genome = Fasta(input_fasta_file, + reference_genome = Fasta(refseq_file, input_fasta_file_name, genome_name) return reference_genome
--- a/util/subtools.py Fri Apr 06 13:44:56 2018 -0400 +++ b/util/subtools.py Fri Apr 13 18:21:35 2018 -0400 @@ -237,16 +237,38 @@ else: raise ValueError('Did not find fai file') +def generate_indexed_refseq_track(fastaFile, referenceName, outputFolder): + faiFile = createFastaIndex(fastaFile) + refSeqFile = os.path.join(outputFolder, referenceName) + refSeqIndexFile = os.path.join(outputFolder, referenceName+'.fai') + shutil.copy(fastaFile, refSeqFile) + shutil.copy(faiFile, refSeqIndexFile) + +def remove_gene_lines(gff3_file, gff3_filtered): + with open(gff3_file, 'r') as f: + with open(gff3_filtered, 'w') as out: + for line in f: + if not line.startswith('#'): + feature_type = line.split('\t')[2].rstrip() + if feature_type == 'transcript' or feature_type == 'mRNA': + arr = line.split('\t') + # as we remove the gene features, we should also remove the Parent attribute (gene id) from the transcript + arr[8] = ';'.join([item for item in arr[8].split(';') if 'Parent=' not in item]).rstrip() + line = '\t'.join(arr) + '\n' + if feature_type == 'gene': + continue + out.write(line) + def gff3sort(inputFile, outputFile, precise=False): - array_call = ['gff3sort.pl', inputFile, '>', outputFile] + array_call = ['gff3sort.pl', inputFile] if precise: array_call.append('--precise') - p = _handleExceptionAndCheckCall(array_call) + p = _handleExceptionAndCheckCall(array_call, stdout=outputFile) return p def bedSort(inputFile, outputFile): - array_call = ['sort', '-k1,1', '-k2,2n', '-k6,6', inputFile, '>', outputFile] - p = _handleExceptionAndCheckCall(array_call) + array_call = ['sort', '-k1,1', '-k2,2n', '-k6,6', inputFile] + p = _handleExceptionAndCheckCall(array_call, stdout=outputFile) return p def bgzip(inputFile): @@ -265,19 +287,23 @@ else: raise ValueError('Did not find tbi file') -def generate_tabix_indexed_track(inputFile, dataType, outputFolder): +def generate_tabix_indexed_track(inputFile, dataType, trackName, outputFolder): if "bed" in dataType: fileType = 'bed' sortedFile = tempfile.NamedTemporaryFile(bufsize=0) - bedSort(inputFile, sortedFile.name) + bedSort(inputFile, sortedFile) elif "gff" in dataType: fileType = 'gff' + filteredFile = tempfile.NamedTemporaryFile(bufsize=0) + remove_gene_lines(inputFile, filteredFile.name) sortedFile = tempfile.NamedTemporaryFile(bufsize=0) - gff3sort(inputFile, sortedFile.name) - compressedFile = bgzip(sortedFile) + gff3sort(filteredFile.name, sortedFile) + # add .gff3.gz extension to Tabix GFF3 files, in order to enable creating name index with generate-names.pl + trackName = trackName + '.gff3.gz' + compressedFile = bgzip(sortedFile.name) tabixFile = createTabix(compressedFile, fileType) - trackPath = os.path.join(outputFolder, inputFile) - trackIndexPath = os.path.join(outputFolder, inputFile+'.tbi') + trackPath = os.path.join(outputFolder, trackName) + trackIndexPath = os.path.join(outputFolder, trackName+'.tbi') shutil.copy(compressedFile, trackPath) shutil.copy(tabixFile, trackIndexPath) @@ -349,13 +375,15 @@ p = subprocess.call(['add-track-json.pl', trackList], stdin=new_track.stdout) return p -def prepare_refseqs(fasta_file_name, outputFolder): - array_call = ['prepare-refseqs.pl', '--fasta', fasta_file_name, '--out', outputFolder] +def prepare_refseqs(fastaFile, outputFolder): + #array_call = ['prepare-refseqs.pl', '--fasta', fasta_file_name, '--out', outputFolder] + createFastaIndex(fastaFile) + array_call = ['prepare-refseqs.pl', '--indexed_fasta', fastaFile, '--out', outputFolder] p = _handleExceptionAndCheckCall(array_call) return p -def generate_names(outputFolder): - array_call = ['generate-names.pl', '-v', '--out', outputFolder] +def generate_names(outputFolder, hashBits=4): + array_call = ['generate-names.pl', '--hashBits', '4', '-v', '--out', outputFolder] p = _handleExceptionAndCheckCall(array_call) return p