# HG changeset patch
# User yating-l
# Date 1523658095 14400
# Node ID 4a69515eed632e9425955e6225d1371a7a3b1fd3
# Parent d17f629f5486099979dec572d78034f33f58cfb2
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 91271a6c0d39c923f0d460b2979247baa297286b-dirty
diff -r d17f629f5486 -r 4a69515eed63 TrackHub.py
--- a/TrackHub.py Fri Apr 06 13:44:56 2018 -0400
+++ b/TrackHub.py Fri Apr 13 18:21:35 2018 -0400
@@ -17,14 +17,15 @@
class TrackHub:
def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory, trackType):
-
- self.rootAssemblyHub = None
self.mySpecieFolderPath = None
# Store intermediate files, will be removed if not in debug mode
self.myTracksFolderPath = None
+ # Store interval files and their tabix index
+ self.myFinalTracksFolderPath = None
+
# Store binary files: Bam, BigWig
self.myBinaryFolderPath = None
@@ -38,29 +39,24 @@
# Set all the missing variables of this class, and create physically the folders/files
- self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path)
+ self.__createAssemblyHub__(extra_files_path=extra_files_path)
# Init the Datatype
Datatype.pre_init(self.reference_genome, self.chromSizesFile,
self.extra_files_path, self.tool_directory,
- self.mySpecieFolderPath, self.myTracksFolderPath, self.myFinalTracksFolderPath, self.myBinaryFolderPath, self.trackType)
+ self.mySpecieFolderPath, self.myTracksFolderPath, self.myBinaryFolderPath, self.trackType)
self._prepareRefseq()
self.trackList = os.path.join(self.mySpecieFolderPath, "trackList.json")
self._createTrackList()
-
- self.myTrackStyle = TrackStyles(self.tool_directory, self.mySpecieFolderPath, self.trackList)
- #self.cssFolderPath = os.path.join(self.mySpecieFolderPath, 'css')
- #self.cssFilePath = os.path.join(self.cssFolderPath, 'custom_track_styles.css')
+ if Datatype.trackType == 'HTMLFeatures':
+ self.myTrackStyle = TrackStyles(self.tool_directory, self.mySpecieFolderPath, self.trackList)
self.logger = logging.getLogger(__name__)
def addTrack(self, trackDbObject):
if trackDbObject['dataType'].lower() == 'bam':
- #new_track = subprocess.Popen(['echo', trackDbObject['options']], stdout=subprocess.PIPE)
- #subprocess.call(['add-track-json.pl', json_file], stdin=new_track.stdout)
subtools.add_track_json(self.trackList, trackDbObject['options'])
- #subtools.add_track_json(self.trackList, trackDbObject['track_json'])
elif trackDbObject['dataType'].lower() == 'bigwig':
subtools.add_track_json(self.trackList, trackDbObject['options'])
else:
@@ -70,7 +66,7 @@
# Use Tabix index tracks by default for CanvasFeatures
# TODO: add support for HTMLFeatures
else:
- subtools.generate_tabix_indexed_track(trackDbObject['trackDataURL'], trackDbObject['dataType'], self.myFinalTracksFolderPath)
+ subtools.generate_tabix_indexed_track(trackDbObject['trackDataURL'], trackDbObject['dataType'], trackDbObject['track'], self.myFinalTracksFolderPath)
subtools.add_track_json(self.trackList, trackDbObject['options'])
def terminate(self, debug=False):
@@ -78,7 +74,6 @@
self._indexName()
if not debug:
self._removeRaw()
- #self._makeArchive()
self._outHtml()
print "Success!\n"
@@ -107,14 +102,8 @@
def _prepareRefseq(self):
subtools.prepare_refseqs(self.reference_genome.false_path, self.mySpecieFolderPath)
- #try:
- #print os.path.join(self.tool_dir, 'prepare-refseqs.pl') + ", '--fasta', " + self.reference +", '--out', self.json])"
- #subprocess.call(['prepare-refseqs.pl', '--fasta', self.reference_genome.false_path, '--out', self.mySpecieFolderPath])
- #except OSError as e:
- #print "Cannot prepare reference error({0}): {1}".format(e.errno, e.strerror)
def _indexName(self):
- #subprocess.call(['generate-names.pl', '-v', '--out', self.mySpecieFolderPath])
subtools.generate_names(self.mySpecieFolderPath)
print "finished name index \n"
@@ -125,25 +114,14 @@
with open(self.outputFile, 'w') as htmlfile:
htmlMakoRendered = htmlTemplate.render(
- species_folder = os.path.relpath(self.mySpecieFolderPath, self.extra_files_path),
+ jbrowse_hub_name = self.reference_genome.assembly_id,
trackList = os.path.relpath(self.trackList, self.extra_files_path)
)
htmlfile.write(htmlMakoRendered)
- #with open(self.outputFile, 'w') as htmlfile:
- # htmlstr = 'The new Organism "%s" is created on Apollo:
' % self.genome_name
- # jbrowse_hub = '
View JBrowse Hub on Apollo' % host_name
- # htmlstr += jbrowse_hub
- # htmlfile.write(htmlstr)
-
-
def __createAssemblyHub__(self, extra_files_path):
# Get all necessaries infos first
# 2bit file creation from input fasta
-
- # baseNameFasta = os.path.basename(fasta_file_name)
- # suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
- # nameTwoBit = suffixTwoBit + '.2bit'
twoBitFile = tempfile.NamedTemporaryFile(bufsize=0)
subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name)
@@ -153,21 +131,15 @@
# Then we get the output to generate the chromSizes
self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
- subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name)
+ subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name)
# We can get the biggest scaffold here, with chromSizesFile
with open(self.chromSizesFile.name, 'r') as chrom_sizes:
# TODO: Check if exists
self.default_pos = chrom_sizes.readline().split()[0]
- # TODO: Manage to put every fill Function in a file dedicated for reading reasons
- # Create the root directory
- myHubPath = os.path.join(extra_files_path, "myHub")
- if not os.path.exists(myHubPath):
- os.makedirs(myHubPath)
-
- # Create the specie folder
- mySpecieFolderPath = os.path.join(myHubPath, self.genome_name)
+ # Create the species folder under the generic name "myHub" so that JBrowse can be displayed through the Galaxy display application
+ mySpecieFolderPath = os.path.join(extra_files_path, "myHub")
if not os.path.exists(mySpecieFolderPath):
os.makedirs(mySpecieFolderPath)
self.mySpecieFolderPath = mySpecieFolderPath
@@ -188,5 +160,3 @@
if not os.path.exists(myBinaryFolderPath):
os.makedirs(myBinaryFolderPath)
self.myBinaryFolderPath = myBinaryFolderPath
-
- return myHubPath
diff -r d17f629f5486 -r 4a69515eed63 datatypes/Datatype.py
--- a/datatypes/Datatype.py Fri Apr 06 13:44:56 2018 -0400
+++ b/datatypes/Datatype.py Fri Apr 13 18:21:35 2018 -0400
@@ -53,14 +53,13 @@
@staticmethod
def pre_init(reference_genome, chrom_sizes_file,
- extra_files_path, tool_directory, specie_folder, tracks_folder, final_tracks_folder, binary_folder, track_type):
+ extra_files_path, tool_directory, specie_folder, tracks_folder, binary_folder, track_type):
Datatype.extra_files_path = extra_files_path
Datatype.tool_directory = tool_directory
# TODO: All this should be in TrackHub and not in Datatype
Datatype.mySpecieFolderPath = specie_folder
Datatype.myTrackFolderPath = tracks_folder # temporary raw data files
- Datatype.myFinalTrackFolderPath = final_tracks_folder # final tracks folder
Datatype.myBinaryFolderPath = binary_folder
Datatype.input_fasta_file = reference_genome
diff -r d17f629f5486 -r 4a69515eed63 datatypes/interval/Interval.py
--- a/datatypes/interval/Interval.py Fri Apr 06 13:44:56 2018 -0400
+++ b/datatypes/interval/Interval.py Fri Apr 13 18:21:35 2018 -0400
@@ -13,10 +13,7 @@
def __init__(self):
super(Interval, self).__init__()
- if not Datatype.trackType:
- self.trackType = "CanvasFeatures"
- else:
- self.trackType = Datatype.trackType
+ self.trackType = Datatype.trackType
logging.debug("Set default trackType = %s for feature tracks", self.trackType)
self.trackFileType = "gff"
diff -r d17f629f5486 -r 4a69515eed63 jbrowseArchiveCreator.xml
--- a/jbrowseArchiveCreator.xml Fri Apr 06 13:44:56 2018 -0400
+++ b/jbrowseArchiveCreator.xml Fri Apr 13 18:21:35 2018 -0400
@@ -1,4 +1,4 @@
-
+
This Galaxy tool is used to prepare your files to be ready for displaying on JBrowse with Apollo plugin
@@ -10,7 +10,7 @@
numpy
biopython
ucsc_hac
- jbrowse_tools
+ jbrowse_tools
gff3sort
@@ -150,6 +150,8 @@
#silent $data_parameter_dict.update({"debug_mode": str($advanced_options.debug_mode)})
+#silent $data_parameter_dict.update({"feature_tracks_type": str($advanced_options.feature_tracks_type)})
+
with open(file_path, 'w') as f:
json.dump($data_parameter_dict, f)
@@ -331,7 +333,16 @@
-
+
+
+
+
+ Use CanvasFeatures by default.
+ CanvasFeatures are newer than HTMLFeatures. CanvasFeatures tracks are Tabix-indexed GFF3 or BED files.
+ HTMLFeatures allow some additional Apollo features: dragging features and highlighting edges. HTMLFeatures tracks are generated by the flat-file-to-json.pl script, which converts range-based annotation data (genes, transcripts, etc.) to range-indexed sets of static JSON files.
+
+
+
@@ -340,9 +351,8 @@
-
-
+
+
diff -r d17f629f5486 -r 4a69515eed63 templates/display.txt
--- a/templates/display.txt Fri Apr 06 13:44:56 2018 -0400
+++ b/templates/display.txt Fri Apr 13 18:21:35 2018 -0400
@@ -4,7 +4,11 @@
The following JBrowse hub has been generated by JBrowse Archive Creator:
- - ${species_folder}
+ - JBrowse Hub Name
+
+ - Track configuration
diff -r d17f629f5486 -r 4a69515eed63 tool_dependencies.xml
--- a/tool_dependencies.xml Fri Apr 06 13:44:56 2018 -0400
+++ b/tool_dependencies.xml Fri Apr 13 18:21:35 2018 -0400
@@ -52,11 +52,11 @@
-
+
- https://github.com/GMOD/jbrowse/releases/download/1.12.4-release/JBrowse-1.12.4.zip
+ https://github.com/GMOD/jbrowse/releases/download/1.13.1-release/JBrowse-1.13.1.zip
$INSTALL_DIR/jbrowse
export HOME=$INSTALL_DIR/jbrowse && ./setup.sh
diff -r d17f629f5486 -r 4a69515eed63 tracks/CanvasFeatures.py
--- a/tracks/CanvasFeatures.py Fri Apr 06 13:44:56 2018 -0400
+++ b/tracks/CanvasFeatures.py Fri Apr 13 18:21:35 2018 -0400
@@ -23,30 +23,13 @@
track = dict()
track['type'] = 'JBrowse/View/Track/' + self.trackType
track['storeClass'] = 'JBrowse/Store/SeqFeature/GFF3Tabix'
- track['urlTemplate'] = os.path.join('tracks', self.trackName)
+ if self.dataType == 'gff':
+ track['urlTemplate'] = os.path.join('tracks', self.trackName + '.gff3.gz')
+ else:
+ track['urlTemplate'] = os.path.join('tracks', self.trackName)
track['label'] = self.trackLabel
track['category'] = self.extraSettings['category']
track['style'] = self.extraSettings['style']
extraConfigs = track
return extraConfigs
- # def prepareExtraSetting(self):
- # """ set CanvasFeatures configuration options """
- # extraConfigs = dict()
- # self.extraSettings["clientConfig"] = dict()
- # self.extraSettings["config"] = dict()
- # if 'color' not in self.extraSettings or not self.extraSettings['color']:
- # self.extraSettings["clientConfig"]['color'] = "#daa520"
- # else:
- # self.extraSettings["clientConfig"]['color'] = self.extraSettings['color']
- # if 'category' not in self.extraSettings or not self.extraSettings['category']:
- # self.extraSettings["config"]['category'] = "Default group"
- # else:
- # self.extraSettings["config"]['category'] = self.extraSettings['category']
- # if 'glyph' in self.extraSettings:
- # self.extraSettings["config"]['glyph'] = self.extraSettings['glyph']
- # if 'transcriptType' in self.extraSettings:
- # self.extraSettings['config']['transcriptType'] = self.extraSettings['transcriptType']
- # extraConfigs["config"] = json.dumps(self.extraSettings["config"])
- # extraConfigs["clientConfig"] = json.dumps(self.extraSettings["clientConfig"])
- # return extraConfigs
\ No newline at end of file
diff -r d17f629f5486 -r 4a69515eed63 tracks/IntervalFeatures.py
--- a/tracks/IntervalFeatures.py Fri Apr 06 13:44:56 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-#!/usr/bin/env python
-import json
-import logging
-import collections
-
-from TrackDb import TrackDb
-from util import subtools
-from util import santitizer
-
-class Features(TrackDb):
- def __init__(self, trackName, trackLabel, trackDataURL, trackType, dataType, extraSettings=None):
- super(Features, self).__init__(trackName, trackLabel, trackDataURL, trackType, dataType, extraSettings)
-
- def prepareExtraSetting(self):
- if self.trackType == 'HTMLFeatures':
- self.prepareHTMLExtraSetting()
- else:
- self.prepareCanvasExtraSetting()
-
- def prepareHTMLExtraSetting(self):
- """ set HTMLFeatures configuration options """
- extraConfigs = dict()
- self.extraSettings["clientConfig"] = dict()
- self.extraSettings["config"] = dict()
- if 'type' in self.extraSettings:
- extraConfigs["type"] = self.extraSettings['type']
- if 'color' in self.extraSettings and self.extraSettings['color']:
- extraConfigs['feature_color'] = self.extraSettings['color']
- else:
- extraConfigs['feature_color'] = "#000000"
- #self.extraSettings['clientConfig']['color'] = self.extraSettings['color']
- if 'subfeatureClasses' in self.extraSettings:
- subfeature_css_class = santitizer.sanitize_name(self.trackLabel + "_" + self.extraSettings['subfeatureClasses'])
- extraConfigs['subfeatureClasses'] = {self.extraSettings['subfeatureClasses']: subfeature_css_class}
-
- if 'category' not in self.extraSettings or not self.extraSettings['category']:
- self.extraSettings['config']['category'] = "Default group"
- else:
- self.extraSettings['config']['category'] = self.extraSettings['category']
-
- extraConfigs['config'] = json.dumps(self.extraSettings["config"])
- extraConfigs['clientConfig'] = json.dumps(self.extraSettings["clientConfig"])
- return extraConfigs
-
- def prepareCanvasExtraSetting(self):
- """ set CanvasFeatures configuration options """
- extraConfigs = dict()
- self.extraSettings["clientConfig"] = dict()
- self.extraSettings["config"] = dict()
- if 'color' not in self.extraSettings or not self.extraSettings['color']:
- self.extraSettings["clientConfig"]['color'] = "#daa520"
- else:
- self.extraSettings["clientConfig"]['color'] = self.extraSettings['color']
- if 'category' not in self.extraSettings or not self.extraSettings['category']:
- self.extraSettings["config"]['category'] = "Default group"
- else:
- self.extraSettings["config"]['category'] = self.extraSettings['category']
- if 'glyph' in self.extraSettings:
- self.extraSettings["config"]['glyph'] = self.extraSettings['glyph']
- if 'transcriptType' in self.extraSettings:
- self.extraSettings['config']['transcriptType'] = self.extraSettings['transcriptType']
- extraConfigs["config"] = json.dumps(self.extraSettings["config"])
- extraConfigs["clientConfig"] = json.dumps(self.extraSettings["clientConfig"])
- return extraConfigs
-
- def createTrackDb(self):
- self.track_db = collections.OrderedDict([("track",self.trackName),
- ("trackLabel",self.trackLabel),
- ("trackDataURL",self.trackDataURL),
- ("dataType", self.dataType),
- ("trackType", self.trackType)]
- )
-
-
- htmlExtraConfigs = self.prepareHTMLExtraSetting()
- self.logger.debug("Generate extraConfigs for htmlFeatures = %s", json.dumps(htmlExtraConfigs))
- self.track_db["html"]["options"] = htmlExtraConfigs
- canvasExtraConfigs = self.prepareCanvasExtraSetting()
- self.logger.debug("Generate extraConfigs for canvasFeatures = %s", json.dumps(canvasExtraConfigs))
- self.track_db["canvas"]["options"] = canvasExtraConfigs
- self.logger.debug("TrackDb object is created track_db = %s ", json.dumps(self.track_db))
\ No newline at end of file
diff -r d17f629f5486 -r 4a69515eed63 util/Reader.py
--- a/util/Reader.py Fri Apr 06 13:44:56 2018 -0400
+++ b/util/Reader.py Fri Apr 13 18:21:35 2018 -0400
@@ -1,4 +1,6 @@
+import os
import json
+import shutil
import logging
import codecs
@@ -67,8 +69,11 @@
exit(1)
def getTrackType(self):
- track_type = self.args.get("track_type")
- return track_type
+ try:
+ return self.args["feature_tracks_type"]  # index (not .get) so a missing key actually reaches the KeyError handler below
+ except KeyError:
+ print ("feature tracks type is not defined in the input file!")
+ exit(1)
def getGenomeName(self):
genome_name = santitizer.sanitize_name_input(self.args["genome_name"])
@@ -76,12 +81,13 @@
def getRefGenome(self):
array_inputs_reference_genome = self.args["fasta"]
- # TODO: Replace these with the object Fasta
input_fasta_file = array_inputs_reference_genome["false_path"]
input_fasta_file_name = santitizer.sanitize_name_input(array_inputs_reference_genome["name"])
- #genome_name = santitizer.sanitize_name_input(self.args["genome_name"])
+ # Add a ".fasta" extension because Apollo needs it in order to create annotations
+ refseq_file = os.path.join(os.path.dirname(input_fasta_file), input_fasta_file_name + ".fasta")
+ shutil.copyfile(input_fasta_file, refseq_file)
genome_name = self.getGenomeName()
- reference_genome = Fasta(input_fasta_file,
+ reference_genome = Fasta(refseq_file,
input_fasta_file_name, genome_name)
return reference_genome
diff -r d17f629f5486 -r 4a69515eed63 util/subtools.py
--- a/util/subtools.py Fri Apr 06 13:44:56 2018 -0400
+++ b/util/subtools.py Fri Apr 13 18:21:35 2018 -0400
@@ -237,16 +237,38 @@
else:
raise ValueError('Did not find fai file')
+def generate_indexed_refseq_track(fastaFile, referenceName, outputFolder):
+ faiFile = createFastaIndex(fastaFile)
+ refSeqFile = os.path.join(outputFolder, referenceName)
+ refSeqIndexFile = os.path.join(outputFolder, referenceName+'.fai')
+ shutil.copy(fastaFile, refSeqFile)
+ shutil.copy(faiFile, refSeqIndexFile)
+
+def remove_gene_lines(gff3_file, gff3_filtered):
+ with open(gff3_file, 'r') as f:
+ with open(gff3_filtered, 'w') as out:
+ for line in f:
+ if not line.startswith('#'):
+ feature_type = line.split('\t')[2].rstrip()
+ if feature_type == 'transcript' or feature_type == 'mRNA':
+ arr = line.split('\t')
+ # as we remove the gene features, we should also remove the Parent attribute (gene id) from the transcript
+ arr[8] = ';'.join([item for item in arr[8].split(';') if 'Parent=' not in item]).rstrip()
+ line = '\t'.join(arr) + '\n'
+ if feature_type == 'gene':
+ continue
+ out.write(line)
+
def gff3sort(inputFile, outputFile, precise=False):
- array_call = ['gff3sort.pl', inputFile, '>', outputFile]
+ array_call = ['gff3sort.pl', inputFile]
if precise:
array_call.append('--precise')
- p = _handleExceptionAndCheckCall(array_call)
+ p = _handleExceptionAndCheckCall(array_call, stdout=outputFile)
return p
def bedSort(inputFile, outputFile):
- array_call = ['sort', '-k1,1', '-k2,2n', '-k6,6', inputFile, '>', outputFile]
- p = _handleExceptionAndCheckCall(array_call)
+ array_call = ['sort', '-k1,1', '-k2,2n', '-k6,6', inputFile]
+ p = _handleExceptionAndCheckCall(array_call, stdout=outputFile)
return p
def bgzip(inputFile):
@@ -265,19 +287,23 @@
else:
raise ValueError('Did not find tbi file')
-def generate_tabix_indexed_track(inputFile, dataType, outputFolder):
+def generate_tabix_indexed_track(inputFile, dataType, trackName, outputFolder):
if "bed" in dataType:
fileType = 'bed'
sortedFile = tempfile.NamedTemporaryFile(bufsize=0)
- bedSort(inputFile, sortedFile.name)
+ bedSort(inputFile, sortedFile)
elif "gff" in dataType:
fileType = 'gff'
+ filteredFile = tempfile.NamedTemporaryFile(bufsize=0)
+ remove_gene_lines(inputFile, filteredFile.name)
sortedFile = tempfile.NamedTemporaryFile(bufsize=0)
- gff3sort(inputFile, sortedFile.name)
- compressedFile = bgzip(sortedFile)
+ gff3sort(filteredFile.name, sortedFile)
+ # add .gff3.gz extension to Tabix GFF3 files, in order to enable creating name index with generate-names.pl
+ trackName = trackName + '.gff3.gz'
+ compressedFile = bgzip(sortedFile.name)
tabixFile = createTabix(compressedFile, fileType)
- trackPath = os.path.join(outputFolder, inputFile)
- trackIndexPath = os.path.join(outputFolder, inputFile+'.tbi')
+ trackPath = os.path.join(outputFolder, trackName)
+ trackIndexPath = os.path.join(outputFolder, trackName+'.tbi')
shutil.copy(compressedFile, trackPath)
shutil.copy(tabixFile, trackIndexPath)
@@ -349,13 +375,15 @@
p = subprocess.call(['add-track-json.pl', trackList], stdin=new_track.stdout)
return p
-def prepare_refseqs(fasta_file_name, outputFolder):
- array_call = ['prepare-refseqs.pl', '--fasta', fasta_file_name, '--out', outputFolder]
+def prepare_refseqs(fastaFile, outputFolder):
+ #array_call = ['prepare-refseqs.pl', '--fasta', fasta_file_name, '--out', outputFolder]
+ createFastaIndex(fastaFile)
+ array_call = ['prepare-refseqs.pl', '--indexed_fasta', fastaFile, '--out', outputFolder]
p = _handleExceptionAndCheckCall(array_call)
return p
-def generate_names(outputFolder):
- array_call = ['generate-names.pl', '-v', '--out', outputFolder]
+def generate_names(outputFolder, hashBits=4):
+ array_call = ['generate-names.pl', '--hashBits', str(hashBits), '-v', '--out', outputFolder]  # honor the hashBits argument instead of a hard-coded '4'
p = _handleExceptionAndCheckCall(array_call)
return p