changeset 25:31a41ce128cc draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 691e5366893905d30943a3cb8cdfb6341f0f5362-dirty
--- a/README.md Wed Jul 12 12:55:27 2017 -0400
+++ b/README.md Fri Oct 13 12:44:31 2017 -0400
@@ -4,11 +4,10 @@
 ## Features
 1. Similar interface to Hub Archive Creator.
 2. Convert tracks to GFF3 datatypes (e.g Blastxml => GFF3) in order to import feature data from the flat files
-3. Generate a zip file including all the tracks and configuration for JBrowse visualization
-4. Group the tracks
-5. Set the color for each track
-6. Set the label for each track
-7. Create workflows within Galaxy to automatize pipeline analysis and get them ready to visualization inside JBrowse...in a few clicks!
+3. Group the tracks
+4. Set the color for each track
+5. Set the label for each track
+6. Create workflows within Galaxy to automatize pipeline analysis and get them ready to visualization inside JBrowse...in a few clicks!
 
 At the moment, Supported datatypes are:
 - Bam
@@ -19,6 +18,7 @@
 - Gff3
 - Gtf
 - Blastxml
+- BigPsl
 
 ## Installation:
 1. You would need to add this tool into your Galaxy.
--- a/TrackHub.py Wed Jul 12 12:55:27 2017 -0400
+++ b/TrackHub.py Fri Oct 13 12:44:31 2017 -0400
@@ -5,169 +5,180 @@
 import shutil
 import zipfile
 import json
-import utils
+import tempfile
+import logging
+
+from datatypes.Datatype import Datatype
+from apollo.ApolloInstance import ApolloInstance
+from tracks.TrackStyles import TrackStyles
+from util import subtools
+from util import santitizer
 
 class TrackHub:
-    def __init__(self, inputFiles, reference, outputDirect, tool_dir, genome, extra_files_path, metaData, jbrowse_host):
-        self.input_files = inputFiles.tracks
-        self.outfile = outputDirect
-        self.outfolder = extra_files_path
-        self.out_path = os.path.join(extra_files_path, 'myHub')
-        self.reference = reference
-        self.tool_dir = tool_dir
-        self.metaData = metaData
-        self.raw = os.path.join(self.out_path, 'raw')
-        self.json = os.path.join(self.out_path, 'json')
-        self.jbrowse_host = jbrowse_host
-        try:
-            if os.path.exists(self.json):
-                shutil.rmtree(self.json)
-            os.makedirs(self.json)
-        except OSError as e:
-            print "Cannot create json folder error({0}): {1}".format(e.errno, e.strerror)
-        else:
-            print "Create jbrowse folder {}".format(self.out_path)
+    def __init__(self, inputFastaFile, apollo_user, outputFile, extra_files_path, tool_directory, trackType, apollo_host):
+
+        self.rootAssemblyHub = None
+
+        self.mySpecieFolderPath = None
+
+        # Store intermediate files, will be removed if not in debug mode
+        self.myTracksFolderPath = None
+
+        # Store binary files: Bam, BigWig
+        self.myBinaryFolderPath = None
+
+        self.tool_directory = tool_directory
+        self.trackType = trackType
+        self.reference_genome = inputFastaFile
+        self.genome_name = inputFastaFile.assembly_id
+        self.extra_files_path = extra_files_path
+        self.outputFile = outputFile
+        self.chromSizesFile = None
+
+        # Set up apollo
+        self.apollo = ApolloInstance(apollo_host)
+        self.apollo_user = apollo_user
+
+        # Set all the missing variables of this class, and create physically the folders/files
+        self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path)
+        # Init the Datatype
+        Datatype.pre_init(self.reference_genome, self.chromSizesFile,
+                          self.extra_files_path, self.tool_directory,
+                          self.mySpecieFolderPath, self.myTracksFolderPath, self.myBinaryFolderPath, self.trackType)
+
+        self._prepareRefseq()
+        self.trackList = os.path.join(self.mySpecieFolderPath, "trackList.json")
+        self._createTrackList()
+
+        self.myTrackStyle = TrackStyles(self.tool_directory, self.mySpecieFolderPath, self.trackList)
+        #self.cssFolderPath = os.path.join(self.mySpecieFolderPath, 'css')
+        #self.cssFilePath = os.path.join(self.cssFolderPath, 'custom_track_styles.css')
+        self.logger = logging.getLogger(__name__)
+
 
-    def createHub(self):
-        self.prepareRefseq()
-        for input_file in self.input_files:
-            self.addTrack(input_file)
-        self.indexName()
-        slink = self.makeArchive()
-        self.outHtml(slink)
+
+    def addTrack(self, trackDbObject):
+        if trackDbObject['dataType'].lower() == 'bam':
+            #new_track = subprocess.Popen(['echo', trackDbObject['options']], stdout=subprocess.PIPE)
+            #subprocess.call(['add-track-json.pl', json_file], stdin=new_track.stdout)
+            subtools.add_track_json(self.trackList, trackDbObject['options'])
+            #subtools.add_track_json(self.trackList, trackDbObject['track_json'])
+        elif trackDbObject['dataType'].lower() == 'bigwig':
+            subtools.add_track_json(self.trackList, trackDbObject['options'])
+        else:
+            if trackDbObject['trackType'] == 'HTMLFeatures':
+                self._customizeHTMLFeature(trackDbObject)
+            subtools.flatfile_to_json(trackDbObject['trackDataURL'], trackDbObject['dataType'], trackDbObject['trackType'], trackDbObject['trackLabel'], self.mySpecieFolderPath, trackDbObject['options'])
+
+
+    def terminate(self, debug=False):
+        """ Write html file """
+        self._indexName()
+        if not debug:
+            self._removeRaw()
+        self._makeArchive()
         print "Success!\n"
-
-    def prepareRefseq(self):
-        try:
+
+
+    def _customizeHTMLFeature(self, trackDbObject):
+        if trackDbObject['options']:
+            subfeatures = trackDbObject['options'].get('subfeatureClasses')
+            feature_color = trackDbObject['options']['feature_color']
+            if subfeatures:
+                for key, value in subfeatures.items():
+                    self.myTrackStyle.addCustomColor(value, feature_color)
+            else:
+                customizedFeature = santitizer.sanitize_name(trackDbObject['trackLabel'])
+                clientConfig = json.loads(trackDbObject['options']['clientConfig'])
+                clientConfig['renderClassName'] = customizedFeature
+                trackDbObject['options']['clientConfig'] = json.dumps(clientConfig)
+                self.myTrackStyle.addCustomColor(customizedFeature, feature_color)
+
+    def _removeRaw(self):
+        if os.path.exists(self.myTracksFolderPath):
+            shutil.rmtree(self.myTracksFolderPath)
+
+    def _createTrackList(self):
+        if not os.path.exists(self.trackList):
+            os.mknod(self.trackList)
+
+    def _prepareRefseq(self):
+        subtools.prepare_refseqs(self.reference_genome.false_path, self.mySpecieFolderPath)
+        #try:
             #print os.path.join(self.tool_dir, 'prepare-refseqs.pl') + ", '--fasta', " + self.reference +", '--out', self.json])"
-            subprocess.call(['prepare-refseqs.pl', '--fasta', self.reference, '--out', self.json])
-        except OSError as e:
-            print "Cannot prepare reference error({0}): {1}".format(e.errno, e.strerror)
-    #TODO: hard coded the bam and bigwig tracks. Need to allow users to customize the settings
-    def addTrack(self, track):
-        #print "false_path" , track['false_path']
-        if track['false_path'] in self.metaData.keys():
-            metadata = self.metaData[track['false_path']]
-        else:
-            metadata = {}
-        self.SetMetadata(track, metadata)
-        if track['dataType'] == 'bam':
-            self.Bam(track, metadata)
-            # print "add bam track\n"
-        elif track['dataType'] == 'bigwig':
-            #create trackList.json if not exist
-            self.createTrackList()
-            json_file = os.path.join(self.json, "trackList.json")
-            bigwig_file = os.path.join(self.raw, track['fileName'])
-            subprocess.call(['add-bw-track.pl', '--label', metadata['label'], '--bw_url', bigwig_file, '--pos_color', metadata['style']['pos_color'], '--neg_color', metadata['style']['neg_color'], '--plot', 'JBrowse/View/Track/Wiggle/XYPlot', '--out', json_file, '--in', json_file])
-        else:
-            flat_file = os.path.join(self.raw, track['fileName'])
-            if track['dataType'] == 'bed':
-                subprocess.call(['flatfile-to-json.pl', '--bed', flat_file, '--trackType', metadata['type'], '--trackLabel', metadata['label'], '--Config', '{"category" : "%s"}' % metadata['category'], '--clientConfig', '{"color" : "%s"}' % metadata['color'], '--out', self.json])
-            elif track['dataType'] == 'bedSpliceJunctions' or track['dataType'] == 'gtf' or track['dataType'] == 'blastxml':
-                subprocess.call(['flatfile-to-json.pl', '--gff', flat_file, '--trackType', metadata['type'], '--trackLabel', metadata['label'], '--Config', '{"glyph": "JBrowse/View/FeatureGlyph/Segments", "category" : "%s"}' % metadata['category'], '--clientConfig', '{"color" : "%s"}' % metadata['color'], '--out', self.json])
-            elif track['dataType'] == 'gff3_transcript':
-                subprocess.call(['flatfile-to-json.pl', '--gff', flat_file, '--trackType', metadata['type'], '--trackLabel', metadata['label'], '--Config', '{"transcriptType": "transcript", "category" : "%s"}' % metadata['category'], '--clientConfig', '{"color" : "%s"}' % metadata['color'], '--out', self.json])
-            else:
-                subprocess.call(['flatfile-to-json.pl', '--gff', flat_file, '--trackType', metadata['type'], '--trackLabel', metadata['label'], '--Config', '{"category" : "%s"}' % metadata['category'], '--clientConfig', '{"color" : "%s"}' % metadata['color'], '--out', self.json])
-
-    def indexName(self):
-        subprocess.call(['generate-names.pl', '-v', '--out', self.json])
+        #subprocess.call(['prepare-refseqs.pl', '--fasta', self.reference_genome.false_path, '--out', self.mySpecieFolderPath])
+        #except OSError as e:
+            #print "Cannot prepare reference error({0}): {1}".format(e.errno, e.strerror)
+
+    def _indexName(self):
+        #subprocess.call(['generate-names.pl', '-v', '--out', self.mySpecieFolderPath])
+        subtools.generate_names(self.mySpecieFolderPath)
         print "finished name index \n"
 
-    def makeArchive(self):
-        file_dir = os.path.abspath(self.outfile)
-        source_dir = os.path.dirname(file_dir)
-        folder_name = os.path.basename(self.outfolder)
-        source_name = os.path.basename(self.out_path)
-        source = os.path.join(source_dir, folder_name, source_name)
-        slink = source.replace('/', '_')
-        slink = os.path.join('/var/www/html/JBrowse-1.12.1/data', slink)
-        try:
-            if os.path.islink(slink):
-                os.unlink(slink)
-        except OSError as oserror:
-            print "Cannot create symlink to the data({0}): {1}".format(oserror.errno, oserror.strerror)
-        os.symlink(source, slink)
-        return slink
-
-    def outHtml(self, slink):
-        with open(self.outfile, 'w') as htmlfile:
-            htmlstr = 'The JBrowse Hub is created: <br>'
-            url = self.jbrowse_host + "/JBrowse-1.12.1/index.html?data=%s"
-            jbrowse_hub = '<li><a href = "%s" target="_blank">View JBrowse Hub</a></li>' % url
-            link_name = os.path.basename(slink)
-            relative_path = os.path.join('data', link_name + '/json')
-            htmlstr += jbrowse_hub % relative_path
-            htmlfile.write(htmlstr)
+    def _outHtml(self, host_name):
+        with open(self.outputFile, 'w') as htmlfile:
+            htmlstr = 'The new Organism "%s" is created on Apollo: <br>' % self.genome_name
+            jbrowse_hub = '<li><a href = "%s" target="_blank">View JBrowse Hub on Apollo</a></li>' % host_name
+            htmlstr += jbrowse_hub
+            htmlfile.write(htmlstr)
+
+    def _makeArchive(self):
+        self.apollo.loadHubToApollo(self.apollo_user, self.genome_name, self.mySpecieFolderPath, admin=True)
+        apollo_host = self.apollo.getHost()
+        self._outHtml(apollo_host)
+
+
+    def __createAssemblyHub__(self, extra_files_path):
+        # Get all necessaries infos first
+        # 2bit file creation from input fasta
+
+        # baseNameFasta = os.path.basename(fasta_file_name)
+        # suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
+        # nameTwoBit = suffixTwoBit + '.2bit'
+        twoBitFile = tempfile.NamedTemporaryFile(bufsize=0)
+        subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name)
+
+        # Generate the twoBitInfo
+        twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
+        subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name)
+
+        # Then we get the output to generate the chromSizes
+        self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
+        subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name)
 
-    def createTrackList(self):
-        trackList = os.path.join(self.json, "trackList.json")
-        if not os.path.exists(trackList):
-            os.mknod(trackList)
-
-    def Bam(self, track, metadata):
-        #create trackList.json if not exist
-        self.createTrackList()
-        json_file = os.path.join(self.json, "trackList.json")
-        bam_track = dict()
-        bam_track['type'] = 'JBrowse/View/Track/Alignments2'
-        bam_track['storeClass'] = 'JBrowse/Store/SeqFeature/BAM'
-        bam_track['urlTemplate'] = os.path.join('../raw', track['fileName'])
-        bam_track['baiUrlTemplate'] = os.path.join('../raw', track['index'])
-        bam_track['label'] = metadata['label']
-        bam_track['category'] = metadata['category']
-        bam_track = json.dumps(bam_track)
-        #Use add-track-json.pl to add bam track to json file
-        new_track = subprocess.Popen(['echo', bam_track], stdout=subprocess.PIPE)
-        subprocess.call(['add-track-json.pl', json_file], stdin=new_track.stdout)
-    '''
-    def BigWig(self, track, metadata):
-        #create trackList.json if not exist
-        self.createTrackList()
-        json_file = os.path.join(self.json, "trackList.json")
-        bigwig_track = dict()
-        bigwig_track['urlTemplate'] = os.path.join('../raw', track['fileName'])
-        bigwig_track['type'] = 'JBrowse/View/Track/Wiggle/XYPlot'
-        bigwig_track['storeClass'] = 'JBrowse/Store/SeqFeature/BigWig'
-        bigwig_track['label'] = metadata['label']
-        bigwig_track['style'] = metadata['style']
-        bigwig_track['category'] = metadata['category']
-        bigwig_track = json.dumps(bigwig_track)
-        #Use add-track-json.pl to add bigwig track to json file
-        new_track = subprocess.Popen(['echo', bigwig_track], stdout=subprocess.PIPE)
-        #output = new_track.communicate()[0]
-        subprocess.call(['add-track-json.pl', json_file], stdin=new_track.stdout)
-    '''
-    def BigWig
-    #If the metadata is not set, use the default value
-    def SetMetadata(self, track, metadata):
-        if 'label' not in metadata.keys() or metadata['label'] == '':
-            metadata['label'] = track['fileName']
-        if 'color' not in metadata.keys() or metadata['color'] == '':
-            metadata['color'] = "#daa520"
-        if track['dataType'] == 'bigwig':
-            if 'style' not in metadata.keys():
-                metadata['style'] = {}
-            if 'pos_color' not in metadata['style'] or metadata['style']['pos_color'] == '':
-                metadata['style']['pos_color'] = "#FFA600"
-            if 'neg_color' not in metadata['style'] or metadata['style']['neg_color'] == '':
-                metadata['style']['neg_color'] = "#005EFF"
-        if 'category' not in metadata.keys() or metadata['category'] == '':
-            metadata['category'] = "Default group"
-        if track['dataType'] == 'blastxml':
-            metadata['type'] = "G-OnRamp_plugin/BlastAlignment"
-        elif track['dataType'] == 'bigpsl':
-            metadata['type'] = "G-OnRamp_plugin/BlatAlignment"
-        elif track['dataType'] == 'gff3_transcript' or track['dataType'] == 'gff3_mrna':
-            metadata['type'] = "G-OnRamp_plugin/GenePred"
-        else:
-            metadata['type'] = "CanvasFeatures"
+        # We can get the biggest scaffold here, with chromSizesFile
+        with open(self.chromSizesFile.name, 'r') as chrom_sizes:
+            # TODO: Check if exists
+            self.default_pos = chrom_sizes.readline().split()[0]
+
+        # TODO: Manage to put every fill Function in a file dedicated for reading reasons
+        # Create the root directory
+        myHubPath = os.path.join(extra_files_path, "myHub")
+        if not os.path.exists(myHubPath):
+            os.makedirs(myHubPath)
+
+        # Create the specie folder
+        # TODO: Generate the name depending on the specie
+        mySpecieFolderPath = os.path.join(myHubPath, self.genome_name)
+        if not os.path.exists(mySpecieFolderPath):
+            os.makedirs(mySpecieFolderPath)
+        self.mySpecieFolderPath = mySpecieFolderPath
 
-
+        # We create the 2bit file while we just created the specie folder
+        #self.twoBitName = self.genome_name + ".2bit"
+        #self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName)
+        #shutil.copyfile(twoBitFile.name, self.two_bit_final_path)
-
+        # Create the folder tracks into the specie folder
+        tracksFolderPath = os.path.join(mySpecieFolderPath, "raw")
+        if not os.path.exists(tracksFolderPath):
+            os.makedirs(tracksFolderPath)
+        self.myTracksFolderPath = tracksFolderPath
+        myBinaryFolderPath = os.path.join(mySpecieFolderPath, 'bbi')
+        if not os.path.exists(myBinaryFolderPath):
+            os.makedirs(myBinaryFolderPath)
+        self.myBinaryFolderPath = myBinaryFolderPath
-
+        return myHubPath
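For orientation, the reworked TrackHub above is driven roughly as follows. This is a minimal sketch, not the tool's actual entry point: the FastaInput wrapper, file names, host, and credentials are assumptions, while the constructor signature, the trackDbObject keys read by addTrack(), and terminate(debug=...) come straight from the diff. Building the hub runs faToTwoBit/twoBitInfo under the hood, so the UCSC binaries must be on PATH.

    from TrackHub import TrackHub
    from apollo.ApolloUser import ApolloUser

    class FastaInput(object):
        # Stand-in for the Galaxy dataset wrapper; TrackHub only reads
        # .false_path (path on disk) and .assembly_id (genome name).
        def __init__(self, path, assembly_id):
            self.false_path = path
            self.assembly_id = assembly_id

    fasta = FastaInput('dbia3.fasta', 'dbia3')   # hypothetical assembly
    user = ApolloUser('student@example.org', 'First', 'Last', 'secret')
    hub = TrackHub(fasta, user, 'output.html', 'extra_files',
                   '.', 'HTMLFeatures',
                   'http://apollo.example.org:8080/apollo')   # hypothetical host

    # addTrack() dispatches on the dataType/trackType keys of a trackDb-style dict.
    hub.addTrack({'dataType': 'gff', 'trackType': 'HTMLFeatures',
                  'trackLabel': 'genes', 'trackDataURL': 'raw/genes.gff3',
                  'options': None})
    hub.terminate(debug=False)   # index names, drop raw/, load the hub into Apollo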
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/apollo/ApolloInstance.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+import json
+import logging
+from util import subtools
+
+class ApolloInstance(object):
+    def __init__(self, apollo_host):
+        self.apollo_host = apollo_host
+        self.logger = logging.getLogger(__name__)
+
+    def getHost(self):
+        return self.apollo_host
+
+    def createApolloUser(self, apollo_user, admin=None):
+        p = subtools.arrow_create_user(apollo_user.user_email, apollo_user.firstname, apollo_user.lastname, apollo_user.password, admin)
+        user_info = json.loads(p)
+        user_id = user_info.get('userId')
+        if not user_id:
+            self.logger.debug("Cannot create new user: %s; The user may already exist", apollo_user.user_email)
+            user_id = subtools.arrow_get_users(apollo_user.user_email)
+        self.logger.debug("Got user_id for new or existing user: user_id = %s", str(user_id))
+        return user_id
+
+    def grantPermission(self, user_id, organism_id, **user_permissions):
+        subtools.arrow_update_organism_permissions(user_id, organism_id, **user_permissions)
+        self.logger.debug("Grant user %s permissions to organism %s, permissions = %s", str(user_id), str(organism_id), ','.join(user_permissions))
+
+    def addOrganism(self, organism_name, organism_dir):
+        p = subtools.arrow_add_organism(organism_name, organism_dir)
+        organism = json.loads(p)
+        organism_id = organism['id']
+        self.logger.debug("Added new organism to Apollo instance, %s", p)
+        return organism_id
+
+    def loadHubToApollo(self, apollo_user, organism_name, organism_dir, admin_user=False, **user_permissions):
+        user_id = self.createApolloUser(apollo_user, admin_user)
+        organism_id = self.addOrganism(organism_name, organism_dir)
+        self.grantPermission(user_id, organism_id, **user_permissions)
+        self.logger.debug("Successfully load the hub to Apollo")
\ No newline at end of file
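loadHubToApollo() above chains the three arrow-backed calls; used standalone the flow looks roughly like this (host, organism name, and directory are made up). One thing worth noting: TrackHub._makeArchive() invokes it with admin=True, which the signature above collects into **user_permissions rather than into admin_user, since the keyword names differ.

    from apollo.ApolloInstance import ApolloInstance
    from apollo.ApolloUser import ApolloUser

    apollo = ApolloInstance('http://apollo.example.org:8080/apollo')  # hypothetical host
    user = ApolloUser('student@example.org', 'First', 'Last', 'secret')

    # Create (or look up) the Apollo user, register the JBrowse directory
    # as an organism, then grant the user permissions on that organism.
    apollo.loadHubToApollo(user, 'dbia3', 'myHub/dbia3', admin_user=True)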
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/apollo/ApolloUser.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,10 @@
+#!/usr/bin/python
+
+import os
+
+class ApolloUser(object):
+    def __init__(self, user_email, firstname, lastname, password):
+        self.user_email = user_email
+        self.firstname = firstname
+        self.lastname = lastname
+        self.password = password
--- a/bedToGff3.py Wed Jul 12 12:55:27 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,139 +0,0 @@
-#!/usr/bin/env python
-
-'''
-Convert BED format to gff3
-reference for gff3: https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
-'''
-import os
-from collections import OrderedDict
-import utils
-
-class bedToGff3():
-    def __init__(self, inputBedFile, chrom_sizes, bed_type, output):
-        self.input = inputBedFile
-        #file_dir = os.path.basename(inputBedFile)
-        #print file_dir + "\n\n"
-        self.output = output
-        self.chrom_sizes = chrom_sizes
-        self.type = bed_type
-        if self.type == "trfbig":
-            self.trfbig_to_gff3()
-        if self.type == "regtools":
-            self.splicejunctions_to_gff3()
-        if self.type == "blat":
-            self.bigpsl_to_gff3()
-
-    def trfbig_to_gff3(self):
-        gff3 = open(self.output, 'w')
-        gff3.write("##gff-version 3\n")
-        sizes_dict = utils.sequence_region(self.chrom_sizes)
-        seq_regions = dict()
-        with open(self.input, 'r') as bed:
-            for line in bed:
-                field = OrderedDict()
-                attribute = OrderedDict()
-                li = line.rstrip().split("\t")
-                field['seqid'] = li[0]
-                if field['seqid'] not in seq_regions:
-                    end_region = sizes_dict[field['seqid']]
-                    gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
-                    seq_regions[field['seqid']] = end_region
-                field['source'] = li[3]
-                field['type'] = 'tandem_repeat'
-                # The first base in a chromosome is numbered 0 in BED format
-                field['start'] = str(int(li[1]) + 1)
-                field['end'] = li[2]
-                field['score'] = li[9]
-                field['strand'] = '+'
-                field['phase'] = '.'
-                attribute['length of repeat unit'] = li[4]
-                attribute['mean number of copies of repeat'] = li[5]
-                attribute['length of consensus sequence'] = li[6]
-                attribute['percentage match'] = li[7]
-                attribute['percentage indel'] = li[8]
-                attribute['percent of a\'s in repeat unit'] = li[10]
-                attribute['percent of c\'s in repeat unit'] = li[11]
-                attribute['percent of g\'s in repeat unit'] = li[12]
-                attribute['percent of t\'s in repeat unit'] = li[13]
-                attribute['entropy'] = li[14]
-                attribute['sequence of repeat unit element'] = li[15]
-                utils.write_features(field, attribute, gff3)
-        gff3.close()
-
-
-    def splicejunctions_to_gff3(self):
-        gff3 = open(self.output, 'w')
-        gff3.write("##gff-version 3\n")
-        sizes_dict = utils.sequence_region(self.chrom_sizes)
-        seq_regions = dict()
-        with open(self.input, 'r') as bed:
-            for line in bed:
-                field = OrderedDict()
-                attribute = OrderedDict()
-                li = line.rstrip().split("\t")
-                field['seqid'] = li[0]
-                if field['seqid'] not in seq_regions:
-                    end_region = sizes_dict[field['seqid']]
-                    gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
-                    seq_regions[field['seqid']] = end_region
-                field['source'] = li[3]
-                field['type'] = 'junction'
-                # The first base in a chromosome is numbered 0 in BED format
-                field['start'] = int(li[1]) + 1
-                field['end'] = li[2]
-                field['score'] = li[12]
-                field['strand'] = li[5]
-                field['phase'] = '.'
-                attribute['ID'] = li[0] + '_' + li[3]
-                attribute['Name'] = li[3]
-                attribute['blockcount'] = li[9]
-                attribute['blocksizes'] = li[10]
-                attribute['chromstarts'] = li[11]
-                utils.write_features(field, attribute, gff3)
-                utils.child_blocks(field, attribute, gff3, 'exon_junction')
-        gff3.close()
-
-    def bigpsl_to_gff3(self):
-        gff3 = open(self.output, 'w')
-        gff3.write("##gff-version 3\n")
-        sizes_dict = utils.sequence_region(self.chrom_sizes)
-        seq_regions = dict()
-        with open(self.input, 'r') as bed:
-            for line in bed:
-                field = OrderedDict()
-                attribute = OrderedDict()
-                li = line.rstrip().split("\t")
-                field['seqid'] = li[0]
-                if field['seqid'] not in seq_regions:
-                    end_region = sizes_dict[field['seqid']]
-                    gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
-                    seq_regions[field['seqid']] = end_region
-                field['source'] = 'UCSC BLAT alignment tool'
-                field['type'] = 'match'
-                # The first base in a chromosome is numbered 0 in BED format
-                field['start'] = str(int(li[1]) + 1)
-                field['end'] = li[2]
-                field['score'] = li[4]
-                field['strand'] = li[5]
-                field['phase'] = '.'
-                attribute['ID'] = li[0] + '_' + li[3]
-                attribute['Name'] = li[3]
-                attribute['blockcount'] = li[9]
-                attribute['blocksizes'] = li[10]
-                attribute['chromstarts'] = li[11]
-                attribute['ochrom_start'] = li[12]
-                attribute['ochrom_end'] = li[13]
-                attribute['ochrom_strand'] = li[14]
-                attribute['ochrom_size'] = li[15]
-                attribute['ochrom_starts'] = li[16]
-                attribute['sequence on other chromosome'] = li[17]
-                attribute['cds in ncbi format'] = li[18]
-                attribute['size of target chromosome'] = li[19]
-                attribute['number of bases matched'] = li[20]
-                attribute['number of bases that don\'t match'] = li[21]
-                attribute['number of bases that match but are part of repeats'] = li[22]
-                attribute['number of \'N\' bases'] = li[23]
-                utils.write_features(field, attribute, gff3)
-                utils.child_blocks(field, attribute, gff3, 'match_part')
-        gff3.close()
-
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bigGenePred.as Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,25 @@
+table bigGenePred
+"bigGenePred gene models"
+    (
+    string chrom;        "Reference sequence chromosome or scaffold"
+    uint   chromStart;   "Start position in chromosome"
+    uint   chromEnd;     "End position in chromosome"
+    string name;         "Name or ID of item, ideally both human readable and unique"
+    uint score;          "Score (0-1000)"
+    char[1] strand;      "+ or - for strand"
+    uint thickStart;     "Start of where display should be thick (start codon)"
+    uint thickEnd;       "End of where display should be thick (stop codon)"
+    uint reserved;       "RGB value (use R,G,B string in input file)"
+    int blockCount;      "Number of blocks"
+    int[blockCount] blockSizes;  "Comma separated list of block sizes"
+    int[blockCount] chromStarts; "Start positions relative to chromStart"
+    string name2;        "Alternative/human readable name"
+    string cdsStartStat; "Status of CDS start annotation (none, unknown, incomplete, or complete)"
+    string cdsEndStat;   "Status of CDS end annotation (none, unknown, incomplete, or complete)"
+    int[blockCount] exonFrames; "Exon frame {0,1,2}, or -1 if no frame for exon"
+    string type;         "Transcript type"
+    string geneName;     "Primary identifier for gene"
+    string geneName2;    "Alternative/human readable gene name"
+    string geneType;     "Gene type"
+    )
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bigPsl.as Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,33 @@
+table bigPsl
+"bigPsl pairwise alignment"
+    (
+    string chrom;        "Reference sequence chromosome or scaffold"
+    uint   chromStart;   "Start position in chromosome"
+    uint   chromEnd;     "End position in chromosome"
+    string name;         "Name or ID of item, ideally both human readable and unique"
+    uint score;          "Score (0-1000)"
+    char[1] strand;      "+ or - for strand"
+    uint thickStart;     "Start of where display should be thick (start codon)"
+    uint thickEnd;       "End of where display should be thick (stop codon)"
+    uint reserved;       "RGB value (use R,G,B string in input file)"
+    int blockCount;      "Number of blocks"
+    int[blockCount] blockSizes;  "Comma separated list of block sizes"
+    int[blockCount] chromStarts; "Start positions relative to chromStart"
+
+    uint oChromStart;    "Start position in other chromosome"
+    uint oChromEnd;      "End position in other chromosome"
+    char[1] oStrand;     "+ or - for other strand"
+    uint oChromSize;     "Size of other chromosome."
+    int[blockCount] oChromStarts; "Start positions relative to oChromStart"
+
+    lstring oSequence;   "Sequence on other chrom (or edit list, or empty)"
+    string oCDS;         "CDS in NCBI format"
+
+    uint chromSize;      "Size of target chromosome"
+
+    uint match;          "Number of bases matched."
+    uint misMatch;       "Number of bases that don't match"
+    uint repMatch;       "Number of bases that match but are part of repeats"
+    uint nCount;         "Number of 'N' bases"
+    )
+
--- a/blastxmlToGff3.py Wed Jul 12 12:55:27 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,159 +0,0 @@
-#!/usr/bin/env python
-
-
-from Bio.Blast import NCBIXML
-from collections import OrderedDict
-import utils
-
-
-def align2cigar(hsp_query, hsp_reference):
-    """
-    Build CIGAR representation from an hsp_query
-    input:
-        hsp_query
-        hsp_sbjct
-    output:
-        CIGAR string
-    """
-    query = hsp_query
-    ref = hsp_reference
-    # preType, curType:
-    #  'M' represents match,
-    #  'I' represents insert a gap into the reference sequence,
-    #  'D' represents insert a gap into the target (delete from reference)
-    # some ideas of this algin2cigar function are coming from
-    # https://gist.github.com/ozagordi/099bdb796507da8d9426
-    prevType = 'M'
-    curType = 'M'
-    count = 0
-    cigar = []
-    num = len(query)
-    for i in range(num):
-        if query[i] == '-':
-            curType = 'D'
-        elif ref[i] == '-':
-            curType = 'I'
-        else:
-            curType = 'M'
-        if curType == prevType:
-            count += 1
-        else:
-            cigar.append('%s%d' % (prevType, count))
-            prevType = curType
-            count = 1
-    cigar.append('%s%d' % (curType, count))
-    return ' '.join(cigar)
-
-def gff3_writer(blast_records, gff3_file):
-    gff3 = open(gff3_file, 'a')
-    gff3.write("##gff-version 3\n")
-    seq_regions = dict()
-    for blast_record in blast_records:
-        query_name = blast_record.query.split(" ")[0]
-        source = blast_record.application
-        method = blast_record.matrix
-        for alignment in blast_record.alignments:
-            group = {
-                "parent_field" : OrderedDict(),
-                "parent_attribute" : OrderedDict(),
-                "alignments" : []
-            }
-            title = alignment.title.split(" ")
-            contig_name = title[len(title) - 1]
-            length = alignment.length
-            group['parent_field']['seqid'] = contig_name
-            group['parent_field']['source'] = source
-            group['parent_field']['type'] = 'match'
-            group['parent_attribute']['ID'] = contig_name + '_' + query_name
-            group['parent_attribute']['method'] = method
-            group['parent_attribute']['length'] = length
-            if contig_name not in seq_regions:
-                gff3.write("##sequence-region " + contig_name + ' 1 ' + str(length) + '\n')
-                seq_regions[contig_name] = length
-            match_num = 0
-            coords = [length, 0]
-            for hsp in alignment.hsps:
-                hsp_align = {}
-                field = OrderedDict()
-                attribute = OrderedDict()
-                ref = hsp.sbjct
-                query = hsp.query
-                field['seqid'] = contig_name
-                field['source'] = source
-                field['type'] = 'match_part'
-
-                field['start'] = hsp.sbjct_start
-                if field['start'] < coords[0]:
-                    coords[0] = field['start']
-                ref_length = len(ref.replace('-', ''))
-                # if run tblastn, the actual length of reference should be multiplied by 3
-                if source.lower() == "tblastn":
-                    ref_length *= 3
-                field['end'] = field['start'] + ref_length - 1
-                if field['end'] > coords[1]:
-                    coords[1] = field['end']
-                field['score'] = hsp.score
-                #decide if the alignment in the same strand or reverse strand
-                #reading frame
-                # (+, +), (0, 0), (-, -) => +
-                # (+, -), (-, +) => -
-                if hsp.frame[1] * hsp.frame[0] > 0:
-                    field['strand'] = '+'
-                elif hsp.frame[1] * hsp.frame[0] < 0:
-                    field['strand'] = '-'
-                else:
-                    if hsp.frame[0] + hsp.frame[1] >= 0:
-                        field['strand'] = '+'
-                    else:
-                        field['strand'] = '-'
-                field['phase'] = '.'
-
-                target_start = hsp.query_start
-                target_len = len(query.replace('-', ''))
-                # if run blastx, the actual length of query should be multiplied by 3
-                if source.lower() == "blastx":
-                    target_len *= 3
-                target_end = target_start + target_len -1
-                attribute['ID'] = group['parent_attribute']['ID'] + '_match_' + str(match_num)
-                attribute['Parent'] = group['parent_attribute']['ID']
-                attribute['Target'] = query_name + " " + str(target_start) + " " + str(target_end)
-                attribute['Gap'] = align2cigar(query, ref)
-                #store the query sequence and match string in the file in order to display alignment with BlastAlignment plugin
-                attribute['subject'] = hsp.sbjct
-                attribute['query'] = hsp.query
-                attribute['match'] = hsp.match
-                attribute['gaps'] = attribute['match'].count(' ')
-                similar = attribute['match'].count('+')
-                attribute['identities'] = len(attribute['match']) - similar - attribute['gaps']
-                attribute['positives'] = attribute['identities'] + similar
-                attribute['expect'] = hsp.expect
-                # show reading frame attribute only if the frame is not (0, 0)
-                attribute['frame'] = hsp.frame[1]
-                match_num += 1
-                hsp_align['field'] = field
-                hsp_align['attribute'] = attribute
-                group['alignments'].append(hsp_align)
-            group['parent_field']['start'] = coords[0]
-            group['parent_field']['end'] = coords[1]
-            group['parent_field']['score'] = group['parent_field']['strand'] = group['parent_field']['phase'] = '.'
-            group['parent_attribute']['match_num'] = match_num
-            group['alignments'].sort(key=lambda x: (x['field']['start'], x['field']['end']))
-            utils.write_features(group['parent_field'], group['parent_attribute'], gff3)
-            prev_end = -1
-            for align in group['alignments']:
-                overlap = ''
-                if align['field']['start'] <= prev_end:
-                    overlap += str(align['field']['start']) + ',' + str(prev_end)
-                prev_end = align['field']['end']
-                align['attribute']['overlap'] = overlap
-                utils.write_features(align['field'], align['attribute'], gff3)
-    gff3.close()
-
-def blastxml2gff3(xml_file, gff3_file):
-    result_handle = open(xml_file)
-    blast_records = NCBIXML.parse(result_handle)
-    gff3_writer(blast_records, gff3_file)
-
-if __name__ == "__main__":
-    blastxml2gff3("../dbia3/raw/tblastn_dmel-hits-translation-r6.11.fa_vs_nucleotide_BLAST_database_from_data_3.blastxml", "gff3.txt")
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/Datatype.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,122 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+Super Class of the managed datatype
+"""
+
+import os
+import tempfile
+import collections
+from util import subtools
+import logging
+import abc
+from abc import ABCMeta
+from tracks.HTMLFeatures import HTMLFeatures
+from tracks.CanvasFeatures import CanvasFeatures
+from tracks.BamFeatures import BamFeatures
+from tracks.BigwigFeatures import BigwigFeatures
+from datatypes.validators.DataValidation import DataValidation
+
+
+class Datatype(object):
+    __metaclass__ = ABCMeta
+
+    chromSizesFile = None
+    input_fasta_file = None
+    extra_files_path = None
+    tool_directory = None
+
+    mySpecieFolderPath = None
+    myTrackFolderPath = None
+    myBinaryFolderPath = None
+
+    trackType = None
+
+    def __init__(self):
+        not_init_message = "The {0} is not initialized." \
+                           "Did you use pre_init static method first?"
+        if Datatype.input_fasta_file is None:
+            raise TypeError(not_init_message.format('reference genome'))
+        if Datatype.extra_files_path is None:
+            raise TypeError(not_init_message.format('track Hub path'))
+        if Datatype.tool_directory is None:
+            raise TypeError(not_init_message.format('tool directory'))
+        self.inputFile = None
+        self.trackType = None
+        self.dataType = None
+        self.trackFileType = None
+        self.track = None
+        self.trackSettings = dict()
+        self.extraSettings = collections.OrderedDict()
+
+
+    @staticmethod
+    def pre_init(reference_genome, chrom_sizes_file,
+                 extra_files_path, tool_directory, specie_folder, tracks_folder, binary_folder, track_type):
+        Datatype.extra_files_path = extra_files_path
+        Datatype.tool_directory = tool_directory
+
+        # TODO: All this should be in TrackHub and not in Datatype
+        Datatype.mySpecieFolderPath = specie_folder
+        Datatype.myTrackFolderPath = tracks_folder
+        Datatype.myBinaryFolderPath = binary_folder
+
+        Datatype.input_fasta_file = reference_genome
+
+        # 2bit file creation from input fasta
+        #Datatype.twoBitFile = two_bit_path
+        Datatype.chromSizesFile = chrom_sizes_file
+        Datatype.trackType = track_type
+
+
+    def generateCustomTrack(self):
+        self.validateData()
+        self.initSettings()
+        #Create the track file
+        self.createTrack()
+        # Create the TrackDb Object
+        self.createTrackDb()
+        logging.debug("- %s %s created", self.dataType, self.trackName)
+
+
+    @abc.abstractmethod
+    def validateData(self):
+        """validate the input data with DataValidation"""
+
+    def initSettings(self):
+        #Initialize required fields: trackName, longLabel, shortLable
+        self.trackName = self.trackSettings["name"]
+        self.trackDataURL = os.path.join(self.myTrackFolderPath, self.trackName)
+        if self.trackSettings["long_label"]:
+            self.trackLabel = self.trackSettings["long_label"]
+        else:
+            self.trackLabel = self.trackName
+        if "trackType" in self.trackSettings and self.trackSettings["trackType"]:
+            self.trackType = self.trackSettings["trackType"]
+        if self.trackSettings["group_name"]:
+            self.extraSettings["category"] = self.trackSettings["group_name"]
+        if "track_color" in self.trackSettings and self.trackSettings["track_color"]:
+            self.extraSettings["color"] = self.trackSettings["track_color"]
+
+
+    @abc.abstractmethod
+    def createTrack(self):
+        """Create the final track file"""
+
+    def createTrackDb(self):
+        if self.trackType == 'HTMLFeatures':
+            self.track = HTMLFeatures(self.trackName, self.trackLabel, self.trackDataURL, self.trackType, self.dataType, self.extraSettings)
+        elif self.trackType == "CanvasFeatures":
+            self.track = CanvasFeatures(self.trackName, self.trackLabel, self.trackDataURL, self.trackType, self.dataType, self.extraSettings)
+        elif self.trackType == "bam":
+            self.track = BamFeatures(self.trackName, self.trackLabel, self.trackDataURL, self.trackType, self.dataType, self.extraSettings)
+        elif self.trackType == "bigwig":
+            self.track = BigwigFeatures(self.trackName, self.trackLabel, self.trackDataURL, self.trackType, self.dataType, self.extraSettings)
+        else:
+            logging.error("Cannot createTrackDb, because trackType is not defined or invalid! trackType = %s", self.trackType)
+        self.track.createTrackDb()
+
+        #self.track = TrackDb(self.trackName, self.trackLabel, self.trackDataURL, self.trackType, self.dataType, self.extraSettings)
+
+
\ No newline at end of file
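Because Datatype keeps the run-wide context in class attributes, pre_init() must run once before any subclass is instantiated; __init__ raises TypeError otherwise. A sketch of the expected call order, with placeholder paths (in the real run TrackHub.__init__ supplies all of these):

    from datatypes.Datatype import Datatype
    from datatypes.binary.Bam import Bam

    # One-time setup. reference_genome and chrom_sizes_file stand for the
    # objects TrackHub prepares (chrom_sizes_file needs a .name attribute,
    # e.g. a tempfile.NamedTemporaryFile).
    Datatype.pre_init(reference_genome, chrom_sizes_file,
                      'extra_files', '.',
                      'myHub/dbia3', 'myHub/dbia3/raw', 'myHub/dbia3/bbi',
                      'HTMLFeatures')

    # Only now is constructing a concrete datatype safe.
    track = Bam('input.bam', {'name': 'alignments', 'long_label': 'RNA-seq alignments',
                              'group_name': 'Default group'})
    track.generateCustomTrack()   # validateData -> initSettings -> createTrack -> createTrackDb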
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/binary/Bam.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,51 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+Class to handle Bam files to UCSC TrackHub
+"""
+
+import logging
+import os
+import shutil
+
+from Binary import Binary
+from datatypes.validators.DataValidation import DataValidation
+from util import subtools
+
+
+
+
+class Bam(Binary):
+    def __init__(self, input_bam_false_path, data_bam):
+        super(Bam, self).__init__()
+        self.inputFile = input_bam_false_path
+        self.trackSettings = data_bam
+        self.dataType = "bam"
+        self.trackType = "bam"
+
+
+    def validateData(self):
+        self.validator = DataValidation(self.inputFile, self.dataType, self.chromSizesFile.name)
+        self.validator.validate()
+
+    def createTrack(self):
+        #shutil.copy(self.inputFile, self.trackDataURL)
+        extension = os.path.splitext(self.trackName)[1]
+        if extension != '.bam':
+            self.trackName = self.trackName + '.bam'
+            self.trackDataURL = os.path.join(self.myBinaryFolderPath, self.trackName)
+        #self.trackDataURL = os.path.join(self.myTrackFolderPath, self.trackName)
+        shutil.copyfile(self.inputFile, self.trackDataURL)
+        bam_index = subtools.createBamIndex(self.inputFile)
+        indexName = os.path.basename(bam_index)
+        trackIndexURL = os.path.join(self.myBinaryFolderPath, indexName)
+        #trackIndexURL = os.path.join(self.myTrackFolderPath, indexName)
+        shutil.copyfile(bam_index, trackIndexURL)
+        self.extraSettings['index'] = indexName
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/binary/BigWig.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,31 @@
+#!/usr/bin/python
+
+import os
+import shutil
+from subprocess import Popen, PIPE
+import re
+
+# Internal dependencies
+from Binary import Binary
+from datatypes.validators.DataValidation import DataValidation
+
+
+
+class BigWig(Binary):
+    def __init__(self, input_bigwig_path, data_bigwig):
+        super(BigWig, self).__init__()
+        self.inputFile = input_bigwig_path
+        self.trackSettings = data_bigwig
+        self.dataType = "bigWig"
+        self.trackType = "bigwig"
+
+    def initSettings(self):
+        super(BigWig, self).initSettings()
+        if 'style' in self.trackSettings:
+            self.extraSettings['style'] = self.trackSettings['style']
+
+    def validateData(self):
+        self.validator = DataValidation(self.inputFile, self.dataType, self.chromSizesFile.name)
+        self.validator.validate()
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/binary/Binary.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,38 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+Super Class of the managed datatype
+"""
+
+import os
+import tempfile
+import collections
+import shutil
+import util
+from TrackDb import TrackDb
+from datatypes.Datatype import Datatype
+
+
+class Binary(Datatype):
+
+    def __init__(self):
+        super(Binary, self).__init__()
+
+
+    def initSettings(self):
+        super(Binary, self).initSettings()
+        self.trackDataURL = os.path.join(self.myBinaryFolderPath, self.trackName)
+
+
+    def createTrack(self):
+        shutil.copy(self.inputFile, self.trackDataURL)
+
+
+
+
+
+
+
+
+
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/converters/BedConversion.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+
+'''
+Convert BED format to gff3
+reference for gff3: https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
+'''
+import os
+import tempfile
+from collections import OrderedDict
+
+from util import subtools
+from DataConversion import DataConversion
+
+class BedConversion(DataConversion):
+    def __init__(self, inputBedFile, outputFile, chromSizesFile, bedType, trackType, options=None):
+        super(BedConversion, self).__init__(inputBedFile, outputFile, chromSizesFile, bedType, options)
+
+
+    def convertFormats(self):
+        self.dataToJson()
+
+
+    def dataToJson(self):
+        if self.dataType != 'bed':
+            self.convertToGff3()
+            self.inputFile = self.gff3_file
+            self.dataType == 'gff'
+        subtools.flatfile_to_json(self.inputFile, self.dataType, self.trackType, self.trackLabel, self.outputFile, self.options)
+
+    def convertToGff3(self):
+        self.gff3_file = tempfile.NamedTemporaryFile(suffix=".gff3")
+        if self.dataType == "trfbig":
+            self.trfbig_to_gff3()
+        elif self.dataType == "regtools":
+            self.splicejunctions_to_gff3()
+        elif self.dataType == "blat":
+            self.bigpsl_to_gff3()
+        else:
+            raise ValueError("dataType %s is not support for converting to GFF3", self.dataType)
+
+    def trfbig_to_gff3(self):
+        gff3 = open(self.gff3_file.name, 'w')
+        gff3.write("##gff-version 3\n")
+        sizes_dict = subtools.sequence_region(self.chromSizesFile)
+        seq_regions = dict()
+        with open(self.inputFile, 'r') as bed:
+            for line in bed:
+                field = OrderedDict()
+                attribute = OrderedDict()
+                li = line.rstrip().split("\t")
+                field['seqid'] = li[0]
+                if field['seqid'] not in seq_regions:
+                    end_region = sizes_dict[field['seqid']]
+                    gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
+                    seq_regions[field['seqid']] = end_region
+                field['source'] = li[3]
+                field['type'] = 'tandem_repeat'
+                # The first base in a chromosome is numbered 0 in BED format
+                field['start'] = str(int(li[1]) + 1)
+                field['end'] = li[2]
+                field['score'] = li[9]
+                field['strand'] = '+'
+                field['phase'] = '.'
+                attribute['length of repeat unit'] = li[4]
+                attribute['mean number of copies of repeat'] = li[5]
+                attribute['length of consensus sequence'] = li[6]
+                attribute['percentage match'] = li[7]
+                attribute['percentage indel'] = li[8]
+                attribute['percent of a\'s in repeat unit'] = li[10]
+                attribute['percent of c\'s in repeat unit'] = li[11]
+                attribute['percent of g\'s in repeat unit'] = li[12]
+                attribute['percent of t\'s in repeat unit'] = li[13]
+                attribute['entropy'] = li[14]
+                attribute['sequence of repeat unit element'] = li[15]
+                subtools.write_features(field, attribute, gff3)
+        gff3.close()
+
+
+    def splicejunctions_to_gff3(self):
+        gff3 = open(self.gff3_file.name, 'w')
+        gff3.write("##gff-version 3\n")
+        sizes_dict = subtools.sequence_region(self.chromSizesFile)
+        seq_regions = dict()
+        with open(self.inputFile, 'r') as bed:
+            for line in bed:
+                field = OrderedDict()
+                attribute = OrderedDict()
+                li = line.rstrip().split("\t")
+                field['seqid'] = li[0]
+                if field['seqid'] not in seq_regions:
+                    end_region = sizes_dict[field['seqid']]
+                    gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
+                    seq_regions[field['seqid']] = end_region
+                field['source'] = li[3]
+                field['type'] = 'junction'
+                # The first base in a chromosome is numbered 0 in BED format
+                field['start'] = int(li[1]) + 1
+                field['end'] = li[2]
+                field['score'] = li[12]
+                field['strand'] = li[5]
+                field['phase'] = '.'
+                attribute['ID'] = li[0] + '_' + li[3]
+                attribute['Name'] = li[3]
+                attribute['blockcount'] = li[9]
+                attribute['blocksizes'] = li[10]
+                attribute['chromstarts'] = li[11]
+                subtools.write_features(field, attribute, gff3)
+                subtools.child_blocks(field, attribute, gff3, 'exon_junction')
+        gff3.close()
+
+    def bigpsl_to_gff3(self):
+        gff3 = open(self.gff3_file.name, 'w')
+        gff3.write("##gff-version 3\n")
+        sizes_dict = subtools.sequence_region(self.chromSizesFile)
+        seq_regions = dict()
+        with open(self.inputFile, 'r') as bed:
+            for line in bed:
+                field = OrderedDict()
+                attribute = OrderedDict()
+                li = line.rstrip().split("\t")
+                field['seqid'] = li[0]
+                if field['seqid'] not in seq_regions:
+                    end_region = sizes_dict[field['seqid']]
+                    gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
+                    seq_regions[field['seqid']] = end_region
+                field['source'] = 'UCSC BLAT alignment tool'
+                field['type'] = 'match'
+                # The first base in a chromosome is numbered 0 in BED format
+                field['start'] = str(int(li[1]) + 1)
+                field['end'] = li[2]
+                field['score'] = li[4]
+                field['strand'] = li[5]
+                field['phase'] = '.'
+                attribute['ID'] = li[0] + '_' + li[3]
+                attribute['Name'] = li[3]
+                attribute['blockcount'] = li[9]
+                attribute['blocksizes'] = li[10]
+                attribute['chromstarts'] = li[11]
+                attribute['ochrom_start'] = li[12]
+                attribute['ochrom_end'] = li[13]
+                attribute['ochrom_strand'] = li[14]
+                attribute['ochrom_size'] = li[15]
+                attribute['ochrom_starts'] = li[16]
+                attribute['sequence on other chromosome'] = li[17]
+                attribute['cds in ncbi format'] = li[18]
+                attribute['size of target chromosome'] = li[19]
+                attribute['number of bases matched'] = li[20]
+                attribute['number of bases that don\'t match'] = li[21]
+                attribute['number of bases that match but are part of repeats'] = li[22]
+                attribute['number of \'N\' bases'] = li[23]
+                subtools.write_features(field, attribute, gff3)
+                subtools.child_blocks(field, attribute, gff3, 'match_part')
+        gff3.close()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/converters/DataConversion.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,51 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+This class handles the subprocess calls of the different tools used
+in HubArchiveCreator
+"""
+
+import logging
+import os
+import subprocess
+import sys
+import string
+import tempfile
+
+from bedToGff3 import bedToGff3
+from blastxmlToGff3 import blastxmlToGff3
+from gtfToGff3 import gtfToGff3
+
+
+
+
+class DataConversion(object):
+    CONVERT_OPERATIONS = {("bed", "gff"): "bedtogff3",
+                          ("blastxml", "gff"): "blastxmltogff3",
+                          ("gtf", "gff"): "gtftogff3"}
+
+    def __init__(self, inputFile, outputFile, chromSizesFile, operateType, options=None):
+        if not operateType:
+            return
+        if not inputFile:
+            raise TypeError("the input file is not specified!\n")
+        self.inputFile = inputFile
+        self.chromSizesFile = chromSizesFile
+        self.outputFile = outputFile
+        self.operateType = operateType
+        self.options = options
+
+
+
+    def convertFormats(self):
+        """ Convert data into JBrowse track """
+        convertMethod = self.CONVERT_OPERATIONS[self.operateType]
+        if convertMethod == "bedtogff3":
+            bedToGff3(self.inputFile, self.chromSizesFile, self.outputFile, self.options)
+        elif convertMethod == "blastxmltogff3":
+            blastxmlToGff3(self.inputFile, self.outputFile)
+        elif convertMethod == "gtftogff3":
+            gtfToGff3(self.inputFile, self.outputFile, self.chromSizesFile)
+        else:
+            raise ValueError("the operation %s is not defined!\n", self.operateType)
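Conversions are dispatched on the (source, target) tuple keys of CONVERT_OPERATIONS above. A short usage sketch with hypothetical file names:

    from datatypes.converters.DataConversion import DataConversion

    converter = DataConversion('hits.blastxml', 'hits.gff3',
                               'dbia3.chrom.sizes', ('blastxml', 'gff'))
    converter.convertFormats()   # resolves to blastxmlToGff3('hits.blastxml', 'hits.gff3')

For BED inputs, the options argument carries the BED flavor ('trfbig', 'regtools', or 'blat'), which is how BedSimpleRepeats.createTrack() below invokes it.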
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/converters/bedToGff3.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,133 @@
+#!/usr/bin/env python
+
+'''
+Convert BED format to gff3
+reference for gff3: https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
+'''
+import os
+from collections import OrderedDict
+from util import subtools
+
+def bedToGff3(inputBedFile, chrom_sizes, output, bed_type):
+    if bed_type == "trfbig":
+        trfbig_to_gff3(inputBedFile, chrom_sizes, output)
+    if bed_type == "regtools":
+        splicejunctions_to_gff3(inputBedFile, chrom_sizes, output)
+    if bed_type == "blat":
+        bigpsl_to_gff3(inputBedFile, chrom_sizes, output)
+
+def trfbig_to_gff3(inputBedFile, chrom_sizes, output):
+    gff3 = open(output, 'w')
+    gff3.write("##gff-version 3\n")
+    sizes_dict = subtools.sequence_region(chrom_sizes)
+    seq_regions = dict()
+    with open(inputBedFile, 'r') as bed:
+        for line in bed:
+            field = OrderedDict()
+            attribute = OrderedDict()
+            li = line.rstrip().split("\t")
+            field['seqid'] = li[0]
+            if field['seqid'] not in seq_regions:
+                end_region = sizes_dict[field['seqid']]
+                gff3.write("##sequence-region " +
+                           field['seqid'] + ' 1 ' + str(end_region) + '\n')
+                seq_regions[field['seqid']] = end_region
+            field['source'] = li[3]
+            field['type'] = 'tandem_repeat'
+            # The first base in a chromosome is numbered 0 in BED format
+            field['start'] = str(int(li[1]) + 1)
+            field['end'] = li[2]
+            field['score'] = li[9]
+            field['strand'] = '+'
+            field['phase'] = '.'
+            attribute['length of repeat unit'] = li[4]
+            attribute['mean number of copies of repeat'] = li[5]
+            attribute['length of consensus sequence'] = li[6]
+            attribute['percentage match'] = li[7]
+            attribute['percentage indel'] = li[8]
+            attribute['percent of a\'s in repeat unit'] = li[10]
+            attribute['percent of c\'s in repeat unit'] = li[11]
+            attribute['percent of g\'s in repeat unit'] = li[12]
+            attribute['percent of t\'s in repeat unit'] = li[13]
+            attribute['entropy'] = li[14]
+            attribute['sequence of repeat unit element'] = li[15]
+            subtools.write_features(field, attribute, gff3)
+    gff3.close()
+
+def splicejunctions_to_gff3(inputBedFile, chrom_sizes, output):
+    gff3 = open(output, 'w')
+    gff3.write("##gff-version 3\n")
+    sizes_dict = subtools.sequence_region(chrom_sizes)
+    seq_regions = dict()
+    with open(inputBedFile, 'r') as bed:
+        for line in bed:
+            field = OrderedDict()
+            attribute = OrderedDict()
+            li = line.rstrip().split("\t")
+            field['seqid'] = li[0]
+            if field['seqid'] not in seq_regions:
+                end_region = sizes_dict[field['seqid']]
+                gff3.write("##sequence-region " +
+                           field['seqid'] + ' 1 ' + str(end_region) + '\n')
+                seq_regions[field['seqid']] = end_region
+            field['source'] = li[3]
+            field['type'] = 'junction'
+            # The first base in a chromosome is numbered 0 in BED format
+            field['start'] = int(li[1]) + 1
+            field['end'] = li[2]
+            field['score'] = li[12]
+            field['strand'] = li[5]
+            field['phase'] = '.'
+            attribute['ID'] = li[0] + '_' + li[3]
+            attribute['Name'] = li[3]
+            attribute['blockcount'] = li[9]
+            attribute['blocksizes'] = li[10]
+            attribute['chromstarts'] = li[11]
+            subtools.write_features(field, attribute, gff3)
+            subtools.child_blocks(field, attribute, gff3, 'exon_junction')
+    gff3.close()
+
+def bigpsl_to_gff3(inputBedFile, chrom_sizes, output):
+    gff3 = open(output, 'w')
+    gff3.write("##gff-version 3\n")
+    sizes_dict = subtools.sequence_region(chrom_sizes)
+    seq_regions = dict()
+    with open(inputBedFile, 'r') as bed:
+        for line in bed:
+            field = OrderedDict()
+            attribute = OrderedDict()
+            li = line.rstrip().split("\t")
+            field['seqid'] = li[0]
+            if field['seqid'] not in seq_regions:
+                end_region = sizes_dict[field['seqid']]
+                gff3.write("##sequence-region " +
+                           field['seqid'] + ' 1 ' + str(end_region) + '\n')
+                seq_regions[field['seqid']] = end_region
+            field['source'] = 'UCSC BLAT alignment tool'
+            field['type'] = 'match'
+            # The first base in a chromosome is numbered 0 in BED format
+            field['start'] = str(int(li[1]) + 1)
+            field['end'] = li[2]
+            field['score'] = li[4]
+            field['strand'] = li[5]
+            field['phase'] = '.'
+            attribute['ID'] = li[0] + '_' + li[3]
+            attribute['Name'] = li[3]
+            attribute['blockcount'] = li[9]
+            attribute['blocksizes'] = li[10]
+            attribute['chromstarts'] = li[11]
+            attribute['ochrom_start'] = li[12]
+            attribute['ochrom_end'] = li[13]
+            attribute['ochrom_strand'] = li[14]
+            attribute['ochrom_size'] = li[15]
+            attribute['ochrom_starts'] = li[16]
+            attribute['sequence on other chromosome'] = li[17]
+            attribute['cds in ncbi format'] = li[18]
+            attribute['size of target chromosome'] = li[19]
+            attribute['number of bases matched'] = li[20]
+            attribute['number of bases that don\'t match'] = li[21]
+            attribute['number of bases that match but are part of repeats'] = li[22]
+            attribute['number of \'N\' bases'] = li[23]
+            subtools.write_features(field, attribute, gff3)
+            subtools.child_blocks(field, attribute, gff3, 'match_part')
+    gff3.close()
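All three converters above apply the same coordinate shift: BED intervals are 0-based and half-open, while GFF3 features are 1-based and inclusive, so only the start moves. A worked example with a made-up record:

    # BED interval (chr2L, 11, 25) covers the 12th through 25th bases.
    bed_start, bed_end = 11, 25
    gff3_start = bed_start + 1    # mirrors field['start'] = str(int(li[1]) + 1)
    gff3_end = bed_end            # the end coordinate is shared by both conventions
    assert (gff3_start, gff3_end) == (12, 25)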
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/converters/blastxmlToGff3.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,158 @@
+#!/usr/bin/env python
+
+
+from Bio.Blast import NCBIXML
+from collections import OrderedDict
+import utils
+
+
+def align2cigar(hsp_query, hsp_reference):
+    """
+    Build CIGAR representation from an hsp_query
+    input:
+        hsp_query
+        hsp_sbjct
+    output:
+        CIGAR string
+    """
+    query = hsp_query
+    ref = hsp_reference
+    # preType, curType:
+    #  'M' represents match,
+    #  'I' represents insert a gap into the reference sequence,
+    #  'D' represents insert a gap into the target (delete from reference)
+    # some ideas of this algin2cigar function are coming from
+    # https://gist.github.com/ozagordi/099bdb796507da8d9426
+    prevType = 'M'
+    curType = 'M'
+    count = 0
+    cigar = []
+    num = len(query)
+    for i in range(num):
+        if query[i] == '-':
+            curType = 'D'
+        elif ref[i] == '-':
+            curType = 'I'
+        else:
+            curType = 'M'
+        if curType == prevType:
+            count += 1
+        else:
+            cigar.append('%s%d' % (prevType, count))
+            prevType = curType
+            count = 1
+    cigar.append('%s%d' % (curType, count))
+    return ' '.join(cigar)
+
+def gff3_writer(blast_records, gff3_file):
+    gff3 = open(gff3_file, 'a')
+    gff3.write("##gff-version 3\n")
+    seq_regions = dict()
+    for blast_record in blast_records:
+        query_name = blast_record.query.split(" ")[0]
+        source = blast_record.application
+        method = blast_record.matrix
+        for alignment in blast_record.alignments:
+            group = {
+                "parent_field" : OrderedDict(),
+                "parent_attribute" : OrderedDict(),
+                "alignments" : []
+            }
+            title = alignment.title.split(" ")
+            contig_name = title[len(title) - 1]
+            length = alignment.length
+            group['parent_field']['seqid'] = contig_name
+            group['parent_field']['source'] = source
+            group['parent_field']['type'] = 'match'
+            group['parent_attribute']['ID'] = contig_name + '_' + query_name
+            group['parent_attribute']['method'] = method
+            group['parent_attribute']['length'] = length
+            if contig_name not in seq_regions:
+                gff3.write("##sequence-region " + contig_name + ' 1 ' + str(length) + '\n')
+                seq_regions[contig_name] = length
+            match_num = 0
+            coords = [length, 0]
+            for hsp in alignment.hsps:
+                hsp_align = {}
+                field = OrderedDict()
+                attribute = OrderedDict()
+                ref = hsp.sbjct
+                query = hsp.query
+                field['seqid'] = contig_name
+                field['source'] = source
+                field['type'] = 'match_part'
+
+                field['start'] = hsp.sbjct_start
+                if field['start'] < coords[0]:
+                    coords[0] = field['start']
+                ref_length = len(ref.replace('-', ''))
+                # if run tblastn, the actual length of reference should be multiplied by 3
+                if source.lower() == "tblastn":
+                    ref_length *= 3
+                field['end'] = field['start'] + ref_length - 1
+                if field['end'] > coords[1]:
+                    coords[1] = field['end']
+                field['score'] = hsp.score
+                #decide if the alignment in the same strand or reverse strand
+                #reading frame
+                # (+, +), (0, 0), (-, -) => +
+                # (+, -), (-, +) => -
+                if hsp.frame[1] * hsp.frame[0] > 0:
+                    field['strand'] = '+'
+                elif hsp.frame[1] * hsp.frame[0] < 0:
+                    field['strand'] = '-'
+                else:
+                    if hsp.frame[0] + hsp.frame[1] >= 0:
+                        field['strand'] = '+'
+                    else:
+                        field['strand'] = '-'
+                field['phase'] = '.'
+
+                target_start = hsp.query_start
+                target_len = len(query.replace('-', ''))
+                # if run blastx, the actual length of query should be multiplied by 3
+                if source.lower() == "blastx":
+                    target_len *= 3
+                target_end = target_start + target_len -1
+                attribute['ID'] = group['parent_attribute']['ID'] + '_match_' + str(match_num)
+                attribute['Parent'] = group['parent_attribute']['ID']
+                attribute['Target'] = query_name + " " + str(target_start) + " " + str(target_end)
+                attribute['Gap'] = align2cigar(query, ref)
+                #store the query sequence and match string in the file in order to display alignment with BlastAlignment plugin
+                attribute['subject'] = hsp.sbjct
+                attribute['query'] = hsp.query
+                attribute['match'] = hsp.match
+                attribute['gaps'] = attribute['match'].count(' ')
+                similar = attribute['match'].count('+')
+                attribute['identities'] = len(attribute['match']) - similar - attribute['gaps']
+                attribute['positives'] = attribute['identities'] + similar
+                attribute['expect'] = hsp.expect
+                # show reading frame attribute only if the frame is not (0, 0)
+                attribute['frame'] = hsp.frame[1]
+                match_num += 1
+                hsp_align['field'] = field
+                hsp_align['attribute'] = attribute
+                group['alignments'].append(hsp_align)
+            group['parent_field']['start'] = coords[0]
+            group['parent_field']['end'] = coords[1]
+            group['parent_field']['score'] = group['parent_field']['strand'] = group['parent_field']['phase'] = '.'
+            group['parent_attribute']['match_num'] = match_num
+            group['alignments'].sort(key=lambda x: (x['field']['start'], x['field']['end']))
+            utils.write_features(group['parent_field'], group['parent_attribute'], gff3)
+            prev_end = -1
+            for align in group['alignments']:
+                overlap = ''
+                if align['field']['start'] <= prev_end:
+                    overlap += str(align['field']['start']) + ',' + str(prev_end)
+                prev_end = align['field']['end']
+                align['attribute']['overlap'] = overlap
+                utils.write_features(align['field'], align['attribute'], gff3)
+    gff3.close()
+
+def blastxmlToGff3(xml_file, gff3_file):
+    result_handle = open(xml_file)
+    blast_records = NCBIXML.parse(result_handle)
+    gff3_writer(blast_records, gff3_file)
+
+if __name__ == "__main__":
+    blastxmlToGff3("../dbia3/raw/tblastn_dmel-hits-translation-r6.11.fa_vs_nucleotide_BLAST_database_from_data_3.blastxml", "gff3.txt")
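A worked example of align2cigar() on a five-column alignment (hypothetical sequences): a gap in the reference column emits I, a gap in the query column emits D, and runs of equal states are collapsed with their length.

    # Assumes the module's own imports (Bio, utils) resolve in this environment.
    from datatypes.converters.blastxmlToGff3 import align2cigar

    query = 'ATG-A'   # hsp.query
    ref   = 'AT-GA'   # hsp.sbjct
    # columns: A/A -> M, T/T -> M, G/- -> I, -/G -> D, A/A -> M
    print(align2cigar(query, ref))   # M2 I1 D1 M1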
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/converters/gtfToGff3.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+'''
+Convert GTF format to GFF3
+reference for gff3: https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
+'''
+import os
+from collections import OrderedDict
+from util import subtools
+
+
+
+
+def gtfToGff3(gtf_file, gff3_file, chrom_sizes):
+    """
+    Covert gtf file output from StringTie to gff3 format
+    """
+    gff3 = open(gff3_file, 'w')
+    gff3.write("##gff-version 3\n")
+    sizes_dict = subtools.sequence_region(chrom_sizes)
+    seq_regions = dict()
+    parents = dict()
+    with open(gtf_file, 'r') as gtf:
+        for line in gtf:
+            if line.startswith('#') or not line.strip():
+                continue
+            field = OrderedDict()
+            attribute = OrderedDict()
+            li = line.rstrip().split("\t")
+            #print li
+            field['seqid'] = li[0]
+            #print field['seqid']
+            if field['seqid'] not in seq_regions:
+                end_region = sizes_dict[field['seqid']]
+                gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
+                seq_regions[field['seqid']] = end_region
+            field['source'] = li[1]
+            field['type'] = li[2]
+            # The first base in a chromosome is numbered 0 in BED format
+            field['start'] = li[3]
+            field['end'] = li[4]
+            field['score'] = li[5]
+            field['strand'] = li[6]
+            field['phase'] = li[7]
+            attr_li = li[8].split(';')
+            gene_id = attr_li[0].split()[1].strip('"')
+            attribute['ID'] = gene_id + '_' + field['type'] + '_' + str(field['start']) + '_' + str(field['end'])
+            if field['type'] == 'transcript':
+                parents[gene_id] = attribute['ID']
+                attribute['transcript_id'] = attr_li[1].split()[1].strip('"')
+                attribute['coverage'] = attr_li[2].split()[1].strip('"')
+                attribute['fpkm'] = attr_li[3].split()[1].strip('"')
+                attribute['tpm'] = attr_li[4].split()[1].strip('"')
+            elif field['type'] == 'exon':
+                attribute['Parent'] = parents[gene_id]
+                attribute['transcript_id'] = attr_li[1].split()[1].strip('"')
+                attribute['coverage'] = attr_li[3].split()[1].strip('"')
+            subtools.write_features(field, attribute, gff3)
+    gff3.close()
+
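The attribute handling above assumes StringTie's fixed attribute order; for a hypothetical transcript line the indexing works out as:

    attrs = 'gene_id "MSTRG.1"; transcript_id "MSTRG.1.1"; cov "7.3"; FPKM "5.1"; TPM "6.0";'
    attr_li = attrs.split(';')
    gene_id = attr_li[0].split()[1].strip('"')        # 'MSTRG.1'
    transcript_id = attr_li[1].split()[1].strip('"')  # 'MSTRG.1.1'
    # cov/FPKM/TPM sit at fixed indexes 2-4, which is why a reordered or
    # non-StringTie GTF would break this parser.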
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/interval/Bed.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,48 @@
+#!/usr/bin/python
+
+import os
+import tempfile
+import logging
+import shutil
+
+# Internal dependencies
+from Interval import Interval
+from datatypes.validators.DataValidation import DataValidation
+from datatypes.converters.DataConversion import DataConversion
+
+class Bed(Interval):
+    def __init__(self, inputBedGeneric, data_bed_generic):
+        super(Bed, self).__init__()
+        self.inputFile = inputBedGeneric
+        self.trackSettings = data_bed_generic
+        self.bedFields = None
+        self.extFields = None
+        self.dataType = "bed"
+
+    def createTrack(self):
+        shutil.copyfile(self.inputFile, self.trackDataURL)
+
+    def validateData(self):
+        self.validator = DataValidation(self.inputFile, self.getValidateType(), self.chromSizesFile.name)
+        self.validator.validate()
+
+    def _getBedFields(self):
+        """count the number of fields of a generic bed file from its first line"""
+        with open(self.inputFile, 'r') as bed:
+            first_line = bed.readline().split()
+        return len(first_line)
+
+    def getValidateType(self):
+        if not self.bedFields:
+            self.bedFields = self._getBedFields()
+            logging.debug("bedFields was not defined; treating the file as generic Bed format, datatype = bed%s", str(self.bedFields))
+            return self.dataType + str(self.bedFields)
+        elif not self.extFields:
+            return self.dataType + str(self.bedFields)
+        else:
+            return self.dataType + str(self.bedFields) + "+" + str(self.extFields)
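For reference, `getValidateType()` composes the format string handed to `DataValidation`: a plain n-column BED becomes `bedN`, while subclasses that preset `bedFields`/`extFields` produce the `bedN+M` form. A small sketch of the expected strings, with field counts taken from the subclasses in this changeset:

```python
# Sketch of the validate-type strings produced by getValidateType():
#   Bed (6-column input, bedFields detected)   -> "bed6"
#   BedSimpleRepeats (bedFields=4, ext=12)     -> "bed4+12"
#   BedSpliceJunctions (bedFields=12, ext=1)   -> "bed12+1"
#   BigPsl (bedFields=12, ext=12)              -> "bed12+12"
def validate_type(dataType, bedFields, extFields=None):
    if extFields:
        return dataType + str(bedFields) + "+" + str(extFields)
    return dataType + str(bedFields)

assert validate_type("bed", 4, 12) == "bed4+12"
assert validate_type("bed", 6) == "bed6"
```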
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/interval/BedBlastAlignments.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,25 @@ +#!/usr/bin/python + +import os +import tempfile +import string + +from BigPsl import BigPsl +from datatypes.converters.DataConversion import DataConversion +from util import subtools + + +class BedBlastAlignments( BigPsl ): + def __init__(self, input_bed_blast_alignments_false_path, data_bed_blast_alignments): + + super(BedBlastAlignments, self).__init__(input_bed_blast_alignments_false_path, data_bed_blast_alignments) + #self.seqType = 1 + self.trackType = "G-OnRamp_plugin/BlastAlignment" + + def initSettings(self): + super(BedBlastAlignments, self).initSettings() + self.extraSettings["subfeatureClasses"] = "match_part" + + + + \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/interval/BedBlatAlignments.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,23 @@
+#!/usr/bin/python
+
+import os
+import tempfile
+import string
+
+from BigPsl import BigPsl
+from datatypes.converters.DataConversion import DataConversion
+from util import subtools
+
+
+class BedBlatAlignments( BigPsl ):
+    def __init__(self, input_bed_blat_alignments_false_path, data_bed_blat_alignments):
+
+        super(BedBlatAlignments, self).__init__(input_bed_blat_alignments_false_path, data_bed_blat_alignments)
+        #self.seqType = 1
+        #self.trackType = "G-OnRamp_plugin/BlatAlignment"
+
+    def initSettings(self):
+        super(BedBlatAlignments, self).initSettings()
+        self.extraSettings["subfeatureClasses"] = "match_part"
+
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/interval/BedSimpleRepeats.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,33 @@ +#!/usr/bin/python + +import os +import tempfile + +from Bed import Bed +from datatypes.validators.DataValidation import DataValidation +from datatypes.converters.DataConversion import DataConversion + + +class BedSimpleRepeats( Bed ): + def __init__(self, input_bed_simple_repeats_false_path, data_bed_simple_repeats): + + super(BedSimpleRepeats, self).__init__(input_bed_simple_repeats_false_path, data_bed_simple_repeats) + self.bedFields = 4 + self.extFields = 12 + self.autoSql = os.path.join(self.tool_directory, 'trf_simpleRepeat.as') + self.trackFileType = "gff" + + + + def validateData(self): + self.validateOptions = self.getValidateOptions(tab="True", autoSql=self.autoSql) + self.validator = DataValidation(self.inputFile, self.getValidateType(), self.chromSizesFile.name, self.validateOptions) + self.validator.validate() + + + def createTrack(self): + self.convertType = self.getConvertType() + self.converter = DataConversion(self.inputFile, self.trackDataURL, self.chromSizesFile.name, self.convertType, 'trfbig') + self.converter.convertFormats() + self.dataType = self.trackFileType +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/interval/BedSpliceJunctions.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,36 @@ +#!/usr/bin/python + +import os +import tempfile + +from Bed import Bed +from datatypes.validators.DataValidation import DataValidation +from datatypes.converters.DataConversion import DataConversion + + + +class BedSpliceJunctions( Bed ): + def __init__(self, input_bed_splice_junctions_false_path, data_bed_splice_junctions): + + super(BedSpliceJunctions, self).__init__(input_bed_splice_junctions_false_path, data_bed_splice_junctions) + self.bedFields = 12 + self.extFields = 1 + self.autoSql = os.path.join(self.tool_directory, 'spliceJunctions.as') + self.trackFileType = "gff" + + def initSettings(self): + super(BedSpliceJunctions, self).initSettings() + self.extraSettings["glyph"] = "JBrowse/View/FeatureGlyph/Segments" + self.extraSettings["subfeatureClasses"] = "exon_junction" + + def validateData(self): + self.validateOptions = self.getValidateOptions(tab="True", autoSql=self.autoSql) + self.validator = DataValidation(self.inputFile, self.getValidateType(), self.chromSizesFile.name, self.validateOptions) + self.validator.validate() + + def createTrack(self): + self.convertType = self.getConvertType() + self.converter = DataConversion(self.inputFile, self.trackDataURL, self.chromSizesFile.name, self.convertType, 'regtools') + self.converter.convertFormats() + self.dataType = self.trackFileType +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/interval/BigPsl.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,53 @@ +#!/usr/bin/python + +import os +import tempfile +import string + +from Interval import Interval +from util.index.DatabaseIndex import DatabaseIndex +from util.index.TrixIndex import TrixIndex +from datatypes.validators.DataValidation import DataValidation +from datatypes.converters.DataConversion import DataConversion + + +class BigPsl(Interval): + def __init__(self, input_bigpsl_false_path, data_bigpsl): + + super(BigPsl, self).__init__() + self.inputFile = input_bigpsl_false_path + self.trackSettings = data_bigpsl + self.dataType = "bed" + self.bedFields = 12 + self.extFields = 12 + #self.seqType = None + self.autoSql = os.path.join(self.tool_directory, 'bigPsl.as') + + def initSettings(self): + super(BigPsl, self).initSettings() + self.extraSettings["glyph"] = "JBrowse/View/FeatureGlyph/Segments" + #self.extraSettings["subfeatureClasses"] = "match_part" + + def validateData(self): + self.validateOptions = self.getValidateOptions(tab="True", autoSql=self.autoSql) + self.validator = DataValidation(self.inputFile, self.getValidateType(), self.chromSizesFile.name, self.validateOptions) + self.validator.validate() + + def createTrack(self): + self.convertType = self.getConvertType() + self.converter = DataConversion(self.inputFile, self.trackDataURL, self.chromSizesFile.name, self.convertType, 'blat') + self.converter.convertFormats() + self.dataType = self.trackFileType + + def getValidateType(self): + if not self.bedFields or not self.extFields: + raise Exception("Invalid bigPsl format, no {0} or {1}".format("bedFields", "extFields")) + return self.dataType + str(self.bedFields) + "+" + str(self.extFields) + + def _getSeqType(self): + with open(self.inputFile, "r") as bigpsl: + sampleSeq = bigpsl.readline().split() + if len(sampleSeq) == 25: + return sampleSeq[-1] + else: + return None
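`_getSeqType()` relies on the bigPsl convention that a full row is BED12 plus 12 extra fields, with an optional 25th column holding the query sequence itself. A sketch of that check on a hypothetical input path:

```python
# Sketch of the _getSeqType() convention above: bigPsl rows are bed12+12,
# and an optional 25th column carries the aligned sequence itself.
with open("alignments.bigpsl.bed") as bigpsl:    # hypothetical path
    first = bigpsl.readline().split()
seq = first[-1] if len(first) == 25 else None    # None => no embedded sequence
```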
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/interval/BlastXml.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,34 @@
+#!/usr/bin/python
+
+import os
+import tempfile
+import string
+
+from Interval import Interval
+from datatypes.converters.DataConversion import DataConversion
+from util import subtools
+
+
+class BlastXml( Interval ):
+    def __init__(self, input_blast_alignments_false_path, data_blast_alignments):
+
+        super(BlastXml, self).__init__()
+        self.inputFile = input_blast_alignments_false_path
+        self.trackSettings = data_blast_alignments
+        self.dataType = "blastxml"
+        #self.trackType = "G-OnRamp_plugin/BlastAlignment"
+
+    def initSettings(self):
+        super(BlastXml, self).initSettings()
+        self.extraSettings["glyph"] = "JBrowse/View/FeatureGlyph/Segments"
+        self.extraSettings["subfeatureClasses"] = "match_part"
+
+    def validateData(self):
+        # No validator is available for blastxml input; the file is converted
+        # straight to gff in createTrack()
+        return
+
+    def createTrack(self):
+        self.convertType = self.getConvertType()
+        self.converter = DataConversion(self.inputFile, self.trackDataURL, self.chromSizesFile.name, self.convertType)
+        self.converter.convertFormats()
+        self.dataType = self.trackFileType
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/interval/Gff.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,21 @@ +#!/usr/bin/python + +import os +import tempfile +import abc +import shutil + +# Internal dependencies +from Interval import Interval +from datatypes.validators.DataValidation import DataValidation +from datatypes.converters.DataConversion import DataConversion + +class Gff(Interval): + def __init__(self): + super(Gff, self).__init__() + self.autoSql = os.path.join(self.tool_directory, 'bigGenePred.as') + + + def createTrack(self): + shutil.copyfile(self.inputFile, self.trackDataURL) + \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/interval/Gff3.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,22 @@ +#!/usr/bin/python + +import os +import tempfile + +# Internal dependencies +from Gff import Gff +from datatypes.validators.Gff3Validation import Gff3Validation + + +class Gff3( Gff ): + def __init__(self, input_Gff3_false_path, data_gff3): + super( Gff3, self ).__init__() + self.inputFile = input_Gff3_false_path + self.trackSettings = data_gff3 + self.dataType = "gff" + + + def validateData(self): + self.validator = Gff3Validation(self.inputFile, self.dataType, self.chromSizesFile.name) + self.inputFile = self.validator.validate() +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/interval/Gff3_mrna.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,27 @@ +#!/usr/bin/python + +import os +import tempfile + +# Internal dependencies +from Gff import Gff +from datatypes.validators.Gff3Validation import Gff3Validation + + +class Gff3_mrna( Gff ): + def __init__(self, input_Gff3_false_path, data_gff3): + super( Gff3_mrna, self ).__init__() + self.inputFile = input_Gff3_false_path + self.trackSettings = data_gff3 + self.dataType = "gff" + #self.trackType = "G-OnRamp_plugin/GenePred" + + def initSettings(self): + super(Gff3_mrna, self).initSettings() + self.extraSettings["type"] = "mRNA" + self.extraSettings["subfeatureClasses"] = "CDS" + + def validateData(self): + self.validator = Gff3Validation(self.inputFile, self.dataType, self.chromSizesFile.name) + self.inputFile = self.validator.validate() +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/interval/Gff3_transcript.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,28 @@ +#!/usr/bin/python + +import os +import tempfile + +# Internal dependencies +from Gff import Gff +from datatypes.validators.Gff3Validation import Gff3Validation + + +class Gff3_transcript( Gff ): + def __init__(self, input_Gff3_false_path, data_gff3): + super( Gff3_transcript, self ).__init__() + self.inputFile = input_Gff3_false_path + self.trackSettings = data_gff3 + self.dataType = "gff" + #self.trackType = "G-OnRamp_plugin/GenePred" + + def initSettings(self): + super(Gff3_transcript, self).initSettings() + self.extraSettings["transcriptType"] = "transcript" + self.extraSettings["type"] = "transcript" + self.extraSettings["subfeatureClasses"] = "CDS" + + def validateData(self): + self.validator = Gff3Validation(self.inputFile, self.dataType, self.chromSizesFile.name) + self.inputFile = self.validator.validate() +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/interval/Gtf.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,33 @@ +#!/usr/bin/python + +import os +import tempfile + +# Internal dependencies +from Gff import Gff +from datatypes.validators.GtfValidation import GtfValidation +from datatypes.converters.DataConversion import DataConversion + + +class Gtf(Gff): + def __init__( self, input_gtf_false_path, data_gtf): + + super(Gtf, self).__init__() + self.inputFile = input_gtf_false_path + self.trackSettings = data_gtf + self.dataType = "gtf" + + def initSettings(self): + super(Gtf, self).initSettings() + self.extraSettings["glyph"] = "JBrowse/View/FeatureGlyph/Segments" + + def createTrack(self): + self.convertType = self.getConvertType() + self.converter = DataConversion(self.inputFile, self.trackDataURL, self.chromSizesFile.name, self.convertType) + self.converter.convertFormats() + self.dataType = self.trackFileType + + def validateData(self): + self.validator = GtfValidation(self.inputFile, self.dataType, self.chromSizesFile.name) + self.inputFile = self.validator.validate() +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/interval/GtfStringTie.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,23 @@ +#!/usr/bin/python + +import os +import tempfile + +# Internal dependencies +from Gtf import Gtf +from datatypes.validators.GtfValidation import GtfValidation +from datatypes.converters.DataConversion import DataConversion + + +class GtfStringTie(Gtf): + def __init__( self, input_gtf_false_path, data_gtf): + + super(GtfStringTie, self).__init__(input_gtf_false_path, data_gtf) + + + def initSettings(self): + super(GtfStringTie, self).initSettings() + self.extraSettings["glyph"] = "JBrowse/View/FeatureGlyph/Segments" + self.extraSettings["subfeatureClasses"] = "UTR" + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/interval/Interval.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+Superclass of the managed interval datatypes
+"""
+
+import logging
+from datatypes.Datatype import Datatype
+
+
+class Interval(Datatype):
+
+    def __init__(self):
+        super(Interval, self).__init__()
+        if not Datatype.trackType:
+            self.trackType = "HTMLFeatures"
+        else:
+            self.trackType = Datatype.trackType
+        logging.debug("Using trackType = %s for feature tracks", self.trackType)
+        self.trackFileType = "gff"
+
+    def getValidateOptions(self, tab=None, autoSql=None):
+        options = dict()
+        if tab:
+            options["tab"] = tab
+        if autoSql:
+            options["autoSql"] = autoSql
+        return options
+
+    def getConvertType(self):
+        # (source format, target format) tuple used to pick a converter
+        if not self.trackFileType or not self.dataType:
+            raise ValueError("dataType or trackFileType has not been set!")
+        return (self.dataType.lower(), self.trackFileType.lower())
\ No newline at end of file
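`getConvertType()` returns a `(source, target)` tuple that `DataConversion` presumably uses to select a converter. A hedged sketch of the mapping implied by the converters in this changeset; the actual lookup lives in `datatypes/converters/DataConversion.py` (not shown here), so the table below is an assumption, not its real contents:

```python
# Assumed (source, target) -> converter mapping, inferred from the datatype
# classes in this changeset; illustrative only.
CONVERTERS = {
    ("gtf", "gff"): "gtfToGff3",
    ("blastxml", "gff"): "blastxmlToGff3",
    ("psl", "bigpsl"): "pslToBigPsl + bedToBigBed",  # per Psl.createTrack()
}
print(CONVERTERS[("gtf", "gff")])  # gtfToGff3
```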
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/interval/Psl.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,42 @@ +import logging +import os +import tempfile + +# Internal dependencies +from Interval import Interval +from datatypes.validators.PslValidation import PslValidation +from datatypes.converters.DataConversion import DataConversion + + +class Psl(Interval): + def __init__(self, input_psl_path, data_psl): + super(Psl, self).__init__() + self.inputFile = input_psl_path + self.trackSettings = data_psl + self.dataType = "psl" + self.trackType = "bigPsl" + self.autoSql = os.path.join(self.tool_directory, 'bigPsl.as') + + def initSettings(self): + super(Psl, self).initSettings() + self.trackName = "".join( ( self.trackName, ".bb") ) + self.trackDataURL = os.path.join(self.myTrackFolderPath, self.trackName) + if "track_color" in self.trackSettings: + self.extraSettings["color"] = self.trackSettings["track_color"] + if "group_name" in self.trackSettings: + self.extraSettings["group"] = self.trackSettings["group_name"] + self.extraSettings["visibility"] = "dense" + self.extraSettings["priority"] = self.trackSettings["order_index"] + + def validateData(self): + self.validator = PslValidation(self.inputFile, self.dataType, self.chromSizesFile) + self.validator.validate() + + def createTrack(self): + self.convertType = self.getConvertType() + self.options = self.getConvertOptions("bed12+12", tab="True", autoSql=self.autoSql, extraIndex="name") + self.converter = DataConversion(self.inputFile, self.trackDataURL, self.chromSizesFile.name, self.convertType, self.options) + self.converter.convertFormats() + + def getConvertType(self): + return (self.dataType.lower(), self.trackType.lower())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/sequence/Fasta.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,16 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+Class describing the Fasta format
+(As of 07/20/2016, only used for the reference genome)
+"""
+
+class Fasta(object):
+    def __init__(self, false_path, name, assembly_id):
+        self.false_path = false_path
+        self.name = name
+
+        if not assembly_id:
+            assembly_id = "unknown"
+        self.assembly_id = assembly_id
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/validators/DataValidation.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,43 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+This class validates input files for JBrowseArchiveCreator by calling the
+UCSC validateFiles tool through util.subtools
+"""
+
+import logging
+import os
+import subprocess
+import sys
+import string
+import tempfile
+import re
+
+from util import subtools
+
+
+class DataValidation(object):
+    BED_TYPE = re.compile(r'bed([1-9][0-9]?)\+?([1-9][0-9]?)?$')
+    BIGBED_TYPE = re.compile(r'bigBed([1-9][0-9]?)\+?([1-9][0-9]?)?$')
+    FILE_TYPE = ["fasta", "fastq", "bam", "bigwig", "bed", "bigbed", "bedgraph"]
+
+    def __init__(self, inputFile, fileType, chromSizesFile, options=None):
+        self.inputFile = inputFile
+        self.fileType = fileType
+        self.chromSizesFile = chromSizesFile
+        self.options = options
+
+    def validate(self):
+        """validate input file format"""
+        if self._checkDatatype():
+            subtools.validateFiles(self.inputFile, self.chromSizesFile, self.fileType, self.options)
+        else:
+            raise TypeError("validateFiles cannot validate format {0}. Only the following formats can be validated by this tool: \n{1}\n".format(self.fileType, self.FILE_TYPE))
+
+    def _checkDatatype(self):
+        if re.match(self.BED_TYPE, self.fileType) or re.match(self.BIGBED_TYPE, self.fileType):
+            return True
+        elif self.fileType.lower() in self.FILE_TYPE:
+            return True
+        return False
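The two regular expressions above accept the `bedN`, `bedN+` and `bedN+M` spellings (and their `bigBed` counterparts) that UCSC validateFiles understands, alongside the fixed `FILE_TYPE` list. A quick standalone check of which type strings pass `_checkDatatype`:

```python
# Standalone sketch of _checkDatatype(): which type strings are accepted.
import re

BED_TYPE = re.compile(r'bed([1-9][0-9]?)\+?([1-9][0-9]?)?$')
BIGBED_TYPE = re.compile(r'bigBed([1-9][0-9]?)\+?([1-9][0-9]?)?$')
FILE_TYPE = ["fasta", "fastq", "bam", "bigwig", "bed", "bigbed", "bedgraph"]

for fileType in ("bed6", "bed4+12", "bigBed12+1", "bam", "vcf"):
    accepted = bool(re.match(BED_TYPE, fileType) or re.match(BIGBED_TYPE, fileType)) \
               or fileType.lower() in FILE_TYPE
    print(fileType + " -> " + str(accepted))  # vcf -> False, the rest -> True
```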
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/validators/Gff3Validation.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,48 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+This class validates Gff3 files for JBrowseArchiveCreator
+"""
+
+import logging
+import os
+import subprocess
+import sys
+import string
+import tempfile
+import re
+
+from DataValidation import DataValidation
+
+
+class Gff3Validation(DataValidation):
+
+    def __init__(self, inputFile, fileType, chromSizesFile, options=None):
+        super(Gff3Validation, self).__init__(inputFile, fileType, chromSizesFile, options)
+
+    def validate(self):
+        """validate input file format"""
+        if self._removeExtraHeader() > 1:
+            print("- Warning: Gff3 created with a modified version of your Gff3 by removing extra '##gff-version 3' headers.")
+        return self.inputFile
+
+    def _removeExtraHeader(self):
+        """
+        Remove extra meta lines: only the first '##gff-version 3' is kept.
+        Returns the number of '##gff-version 3' lines seen in the input.
+        """
+        valid_gff3_file = tempfile.NamedTemporaryFile(bufsize=0, suffix=".gff3", delete=False)
+        num = 0
+        with open(valid_gff3_file.name, 'w') as valid:
+            with open(self.inputFile, 'r') as f:
+                for line in f:
+                    if '##gff-version 3' in line:
+                        num += 1
+                        # skip every occurrence after the first one
+                        if num > 1:
+                            continue
+                    valid.write(line)
+        self.inputFile = valid_gff3_file.name
+        return num
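The header cleanup matters because concatenated GFF3 datasets often repeat the version pragma, which downstream parsers reject. A before/after sketch with hypothetical features:

```python
# Sketch: input with a repeated '##gff-version 3' pragma; after
# _removeExtraHeader() only the first pragma survives.
dirty = ("##gff-version 3\n"
         "ctg1\t.\tgene\t1\t100\t.\t+\t.\tID=g1\n"
         "##gff-version 3\n"                        # duplicate, removed
         "ctg1\t.\tgene\t200\t300\t.\t+\t.\tID=g2\n")
clean = []
seen = 0
for line in dirty.splitlines(True):
    if '##gff-version 3' in line:
        seen += 1
        if seen > 1:
            continue
    clean.append(line)
print(''.join(clean))
```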
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/validators/GtfValidation.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,108 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+This class validates Gtf files for JBrowseArchiveCreator
+"""
+
+import logging
+import os
+import subprocess
+import sys
+import string
+import tempfile
+import re
+
+from DataValidation import DataValidation
+
+
+class GtfValidation(DataValidation):
+
+    def __init__(self, inputFile, fileType, chromSizesFile, options=None):
+        super(GtfValidation, self).__init__(inputFile, fileType, chromSizesFile, options)
+
+    def validate(self):
+        """validate input file format"""
+        self._checkAndFixGtf()
+        if self.is_modified:
+            print("- Warning: Gtf created with a modified version of your Gtf because of start/end coordinate issues.")
+            print("Here are the lines removed: " + self._get_str_modified_lines())
+        return self.inputFile
+
+    def _checkAndFixGtf(self):
+        """
+        Check the integrity of the gtf file: if coordinates exceed the chromosome size,
+        either remove the whole line(s) or truncate to the end of the scaffold,
+        depending on the user choice
+        default: remove the whole line(s)
+        """
+        # Set the boolean telling if we had to modify the file
+        self.is_modified = False
+        self.array_modified_lines = []
+        # Create a temp gtf just in case we have issues
+        temp_gtf = tempfile.NamedTemporaryFile(bufsize=0, suffix=".gtf", delete=False)
+
+        # TODO: Get the user choice and use it
+        # Get the chrom.sizes into a dictionary to have a faster access
+        # TODO: Think about doing this in Datatype.py, so everywhere we have access to this read-only dictionary
+        dict_chrom_sizes = {}
+        with open(self.chromSizesFile, 'r') as chromSizes:
+            for line in chromSizes:
+                fields = line.split()
+                # fields[0] is the name of the scaffold, fields[1] is its size
+                dict_chrom_sizes[fields[0]] = int(fields[1])
+
+        # Parse the GTF and check each line using the chrom sizes dictionary
+        with open(temp_gtf.name, 'a+') as tmp:
+            with open(self.inputFile, 'r') as gtf:
+                for index, line in enumerate(gtf):
+                    # If this is not a comment, we check the fields
+                    if not line.startswith('#'):
+                        fields = line.split()
+                        # fields[0] => Seqname (scaffold)
+                        # fields[3] => Start position on the scaffold
+                        # fields[4] => End position on the scaffold
+                        scaffold_size = dict_chrom_sizes[fields[0]]
+                        start_position = int(fields[3])
+                        end_position = int(fields[4])
+
+                        if start_position > 0 and end_position <= scaffold_size:
+                            # We are good, so we copy this line
+                            tmp.write(line)
+                        # The line is out of bounds; we process it following the user choice
+                        # TODO: Process the user choice
+                        # By default, we assume the user choice is to remove the line: we don't copy it
+                        else:
+                            # We save the line number for the feedback to the user
+                            self.array_modified_lines.append(index + 1)
+                            self.is_modified = True
+                    else:
+                        tmp.write(line)
+
+        # Once the process is completed, we just replace the path of the gtf
+        self.inputFile = temp_gtf.name
+
+        # TODO: Manage the issue with the fact the dataset is going to still exist on the disk because of delete=False
+
+    def _get_str_modified_lines(self):
+        return ','.join(map(str, self.array_modified_lines))
\ No newline at end of file
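The bounds check above only works once the coordinates and scaffold sizes are compared as integers (comparing the raw strings would be lexicographic). A sketch of the check on a hypothetical chrom.sizes entry:

```python
# Sketch of the bounds check performed by _checkAndFixGtf(): lines whose end
# coordinate exceeds the scaffold size are dropped. Values are illustrative.
dict_chrom_sizes = {"contig1": 5000}
fields = "contig1\tStringTie\texon\t4900\t5200\t.\t+\t.\t...".split("\t")
keep = int(fields[3]) > 0 and int(fields[4]) <= dict_chrom_sizes[fields[0]]
print(keep)  # False -> the line would be removed and reported to the user
```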
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/validators/PslValidation.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,31 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+This class validates Psl files for JBrowseArchiveCreator by calling UCSC
+pslCheck through util.subtools
+"""
+
+import logging
+import os
+import subprocess
+import sys
+import string
+import tempfile
+import re
+
+from util import subtools
+from datatypes.validators.DataValidation import DataValidation
+
+
+class PslValidation(DataValidation):
+
+    def __init__(self, inputFile, fileType, chromSizesFile, options=None):
+        super(PslValidation, self).__init__(inputFile, fileType, chromSizesFile, options)
+
+    def validate(self):
+        """validate input file format"""
+        self.pslCheck()
+
+    def pslCheck(self):
+        subtools.pslCheck(self.inputFile)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jbrowseArchiveCreator.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+# -*- coding: utf8 -*-
+
+"""
+This Galaxy tool prepares your files for JBrowse visualization.
+"""
+
+import sys
+import argparse
+import json
+import logging
+import collections
+
+
+# Internal dependencies
+from util.Reader import Reader
+from util.Logger import Logger
+from TrackHub import TrackHub
+
+
+def main(argv):
+    parser = argparse.ArgumentParser(description='Create a hub to display in JBrowse.')
+    parser.add_argument('-j', '--data_json', help='JSON file containing the metadata of the inputs')
+    parser.add_argument('-o', '--output', help='Name of the HTML file summarizing the content of the JBrowse Hub Archive')
+
+    # Get the args passed in parameter
+    args = parser.parse_args()
+    json_inputs_data = args.data_json
+    outputFile = args.output
+
+    ## Parse the JSON file with Reader
+    reader = Reader(json_inputs_data)
+
+    # Begin init variables
+    extra_files_path = reader.getExtFilesPath()
+    toolDirectory = reader.getToolDir()
+    #outputFile = reader.getOutputDir()
+    user_email = reader.getUserEmail()
+    reference_genome = reader.getRefGenome()
+    debug_mode = reader.getDebugMode()
+    track_type = reader.getTrackType()
+    #jbrowse_path = reader.getJBrowsePath()
+    apollo_host = reader.getApolloHost()
+    apollo_user = reader.getApolloUser()
+
+    #### Logging management ####
+    # If we are in Debug mode, also print the debug dump to stdout
+    log = Logger(tool_directory=toolDirectory, debug=debug_mode, extra_files_path=extra_files_path)
+    log.setup_logging()
+    logging.info('#### JBrowseArchiveCreator: Start ####\n')
+    logging.debug('---- Welcome to JBrowseArchiveCreator Debug Mode ----\n')
+    logging.debug('JSON parameters: %s\n\n', json.dumps(reader.args))
+    #### END Logging management ####
+
+    # Create the Track Hub folder
+    logging.info('#### JBrowseArchiveCreator: Creating the Track Hub folder ####\n')
+    trackHub = TrackHub(reference_genome, apollo_user, outputFile, extra_files_path, toolDirectory, track_type, apollo_host)
+
+    # Create an ordered dictionary to add the tracks in the tool form order
+    logging.info('#### JBrowseArchiveCreator: Preparing track data ####\n')
+    all_datatype_dictionary = reader.getTracksData()
+    all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary)
+
+    logging.debug("----- End of all_datatype_dictionary processing -----")
+    #logging.debug("all_datatype_ordered_dictionary are: %s", json.dumps(all_datatype_ordered_dictionary))
+
+    logging.info('#### JBrowseArchiveCreator: Adding tracks to Track Hub ####\n')
+    logging.debug("----- Beginning of Track adding processing -----")
+
+    for index, datatypeObject in all_datatype_ordered_dictionary.iteritems():
+        trackHub.addTrack(datatypeObject.track.track_db)
+
+    logging.debug("----- End of Track adding processing -----")
+
+    # We terminate the process and create an HTML file summarizing all the files
+    logging.info('#### JBrowseArchiveCreator: Creating the HTML file ####\n')
+    trackHub.terminate(debug_mode)
+
+    logging.debug('---- End of JBrowseArchiveCreator Debug Mode: Bye! ----\n')
+    logging.info('#### JBrowseArchiveCreator: Congratulations! The Assembly Hub was created! ####\n')
+
+    sys.exit(0)
+
+if __name__ == "__main__":
+    main(sys.argv)
\ No newline at end of file
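The keys that `Reader` consumes here mirror what the tool XML's configfile (next diff) dumps into `parameters.json`. A minimal sketch of that file, written as a Python dict; all paths, the order index, and the track values are illustrative, and the per-datatype track lists are keyed by the datatype class names:

```python
# Hypothetical minimal parameters.json content (keys per the configfile in
# jbrowseArchiveCreator.xml; values are illustrative only).
parameters = {
    "genome_name": "unknown",
    "apollo_host": "http://localhost:8080/apollo",
    "fasta": {"false_path": "/galaxy/files/dataset_1.dat", "name": "dbia3.fa"},
    "user_email": "user@example.org",
    "tool_directory": "/galaxy/tools/jbrowsearchivecreator",
    "extra_files_path": "/galaxy/files/dataset_2_files",
    "debug_mode": "false",
    "Bam": [{"false_path": "/galaxy/files/dataset_3.dat", "name": "HISAT.bam",
             "track_color": "#000000", "group_name": "Default group",
             "long_label": "Sequence Alignment", "order_index": "10",
             "index": "/galaxy/files/dataset_3.bai"}],
}
```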
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jbrowseArchiveCreator.xml Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,446 @@
+<tool id="jbrowse_hub" name="JBrowse Archive Creator" version="1.0.1">
+    <description>
+        This Galaxy tool prepares your files for display in JBrowse with the Apollo plugin
+    </description>
+
+    <requirements>
+        <requirement type="package" version="1.2">samtools</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="1.68">biopython</requirement>
+        <requirement type="package" version="1.0">ucsc_tools_340</requirement>
+        <requirement type="package" version="1.12.1">jbrowse_tools</requirement>
+    </requirements>
+
+    <stdio>
+    </stdio>
+
+    <command detect_errors="exit_code"><![CDATA[
+        mkdir -p $output.extra_files_path;
+
+        ## Dump the tool parameters into a JSON file
+        python $json_file parameters.json;
+
+        python $__tool_directory__/jbrowseArchiveCreator.py --data_json parameters.json -o $output
+    ]]></command>
+    <configfiles>
+        <configfile name="json_file">
+import json
+import sys
+
+file_path = sys.argv[1]
+#set global data_parameter_dict = {}
+
+## Store the genome name and the Apollo connection settings
+#silent $data_parameter_dict.update({"genome_name": str($genome_name)})
+#silent $data_parameter_dict.update({"apollo_host": str($apollo_host)})
+#if $apollo_users_settings.apollo_users_selector == "yes"
+    #set apollo_user = {"firstname": str($apollo_users_settings.firstname), "lastname": str($apollo_users_settings.lastname), "password": str($apollo_users_settings.password), "user_email": str($apollo_users_settings.user_email)}
+    #silent $data_parameter_dict.update({"apollo_user": $apollo_user})
+#end if
+
+## Function to retrieve the data of the inputs
+#def prepare_json($datatype, $input_to_prepare, $order_index, $extra_data_dict={})
+    #set false_path = str($input_to_prepare)
+    #set $data_dict = {"false_path": $false_path}
+
+    #set name = str($input_to_prepare.name)
+    #silent $data_dict.update({"name": $name})
+    #silent $data_dict.update($extra_data_dict)
+    ## Add the ordering by taking the tool form indexes
+    #silent $data_dict.update({"order_index": $order_index})
+
+    #if $datatype in $data_parameter_dict
+        #silent $data_parameter_dict[$datatype].append($data_dict)
+    #else
+        #set array_inputs = []
+        #silent $array_inputs.append($data_dict)
+        #silent $data_parameter_dict.update({$datatype: $array_inputs})
+    #end if
+#end def
+
+## Get the maximum number of tracks in a group, to build a unique integer from the group index and the track index
+
+#set temp_max_digit = 0
+
+#for $g in $group
+    #if len($g.format) > $temp_max_digit
+        #silent temp_max_digit = len($g.format)
+    #end if
+#end for
+
+#set nb_digits_max_track = len(str($temp_max_digit))
+
+## END Get the number of digits
+
+#for $i_g, $g in enumerate( $group )
+    #for $i, $f in enumerate( $g.format )
+        ## Create the order index using index_group+1 concatenated with index_track
+        #set index_group_final = str($i_g + 1)
+        #set index_track_final = str($index_group_final) + str($i).zfill($nb_digits_max_track)
+
+        ## For each format, we have a few mandatory fields we store in a dict
+        #set track_color = str($f.formatChoice.track_color)
+        #set group_name = str($g.group_name)
+        #set longLabel = str($f.formatChoice.longLabel)
+        #set extra_data_dict = {"track_color": $track_color,
+                                "group_name": $group_name,
+                                "long_label": $longLabel}
+        #if $f.formatChoice.format_select == "bam"
+            #set bam_index = $f.formatChoice.BAM.metadata.bam_index
+
+            ## Add Bam format specific fields
+            #silent $extra_data_dict.update({"index": $bam_index})
+
+            #silent $prepare_json("Bam", $f.formatChoice.BAM, $index_track_final, $extra_data_dict)
+        #end if
+        #if $f.formatChoice.format_select == "bed"
+            #if $f.formatChoice.bedChoice.bed_select == "bed_generic"
+                #silent $prepare_json("Bed", $f.formatChoice.bedChoice.BED_generic, $index_track_final,
+                $extra_data_dict)
+            #end if
+            #if $f.formatChoice.bedChoice.bed_select == "bed_simple_repeats_option"
+                #silent $prepare_json("BedSimpleRepeats", $f.formatChoice.bedChoice.BED_simple_repeats, $index_track_final,
+                $extra_data_dict)
+            #end if
+            #if $f.formatChoice.bedChoice.bed_select == "bed_splice_junctions_option"
+                #silent $prepare_json("BedSpliceJunctions", $f.formatChoice.bedChoice.BED_splice_junctions, $index_track_final,
+                $extra_data_dict)
+            #end if
+            #if $f.formatChoice.bedChoice.bed_select == "bed_blast_alignment_option"
+                ##set database = str($f.formatChoice.bedChoice.database)
+                ##silent $extra_data_dict.update({"database": $database})
+                #silent $prepare_json("BedBlastAlignments", $f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final,
+                $extra_data_dict)
+            #end if
+            #if $f.formatChoice.bedChoice.bed_select == "bed_blat_alignment_option"
+                ##set database = str($f.formatChoice.bedChoice.database)
+                ##silent $extra_data_dict.update({"database": $database})
+                #silent $prepare_json("BedBlatAlignments", $f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final,
+                $extra_data_dict)
+            #end if
+        #end if
+        #if $f.formatChoice.format_select == "blastxml"
+            #silent $prepare_json("BlastXml", $f.formatChoice.BlastXML, $index_track_final, $extra_data_dict)
+        #end if
+        #if $f.formatChoice.format_select == "bigwig"
+            #set pos_color = str($f.formatChoice.pos_color)
+            #set neg_color = str($f.formatChoice.neg_color)
+            #silent $extra_data_dict.update({"style" : {"pos_color" : $pos_color, "neg_color" : $neg_color}})
+            #silent $prepare_json("BigWig", $f.formatChoice.BIGWIG, $index_track_final,
+            $extra_data_dict)
+        #end if
+        #if $f.formatChoice.format_select == 'gff3'
+            #if $f.formatChoice.gff3Choice.gff3_select == 'gff3_generic'
+                #silent $prepare_json("Gff3", $f.formatChoice.gff3Choice.GFF3_generic, $index_track_final,
+                $extra_data_dict)
+            #elif $f.formatChoice.gff3Choice.gff3_select == 'gff3_transcript'
+                #silent $prepare_json("Gff3_transcript", $f.formatChoice.gff3Choice.GFF3_transcript, $index_track_final,
+                $extra_data_dict)
+            #elif $f.formatChoice.gff3Choice.gff3_select == 'gff3_mrna'
+                #silent $prepare_json("Gff3_mrna", $f.formatChoice.gff3Choice.GFF3_mrna, $index_track_final,
+                $extra_data_dict)
+            #end if
+        #end if
+        #if $f.formatChoice.format_select == "gtf"
+            ## Also add GTF from Augustus? See https://github.com/ENCODE-DCC/kentUtils/issues/8
+            #silent $prepare_json("Gtf", $f.formatChoice.GTF, $index_track_final,
+            $extra_data_dict)
+        #end if
+    #end for
+#end for
+
+## We combine the fasta file dataset name with its false path in a JSON object
+#set fasta_json = {"false_path": str($fasta_file), "name": str($fasta_file.name)}
+#silent $data_parameter_dict.update({"fasta": $fasta_json})
+
+## Retrieve the user email
+#silent $data_parameter_dict.update({"user_email": str($__user_email__)})
+
+#silent $data_parameter_dict.update({"tool_directory": str($__tool_directory__)})
+
+#silent $data_parameter_dict.update({"extra_files_path": str($output.extra_files_path)})
+
+#silent $data_parameter_dict.update({"debug_mode": str($advanced_options.debug_mode)})
+
+with open(file_path, 'w') as f:
+    json.dump($data_parameter_dict, f)
+        </configfile>
+    </configfiles>
+
+    <inputs>
+        <param
+            name="genome_name"
+            type="text"
+            size="30"
+            value="unknown"
+            label="JBrowse Hub Name"
+        />
+        <param
+            format="fasta"
+            name="fasta_file"
+            type="data"
+            label="Reference genome"
+        />
+        <param
+            name="apollo_host"
+            type="text"
+            label="Apollo host"
+        />
+        <conditional name="apollo_users_settings">
+            <param name="apollo_users_selector" type="select" label="Create or specify your Apollo account">
+                <option value="no" selected="true">Use existing demo user account (your Galaxy email address will be used for Apollo; password: gonramp)</option>
+                <option value="yes">Create or use your own Apollo account</option>
+            </param>
+            <!-- TODO: Avoid redundancy here -->
+            <when value="yes">
+                <param
+                    name="firstname"
+                    type="text"
+                    label="First Name"
+                />
+                <param
+                    name="lastname"
+                    type="text"
+                    label="Last Name"
+                />
+                <param
+                    name="user_email"
+                    type="text"
+                    label="Email Address"
+                />
+                <param
+                    name="password"
+                    type="text"
+                    label="Password"
+                />
+            </when>
+            <when value="no">
+                <param name="default_user" type="hidden"
+                       value="false">
+                </param>
+            </when>
+        </conditional>
+
+        <repeat name="group" title="New group">
+            <param type="text" name="group_name" label="Group name" value="Default group"/>
+            <repeat name="format" title="New track">
+                <conditional name="formatChoice">
+                    <param name="format_select" type="select" label="Format">
+                        <option value="bam" selected="true">BAM</option>
+                        <option value="bed">BED</option>
+                        <option value="blastxml">BlastXML</option>
+                        <option value="bigwig">BigWig</option>
+                        <option value="gff3">GFF3</option>
+                        <option value="gtf">GTF</option>
+                    </param>
+
+                    <when value="bam">
+                        <param
+                            format="bam"
+                            name="BAM"
+                            type="data"
+                            label="BAM File"
+                        />
+                        <param name="longLabel" type="text" size="30" value="Sequence Alignment" label="Track label" />
+                        <param name="track_color" type="color" label="Track color" value="#000000">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                    </when>
+                    <when value="bed">
+                        <conditional name="bedChoice">
+                            <param name="bed_select" type="select" label="Bed Choice">
+                                <option value="bed_generic">BED format</option>
+                                <option value="bed_simple_repeats_option">BED Simple repeat (bed4+12 / simpleRepeat.as)</option>
+                                <option value="bed_splice_junctions_option">BED Splice junctions (bed12+1 / spliceJunctions.as)</option>
+                                <option value="bed_blast_alignment_option">Blast alignments (bed12+12 / bigPsl.as)</option>
+                                <option value="bed_blat_alignment_option">BLAT alignments (bigPsl / bigPsl.as)</option>
+                            </param>
+                            <when value="bed_generic">
+                                <param
+                                    format="bed"
+                                    name="BED_generic"
+                                    type="data"
+                                    label="Bed File"
+                                />
+                            </when>
+                            <when value="bed_simple_repeats_option">
+                                <param
+                                    format="bed"
+                                    name="BED_simple_repeats"
+                                    type="data"
+                                    label="Bed Simple Repeats (Bed4+12) File"
+                                />
+                            </when>
+                            <when value="bed_splice_junctions_option">
+                                <param
+                                    format="bed"
+                                    name="BED_splice_junctions"
+                                    type="data"
+                                    label="Bed Splice Junctions (Bed12+1) File"
+                                />
+                            </when>
+                            <when value="bed_blast_alignment_option">
+                                <param
+                                    format="bed"
+                                    name="BED_blast_alignment"
+                                    type="data"
+                                    label="Bed Blast Alignments (Bed12+12) File"
+                                />
+                            </when>
+                            <when value="bed_blat_alignment_option">
+                                <param
+                                    format="bed"
+                                    name="BED_blat_alignment"
+                                    type="data"
+                                    label="Bed BLAT Alignments (bigPsl) File"
+                                />
+                            </when>
+                        </conditional>
+                        <param name="longLabel" type="text" size="30" label="Track label" />
+                        <param name="track_color" type="color" label="Track color" value="#000000">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                    </when>
+                    <when value="blastxml">
+                        <param
+                            format="blastxml"
+                            name="BlastXML"
+                            type="data"
+                            label="Blast Alignments File"
+                        />
+                        <param name="longLabel" type="text" size="30" value="Blast Alignment" label="Track label" />
+                        <param name="track_color" type="color" label="Track color" value="#000000">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                    </when>
+                    <when value="bigwig">
+                        <param
+                            format="bigwig"
+                            name="BIGWIG"
+                            type="data"
+                            label="BIGWIG File"
+                        />
+                        <param name="longLabel" type="text" size="30" value="Sequence Coverage" label="Track label" />
+                        <param name="track_color" type="color" label="Track color" value="#000000">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                        <param name="pos_color" type="color" label="Positive Coverage Color" value="#FFA600">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                        <param name="neg_color" type="color" label="Negative Coverage Color" value="#005EFF">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                    </when>
+                    <when value="gff3">
+                        <conditional name="gff3Choice">
+                            <param name="gff3_select" type="select" label="gff3 type">
+                                <option value="gff3_generic">GFF3 format</option>
+                                <option value="gff3_transcript">GFF3 format output from gene prediction tools (e.g. Augustus), structure: gene->transcript->CDS</option>
+                                <option value="gff3_mrna">GFF3 format output from gene prediction tools (e.g. SNAP), structure: gene->mRNA->CDS</option>
+                            </param>
+                            <when value="gff3_generic">
+                                <param
+                                    format="gff3"
+                                    name="GFF3_generic"
+                                    type="data"
+                                    label="GFF3 File"
+                                />
+                            </when>
+                            <when value="gff3_transcript">
+                                <param
+                                    format="gff3"
+                                    name="GFF3_transcript"
+                                    type="data"
+                                    label="GFF3 File from gene prediction"
+                                />
+                            </when>
+                            <when value="gff3_mrna">
+                                <param
+                                    format="gff3"
+                                    name="GFF3_mrna"
+                                    type="data"
+                                    label="GFF3 File from gene prediction"
+                                />
+                            </when>
+                        </conditional>
+                        <param name="longLabel" type="text" size="30" value="Gene Prediction" label="Track label" />
+                        <param name="track_color" type="color" label="Track color" value="#daa520">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                    </when>
+                    <when value="gtf">
+                        <param
+                            format="gtf"
+                            name="GTF"
+                            type="data"
+                            label="GTF File"
+                        />
+                        <param name="longLabel" type="text" size="30" value="Assembled Transcripts" label="Track label" />
+                        <param name="track_color" type="color" label="Track color" value="#000000">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                    </when>
+                </conditional>
+            </repeat>
+        </repeat>
+        <conditional name="advanced_options">
+            <param name="advanced_options_selector" type="select" label="Advanced options">
+                <option value="off" selected="true">Hide advanced options</option>
+                <option value="on">Display advanced options</option>
+            </param>
+            <!-- TODO: Avoid redundancy here -->
+            <when value="on">
+                <param name="debug_mode" type="select" label="Activate debug mode">
+                    <option value="false" selected="true">No</option>
+                    <option value="true">Yes</option>
+                    <help>
+                        Use this option if you are a G-OnRamp developer
+                    </help>
+                </param>
+            </when>
+            <when value="off">
+                <param name="debug_mode" type="hidden"
+                       value="false">
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data format="jbrowsehub" name="output" label="${tool.name}" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="fasta_file" value="dbia3/raw/dbia3.fa" />
+            <param name="genome_name" value="unknown" />
+            <param name="group_name" value="Default group"/>
+            <param name="format_select" value="bam" />
+            <param name="BAM" value="dbia3/raw/HISAT.bam" />
+            <param name="longLabel" value="" />
+            <output name="output" file="JBrowse_Archive_Creator_html.html" />
+        </test>
+    </tests>
+    <help>
+        This Galaxy tool creates a JBrowse hub that includes the binary and JSON datasets needed for
+        JBrowse visualization.
+    </help>
+    <citations>
+    </citations>
+</tool>
\ No newline at end of file
--- a/jbrowse_hub.py Wed Jul 12 12:55:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,176 +0,0 @@ -#!/usr/bin/env python - -import sys -import argparse -import json -import utils -import trackObject -import TrackHub - - - -def main(argv): - parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.') - - # Reference genome mandatory - parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome (Required)') - - # Genome name - parser.add_argument('-g', '--genome_name', help='Name of reference genome') - - # Output folder - parser.add_argument('-o', '--out', help='output html') - - # Output folder - parser.add_argument('-e', '--extra_files_path', help='Directory of JBrowse Hub folder') - - #Tool Directory - parser.add_argument('-d', '--tool_directory', help='The directory of JBrowse file convertion scripts and UCSC tools') - - #GFF3 - parser.add_argument('--gff3', action='append', help='GFF3 format') - - # GFF3 structure: gene->transcription->CDS - parser.add_argument('--gff3_transcript', action='append', help='GFF3 format for gene prediction, structure: gene->transcription->CDS') - - # GFF3 structure: gene->mRNA->CDS - parser.add_argument('--gff3_mrna', action='append', help='GFF3 format for gene prediction, structure: gene->mRNA->CDS') - - # generic BED - parser.add_argument('--bed', action='append', help='BED format') - - # trfBig simple repeats (BED 4+12) - parser.add_argument('--bedSimpleRepeats', action='append', help='BED 4+12 format, using simpleRepeats.as') - - # regtools (BED 12+1) - parser.add_argument('--bedSpliceJunctions', action='append', help='BED 12+1 format, using spliceJunctions.as') - - # tblastn alignment (blastxml) - parser.add_argument('--blastxml', action='append', help='blastxml format from tblastn') - - # blat alignment (bigpsl 12+12) - parser.add_argument('--bigpsl', action='append', help='bigpsl format from blat alignment') - - # BAM format - parser.add_argument('--bam', action='append', help='BAM format from HISAT') - - # BIGWIG format - parser.add_argument('--bigwig', action='append', help='BIGWIG format to show rnaseq coverage') - - # GTF format - parser.add_argument('--gtf', action='append', help='GTF format from StringTie') - - # Metadata json format - parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs') - - #JBrowse host - parser.add_argument('--jbrowse_host', help="JBrowse Host") - - args = parser.parse_args() - all_datatype_dictionary = dict() - - - if not args.fasta: - parser.print_help() - raise RuntimeError("No reference genome\n") - reference = args.fasta - genome = 'unknown' - out_path = 'unknown.html' - extra_files_path = '.' - tool_directory = '.' 
- jbrowse_host = '' - if args.jbrowse_host: - jbrowse_host = args.jbrowse_host - if args.genome_name: - genome = args.genome_name - if args.out: - out_path = args.out - if args.extra_files_path: - extra_files_path = args.extra_files_path - - #tool_directory not work for Galaxy tool, all tools need to exist in the current PATH, deal with it with tool dependencies - if args.tool_directory: - tool_directory = args.tool_directory - - #Calculate chromsome sizes using genome reference and uscs tools - chrom_size = utils.getChromSizes(reference, tool_directory) - - #get metadata from json file - json_inputs_data = args.data_json - if json_inputs_data: - inputs_data = json.loads(json_inputs_data) - else: - inputs_data = {} - - #print inputs_data - - #Initate trackObject - all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path) - - array_inputs_bam = args.bam - array_inputs_bed = args.bed - array_inputs_bed_simple_repeats = args.bedSimpleRepeats - array_inputs_bed_splice_junctions = args.bedSpliceJunctions - array_inputs_bigwig = args.bigwig - array_inputs_gff3 = args.gff3 - array_inputs_gff3_transcript = args.gff3_transcript - array_inputs_gff3_mrna = args.gff3_mrna - array_inputs_gtf = args.gtf - array_inputs_blastxml = args.blastxml - array_inputs_bigpsl = args.bigpsl - - if array_inputs_bam: - all_datatype_dictionary['bam'] = array_inputs_bam - if array_inputs_bed: - all_datatype_dictionary['bed'] = array_inputs_bed - if array_inputs_bed_simple_repeats: - all_datatype_dictionary['bedSimpleRepeats'] = array_inputs_bed_simple_repeats - if array_inputs_bed_splice_junctions: - all_datatype_dictionary['bedSpliceJunctions'] = array_inputs_bed_splice_junctions - if array_inputs_bigwig: - all_datatype_dictionary['bigwig'] = array_inputs_bigwig - if array_inputs_gff3: - all_datatype_dictionary['gff3'] = array_inputs_gff3 - if array_inputs_gff3_transcript: - all_datatype_dictionary['gff3_transcript'] = array_inputs_gff3_transcript - if array_inputs_gff3_mrna: - all_datatype_dictionary['gff3_mrna'] = array_inputs_gff3_mrna - if array_inputs_gtf: - all_datatype_dictionary['gtf'] = array_inputs_gtf - if array_inputs_blastxml: - all_datatype_dictionary['blastxml'] = array_inputs_blastxml - if array_inputs_bigpsl: - all_datatype_dictionary['bigpsl'] = array_inputs_bigpsl - print "input tracks: \n", all_datatype_dictionary - - for datatype, inputfiles in all_datatype_dictionary.items(): - try: - if not inputfiles: - raise ValueError('empty input, must provide track files!\n') - except IOError: - print 'Cannot open', datatype - else: - for f in inputfiles: - #metadata = {} - #print f - #if f in inputs_data.keys(): - # metadata = inputs_data[f] - #print metadata - #Convert tracks into gff3 format - all_tracks.addToRaw(f, datatype) - - jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path, inputs_data, jbrowse_host) - jbrowseHub.createHub() - -""" -def extractMetadata(array_inputs, inputs_data): - metadata_dict = {} - for input_false_path in array_inputs: - for key, data_value in inputs_data.items(): - if key == input_false_path: - metadata_dict[input_false_path] -""" - -if __name__ == "__main__": - main(sys.argv) -
--- a/jbrowse_hub.xml Wed Jul 12 12:55:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,306 +0,0 @@ -<tool id="jbrowse_hub" name="JBrowse Archive Creator" version="1.0.0"> - <description> - This Galaxy tool is used to prepare your files to be ready for displaying on JBrowse - </description> - - <requirements> - <requirement type="package" version="1.2">samtools</requirement> - <requirement type="package" version="1.9">numpy</requirement> - <requirement type="package" version="1.68">biopython</requirement> - <requirement type="package" version="1.0">ucsc_tools_340</requirement> - <requirement type="package" version="1.12.1">jbrowse_tools</requirement> - </requirements> - - <stdio> - </stdio> - - <command detect_errors="exit_code"><![CDATA[ - python $__tool_directory__/jbrowse_hub.py - --fasta '$reference' - --genome_name '$genome_name' - - #set galaxy_url = str($GALAXY_URL) - #set $jbrowse_url = galaxy_url.replace("8080", "80") - --jbrowse_host '$jbrowse_url' - - ## json metadata recording from Remi's hub-archive-creator.xml - #import json - #set global data_parameter_dict = {} - - ## Function to retrieve the data of the inputs - #def prepare_json($input_to_prepare, $extra_data_dict={}) - #set false_path = str($input_to_prepare) - #set name = $input_to_prepare.name - - #set data_dict = {"name": $name} - #silent data_dict.update($extra_data_dict) - - #silent $data_parameter_dict.update({$false_path: $data_dict}) - - #end def - - #for $g in $group - #for $f in $g.format - #set track_label = str($f.formatChoice.label) - #set group_name = str($g.group_name) - #set extra_data_dict = {"label" : $track_label, "category" : $group_name} - #if $f.formatChoice.format_select == 'bed' - #set track_color = str($f.formatChoice.track_color) - #silent extra_data_dict.update({"color" : $track_color}) - #if $f.formatChoice.bedChoice.bed_select == 'bed_generic_option' - --bed $f.formatChoice.bedChoice.BED_generic - #silent $prepare_json($f.formatChoice.bedChoice.BED_generic, extra_data_dict) - #elif $f.formatChoice.bedChoice.bed_select == 'bed_simple_repeats_option' - --bedSimpleRepeats $f.formatChoice.bedChoice.BED_simple_repeats - #silent $prepare_json($f.formatChoice.bedChoice.BED_simple_repeats, extra_data_dict) - #elif $f.formatChoice.bedChoice.bed_select == 'bed_splice_junctions_option' - --bedSpliceJunctions $f.formatChoice.bedChoice.BED_splice_junctions - #silent $prepare_json($f.formatChoice.bedChoice.BED_splice_junctions, extra_data_dict) - #elif $f.formatChoice.bedChoice.bed_select == 'bigpsl' - --bigpsl $f.formatChoice.bedChoice.BigPsl - #silent $prepare_json($f.formatChoice.bedChoice.BigPsl, extra_data_dict) - #end if - #end if - #if $f.formatChoice.format_select == 'bam' - --bam $f.formatChoice.BAM - #silent $prepare_json($f.formatChoice.BAM, extra_data_dict) - #end if - #if $f.formatChoice.format_select == 'gff3' - #set track_color = str($f.formatChoice.track_color) - #silent extra_data_dict.update({"color" : $track_color}) - #if $f.formatChoice.gff3Choice.gff3_select == 'gff3_generic' - --gff3 $f.formatChoice.gff3Choice.GFF3_generic - #silent $prepare_json($f.formatChoice.gff3Choice.GFF3_generic, extra_data_dict) - #elif $f.formatChoice.gff3Choice.gff3_select == 'gff3_transcript' - --gff3_transcript $f.formatChoice.gff3Choice.GFF3_transcript - #silent $prepare_json($f.formatChoice.gff3Choice.GFF3_transcript, extra_data_dict) - #elif $f.formatChoice.gff3Choice.gff3_select == 'gff3_mrna' - --gff3_mrna $f.formatChoice.gff3Choice.GFF3_mrna - #silent 
$prepare_json($f.formatChoice.gff3Choice.GFF3_mrna, extra_data_dict) - #end if - #end if - #if $f.formatChoice.format_select == 'blastxml' - --blastxml $f.formatChoice.BlastXML - #silent $prepare_json($f.formatChoice.BlastXML, extra_data_dict) - #end if - #if $f.formatChoice.format_select == 'gtf' - --gtf $f.formatChoice.GTF - #set track_color = str($f.formatChoice.track_color) - #silent extra_data_dict.update({"color" : $track_color}) - #silent $prepare_json($f.formatChoice.GTF, extra_data_dict) - #end if - #if $f.formatChoice.format_select == 'bigwig' - --bigwig $f.formatChoice.BIGWIG - #set pos_color = str($f.formatChoice.pos_color) - #set neg_color = str($f.formatChoice.neg_color) - #silent $extra_data_dict.update({"style" : {"pos_color" : $pos_color, "neg_color" : $neg_color}}) - #silent $prepare_json($f.formatChoice.BIGWIG, extra_data_dict) - #end if - #end for - #end for - - #set all_data_json = json.dumps($data_parameter_dict) - -j '$all_data_json' - -e '$output.extra_files_path' - -o '$output' - - ]]></command> - - <inputs> - <param name="GALAXY_URL" type="baseurl" value="" /> - <param name="reference" type="data" format="fasta" label="Reference Genome" /> - <param name="genome_name" type="text" size="30" value="unknown" label="Genome name" /> - <repeat name="group" title="New group"> - <param type="text" name="group_name" label="Group name" value="Default group"/> - <repeat name="format" title="New track"> - <conditional name="formatChoice"> - <param name="format_select" type="select" label="Format"> - <option value="bam" selected="true">BAM</option> - <option value="bed">BED</option> - <option value="blastxml">BlastXML</option> - <option value="bigwig">BigWig</option> - <option value="gff3">GFF3</option> - <option value="gtf">GTF</option> - </param> - - <when value="bam"> - <param - format="bam" - name="BAM" - type="data" - label="BAM File" - /> - <param name="label" type="text" size="30" value = "Sequence Alignment" label="Track name" /> - </when> - <when value="bed"> - <conditional name="bedChoice"> - <param name="bed_select" type="select" label="Bed Choice"> - <option value="bed_generic_option">BED format</option> - <option value="bed_simple_repeats_option">BED Simple repeat (bed4+12 / simpleRepeat.as)</option> - <option value="bed_splice_junctions_option">BED Splice junctions (bed12+1 / spliceJunctions.as)</option> - <option value="bigpsl">Blat Alignment (bed12+12 / bigPsl.as)</option> - </param> - <when value="bed_generic_option"> - <param - format="bed" - name="BED_generic" - type="data" - label="Bed File" - /> - </when> - <when value="bed_simple_repeats_option"> - <param - format="bed" - name="BED_simple_repeats" - type="data" - label="Bed Simple Repeats (Bed4+12) File" - /> - </when> - <when value="bed_splice_junctions_option"> - <param - format="bed" - name="BED_splice_junctions" - type="data" - label="Bed Splice Junctions (Bed12+1) File" - /> - </when> - <when value="bigpsl"> - <param - format="bed" - name="BigPsl" - type="data" - label="Blat Alignments File" - /> - </when> - </conditional> - <param name="label" type="text" size="30" value="BED file" label="Track name" /> - <param name="track_color" type="color" label="Track color" value="#daa520"> - <sanitizer> - <valid initial="string.letters,string.digits"> - <add value="#"/> - </valid> - </sanitizer> - </param> - </when> - <when value="blastxml"> - <param - format="blastxml" - name="BlastXML" - type="data" - label="Blast Alignments File" - /> - <param name="label" type="text" size="30" value="Blast Alignment" 
label="Track name" /> - <param name="track_color" type="color" label="Track color" value="#daa520"> - <sanitizer> - <valid initial="string.letters,string.digits"> - <add value="#"/> - </valid> - </sanitizer> - </param> - </when> - <when value="bigwig"> - <param - format="bigwig" - name="BIGWIG" - type="data" - label="BIGWIG File" - /> - <param name="label" type="text" size="30" value="Sequence Coverage" label="Track name" /> - <param name="pos_color" type="color" label="Positive Coverage Color" value="#FFA600"> - <sanitizer> - <valid initial="string.letters,string.digits"> - <add value="#"/> - </valid> - </sanitizer> - </param> - <param name="neg_color" type="color" label="Negative Coverage Color" value="#005EFF"> - <sanitizer> - <valid initial="string.letters,string.digits"> - <add value="#"/> - </valid> - </sanitizer> - </param> - </when> - <when value="gff3"> - <conditional name="gff3Choice"> - <param name="gff3_select" type="select" label="gff3 type"> - <option value="gff3_generic">GFF3 format</option> - <option value="gff3_transcript">GFF3 format output from gene prediction tools (e.g. Augustus), structure: gene->transcription->CDS</option> - <option value="gff3_mrna">GFF3 format output from gene prediction tools (e.g. SNAP), structure: gene->mRNA->CDS</option> - </param> - <when value="gff3_generic"> - <param - format="gff3" - name="GFF3_generic" - type="data" - label="GFF3 File" - /> - </when> - <when value="gff3_transcript"> - <param - format="gff3" - name="GFF3_transcript" - type="data" - label="GFF3 File from gene prediction" - /> - </when> - <when value="gff3_mrna"> - <param - format="gff3" - name="GFF3_mrna" - type="data" - label="GFF3 File from gene prediction" - /> - </when> - </conditional> - <param name="label" type="text" size="30" value="Gene Prediction" label="Track name" /> - <param name="track_color" type="color" label="Track color" value="#daa520"> - <sanitizer> - <valid initial="string.letters,string.digits"> - <add value="#"/> - </valid> - </sanitizer> - </param> - </when> - <when value="gtf"> - <param - format="gtf" - name="GTF" - type="data" - label="GTF File" - /> - <param name="label" type="text" size="30" value="Assembled Transcripts" label="Track name" /> - <param name="track_color" type="color" label="Track color" value="#daa520"> - <sanitizer> - <valid initial="string.letters,string.digits"> - <add value="#"/> - </valid> - </sanitizer> - </param> - </when> - </conditional> - </repeat> - </repeat> - </inputs> - - <outputs> - <data format="jbrowsehub" name="output" label="${tool.name}" /> - </outputs> - <tests> - <test> - <param name="reference" value="dbia3/raw/dbia3.fa" /> - <param name="genome_name" value="unknown" /> - <param name="group_name" value="Default group"/> - <param name="format_select" value="bam" /> - <param name="BAM" value="dbia3/raw/HISAT.bam" /> - <param name="label" value="" /> - <output name="output" file="JBrowse_Archive_Creator_html.html" /> - </test> - </tests> - <help> - This Galaxy tool will create a tar file which including raw datasets and json datasets that can be used for - JBrowse visualization. - </help> - <citations> - </citations> -</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/logging.json Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,48 @@ +{ + "version": 1, + "disable_existing_loggers": false, + "formatters": { + "simple": { + "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + } + }, + + "handlers": { + "console": { + "class": "logging.StreamHandler", + "level": "INFO", + "formatter": "simple", + "stream": "ext://sys.stdout" + }, + + "console_stderr": { + "class": "logging.StreamHandler", + "level": "ERROR", + "formatter": "simple", + "stream": "ext://sys.stderr" + }, + + "debug_file_handler": { + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "simple", + "filename": "__main__.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + } + }, + + "loggers": { + "Reader": { + "level": "INFO", + "handlers": ["console"], + "propagate": "yes" + } + }, + + "root": { + "level": "DEBUG", + "handlers": ["console", "console_stderr", "debug_file_handler"] + } +} \ No newline at end of file
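For reference, this configuration is consumed with logging.config.dictConfig; util/Logger.py (added later in this changeset) wires it up with the debug flag and output-folder handling. A minimal sketch of the mechanism, assuming logging.json sits in the working directory:

    import json
    import logging
    import logging.config

    # Minimal sketch: apply logging.json, then log through the configured "Reader" logger.
    with open('logging.json') as f:
        logging.config.dictConfig(json.load(f))
    logging.getLogger('Reader').info("logging configured from logging.json")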
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spliceJunctions.as Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,17 @@ +table spliceJunctions +"Predicted splice junctions" + ( + string chrom; "Reference sequence chromosome or scaffold" + uint chromStart; "Start position in chromosome" + uint chromEnd; "End position in chromosome" + string name; "Name of item" + uint score; "Score from 0-1000" + char[1] strand; "+ or -" + uint thickStart; "Start of where display should be thick (start codon)" + uint thickEnd; "End of where display should be thick (stop codon)" + uint reserved; "Used as itemRgb as of 2004-11-22" + int blockCount; "Number of blocks" + int[blockCount] blockSizes; "Comma separated list of block sizes" + int[blockCount] chromStarts; "Start positions relative to chromStart" + uint junctionScore; "Number of reads supporting the splice junction" + )
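This autoSql schema describes the bed12+1 splice-junction files (e.g. regtools output) so they can be checked before conversion. A hedged sketch of validating such a file through the validateFiles wrapper added in util/subtools.py below; the file names are hypothetical and the UCSC validateFiles binary must be on PATH:

    from util import subtools

    # Hypothetical inputs: a tab-separated bed12+1 file and a chrom.sizes file.
    subtools.validateFiles('junctions.bed', 'dbia3.chrom.sizes', 'bed12+1',
                           options={'tab': True, 'autoSql': 'spliceJunctions.as'})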
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/templates/custom_track_styles.css Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,9 @@ +.${label}, +.plus-${label}, +.minus-${label} +{ + background-color: ${color}; + height: 90%; + top: 5%; +} +
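The template above is a Mako template with two placeholders, ${label} and ${color}; tracks/TrackStyles.py below renders and appends it once per customized track. A standalone sketch of the same rendering, with illustrative values:

    from mako.template import Template

    # Render one CSS rule for a hypothetical feature class.
    css = Template(filename='templates/custom_track_styles.css',
                   output_encoding='utf-8').render(label='gonramp_exon', color='#daa520')
    print(css)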
--- a/trackObject.py Wed Jul 12 12:55:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,71 +0,0 @@ -#!/usr/bin/env python - -import os -import shutil -import utils -import bedToGff3 -import blastxmlToGff3 - - -class trackObject: - def __init__(self, chrom_size, genome, extra_files_path): - self.chrom_size = chrom_size - outputDirect = os.path.join(extra_files_path, 'myHub') - self.raw_folder = os.path.join(outputDirect, 'raw') - #Store metadata of the tracks - self.tracks = [] - try: - if os.path.exists(self.raw_folder): - if os.path.isdir(self.raw_folder): - shutil.rmtree(self.raw_folder) - else: - os.remove(self.raw_folder) - os.makedirs(self.raw_folder) - except OSError as oserror: - print "Cannot create raw folder error({0}): {1}".format(oserror.errno, oserror.strerror) - - def addToRaw(self, dataFile, dataType): - """ - Convert gff3, BED, blastxml and gtf files into gff3 files - and store converted files in folder 'raw' - """ - false_path = os.path.abspath(dataFile) - fileName = os.path.basename(dataFile) - des_path = os.path.join(self.raw_folder, fileName) - track = {} - if dataType == 'bed' or dataType == 'gff3' or dataType == 'gff3_mrna' or dataType == 'gff3_transcript' or dataType == 'fasta' or dataType == 'bam' or dataType == 'bigwig': - if dataType == 'bam': - # JBrowse will raise error: not a BAM file if the filename hasn't .bam extension - extension = os.path.splitext(fileName)[1] - if extension != '.bam': - fileName = fileName + '.bam' - des_path = os.path.join(self.raw_folder, fileName) - bam_index = utils.createBamIndex(dataFile) - indexname = os.path.basename(bam_index) - des_path_for_index = os.path.join(self.raw_folder, indexname) - shutil.copyfile(bam_index, des_path_for_index) - track['index'] = indexname - - try: - shutil.copyfile(dataFile, des_path) - except shutil.Error as err1: - print "Cannot move file, error({0}: {1})".format(err1.errno, err1.strerror) - except IOError as err2: - print "Cannot move file, error({0}: {1})".format(err2.errno, err2.strerror) - elif dataType == 'bedSimpleRepeats': - bedToGff3.bedToGff3(dataFile, self.chrom_size, 'trfbig', des_path) - elif dataType == 'bedSpliceJunctions': - bedToGff3.bedToGff3(dataFile, self.chrom_size, 'regtools', des_path) - elif dataType == 'bigpsl': - bedToGff3.bedToGff3(dataFile, self.chrom_size, 'blat', des_path) - elif dataType == 'blastxml': - blastxmlToGff3.blastxml2gff3(dataFile, des_path) - elif dataType == 'gtf': - utils.gtfToGff3(dataFile, des_path, self.chrom_size) - track['fileName'] = fileName - track['dataType'] = dataType - track['false_path'] = false_path - #self.SetMetadata(track, metaData) - self.tracks.append(track) - - \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tracks/BamFeatures.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,29 @@ +#!/usr/bin/env python +import os +import json +import logging + +from TrackDb import TrackDb +from util import subtools +from util import santitizer + + +class BamFeatures(TrackDb): + def __init__(self, trackName, trackLabel, trackDataURL, trackType, dataType, extraSettings=None): + super(BamFeatures, self).__init__(trackName, trackLabel, trackDataURL, trackType, dataType, extraSettings) + + def prepareExtraSetting(self): + if 'category' not in self.extraSettings or not self.extraSettings['category']: + self.extraSettings['category'] = "Default group" + bam_track = dict() + bam_track['type'] = 'JBrowse/View/Track/Alignments2' + bam_track['storeClass'] = 'JBrowse/Store/SeqFeature/BAM' + bam_track['urlTemplate'] = os.path.join('bbi', self.trackName) + bam_track['baiUrlTemplate'] = os.path.join('bbi', self.extraSettings['index']) + bam_track['label'] = self.trackLabel + bam_track['category'] = self.extraSettings['category'] + #extraConfigs = json.dumps(bam_track) + extraConfigs = bam_track + return extraConfigs + + \ No newline at end of file
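A sketch of how a BamFeatures object might be driven (createTrackDb comes from the TrackDb base class below); the file names and settings are hypothetical:

    from tracks.BamFeatures import BamFeatures

    # Hypothetical BAM track whose data and .bai index live under the hub's bbi/ folder.
    track = BamFeatures(trackName='HISAT.bam', trackLabel='Sequence Alignment',
                        trackDataURL='bbi/HISAT.bam',
                        trackType='JBrowse/View/Track/Alignments2', dataType='bam',
                        extraSettings={'index': 'HISAT.bam.bai', 'category': 'Default group'})
    track.createTrackDb()
    # track.track_db['options'] now carries the storeClass/urlTemplate settings shown above.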
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tracks/BigwigFeatures.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,45 @@ +#!/usr/bin/env python +import os +import json +import logging + +from TrackDb import TrackDb +from util import subtools +from util import santitizer + + +class BigwigFeatures(TrackDb): + def __init__(self, trackName, trackLabel, trackDataURL, trackType, dataType, extraSettings=None): + super(BigwigFeatures, self).__init__(trackName, trackLabel, trackDataURL, trackType, dataType, extraSettings) + + def prepareExtraSetting(self): + if 'category' not in self.extraSettings or not self.extraSettings['category']: + self.extraSettings['category'] = "Default group" + if 'color' not in self.extraSettings or not self.extraSettings['color']: + self.extraSettings['style'] = {} + self.extraSettings['style']['pos_color'] = "#FFA600" + else: + self.extraSettings['style'] = {} + self.extraSettings['style']['pos_color'] = self.extraSettings['color'] + + + ''' + if 'style' not in self.extraSettings: + self.extraSettings['style'] = {} + if 'pos_color' not in self.extraSettings['style'] or self.extraSettings['style']['pos_color'] == '': + self.extraSettings['style']['pos_color'] = "#FFA600" + if 'neg_color' not in self.extraSettings['style'] or self.extraSettings['style']['neg_color'] == '': + self.extraSettings['style']['neg_color'] = "#005EFF" + ''' + bigwig_track = dict() + bigwig_track['urlTemplate'] = os.path.join('bbi', self.trackName) + bigwig_track['type'] = 'JBrowse/View/Track/Wiggle/XYPlot' + bigwig_track['storeClass'] = 'JBrowse/Store/SeqFeature/BigWig' + bigwig_track['label'] = self.trackLabel + bigwig_track['style'] = self.extraSettings['style'] + bigwig_track['category'] = self.extraSettings['category'] + #extraConfigs = json.dumps(bigwig_track) + extraConfigs = bigwig_track + return extraConfigs + + \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tracks/CanvasFeatures.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,32 @@ +#!/usr/bin/env python +import json +import logging + +from TrackDb import TrackDb +from util import subtools + + +class CanvasFeatures(TrackDb): + def __init__(self, trackName, trackLabel, trackDataURL, trackType, dataType, extraSettings=None): + super(CanvasFeatures, self).__init__(trackName, trackLabel, trackDataURL, trackType, dataType, extraSettings) + + def prepareExtraSetting(self): + """ set CanvasFeatures configuration options """ + extraConfigs = dict() + self.extraSettings["clientConfig"] = dict() + self.extraSettings["config"] = dict() + if 'color' not in self.extraSettings or not self.extraSettings['color']: + self.extraSettings["clientConfig"]['color'] = "#daa520" + else: + self.extraSettings["clientConfig"]['color'] = self.extraSettings['color'] + if 'category' not in self.extraSettings or not self.extraSettings['category']: + self.extraSettings["config"]['category'] = "Default group" + else: + self.extraSettings["config"]['category'] = self.extraSettings['category'] + if 'glyph' in self.extraSettings: + self.extraSettings["config"]['glyph'] = self.extraSettings['glyph'] + if 'transcriptType' in self.extraSettings: + self.extraSettings['config']['transcriptType'] = self.extraSettings['transcriptType'] + extraConfigs["config"] = json.dumps(self.extraSettings["config"]) + extraConfigs["clientConfig"] = json.dumps(self.extraSettings["clientConfig"]) + return extraConfigs \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tracks/HTMLFeatures.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,39 @@ +#!/usr/bin/env python +import json +import logging + +from TrackDb import TrackDb +from util import subtools +from util import santitizer + + +class HTMLFeatures(TrackDb): + def __init__(self, trackName, trackLabel, trackDataURL, trackType, dataType, extraSettings=None): + super(HTMLFeatures, self).__init__(trackName, trackLabel, trackDataURL, trackType, dataType, extraSettings) + + def prepareExtraSetting(self): + """ set HTMLFeatures configuration options """ + extraConfigs = dict() + self.extraSettings["clientConfig"] = dict() + self.extraSettings["config"] = dict() + if 'type' in self.extraSettings: + extraConfigs["type"] = self.extraSettings['type'] + if 'color' in self.extraSettings and self.extraSettings['color']: + extraConfigs['feature_color'] = self.extraSettings['color'] + else: + extraConfigs['feature_color'] = "#000000" + #self.extraSettings['clientConfig']['color'] = self.extraSettings['color'] + if 'subfeatureClasses' in self.extraSettings: + subfeature_css_class = santitizer.sanitize_name(self.trackLabel + "_" + self.extraSettings['subfeatureClasses']) + extraConfigs['subfeatureClasses'] = {self.extraSettings['subfeatureClasses']: subfeature_css_class} + + if 'category' not in self.extraSettings or not self.extraSettings['category']: + self.extraSettings['config']['category'] = "Default group" + else: + self.extraSettings['config']['category'] = self.extraSettings['category'] + + extraConfigs['config'] = json.dumps(self.extraSettings["config"]) + extraConfigs['clientConfig'] = json.dumps(self.extraSettings["clientConfig"]) + return extraConfigs + + \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tracks/TrackDb.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,53 @@
+#!/usr/bin/python
+"""
+Superclass of the track configuration objects
+"""
+import os
+import abc
+from abc import ABCMeta
+import collections
+import json
+import logging
+from util import santitizer
+
+class TrackDb(object):
+    """Base class holding the common metadata of a JBrowse track"""
+    __metaclass__ = ABCMeta
+
+    def __init__(self, trackName, trackLabel, trackDataURL, trackType, dataType, extraSettings=None):
+        #super(TrackDb, self).__init__()
+
+        not_init_message = "The {0} is not initialized."
+        if trackName is None:
+            raise TypeError(not_init_message.format('trackName'))
+        if trackLabel is None:
+            raise TypeError(not_init_message.format('trackLabel'))
+        if trackType is None:
+            raise TypeError(not_init_message.format('trackType'))
+        self.trackName = trackName
+        self.trackLabel = trackLabel
+        self.trackDataURL = trackDataURL
+        self.trackType = trackType
+        self.dataType = dataType
+        self.extraSettings = extraSettings
+        self.logger = logging.getLogger(__name__)
+        #self.createTrackDb()
+
+    def createTrackDb(self):
+        self.track_db = collections.OrderedDict([("track", self.trackName),
+                                                 ("trackLabel", self.trackLabel),
+                                                 ("trackDataURL", self.trackDataURL),
+                                                 ("dataType", self.dataType),
+                                                 ("trackType", self.trackType)]
+                                                )
+
+        extraConfigs = self.prepareExtraSetting()
+        self.logger.debug("Generate extraConfigs = %s", json.dumps(extraConfigs))
+        self.track_db["options"] = extraConfigs
+        #print self.track_db
+        self.logger.debug("TrackDb object is created track_db = %s ", json.dumps(self.track_db))
+
+    @abc.abstractmethod
+    def prepareExtraSetting(self):
+        """ set optional configurations for the track """
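Concrete tracks only have to implement prepareExtraSetting; everything else (argument validation, the ordered track_db dict, logging) is inherited. A minimal, hypothetical subclass for illustration:

    from tracks.TrackDb import TrackDb

    class MinimalFeatures(TrackDb):
        """Hypothetical subclass: the only required hook is prepareExtraSetting."""
        def prepareExtraSetting(self):
            # Fall back to the same default category used by the shipped subclasses.
            return {'category': (self.extraSettings or {}).get('category', 'Default group')}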
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tracks/TrackStyles.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,58 @@ +#!/usr/bin/env python +import os +import json +import logging +from mako.lookup import TemplateLookup + +class TrackStyles(object): + def __init__(self, tool_directory, species_folder, trackListFile, cssFolderName="css", cssFileName="custom_track_styles.css"): + self.logger = logging.getLogger(__name__) + self.tool_directory = tool_directory + self.species_folder = species_folder + self.trackList = trackListFile + self.cssFolderName = cssFolderName + self.cssFileName = cssFileName + self.cssFilePath = self._createCssFile() + self.cssTemplate = self._getCssTemplate() + self._addCssToTrackList() + + + def addCustomColor(self, feature_class_name, feature_color): + with open(self.cssFilePath, 'a+') as css: + htmlMakoRendered = self.cssTemplate.render( + label = feature_class_name, + color = feature_color + ) + css.write(htmlMakoRendered) + self.logger.debug("create customized track css class: cssFilePath= %s", self.cssFilePath) + + + def _createCssFile(self): + cssFolderPath = os.path.join(self.species_folder, self.cssFolderName) + cssFilePath = os.path.join(cssFolderPath, self.cssFileName) + if not os.path.exists(cssFilePath): + if not os.path.exists(cssFolderPath): + os.mkdir(cssFolderPath) + os.mknod(cssFilePath) + return cssFilePath + + def _getCssTemplate(self): + mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates')], + output_encoding='utf-8', encoding_errors='replace') + cssTemplate = mylookup.get_template("custom_track_styles.css") + return cssTemplate + + + def _addCssToTrackList(self): + with open(self.trackList, 'r+') as track: + data = json.load(track) + css_path = os.path.join('data', self.cssFolderName, self.cssFileName) + data['css'] = {'url': css_path} + json_string = json.dumps(data, indent=4, separators=(',', ': ')) + track.seek(0) + track.write(json_string) + track.truncate() + self.logger.debug("added customized css url to trackList.json") + + + \ No newline at end of file
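Usage sketch, assuming a species folder that already contains a trackList.json; note the constructor creates the css/ folder and file itself via os.mknod, so a Linux host is assumed, and all paths here are hypothetical:

    from tracks.TrackStyles import TrackStyles

    styles = TrackStyles('/path/to/tool', '/path/to/myHub/dbia3',
                         '/path/to/myHub/dbia3/trackList.json')
    styles.addCustomColor('gonramp_exon', '#daa520')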
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trf_simpleRepeat.as Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,20 @@ +table simpleRepeat +"Describes the Simple Tandem Repeats" + ( + string chrom; "Reference sequence chromosome or scaffold" + uint chromStart; "Start position in chromosome" + uint chromEnd; "End position in chromosome" + string name; "Simple Repeats tag name" + uint period; "Length of repeat unit" + float copyNum; "Mean number of copies of repeat" + uint consensusSize; "Length of consensus sequence" + uint perMatch; "Percentage Match" + uint perIndel; "Percentage Indel" + uint score; "Alignment Score = 2*match-7*mismatch-7*indel; minscore=50" + uint A; "Percent of A's in repeat unit" + uint C; "Percent of C's in repeat unit" + uint G; "Percent of G's in repeat unit" + uint T; "Percent of T's in repeat unit" + float entropy; "Entropy" + lstring sequence; "Sequence of repeat unit element" + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/util/Logger.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,38 @@
+import os
+import sys
+import json
+import logging
+import logging.config
+
+#from util.Filters import TraceBackFormatter
+
+class Logger(object):
+    def __init__(self, tool_directory, debug="False", extra_files_path=None):
+        self.tool_directory = tool_directory
+        self.default_level = logging.INFO
+        self.debug = debug
+        self.extra_files_path = extra_files_path
+
+    def setup_logging(self):
+        """Set up the logging configuration
+        reference: https://fangpenlin.com/posts/2012/08/26/good-logging-practice-in-python/
+        """
+        config_path = os.path.join(self.tool_directory, 'logging.json')
+        default_level = logging.INFO
+        if self.debug.lower() == "true":
+            default_level = logging.DEBUG
+        if os.path.exists(config_path):
+            with open(config_path, 'rt') as f:
+                config = json.load(f)
+            config["handlers"]["console"]["level"] = default_level
+            if self.extra_files_path:
+                for i in config["handlers"]:
+                    if "filename" in config["handlers"][i]:
+                        config["handlers"][i]["filename"] = os.path.join(self.extra_files_path, config["handlers"][i]["filename"])
+            else:
+                logging.warning("The extra files path is not set. Log files will be written to the current working directory instead of the final output folder")
+            logging.config.dictConfig(config)
+        else:
+            logging.basicConfig(level=default_level)
+            logging.warning("Cannot find the logging configuration file!\n")
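A short usage sketch; the wrapper passes debug as a string, and the paths are hypothetical:

    from util.Logger import Logger

    Logger('/path/to/tool', debug="True",
           extra_files_path='/path/to/output').setup_logging()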
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/util/Reader.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,146 @@
+import json
+import logging
+import codecs
+
+
+# Internal dependencies
+from datatypes.binary.Bam import Bam
+from datatypes.binary.BigWig import BigWig
+from datatypes.interval.Bed import Bed
+from datatypes.interval.BedSimpleRepeats import BedSimpleRepeats
+from datatypes.interval.BedSpliceJunctions import BedSpliceJunctions
+from datatypes.interval.BlastXml import BlastXml
+from datatypes.interval.Gff3 import Gff3
+from datatypes.interval.Gff3_mrna import Gff3_mrna
+from datatypes.interval.Gff3_transcript import Gff3_transcript
+from datatypes.interval.Gtf import Gtf
+from datatypes.interval.GtfStringTie import GtfStringTie
+from datatypes.interval.BigPsl import BigPsl
+from datatypes.interval.BedBlatAlignments import BedBlatAlignments
+from datatypes.interval.BedBlastAlignments import BedBlastAlignments
+from datatypes.interval.Psl import Psl
+from datatypes.sequence.Fasta import Fasta
+from apollo.ApolloUser import ApolloUser
+from util import santitizer
+
+class Reader(object):
+
+    DATATYPE_CLASS = [Bam, BigWig, Bed, BedSimpleRepeats,
+                      BedSpliceJunctions, BigPsl, BedBlatAlignments, BedBlastAlignments,
+                      BlastXml, Gff3, Gff3_mrna, Gff3_transcript, Gtf, GtfStringTie, Psl, Fasta]
+
+    def __init__(self, input_json_file):
+        self.inputFile = input_json_file
+        self.args = self.loadJson()
+        self.logger = logging.getLogger(__name__)
+
+
+    def loadJson(self):
+        try:
+            data_file = codecs.open(self.inputFile, 'r', 'utf-8')
+            return json.load(data_file)
+        except IOError:
+            print "Cannot find JSON file\n"
+            exit(1)
+
+    def getToolDir(self):
+        try:
+            return self.args["tool_directory"]
+        except KeyError:
+            print ("tool_directory is not defined in the input file!")
+            exit(1)
+
+    def getExtFilesPath(self):
+        try:
+            return self.args["extra_files_path"]
+        except KeyError:
+            print ("extra_files_path is not defined in the input file!")
+            exit(1)
+
+    def getUserEmail(self):
+        try:
+            return self.args["user_email"]
+        except KeyError:
+            print ("user_email is not defined in the input file!")
+            exit(1)
+
+    def getDebugMode(self):
+        try:
+            return self.args["debug_mode"]
+        except KeyError:
+            print ("debug_mode is not defined in the input file!")
+            exit(1)
+
+    def getTrackType(self):
+        track_type = self.args.get("track_type")
+        return track_type
+
+    def getApolloHost(self):
+        apollo_host = self.args.get("apollo_host")
+        return apollo_host
+
+
+    def getRefGenome(self):
+        array_inputs_reference_genome = self.args["fasta"]
+        # TODO: Replace these with the object Fasta
+        input_fasta_file = array_inputs_reference_genome["false_path"]
+        input_fasta_file_name = santitizer.sanitize_name_input(array_inputs_reference_genome["name"])
+        genome_name = santitizer.sanitize_name_input(self.args["genome_name"])
+        reference_genome = Fasta(input_fasta_file,
+                                 input_fasta_file_name, genome_name)
+        return reference_genome
+
+    def getApolloUser(self):
+        user_info = self.args.get("apollo_user")
+        if not user_info:
+            firstname = "demo"
+            lastname = "user"
+            password = "gonramp"
+            user_email = self.getUserEmail()
+        else:
+            firstname = user_info['firstname']
+            lastname = user_info['lastname']
+            user_email = user_info['user_email']
+            password = user_info['password']
+        apollo_user = ApolloUser(user_email, firstname, lastname, password)
+        return apollo_user
+
+    def getTracksData(self):
+        all_datatype_dictionary = dict()
+        for datatype in self.DATATYPE_CLASS:
+            class_name = datatype.__name__
+            array_inputs = self.args.get(str(class_name))
+            if array_inputs:
+                self.logger.debug("Creating %s objects\n", class_name)
+                self.logger.debug("array_inputs: %s", array_inputs)
+                all_datatype_dictionary.update(self.create_ordered_datatype_objects(datatype, array_inputs))
+
+        return all_datatype_dictionary
+
+    def create_ordered_datatype_objects(self, ExtensionClass, array_inputs):
+        """
+        Create all the necessary files / folders for the given Datatype, for TrackHub,
+        and update the dictionary of datatypes
+
+        :param ExtensionClass:
+        :param array_inputs:
+        :type ExtensionClass: Datatype
+        :type array_inputs: list[string]
+        """
+
+        datatype_dictionary = {}
+
+        # TODO: Optimize this double loop
+        for input_data in array_inputs:
+            input_false_path = input_data["false_path"]
+            input_data["name"] = santitizer.sanitize_name_input(input_data["name"])
+            extensionObject = ExtensionClass(input_false_path, input_data)
+            extensionObject.generateCustomTrack()
+            datatype_dictionary.update({input_data["order_index"]: extensionObject})
+            self.logger.debug("%s object: %s has been created", ExtensionClass, input_data["name"])
+        return datatype_dictionary
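A sketch of the intended call sequence, assuming input.json is the payload the Galaxy wrapper writes for its -j option (path hypothetical; note getTracksData also triggers generateCustomTrack on every datatype object):

    from util.Reader import Reader

    reader = Reader('input.json')
    reference = reader.getRefGenome()
    apollo_user = reader.getApolloUser()
    tracks = reader.getTracksData()   # {order_index: datatype object}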
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/util/index/DatabaseIndex.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,44 @@ +#!/usr/bin/python + +import collections +from ExternIndex import ExternIndex + +class DatabaseIndex(ExternIndex): + def __init__(self, database, **args): + self.database = database + self.seqType=args.get("seqType") + self.useIframe=args.get("useIframe") + self.iframeHeight=args.get("iframeHeight") + self.iframeWidth=args.get("iframeWidth") + + def setExtLink(self): + return self.setDatabaseLink(self.database, self.seqType, self.useIframe, self.iframeHeight, self.iframeWidth) + + + def setDatabaseLink(self, database, seqType=None, useIframe=None, iframeHeight=None, iframeWidth=None): + database_settings = collections.OrderedDict() + if "NCBI" in database: + if not seqType: + database_settings["url"] = "https://www.ncbi.nlm.nih.gov/gquery/?term=$$" + elif seqType == 2: + database_settings["url"] = "https://www.ncbi.nlm.nih.gov/protein/$$" + elif seqType == 1: + database_settings["url"] = "https://www.ncbi.nlm.nih.gov/nuccore/$$" + else: + raise Exception("Sequence Type {0} is not valid, should be either protein (seqType==2) or nucleotide (seqType==1). Stopping the application".format(seqType)) + elif "UniProt" in database: + database_settings["url"] = "http://www.uniprot.org/uniprot/$$" + elif "FlyBase" in database: + database_settings["url"] = "http://flybase.org/reports/$$" + else: + database_settings["url"] = "https://www.ncbi.nlm.nih.gov/gquery/?term=$$" + database_settings["urlLabel"] = database + " Details:" + if useIframe or useIframe is None: + database_settings["iframeUrl"] = database_settings["url"] + if not iframeHeight: + iframeHeight = "600" + if not iframeWidth: + iframeWidth = "800" + database_settings["iframeOptions"] = "height= %s width= %s" % (iframeHeight, iframeWidth) + return database_settings +
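For example (values illustrative), a nucleotide NCBI link resolves as follows; $$ is the placeholder JBrowse substitutes with the feature name:

    from util.index.DatabaseIndex import DatabaseIndex

    settings = DatabaseIndex('NCBI', seqType=1).setExtLink()
    # settings['url'] == 'https://www.ncbi.nlm.nih.gov/nuccore/$$'
    # settings['iframeOptions'] == 'height= 600 width= 800'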
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/util/index/ExternIndex.py Fri Oct 13 12:44:31 2017 -0400 @@ -0,0 +1,16 @@ +#!/usr/bin/python +import collections +import abc +from abc import ABCMeta + +class ExternIndex(object): + __metaclass__ = ABCMeta + + @abc.abstractmethod + def __init__(self): + """init""" + + @abc.abstractmethod + def setExtLink(self): + """set external link""" + \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/util/index/TrixIndex.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,55 @@
+#!/usr/bin/python
+
+import os
+import collections
+import shutil
+import logging
+from ExternIndex import ExternIndex
+
+class TrixIndex(ExternIndex):
+    def __init__(self, indexIx, indexIxx, trackName, mySpecieFolderPath, trixId, **args):
+        self.logger = logging.getLogger(__name__)
+        self.indexIx = indexIx
+        self.indexIxx = indexIxx
+        self.trackName = trackName
+        self.mySpecieFolderPath = mySpecieFolderPath
+        self.trixId = trixId.strip()
+        if not self.trixId:
+            self.logger.error("No Trix identifier was specified. To use a TRIX index, you must provide the identifier")
+            exit(1)
+        if "default_index" in args:
+            self.default_index = args["default_index"]
+        else:
+            self.default_index = None
+        self.index_settings = collections.OrderedDict()
+
+    def setExtLink(self):
+        self.setSearchIndex()
+        self.moveIndexFile()
+        self.index_settings["searchTrix"] = "trix/%s" % self.indexIxName
+        return self.index_settings
+
+    def moveIndexFile(self):
+        indexFolder = os.path.join(self.mySpecieFolderPath, 'trix')
+        self.indexIxName = "".join((self.trackName, ".ix"))
+        self.indexIxxName = "".join((self.trackName, ".ixx"))
+        if not os.path.exists(indexFolder):
+            os.makedirs(indexFolder)
+
+        # Copy the index files to the index folder
+        self.indexIxPath = os.path.join(indexFolder, self.indexIxName)
+        shutil.copyfile(self.indexIx, self.indexIxPath)
+        self.indexIxxPath = os.path.join(indexFolder, self.indexIxxName)
+        shutil.copyfile(self.indexIxx, self.indexIxxPath)
+
+    def setSearchIndex(self):
+        if self.default_index:
+            set_index = set()
+            set_index.add(self.trixId)
+            set_index.add(self.default_index)
+            search_index = ",".join(set_index)
+        else:
+            search_index = self.trixId
+        self.logger.debug("trixId= %s, searchIndex= %s", self.trixId, search_index)
+        self.index_settings["searchIndex"] = search_index
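Usage sketch with hypothetical .ix/.ixx files (as produced by the UCSC ixIxx tool); the files are copied under <species folder>/trix/ and the returned settings are merged into the track configuration:

    from util.index.TrixIndex import TrixIndex

    index = TrixIndex('myTrack.ix', 'myTrack.ixx', 'gonramp_myTrack',
                      '/path/to/myHub/dbia3', 'name', default_index='name')
    settings = index.setExtLink()
    # {'searchIndex': 'name', 'searchTrix': 'trix/gonramp_myTrack.ix'}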
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/util/santitizer.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+This module gathers the helper functions used to sanitize
+track, file and group names
+"""
+
+import logging
+import os
+import subprocess
+import sys
+import string
+import tempfile
+
+
+def prefixTrackName(filename):
+    """
+    Sanitize the track name. A track name must begin with a letter and
+    contain only the following chars: [a-zA-Z0-9_].
+    See the "track" Common settings at:
+    https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html#bigPsl_-_Pairwise_Alignments
+    The sanitization is skipped for the cytoBandIdeo track
+    """
+    if filename == 'cytoBandIdeo':
+        return filename
+    valid_chars = "_%s%s" % (string.ascii_letters, string.digits)
+    sanitized_name = ''.join([c if c in valid_chars else '_' for c in filename])
+    sanitized_name = "gonramp_" + sanitized_name
+    return sanitized_name
+
+def sanitize_name_input(string_to_sanitize):
+    """
+    Sanitize the string passed in parameter by replacing '/' and ' ' with '_'
+
+    :param string_to_sanitize:
+    :return :
+
+    :Example:
+
+    >>> sanitize_name_input('this/is an//example')
+    'this_is_an__example'
+    """
+    return string_to_sanitize \
+        .replace("/", "_") \
+        .replace(" ", "_")
+
+def sanitize_name_inputs(inputs_data):
+    """
+    Sanitize the value of the "name" key for every entry of the dictionary passed in parameter.
+
+    This is needed because outputs from Galaxy, or even plain file names from user inputs, can contain spaces.
+    They can also contain the '/' character, which would break the use of os.path functions.
+
+    :param inputs_data: dict[string, dict[string, string]]
+    """
+    for key in inputs_data:
+        inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"])
+
+def sanitize_group_name(group_name):
+    return group_name.lower().replace(' ', '_')
+
+def sanitize_name(input_name):
+    """
+    Galaxy names all the files and dirs as *.dat;
+    this function replaces '.' with '_' so the names can be used as dirs
+    """
+    validChars = "_-%s%s" % (string.ascii_letters, string.digits)
+    sanitized_name = ''.join([c if c in validChars else '_' for c in input_name])
+    return "gonramp_" + sanitized_name
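A few concrete outcomes of these helpers, worked out from the rules above:

    from util import santitizer

    santitizer.prefixTrackName('HISAT.bam')                 # 'gonramp_HISAT_bam'
    santitizer.sanitize_name_input('this/is an//example')   # 'this_is_an__example'
    santitizer.sanitize_group_name('Default group')         # 'default_group'
    santitizer.sanitize_name('dataset_1.dat')               # 'gonramp_dataset_1_dat'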
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/util/subtools.py Fri Oct 13 12:44:31 2017 -0400
@@ -0,0 +1,372 @@
+#!/usr/bin/env python
+
+"""
+This file includes commonly used helper functions: thin wrappers around
+external tool calls, plus utilities for converting file formats to gff3
+"""
+from collections import OrderedDict
+import json
+import subprocess
+import os
+import sys
+import tempfile
+import string
+import logging
+
+class PopenError(Exception):
+    def __init__(self, cmd, error, return_code):
+        self.cmd = cmd
+        self.error = error
+        self.return_code = return_code
+
+    def __str__(self):
+        message = "The subprocess {0} has returned the error: {1}.".format(
+            self.cmd, self.return_code)
+        message = ','.join(
+            (message, "Its error message is: {0}".format(self.error)))
+        return repr(message)
+
+
+def _handleExceptionAndCheckCall(array_call, **kwargs):
+    """
+    This function handles exceptions and calls the tool.
+    It mirrors the signature of subprocess.check_call:
+    See https://docs.python.org/2/library/subprocess.html#subprocess.check_call
+    """
+    stdout = kwargs.get('stdout', subprocess.PIPE)
+    stderr = kwargs.get('stderr', subprocess.PIPE)
+    shell = kwargs.get('shell', False)
+    stdin = kwargs.get('stdin', None)
+
+    cmd = array_call[0]
+
+    output = None
+    error = None
+
+    # TODO: Check the value of array_call and <=[0]
+    logging.debug("Calling {0}:".format(cmd))
+    logging.debug("%s", array_call)
+    logging.debug("---------")
+
+    # TODO: Use universal_newlines option from Popen?
+    try:
+        p = subprocess.Popen(array_call, stdout=stdout,
+                             stderr=stderr, shell=shell, stdin=stdin)
+
+        # TODO: Change this because of possible memory issues => https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate
+
+        output, error = p.communicate()
+
+        if stdout == subprocess.PIPE:
+            logging.debug("\t{0}".format(output))
+        else:
+            logging.debug("\tOutput in file {0}".format(stdout.name))
+        # If we detect an error from the subprocess, then we raise an exception
+        # TODO: Manage if we raise an exception for everything, or use CRITICAL etc... but not stop process
+        # TODO: The responsibility of returning a sys.exit() should not be there, but up in the app.
+        if p.returncode:
+            if stderr == subprocess.PIPE:
+                raise PopenError(cmd, error, p.returncode)
+            else:
+                # TODO: Handle properly, with a design behind it, the case where we receive an option as a file for the error
+                raise Exception("Error when calling {0}. Error has been logged in your file {1}. Error code: {2}"
+                                .format(cmd, stderr.name, p.returncode))
+
+    except OSError as e:
+        message = "The subprocess {0} has encountered an OSError: {1}".format(
+            cmd, e.strerror)
+        if e.filename:
+            message = '\n'.join(
+                (message, ", against this file: {0}".format(e.filename)))
+        logging.error(message)
+        sys.exit(-1)
+    except PopenError as p:
+        message = "The subprocess {0} has returned the error: {1}.".format(
+            p.cmd, p.return_code)
+        message = '\n'.join(
+            (message, "Its error message is: {0}".format(p.error)))
+
+        logging.exception(message)
+
+        sys.exit(p.return_code)
+    except Exception as e:
+        message = "The subprocess {0} has encountered an unknown error: {1}".format(
+            cmd, e)
+        logging.exception(message)
+
+        sys.exit(-1)
+    return p
+
+
+def write_features(field, attribute, gff3):
+    """
+    This function writes the features in gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)
+    field and attribute are ordered dictionaries
+    gff3 is the file handler
+    """
+    attr = []
+    for v in field.values():
+        gff3.write(str(v) + '\t')
+    for k, v in attribute.items():
+        s = str(k) + '=' + str(v)
+        attr.append(s)
+    gff3.write(';'.join(attr))
+    gff3.write('\n')
+
+def twoBitInfo(two_bit_file_name, two_bit_info_file):
+    """
+    Call twoBitInfo and write the result into twoBit_info_file
+    :param two_bit_file_name:
+    :param two_bit_info_file:
+    :return the subprocess.check_call return object:
+    """
+    array_call = ['twoBitInfo', two_bit_file_name, two_bit_info_file]
+    p = _handleExceptionAndCheckCall(array_call)
+    return p
+
+
+def faToTwoBit(fasta_file_name, twoBitFile):
+    """
+    This function calls the faToTwoBit UCSC tool and returns the twoBitFile
+    :param fasta_file_name:
+    :param twoBitFile:
+    :return:
+    """
+
+    array_call = ['faToTwoBit', fasta_file_name, twoBitFile]
+    _handleExceptionAndCheckCall(array_call)
+
+    return twoBitFile
+
+def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name):
+    """
+    Call sort with -k2rn on two_bit_info_file_name and write the result into chrom_sizes_file_name
+    :param two_bit_info_file_name:
+    :param chrom_sizes_file_name:
+    :return:
+    """
+    array_call = ['sort', '-k2rn', two_bit_info_file_name,
+                  '-o', chrom_sizes_file_name]
+    p = _handleExceptionAndCheckCall(array_call)
+    return p
+
+def getChromSizes(reference, tool_dir):
+    #TODO: find a better way instead of shipping the two exec files with the tool
+    fa_to_two_bit = os.path.join(tool_dir, 'faToTwoBit')
+    two_bit_info = os.path.join(tool_dir, 'twoBitInfo')
+    try:
+        twoBitFile = tempfile.NamedTemporaryFile(bufsize=0)
+        chrom_sizes = tempfile.NamedTemporaryFile(bufsize=0, suffix='.chrom.sizes', delete=False)
+    except IOError as err:
+        print "Cannot create tempfile err({0}): {1}".format(err.errno, err.strerror)
+    try:
+        subprocess.call([fa_to_two_bit, reference, twoBitFile.name])
+    except OSError as err:
+        print "Cannot generate twoBitFile from faToTwoBit err({0}): {1}".format(err.errno, err.strerror)
+    try:
+        subprocess.call([two_bit_info, twoBitFile.name, chrom_sizes.name])
+    except OSError as err:
+        print "Cannot generate chrom_sizes from twoBitInfo err({0}): {1}".format(err.errno, err.strerror)
+    return chrom_sizes
+
+def sequence_region(chrom_sizes):
+    """
+    This function reads a chrom.sizes file generated by twoBitInfo
+    and returns the information as a dict mapping chromosome name to size
+    """
+    f = open(chrom_sizes, 'r')
+    sizes = f.readlines()
+    sizes_dict = {}
+    for line in sizes:
+        chrom_info = line.rstrip().split('\t')
+        sizes_dict[chrom_info[0]] = chrom_info[1]
+    return sizes_dict
+
+def child_blocks(parent_field, parent_attr, gff3, child_type):
+    num = 0
+    blockcount = int(parent_attr['blockcount'])
+    chromstart = parent_attr['chromstarts'].split(',')
+    blocksize = parent_attr['blocksizes'].split(',')
+    parent_start = parent_field['start']
+    while num < blockcount:
+        child_attr = OrderedDict()
+        child_field = parent_field
+        child_field['type'] = child_type
+        child_field['start'] = int(chromstart[num]) + int(parent_start)
+        child_field['end'] = int(child_field['start']) + int(blocksize[num]) - 1
+        child_attr['ID'] = parent_attr['ID'] + '_part_' + str(num+1)
+        child_attr['Parent'] = parent_attr['ID']
+        write_features(child_field, child_attr, gff3)
+        num = num + 1
+
+def add_tracks_to_json(trackList_json, new_tracks, modify_type):
+    """
+    Update the track configuration file (trackList.json)
+    # modify_type = 'add_tracks': add a new track, e.g. bam or bigwig; new_tracks = dict()
+    # modify_type = 'add_attr': add configuration to an existing track; new_tracks = dict(track_name: dict())
+    """
+    with open(trackList_json, 'r+') as f:
+        data = json.load(f)
+        if modify_type == 'add_tracks':
+            data['tracks'].append(new_tracks)
+        elif modify_type == 'add_attr':
+            for name in new_tracks:
+                for track in data['tracks']:
+                    if name.lower() in track['urlTemplate'].lower():
+                        attr = new_tracks[name]
+                        for k, v in attr.items():
+                            track[k] = v
+        f.seek(0, 0)
+        f.write(json.dumps(data, separators=(',' , ':'), indent=4))
+        f.truncate()
+
+
+def createBamIndex(bamfile):
+    subprocess.call(['samtools', 'index', bamfile])
+    filename = bamfile + '.bai'
+    if os.path.exists(filename):
+        return filename
+    else:
+        raise ValueError('Did not find bai file')
+
+def flatfile_to_json(inputFile, dataType, trackType, trackLabel, outputFolder, options=None, compress=False):
+    if "bed" in dataType:
+        fileType = "--bed"
+    elif "gff" in dataType:
+        fileType = "--gff"
+    else:
+        raise ValueError("%s is not a valid filetype for flatfile_to_json" % dataType)
+
+
+    array_call = ['flatfile-to-json.pl',
+                  fileType, inputFile,
+                  '--trackType', trackType,
+                  '--trackLabel', trackLabel,
+                  '--out', outputFolder]
+    if compress:
+        array_call.append('--compress')
+    if options:
+        config = options.get("config")
+        clientConfig = options.get("clientConfig")
+        renderClassName = options.get('renderClassName')
+        subfeatureClasses = options.get('subfeatureClasses')
+        load_type = options.get("type")
+        if clientConfig:
+            array_call.append('--clientConfig')
+            array_call.append(clientConfig)
+        if config:
+            array_call.append('--config')
+            array_call.append(config)
+        if load_type:
+            array_call.append('--type')
+            array_call.append(load_type)
+        if renderClassName:
+            array_call.append('--renderClassName')
+            array_call.append(renderClassName)
+        if subfeatureClasses:
+            array_call.append('--subfeatureClasses')
+            array_call.append(json.dumps(subfeatureClasses))
+
+    p = _handleExceptionAndCheckCall(array_call)
+    return p
+
+def bam_to_json(inputFile, trackLabel, outputFolder, options=None, compress=False):
+
+    array_call = ['bam-to-json.pl',
+                  '--bam', inputFile,
+                  '--trackLabel', trackLabel,
+                  '--out', outputFolder]
+    if compress:
+        array_call.append('--compress')
+    if options:
+        config = options.get('config')
+        clientConfig = options.get('clientConfig')
+        if clientConfig:
+            array_call.append('--clientConfig')
+            array_call.append(clientConfig)
+        if config:
+            array_call.append('--config')
+            array_call.append(config)
+
+    p = _handleExceptionAndCheckCall(array_call)
+    return p
+
+def add_track_json(trackList, track_json):
+    track_json = 
json.dumps(track_json) + new_track = subprocess.Popen(['echo', track_json], stdout=subprocess.PIPE) + p = subprocess.call(['add-track-json.pl', trackList], stdin=new_track.stdout) + return p + +def prepare_refseqs(fasta_file_name, outputFolder): + array_call = ['prepare-refseqs.pl', '--fasta', fasta_file_name, '--out', outputFolder] + p = _handleExceptionAndCheckCall(array_call) + return p + +def generate_names(outputFolder): + array_call = ['generate-names.pl', '-v', '--out', outputFolder] + p = _handleExceptionAndCheckCall(array_call) + return p + +def validateFiles(input_file, chrom_sizes_file_name, file_type, options=None): + """ + Call validateFiles on input_file, using chrom_sizes_file_name and file_type + :param input_file: + :param chrom_sizes_file_name: + :param file_type: + :return: + """ + + array_call = ['validateFiles', '-chromInfo=' + chrom_sizes_file_name, '-type='+ file_type, input_file] + if options: + tab = options.get("tab") + autoSql = options.get("autoSql") + logging.debug("tab: {0}".format(tab)) + logging.debug("autoSql: {0}".format(autoSql)) + if autoSql: + autoSql = ''.join(['-as=', autoSql]) + array_call.append(autoSql) + if tab: + array_call.append('-tab') + p = _handleExceptionAndCheckCall(array_call) + return p + +def arrow_add_organism(organism_name, organism_dir, public=False): + array_call = ['arrow', 'organisms', 'add_organism', organism_name, organism_dir] + if public: + array_call.append('--public') + p = subprocess.check_output(array_call) + return p + +def arrow_create_user(user_email, firstname, lastname, password, admin=False): + """ Create a new user of Apollo, the default user_role is "user" """ + array_call = ['arrow', 'users', 'create_user', user_email, firstname, lastname, password] + if admin: + array_call += ['--role', 'admin'] + p = subprocess.check_output(array_call) + return p + +def arrow_update_organism_permissions(user_id, organism, **user_permissions): + array_call = ['arrow', 'users', 'update_organism_permissions', str(user_id), str(organism)] + admin = user_permissions.get("admin", False) + write = user_permissions.get("write", False) + read = user_permissions.get("read", False) + export = user_permissions.get("export", False) + if admin: + array_call.append('--administrate') + if write: + array_call.append('--write') + if read: + array_call.append('--read') + if export: + array_call.append('--export') + p = subprocess.check_output(array_call) + return p + +def arrow_get_users(user_email): + array_call = ['arrow', 'users', 'get_users'] + p = subprocess.check_output(array_call) + all_users = json.loads(p) + for d in all_users: + if d['username'] == user_email: + return d['userId'] + logging.error("Cannot find user %s", user_email)
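Taken together, these wrappers cover the usual JBrowse setup sequence. A hedged end-to-end sketch, assuming the JBrowse perl scripts are on PATH and using hypothetical paths:

    from util import subtools

    subtools.prepare_refseqs('dbia3.fa', 'myHub/dbia3')             # prepare-refseqs.pl
    subtools.flatfile_to_json('genes.gff3', 'gff3', 'HTMLFeatures',
                              'gonramp_genes', 'myHub/dbia3')       # flatfile-to-json.pl
    subtools.generate_names('myHub/dbia3')                          # generate-names.pl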
--- a/utils.py Wed Jul 12 12:55:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,162 +0,0 @@ -#!/usr/bin/env python - -""" -This file include common used functions for converting file format to gff3 -""" -from collections import OrderedDict -import json -import subprocess -import os -import tempfile -import string - -def write_features(field, attribute, gff3): - """ - The function write the features to gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md) - field, attribute are ordered dictionary - gff3 is the file handler - """ - attr = [] - for v in field.values(): - gff3.write(str(v) + '\t') - for k, v in attribute.items(): - s = str(k) + '=' + str(v) - attr.append(s) - gff3.write(';'.join(attr)) - gff3.write('\n') - -def getChromSizes(reference, tool_dir): - #TODO: find a better way instead of shipping the two exec files with the tool - faToTwoBit = os.path.join(tool_dir, 'faToTwoBit') - twoBitInfo = os.path.join(tool_dir, 'twoBitInfo') - try: - twoBitFile = tempfile.NamedTemporaryFile(bufsize=0) - chrom_sizes = tempfile.NamedTemporaryFile(bufsize=0, suffix='.chrom.sizes', delete=False) - except IOError as err: - print "Cannot create tempfile err({0}): {1}".format(err.errno, err.strerror) - try: - subprocess.call(['faToTwoBit', reference, twoBitFile.name]) - except OSError as err: - print "Cannot generate twoBitFile from faToTwoBit err({0}): {1}".format(err.errno, err.strerror) - try: - subprocess.call(['twoBitInfo', twoBitFile.name, chrom_sizes.name]) - except OSError as err: - print "Cannot generate chrom_sizes from twoBitInfo err({0}): {1}".format(err.errno, err.strerror) - return chrom_sizes - -def sequence_region(chrom_sizes): - """ - This function read from a chromatin size file generated by twoBitInfo and write the information to dict - return a dict - """ - f = open(chrom_sizes, 'r') - sizes = f.readlines() - sizes_dict = {} - for line in sizes: - chrom_info = line.rstrip().split('\t') - sizes_dict[chrom_info[0]] = chrom_info[1] - return sizes_dict - -def child_blocks(parent_field, parent_attr, gff3, child_type): - num = 0 - blockcount = int(parent_attr['blockcount']) - chromstart = parent_attr['chromstarts'].split(',') - blocksize = parent_attr['blocksizes'].split(',') - parent_start = parent_field['start'] - while num < blockcount: - child_attr = OrderedDict() - child_field = parent_field - child_field['type'] = child_type - child_field['start'] = int(chromstart[num]) + int(parent_start) - child_field['end'] = int(child_field['start']) + int(blocksize[num]) - 1 - child_attr['ID'] = parent_attr['ID'] + '_part_' + str(num+1) - child_attr['Parent'] = parent_attr['ID'] - write_features(child_field, child_attr, gff3) - num = num + 1 - -def add_tracks_to_json(trackList_json, new_tracks, modify_type): - """ - Add to track configuration (trackList.json) - # modify_type = 'add_tracks': add a new track like bam or bigwig, new_track = dict() - # modify_type = 'add_attr': add configuration to the existing track, new_track = dict(track_name: dict()) - """ - with open(trackList_json, 'r+') as f: - data = json.load(f) - if modify_type == 'add_tracks': - data['tracks'].append(new_tracks) - elif modify_type == 'add_attr': - for k in new_tracks: - for track in data['tracks']: - if k.lower() in track['urlTemplate'].lower(): - attr = new_tracks[k] - for k, v in attr.items(): - track[k] = v - f.seek(0, 0) - f.write(json.dumps(data, separators=(',' , ':'), indent=4)) - f.truncate() - f.close() - -def gtfToGff3(gtf_file, gff3_file, 
chrom_sizes): - """ - Covert gtf file output from StringTie to gff3 format - """ - gff3 = open(gff3_file, 'w') - gff3.write("##gff-version 3\n") - sizes_dict = sequence_region(chrom_sizes) - seq_regions = dict() - parents = dict() - with open(gtf_file, 'r') as gtf: - for line in gtf: - if line.startswith('#'): - continue - field = OrderedDict() - attribute = OrderedDict() - li = line.rstrip().split("\t") - #print li - field['seqid'] = li[0] - #print field['seqid'] - if field['seqid'] not in seq_regions: - end_region = sizes_dict[field['seqid']] - gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n') - seq_regions[field['seqid']] = end_region - field['source'] = li[1] - field['type'] = li[2] - # The first base in a chromosome is numbered 0 in BED format - field['start'] = li[3] - field['end'] = li[4] - field['score'] = li[5] - field['strand'] = li[6] - field['phase'] = li[7] - attr_li = li[8].split(';') - gene_id = attr_li[0].split()[1].strip('"') - attribute['ID'] = gene_id + '_' + field['type'] + '_' + str(field['start']) + '_' + str(field['end']) - if field['type'] == 'transcript': - parents[gene_id] = attribute['ID'] - attribute['transcript_id'] = attr_li[1].split()[1].strip('"') - attribute['coverage'] = attr_li[2].split()[1].strip('"') - attribute['fpkm'] = attr_li[3].split()[1].strip('"') - attribute['tpm'] = attr_li[4].split()[1].strip('"') - elif field['type'] == 'exon': - attribute['Parent'] = parents[gene_id] - attribute['transcript_id'] = attr_li[1].split()[1].strip('"') - attribute['coverage'] = attr_li[3].split()[1].strip('"') - write_features(field, attribute, gff3) - gff3.close() - - -def sanitize_name(input_name): - """ - Galaxy will name all the files and dirs as *.dat, - the function can replace '.' to '_' for the dirs - """ - validChars = "_-%s%s" % (string.ascii_letters, string.digits) - sanitized_name = ''.join([c if c in validChars else '_' for c in input_name]) - return "gonramp_" + sanitized_name - -def createBamIndex(bamfile): - subprocess.call(['samtools', 'index', bamfile]) - filename = bamfile + '.bai' - if os.path.exists(filename): - return filename - else: - raise ValueError('Did not find bai file')