changeset 1:646b05282962

Uploaded
author jorrit
date Tue, 12 Feb 2013 13:46:10 -0500
parents 4f8ea94020b1
children 474ea11c8581
files annotation.py datatypes_conf.xml ontology.py registry.py termenrichment.py
diffstat 5 files changed, 864 insertions(+), 0 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/annotation.py	Tue Feb 12 13:46:10 2013 -0500
@@ -0,0 +1,42 @@
+"""
+Annotation datatypes
+
+"""
+import pkg_resources
+pkg_resources.require( "bx-python" )
+
+import logging, os, sys, time, tempfile, shutil
+import data
+from galaxy import util
+from galaxy.datatypes.sniff import *
+from galaxy.web import url_for
+from cgi import escape
+import urllib
+from bx.intervals.io import *
+from galaxy.datatypes import metadata
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.tabular import Tabular
+import math
+
+log = logging.getLogger(__name__)
+
+class Gaf( Tabular ):
+    """Tab delimited data in Gene Ontology Association File (GAF) format"""
+    file_ext = "gaf"
+
+    def init_meta( self, dataset, copy_from=None ):
+        data.Text.init_meta( self, dataset, copy_from=copy_from )
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in GAF format
+        """
+        headers = get_headers( filename, '\t' )
+        try:
+            for hdr in headers:
+                if hdr and hdr[0].startswith( '!gaf-version:' ) :
+                    return True
+            return False
+        except:
+            return False
+
+
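Usage note: the Gaf sniffer above keys on a "!gaf-version:" header line. The sketch below is a hypothetical stand-alone restatement of that check; looks_like_gaf and the simplified get_headers stand-in are illustration only, not the helpers imported from galaxy.datatypes.sniff.

    def get_headers( filename, sep, count=60 ):
        # Simplified stand-in for galaxy.datatypes.sniff.get_headers: split each of
        # the first `count` lines of the file on `sep`.
        headers = []
        handle = open( filename )
        try:
            for idx, line in enumerate( handle ):
                if idx >= count:
                    break
                headers.append( line.rstrip( '\r\n' ).split( sep ) )
        finally:
            handle.close()
        return headers

    def looks_like_gaf( filename ):
        # Mirrors Gaf.sniff(): any early line beginning with '!gaf-version:' wins.
        for hdr in get_headers( filename, '\t' ):
            if hdr and hdr[0].startswith( '!gaf-version:' ):
                return True
        return False

    # A file whose first line is "!gaf-version: 2.0" would return True here.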
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Tue Feb 12 13:46:10 2013 -0500
@@ -0,0 +1,17 @@
+<!-- see http://wiki.galaxyproject.org/ToolShedDatatypesFeatures for more info -->
+<?xml version="1.0"?>
+<datatypes>
+    <datatypes_files>
+        <datatype_file name="annotation.py"/>
+        <datatype_file name="ontology.py"/>
+        <datatype_file name="registry.py"/>
+        <datatype_file name="termenrichment.py"/>
+    </datatypes_files>
+    <registration>
+        <datatype extension="ontology" type="galaxy.datatypes.ontology:Ontology" display_in_upload="true"/>
+        <datatype extension="obo" type="galaxy.datatypes.ontology:Obo" display_in_upload="true"/>
+        <datatype extension="owl" type="galaxy.datatypes.ontology:Owl" display_in_upload="true"/>
+        <datatype extension="gaf" type="galaxy.datatypes.annotation:Gaf" display_in_upload="true"/>
+        <datatype extension="terf" type="galaxy.datatypes.termenrichment:TerfTab" display_in_upload="true"/>
+    </registration>
+</datatypes>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ontology.py	Tue Feb 12 13:46:10 2013 -0500
@@ -0,0 +1,90 @@
+"""
+Ontology datatypes
+
+"""
+import pkg_resources
+pkg_resources.require( "bx-python" )
+
+import logging, os, sys, time, tempfile, shutil
+import data
+from galaxy import util
+from galaxy.datatypes.sniff import *
+from galaxy.web import url_for
+from cgi import escape
+import urllib
+from bx.intervals.io import *
+from galaxy.datatypes import metadata
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.tabular import Tabular
+import math
+
+log = logging.getLogger(__name__)
+
+class Ontology( data.Text ):
+    """Any ontology Format"""
+    file_ext = "ontology"
+
+    def init_meta( self, dataset, copy_from=None ):
+        data.Text.init_meta( self, dataset, copy_from=copy_from )
+
+
+class Obo( Ontology ):
+    """OBO Format"""
+    file_ext = "obo"
+
+    def init_meta( self, dataset, copy_from=None ):
+        data.Text.init_meta( self, dataset, copy_from=copy_from )
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in OBO format
+        """
+        headers = get_headers( filename, '\n' )
+        try:
+            for hdr in headers:
+                if hdr and hdr[0].startswith( 'format-version:' ) :
+                    return True
+            return False
+        except:
+            return False
+
+class Owl( Ontology ):
+    """OWL"""
+    file_ext = "owl"
+
+    def init_meta( self, dataset, copy_from=None ):
+        data.Text.init_meta( self, dataset, copy_from=copy_from )
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in OWL format
+        """
+        headers = get_headers( filename, '\n' )
+        try:
+            for hdr in headers:
+                if hdr and hdr[0].find( '<owl' ) > -1 :
+                    return True
+                if hdr and hdr[0].find( 'http://www.w3.org/2002/07/owl' ) > -1 :
+                    return True
+            return False
+        except:
+            return False
+
+
+class OwlRdfXML( Owl ):
+    """OWL RDF/XML"""
+
+    def init_meta( self, dataset, copy_from=None ):
+        data.Text.init_meta( self, dataset, copy_from=copy_from )
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in OWL RDF-XML format
+        """
+        headers = get_headers( filename, '\n' )
+        try:
+            for hdr in headers:
+                if hdr and hdr[0].find( '<owl:Ontology' ) > -1 :
+                    return True
+            return False
+        except:
+            return False
+
+
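The three ontology sniffers above key on different header cues. The sketch below is a condensed, hypothetical restatement of that logic (classify_ontology_header is not Galaxy API), checking the cues from most to least specific.

    def classify_ontology_header( line ):
        # Condensed restatement of the Obo / OwlRdfXML / Owl sniff checks above.
        if line.startswith( 'format-version:' ):
            return 'obo'
        if '<owl:Ontology' in line:
            return 'owl (RDF/XML ontology header)'
        if '<owl' in line or 'http://www.w3.org/2002/07/owl' in line:
            return 'owl'
        return None

    print( classify_ontology_header( 'format-version: 1.2' ) )          # obo
    print( classify_ontology_header( '<owl:Ontology rdf:about="">' ) )  # owl (RDF/XML ontology header)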
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/registry.py	Tue Feb 12 13:46:10 2013 -0500
@@ -0,0 +1,664 @@
+"""
+Provides mapping between extensions and datatypes, mime-types, etc.
+"""
+import os, sys, tempfile, threading, logging
+import data, tabular, interval, images, sequence, qualityscore, genetics, xml, coverage, tracks, chrominfo, binary, assembly, ngsindex, wsf, annotation, ontology, termenrichment
+import galaxy.util
+from galaxy.util.odict import odict
+from display_applications.application import DisplayApplication
+
+class ConfigurationError( Exception ):
+    pass
+
+class Registry( object ):
+    def __init__( self ):
+        self.log = logging.getLogger(__name__)
+        self.log.addHandler( logging.NullHandler() )
+        self.datatypes_by_extension = {}
+        self.mimetypes_by_extension = {}
+        self.datatype_converters = odict()
+        # Converters defined in local datatypes_conf.xml
+        self.converters = []
+        # Converters defined in datatypes_conf.xml included in installed tool shed repositories.
+        self.proprietary_converters = []
+        self.converter_deps = {}
+        self.available_tracks = []
+        self.set_external_metadata_tool = None
+        self.sniff_order = []
+        self.upload_file_formats = []
+        # Datatype elements defined in local datatypes_conf.xml that contain display applications.
+        self.display_app_containers = []
+        # Datatype elements in datatypes_conf.xml included in installed
+        # tool shed repositories that contain display applications.
+        self.proprietary_display_app_containers = []
+        # Map a display application id to a display application
+        self.display_applications = odict()
+        # The following 2 attributes are used in the to_xml_file()
+        # method to persist the current state into an xml file.
+        self.display_path_attr = None
+        self.converters_path_attr = None
+        # The 'default' converters_path defined in local datatypes_conf.xml
+        self.converters_path = None
+        # The 'default' display_path defined in local datatypes_conf.xml
+        self.display_applications_path = None
+        self.inherit_display_application_by_class = []
+        # Keep a list of imported proprietary datatype class modules.
+        self.imported_modules = []
+        self.datatype_elems = []
+        self.sniffer_elems = []
+        self.xml_filename = None
+    def load_datatypes( self, root_dir=None, config=None, deactivate=False, override=True ):
+        """
+        Parse a datatypes XML file located at root_dir/config.  If deactivate is True, an installed tool shed
+        repository that includes proprietary datatypes is being deactivated, so appropriate loaded datatypes
+        will be removed from the registry.  The value of override will be False when a tool shed repository is
+        being installed.  Since installation is occurring after the datatypes registry has been initialized, its
+        contents cannot be overridden by newly introduced conflicting datatypes.
+        """
+        def __import_module( full_path, datatype_module ):
+            sys.path.insert( 0, full_path )
+            imported_module = __import__( datatype_module )
+            sys.path.pop( 0 )
+            return imported_module
+        if root_dir and config:
+            handling_proprietary_datatypes = False
+            # Parse datatypes_conf.xml
+            tree = galaxy.util.parse_xml( config )
+            root = tree.getroot()
+            # Load datatypes and converters from config
+            if deactivate:
+                self.log.debug( 'Deactivating datatypes from %s' % config )
+            else:
+                self.log.debug( 'Loading datatypes from %s' % config )
+            registration = root.find( 'registration' )
+            # Set default paths defined in local datatypes_conf.xml.
+            if not self.converters_path:
+                self.converters_path_attr = registration.get( 'converters_path', 'lib/galaxy/datatypes/converters' )
+                self.converters_path = os.path.join( root_dir, self.converters_path_attr )
+                if not os.path.isdir( self.converters_path ):
+                    raise ConfigurationError( "Directory does not exist: %s" % self.converters_path )
+            if not self.display_applications_path:
+                self.display_path_attr = registration.get( 'display_path', 'display_applications' )
+                self.display_applications_path = os.path.join( root_dir, self.display_path_attr )
+            # A proprietary datatype's <registration> tag may have two special attributes: proprietary_converter_path and proprietary_display_path.
+            proprietary_converter_path = registration.get( 'proprietary_converter_path', None )
+            proprietary_display_path = registration.get( 'proprietary_display_path', None )
+            if ( proprietary_converter_path or proprietary_display_path ) and not handling_proprietary_datatypes:
+                handling_proprietary_datatypes = True
+            for elem in registration.findall( 'datatype' ):
+                try:
+                    extension = elem.get( 'extension', None )
+                    dtype = elem.get( 'type', None )
+                    type_extension = elem.get( 'type_extension', None )
+                    mimetype = elem.get( 'mimetype', None )
+                    display_in_upload = elem.get( 'display_in_upload', False )
+                    make_subclass = galaxy.util.string_as_bool( elem.get( 'subclass', False ) )
+                    # Proprietary datatypes included in installed tool shed repositories will include two special attributes
+                    # (proprietary_path and proprietary_datatype_module) if they depend on proprietary datatypes classes.
+                    proprietary_path = elem.get( 'proprietary_path', None )
+                    proprietary_datatype_module = elem.get( 'proprietary_datatype_module', None )
+                    if ( proprietary_path or proprietary_datatype_module ) and not handling_proprietary_datatypes:
+                        handling_proprietary_datatypes = True
+                    if deactivate:
+                        # We are deactivating an installed tool shed repository, so eliminate the
+                        # datatype elem from the in-memory list of datatype elems.
+                        for in_memory_elem in self.datatype_elems:
+                            in_memory_extension = in_memory_elem.get( 'extension', None )
+                            if in_memory_extension == extension:
+                                in_memory_dtype = in_memory_elem.get( 'type', None )
+                                in_memory_type_extension = in_memory_elem.get( 'type_extension', None )
+                                in_memory_mimetype = in_memory_elem.get( 'mimetype', None )
+                                in_memory_display_in_upload = in_memory_elem.get( 'display_in_upload', False )
+                                in_memory_make_subclass = galaxy.util.string_as_bool( in_memory_elem.get( 'subclass', False ) )
+                                if in_memory_dtype == dtype and in_memory_type_extension == type_extension and in_memory_mimetype == mimetype \
+                                    and in_memory_display_in_upload == display_in_upload and in_memory_make_subclass == make_subclass:
+                                    self.datatype_elems.remove( in_memory_elem )
+                    else:
+                        # Keep an in-memory list of datatype elems to enable persistence.
+                        self.datatype_elems.append( elem )
+                    if extension and extension in self.datatypes_by_extension and deactivate:
+                        # We are deactivating an installed tool shed repository, so eliminate the datatype from the registry.
+                        # TODO: Handle deactivating datatype converters, etc before removing from self.datatypes_by_extension.
+                        self.log.debug( "Removing datatype with extension '%s' from the registry." % extension )
+                        del self.datatypes_by_extension[ extension ]
+                        can_process_datatype = False
+                    else:
+                        can_process_datatype = ( extension and ( dtype or type_extension ) ) and ( extension not in self.datatypes_by_extension or override )
+                    if can_process_datatype:
+                        if dtype:
+                            fields = dtype.split( ':' )
+                            datatype_module = fields[0]
+                            datatype_class_name = fields[1]
+                            datatype_class = None
+                            if proprietary_path and proprietary_datatype_module:
+                                # We need to change the value of sys.path, so do it in a way that is thread-safe.
+                                lock = threading.Lock()
+                                lock.acquire( True )
+                                try:
+                                    imported_module = __import_module( proprietary_path, proprietary_datatype_module )
+                                    if imported_module not in self.imported_modules:
+                                        self.imported_modules.append( imported_module )
+                                    if hasattr( imported_module, datatype_class_name ):
+                                        datatype_class = getattr( imported_module, datatype_class_name )
+                                except Exception, e:
+                                    full_path = os.path.join( proprietary_path, proprietary_datatype_module )
+                                    self.log.debug( "Exception importing proprietary code file %s: %s" % ( str( full_path ), str( e ) ) )
+                                finally:
+                                    lock.release()
+                            if datatype_class is None:
+                                # The datatype class name must be contained in one of the datatype modules in the Galaxy distribution.
+                                fields = datatype_module.split( '.' )
+                                module = __import__( fields.pop(0) )
+                                for mod in fields:
+                                    module = getattr( module, mod )
+                                datatype_class = getattr( module, datatype_class_name )
+                        elif type_extension:
+                            datatype_class = self.datatypes_by_extension[type_extension].__class__
+                        if make_subclass:
+                            datatype_class = type( datatype_class_name, (datatype_class,), {} )
+                        if extension in self.datatypes_by_extension:
+                            self.log.warning( "Overriding conflicting datatype with extension '%s', using datatype from %s." % ( extension, config ) )
+                        self.datatypes_by_extension[ extension ] = datatype_class()
+                        if mimetype is None:
+                            # Use default mime type as per datatype spec
+                            mimetype = self.datatypes_by_extension[ extension ].get_mime()
+                        self.mimetypes_by_extension[ extension ] = mimetype
+                        if hasattr( datatype_class, "get_track_type" ):
+                            self.available_tracks.append( extension )
+                        if display_in_upload:
+                            self.upload_file_formats.append( extension )
+                        # Max file size cut off for setting optional metadata
+                        self.datatypes_by_extension[ extension ].max_optional_metadata_filesize = elem.get( 'max_optional_metadata_filesize', None )
+                        for converter in elem.findall( 'converter' ):
+                            # Build the list of datatype converters which will later be loaded into the calling app's toolbox.
+                            converter_config = converter.get( 'file', None )
+                            target_datatype = converter.get( 'target_datatype', None )
+                            depends_on = converter.get( 'depends_on', None )
+                            if depends_on and target_datatype:
+                                if extension not in self.converter_deps:
+                                    self.converter_deps[extension] = {}
+                                self.converter_deps[extension][target_datatype] = depends_on.split(',')
+                            if converter_config and target_datatype:
+                                #if imported_modules:
+                                if proprietary_converter_path:
+                                    self.proprietary_converters.append( ( converter_config, extension, target_datatype ) )
+                                else:
+                                    self.converters.append( ( converter_config, extension, target_datatype ) )
+                        for composite_file in elem.findall( 'composite_file' ):
+                            # add composite files
+                            name = composite_file.get( 'name', None )
+                            if name is None:
+                                self.log.warning( "You must provide a name for your composite_file (%s)." % composite_file )
+                            optional = composite_file.get( 'optional', False )
+                            mimetype = composite_file.get( 'mimetype', None )
+                            self.datatypes_by_extension[extension].add_composite_file( name, optional=optional, mimetype=mimetype )
+                        for display_app in elem.findall( 'display' ):
+                            #if imported_modules:
+                            if proprietary_display_path:
+                                if elem not in self.proprietary_display_app_containers:
+                                    self.proprietary_display_app_containers.append( elem )
+                            else:
+                                if elem not in self.display_app_containers:
+                                    self.display_app_containers.append( elem )
+                    elif not deactivate:
+                        # A new tool shed repository that contains proprietary datatypes is being installed, and since installation
+                        # is occurring after the datatypes registry has been initialized, its contents cannot be overridden by newly
+                        # introduced conflicting datatypes.
+                        self.log.warning( "Ignoring conflicting datatype with extension '%s' from %s." % ( extension, config ) )
+                except Exception, e:
+                    if deactivate:
+                        self.log.warning( "Error deactivating datatype with extension '%s': %s" % ( extension, str( e ) ) )
+                    else:
+                        self.log.warning( "Error loading datatype with extension '%s': %s" % ( extension, str( e ) ) )
+            # Load datatype sniffers from the config
+            sniffers = root.find( 'sniffers' )
+            if sniffers is not None:
+                for elem in sniffers.findall( 'sniffer' ):
+                    # Keep an in-memory list of sniffer elems to enable persistence.
+                    self.sniffer_elems.append( elem )
+                    dtype = elem.get( 'type', None )
+                    if dtype:
+                        try:
+                            fields = dtype.split( ":" )
+                            datatype_module = fields[0]
+                            datatype_class_name = fields[1]
+                            module = None
+                            #if imported_modules:
+                            if handling_proprietary_datatypes:
+                                # See if one of the imported modules contains the datatype class name.
+                                for imported_module in self.imported_modules:
+                                    if hasattr( imported_module, datatype_class_name ):
+                                        module = imported_module
+                                        break
+                            if module is None:
+                                # The datatype class name must be contained in one of the datatype modules in the Galaxy distribution.
+                                module = __import__( datatype_module )
+                                for comp in datatype_module.split( '.' )[ 1: ]:
+                                    module = getattr( module, comp )
+                            aclass = getattr( module, datatype_class_name )()
+                            if deactivate:
+                                for sniffer_class in self.sniff_order:
+                                    if sniffer_class.__class__ == aclass.__class__:
+                                        self.sniff_order.remove( sniffer_class )
+                                        break
+                                self.log.debug( "Deactivated sniffer for datatype '%s'" % dtype )
+                            else:
+                                # See if we have a conflicting sniffer already loaded.
+                                conflict = False
+                                for conflict_loc, sniffer_class in enumerate( self.sniff_order ):
+                                    if sniffer_class.__class__ == aclass.__class__:
+                                        # We have a conflicting sniffer, so replace the one previously loaded.
+                                        conflict = True
+                                        if override:
+                                            del self.sniff_order[ conflict_loc ]
+                                            self.log.debug( "Replaced conflicting sniffer for datatype '%s'" % dtype )
+                                        break
+                                if conflict:
+                                    if override:
+                                        self.sniff_order.append( aclass )
+                                        self.log.debug( "Loaded sniffer for datatype '%s'" % dtype )
+                                else:
+                                    self.sniff_order.append( aclass )
+                                    self.log.debug( "Loaded sniffer for datatype '%s'" % dtype )
+                        except Exception, exc:
+                            if deactivate:
+                                self.log.warning( "Error deactivating sniffer for datatype '%s': %s" % ( dtype, str( exc ) ) )
+                            else:
+                                self.log.warning( "Error appending sniffer for datatype '%s' to sniff_order: %s" % ( dtype, str( exc ) ) )
+            self.upload_file_formats.sort()
+            # Persist the xml form of the registry into a temporary file so that it
+            # can be loaded from the command line by tools and set_metadata processing.
+            self.to_xml_file()
+        # Default values.
+        if not self.datatypes_by_extension:
+            self.datatypes_by_extension = { 
+                ## required for galaxy-obo:
+                'obo'         : ontology.Obo(),
+                'owl'         : ontology.Owl(),
+                'ontology'    : ontology.Ontology(),
+                'gaf'         : annotation.Gaf(),
+                'terf'        : termenrichment.TerfTab(),
+
+                'ab1'         : binary.Ab1(),
+                'axt'         : sequence.Axt(),
+                'bam'         : binary.Bam(),
+                'bed'         : interval.Bed(), 
+                'blastxml'    : xml.BlastXml(),
+                'coverage'    : coverage.LastzCoverage(),
+                'customtrack' : interval.CustomTrack(),
+                'csfasta'     : sequence.csFasta(),
+                'fasta'       : sequence.Fasta(),
+                'eland'       : tabular.Eland(),
+                'fastq'       : sequence.Fastq(),
+                'fastqsanger' : sequence.FastqSanger(),
+                'gtf'         : interval.Gtf(),
+                'gff'         : interval.Gff(),
+                'gff3'        : interval.Gff3(),
+                'genetrack'   : tracks.GeneTrack(),
+                'interval'    : interval.Interval(), 
+                'laj'         : images.Laj(),
+                'lav'         : sequence.Lav(),
+                'maf'         : sequence.Maf(),
+                'pileup'      : tabular.Pileup(),
+                'qualsolid'   : qualityscore.QualityScoreSOLiD(),
+                'qualsolexa'  : qualityscore.QualityScoreSolexa(),
+                'qual454'     : qualityscore.QualityScore454(),
+                'sam'         : tabular.Sam(), 
+                'scf'         : binary.Scf(),
+                'sff'         : binary.Sff(),
+                'tabular'     : tabular.Tabular(),
+                'taxonomy'    : tabular.Taxonomy(),
+                'txt'         : data.Text(),
+                'wig'         : interval.Wiggle(),
+                'xml'         : xml.GenericXml(),
+            }
+            self.mimetypes_by_extension = { 
+                'ab1'         : 'application/octet-stream',
+                'axt'         : 'text/plain',
+                'bam'         : 'application/octet-stream',
+                'bed'         : 'text/plain', 
+                'blastxml'    : 'application/xml', 
+                'customtrack' : 'text/plain',
+                'csfasta'     : 'text/plain',
+                'eland'       : 'application/octet-stream',
+                'fasta'       : 'text/plain',
+                'fastq'       : 'text/plain',
+                'fastqsanger' : 'text/plain',
+                'gtf'         : 'text/plain',
+                'gff'         : 'text/plain',
+                'gff3'        : 'text/plain',
+                'interval'    : 'text/plain', 
+                'laj'         : 'text/plain',
+                'lav'         : 'text/plain',
+                'maf'         : 'text/plain',
+                'memexml'     : 'application/xml',
+                'pileup'      : 'text/plain',
+                'qualsolid'   : 'text/plain',
+                'qualsolexa'  : 'text/plain',
+                'qual454'     : 'text/plain',
+                'sam'         : 'text/plain',
+                'scf'         : 'application/octet-stream',
+                'sff'         : 'application/octet-stream',
+                'tabular'     : 'text/plain',
+                'taxonomy'    : 'text/plain',
+                'txt'         : 'text/plain',
+                'wig'         : 'text/plain',
+                'xml'         : 'application/xml',
+            }
+        # Ensure the generic 'data' supertype exists for input steps in workflows.
+        if 'data' not in self.datatypes_by_extension:
+            self.datatypes_by_extension['data'] = data.Data()
+            self.mimetypes_by_extension['data'] = 'application/octet-stream'
+        # Default values - the order in which we attempt to determine data types is critical
+        # because some formats are much more flexibly defined than others.
+        if len(self.sniff_order) < 1:
+            self.sniff_order = [
+                binary.Bam(),
+                binary.Sff(),
+                xml.BlastXml(),
+                xml.GenericXml(),
+                sequence.Maf(),
+                sequence.Lav(),
+                sequence.csFasta(),
+                qualityscore.QualityScoreSOLiD(),
+                qualityscore.QualityScore454(),
+                sequence.Fasta(),
+                sequence.Fastq(),
+                interval.Wiggle(),
+                images.Html(),
+                sequence.Axt(),
+                interval.Bed(), 
+                interval.CustomTrack(),
+                interval.Gtf(),
+                interval.Gff(),
+                interval.Gff3(),
+                tabular.Pileup(),
+                interval.Interval(),
+                tabular.Sam(),
+                tabular.Eland()
+            ]
+        def append_to_sniff_order():
+            # Just in case any supported data types are not included in the config's sniff_order section.
+            for ext in self.datatypes_by_extension:
+                datatype = self.datatypes_by_extension[ext]
+                included = False
+                for atype in self.sniff_order:
+                    if isinstance(atype, datatype.__class__):
+                        included = True
+                        break
+                if not included:
+                    self.sniff_order.append(datatype)
+        append_to_sniff_order()
+    def get_available_tracks(self):
+        return self.available_tracks
+    def get_mimetype_by_extension(self, ext, default = 'application/octet-stream' ):
+        """Returns a mimetype based on an extension"""
+        try:
+            mimetype = self.mimetypes_by_extension[ext]
+        except KeyError:
+            #datatype was never declared
+            mimetype = default
+            self.log.warning('unknown mimetype in data factory %s' % ext)
+        return mimetype
+    def get_datatype_by_extension(self, ext ):
+        """Returns a datatype based on an extension"""
+        try:
+            builder = self.datatypes_by_extension[ext]
+        except KeyError:
+            builder = data.Text()
+        return builder
+    def change_datatype(self, data, ext, set_meta = True ):
+        data.extension = ext
+        # call init_meta and copy metadata from itself.  The datatype
+        # being converted *to* will handle any metadata copying and
+        # initialization.
+        if data.has_data():
+            data.set_size()
+            data.init_meta( copy_from=data )
+            if set_meta:
+                #metadata is being set internally
+                data.set_meta( overwrite = False )
+                data.set_peek()
+        return data
+    def old_change_datatype(self, data, ext):
+        """Creates and returns a new datatype based on an existing data and an extension"""
+        newdata = factory(ext)(id=data.id)
+        for key, value in data.__dict__.items():
+            setattr(newdata, key, value)
+        newdata.ext = ext
+        return newdata
+    def load_datatype_converters( self, toolbox, installed_repository_dict=None, deactivate=False ):
+        """
+        If deactivate is False, add datatype converters from self.converters or self.proprietary_converters
+        to the calling app's toolbox.  If deactivate is True, eliminates relevant converters from the calling
+        app's toolbox.
+        """   
+        if installed_repository_dict:
+            # Load converters defined by datatypes_conf.xml included in installed tool shed repository.
+            converters = self.proprietary_converters
+        else:
+            # Load converters defined by local datatypes_conf.xml.
+            converters = self.converters
+        for elem in converters:
+            tool_config = elem[0]
+            source_datatype = elem[1]
+            target_datatype = elem[2]
+            if installed_repository_dict:
+                converter_path = installed_repository_dict[ 'converter_path' ]
+                config_path = os.path.join( converter_path, tool_config )
+            else:
+                config_path = os.path.join( self.converters_path, tool_config )
+            try:
+                converter = toolbox.load_tool( config_path )
+                if installed_repository_dict:
+                    # If the converter is included in an installed tool shed repository, set the tool
+                    # shed related tool attributes.
+                    converter.tool_shed = installed_repository_dict[ 'tool_shed' ]
+                    converter.repository_name = installed_repository_dict[ 'repository_name' ]
+                    converter.repository_owner = installed_repository_dict[ 'repository_owner' ]
+                    converter.installed_changeset_revision = installed_repository_dict[ 'installed_changeset_revision' ]
+                    converter.old_id = converter.id
+                    # The converter should be included in the list of tools defined in tool_dicts.
+                    tool_dicts = installed_repository_dict[ 'tool_dicts' ]
+                    for tool_dict in tool_dicts:
+                        if tool_dict[ 'id' ] == converter.id:
+                            converter.guid = tool_dict[ 'guid' ]
+                            converter.id = tool_dict[ 'guid' ]
+                            break
+                if deactivate:
+                    del toolbox.tools_by_id[ converter.id ]
+                    if source_datatype in self.datatype_converters:
+                        del self.datatype_converters[ source_datatype ][ target_datatype ]
+                    self.log.debug( "Deactivated converter: %s", converter.id )
+                else:
+                    toolbox.tools_by_id[ converter.id ] = converter
+                    if source_datatype not in self.datatype_converters:
+                        self.datatype_converters[ source_datatype ] = odict()
+                    self.datatype_converters[ source_datatype ][ target_datatype ] = converter
+                    self.log.debug( "Loaded converter: %s", converter.id )
+            except Exception, e:
+                if deactivate:
+                    self.log.exception( "Error deactivating converter (%s): %s" % ( config_path, str( e ) ) )
+                else:
+                    self.log.exception( "Error loading converter (%s): %s" % ( config_path, str( e ) ) )
+    def load_display_applications( self, installed_repository_dict=None, deactivate=False ):
+        """
+        If deactivate is False, add display applications from self.display_app_containers or
+        self.proprietary_display_app_containers to appropriate datatypes.  If deactivate is
+        True, eliminates relevant display applications from appropriate datatypes.
+        """
+        if installed_repository_dict:
+            # Load display applications defined by datatypes_conf.xml included in installed tool shed repository.
+            datatype_elems = self.proprietary_display_app_containers
+        else:
+            # Load display applications defined by local datatypes_conf.xml.
+            datatype_elems = self.display_app_containers
+        for elem in datatype_elems:
+            extension = elem.get( 'extension', None )
+            for display_app in elem.findall( 'display' ):
+                display_file = display_app.get( 'file', None )
+                if installed_repository_dict:
+                    display_path = installed_repository_dict[ 'display_path' ]
+                    display_file_head, display_file_tail = os.path.split( display_file )
+                    config_path = os.path.join( display_path, display_file_tail )
+                else:
+                    config_path = os.path.join( self.display_applications_path, display_file )
+                try:
+                    inherit = galaxy.util.string_as_bool( display_app.get( 'inherit', 'False' ) )
+                    display_app = DisplayApplication.from_file( config_path, self )
+                    if display_app:
+                        if display_app.id in self.display_applications:
+                            if deactivate:
+                                del self.display_applications[ display_app.id ]
+                            else:
+                                # If we already loaded this display application, we'll use the first one loaded.
+                                display_app = self.display_applications[ display_app.id ]
+                        elif installed_repository_dict:
+                            # If the display application is included in an installed tool shed repository,
+                            # set the tool shed related tool attributes.
+                            display_app.tool_shed = installed_repository_dict[ 'tool_shed' ]
+                            display_app.repository_name = installed_repository_dict[ 'repository_name' ]
+                            display_app.repository_owner = installed_repository_dict[ 'repository_owner' ]
+                            display_app.installed_changeset_revision = installed_repository_dict[ 'installed_changeset_revision' ]
+                            display_app.old_id = display_app.id
+                            # The display application should be included in the list of tools defined in tool_dicts.
+                            tool_dicts = installed_repository_dict[ 'tool_dicts' ]
+                            for tool_dict in tool_dicts:
+                                if tool_dict[ 'id' ] == display_app.id:
+                                    display_app.guid = tool_dict[ 'guid' ]
+                                    display_app.id = tool_dict[ 'guid' ]
+                                    break
+                        if deactivate:
+                            del self.display_applications[ display_app.id ]
+                            del self.datatypes_by_extension[ extension ].display_applications[ display_app.id ]
+                            if inherit and ( self.datatypes_by_extension[ extension ], display_app ) in self.inherit_display_application_by_class:
+                                self.inherit_display_application_by_class.remove( ( self.datatypes_by_extension[ extension ], display_app ) )
+                            self.log.debug( "Deactivated display application '%s' for datatype '%s'." % ( display_app.id, extension ) )
+                        else:
+                            self.display_applications[ display_app.id ] = display_app
+                            self.datatypes_by_extension[ extension ].add_display_application( display_app )
+                            if inherit and ( self.datatypes_by_extension[ extension ], display_app ) not in self.inherit_display_application_by_class:
+                                self.inherit_display_application_by_class.append( ( self.datatypes_by_extension[ extension ], display_app ) )
+                            self.log.debug( "Loaded display application '%s' for datatype '%s', inherit=%s." % ( display_app.id, extension, inherit ) )
+                except Exception, e:
+                    if deactivate:
+                        self.log.exception( "Error deactivating display application (%s): %s" % ( config_path, str( e ) ) )
+                    else:
+                        self.log.exception( "Error loading display application (%s): %s" % ( config_path, str( e ) ) )
+        # Handle display_application subclass inheritance.
+        for extension, d_type1 in self.datatypes_by_extension.iteritems():
+            for d_type2, display_app in self.inherit_display_application_by_class:
+                current_app = d_type1.get_display_application( display_app.id, None )
+                if current_app is None and isinstance( d_type1, type( d_type2 ) ):
+                    self.log.debug( "Adding inherited display application '%s' to datatype '%s'" % ( display_app.id, extension ) )
+                    d_type1.add_display_application( display_app )
+    def load_external_metadata_tool( self, toolbox ):
+        """Adds a tool which is used to set external metadata"""
+        # We need to be able to add a job to the queue to set metadata. The queue will currently only accept jobs with an associated
+        # tool.  We'll create a special tool to be used for auto-detecting metadata; this is less than ideal, but effective.
+        # Properly building a tool without relying on parsing an XML file is nearly impossible... so we'll create a temporary file.
+        tool_xml_text = """
+            <tool id="__SET_METADATA__" name="Set External Metadata" version="1.0.1" tool_type="set_metadata">
+              <type class="SetMetadataTool" module="galaxy.tools"/>
+              <action module="galaxy.tools.actions.metadata" class="SetMetadataToolAction"/>
+              <command>$__SET_EXTERNAL_METADATA_COMMAND_LINE__</command>
+              <inputs>
+                <param format="data" name="input1" type="data" label="File to set metadata on."/>
+                <param name="__ORIGINAL_DATASET_STATE__" type="hidden" value=""/>
+                <param name="__SET_EXTERNAL_METADATA_COMMAND_LINE__" type="hidden" value=""/>
+              </inputs>
+            </tool>
+            """
+        tmp_name = tempfile.NamedTemporaryFile()
+        tmp_name.write( tool_xml_text )
+        tmp_name.flush()
+        set_meta_tool = toolbox.load_tool( tmp_name.name )
+        toolbox.tools_by_id[ set_meta_tool.id ] = set_meta_tool
+        self.set_external_metadata_tool = set_meta_tool
+        self.log.debug( "Loaded external metadata tool: %s", self.set_external_metadata_tool.id )
+    def get_converters_by_datatype(self, ext):
+        """Returns available converters by source type"""
+        converters = odict()
+        source_datatype = type(self.get_datatype_by_extension(ext))
+        for ext2, dict in self.datatype_converters.items():
+            converter_datatype = type(self.get_datatype_by_extension(ext2))
+            if issubclass(source_datatype, converter_datatype):
+                converters.update(dict)
+        #Ensure ext-level converters are present
+        if ext in self.datatype_converters.keys():
+            converters.update(self.datatype_converters[ext])
+        return converters
+    def get_converter_by_target_type(self, source_ext, target_ext):
+        """Returns a converter based on source and target datatypes"""
+        converters = self.get_converters_by_datatype(source_ext)
+        if target_ext in converters.keys():
+            return converters[target_ext]
+        return None
+    def find_conversion_destination_for_dataset_by_extensions( self, dataset, accepted_formats, converter_safe = True ):
+        """Returns ( target_ext, existing converted dataset )"""
+        for convert_ext in self.get_converters_by_datatype( dataset.ext ):
+            if isinstance( self.get_datatype_by_extension( convert_ext ), accepted_formats ):
+                converted_dataset = dataset.get_converted_files_by_type( convert_ext )
+                if converted_dataset:
+                    ret_data = converted_dataset
+                elif not converter_safe:
+                    continue
+                else:
+                    ret_data = None
+                return ( convert_ext, ret_data )
+        return ( None, None )
+    def get_composite_extensions( self ):
+        return [ ext for ( ext, d_type ) in self.datatypes_by_extension.iteritems() if d_type.composite_type is not None ]
+    def get_upload_metadata_params( self, context, group, tool ):
+        """Returns dict of case value:inputs for metadata conditional for upload tool"""
+        rval = {}
+        for ext, d_type in self.datatypes_by_extension.iteritems():
+            inputs = []
+            for meta_name, meta_spec in d_type.metadata_spec.iteritems():
+                if meta_spec.set_in_upload:
+                    help_txt = meta_spec.desc
+                    if not help_txt or help_txt == meta_name:
+                        help_txt = ""
+                    inputs.append( '<param type="text" name="%s" label="Set metadata value for &quot;%s&quot;" value="%s" help="%s"/>' % ( meta_name, meta_name, meta_spec.default, help_txt ) )
+            rval[ ext ] = "\n".join( inputs )
+        if 'auto' not in rval and 'txt' in rval: #need to manually add 'auto' datatype
+            rval[ 'auto' ] = rval[ 'txt' ]
+        return rval
+    @property
+    def integrated_datatypes_configs( self ):
+        if self.xml_filename and os.path.isfile( self.xml_filename ):
+            return self.xml_filename
+        self.to_xml_file()
+        return self.xml_filename
+    def to_xml_file( self ):
+        if self.xml_filename is not None:
+            # If persisted previously, attempt to remove
+            # the temporary file in which we were written.
+            try:
+                os.unlink( self.xml_filename )
+            except:
+                pass
+            self.xml_filename = None
+        fd, filename = tempfile.mkstemp()
+        self.xml_filename = os.path.abspath( filename )
+        if self.converters_path_attr:
+            converters_path_str = ' converters_path="%s"' % self.converters_path_attr
+        else:
+            converters_path_str = ''
+        if self.display_path_attr:
+            display_path_str = ' display_path="%s"' % self.display_path_attr
+        else:
+            display_path_str = ''
+        os.write( fd, '<?xml version="1.0"?>\n' )
+        os.write( fd, '<datatypes>\n' )
+        os.write( fd, '<registration%s%s>\n' % ( converters_path_str, display_path_str ) )
+        for elem in self.datatype_elems:
+            os.write( fd, '%s' % galaxy.util.xml_to_string( elem ) )
+        os.write( fd, '</registration>\n' )
+        os.write( fd, '<sniffers>\n' )
+        for elem in self.sniffer_elems:
+            os.write( fd, '%s' % galaxy.util.xml_to_string( elem ) )
+        os.write( fd, '</sniffers>\n' )
+        os.write( fd, '</datatypes>\n' )
+        os.close( fd )
+        os.chmod( self.xml_filename, 0644 )
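A minimal usage sketch, assuming this registry.py and the new annotation/ontology/termenrichment modules sit in Galaxy's lib/galaxy/datatypes/ directory so that the bare "import data, tabular, ..." statements at its top resolve. It exercises the default-values branch of load_datatypes() and the extension lookups defined above; it is an illustration under those assumptions, not part of the changeset.

    import registry

    reg = registry.Registry()
    # Called without root_dir/config, load_datatypes() falls through to its hard-coded
    # defaults, which now include the obo/owl/ontology/gaf/terf extensions added above.
    reg.load_datatypes()
    print( reg.get_datatype_by_extension( 'obo' ) )    # an ontology.Obo instance
    print( reg.get_datatype_by_extension( 'nope' ) )   # unknown extensions fall back to data.Text()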
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/termenrichment.py	Tue Feb 12 13:46:10 2013 -0500
@@ -0,0 +1,51 @@
+"""
+Term enrichment datatypes
+
+"""
+import pkg_resources
+pkg_resources.require( "bx-python" )
+
+import logging, os, sys, time, tempfile, shutil
+import data
+from galaxy import util
+from galaxy.datatypes.sniff import *
+from galaxy.web import url_for
+from cgi import escape
+import urllib
+from bx.intervals.io import *
+from galaxy.datatypes import metadata
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.tabular import Tabular
+import math
+
+log = logging.getLogger(__name__)
+
+class TermEnrichmentResult( data.Text ):
+    """Any term enrichment format"""
+    file_ext = "enrichment"
+
+    def init_meta( self, dataset, copy_from=None ):
+        data.Text.init_meta( self, dataset, copy_from=copy_from )
+
+
+class TerfTab( TermEnrichmentResult ):
+    """TERF TSV Format"""
+    file_ext = "terf"
+
+    def init_meta( self, dataset, copy_from=None ):
+        data.Text.init_meta( self, dataset, copy_from=copy_from )
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in TERF format
+        """
+        headers = get_headers( filename, '\n' )
+        try:
+            for hdr in headers:
+                if hdr and hdr[0].startswith( '##terf-version' ) :
+                    return True
+            return False
+        except:
+            return False
+
+
+
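All three new sniffers follow the same shape: scan the first lines of the file for a version marker. As an illustration of extending that pattern, a further subtype could be appended to termenrichment.py as sketched below; TerfXml, its "terfxml" extension, and the '<terf' marker are invented for this example and are not part of this changeset.

    class TerfXml( TermEnrichmentResult ):
        """Hypothetical XML flavour of a term enrichment result (illustration only)"""
        file_ext = "terfxml"

        def init_meta( self, dataset, copy_from=None ):
            data.Text.init_meta( self, dataset, copy_from=copy_from )
        def sniff( self, filename ):
            """
            Determines whether the file starts with the made-up '<terf' marker
            """
            headers = get_headers( filename, '\n' )
            try:
                for hdr in headers:
                    if hdr and hdr[0].startswith( '<terf' ):
                        return True
                return False
            except:
                return False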