diff webapollo.py @ 5:7610987e0c48 draft

planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 29795b77c0d5c7894219b018a92c5ee7818096c3
author eric-rasche
date Wed, 01 Mar 2017 22:39:58 -0500
parents d4ae83dedb14
children f9a6e151b3b4
line wrap: on
line diff
--- a/webapollo.py	Thu Jan 12 11:53:44 2017 -0500
+++ b/webapollo.py	Wed Mar 01 22:39:58 2017 -0500
@@ -2,30 +2,415 @@
 import json
 import os
 import collections
-import StringIO
+try:
+    import StringIO as io
+except:
+    import io
 import logging
+import time
+import argparse
+from abc import abstractmethod
 from BCBio import GFF
 from Bio import SeqIO
 logging.getLogger("requests").setLevel(logging.CRITICAL)
 log = logging.getLogger()
 
 
+#############################################
+###### BEGIN IMPORT OF CACHING LIBRARY ######
+#############################################
+# This code is licensed under the MIT       #
+# License and is a copy of code publicly    #
+# available in rev.                         #
+# e27332bc82f4e327aedaec17c9b656ae719322ed  #
+# of https://github.com/tkem/cachetools/    #
+#############################################
+class DefaultMapping(collections.MutableMapping):
+
+    __slots__ = ()
+
+    @abstractmethod
+    def __contains__(self, key):  # pragma: nocover
+        return False
+
+    @abstractmethod
+    def __getitem__(self, key):  # pragma: nocover
+        if hasattr(self.__class__, '__missing__'):
+            return self.__class__.__missing__(self, key)
+        else:
+            raise KeyError(key)
+
+    def get(self, key, default=None):
+        if key in self:
+            return self[key]
+        else:
+            return default
+
+    __marker = object()
+
+    def pop(self, key, default=__marker):
+        if key in self:
+            value = self[key]
+            del self[key]
+        elif default is self.__marker:
+            raise KeyError(key)
+        else:
+            value = default
+        return value
+
+    def setdefault(self, key, default=None):
+        if key in self:
+            value = self[key]
+        else:
+            self[key] = value = default
+        return value
+
+DefaultMapping.register(dict)
+
+
+class _DefaultSize(object):
+    def __getitem__(self, _):
+        return 1
+
+    def __setitem__(self, _, value):
+        assert value == 1
+
+    def pop(self, _):
+        return 1
+
+
+class Cache(DefaultMapping):
+    """Mutable mapping to serve as a simple cache or cache base class."""
+
+    __size = _DefaultSize()
+
+    def __init__(self, maxsize, missing=None, getsizeof=None):
+        if missing:
+            self.__missing = missing
+        if getsizeof:
+            self.__getsizeof = getsizeof
+            self.__size = dict()
+        self.__data = dict()
+        self.__currsize = 0
+        self.__maxsize = maxsize
+
+    def __repr__(self):
+        return '%s(%r, maxsize=%r, currsize=%r)' % (
+            self.__class__.__name__,
+            list(self.__data.items()),
+            self.__maxsize,
+            self.__currsize,
+        )
+
+    def __getitem__(self, key):
+        try:
+            return self.__data[key]
+        except KeyError:
+            return self.__missing__(key)
+
+    def __setitem__(self, key, value):
+        maxsize = self.__maxsize
+        size = self.getsizeof(value)
+        if size > maxsize:
+            raise ValueError('value too large')
+        if key not in self.__data or self.__size[key] < size:
+            while self.__currsize + size > maxsize:
+                self.popitem()
+        if key in self.__data:
+            diffsize = size - self.__size[key]
+        else:
+            diffsize = size
+        self.__data[key] = value
+        self.__size[key] = size
+        self.__currsize += diffsize
+
+    def __delitem__(self, key):
+        size = self.__size.pop(key)
+        del self.__data[key]
+        self.__currsize -= size
+
+    def __contains__(self, key):
+        return key in self.__data
+
+    def __missing__(self, key):
+        value = self.__missing(key)
+        try:
+            self.__setitem__(key, value)
+        except ValueError:
+            pass  # value too large
+        return value
+
+    def __iter__(self):
+        return iter(self.__data)
+
+    def __len__(self):
+        return len(self.__data)
+
+    @staticmethod
+    def __getsizeof(value):
+        return 1
+
+    @staticmethod
+    def __missing(key):
+        raise KeyError(key)
+
+    @property
+    def maxsize(self):
+        """The maximum size of the cache."""
+        return self.__maxsize
+
+    @property
+    def currsize(self):
+        """The current size of the cache."""
+        return self.__currsize
+
+    def getsizeof(self, value):
+        """Return the size of a cache element's value."""
+        return self.__getsizeof(value)
+
+
+class _Link(object):
+
+    __slots__ = ('key', 'expire', 'next', 'prev')
+
+    def __init__(self, key=None, expire=None):
+        self.key = key
+        self.expire = expire
+
+    def __reduce__(self):
+        return _Link, (self.key, self.expire)
+
+    def unlink(self):
+        next = self.next
+        prev = self.prev
+        prev.next = next
+        next.prev = prev
+
+
+class _Timer(object):
+
+    def __init__(self, timer):
+        self.__timer = timer
+        self.__nesting = 0
+
+    def __call__(self):
+        if self.__nesting == 0:
+            return self.__timer()
+        else:
+            return self.__time
+
+    def __enter__(self):
+        if self.__nesting == 0:
+            self.__time = time = self.__timer()
+        else:
+            time = self.__time
+        self.__nesting += 1
+        return time
+
+    def __exit__(self, *exc):
+        self.__nesting -= 1
+
+    def __reduce__(self):
+        return _Timer, (self.__timer,)
+
+    def __getattr__(self, name):
+        return getattr(self.__timer, name)
+
+
+class TTLCache(Cache):
+    """LRU Cache implementation with per-item time-to-live (TTL) value."""
+
+    def __init__(self, maxsize, ttl, timer=time.time, missing=None,
+                 getsizeof=None):
+        Cache.__init__(self, maxsize, missing, getsizeof)
+        self.__root = root = _Link()
+        root.prev = root.next = root
+        self.__links = collections.OrderedDict()
+        self.__timer = _Timer(timer)
+        self.__ttl = ttl
+
+    def __contains__(self, key):
+        try:
+            link = self.__links[key]  # no reordering
+        except KeyError:
+            return False
+        else:
+            return not (link.expire < self.__timer())
+
+    def __getitem__(self, key, cache_getitem=Cache.__getitem__):
+        try:
+            link = self.__getlink(key)
+        except KeyError:
+            expired = False
+        else:
+            expired = link.expire < self.__timer()
+        if expired:
+            return self.__missing__(key)
+        else:
+            return cache_getitem(self, key)
+
+    def __setitem__(self, key, value, cache_setitem=Cache.__setitem__):
+        with self.__timer as time:
+            self.expire(time)
+            cache_setitem(self, key, value)
+        try:
+            link = self.__getlink(key)
+        except KeyError:
+            self.__links[key] = link = _Link(key)
+        else:
+            link.unlink()
+        link.expire = time + self.__ttl
+        link.next = root = self.__root
+        link.prev = prev = root.prev
+        prev.next = root.prev = link
+
+    def __delitem__(self, key, cache_delitem=Cache.__delitem__):
+        cache_delitem(self, key)
+        link = self.__links.pop(key)
+        link.unlink()
+        if link.expire < self.__timer():
+            raise KeyError(key)
+
+    def __iter__(self):
+        root = self.__root
+        curr = root.next
+        while curr is not root:
+            # "freeze" time for iterator access
+            with self.__timer as time:
+                if not (curr.expire < time):
+                    yield curr.key
+            curr = curr.next
+
+    def __len__(self):
+        root = self.__root
+        curr = root.next
+        time = self.__timer()
+        count = len(self.__links)
+        while curr is not root and curr.expire < time:
+            count -= 1
+            curr = curr.next
+        return count
+
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        root = self.__root
+        root.prev = root.next = root
+        for link in sorted(self.__links.values(), key=lambda obj: obj.expire):
+            link.next = root
+            link.prev = prev = root.prev
+            prev.next = root.prev = link
+        self.expire(self.__timer())
+
+    def __repr__(self, cache_repr=Cache.__repr__):
+        with self.__timer as time:
+            self.expire(time)
+            return cache_repr(self)
+
+    @property
+    def currsize(self):
+        with self.__timer as time:
+            self.expire(time)
+            return super(TTLCache, self).currsize
+
+    @property
+    def timer(self):
+        """The timer function used by the cache."""
+        return self.__timer
+
+    @property
+    def ttl(self):
+        """The time-to-live value of the cache's items."""
+        return self.__ttl
+
+    def expire(self, time=None):
+        """Remove expired items from the cache."""
+        if time is None:
+            time = self.__timer()
+        root = self.__root
+        curr = root.next
+        links = self.__links
+        cache_delitem = Cache.__delitem__
+        while curr is not root and curr.expire < time:
+            cache_delitem(self, curr.key)
+            del links[curr.key]
+            next = curr.next
+            curr.unlink()
+            curr = next
+
+    def clear(self):
+        with self.__timer as time:
+            self.expire(time)
+            Cache.clear(self)
+
+    def get(self, *args, **kwargs):
+        with self.__timer:
+            return Cache.get(self, *args, **kwargs)
+
+    def pop(self, *args, **kwargs):
+        with self.__timer:
+            return Cache.pop(self, *args, **kwargs)
+
+    def setdefault(self, *args, **kwargs):
+        with self.__timer:
+            return Cache.setdefault(self, *args, **kwargs)
+
+    def popitem(self):
+        """Remove and return the `(key, value)` pair least recently used that
+        has not already expired.
+
+        """
+        with self.__timer as time:
+            self.expire(time)
+            try:
+                key = next(iter(self.__links))
+            except StopIteration:
+                raise KeyError('%s is empty' % self.__class__.__name__)
+            else:
+                return (key, self.pop(key))
+
+    if hasattr(collections.OrderedDict, 'move_to_end'):
+        def __getlink(self, key):
+            value = self.__links[key]
+            self.__links.move_to_end(key)
+            return value
+    else:
+        def __getlink(self, key):
+            value = self.__links.pop(key)
+            self.__links[key] = value
+            return value
+
+
+#############################################
+######  END IMPORT OF CACHING LIBRARY  ######
+#############################################
+
+cache = TTLCache(
+    100, # Up to 100 items
+    5 * 60 # 5 minute cache life
+)
+userCache = TTLCache(
+    2, # Up to 2 items
+    60 # 1 minute cache life
+)
+
+class UnknownUserException(Exception):
+    pass
+
 def WAAuth(parser):
     parser.add_argument('apollo', help='Complete Apollo URL')
     parser.add_argument('username', help='WA Username')
     parser.add_argument('password', help='WA Password')
-    parser.add_argument('--remote_user', default='', help='If set, ignore password, set the header with the name supplied to this argument to the value of email')
 
 
 def OrgOrGuess(parser):
-    parser.add_argument('--org_json', type=file, help='Apollo JSON output, source for common name')
+    parser.add_argument('--org_json', type=argparse.FileType("r"), help='Apollo JSON output, source for common name')
     parser.add_argument('--org_raw', help='Common Name')
     parser.add_argument('--org_id', help='Organism ID')
 
 
 def CnOrGuess(parser):
     OrgOrGuess(parser)
-    parser.add_argument('--seq_fasta', type=file, help='Fasta file, IDs used as sequence sources')
+    parser.add_argument('--seq_fasta', type=argparse.FileType("r"), help='Fasta file, IDs used as sequence sources')
     parser.add_argument('--seq_raw', nargs='*', help='Sequence Names')
 
 
@@ -63,9 +448,11 @@
 
 def AssertUser(user_list):
     if len(user_list) == 0:
-        raise Exception("Unknown user. Please register first")
+        raise UnknownUserException()
+    elif len(user_list) == 1:
+        return user_list[0]
     else:
-        return user_list[0]
+        raise Exception("Too many users!")
 
 
 def AssertAdmin(user):
@@ -81,6 +468,8 @@
         self.apollo_url = url
         self.username = username
         self.password = password
+        # TODO: Remove after apollo 2.0.6.
+        self.clientToken = time.time()
 
         self.annotations = AnnotationsClient(self)
         self.groups = GroupsClient(self)
@@ -93,6 +482,20 @@
     def __str__(self):
         return '<WebApolloInstance at %s>' % self.apollo_url
 
+    def requireUser(self, email):
+        cacheKey = 'user-list'
+        try:
+            # Get the cached value
+            data = userCache[cacheKey]
+        except KeyError:
+            # If we hit a key error above, indicating that
+            # we couldn't find the key, we'll simply re-request
+            # the data
+            data = self.users.loadUsers()
+            userCache[cacheKey] = data
+
+        return AssertUser([x for x in data if x.username == email])
+
 
 class GroupObj(object):
     def __init__(self, **kwargs):
@@ -162,6 +565,7 @@
         data.update({
             'username': self._wa.username,
             'password': self._wa.password,
+            'clientToken': self._wa.clientToken,
         })
 
         r = requests.post(url, data=json.dumps(data), headers=headers,
@@ -266,9 +670,22 @@
         data.update(self._extra_data)
         return self.request('setSymbol', data)
 
-    def getComments(self, features):
+    def getComments(self, feature_id):
         data = {
-            'features': features,
+            'features': [{'uniquename': feature_id}],
+        }
+        data = self._update_data(data)
+        return self.request('getComments', data)
+
+    def addComments(self, feature_id, comment):
+        #TODO: This is probably not great and will delete comments, if I had to guess...
+        data = {
+            'features': [
+                {
+                    'uniquename': feature_id,
+                    'comments': [comment]
+                }
+            ],
         }
         data = self._update_data(data)
         return self.request('getComments', data)
@@ -297,8 +714,9 @@
         if not trustme:
             raise NotImplementedError("Waiting on better docs from project. If you know what you are doing, pass trustme=True to this function.")
 
-        data = {}
-        data.update(feature)
+        data = {
+            'features': feature,
+        }
         data = self._update_data(data)
         return self.request('addFeature', data)
 
@@ -465,12 +883,32 @@
         }
         return self.request('getOrganismPermissionsForGroup', data)
 
+    def loadGroup(self, group):
+        return self.loadGroupById(group.groupId)
+
+    def loadGroupById(self, groupId):
+        res = self.request('loadGroups', {'groupId': groupId})
+        if isinstance(res, list):
+            # We can only match one, right?
+            return GroupObj(**res[0])
+        else:
+            return res
+
+    def loadGroupByName(self, name):
+        res = self.request('loadGroups', {'name': name})
+        if isinstance(res, list):
+            # We can only match one, right?
+            return GroupObj(**res[0])
+        else:
+            return res
+
     def loadGroups(self, group=None):
-        data = {}
+        res = self.request('loadGroups', {})
+        data = [GroupObj(**x) for x in res]
         if group is not None:
-            data['groupId'] = group.groupId
+            data = [x for x in data if x.name == group]
 
-        return self.request('loadGroups', data)
+        return data
 
     def deleteGroup(self, group):
         data = {
@@ -493,11 +931,11 @@
                                  export=False):
         data = {
             'groupId': group.groupId,
-            'name': organismName,
-            'administrate': administrate,
-            'write': write,
-            'export': export,
-            'read': read,
+            'organism': organismName,
+            'ADMINISTRATE': administrate,
+            'WRITE': write,
+            'EXPORT': export,
+            'READ': read,
         }
         return self.request('updateOrganismPermission', data)
 
@@ -706,7 +1144,7 @@
         org = self._wa.organisms.findOrganismByCn(cn)
         self._wa.annotations.setSequence(org['commonName'], org['id'])
 
-        data = StringIO.StringIO(self._wa.io.write(
+        data = io.StringIO(self._wa.io.write(
             exportType='GFF3',
             seqType='genomic',
             exportAllSequences=False,
@@ -816,9 +1254,9 @@
 def featuresToFeatureSchema(features):
     compiled = []
     for feature in features:
-        if feature.type != 'gene':
-            log.warn("Not able to handle %s features just yet...", feature.type)
-            continue
+        # if feature.type != 'gene':
+            # log.warn("Not able to handle %s features just yet...", feature.type)
+            # continue
 
         for x in _yieldFeatData([feature]):
             compiled.append(x)
@@ -834,6 +1272,10 @@
         'ADMINISTRATE' in x['permissions'] or
         user.role == 'ADMIN'
     }
+
+    if 'error' in orgs:
+        raise Exception("Error received from Apollo server: \"%s\"" % orgs['error'])
+
     return [
         (org['commonName'], org['id'], False)
         for org in sorted(orgs, key=lambda x: x['commonName'])
@@ -841,18 +1283,114 @@
     ]
 
 
+def galaxy_list_groups(trans, *args, **kwargs):
+    email = trans.get_user().email
+    wa = WebApolloInstance(
+        os.environ['GALAXY_WEBAPOLLO_URL'],
+        os.environ['GALAXY_WEBAPOLLO_USER'],
+        os.environ['GALAXY_WEBAPOLLO_PASSWORD']
+    )
+    # Assert that the email exists in apollo
+    try:
+        gx_user = wa.requireUser(email)
+    except UnknownUserException:
+        return []
+
+    # Key for cached data
+    cacheKey = 'groups-' + email
+    # We don't want to trust "if key in cache" because between asking and fetch
+    # it might through key error.
+    if cacheKey not in cache:
+        # However if it ISN'T there, we know we're safe to fetch + put in
+        # there.
+        data = _galaxy_list_groups(wa, gx_user, *args, **kwargs)
+        cache[cacheKey] = data
+        return data
+    try:
+        # The cache key may or may not be in the cache at this point, it
+        # /likely/ is. However we take no chances that it wasn't evicted between
+        # when we checked above and now, so we reference the object from the
+        # cache in preparation to return.
+        data = cache[cacheKey]
+        return data
+    except KeyError:
+        # If access fails due to eviction, we will fail over and can ensure that
+        # data is inserted.
+        data = _galaxy_list_groups(wa, gx_user, *args, **kwargs)
+        cache[cacheKey] = data
+        return data
+
+
+def _galaxy_list_groups(wa, gx_user, *args, **kwargs):
+    # Fetch the groups.
+    group_data = []
+    for group in wa.groups.loadGroups():
+        # Reformat
+        group_data.append((group.name, group.groupId, False))
+    return group_data
+
+
 def galaxy_list_orgs(trans, *args, **kwargs):
     email = trans.get_user().email
+    wa = WebApolloInstance(
+        os.environ['GALAXY_WEBAPOLLO_URL'],
+        os.environ['GALAXY_WEBAPOLLO_USER'],
+        os.environ['GALAXY_WEBAPOLLO_PASSWORD']
+    )
+    try:
+        gx_user = wa.requireUser(email)
+    except UnknownUserException:
+        return []
 
-    wa = WebApolloInstance(
-        os.environ.get('GALAXY_WEBAPOLLO_URL', 'https://example.com'),
-        os.environ.get('GALAXY_WEBAPOLLO_USER', 'admin'),
-        os.environ.get('GALAXY_WEBAPOLLO_PASSWORD', 'admin')
-    )
+    # Key for cached data
+    cacheKey = 'orgs-' + email
+    if cacheKey not in cache:
+        data = _galaxy_list_orgs(wa, gx_user, *args, **kwargs)
+        cache[cacheKey] = data
+        return data
+    try:
+        data = cache[cacheKey]
+        return data
+    except KeyError:
+        data = _galaxy_list_orgs(wa, gx_user, *args, **kwargs)
+        cache[cacheKey] = data
+        return data
+
 
-    gx_user = AssertUser(wa.users.loadUsers(email=email))
+def _galaxy_list_orgs(wa, gx_user, *args, **kwargs):
+    # Fetch all organisms
     all_orgs = wa.organisms.findAllOrganisms()
+    # Figure out which are accessible to the user
+    orgs = accessible_organisms(gx_user, all_orgs)
+    # Return org list
+    return orgs
+
+## This is all for implementing the command line interface for testing.
+
+class obj(object):
+    pass
+
+
+class fakeTrans(object):
+
+    def __init__(self, username):
+        self.un = username
 
-    orgs = accessible_organisms(gx_user, all_orgs)
+    def get_user(self):
+        o = obj()
+        o.email = self.un
+        return o
 
-    return orgs
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Test access to apollo server')
+    parser.add_argument('email', help='Email of user to test')
+    parser.add_argument('--action', choices=['org', 'group'], default='org', help='Data set to test, fetch a list of groups or users known to the requesting user.')
+    args = parser.parse_args()
+
+    trans = fakeTrans(args.email)
+    if args.action == 'org':
+        for f in galaxy_list_orgs(trans):
+            print(f)
+    else:
+        for f in galaxy_list_groups(trans):
+            print(f)