Mercurial > repos > eric-rasche > apollo
changeset 0:6002cc0df04e draft
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
author | eric-rasche |
---|---|
date | Tue, 03 May 2016 13:38:55 -0400 |
parents | |
children | f7d57e56f322 |
files | README.md add_organism.py add_organism.xml clean_org.py create_account.py create_account.xml create_features_from_gff3.py create_features_from_gff3.xml create_or_update_organism.py create_or_update_organism.xml export.py export.xml fetch_organism_jbrowse.py fetch_organism_jbrowse.xml find_organism.py find_organism.xml gff3_cleaner.py gff3_cleaner.xml json2iframe.py json2iframe.xml list_organisms.py list_organisms.xml macros.xml renumber.py renumber.xml test-data/bad-model.gff3 test-data/good-model.gff3 update_organism.py update_organism.xml webapollo.py |
diffstat | 30 files changed, 1930 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,8 @@ +# galaxy-webapollo + +Galaxy tools to interface with WebApollo. +The webapollo.py file is also [separately available](https://github.com/erasche/python-apollo) as a pip-installable package. + +## License + +All Python scripts, wrappers, and webapollo.py are licensed under the MIT license.
#!/usr/bin/env python
"""add_organism.py -- register a new organism with WebApollo.

Creates the organism from a JBrowse data directory, grants the requesting
user read/write/export permission on it, and prints the matching organism
record(s) as JSON on stdout.
"""
import json
import argparse
import time
from webapollo import WebApolloInstance
import logging
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Add an organism to WebApollo and grant a user access to it')
    parser.add_argument('apollo', help='Complete Apollo URL')
    parser.add_argument('username', help='WA Admin Username')
    parser.add_argument('password', help='WA Admin Password')

    parser.add_argument('cn', help='Organism Common Name')
    parser.add_argument('jbrowse', help='JBrowse Data Directory')
    parser.add_argument('email', help='User Email')
    parser.add_argument('--blatdb', help='BlatDB Directory')
    parser.add_argument('--genus', help='Organism Genus')
    parser.add_argument('--species', help='Organism Species')
    parser.add_argument('--public', action='store_true', help='Make organism public')

    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)
    # User must have an account
    gx_user = wa.users.loadUsers(email=args.email)
    if not gx_user:
        raise Exception("Unknown user. Please register first")

    log.info("Adding Organism")
    orgs = wa.organisms.addOrganism(
        args.cn,
        args.jbrowse,
        blatdb=args.blatdb,
        genus=args.genus,
        species=args.species,
        public=args.public
    )
    # The response is filtered by common name before it is printed, so it
    # presumably lists more than just the new organism; report the id of the
    # entry that actually matches rather than whatever happens to come first.
    created = [org for org in orgs if org['commonName'] == args.cn]
    if created:
        log.info("Success: %s", created[0]['id'])
    else:
        log.warning("Organism %s not found in the addOrganism response", args.cn)

    # Must sleep before we're ready to handle
    time.sleep(1)
    log.info("Updating permissions for %s on %s", gx_user[0], args.cn)
    wa.users.updateOrganismPermission(
        gx_user[0], args.cn,
        write=True,
        export=True,
        read=True,
    )

    print(json.dumps(created, indent=2))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/add_organism.xml Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,58 @@ +<?xml version="1.0"?> +<tool id="edu.tamu.cpt2.webapollo.add_organism" name="WA2: Add Organism" version="1.5"> + <description>registers a new genome with WebApollo</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="aggressive"><![CDATA[ +cp -R ${jbrowse.extra_files_path}/data/ /opt/apollo/data/galaxy/${jbrowse.id}; + +python $__tool_directory__/add_organism.py + +@AUTH@ + +#if str($blatdb) != "None": + --blatdb "$blatdb" +#end if + +--genus "$genus" +--species "$species" +$public + +@CN_OR_GUESS@ + +"/opt/apollo/data/galaxy/${jbrowse.id}" + +$__user_email__ + +> $output]]></command> + <inputs> + <param name="jbrowse" type="data" format="html" label="JBrowse HTML Output" /> + <param name="blatdb" type="data" label="Blat DB" optional="True" /> + + <expand macro="cn_or_guess" /> + <param name="genus" type="text" label="Host Bacteria" optional="True" /> + <param name="species" type="text" label="Phage Name" optional="True" /> + <param name="public" type="boolean" truevalue="--public" falsevalue="" label="Is Organism Public" /> + </inputs> + <outputs> + <data format="json" name="output"/> + </outputs> + <help><![CDATA[ +**NOTA BENE** + +All organism data is currently shared. By using this tool your annotation data will be visible to your fellow lab members. +This will be fixed at a later date. + +This only works with JBrowse v0.4 and above! + +**What it does** + +Adds an organism to the Apollo database. The tool takes the output of a +JBrowse run as that contains all of the necessary information for which +tracks are appropriate for a given analysis. + +@REFERENCES@ + ]]></help> +</tool>
#!/usr/bin/env python
"""clean_org.py -- delete every annotated feature of a WebApollo organism.

Prints the unique name of each feature on stdout as it is deleted.
"""
import argparse
from webapollo import WebApolloInstance
import logging
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Delete all annotated features of an organism in WebApollo')
    parser.add_argument('apollo', help='Complete Apollo URL')
    parser.add_argument('username', help='WA Admin Username')
    parser.add_argument('password', help='WA Admin Password')

    parser.add_argument('cn', help='Organism Common Name')
    parser.add_argument('email', help='User Email')
    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)
    # User must have an account
    gx_user = wa.users.loadUsers(email=args.email)
    if not gx_user:
        raise Exception("Unknown user. Please register first")

    # TODO: Check user perms on org.
    org = wa.organisms.findOrganismByCn(args.cn)

    # Select the sequence/organism the annotation calls below operate on.
    wa.annotations.setSequence(args.cn, org['id'])

    # A response without a 'features' key is treated as "nothing to delete".
    for feature in wa.annotations.getFeatures().get('features', []):
        unique_name = feature['uniquename']
        print(unique_name)
        wa.annotations.deleteFeatures([unique_name])
#!/usr/bin/env python
"""create_account.py -- register a WebApollo account for a Galaxy user.

Creates (or refreshes) the account named after the user's email address,
assigns it a freshly generated password, adds it to the course group, and
prints the credentials on stdout.
"""
import random
import argparse
import time

# Alphabet used for generated passwords.
PASSWORD_CHARS = 'qwrtpsdfghjklzxcvbnm'


def pwgen(length):
    """Return a random password of ``length`` characters from PASSWORD_CHARS.

    The characters are drawn with ``random.SystemRandom`` (OS entropy source)
    because the module-level ``random`` functions are not suitable for
    generating secrets.
    """
    rng = random.SystemRandom()
    return ''.join(rng.choice(PASSWORD_CHARS) for _ in range(length))


def main():
    """Parse the command line and create/update the WebApollo account."""
    # Imported here so that pwgen() stays importable on machines that do not
    # have the Apollo client module available.
    from webapollo import WebApolloInstance, GroupObj

    parser = argparse.ArgumentParser(description='Sample script to add an account via web services')
    parser.add_argument('apollo', help='Complete Apollo URL')
    parser.add_argument('username', help='WA Admin Username')
    parser.add_argument('password', help='WA Admin Password')

    parser.add_argument('email', help='User Email')
    parser.add_argument('--first', help='First Name', default='J')
    parser.add_argument('--last', help='Last Name', default='Aggie')
    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)

    password = pwgen(12)
    wa.users.createUser(args.email, args.first, args.last, password, role='user')
    # Give the server a moment before the new user is looked up.
    time.sleep(1)
    matches = [u for u in wa.users.loadUsers() if u.username == args.email]
    if not matches:
        # Previously this surfaced as a bare IndexError.
        raise Exception("User %s was not found after creation" % args.email)
    user = matches[0]

    bich464 = GroupObj(name="bich464-2016-spring")

    # Update name, regen password if the user ran it again
    wa.users.updateUser(user, args.email, args.first, args.last, password)
    # Add to bich464 group
    wa.users.addUserToGroup(bich464, user)

    print('Username: %s\nPassword: %s' % (args.email, password))


if __name__ == '__main__':
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/create_account.xml Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,33 @@ +<?xml version="1.0"?> +<tool id="edu.tamu.cpt2.webapollo.create_account" name="WA2: Register Account" version="1.5"> + <description>registers a new account with WebApollo</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="aggressive"><![CDATA[ +python $__tool_directory__/create_account.py + +@AUTH@ + +$__user_email__ + +--first "$first" +--last "$last" +> $output]]></command> + <inputs> + <param name="first" type="text" label="First Name" /> + <param name="last" type="text" label="Last Name" /> + </inputs> + <outputs> + <data format="txt" name="output" label="WebApollo Credentials"/> + </outputs> + <help><![CDATA[ +**What it does** + +Registers an account with WebApollo. Just click run, then view +the output file for your password. + +@REFERENCES@ + ]]></help> +</tool>
#!/usr/bin/env python
"""create_features_from_gff3.py -- load GFF3 features into a WebApollo organism.

Every record of the GFF3 file is converted with featuresToFeatureSchema and
submitted to Apollo as new features.
"""
import argparse
from webapollo import WebApolloInstance, featuresToFeatureSchema
from BCBio import GFF
import logging
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Create WebApollo features from a GFF3 file')
    parser.add_argument('apollo', help='Complete Apollo URL')
    parser.add_argument('username', help='WA Admin Username')
    parser.add_argument('password', help='WA Admin Password')

    parser.add_argument('cn', help='Organism Common Name')
    parser.add_argument('email', help='User Email')
    # argparse.FileType replaces the Python-2-only ``file`` builtin.
    parser.add_argument('gff3', type=argparse.FileType('r'), help='GFF3 file')
    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)
    # User must have an account
    gx_user = wa.users.loadUsers(email=args.email)
    if not gx_user:
        raise Exception("Unknown user. Please register first")

    # TODO: Check user perms on org.

    org = wa.organisms.findOrganismByCn(args.cn)
    # Select the sequence/organism the annotation calls below operate on.
    wa.annotations.setSequence(args.cn, org['id'])

    for rec in GFF.parse(args.gff3):
        featureData = featuresToFeatureSchema(rec.features)
        wa.annotations.addFeature(
            {
                'features': featureData
            }, trustme=True
        )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/create_features_from_gff3.xml Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,36 @@ +<?xml version="1.0"?> +<tool id="edu.tamu.cpt2.webapollo.feat_from_gff3" name="WA2: Create Features from GFF3" version="0.1"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="aggressive"><![CDATA[ +python $__tool_directory__/create_features_from_gff3.py +@AUTH@ +@CN_OR_GUESS@ +"$__user_email__" +$gff3_data +> $output]]></command> + <inputs> + <expand macro="cn_or_guess" /> + <expand macro="gff3_input" /> + </inputs> + <outputs> + <data format="json" name="output"/> + </outputs> + <help><![CDATA[ +**NOTA BENE** + +This is **incredibly, highly experimental** + +DO NOT: + +- Run on gff3 referencing multiple reference sequences/contigs +- Expect it to work well +- Expect it to work at all + + +@REFERENCES@ +]]></help> +</tool>
#!/usr/bin/env python
"""create_or_update_organism.py -- add an organism to WebApollo, or update it.

If an organism with the given common name can be looked up it is updated in
place, otherwise it is created. The requesting user is then granted
read/write/export permission and the matching organism record(s) are printed
as JSON on stdout.
"""
import json
import argparse
import time
from webapollo import WebApolloInstance
import logging
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Create an organism in WebApollo, or update it if it already exists')
    parser.add_argument('apollo', help='Complete Apollo URL')
    parser.add_argument('username', help='WA Admin Username')
    parser.add_argument('password', help='WA Admin Password')

    parser.add_argument('cn', help='Organism Common Name')
    parser.add_argument('jbrowse', help='JBrowse Data Directory')
    parser.add_argument('email', help='User Email')
    parser.add_argument('--blatdb', help='BlatDB Directory')
    parser.add_argument('--genus', help='Organism Genus')
    parser.add_argument('--species', help='Organism Species')
    parser.add_argument('--public', action='store_true', help='Make organism public')

    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)
    # User must have an account
    gx_user = wa.users.loadUsers(email=args.email)
    if not gx_user:
        raise Exception("Unknown user. Please register first")

    log.info("Determining if add or update required")
    try:
        org = wa.organisms.findOrganismByCn(args.cn)
    except Exception as exc:
        # Deliberately best-effort: any lookup failure is treated as "the
        # organism does not exist yet". The reason is logged so that a real
        # connection/authentication problem is not completely hidden.
        log.info("\tLookup of %s failed (%s); assuming it does not exist", args.cn, exc)
        org = None

    if org:
        log.info("\tUpdating Organism")
        data = wa.organisms.updateOrganismInfo(
            org['id'],
            args.cn,
            args.jbrowse,
            # mandatory
            blatdb=args.blatdb,
            genus=args.genus,
            species=args.species,
            public=args.public
        )
    else:
        # New organism
        log.info("\tAdding Organism")
        data = wa.organisms.addOrganism(
            args.cn,
            args.jbrowse,
            blatdb=args.blatdb,
            genus=args.genus,
            species=args.species,
            public=args.public
        )

    # Must sleep before we're ready to handle
    time.sleep(1)
    log.info("Updating permissions for %s on %s", gx_user[0], args.cn)
    wa.users.updateOrganismPermission(
        gx_user[0], args.cn,
        write=True,
        export=True,
        read=True,
    )

    # Only report the organism that was just created/updated.
    data = [o for o in data if o['commonName'] == args.cn]
    print(json.dumps(data, indent=2))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/create_or_update_organism.xml Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,58 @@ +<?xml version="1.0"?> +<tool id="edu.tamu.cpt2.webapollo.create_or_update" name="WA2: Create or Update Organism" version="1.5"> + <description>will create the organism if it doesn't exist, and update otherwise</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="aggressive"><![CDATA[ +cp -R ${jbrowse.extra_files_path}/data/ /opt/apollo/data/galaxy/${jbrowse.id}; + +python $__tool_directory__/create_or_update_organism.py + +@AUTH@ + +#if str($blatdb) != "None": + --blatdb "$blatdb" +#end if + +--genus "$genus" +--species "$species" +$public + +@CN_OR_GUESS@ + +"/opt/apollo/data/galaxy/${jbrowse.id}" + +$__user_email__ + +> $output]]></command> + <inputs> + <param name="jbrowse" type="data" format="html" label="JBrowse HTML Output" /> + <param name="blatdb" type="data" label="Blat DB" optional="True" /> + + <expand macro="cn_or_guess" /> + <param name="genus" type="text" label="Host Bacteria" optional="True" /> + <param name="species" type="text" label="Phage Name" optional="True" /> + <param name="public" type="boolean" truevalue="--public" falsevalue="" label="Is Organism Public" /> + </inputs> + <outputs> + <data format="json" name="output"/> + </outputs> + <help><![CDATA[ +**NOTA BENE** + +All organism data is currently shared. By using this tool your annotation data will be visible to your fellow lab members. +This will be fixed at a later date. + +This only works with JBrowse v0.4 and above! + +**What it does** + +Adds an organism to the Apollo database. The tool takes the output of a +JBrowse run as that contains all of the necessary information for which +tracks are appropriate for a given analysis. + +@REFERENCES@ + ]]></help> +</tool>
#!/usr/bin/env python
"""export.py -- export GFF3 annotations and FASTA sequence from WebApollo.

Requests a GFF3 export (with embedded FASTA) from Apollo, then writes each
parsed record to the --gff and/or --fasta output files.
"""
try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3
import sys
import json
import argparse
from Bio import SeqIO
from BCBio import GFF
from webapollo import WebApolloInstance

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Export GFF3 annotations and FASTA sequence from WebApollo')
    parser.add_argument('apollo', help='Complete Apollo URL')
    parser.add_argument('username', help='WA Username')
    parser.add_argument('password', help='WA Password')

    parser.add_argument('commonName', nargs='+', help='Sequence Unique Names')

    parser.add_argument('--gff', type=argparse.FileType('w'))
    parser.add_argument('--fasta', type=argparse.FileType('w'))

    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)

    data = StringIO(wa.io.write(
        exportType='GFF3',
        seqType='genomic',
        exportAllSequences=False,
        exportGff3Fasta=True,
        output="text",
        exportFormat="text",
        # TODO: CPT specific convention!!!!!!!!
        organism=args.commonName,
        sequences=args.commonName
    ))
    data.seek(0)

    for record in GFF.parse(data):
        # Annotations are cleared before the record is written back out.
        record.annotations = {}
        # Both outputs are optional on the command line; only write the ones
        # that were requested instead of crashing on a missing file handle.
        if args.gff is not None:
            GFF.write([record], args.gff)
        record.description = ""
        if args.fasta is not None:
            SeqIO.write([record], args.fasta, 'fasta')
    sys.exit()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/export.xml Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,33 @@ +<?xml version="1.0"?> +<tool id="edu.tamu.cpt2.webapollo.export" name="WA2: Retrieve/Export data" version="1.4"> + <description>from WebApollo into Galaxy</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="aggressive"><![CDATA[ +python $__tool_directory__/export.py + +@AUTH@ + +@CN_OR_GUESS@ + +--gff $gff_out +--fasta $fasta_out + +]]></command> + <inputs> + <expand macro="cn_or_guess" /> + </inputs> + <outputs> + <data format="gff3" name="gff_out" /> + <data format="fasta" name="fasta_out"/> + </outputs> + <help><![CDATA[ +**What it does** + +Exports the GFF3/Fasta sequence from WebApollo. + +@REFERENCES@ + ]]></help> +</tool>
#!/usr/bin/env python
"""fetch_organism_jbrowse.py -- copy an organism's JBrowse data out of WebApollo.

Looks the organism up by common name and copies its data directory to
``<target_dir>/data`` with ``cp -R``.
"""
import os
import json
import argparse
import time
from webapollo import WebApolloInstance
import logging
import subprocess
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Fetch the JBrowse data directory of an existing WebApollo organism')
    parser.add_argument('apollo', help='Complete Apollo URL')
    parser.add_argument('username', help='WA Admin Username')
    parser.add_argument('password', help='WA Admin Password')
    parser.add_argument('cn', help='Organism Common Name')
    parser.add_argument('email', help='User Email')
    parser.add_argument('target_dir', help='Target directory')

    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)
    # User must have an account
    gx_user = wa.users.loadUsers(email=args.email)
    if not gx_user:
        raise Exception("Unknown user. Please register first")
    org = wa.organisms.findOrganismByCn(args.cn)

    if not os.path.exists(args.target_dir):
        os.makedirs(args.target_dir)

    # Argument list (no shell), so the paths need no quoting.
    cmd = [
        'cp', '-R',
        org['directory'],
        os.path.join(args.target_dir, 'data')
    ]
    # Echo the command for the job log before running it.
    print(' '.join(cmd))
    subprocess.check_call(cmd)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fetch_organism_jbrowse.xml Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,65 @@ +<?xml version="1.0"?> +<tool id="edu.tamu.cpt2.webapollo.fetch_jbrowse" name="WA2: Fetch JBrowse" version="1.4"> + <description>fetches JBrowse data for existing organisms</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="aggressive"><![CDATA[ +mkdir -p $jbrowse.files_path/; + +python $__tool_directory__/fetch_organism_jbrowse.py +@AUTH@ +@CN_OR_GUESS@ +$__user_email__ +$jbrowse.files_path/; + +cp $dummyIndex $jbrowse; + +]]></command> + <configfiles> + <configfile name="dummyIndex"> + <![CDATA[ + <html> + <head> + </head> + <body> + <h1>JBrowse Data Directory</h1> + <p> + Hi! This is not a full JBrowse instance. JBrowse v0.4(+?) + started shipping with the ability to produce just the + "data" directory from a JBrowse instance, rather than a + complete, standalone instance. This was intended to be used + with the in-development Apollo integration, but may have other + uses as well. + </p> + <p> + <u>This is not usable on its own</u>. The output dataset may be + used with Apollo, or may be passed through the "JBrowse - + Convert to Standalone" tool in Galaxy to "upgrade" to a full + JBrowse instance. + </p> + </body> + </html> + ]]> + </configfile> + </configfiles> + <inputs> + <expand macro="cn_or_guess" /> + </inputs> + <outputs> + <data format="html" name="jbrowse"/> + </outputs> + <help><![CDATA[ +**NOTA BENE** + +All organism data is currently shared. By using this tool your annotation data will be visible to your fellow lab members. +This will be fixed at a later date. + +**What it does** + +Fetches the JBrowse directory from Apollo back into Galaxy. + +@REFERENCES@ +]]></help> +</tool>
#!/usr/bin/env python
"""find_organism.py -- print WebApollo organism records as JSON.

Lists all organisms; with --commonName only the organisms whose common name
matches exactly are printed.
"""
import json
import argparse
from webapollo import WebApolloInstance

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Find a WebApollo organism by its common name')
    parser.add_argument('apollo', help='Complete Apollo URL')
    parser.add_argument('username', help='WA Admin Username')
    parser.add_argument('password', help='WA Admin Password')

    parser.add_argument('--commonName', help='Common Name')

    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)
    data = wa.organisms.findAllOrganisms()
    if args.commonName is not None:
        data = [o for o in data if o['commonName'] == args.commonName]

    print(json.dumps(data, indent=2))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/find_organism.xml Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,30 @@ +<?xml version="1.0"?> +<tool id="edu.tamu.cpt2.webapollo.find_organism" name="WA2: Find Organism" version="1.0"> + <description>finds an organism's details by common name</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="aggressive"><![CDATA[ +python $__tool_directory__/find_organism.py + +@AUTH@ + +--commonName +@CN_OR_GUESS@ + +> $output]]></command> + <inputs> + <expand macro="cn_or_guess" /> + </inputs> + <outputs> + <data format="json" name="output"/> + </outputs> + <help><![CDATA[ +**What it does** + +Searches for an organism's information in the Apollo database. + +@REFERENCES@ +]]></help> +</tool>
#!/usr/bin/env python
"""gff3_cleaner.py -- fix up gene models exported from Apollo.

Reads a GFF3 file, relabels short "exon" features as Shine-Dalgarno
sequences, drops non-canonical splice-site features, and writes the cleaned
records to stdout.
"""
import sys
import argparse
from gff3 import feature_lambda, feature_test_type
from BCBio import GFF
import logging
logging.basicConfig(level=logging.WARN)
log = logging.getLogger(name='pav')


def coding_genes(feature_list):
    """Yield the 'gene' features of ``feature_list`` that contain a CDS.

    Selection is delegated to feature_lambda/feature_test_type from the
    project's gff3 module.
    """
    for x in feature_lambda(feature_list, feature_test_type, {'type': 'gene'}, subfeatures=True):
        if len(list(feature_lambda(x.sub_features, feature_test_type, {'type': 'CDS'}, subfeatures=False))) > 0:
            yield x


def genes(feature_list, feature_type='gene'):
    """Yield the features of ``feature_list`` selected for ``feature_type``."""
    for x in feature_lambda(feature_list, feature_test_type,
                            {'type': feature_type},
                            subfeatures=True):
        yield x


def fix_apollo_issues(annotations, user_email=None):
    """Yield each record of the GFF3 handle ``annotations`` with fixed models.

    For every mRNA under a gene:

    - exon sub-features no longer than 10 are retyped to
      'Shine_Dalgarno_sequence';
    - non-canonical splice-site sub-features are removed.

    ``user_email`` is accepted for command-line compatibility but unused.
    """
    # NOTE(review): the loop nesting was reconstructed from a
    # whitespace-collapsed source; confirm against the upstream repository.
    for rec in GFF.parse(annotations):
        for feat in rec.features:
            if feat.type != 'gene':
                continue

            for sf in feat.sub_features:
                if sf.type != 'mRNA':
                    continue

                for ssf in sf.sub_features:
                    if ssf.type != 'exon':
                        continue

                    if len(ssf) > 10:
                        continue

                    ssf.type = 'Shine_Dalgarno_sequence'

                sf.sub_features = [x for x in sf.sub_features if x.type not in
                                   ('non_canonical_five_prime_splice_site',
                                    'non_canonical_three_prime_splice_site')]
        yield rec


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='rebase gff3 features against parent locations', epilog="")
    # argparse.FileType replaces the Python-2-only ``file`` builtin.
    parser.add_argument('annotations', type=argparse.FileType('r'), help='Parent GFF3 annotations')
    # parser.add_argument('genome', type=file, help='Genome Sequence')
    parser.add_argument('--user_email')

    args = parser.parse_args()
    for rec in fix_apollo_issues(**vars(args)):
        rec.annotations = {}
        GFF.write([rec], sys.stdout)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gff3_cleaner.xml Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,35 @@ +<?xml version="1.0"?> +<tool id="edu.tamu.cpt2.webapollo.fix_model" name="Fix Apollo Gene Model" version="1.0"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="stdio"/> + <command><![CDATA[ +$__tool_directory__/gff3_cleaner.py +$gff3_data +> $output]]></command> + <inputs> + <expand macro="gff3_input" /> + </inputs> + <outputs> + <data format="gff3" name="output"/> + </outputs> + <tests> + <test> + <param name="gff3_data" value="bad-model.gff3" /> + <output name="output" file="good-model.gff3" /> + </test> + </tests> + <help><![CDATA[ +**What it does** + +This tool corrects the gene model to fix some issues with Apollo's model + +- small "exons" are returned to their correct state of "Shine_Dalgarno_sequence" +- non_canonical_*_splice_sites are removed + +@REFERENCES@ + ]]></help> +</tool>
#!/usr/bin/env python
"""json2iframe.py -- wrap a WebApollo organism listing in an IFrame page.

Reads the JSON organism listing produced by the other tools and prints an
HTML page that embeds Apollo's annotator for that organism.
"""
import json
import argparse

# https://cpt.tamu.edu/apollo/annotator/loadLink?loc=NC_005880:0..148317&organism=326&tracks=
HTML_TPL = """
<html>
    <head>
        <title>Embedded Apollo Access</title>
        <style type="text/css">
            body {{
                margin: 0;
            }}
            iframe {{
                border: 0;
                width: 100%;
                height: 100%
            }}
        </style>
    </head>
    <body>
        <iframe src="{base_url}/annotator/loadLink?loc={chrom}&organism={orgId}&tracklist=1"></iframe>
    </body>
</html>
    """


def render_iframe(base_url, organisms):
    """Return the HTML page embedding Apollo for the single listed organism.

    :param base_url: Apollo base URL, used as prefix of the IFrame source.
    :param organisms: parsed organism listing; must hold exactly one entry
        carrying an ``'id'`` key.
    :raises Exception: if the listing is empty or has more than one entry.
    """
    if not organisms:
        # Previously an empty listing surfaced as a bare IndexError.
        raise Exception("No organism listed. Was the organism found in Apollo?")
    if len(organisms) > 1:
        raise Exception("More than one organism listed. TODO. Contact esr@tamu.edu")

    return HTML_TPL.format(base_url=base_url, chrom="", orgId=organisms[0]['id'])


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Generate an HTML page embedding Apollo in an IFrame')
    parser.add_argument('apollo', help='Complete Apollo URL')
    # argparse.FileType replaces the Python-2-only ``file`` builtin.
    parser.add_argument('json', type=argparse.FileType('r'), help='JSON Data')

    args = parser.parse_args()

    data = json.load(args.json)
    print(render_iframe(args.apollo, data))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/json2iframe.xml Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,27 @@ +<?xml version="1.0"?> +<tool id="edu.tamu.cpt2.webapollo.iframe" name="WA2: Annotate" version="1.4"> + <description>opens an IFrame to Apollo</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="aggressive"><![CDATA[ +python $__tool_directory__/json2iframe.py +@URL@ +$json_file +> $output]]></command> + <inputs> + <param name="json_file" type="data" format="json" label="WebApollo Organism Listing" /> + </inputs> + <outputs> + <data format="html" name="output"/> + </outputs> + <help><![CDATA[ +**What it does** + +Given a json file that's output by the Galaxy<->WebApollo bridge, this +generates a nice little IFrame that you can use to access Apollo + +@REFERENCES@ + ]]></help> +</tool>
#!/usr/bin/env python
"""list_organisms.py -- print every organism known to WebApollo as JSON."""
import json
import argparse
from webapollo import WebApolloInstance

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='List all organisms known to WebApollo')
    parser.add_argument('apollo', help='Complete Apollo URL')
    parser.add_argument('username', help='WA Username')
    parser.add_argument('password', help='WA Password')

    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)
    print(json.dumps(wa.organisms.findAllOrganisms(), indent=2))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/list_organisms.xml Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,24 @@ +<?xml version="1.0"?> +<tool id="edu.tamu.cpt2.webapollo.list_organism" name="WA2: List Organisms" version="1.2"> + <description>lists all organisms known to Apollo</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="aggressive"><![CDATA[ +python $__tool_directory__/list_organisms.py +@AUTH@ +> $output]]></command> + <inputs> + </inputs> + <outputs> + <data format="json" name="output"/> + </outputs> + <help><![CDATA[ +**What it does** + +Lists the organisms in Apollo + +@REFERENCES@ + ]]></help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,96 @@ +<?xml version="1.0"?> +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <yield/> + </requirements> + </xml> + <token name="@URL@"> +https://cpt.tamu.edu/apollo + </token> + <token name="@AUTH@"> +@URL@ +galaxy@cpt.tamu.edu +\$GALAXY_WEBAPOLLO_PASSWORD + </token> + <xml name="auth_file"> + <param label="Apollo Authentication File" + name="apolloCredentialsFile" type="data" format="json" /> + </xml> + + <token name="@CN_OR_GUESS@"> +<![CDATA[ +#if $cn_source.source_select == "auto": + \$(cat $cn_source.cn_file | grep '^>' | head -n 1 | sed 's/\s.*//g;s/>//g') +#elif $cn_source.source_select == "auto_json": + \$(cat $cn_source.cn_file | grep 'commonName' | head -n 1| sed 's/.*: "//g;s/".*//g') +#else + "${cn_source.organism_cn}" +#end if +]]> + </token> + <xml name="cn_or_guess"> + <conditional name="cn_source"> + <param name="source_select" type="select" label="Organism Common Name Source"> + <option value="auto">Autodetect from Fasta</option> + <option value="auto_json">Autodetect from Apollo JSON</option> + <option value="direct">Direct Entry</option> + </param> + <when value="auto"> + <param name="cn_file" type="data" format="fasta" label="Organism Common Name" /> + </when> + <when value="auto_json"> + <param name="cn_file" type="data" format="json" label="Apollo Organism File" /> + </when> + <when value="direct"> + <param name="organism_cn" type="text" label="Organism Common Name" /> + </when> + </conditional> + </xml> + + <xml name="citations"> + <citations> + </citations> + </xml> + <xml name="dummy"> + <param label="Dummy Inputs" name="dummy" type="data" multiple="True" optional="True"> + <help> + With workflows that contact remote databases, it is sometimes + necessary to have particular operations happen in a particular + order. I.e. 
you would not want to try and add data to an organism + if the organism did not yet exist in WebApollo. + Since few of the WebApollo2 toolsuite output files which are + used in a downstream task (e.g. the output adding an organism is + not used elsewhere), we provide this dummy option which lets you + select outputs to help provide context for when this task should + execute. + </help> + </param> + </xml> + <xml name="gff3_input"> + <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/> + </xml> + <token name="@GENOME_SELECTOR_PRE@"> +#if $reference_genome.reference_genome_source == 'history': + ln -s $reference_genome.genome_fasta genomeref.fa; +#end if + </token> + <token name="@GENOME_SELECTOR@"> +#if $reference_genome.reference_genome_source == 'cached': + "${reference_genome.fasta_indexes.fields.path}" +#elif $reference_genome.reference_genome_source == 'history': + genomeref.fa +#end if + </token> +<token name="@REFERENCES@"> +<![CDATA[ +------ + +**Citation** + +If you use this tool in Galaxy, please cite: +Eric Rasche (2016), `Galaxy wrapper <https://github.com/TAMU-CPT/galaxy-webapollo>`_ +]]> + </token> +</macros>
#!/usr/bin/env python
"""renumber.py -- rename an organism's features to sequentially numbered names.

Features are ordered by start coordinate and renamed to
``<prefix><zero-padded counter>``; a JSON summary of the changes is printed
on stdout.
"""
import json
import copy
import argparse
import logging
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


def format_names(prefix, leading, index):
    """Return ``(name, mrna_name)`` for the feature numbered ``index``.

    ``name`` is ``prefix`` followed by ``index`` zero-padded to ``leading``
    digits; ``mrna_name`` is the same with a ``.mRNA`` suffix. The prefix is
    concatenated literally, so a ``%`` in it is harmless.
    """
    name = prefix + str(index).zfill(leading)
    return name, name + '.mRNA'


def main():
    """Parse the command line and renumber the organism's features."""
    # Imported here so that format_names() stays importable on machines that
    # do not have the Apollo client module available.
    from webapollo import WebApolloInstance

    parser = argparse.ArgumentParser(description='Renumber the features of a WebApollo organism by position')
    parser.add_argument('apollo', help='Complete Apollo URL')
    parser.add_argument('username', help='WA Admin Username')
    parser.add_argument('password', help='WA Admin Password')

    parser.add_argument('cn', help='Organism Common Name')

    parser.add_argument('--email')
    parser.add_argument('--prefix', default='gene_')
    # Validated as an integer up front instead of being spliced into a
    # format string unchecked.
    parser.add_argument('--leading', type=int, default=3)

    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)
    # User must have an account
    gx_user = wa.users.loadUsers(email=args.email)
    if not gx_user:
        raise Exception("Unknown user. Please register first")

    # Must find the organism
    org = wa.organisms.findOrganismByCn(args.cn)
    # TODO: verify user has permissions on the organism
    wa.annotations.setSequence(args.cn, org['id'])
    raw_data = wa.annotations.getFeatures()['features']

    # (parent id, own id, start coordinate, current name), ordered by start.
    data = sorted([
        (x['parent_id'], x['uniquename'], x['location']['fmin'], x['name'])
        for x in raw_data
    ], key=lambda x: x[2])

    outData = copy.copy(org)
    outData['changes'] = []

    for idx, feat in enumerate(data, 1):
        new_name, new_mrna_name = format_names(args.prefix, args.leading, idx)
        log.info('Renaming %s to %s', feat[3], new_name)
        # The parent gets the plain name, the feature itself the .mRNA one.
        outData['changes'].append((feat[0], new_name))
        wa.annotations.setName(feat[0], new_name)
        outData['changes'].append((feat[1], new_mrna_name))
        wa.annotations.setName(feat[1], new_mrna_name)

    print(json.dumps(outData, indent=2))


if __name__ == '__main__':
    main()
<?xml version="1.0"?>
<tool id="edu.tamu.cpt2.webapollo.renumber" name="WA2: Renumber Genes" version="1.1">
    <description>following standard conventions</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!-- User-supplied text values are quoted so that an empty prefix is passed
         as an empty argument rather than letting "prefix" consume the next
         option, and so that whitespace cannot split an argument. -->
    <command detect_errors="aggressive"><![CDATA[
python $__tool_directory__/renumber.py

@AUTH@

@CN_OR_GUESS@

--prefix "$prefix"
--leading $leading
--email "$__user_email__"

> $output]]></command>
    <inputs>
        <expand macro="cn_or_guess" />
        <param name="prefix" type="text" label="Prefix" optional="True" value="gene_"/>
        <!-- The value is the total width the number is zero-padded to: 3 gives gene_001 -->
        <param name="leading" type="integer" value="3" label="Zero-padded width of the gene number" />
    </inputs>
    <outputs>
        <data format="json" name="output"/>
    </outputs>
    <help><![CDATA[
**NOTA BENE**

All organism data is currently shared. By using this tool your annotation data will be visible to your fellow lab members.
This will be fixed at a later date.

This only works with JBrowse v0.4 and above!

**What it does**

Renames the annotated features of the selected organism in order of their
start coordinate. The first feature's gene is named with the prefix followed
by ``1`` zero-padded to the requested width (e.g. ``gene_001``), and its
transcript receives the same name with a ``.mRNA`` suffix; numbering then
continues along the genome.

The output is a JSON document describing the organism together with the list
of renames that were applied.

@REFERENCES@
    ]]></help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bad-model.gff3 Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,9 @@ +##gff-version 3 +##sequence-region Maroon_JMcDermott 1 144762 +Maroon_JMcDermott . gene 14488 14805 . + . Name=gene_26;date_creation=2016-02-17;owner=jmc_texas@tamu.edu;ID=707c88b7-36d1-44e3-93e6-d1d4f1219d57;date_last_modified=2016-02-17 +Maroon_JMcDermott . mRNA 14488 14805 . + . Name=gene_26-00001;date_creation=2016-02-17;Parent=707c88b7-36d1-44e3-93e6-d1d4f1219d57;owner=jmc_texas@tamu.edu;ID=8760695d-b88c-41c0-857b-540e6db81fe8;date_last_modified=2016-02-17 +Maroon_JMcDermott . CDS 14707 14805 . + 0 Name=94abf796-4c8d-45f4-916b-4d279616565e-CDS;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=94abf796-4c8d-45f4-916b-4d279616565e +Maroon_JMcDermott . exon 14497 14805 . + . Name=d2ebd8d0-6558-4674-a38f-346f88256340-exon;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=d2ebd8d0-6558-4674-a38f-346f88256340 +Maroon_JMcDermott . exon 14488 14491 . + . Name=2e4119f9-3220-4502-8ddd-4821c872e0d6-exon;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=2e4119f9-3220-4502-8ddd-4821c872e0d6 +Maroon_JMcDermott . non_canonical_five_prime_splice_site 14494 14494 . + . Name=8760695d-b88c-41c0-857b-540e6db81fe8-non_canonical_five_prime_splice_site-14493;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=8760695d-b88c-41c0-857b-540e6db81fe8-non_canonical_five_prime_splice_site-14493 +Maroon_JMcDermott . non_canonical_three_prime_splice_site 14497 14497 . + . Name=8760695d-b88c-41c0-857b-540e6db81fe8-non_canonical_three_prive_splice_site-14496;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=8760695d-b88c-41c0-857b-540e6db81fe8-non_canonical_three_prive_splice_site-14496
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/good-model.gff3 Tue May 03 13:38:55 2016 -0400 @@ -0,0 +1,7 @@ +##gff-version 3 +##sequence-region Maroon_JMcDermott 1 14805 +Maroon_JMcDermott feature gene 14488 14805 . + . ID=707c88b7-36d1-44e3-93e6-d1d4f1219d57;Name=gene_26;date_creation=2016-02-17;date_last_modified=2016-02-17;owner=jmc_texas%40tamu.edu +Maroon_JMcDermott feature mRNA 14488 14805 . + . ID=8760695d-b88c-41c0-857b-540e6db81fe8;Name=gene_26-00001;Parent=707c88b7-36d1-44e3-93e6-d1d4f1219d57;date_creation=2016-02-17;date_last_modified=2016-02-17;owner=jmc_texas%40tamu.edu +Maroon_JMcDermott feature CDS 14707 14805 . + 0 ID=94abf796-4c8d-45f4-916b-4d279616565e;Name=94abf796-4c8d-45f4-916b-4d279616565e-CDS;Parent=8760695d-b88c-41c0-857b-540e6db81fe8 +Maroon_JMcDermott feature exon 14497 14805 . + . ID=d2ebd8d0-6558-4674-a38f-346f88256340;Name=d2ebd8d0-6558-4674-a38f-346f88256340-exon;Parent=8760695d-b88c-41c0-857b-540e6db81fe8 +Maroon_JMcDermott feature Shine_Dalgarno_sequence 14488 14491 . + . ID=2e4119f9-3220-4502-8ddd-4821c872e0d6;Name=2e4119f9-3220-4502-8ddd-4821c872e0d6-exon;Parent=8760695d-b88c-41c0-857b-540e6db81fe8
#!/usr/bin/env python
"""Update the stored metadata of an existing Apollo organism.

The organism is addressed by its numeric ID.  After the update, the record
of that organism (filtered out of the list Apollo returns) is printed to
stdout as JSON.
"""
import json
import argparse
from webapollo import WebApolloInstance


def main():
    """Parse the command line, push the update and print the organism record."""
    parser = argparse.ArgumentParser(description='Update the metadata of an existing organism in Apollo')
    parser.add_argument('apollo', help='Complete Apollo URL')
    parser.add_argument('username', help='WA Admin Username')
    parser.add_argument('password', help='WA Admin Password')

    parser.add_argument('organismId', help='Organism ID #')
    # Accepted for command-line compatibility; not used by this script
    parser.add_argument('email', help='User Email')

    parser.add_argument('--commonName', help='Organism Common Name')
    parser.add_argument('--jbrowse', help='JBrowse Data Directory')
    parser.add_argument('--blatdb', help='BlatDB Directory')
    parser.add_argument('--genus', help='Organism Genus')
    parser.add_argument('--species', help='Organism Species')
    parser.add_argument('--public', action='store_true', help='Make organism public')

    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)
    data = wa.organisms.updateOrganismInfo(
        # mandatory for the web service
        # NOTE(review): --commonName/--jbrowse are optional on the command
        # line, so None is forwarded when they are omitted - confirm the
        # server tolerates that.
        args.organismId,
        args.commonName,
        args.jbrowse,
        # optional
        blatdb=args.blatdb,
        genus=args.genus,
        species=args.species,
        public=args.public
    )
    # The service answers with a list of organisms; keep only the one we
    # touched.  IDs are compared as strings because the command line yields
    # a str while the JSON response may carry a number.
    wanted_data = [x for x in data if str(x['id']) == str(args.organismId)]
    # Function-call form is valid on Python 2 and Python 3 alike
    print(json.dumps(wanted_data, indent=2))


if __name__ == '__main__':
    main()
<?xml version="1.0"?>
<tool id="edu.tamu.cpt2.webapollo.update_organism" name="WA2: Update Organism" version="1.7">
    <description>updates an existing genome with WebApollo</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!-- Paths and the user e-mail are quoted so that whitespace or shell
         metacharacters in them cannot split or alter the command line.
         The organism ID is resolved at run time by find_organism.py. -->
    <command detect_errors="aggressive"><![CDATA[
#if str($jbrowse) != "None":
    cp -R "${jbrowse.extra_files_path}/data/" "/opt/apollo/data/galaxy/${jbrowse.id}";
#end if

python $__tool_directory__/update_organism.py

@AUTH@

\$(python $__tool_directory__/find_organism.py @AUTH@ --commonName @CN_OR_GUESS@ | grep '"id"' | sed 's/.*: //g')

#if str($jbrowse) != "None":
    --jbrowse "/opt/apollo/data/galaxy/${jbrowse.id}"
#end if

--commonName
@CN_OR_GUESS@

#if str($blatdb) != "None" and str($blatdb) != "":
    --blatdb "$blatdb"
#end if

#if str($genus) != "None" and str($genus) != "":
    --genus "$genus"
#end if

#if str($species) != "None" and str($species) != "":
    --species "$species"
#end if

$public

"$__user_email__"

> $output]]></command>
    <inputs>
        <!--<expand macro="auth_file" />-->
        <param name="jbrowse" type="data" format="html" label="JBrowse Output" optional="True"/>
        <param name="blatdb" type="data" label="Blat DB" optional="True"/>
        <expand macro="cn_or_guess" />
        <param name="genus" type="text" label="Host Bacteria" optional="True" />
        <param name="species" type="text" label="Phage Name" optional="True" />

        <param name="public" type="boolean" truevalue="--public" falsevalue="" label="Is Organism Public" />
    </inputs>
    <outputs>
        <data format="json" name="output"/>
    </outputs>
    <help><![CDATA[
**NOTA BENE**

All organism data is currently shared. By using this tool your annotation data will be visible to your fellow lab members.
This will be fixed at a later date.

This only works with JBrowse v0.4 and above!

**What it does**

Updates an organism that is already registered in the Apollo database. If a
JBrowse output is supplied, its data directory is copied into place and the
organism is pointed at the new copy. The BLAT database, genus, species and
public visibility are updated when they are provided.

The output is the JSON record of the updated organism.

@REFERENCES@
    ]]></help>
</tool>
"""Client for the WebApollo JSON web services.

``WebApolloInstance`` bundles one sub-client per service area
(annotations, groups, IO, organisms, users, metrics).  Every POST request
carries the configured username and password in its JSON body.

The module imports ``StringIO`` and is therefore written for Python 2.
"""
import requests
import json
import collections
from BCBio import GFF
import StringIO
import logging
logging.getLogger("requests").setLevel(logging.CRITICAL)
log = logging.getLogger()


class WebApolloInstance(object):
    """Entry point holding the connection details and all sub-clients."""

    def __init__(self, url, username, password):
        self.apollo_url = url
        self.username = username
        self.password = password

        self.annotations = AnnotationsClient(self)
        self.groups = GroupsClient(self)
        self.io = IOClient(self)
        self.organisms = OrganismsClient(self)
        self.users = UsersClient(self)
        self.metrics = MetricsClient(self)
        self.bio = RemoteRecord(self)

    def __str__(self):
        return '<WebApolloInstance at %s>' % self.apollo_url


class GroupObj(object):
    """A user group; ``groupId`` is only set when an ``id`` was supplied."""

    def __init__(self, **kwargs):
        self.name = kwargs['name']

        if 'id' in kwargs:
            self.groupId = kwargs['id']


class UserObj(object):
    """A user record whose attributes mirror the keyword arguments given."""

    ROLE_USER = 'USER'
    ROLE_ADMIN = 'ADMIN'

    def __init__(self, **kwargs):
        # Generally expect 'userId', 'firstName', 'lastName', 'username' (email)
        for attr, value in kwargs.items():
            setattr(self, attr, value)

        if 'groups' in kwargs:
            # Replace the raw group dicts with GroupObj instances
            self.groups = [GroupObj(**groupData) for groupData in kwargs['groups']]

        # Snapshot of the field names, used by toDict()
        self.__props = list(kwargs)

    def isAdmin(self):
        """Return True when the record has a ``role`` equal to ROLE_ADMIN."""
        if hasattr(self, 'role'):
            return self.role == self.ROLE_ADMIN
        return False

    def refresh(self, wa):
        """Reload this user through ``wa`` and copy the fresh values over."""
        # This method requires some sleeping usually.
        newU = wa.users.loadUser(self).toDict()
        for prop in newU:
            setattr(self, prop, newU[prop])

    def toDict(self):
        """Return the fields given at construction with their current values."""
        return dict((prop, getattr(self, prop)) for prop in self.__props)

    def __str__(self):
        return '<User %s: %s %s <%s>>' % (self.userId, self.firstName,
                                          self.lastName, self.username)


class Client(object):
    """Base class for the service clients.

    Subclasses define ``CLIENT_BASE``, the path prefix of their service.
    Extra keyword arguments are forwarded to every ``requests`` call.
    """

    def __init__(self, webapolloinstance, **requestArgs):
        self._wa = webapolloinstance

        # 'verify' is passed explicitly on each call, so it must not also
        # remain among the forwarded keyword arguments.
        self.__verify = requestArgs.pop('verify', True)
        self._requestArgs = requestArgs

    @staticmethod
    def _strip_credentials(response):
        """Drop echoed credentials from a decoded JSON object response."""
        if isinstance(response, dict):
            response.pop('username', None)
            response.pop('password', None)
        return response

    def request(self, clientMethod, data, post_params=None, isJson=True):
        """POST ``data`` (plus credentials) as JSON to ``clientMethod``.

        Returns the decoded JSON response, or the raw response text when
        ``isJson`` is false.  Raises ``Exception`` on any status other
        than 200.
        """
        url = self._wa.apollo_url + self.CLIENT_BASE + clientMethod

        headers = {
            'Content-Type': 'application/json'
        }

        # Work on a copy so the admin credentials never leak into the
        # caller's dictionary.
        payload = dict(data)
        payload.update({
            'username': self._wa.username,
            'password': self._wa.password,
        })

        r = requests.post(url, data=json.dumps(payload), headers=headers,
                          verify=self.__verify, params=post_params,
                          **self._requestArgs)

        if r.status_code == 200:
            if isJson:
                return self._strip_credentials(r.json())
            return r.text

        # @see self.body for HTTP response body
        raise Exception("Unexpected response from apollo %s: %s" %
                        (r.status_code, r.text))

    def get(self, clientMethod, get_params):
        """GET ``clientMethod`` and return the decoded JSON response.

        Raises ``Exception`` on any status other than 200.
        """
        url = self._wa.apollo_url + self.CLIENT_BASE + clientMethod
        headers = {}

        r = requests.get(url, headers=headers, verify=self.__verify,
                         params=get_params, **self._requestArgs)
        if r.status_code == 200:
            return self._strip_credentials(r.json())
        # @see self.body for HTTP response body
        raise Exception("Unexpected response from apollo %s: %s" %
                        (r.status_code, r.text))


class MetricsClient(Client):
    CLIENT_BASE = '/metrics/'

    def getServerMetrics(self):
        return self.get('metrics', {})


class AnnotationsClient(Client):
    """Annotation editor calls; ``setSequence`` must be called first."""

    CLIENT_BASE = '/annotationEditor/'

    def _update_data(self, data):
        """Add the sequence/organism selected via ``setSequence`` to ``data``."""
        if not hasattr(self, '_extra_data'):
            raise Exception("Please call setSequence first")
        data.update(self._extra_data)
        return data

    def _post_features(self, clientMethod, features, isJson=True):
        """POST a ``{'features': ...}`` body for the current sequence."""
        data = self._update_data({'features': features})
        return self.request(clientMethod, data, isJson=isJson)

    def setSequence(self, sequence, organism):
        """Select the sequence and organism later calls operate on."""
        self._extra_data = {
            'sequence': sequence,
            'organism': organism,
        }

    def setDescription(self, featureDescriptions):
        return self._post_features('setDescription', featureDescriptions)

    def setName(self, uniquename, name):
        # TODO
        return self._post_features('setName', [
            {
                'uniquename': uniquename,
                'name': name,
            }
        ])

    def setNames(self, features):
        # TODO
        return self._post_features('setName', features)

    def setStatus(self, statuses):
        # TODO
        return self._post_features('setStatus', statuses)

    def setSymbol(self, symbols):
        # Goes through the same guard as every other call, so a missing
        # setSequence() gives the explanatory error, not an AttributeError.
        return self._post_features('setSymbol', symbols)

    def getComments(self, features):
        return self._post_features('getComments', features)

    def addAttribute(self, features):
        return self._post_features('addAttribute', features)

    def getFeatures(self):
        data = self._update_data({})
        return self.request('getFeatures', data)

    def getSequence(self, uniquename):
        return self._post_features('getSequence', [
            {'uniquename': uniquename}
        ])

    def addFeature(self, feature, trustme=False):
        if not trustme:
            raise NotImplementedError("Waiting on better docs from project. If you know what you are doing, pass trustme=True to this function.")

        data = {}
        data.update(feature)
        data = self._update_data(data)
        return self.request('addFeature', data)

    def addTranscript(self, transcript, trustme=False):
        if not trustme:
            raise NotImplementedError("Waiting on better docs from project. If you know what you are doing, pass trustme=True to this function.")

        data = {}
        data.update(transcript)
        data = self._update_data(data)
        return self.request('addTranscript', data)

    # addExon, add/delete/updateComments, addTranscript skipped due to docs

    def duplicateTranscript(self, transcriptId):
        return self._post_features('duplicateTranscript',
                                   [{'uniquename': transcriptId}])

    def setTranslationStart(self, uniquename, start):
        return self._post_features('setTranslationStart', [{
            'uniquename': uniquename,
            'location': {
                'fmin': start
            }
        }])

    def setTranslationEnd(self, uniquename, end):
        return self._post_features('setTranslationEnd', [{
            'uniquename': uniquename,
            'location': {
                'fmax': end
            }
        }])

    def setLongestOrf(self, uniquename):
        return self._post_features('setLongestOrf', [{
            'uniquename': uniquename,
        }])

    def setBoundaries(self, uniquename, start, end):
        return self._post_features('setBoundaries', [{
            'uniquename': uniquename,
            'location': {
                'fmin': start,
                'fmax': end,
            }
        }])

    def getSequenceAlterations(self):
        data = self._update_data({})
        return self.request('getSequenceAlterations', data)

    def setReadthroughStopCodon(self, uniquename):
        return self._post_features('setReadthroughStopCodon', [{
            'uniquename': uniquename,
        }])

    def deleteSequenceAlteration(self, uniquename):
        return self._post_features('deleteSequenceAlteration', [{
            'uniquename': uniquename,
        }])

    def flipStrand(self, uniquenames):
        """Flip the strand of each feature named in the iterable ``uniquenames``."""
        return self._post_features('flipStrand', [
            {'uniquename': x} for x in uniquenames
        ])

    def mergeExons(self, exonA, exonB):
        return self._post_features('mergeExons', [
            {'uniquename': exonA},
            {'uniquename': exonB},
        ])

    # def splitExon(): pass

    def deleteFeatures(self, uniquenames):
        assert isinstance(uniquenames, collections.Iterable)
        return self._post_features('deleteFeature', [
            {'uniquename': x} for x in uniquenames
        ])

    # def deleteExon(): pass

    # def makeIntron(self, uniquename, ): pass

    def getSequenceSearchTools(self):
        return self.get('getSequenceSearchTools', {})

    def getCannedComments(self):
        return self.get('getCannedComments', {})

    def searchSequence(self, searchTool, sequence, database):
        data = {
            'key': searchTool,
            'residues': sequence,
            'database_id': database,
        }
        return self.request('searchSequences', data)

    def getGff3(self, uniquenames):
        """Return the raw GFF3 text for the named features."""
        assert isinstance(uniquenames, collections.Iterable)
        return self._post_features('getGff3', [
            {'uniquename': x} for x in uniquenames
        ], isJson=False)


class GroupsClient(Client):
    CLIENT_BASE = '/group/'

    @staticmethod
    def _user_email(user):
        """Return the user's e-mail address.

        User records in this module carry the address in ``username``; an
        explicit ``email`` attribute is honoured first for compatibility.
        """
        email = getattr(user, 'email', None)
        return user.username if email is None else email

    def createGroup(self, name):
        data = {'name': name}
        return self.request('createGroup', data)

    def getOrganismPermissionsForGroup(self, group):
        data = {
            'id': group.groupId,
            'name': group.name,
        }
        return self.request('getOrganismPermissionsForGroup', data)

    def loadGroups(self, group=None):
        data = {}
        if group is not None:
            data['groupId'] = group.groupId

        return self.request('loadGroups', data)

    def deleteGroup(self, group):
        data = {
            'id': group.groupId,
            'name': group.name,
        }
        return self.request('deleteGroup', data)

    def updateGroup(self, group, newName):
        # TODO: Sure would be nice if modifying ``group.name`` would invoke
        # this?
        data = {
            'id': group.groupId,
            'name': newName,
        }
        return self.request('updateGroup', data)

    def updateOrganismPermission(self, group, organismName,
                                 administrate=False, write=False, read=False,
                                 export=False):
        data = {
            'groupId': group.groupId,
            'name': organismName,
            'administrate': administrate,
            'write': write,
            'export': export,
            'read': read,
        }
        return self.request('updateOrganismPermission', data)

    def updateMembership(self, group, users):
        data = {
            'groupId': group.groupId,
            'user': [self._user_email(user) for user in users]
        }
        return self.request('updateMembership', data)


class IOClient(Client):
    CLIENT_BASE = '/IOService/'

    def write(self, exportType='FASTA', seqType='peptide',
              exportFormat='text', sequences=None, organism=None,
              output='text', exportAllSequences=False,
              exportGff3Fasta=False):
        """Request an export; arguments are validated before the call.

        The response is decoded as JSON when ``output`` is ``'file'`` and
        returned as raw text otherwise.
        """
        if exportType not in ('FASTA', 'GFF3'):
            raise Exception("exportType must be one of FASTA, GFF3")

        if seqType not in ('peptide', 'cds', 'cdna', 'genomic'):
            raise Exception("seqType must be one of peptide, cds, cdna, genomic")

        if exportFormat not in ('gzip', 'text'):
            raise Exception("exportFormat must be one of gzip, text")

        if output not in ('file', 'text'):
            raise Exception("output must be one of file, text")

        data = {
            'type': exportType,
            'seqType': seqType,
            'format': exportFormat,
            'sequences': sequences,
            'organism': organism,
            'output': output,
            'exportAllSequences': exportAllSequences,
            'exportGff3Fasta': exportGff3Fasta,
        }

        return self.request('write', data, isJson=output == 'file')

    def download(self, uuid, outputFormat='gzip'):

        if outputFormat.lower() not in ('gzip', 'text'):
            raise Exception("outputFormat must be one of gzip, text")

        data = {
            'format': outputFormat,
            'uuid': uuid,
        }
        # NOTE(review): this posts to the 'write' endpoint although the
        # method is named download - confirm against the Apollo API.
        return self.request('write', data)


class OrganismsClient(Client):
    CLIENT_BASE = '/organism/'

    @staticmethod
    def _add_optional(data, blatdb, genus, species):
        """Copy the optional organism fields into ``data`` when they are set."""
        if blatdb is not None:
            data['blatdb'] = blatdb
        if genus is not None:
            data['genus'] = genus
        if species is not None:
            data['species'] = species
        return data

    def addOrganism(self, commonName, directory, blatdb=None, species=None,
                    genus=None, public=False):
        data = {
            'commonName': commonName,
            'directory': directory,
            'publicMode': public,
        }
        self._add_optional(data, blatdb, genus, species)
        return self.request('addOrganism', data)

    def findAllOrganisms(self):
        return self.request('findAllOrganisms', {})

    def findOrganismByCn(self, cn):
        """Return the first organism whose common name equals ``cn``.

        Raises ``Exception`` when no organism matches.
        """
        orgs = [x for x in self.findAllOrganisms() if x['commonName'] == cn]
        if not orgs:
            raise Exception("Unknown common name")
        return orgs[0]

    def deleteOrganism(self, organismId):
        return self.request('deleteOrganism', {'id': organismId})

    def deleteOrganismFeatures(self, organismId):
        return self.request('deleteOrganismFeatures', {'id': organismId})

    def getSequencesForOrganism(self, commonName):
        return self.request('getSequencesForOrganism', {'organism': commonName})

    def updateOrganismInfo(self, organismId, commonName, directory, blatdb=None, species=None, genus=None, public=False):
        data = {
            'id': organismId,
            'name': commonName,
            'directory': directory,
            'publicMode': public,
        }
        self._add_optional(data, blatdb, genus, species)
        return self.request('updateOrganismInfo', data)


class UsersClient(Client):
    CLIENT_BASE = '/user/'

    def getOrganismPermissionsForUser(self, user):
        data = {
            'userId': user.userId,
        }
        return self.request('getOrganismPermissionsForUser', data)

    def updateOrganismPermission(self, user, organism, administrate=False,
                                 write=False, export=False, read=False):
        data = {
            'userId': user.userId,
            'organism': organism,
            'ADMINISTRATE': administrate,
            'WRITE': write,
            'EXPORT': export,
            'READ': read,
        }
        return self.request('updateOrganismPermission', data)

    def loadUser(self, user):
        return self.loadUserById(user.userId)

    def loadUserById(self, userId):
        """Return the matching ``UserObj``, or the raw response if not a list."""
        res = self.request('loadUsers', {'userId': userId})
        if isinstance(res, list):
            # We can only match one, right?
            return UserObj(**res[0])
        return res

    def loadUsers(self, email=None):
        """Return all users, or only those whose ``username`` equals ``email``."""
        res = self.request('loadUsers', {})
        data = [UserObj(**x) for x in res]
        if email is not None:
            data = [x for x in data if x.username == email]

        return data

    def addUserToGroup(self, group, user):
        data = {'group': group.name, 'userId': user.userId}
        return self.request('addUserToGroup', data)

    def removeUserFromGroup(self, group, user):
        data = {'group': group.name, 'userId': user.userId}
        return self.request('removeUserFromGroup', data)

    def createUser(self, email, firstName, lastName, newPassword, role="user", groups=None):
        data = {
            'firstName': firstName,
            'lastName': lastName,
            'email': email,
            'role': role,
            'groups': [] if groups is None else groups,
            # 'availableGroups': [],
            'newPassword': newPassword,
            # 'organismPermissions': [],
        }
        return self.request('createUser', data)

    def deleteUser(self, user):
        return self.request('deleteUser', {'userId': user.userId})

    def updateUser(self, user, email, firstName, lastName, newPassword):
        data = {
            'userId': user.userId,
            'email': email,
            'firstName': firstName,
            'lastName': lastName,
            'newPassword': newPassword,
        }
        return self.request('updateUser', data)


class RemoteRecord(Client):
    CLIENT_BASE = None

    def ParseRecord(self, cn):
        """Yield a wrapped record for each GFF3 record exported for ``cn``."""
        org = self._wa.organisms.findOrganismByCn(cn)
        self._wa.annotations.setSequence(org['commonName'], org['id'])

        data = StringIO.StringIO(self._wa.io.write(
            exportType='GFF3',
            seqType='genomic',
            exportAllSequences=False,
            exportGff3Fasta=True,
            output="text",
            exportFormat="text",
            sequences=cn,
        ))
        data.seek(0)

        for record in GFF.parse(data):
            yield WebApolloSeqRecord(record, self._wa)


class WebApolloSeqRecord(object):
    """Proxy around a parsed record; unknown attributes go to the record."""

    def __init__(self, sr, wa):
        self._sr = sr
        self._wa = wa

    def __dir__(self):
        return dir(self._sr)

    def __getattr__(self, key):
        # __getattr__ only runs when normal lookup failed, so reaching here
        # for our own fields means they are not set yet; AttributeError
        # keeps hasattr()/getattr() working and avoids infinite recursion.
        if key in ('_sr', '_wa'):
            raise AttributeError(key)
        if key == 'features':
            return (WebApolloSeqFeature(x, self._wa)
                    for x in self._sr.features)
        return getattr(self._sr, key)

    def __setattr__(self, key, value):
        if key in ('_sr', '_wa'):
            # Our own fields live on the proxy itself
            self.__dict__[key] = value
        else:
            # Everything else is stored on the wrapped record
            setattr(self._sr, key, value)


class WebApolloSeqFeature(object):
    """Proxy around a parsed feature; assigning ``location`` updates Apollo."""

    def __init__(self, sf, wa):
        self._sf = sf
        self._wa = wa

    def __dir__(self):
        return dir(self._sf)

    def __getattr__(self, key):
        # See WebApolloSeqRecord.__getattr__ for why this raises
        if key in ('_sf', '_wa'):
            raise AttributeError(key)
        return getattr(self._sf, key)

    def __setattr__(self, key, value):
        if key in ('_sf', '_wa'):
            self.__dict__[key] = value
            return

        # Methods acting on the SeqFeature object
        if key == 'location':
            feature_id = self._sf.qualifiers['ID'][0]
            if value.strand != self._sf.location.strand:
                # flipStrand expects an iterable of uniquenames
                self._wa.annotations.flipStrand([feature_id])

            self._wa.annotations.setBoundaries(
                feature_id,
                int(value.start),
                int(value.end),
            )

        setattr(self._sf, key, value)


def _tnType(feature):
    """Return the feature's type if it is a known one, else ``'exon'``."""
    if feature.type in ('gene', 'mRNA', 'exon', 'CDS'):
        return feature.type
    return 'exon'


def _yieldFeatData(features):
    """Yield one nested feature dictionary per feature, children included."""
    for f in features:
        current = {
            'location': {
                'strand': f.strand,
                'fmin': int(f.location.start),
                'fmax': int(f.location.end),
            },
            'type': {
                'name': _tnType(f),
                'cv': {
                    'name': 'sequence',
                }
            },
        }
        if f.type in ('gene', 'mRNA'):
            # Fall back to the feature id when no Name qualifier exists
            current['name'] = f.qualifiers.get('Name', [f.id])[0]
        if hasattr(f, 'sub_features') and len(f.sub_features) > 0:
            current['children'] = list(_yieldFeatData(f.sub_features))

        yield current


def featuresToFeatureSchema(features):
    """Convert gene features to feature dictionaries; others are skipped."""
    compiled = []
    for feature in features:
        if feature.type != 'gene':
            log.warning("Not able to handle %s features just yet...", feature.type)
            continue

        compiled.extend(_yieldFeatData([feature]))
    return compiled