comparison create_features_from_gff3.py @ 5:7610987e0c48 draft

planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 29795b77c0d5c7894219b018a92c5ee7818096c3
author eric-rasche
date Wed, 01 Mar 2017 22:39:58 -0500
parents
children f9a6e151b3b4
comparison
equal deleted inserted replaced
4:23ead6905145 5:7610987e0c48
1 #!/usr/bin/env python
2 import sys
3 import json
4 import time
5 import argparse
6 from webapollo import WebApolloInstance, featuresToFeatureSchema
7 from webapollo import WAAuth, OrgOrGuess, GuessOrg, AssertUser
8 from BCBio import GFF
9 import logging
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


def tsv_row(cells):
    """Join string cells into one tab-separated report row.

    Every tab, carriage return and newline found inside a cell is replaced
    by a single space, so a value that spans several lines (for example an
    exception message) cannot break the one-row-per-feature layout.

    :param cells: iterable of strings, one per column.
    :returns: the cleaned cells joined by tabs, without a trailing newline.
    """
    cleaned = []
    for cell in cells:
        for char in ('\t', '\r', '\n'):
            cell = cell.replace(char, ' ')
        cleaned.append(cell)
    return '\t'.join(cleaned)


if __name__ == '__main__':
    # Reads a GFF3 file and, for every 'gene' feature in it, creates the
    # feature through the WebApolloInstance client, then writes one
    # tab-separated status row per gene to stdout.
    parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services')
    # WAAuth and OrgOrGuess come from the webapollo module and receive the
    # parser; presumably they register the connection / organism options
    # read below (args.apollo, args.username, args.password) -- confirm
    # against webapollo.
    WAAuth(parser)
    parser.add_argument('email', help='User Email')
    OrgOrGuess(parser)

    parser.add_argument('gff3', type=argparse.FileType('r'), help='GFF3 file')
    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)
    # User must have an account. The result is not used afterwards; the call
    # is kept for whatever check AssertUser performs (presumably it fails
    # when no user matches -- confirm against webapollo).
    AssertUser(wa.users.loadUsers(email=args.email))

    # Get organism
    org_cn = GuessOrg(args, wa)
    if isinstance(org_cn, list):
        # Only the first entry is used when a list comes back.
        org_cn = org_cn[0]

    # TODO: Check user perms on org.
    org = wa.organisms.findOrganismByCn(org_cn)

    # Qualifier keys that are left out of the "Dropped qualifiers" report.
    bad_quals = ['date_creation', 'source', 'owner', 'date_last_modified', 'Name', 'ID']

    # Header line of the report, prefixed with '# '.
    sys.stdout.write('# ' + tsv_row(['Feature ID', 'Apollo ID', 'Success', 'Messages']) + '\n')

    # print(wa.annotations.getFeatures())
    for rec in GFF.parse(args.gff3):
        wa.annotations.setSequence(rec.id, org['id'])
        for feature in rec.features:
            # We can only handle genes right now
            if feature.type != 'gene':
                continue
            # Convert the feature into a presentation that Apollo will accept
            featureData = featuresToFeatureSchema([feature])

            # Any failure while creating or fixing up a single gene is
            # reported as an ERROR row instead of aborting the whole run,
            # hence the deliberately broad except below.
            try:
                # We're experiencing a (transient?) problem where gene_001 to
                # gene_025 will be rejected. Thus, hardcode to a known working
                # gene name and update later.
                featureData[0]['name'] = 'gene_000'
                # Extract CDS feature from the feature data, this will be used
                # to set the CDS location correctly (apollo currently screwing
                # this up (2.0.6))
                transcript_children = featureData[0]['children'][0]['children']
                cds_location = [
                    child for child in transcript_children
                    if child['type']['name'] == 'CDS'
                ][0]['location']
                # Create the new feature
                newfeature = wa.annotations.addFeature(featureData, trustme=True)
                # Extract the UUIDs that apollo returns to us
                mrna_id = newfeature['features'][0]['uniquename']
                gene_id = newfeature['features'][0]['parent_id']
                # Sleep to give it time to actually persist the feature. Apollo
                # is terrible about writing + immediately reading back written
                # data.
                time.sleep(1)
                # Correct the translation start, but with strand specific logic
                if cds_location['strand'] == 1:
                    wa.annotations.setTranslationStart(
                        mrna_id, min(cds_location['fmin'], cds_location['fmax']))
                else:
                    wa.annotations.setTranslationStart(
                        mrna_id, max(cds_location['fmin'], cds_location['fmax']) - 1)

                # Finally we set the name, this should be correct.
                product = feature.qualifiers.get('product', ["Unknown"])[0]
                wa.annotations.setName(mrna_id, product)
                wa.annotations.setName(gene_id, product)

                # TODO: qualifiers outside bad_quals are not sent to Apollo
                # yet; they are only listed in the report as dropped.
                dropped = {k: v for (k, v) in feature.qualifiers.items() if k not in bad_quals}
                row = [
                    feature.id,
                    gene_id,
                    'success',
                    "Dropped qualifiers: %s" % (json.dumps(dropped)),
                ]
            except Exception as e:
                row = [
                    feature.id,
                    '',
                    'ERROR',
                    str(e),
                ]

            # One row per gene; tsv_row keeps a multi-line error message from
            # spilling over into extra rows or columns.
            sys.stdout.write(tsv_row(row) + '\n')