annotate ensembl_rest.py @ 9:4d3ac66875d2 draft default tip

Uploaded
author jjohnson
date Thu, 14 Dec 2017 13:35:00 -0500
parents 5c92d0be6514
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
1 #!/usr/bin/env python
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
2 """
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
3 #
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
4 #------------------------------------------------------------------------------
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
5 # University of Minnesota
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
6 # Copyright 2017, Regents of the University of Minnesota
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
7 #------------------------------------------------------------------------------
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
8 # Author:
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
9 #
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
10 # James E Johnson
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
11 #
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
12 #------------------------------------------------------------------------------
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
13 """
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
14
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
15 import sys
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
16 import requests
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
17 from time import sleep
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
18
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
19
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
# Base URL of the Ensembl REST service; ensembl_rest() prepends it to every
# endpoint path it is given.
server = "https://rest.ensembl.org"
# Module-level endpoint path.  The functions visible in this file build their
# own local ``ext`` and do not read this value -- it may be used elsewhere.
ext = "/info/assembly/homo_sapiens?"
# Step used by get_transcripts_bed() when splitting a span into successive
# requests.  NOTE(review): presumably chosen to stay below the server's
# maximum region size -- confirm against the Ensembl REST documentation.
max_region = 4 * 1000 * 1000
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
23
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
24
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
def ensembl_rest(ext, headers):
    """Send a GET request to the Ensembl REST server and return the response.

    The URL requested is the module-level ``server`` followed by ``ext``.
    When the server answers with status 429 and supplies a ``Retry-After``
    header, the call sleeps for that many seconds and repeats the request
    once; there is no further retry after that.

    Args:
        ext: Endpoint path plus query string, appended verbatim to ``server``.
        headers: Dict of HTTP request headers handed to ``requests.get``.

    Returns:
        The ``requests`` response object of the last request made.

    Raises:
        requests.exceptions.HTTPError: from ``raise_for_status()`` when the
            final response carries an error status.
        ValueError: if ``Retry-After`` is present but is not a number.
    """
    url = server + ext
    # Trace each request on stderr.  sys.stderr.write is used instead of the
    # Python-2-only ``print >>`` form so the module runs on Python 2 and 3.
    sys.stderr.write("%s\n" % ext)
    r = requests.get(url, headers=headers)
    if r.status_code == 429:
        sys.stderr.write("response headers: %s\n\n" % r.headers)
        if 'Retry-After' in r.headers:
            # Header values are strings; time.sleep() needs a number.
            sleep(float(r.headers['Retry-After']))
            r = requests.get(url, headers=headers)
    # No-op for successful responses, raises HTTPError otherwise.
    r.raise_for_status()
    return r
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
36
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
37
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
def get_species():
    """Fetch ``/info/species``, print one line per species, and index by name.

    For every entry under the ``species`` key of the response JSON, a
    tab-separated line with its name, common_name, display_name, strain and
    taxon_id is written to stdout.

    Returns:
        Dict mapping each species' ``name`` to its full entry from the
        response JSON.

    Raises:
        KeyError: if an entry lacks one of the printed fields.
    """
    req_header = {"Content-Type": "application/json"}
    r = ensembl_rest("/info/species", req_header)
    results = dict()
    for species in r.json()['species']:
        results[species['name']] = species
        # sys.stdout.write replaces the Python-2-only ``print >> sys.stdout``
        # statement; the trailing newline that print added is now explicit.
        sys.stdout.write(
            "%s\t%s\t%s\t%s\t%s\n"
            % (species['name'], species['common_name'],
               species['display_name'],
               species['strain'],
               species['taxon_id']))
    return results
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
52
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
53
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
def get_biotypes(species):
    """Return the biotype names reported by ``/info/biotypes/<species>``.

    Args:
        species: Species identifier substituted into the endpoint path.

    Returns:
        List of the ``biotype`` values, in response order, taken from every
        response entry that has a ``biotype`` key.
    """
    response = ensembl_rest("/info/biotypes/%s?" % species,
                            {"Content-Type": "application/json"})
    return [item['biotype'] for item in response.json() if 'biotype' in item]
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
63
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
64
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
def get_toplevel(species):
    """Return top-level sequence lengths for a species, grouped by coord system.

    Queries ``/info/assembly/<species>`` and reads the ``top_level_region``
    list of the response JSON.

    Args:
        species: Species identifier substituted into the endpoint path.

    Returns:
        Dict of dicts: ``{coord_system: {sequence name: length as int}}``.
    """
    response = ensembl_rest("/info/assembly/%s?" % species,
                            {"Content-Type": "application/json"})
    assembly = response.json()
    coord_systems = {}
    for region in assembly['top_level_region']:
        # Create the per-coord-system mapping the first time it is seen.
        by_name = coord_systems.setdefault(region['coord_system'], {})
        by_name[region['name']] = int(region['length'])
    return coord_systems
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
77
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
78
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
def get_transcripts_bed(species, refseq, start, length, strand='', params=None):
    """Fetch transcript features for a span as BED lines, one chunk at a time.

    The span from ``start`` to ``length`` is split at multiples of the
    module-level ``max_region`` and ``/overlap/region`` is queried once per
    chunk.  Each chunk after the first begins one position past the previous
    chunk's end.

    Args:
        species: Species identifier placed in the endpoint path.
        refseq: Sequence/region name placed in the endpoint path.
        start: Integer start coordinate of the span.
        length: Integer used as the end coordinate of the span (the last
            chunk ends here).
        strand: Text inserted directly after the coordinates in the path;
            empty by default.
        params: Optional extra query-string text appended after
            ``feature=transcript;``.

    Returns:
        List of text lines gathered from every non-empty response, in request
        order.  Empty when ``start`` is not below ``length``, in which case no
        request is made.
    """
    bed = []
    param = params if params else ''
    req_header = {"Content-Type": "text/x-bed"}
    # list() is required: on Python 3 range() is lazy and has no append().
    regions = list(range(start, length, max_region))
    if not regions or regions[-1] < length:
        regions.append(length)
    # regions[0] is ``start`` itself; each later element is a chunk end.
    # NOTE(review): a feature spanning a chunk boundary may be returned by
    # both neighbouring requests, giving duplicate lines -- confirm whether
    # callers need de-duplication.
    for end in regions[1:]:
        ext = "/overlap/region/%s/%s:%d-%d%s?feature=transcript;%s"\
            % (species, refseq, start, end, strand, param)
        start = end + 1
        r = ensembl_rest(ext, req_header)
        if r.text:
            bed.extend(r.text.splitlines())
    return bed
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
94
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
95
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
def get_seq(id, seqtype, params=None):
    """Return the text body of a ``/sequence/id`` request.

    Args:
        id: Identifier substituted into the endpoint path.
        seqtype: Value sent as the ``type`` query parameter.
        params: Optional extra query-string text appended after ``type``.

    Returns:
        The response text, requested with a ``text/plain`` content type.
    """
    extra = params or ''
    path = "/sequence/id/%s?type=%s;%s" % (id, seqtype, extra)
    return ensembl_rest(path, {"Content-Type": "text/plain"}).text
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
102
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
103
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
def get_cdna(id, params=None):
    """Return ``get_seq`` output for ``id`` with sequence type ``'cdna'``."""
    seqtype = 'cdna'
    return get_seq(id, seqtype, params=params)
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
106
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
107
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
def get_cds(id, params=None):
    """Return ``get_seq`` output for ``id`` with sequence type ``'cds'``."""
    seqtype = 'cds'
    return get_seq(id, seqtype, params=params)
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
110
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
111
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
def get_genomic(id, params=None):
    """Return ``get_seq`` output for ``id`` with sequence type ``'genomic'``."""
    seqtype = 'genomic'
    return get_seq(id, seqtype, params=params)
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
114
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
115
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
116 def get_transcript_haplotypes(species,transcript):
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
117 ext = "/transcript_haplotypes/%s/%s?aligned_sequences=1" % (species,transcript)
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
118 req_header = {"Content-Type" : "application/json"}
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
119 r = ensembl_rest(ext, req_header)
5c92d0be6514 Uploaded
jjohnson
parents:
diff changeset
120 decoded = r.json()