Mercurial > repos > jjohnson > ensembl_cdna_translate
comparison ensembl_rest.py @ 8:5c92d0be6514 draft
Uploaded
author | jjohnson |
---|---|
date | Thu, 14 Dec 2017 13:32:00 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
7:d59e3ce10e74 | 8:5c92d0be6514 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 # | |
4 #------------------------------------------------------------------------------ | |
5 # University of Minnesota | |
6 # Copyright 2017, Regents of the University of Minnesota | |
7 #------------------------------------------------------------------------------ | |
8 # Author: | |
9 # | |
10 # James E Johnson | |
11 # | |
12 #------------------------------------------------------------------------------ | |
13 """ | |
14 | |
15 import sys | |
16 import requests | |
17 from time import sleep | |
18 | |
19 | |
# Base URL that every request path built in this module is appended to.
server = "https://rest.ensembl.org"
# Upper bound on the span requested per call when a region is fetched in chunks.
max_region = 4000000
# Module-level default request path (human assembly info).
ext = "/info/assembly/homo_sapiens?"
23 | |
24 | |
def ensembl_rest(ext, headers):
    """Issue a GET request against the Ensembl REST server.

    The request path is echoed to stderr before the call.  When the server
    answers with HTTP 429 and supplies a ``Retry-After`` header, the call
    waits for the advertised number of seconds and retries once.

    Args:
        ext: request path (including any query string) appended to ``server``.
        headers: dict of HTTP headers passed to ``requests.get``.

    Returns:
        The ``requests`` response object of the (possibly retried) request.

    Raises:
        requests.HTTPError: via ``raise_for_status`` when the final response
            carries a 4xx/5xx status.
        ValueError: when ``Retry-After`` is not a plain number of seconds.
    """
    url = server + ext
    print >> sys.stderr, "%s" % ext
    r = requests.get(url, headers=headers)
    if r.status_code == 429:
        print >> sys.stderr, "response headers: %s\n" % r.headers
        if 'Retry-After' in r.headers:
            # Header values are strings; time.sleep() requires a number.
            sleep(float(r.headers['Retry-After']))
            r = requests.get(url, headers=headers)
    if not r.ok:
        r.raise_for_status()
    return r
36 | |
37 | |
def get_species():
    """Fetch the species list and index it by species name.

    Each species is also written to stdout as one tab-separated line holding
    its name, common_name, display_name, strain and taxon_id fields.

    Returns:
        dict mapping each species ``name`` to its full record from the
        ``species`` list of the ``/info/species`` response.
    """
    req_header = {"Content-Type": "application/json"}
    response = ensembl_rest("/info/species", req_header)
    by_name = {}
    for record in response.json()['species']:
        by_name[record['name']] = record
        columns = (record['name'], record['common_name'],
                   record['display_name'],
                   record['strain'],
                   record['taxon_id'])
        print >> sys.stdout, "%s\t%s\t%s\t%s\t%s" % columns
    return by_name
52 | |
53 | |
def get_biotypes(species):
    """Return the list of biotype names reported for *species*.

    Entries of the ``/info/biotypes/<species>`` response that carry no
    ``biotype`` key are skipped.
    """
    req_header = {"Content-Type": "application/json"}
    response = ensembl_rest("/info/biotypes/%s?" % species, req_header)
    return [item['biotype'] for item in response.json() if 'biotype' in item]
63 | |
64 | |
def get_toplevel(species):
    """Return the top-level sequence lengths of *species*' assembly.

    Returns:
        dict keyed by coordinate-system name; each value is a dict mapping a
        sequence name to its length as an int.
    """
    req_header = {"Content-Type": "application/json"}
    response = ensembl_rest("/info/assembly/%s?" % species, req_header)
    by_system = {}
    for region in response.json()['top_level_region']:
        # Create the per-coordinate-system dict on first sight.
        lengths = by_system.setdefault(region['coord_system'], {})
        name = region['name']
        lengths[name] = int(region['length'])
    return by_system
77 | |
78 | |
def get_transcripts_bed(species, refseq, start, length, strand='', params=None):
    """Fetch the transcripts overlapping a region as BED lines.

    The span from ``start`` to ``length`` is requested in consecutive chunks
    of at most ``max_region`` positions beyond the chunk start.

    Args:
        species: species name placed in the request path.
        refseq: sequence (e.g. chromosome) name placed in the request path.
        start: first position of the span.
        length: last position of the span (used as the final chunk end).
        strand: optional suffix appended directly after the end coordinate.
        params: optional extra query-string text appended after
            ``feature=transcript;``.

    Returns:
        list of BED text lines in the order first received, each distinct
        line appearing once.
    """
    bed = []
    seen = set()
    param = params if params else ''
    req_header = {"Content-Type": "text/x-bed"}
    # list() so that .append() works whether range() yields a list or not.
    regions = list(range(start, length, max_region))
    if not regions or regions[-1] < length:
        regions.append(length)
    for end in regions[1:]:
        ext = "/overlap/region/%s/%s:%d-%d%s?feature=transcript;%s"\
            % (species, refseq, start, end, strand, param)
        start = end + 1
        r = ensembl_rest(ext, req_header)
        # A transcript spanning a chunk boundary overlaps both neighbouring
        # chunks and is returned by both requests; keep only its first copy.
        for line in r.text.splitlines():
            if line not in seen:
                seen.add(line)
                bed.append(line)
    return bed
94 | |
95 | |
def get_seq(id, seqtype, params=None):
    """Return the text of the ``/sequence/id`` response for *id*.

    Args:
        id: identifier placed in the request path.
        seqtype: value of the ``type`` query parameter.
        params: optional extra query-string text appended after the type.
    """
    extra = params or ''
    endpoint = "/sequence/id/%s?type=%s;%s" % (id, seqtype, extra)
    response = ensembl_rest(endpoint, {"Content-Type": "text/plain"})
    return response.text
102 | |
103 | |
def get_cdna(id, params=None):
    """Return the ``cdna``-type sequence text for *id* (see ``get_seq``)."""
    seqtype = 'cdna'
    return get_seq(id, seqtype, params)
106 | |
107 | |
def get_cds(id, params=None):
    """Return the ``cds``-type sequence text for *id* (see ``get_seq``)."""
    seqtype = 'cds'
    return get_seq(id, seqtype, params)
110 | |
111 | |
def get_genomic(id, params=None):
    """Return the ``genomic``-type sequence text for *id* (see ``get_seq``)."""
    seqtype = 'genomic'
    return get_seq(id, seqtype, params)
114 | |
115 | |
116 def get_transcript_haplotypes(species,transcript): | |
117 ext = "/transcript_haplotypes/%s/%s?aligned_sequences=1" % (species,transcript) | |
118 req_header = {"Content-Type" : "application/json"} | |
119 r = ensembl_rest(ext, req_header) | |
120 decoded = r.json() |