Mercurial > repos > charles_s_test > seqsero2
comparison libs/sratoolkit.2.8.0-centos_linux64/example/perl/gene-lookup.pl @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:0d65b71ff8df | 3:38ad1130d077 |
---|---|
1 #!/usr/bin/env perl | |
2 # =========================================================================== | |
3 # | |
4 # PUBLIC DOMAIN NOTICE | |
5 # National Center for Biotechnology Information | |
6 # | |
7 # This software/database is a "United States Government Work" under the | |
8 # terms of the United States Copyright Act. It was written as part of | |
9 # the author's official duties as a United States Government employee and | |
10 # thus cannot be copyrighted. This software/database is freely available | |
11 # to the public for use. The National Library of Medicine and the U.S. | |
12 # Government have not placed any restriction on its use or reproduction. | |
13 # | |
14 # Although all reasonable efforts have been taken to ensure the accuracy | |
15 # and reliability of the software and data, the NLM and the U.S. | |
16 # Government do not and cannot warrant the performance or results that | |
17 # may be obtained by using this software or data. The NLM and the U.S. | |
18 # Government disclaim all warranties, express or implied, including | |
19 # warranties of performance, merchantability or fitness for any particular | |
20 # purpose. | |
21 # | |
22 # Please cite the author in any work or product based on this material. | |
23 # | |
24 # =========================================================================== | |
25 | |
26 use warnings; | |
27 use LWP::Simple; | |
28 | |
29 use Data::Dumper; | |
30 | |
31 my %opts = ( | |
32 host => 'www.ncbi.nlm.nih.gov', | |
33 cgi => 'projects/variation/tools/1000genomes/genomesearch.cgi', | |
34 assm => 'GCF_000001405.17', | |
35 filter => '' | |
36 ); | |
37 | |
38 sub usage() | |
39 { | |
40 print <<"HELP"; | |
41 convert gene names to chromosome and range | |
42 | |
43 Usage: | |
44 $0 <name>... | |
45 options are | |
46 -h | -? | --help this message | |
47 | |
48 Example: | |
49 $0 williams | |
50 | |
51 HELP | |
52 exit 0; | |
53 } | |
54 | |
55 #@ARGV = ( 'cat' ) if scalar @ARGV == 0; | |
56 usage if scalar @ARGV == 0; | |
57 foreach (@ARGV) { | |
58 usage() if (/^-h$/ || /^-\?$/ || /^--help$/); | |
59 } | |
60 | |
61 package JSON; | |
62 | |
63 sub _json_object; | |
64 sub _json_array; | |
65 sub _json_value; | |
66 sub _json_value_check; | |
67 | |
68 use constant { | |
69 cc_WHITESPACE => 1, | |
70 cc_OPERATOR => 2, | |
71 cc_QUOTE => 4, | |
72 cc_ESCAPE => 8, | |
73 cc_ESCAPED => 16, | |
74 cc_UNICODED => 32 | |
75 }; | |
76 | |
77 my @CC = map { | |
78 my $x = 0; | |
79 | |
80 $x |= cc_WHITESPACE if (chr($_) =~ /\s/); | |
81 $x |= cc_OPERATOR if (chr($_) =~ /[\[\]{},:]/); | |
82 $x |= cc_QUOTE if (chr($_) eq '"'); | |
83 $x |= cc_ESCAPE if (chr($_) eq '\\'); | |
84 $x |= cc_ESCAPED if (chr($_) =~ /[\\\/bfnrt]/); | |
85 $x |= cc_UNICODED if (chr($_) eq 'u'); | |
86 $x; | |
87 } (0..255); | |
88 | |
89 sub _json_token_string | |
90 { | |
91 my ($sref, $i) = @_; | |
92 my $slen = length($$sref); | |
93 my $st = 1; | |
94 my $string = ''; | |
95 | |
96 for ( ; $$i < $slen; ++$$i) { | |
97 local $_ = substr($$sref, $$i, 1); | |
98 my $cc = $CC[ord($_)]; | |
99 | |
100 if ($st == 1) { # string tokenizing | |
101 if ($cc & cc_QUOTE) { | |
102 ++$$i; | |
103 return ('string', eval "\"$string\"") | |
104 } | |
105 if ($cc & cc_ESCAPE) { | |
106 ++$st; | |
107 } | |
108 else { | |
109 $string .= $_; | |
110 } | |
111 next; | |
112 } | |
113 if ($st == 2) { # escape tokenizing | |
114 if ($cc & cc_ESCAPED) { | |
115 $string .= "\\$_"; | |
116 } | |
117 elsif ($cc & cc_UNICODED) { | |
118 my $value = substr($$sref, $$i + 1, 4); | |
119 | |
120 return (undef, 'expected 4 hex digits') unless $value =~ /[[:xdigit:]]{4}/; | |
121 $string .= "\\N{U+$value}"; | |
122 $$i += 4; | |
123 } | |
124 else { | |
125 $string .= "\\\\$_"; | |
126 } | |
127 $st = 1; | |
128 next; | |
129 } | |
130 } | |
131 } | |
132 | |
133 sub _json_token | |
134 { | |
135 my ($sref, $i) = @_; | |
136 my $slen = length($$sref); | |
137 | |
138 for ( ; $$i < $slen; ++$$i) { | |
139 local $_ = substr($$sref, $$i, 1); | |
140 my $cc = $CC[ord($_)]; | |
141 | |
142 next if ($cc & cc_WHITESPACE); | |
143 | |
144 if ($cc & cc_QUOTE) { | |
145 ++$$i; | |
146 return _json_token_string(@_); | |
147 } | |
148 if ($cc & cc_OPERATOR) { | |
149 ++$$i; | |
150 return ('op', $_); | |
151 } | |
152 if (substr($$sref, $$i, 5) eq 'false') { | |
153 $$i += 5; | |
154 return ('bool', 0); | |
155 } | |
156 if (substr($$sref, $$i, 4) eq 'true') { | |
157 $$i += 4; | |
158 return ('bool', 1); | |
159 } | |
160 if (substr($$sref, $$i, 4) eq 'null') { | |
161 $$i += 4; | |
162 return ('null', undef); | |
163 } | |
164 if (substr($$sref, $$i) =~ /^(-{0,1}\d+(?:\.\d+){0,1}(?:e[-+]{0,1}\d+){0,1})/i) { | |
165 my $value = $1; | |
166 | |
167 $$i += length $value; | |
168 return ('number', eval $value); | |
169 } | |
170 substr($$sref, $$i) =~ /(\w+)/; | |
171 return (undef, "unexpected '$1'"); | |
172 } | |
173 return ('empty', 'no input'); | |
174 } | |
175 | |
176 sub _json_value | |
177 { | |
178 my $start = ${$_[1]}; | |
179 my ($type, $value) = _json_token(@_); | |
180 my $len = ${$_[1]} - $start; | |
181 | |
182 $len = 10 if $len < 10; | |
183 die substr(${$_[0]}, $start, $len)."\nJSON syntax error: $value" unless $type; | |
184 if ($type eq 'op') { | |
185 if ($value eq '{') { | |
186 $type = 'object'; | |
187 $value = _json_object(@_); | |
188 } | |
189 elsif ($value eq '[') { | |
190 $type = 'array'; | |
191 $value = _json_array(@_); | |
192 } | |
193 } | |
194 return ($type, $value); | |
195 } | |
196 | |
197 sub _json_value_check | |
198 { | |
199 my ($type, $value) = @_; | |
200 | |
201 return $value if ( $type eq 'array' | |
202 || $type eq 'bool' | |
203 || $type eq 'null' | |
204 || $type eq 'number' | |
205 || $type eq 'object' | |
206 || $type eq 'string'); | |
207 | |
208 die "JSON syntax error: expected array, bool, null, object, or string"; | |
209 } | |
210 | |
211 sub _json_object | |
212 { | |
213 my $rslt; | |
214 | |
215 for ( ; ; ) { | |
216 my ($type, $value) = _json_token(@_); | |
217 my $name; | |
218 | |
219 unless (defined($rslt)) { | |
220 $rslt = {}; | |
221 last if ($type eq 'op' && $value eq '}'); | |
222 } | |
223 die "JSON syntax error: expected string" unless $type eq 'string'; | |
224 $name = $value; | |
225 | |
226 ($type, $value) = _json_token(@_); | |
227 die "JSON syntax error: expected ':', have '$value'" unless ($type eq 'op' && $value eq ':'); | |
228 | |
229 $rslt->{$name} = _json_value_check(_json_value(@_)); | |
230 | |
231 ($type, $value) = _json_token(@_); | |
232 die "JSON syntax error: $value" unless defined($type); | |
233 last if ($type eq 'op' && $value eq '}'); | |
234 die "JSON syntax error: expected ',' or '}', have '$value'" unless ($type eq 'op' && $value eq ','); | |
235 } | |
236 return $rslt; | |
237 } | |
238 | |
239 sub _json_array | |
240 { | |
241 my $rslt; | |
242 | |
243 for ( ; ; ) { | |
244 my ($type, $value) = _json_value(@_); | |
245 | |
246 unless (defined($rslt)) { | |
247 $rslt = []; | |
248 last if ($type eq 'op' && $value eq ']'); | |
249 } | |
250 push @{$rslt}, _json_value_check($type, $value); | |
251 | |
252 ($type, $value) = _json_token(@_); | |
253 last if ($type eq 'op' && $value eq ']'); | |
254 die "JSON syntax error: expected ',' or ']', have '$value'" unless ($type eq 'op' && $value eq ','); | |
255 } | |
256 return $rslt; | |
257 } | |
258 | |
259 sub decode() | |
260 { | |
261 my $self = $_[0]; | |
262 my $i = 0; # holds current parsing position | |
263 | |
264 return _json_value_check(_json_value(\$self->{'source'}, \$i)); | |
265 } | |
266 | |
267 sub new($) | |
268 { | |
269 my $class = $_[0]; | |
270 my $self = { 'source' => ($_[1] || '') }; | |
271 | |
272 bless $self, $class; | |
273 return $self; | |
274 } | |
275 | |
276 package main; | |
277 | |
278 sub get_object_locations($) | |
279 { | |
280 my $response = get("http://$opts{'host'}/$opts{'cgi'}?assm=$opts{'assm'}&query=$_[0]"); | |
281 my $json = new JSON($response); | |
282 my $parsed = $json->decode(); | |
283 | |
284 return $parsed->{'results'}->{'obj_locs'}; | |
285 } | |
286 | |
287 sub process($) | |
288 { | |
289 my $obj_locs = get_object_locations($_[0]); | |
290 | |
291 foreach ( @{$obj_locs} ) { | |
292 my $type = $_->{'feature_type'}; | |
293 | |
294 if (!$opts{'filter'} || $type eq $opts{'filter'}) { | |
295 my $label = $_->{'label'}; | |
296 my $chrom = $_->{'chr'}->{'chrom'}; | |
297 my $from = $_->{'seq_from'}; | |
298 my $to = $_->{'seq_to'}; | |
299 | |
300 printf("%s: %s\tslice: '%s:%u-%u'\n", | |
301 $type, $label, $chrom, $from, $to); | |
302 } | |
303 } | |
304 } | |
305 | |
306 for (my $i = 0; $i < scalar @ARGV; ++$i) { | |
307 $_ = $ARGV[$i]; | |
308 | |
309 if (/^-/) { | |
310 if (/^--filter/) { | |
311 $opts{'filter'} = $ARGV[++$i]; | |
312 next; | |
313 } | |
314 if (/^--assembly/) { | |
315 $opts{'assm'} = $ARGV[++$i]; | |
316 next; | |
317 } | |
318 usage(); | |
319 } | |
320 process $_; | |
321 } |