Mercurial > repos > charles_s_test > seqsero2
diff libs/sratoolkit.2.8.0-centos_linux64/example/perl/gene-lookup.pl @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/sratoolkit.2.8.0-centos_linux64/example/perl/gene-lookup.pl Mon Nov 27 11:21:07 2017 -0500 @@ -0,0 +1,321 @@ +#!/usr/bin/env perl +# =========================================================================== +# +# PUBLIC DOMAIN NOTICE +# National Center for Biotechnology Information +# +# This software/database is a "United States Government Work" under the +# terms of the United States Copyright Act. It was written as part of +# the author's official duties as a United States Government employee and +# thus cannot be copyrighted. This software/database is freely available +# to the public for use. The National Library of Medicine and the U.S. +# Government have not placed any restriction on its use or reproduction. +# +# Although all reasonable efforts have been taken to ensure the accuracy +# and reliability of the software and data, the NLM and the U.S. +# Government do not and cannot warrant the performance or results that +# may be obtained by using this software or data. The NLM and the U.S. +# Government disclaim all warranties, express or implied, including +# warranties of performance, merchantability or fitness for any particular +# purpose. +# +# Please cite the author in any work or product based on this material. +# +# =========================================================================== + +use warnings; +use LWP::Simple; + +use Data::Dumper; + +my %opts = ( + host => 'www.ncbi.nlm.nih.gov', + cgi => 'projects/variation/tools/1000genomes/genomesearch.cgi', + assm => 'GCF_000001405.17', + filter => '' +); + +sub usage() +{ + print <<"HELP"; +convert gene names to chromosome and range + +Usage: +$0 <name>... +options are + -h | -? | --help this message + +Example: + $0 williams + +HELP + exit 0; +} + +#@ARGV = ( 'cat' ) if scalar @ARGV == 0; +usage if scalar @ARGV == 0; +foreach (@ARGV) { + usage() if (/^-h$/ || /^-\?$/ || /^--help$/); +} + +package JSON; + +sub _json_object; +sub _json_array; +sub _json_value; +sub _json_value_check; + +use constant { + cc_WHITESPACE => 1, + cc_OPERATOR => 2, + cc_QUOTE => 4, + cc_ESCAPE => 8, + cc_ESCAPED => 16, + cc_UNICODED => 32 +}; + +my @CC = map { + my $x = 0; + + $x |= cc_WHITESPACE if (chr($_) =~ /\s/); + $x |= cc_OPERATOR if (chr($_) =~ /[\[\]{},:]/); + $x |= cc_QUOTE if (chr($_) eq '"'); + $x |= cc_ESCAPE if (chr($_) eq '\\'); + $x |= cc_ESCAPED if (chr($_) =~ /[\\\/bfnrt]/); + $x |= cc_UNICODED if (chr($_) eq 'u'); + $x; +} (0..255); + +sub _json_token_string +{ + my ($sref, $i) = @_; + my $slen = length($$sref); + my $st = 1; + my $string = ''; + + for ( ; $$i < $slen; ++$$i) { + local $_ = substr($$sref, $$i, 1); + my $cc = $CC[ord($_)]; + + if ($st == 1) { # string tokenizing + if ($cc & cc_QUOTE) { + ++$$i; + return ('string', eval "\"$string\"") + } + if ($cc & cc_ESCAPE) { + ++$st; + } + else { + $string .= $_; + } + next; + } + if ($st == 2) { # escape tokenizing + if ($cc & cc_ESCAPED) { + $string .= "\\$_"; + } + elsif ($cc & cc_UNICODED) { + my $value = substr($$sref, $$i + 1, 4); + + return (undef, 'expected 4 hex digits') unless $value =~ /[[:xdigit:]]{4}/; + $string .= "\\N{U+$value}"; + $$i += 4; + } + else { + $string .= "\\\\$_"; + } + $st = 1; + next; + } + } +} + +sub _json_token +{ + my ($sref, $i) = @_; + my $slen = length($$sref); + + for ( ; $$i < $slen; ++$$i) { + local $_ = substr($$sref, $$i, 1); + my $cc = $CC[ord($_)]; + + next if ($cc & cc_WHITESPACE); + + if ($cc & cc_QUOTE) { + ++$$i; + return _json_token_string(@_); + } + if ($cc & cc_OPERATOR) { + ++$$i; + return ('op', $_); + } + if (substr($$sref, $$i, 5) eq 'false') { + $$i += 5; + return ('bool', 0); + } + if (substr($$sref, $$i, 4) eq 'true') { + $$i += 4; + return ('bool', 1); + } + if (substr($$sref, $$i, 4) eq 'null') { + $$i += 4; + return ('null', undef); + } + if (substr($$sref, $$i) =~ /^(-{0,1}\d+(?:\.\d+){0,1}(?:e[-+]{0,1}\d+){0,1})/i) { + my $value = $1; + + $$i += length $value; + return ('number', eval $value); + } + substr($$sref, $$i) =~ /(\w+)/; + return (undef, "unexpected '$1'"); + } + return ('empty', 'no input'); +} + +sub _json_value +{ + my $start = ${$_[1]}; + my ($type, $value) = _json_token(@_); + my $len = ${$_[1]} - $start; + + $len = 10 if $len < 10; + die substr(${$_[0]}, $start, $len)."\nJSON syntax error: $value" unless $type; + if ($type eq 'op') { + if ($value eq '{') { + $type = 'object'; + $value = _json_object(@_); + } + elsif ($value eq '[') { + $type = 'array'; + $value = _json_array(@_); + } + } + return ($type, $value); +} + +sub _json_value_check +{ + my ($type, $value) = @_; + + return $value if ( $type eq 'array' + || $type eq 'bool' + || $type eq 'null' + || $type eq 'number' + || $type eq 'object' + || $type eq 'string'); + + die "JSON syntax error: expected array, bool, null, object, or string"; +} + +sub _json_object +{ + my $rslt; + + for ( ; ; ) { + my ($type, $value) = _json_token(@_); + my $name; + + unless (defined($rslt)) { + $rslt = {}; + last if ($type eq 'op' && $value eq '}'); + } + die "JSON syntax error: expected string" unless $type eq 'string'; + $name = $value; + + ($type, $value) = _json_token(@_); + die "JSON syntax error: expected ':', have '$value'" unless ($type eq 'op' && $value eq ':'); + + $rslt->{$name} = _json_value_check(_json_value(@_)); + + ($type, $value) = _json_token(@_); + die "JSON syntax error: $value" unless defined($type); + last if ($type eq 'op' && $value eq '}'); + die "JSON syntax error: expected ',' or '}', have '$value'" unless ($type eq 'op' && $value eq ','); + } + return $rslt; +} + +sub _json_array +{ + my $rslt; + + for ( ; ; ) { + my ($type, $value) = _json_value(@_); + + unless (defined($rslt)) { + $rslt = []; + last if ($type eq 'op' && $value eq ']'); + } + push @{$rslt}, _json_value_check($type, $value); + + ($type, $value) = _json_token(@_); + last if ($type eq 'op' && $value eq ']'); + die "JSON syntax error: expected ',' or ']', have '$value'" unless ($type eq 'op' && $value eq ','); + } + return $rslt; +} + +sub decode() +{ + my $self = $_[0]; + my $i = 0; # holds current parsing position + + return _json_value_check(_json_value(\$self->{'source'}, \$i)); +} + +sub new($) +{ + my $class = $_[0]; + my $self = { 'source' => ($_[1] || '') }; + + bless $self, $class; + return $self; +} + +package main; + +sub get_object_locations($) +{ + my $response = get("http://$opts{'host'}/$opts{'cgi'}?assm=$opts{'assm'}&query=$_[0]"); + my $json = new JSON($response); + my $parsed = $json->decode(); + + return $parsed->{'results'}->{'obj_locs'}; +} + +sub process($) +{ + my $obj_locs = get_object_locations($_[0]); + + foreach ( @{$obj_locs} ) { + my $type = $_->{'feature_type'}; + + if (!$opts{'filter'} || $type eq $opts{'filter'}) { + my $label = $_->{'label'}; + my $chrom = $_->{'chr'}->{'chrom'}; + my $from = $_->{'seq_from'}; + my $to = $_->{'seq_to'}; + + printf("%s: %s\tslice: '%s:%u-%u'\n", + $type, $label, $chrom, $from, $to); + } + } +} + +for (my $i = 0; $i < scalar @ARGV; ++$i) { + $_ = $ARGV[$i]; + + if (/^-/) { + if (/^--filter/) { + $opts{'filter'} = $ARGV[++$i]; + next; + } + if (/^--assembly/) { + $opts{'assm'} = $ARGV[++$i]; + next; + } + usage(); + } + process $_; +}