annotate variant_effect_predictor/Bio/ClusterIO/dbsnp.pm @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 # $Id: dbsnp.pm,v 1.7.2.1 2003/08/21 21:07:06 allenday Exp $
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2 # BioPerl module for Bio::ClusterIO::dbsnp
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 # Copyright Allen Day <allenday@ucla.edu>, Stan Nelson <snelson@ucla.edu>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5 # Human Genetics, UCLA Medical School, University of California, Los Angeles
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 # POD documentation - main docs before the code
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 Bio::ClusterIO::dbsnp - dbSNP input stream
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15 Do not use this module directly. Use it via the Bio::ClusterIO class.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 Parse dbSNP XML files, one refSNP entry at a time.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 =head1 FEEDBACK
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 =head2 Mailing Lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 User feedback is an integral part of the evolution of this and other
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26 Bioperl modules. Send your comments and suggestions preferably to one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 of the Bioperl mailing lists. Your participation is much appreciated.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 bioperl-l@bioperl.org - General discussion
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 http://bioperl.org/MailList.shtml - About the mailing lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32 =head2 Reporting Bugs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34 Report bugs to the Bioperl bug tracking system to help us keep track of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 the bugs and their resolution.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 Bug reports can be submitted via email or the web:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38 bioperl-bugs@bio.perl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39 http://bugzilla.bioperl.org/
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 =head1 AUTHOR
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 Allen Day E<lt>allenday@ucla.eduE<gt>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 =head1 APPENDIX
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47 The rest of the documentation details each of the object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 methods. Internal methods are usually preceded with a _
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52 # Let the code begin...
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 package Bio::ClusterIO::dbsnp;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56 use Bio::Root::Root;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 use Bio::ClusterIO;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58 use Bio::Variation::SNP;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59 use XML::Parser::PerlSAX;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60 use XML::Handler::Subs;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61 use Data::Dumper;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62 use IO::File;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Package globals, declared through "use vars" so they are usable under
# "use strict":
#   @ISA     - parent class list (Bio::ClusterIO)
#   $DTD     - URL string of the NSE DTD; not referenced by the code in this file
#   $DEBUG   - switched on in _initialize() for verbose objects; enables the
#              parse timing in next_cluster()
#   %MODEMAP - declared only; nothing in this file reads or writes it
#   %MAPPING - XML element name => accessor table, filled in the BEGIN block
use vars qw(@ISA $DTD $DEBUG %MODEMAP %MAPPING);

@ISA = ('Bio::ClusterIO');
$DTD = 'ftp://ftp.ncbi.nih.gov/snp/specs/NSE.dtd';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
BEGIN {
    # Lookup table consulted by start_element(): XML element name => target.
    #   * plain string : name of the method that end_element() calls with the
    #                    element's character data
    #   * hash ref     : { method name => XML attribute name }; the value of
    #                    that attribute is passed to the method instead
    # Entries that are commented out have no method written for them yet. -Allen

    # Elements describing the refSNP itself.
    # NOTE(review): 'NSE-rs_ucsc-num-seq-loc' feeds 'ucsc_ctg_hits' although the
    # parallel ncbi entry feeds 'ncbi_seq_loc' -- confirm this pairing is intended.
    my %refsnp_fields = (
        'NSE-rs_refsnp-id'         => 'id',
      # 'NSE-rs_taxid'             => 'tax_id',
      # 'NSE-rs_organism'          => 'organism',
        'NSE-rs_snp-type'          => { 'type' => 'value' },
        'NSE-rs_observed'          => 'observed',
        'NSE-rs_seq-5_E'           => 'seq_5',
        'NSE-rs_seq-3_E'           => 'seq_3',
      # 'NSE-rs_seq-ss-exemplar'   => 'exemplar_subsnp',
        'NSE-rs_ncbi-build-id'     => 'ncbi_build',
        'NSE-rs_ncbi-num-chr-hits' => 'ncbi_chr_hits',
        'NSE-rs_ncbi-num-ctg-hits' => 'ncbi_ctg_hits',
        'NSE-rs_ncbi-num-seq-loc'  => 'ncbi_seq_loc',
      # 'NSE-rs_ncbi-mapweight'    => 'ncbi_mapweight',
        'NSE-rs_ucsc-build-id'     => 'ucsc_build',
        'NSE-rs_ucsc-num-chr-hits' => 'ucsc_chr_hits',
        'NSE-rs_ucsc-num-seq-loc'  => 'ucsc_ctg_hits',
      # 'NSE-rs_ucsc-mapweight'    => 'ucsc_mapweight',
        'NSE-rs_het'               => 'heterozygous',
        'NSE-rs_het-SE'            => 'heterozygous_SE',
        'NSE-rs_validated'         => { 'validated' => 'value' },
        'NSE-rs_genotype'          => { 'genotype' => 'value' },
    );

    # Elements describing one subSNP.
    my %subsnp_fields = (
        'NSE-ss_handle'       => 'handle',
        'NSE-ss_batch-id'     => 'batch_id',
        'NSE-ss_subsnp-id'    => 'id',
      # 'NSE-ss_loc-snp-id'   => 'loc_id',
      # 'NSE-ss_orient'       => {'orient' => 'value'},
      # 'NSE-ss_build-id'     => 'build',
        'NSE-ss_method-class' => { 'method' => 'value' },
      # 'NSE-ss_accession_E'  => 'accession',
      # 'NSE-ss_comment_E'    => 'comment',
      # 'NSE-ss_genename'     => 'gene_name',
      # 'NSE-ss_assay-5_E'    => 'seq_5',
      # 'NSE-ss_assay-3_E'    => 'seq_3',
      # 'NSE-ss_observed'     => 'observed',

      # 'NSE-ss-popinfo_type'          => 'pop_type',
      # 'NSE-ss-popinfo_batch-id'      => 'pop_batch_id',
      # 'NSE-ss-popinfo_pop-name'      => 'pop_name',
      # 'NSE-ss-popinfo_samplesize'    => 'pop_samplesize',
      # 'NSE-ss_popinfo_est-het'       => 'pop_est_heterozygous',
      # 'NSE-ss_popinfo_est-het-se-sq' => 'pop_est_heterozygous_se_sq',

      # 'NSE-ss-alleleinfo_type'         => 'allele_type',
      # 'NSE-ss-alleleinfo_batch-id'     => 'allele_batch_id',
      # 'NSE-ss-alleleinfo_pop-id'       => 'allele_pop_id',
      # 'NSE-ss-alleleinfo_snp-allele'   => 'allele_snp',
      # 'NSE-ss-alleleinfo_other-allele' => 'allele_other',
      # 'NSE-ss-alleleinfo_freq'         => 'allele_freq',
      # 'NSE-ss-alleleinfo_count'        => 'allele_count',
    );

    # Contig-hit and map-location elements: none handled yet.
      # 'NSE-rsContigHit_contig-id'  => 'contig_hit',
      # 'NSE-rsContigHit_accession'  => 'accession_hit',
      # 'NSE-rsContigHit_version'    => 'version',
      # 'NSE-rsContigHit_chromosome' => 'chromosome_hit',

      # 'NSE-rsMaploc_asn-from'    => 'asn_from',
      # 'NSE-rsMaploc_asn-to'      => 'asn_to',
      # 'NSE-rsMaploc_loc-type'    => {'loc_type' => 'value'},
      # 'NSE-rsMaploc_hit-quality' => {'hit_quality' => 'value'},
      # 'NSE-rsMaploc_orient'      => {'orient' => 'value'},
      # 'NSE-rsMaploc_physmap-str' => 'phys_from',
      # 'NSE-rsMaploc_physmap-int' => 'phys_to',

    # Function-set elements.
    my %fxnset_fields = (
        'NSE-FxnSet_locusid'          => 'locus_id',
        'NSE-FxnSet_symbol'           => 'symbol',
        'NSE-FxnSet_mrna-acc'         => 'mrna',
        'NSE-FxnSet_prot-acc'         => 'protein',
        'NSE-FxnSet_fxn-class-contig' => { 'functional_class' => 'value' },
    );

    #...
    #there are lots more, but i don't need them at the moment... -Allen
    %MAPPING = ( %refsnp_fields, %subsnp_fields, %fxnset_fields );
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Initialisation hook: runs the parent class set-up, applies the optional
# TEMPFILE argument and creates the SAX parser used by next_cluster().
#
# Args    : the argument list given to the object; only the TEMPFILE entry
#           (as extracted by _rearrange) is used here
# Returns : nothing
sub _initialize {
    my ($self, @args) = @_;
    $self->SUPER::_initialize(@args);

    my ($usetempfile) = $self->_rearrange( [qw(TEMPFILE)], @args );
    $self->use_tempfile($usetempfile) if defined $usetempfile;

    # Direct class-method call; the indirect-object form
    # "new XML::Parser::PerlSAX()" is ambiguous to the Perl parser.
    $self->{'_xmlparser'} = XML::Parser::PerlSAX->new();

    # $DEBUG is package-wide: the first verbose object switches it on, and an
    # explicitly defined value is never overridden.
    $DEBUG = 1 if !defined $DEBUG && $self->verbose > 0;

    return;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156 =head2 next_cluster
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158 Title : next_cluster
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159 Usage : $dbsnp = $stream->next_cluster()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160 Function: returns the next refSNP in the stream
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161 Returns : Bio::Variation::SNP object representing composite refSNP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162 and its component subSNP(s).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163 Args : NONE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
167 ###
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
168 #Adapted from Jason's blastxml.pm
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
169 ###
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Read the next <NSE-rs> ... </NSE-rs> entry from the stream and parse it.
#
# Lines before the opening <NSE-rs> tag are skipped. The entry text is
# collected in memory, or in an anonymous temp file when use_tempfile() is
# true, and then handed to the SAX parser with this object as the handler.
#
# Args    : none
# Returns : whatever refsnp() holds after parsing, or undef when the stream
#           ends before an <NSE-rs> start tag is seen
# Throws  : via $self->throw when the temp file cannot be created
sub next_cluster {
    my $self = shift;
    my $data = '';
    my $tfh;

    if ( $self->use_tempfile ) {
        $tfh = IO::File->new_tmpfile
            or $self->throw("Unable to open temp file: $!");
        $tfh->autoflush(1);
    }

    # A lexical line variable is used so that the caller's $_ is left alone.
    my $in_entry = 0;
    while ( defined( my $line = $self->_readline ) ) {

        #skip to beginning of refSNP entry
        unless ($in_entry) {
            next unless $line =~ m!<NSE-rs>!;
            $in_entry = 1;
        }

        #slurp up the data
        if ( defined $tfh ) {
            print $tfh $line;
        }
        else {
            $data .= $line;
        }

        #and stop at the end of the refSNP entry
        last if $line =~ m!</NSE-rs>!;
    }

    #if we didn't find a start tag
    # ("return undef" is kept on purpose: list-context callers have always
    # received a single undef here.)
    return undef unless $in_entry;

    my $source;
    if ( defined $tfh ) {
        seek( $tfh, 0, 0 );    # rewind so the parser reads from the start
        $source = { 'ByteStream' => $tfh };
    }
    else {
        $source = { 'String' => $data };
    }
    my %parser_args = ( 'Source' => $source, 'Handler' => $self );

    my $starttime;
    if ($DEBUG) {
        # Time::HiRes is not loaded at the top of this file, so load it here
        # before its functions are called.
        require Time::HiRes;
        $starttime = [ Time::HiRes::gettimeofday() ];
    }

    # A parse failure is reported as a warning only; the method still returns
    # whatever refsnp() holds afterwards.
    my $parsed_ok = eval { $self->{'_xmlparser'}->parse(%parser_args); 1 };
    $self->warn("error in parsing a report:\n $@") unless $parsed_ok;

    if ($DEBUG) {
        $self->debug(
            sprintf( "parsing took %f seconds\n",
                Time::HiRes::tv_interval($starttime) )
        );
    }

    return $self->refsnp;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
233
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
234 =head2 SAX methods
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
235
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
236 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
237
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
238 =head2 start_document
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
239
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
240 Title : start_document
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
241 Usage : $parser->start_document;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
242 Function: SAX method to indicate starting to parse a new document.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
243 Creates a Bio::Variation::SNP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
244 Returns : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
245 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
246
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
247 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
248
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# SAX callback for the start of a document: installs a fresh
# Bio::Variation::SNP object as the record being built.
# Returns the new object (the value of the assignment).
sub start_document {
    my $self = shift;
    my $snp  = Bio::Variation::SNP->new;
    return $self->{refsnp} = $snp;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
253
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Read-only accessor for the SNP object stored under the 'refsnp' key
# (set by start_document()).
sub refsnp {
    my ($self) = @_;
    return $self->{refsnp};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
257
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
258 =head2 end_document
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
259
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
260 Title : end_document
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
261 Usage : $parser->end_document;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
262 Function: SAX method to indicate finishing parsing a new document
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
263 Returns : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
264 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
265
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
266 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
267
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# SAX callback for the end of a document. Nothing needs to be done here; the
# arguments are unpacked and otherwise ignored.
sub end_document {
    my ($handler, @ignored) = @_;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
271
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
272 =head2 start_element
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
273
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
274 Title : start_element
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
275 Usage : $parser->start_element($data)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
276 Function: SAX method to indicate starting a new element
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
277 Returns : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
278 Args : hash ref for data
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
279
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
280 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
281
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# SAX callback for an opening tag.
#
# Args : hash ref with the keys 'Name' (element name) and 'Attributes'
#        (hash ref of attribute values)
#
# An <NSE-ss> element starts a new subSNP on the current refSNP. For any
# other element %MAPPING decides which method end_element() will call
# ($self->{will_handle}) and, for attribute-valued elements, which attribute
# value is passed to it ($self->{last_data}).
sub start_element {
    my ($self, $data) = @_;
    my $name  = $data->{'Name'};
    my $attrs = $data->{'Attributes'};

    if ( $name eq 'NSE-ss' ) {
        $self->refsnp->add_subsnp;
        return;
    }

    my $target = $MAPPING{$name};
    if ( !$target ) {
        # unmapped element: nothing to dispatch when it closes
        undef $self->{will_handle};
    }
    elsif ( ref $target eq 'HASH' ) {
        #okay, this is nasty. what can you do?
        # The hash flattens to (method name, attribute name).
        my ( $method, $attr_name ) = %$target;
        $self->{will_handle} = $method;
        $self->{last_data}   = $attrs->{$attr_name};
    }
    else {
        # text-valued element: characters() supplies the data later
        $self->{will_handle} = $target;
        $self->{last_data}   = undef;
    }
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
305
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
306 =head2 end_element
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
307
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
308 Title : end_element
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
309 Usage : $parser->end_element($data)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
310 Function: Signals finishing an element
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
311 Returns : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
312 Args : hash ref for data
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
313
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
314 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
315
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# SAX callback for a closing tag.
#
# Args    : hash ref with the key 'Name' (element name)
# Returns : nothing
#
# If start_element() selected a method for this element
# ($self->{will_handle}), that method is called with the collected data
# ($self->{last_data}). The element-name prefix picks the receiver: the
# refSNP for NSE-rs / NSE-SeqLoc / NSE-FxnSet elements, the refSNP's subsnp()
# for NSE-ss elements.
sub end_element {
    my ($self, $data) = @_;
    my $nm = $data->{'Name'};

    my $method = $self->{will_handle};
    return unless $method;

    # Each pending handler is used once. Without this reset the closing tag
    # of the parent element would call the same method again, and a subSNP
    # field could be written onto the refSNP when an NSE-rs* element closes.
    undef $self->{will_handle};

    if ( $nm =~ /^NSE-rs/ or $nm =~ /^NSE-SeqLoc/ or $nm =~ /^NSE-FxnSet/ ) {
        $self->refsnp->$method( $self->{last_data} );
    }
    elsif ( $nm =~ /^NSE-ss/ ) {
        $self->refsnp->subsnp->$method( $self->{last_data} );
    }

    return;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
330
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
331 =head2 characters
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
332
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
333 Title : characters
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
334 Usage : $parser->characters($data)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
335 Function: Signals new characters to be processed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
336 Returns : characters read
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
337 Args : hash ref with the key 'Data'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
338
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
339 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
340
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# SAX callback for character data.
#
# Args    : hash ref with the key 'Data'
# Returns : the text collected so far for the current element, or nothing
#           when the chunk is undefined or whitespace only
#
# The parser may deliver one run of text in several calls. Chunks that belong
# to the same mapped element are therefore joined; the original code kept
# only the last chunk. A chunk counts as a continuation only when
# $self->{last_data} is still exactly what this method stored last time, so a
# value reset by start_element() (undef or an attribute value) is replaced,
# as before.
sub characters {
    my ($self, $data) = @_;
    my $chunk = $data->{Data};

    #whitespace is meaningless -ad
    return unless defined $chunk && $chunk =~ /\S/;

    my $continues_text =
           $self->{will_handle}
        && defined $self->{last_data}
        && defined $self->{_text_so_far}
        && $self->{last_data} eq $self->{_text_so_far};

    $self->{last_data} = $continues_text ? $self->{last_data} . $chunk : $chunk;

    # Private marker used only by the continuation test above.
    $self->{_text_so_far} = $self->{last_data};

    return $self->{last_data};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
346
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
347 =head2 use_tempfile
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
348
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
349 Title : use_tempfile
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
350 Usage : $obj->use_tempfile($newval)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
351 Function: Get/Set boolean flag on whether or not to use a tempfile
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
352 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
353 Returns : value of use_tempfile
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
354 Args : newvalue (optional)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
355
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
356 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
357
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Combined getter/setter for the flag that makes next_cluster() buffer each
# entry in a temp file instead of in memory.
#
# Args    : optional new value; an undefined argument leaves the flag as is
# Returns : the current value of the flag
sub use_tempfile {
    my $self     = shift;
    my $new_flag = shift;
    $self->{'_use_tempfile'} = $new_flag if defined $new_flag;
    return $self->{'_use_tempfile'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
365
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
366 1;