annotate variant_effect_predictor/Bio/ClusterIO/dbsnp.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: dbsnp.pm,v 1.7.2.1 2003/08/21 21:07:06 allenday Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 # BioPerl module for Bio::ClusterIO::dbsnp
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 # Copyright Allen Day <allenday@ucla.edu>, Stan Nelson <snelson@ucla.edu>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # Human Genetics, UCLA Medical School, University of California, Los Angeles
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 # POD documentation - main docs before the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 Bio::ClusterIO::dbsnp - dbSNP input stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 Do not use this module directly. Use it via the Bio::ClusterIO class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 Parse dbSNP XML files, one refSNP entry at a time.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 User feedback is an integral part of the evolution of this and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 Bioperl modules. Send your comments and suggestions preferably to one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 of the Bioperl mailing lists. Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 http://bioperl.org/MailList.shtml - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 of the bugs and their resolution.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 Bug reports can be submitted via email or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 =head1 AUTHOR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 Allen Day E<lt>allenday@ucla.eduE<gt>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 The rest of the documentation details each of the object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 methods. Internal methods are usually preceded with a _
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 # Let the code begin...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 package Bio::ClusterIO::dbsnp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 use Bio::Root::Root;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 use Bio::ClusterIO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 use Bio::Variation::SNP;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 use XML::Parser::PerlSAX;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 use XML::Handler::Subs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 use Data::Dumper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 use IO::File;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Package globals.
#   @ISA     - parent class list (Bio::ClusterIO)
#   $DTD     - location of the dbSNP NSE DTD
#   $DEBUG   - debug flag; declared here, not assigned in this section
#   %MODEMAP - declared here, not assigned in this section
#   %MAPPING - XML element name => handler description (filled in below)
use vars qw(@ISA $DTD $DEBUG %MODEMAP %MAPPING);

@ISA = ('Bio::ClusterIO');
$DTD = 'ftp://ftp.ncbi.nih.gov/snp/specs/NSE.dtd';

# %MAPPING is keyed by dbSNP XML element name.  A value is one of:
#   * a plain string: the name of the method that receives the element's
#     character data, or
#   * a one-pair hash ref { method_name => attribute_name }: the method
#     receives the value of that attribute of the element.
# The table is populated at compile time.
BEGIN {
    %MAPPING = (
        # Entries that are commented out have no handler methods
        # written yet. -Allen

        # --- refSNP (NSE-rs) fields ---
        'NSE-rs_refsnp-id'              => 'id',
#       'NSE-rs_taxid'                  => 'tax_id',
#       'NSE-rs_organism'               => 'organism',
        'NSE-rs_snp-type'               => { 'type' => 'value' },
        'NSE-rs_observed'               => 'observed',
        'NSE-rs_seq-5_E'                => 'seq_5',
        'NSE-rs_seq-3_E'                => 'seq_3',
#       'NSE-rs_seq-ss-exemplar'        => 'exemplar_subsnp',
        'NSE-rs_ncbi-build-id'          => 'ncbi_build',
        'NSE-rs_ncbi-num-chr-hits'      => 'ncbi_chr_hits',
        'NSE-rs_ncbi-num-ctg-hits'      => 'ncbi_ctg_hits',
        'NSE-rs_ncbi-num-seq-loc'       => 'ncbi_seq_loc',
#       'NSE-rs_ncbi-mapweight'         => 'ncbi_mapweight',
        'NSE-rs_ucsc-build-id'          => 'ucsc_build',
        'NSE-rs_ucsc-num-chr-hits'      => 'ucsc_chr_hits',
        # NOTE(review): num-seq-loc is routed to 'ucsc_ctg_hits' here, while
        # the NCBI counterpart above goes to 'ncbi_seq_loc' -- confirm that
        # this is intended before changing it.
        'NSE-rs_ucsc-num-seq-loc'       => 'ucsc_ctg_hits',
#       'NSE-rs_ucsc-mapweight'         => 'ucsc_mapweight',
        'NSE-rs_het'                    => 'heterozygous',
        'NSE-rs_het-SE'                 => 'heterozygous_SE',
        'NSE-rs_validated'              => { 'validated' => 'value' },
        'NSE-rs_genotype'               => { 'genotype' => 'value' },

        # --- subSNP (NSE-ss) fields ---
        'NSE-ss_handle'                 => 'handle',
        'NSE-ss_batch-id'               => 'batch_id',
        'NSE-ss_subsnp-id'              => 'id',
#       'NSE-ss_loc-snp-id'             => 'loc_id',
#       'NSE-ss_orient'                 => {'orient' => 'value'},
#       'NSE-ss_build-id'               => 'build',
        'NSE-ss_method-class'           => { 'method' => 'value' },
#       'NSE-ss_accession_E'            => 'accession',
#       'NSE-ss_comment_E'              => 'comment',
#       'NSE-ss_genename'               => 'gene_name',
#       'NSE-ss_assay-5_E'              => 'seq_5',
#       'NSE-ss_assay-3_E'              => 'seq_3',
#       'NSE-ss_observed'               => 'observed',

#       'NSE-ss-popinfo_type'           => 'pop_type',
#       'NSE-ss-popinfo_batch-id'       => 'pop_batch_id',
#       'NSE-ss-popinfo_pop-name'       => 'pop_name',
#       'NSE-ss-popinfo_samplesize'     => 'pop_samplesize',
#       'NSE-ss_popinfo_est-het'        => 'pop_est_heterozygous',
#       'NSE-ss_popinfo_est-het-se-sq'  => 'pop_est_heterozygous_se_sq',

#       'NSE-ss-alleleinfo_type'        => 'allele_type',
#       'NSE-ss-alleleinfo_batch-id'    => 'allele_batch_id',
#       'NSE-ss-alleleinfo_pop-id'      => 'allele_pop_id',
#       'NSE-ss-alleleinfo_snp-allele'  => 'allele_snp',
#       'NSE-ss-alleleinfo_other-allele' => 'allele_other',
#       'NSE-ss-alleleinfo_freq'        => 'allele_freq',
#       'NSE-ss-alleleinfo_count'       => 'allele_count',

#       'NSE-rsContigHit_contig-id'     => 'contig_hit',
#       'NSE-rsContigHit_accession'     => 'accession_hit',
#       'NSE-rsContigHit_version'       => 'version',
#       'NSE-rsContigHit_chromosome'    => 'chromosome_hit',

#       'NSE-rsMaploc_asn-from'         => 'asn_from',
#       'NSE-rsMaploc_asn-to'           => 'asn_to',
#       'NSE-rsMaploc_loc-type'         => {'loc_type' => 'value'},
#       'NSE-rsMaploc_hit-quality'      => {'hit_quality' => 'value'},
#       'NSE-rsMaploc_orient'           => {'orient' => 'value'},
#       'NSE-rsMaploc_physmap-str'      => 'phys_from',
#       'NSE-rsMaploc_physmap-int'      => 'phys_to',

        # --- functional annotation (NSE-FxnSet) fields ---
        'NSE-FxnSet_locusid'            => 'locus_id',
        'NSE-FxnSet_symbol'             => 'symbol',
        'NSE-FxnSet_mrna-acc'           => 'mrna',
        'NSE-FxnSet_prot-acc'           => 'protein',
        'NSE-FxnSet_fxn-class-contig'   => { 'functional_class' => 'value' },

        #...
        #...
        # There are lots more, but I don't need them at the moment... -Allen
    );
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Object initialisation hook.
#
# Runs the parent class initialiser with the constructor arguments, picks up
# the TEMPFILE argument (extracted via _rearrange) and, when one was given,
# stores it through use_tempfile().  It then creates the XML::Parser::PerlSAX
# instance kept in $self->{'_xmlparser'} and switches the package-wide $DEBUG
# flag on when it has not been set yet and the object's verbosity is above 0.
sub _initialize{
    my ($self, @args) = @_;
    $self->SUPER::_initialize(@args);

    my ($usetempfile) = $self->_rearrange([qw(TEMPFILE)], @args);
    $self->use_tempfile($usetempfile) if defined $usetempfile;

    # Direct method call instead of the indirect-object form
    # "new XML::Parser::PerlSAX()", which perl can mis-parse.
    $self->{'_xmlparser'} = XML::Parser::PerlSAX->new();

    $DEBUG = 1 if( ! defined $DEBUG && $self->verbose > 0);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 =head2 next_cluster
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 Title : next_cluster
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 Usage : $dbsnp = $stream->next_cluster()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 Function: returns the next refSNP in the stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 Returns : Bio::Variation::SNP object representing composite refSNP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 and its component subSNP(s).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 Args : NONE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 ###
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 #Adapted from Jason's blastxml.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 ###
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Read the next <NSE-rs> ... </NSE-rs> record from the stream, feed it to the
# SAX parser (with this object as the handler) and return whatever
# $self->refsnp holds afterwards.
#
# Returns undef when the input ends before another <NSE-rs> start tag is
# seen.  A parser failure is reported through $self->warn and does not stop
# the method from returning $self->refsnp.
# NOTE(review): after a parse failure the returned object may be only
# partially filled in -- confirm whether callers rely on getting it back.
sub next_cluster {
    my $self = shift;

    # When use_tempfile is on, the record is spooled to an anonymous temp
    # file instead of being accumulated in memory.
    my $tfh;
    if( $self->use_tempfile ) {
        $tfh = IO::File->new_tmpfile
            or $self->throw("Unable to open temp file: $!");
        $tfh->autoflush(1);
    }

    my $data     = '';
    my $in_entry = 0;

    # A lexical line variable is used so the caller's $_ is left untouched.
    while( defined( my $line = $self->_readline ) ) {
        if( ! $in_entry ) {
            # skip to the beginning of the refSNP entry
            next unless $line =~ m!<NSE-rs>!;
            $in_entry = 1;
        }

        # slurp up the data
        if( defined $tfh ) {
            print $tfh $line;
        } else {
            $data .= $line;
        }

        # and stop at the end of the refSNP entry
        last if $line =~ m!</NSE-rs>!;
    }

    # No start tag found: end of stream.  The explicit undef is kept so
    # that list-context callers see the same value as before.
    return undef unless $in_entry;

    my %parser_args = ( 'Handler' => $self );
    if( defined $tfh ) {
        seek($tfh,0,0);    # rewind so the parser reads from the start
        $parser_args{'Source'} = { 'ByteStream' => $tfh };
    } else {
        $parser_args{'Source'} = { 'String' => $data };
    }

    my $starttime;
    if( $DEBUG ) {
        # Time::HiRes is not loaded anywhere else in this file; load it on
        # demand so that debug mode does not die on an undefined subroutine.
        require Time::HiRes;
        $starttime = [ Time::HiRes::gettimeofday() ];
    }

    # The trailing 1 makes success detectable even if $@ gets clobbered.
    my $parsed_ok = eval {
        $self->{'_xmlparser'}->parse(%parser_args);
        1;
    };
    if( ! $parsed_ok ) {
        $self->warn("error in parsing a report:\n $@");
    }

    if( $DEBUG ) {
        $self->debug( sprintf("parsing took %f seconds\n", Time::HiRes::tv_interval($starttime)));
    }

    return $self->refsnp;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 =head2 SAX methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 =head2 start_document
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 Title : start_document
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 Usage : $parser->start_document;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 Function: SAX method to indicate starting to parse a new document.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 Creates a Bio::Variation::SNP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 Returns : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# SAX callback fired when parsing of a document begins: stores a fresh
# Bio::Variation::SNP in $self->{refsnp}, replacing any previous one, and
# returns it.
sub start_document{
    my $self = shift;
    return $self->{refsnp} = Bio::Variation::SNP->new();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Read-only accessor: returns whatever is stored in $self->{refsnp}
# (set by start_document).
sub refsnp {
    my ($self) = @_;
    return $self->{refsnp};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 =head2 end_document
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 Title : end_document
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 Usage : $parser->end_document;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 Function: SAX method to indicate finishing parsing a new document
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 Returns : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# SAX callback fired when a document has been parsed completely.  Nothing
# needs to be done here; the arguments are ignored and nothing is returned
# (an empty list / undef), as the POD above states.  Previously the value of
# a leftover argument-unpacking statement leaked out as the return value.
sub end_document{
    return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 =head2 start_element
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 Title : start_element
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 Usage : $parser->start_element($data)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 Function: SAX method to indicate starting a new element
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 Returns : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 Args : hash ref for data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# SAX callback fired for every opening tag.
#
# $data is a hash ref; its 'Name' entry holds the element name and its
# 'Attributes' entry the attribute hash.
#   * 'NSE-ss' opens a new subSNP: add_subsnp is called on the current
#     refSNP and nothing else is touched.
#   * An element listed in %MAPPING arms $self->{will_handle} with the
#     method name to call later, and primes $self->{last_data} either with
#     the mapped attribute's value (hash-ref mapping) or with undef
#     (plain-string mapping).
#   * Any other element disarms $self->{will_handle}.
sub start_element{
    my ($self, $data) = @_;
    my $name  = $data->{'Name'};
    my $attrs = $data->{'Attributes'};

    if ($name eq 'NSE-ss') {
        $self->refsnp->add_subsnp;
        return;
    }

    my $target = $MAPPING{$name};
    if (! $target) {
        # unmapped element: make sure no handler fires for it
        undef $self->{will_handle};
    }
    elsif (ref $target eq 'HASH') {
        # { method => attribute }: take the first (only) pair of the hash
        my ($method, $attr_name) = %$target;
        $self->{will_handle} = $method;
        $self->{last_data}   = $attrs->{$attr_name};
    }
    else {
        $self->{will_handle} = $target;
        $self->{last_data}   = undef;
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 =head2 end_element
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 Title : end_element
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 Usage : $parser->end_element($data)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 Function: Signals finishing an element
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 Returns : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 Args : hash ref for data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# SAX callback fired for every closing tag.
#
# If start_element armed a handler ($self->{will_handle}), the method of
# that name is called with $self->{last_data}:
#   * on the refSNP when the closing element's name starts with NSE-rs,
#     NSE-SeqLoc or NSE-FxnSet,
#   * on the current subSNP when it starts with NSE-ss.
# The armed handler is consumed here, so it fires at most once.  Previously
# it stayed armed, so the closing tag of an enclosing container element
# (e.g. </NSE-rs>) called the same method again with stale data, and
# possibly on the wrong object (refSNP instead of subSNP).
sub end_element {
    my ($self, $data) = @_;
    my $nm = $data->{'Name'};

    my $method = $self->{will_handle};
    return unless $method;

    # Disarm before dispatching so that a dying handler cannot leave a
    # stale method name behind either.
    undef $self->{will_handle};

    if ($nm =~ /^NSE-rs/ or $nm =~ /^NSE-SeqLoc/ or $nm =~ /^NSE-FxnSet/) {
        $self->refsnp->$method($self->{last_data});
    }
    elsif ($nm =~ /^NSE-ss/) {
        $self->refsnp->subsnp->$method($self->{last_data});
    }
    return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 =head2 characters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 Title : characters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 Usage : $parser->characters($data)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 Function: Signals new characters to be processed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 Returns : characters read
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 Args : hash ref with the key 'Data'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# SAX callback for character data.  $data is a hash ref whose 'Data' entry
# holds the text.  Text containing at least one non-whitespace character
# replaces $self->{last_data}; whitespace-only text is ignored.
# NOTE(review): the text is overwritten, not appended.  If the parser hands
# over one element's text in several pieces, only the last non-blank piece
# survives -- confirm whether that can happen for the mapped elements.
sub characters{
    my ($self, $data) = @_;
    my $chunk    = $data->{Data};
    my $has_text = $chunk =~ /\S/;    # whitespace is meaningless -ad
    $self->{last_data} = $chunk if $has_text;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 =head2 use_tempfile
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 Title : use_tempfile
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 Usage : $obj->use_tempfile($newval)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 Function: Get/Set boolean flag on whether or not to use a tempfile
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 Returns : value of use_tempfile
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 Args : newvalue (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Combined getter/setter for the flag kept in $self->{'_use_tempfile'}.
# A defined argument is stored first; the current value is always returned.
# Passing undef (or nothing) leaves the stored value unchanged.
sub use_tempfile{
    my $self = shift;
    my ($flag) = @_;
    $self->{'_use_tempfile'} = $flag if defined $flag;
    return $self->{'_use_tempfile'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 1;