0
|
1 # $Id: xml.pm,v 1.12.2.1 2003/03/01 17:23:43 jason Exp $
|
|
2 # BioPerl module for Bio::Variation::IO::xml
|
|
3 #
|
|
4 # Cared for by Heikki Lehvaslaiho <Heikki@ebi.ac.uk>
|
|
5 #
|
|
6 # Copyright Heikki Lehvaslaiho
|
|
7 #
|
|
8 # You may distribute this module under the same terms as perl itself
|
|
9 #
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Variation::IO::xml - XML sequence variation input/output stream
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 Do not use this module directly. Use it via the Bio::Variation::IO class.
|
|
20
|
|
21 =head1 DESCRIPTION
|
|
22
|
|
23 This object can transform Bio::Variation::SeqDiff objects to and from XML
|
|
24 file databases.
|
|
25
|
|
26 The XML format, although consistent, is still evolving. The current
|
|
27 DTD for it is at L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>mutationsE<sol>DTDE<sol>seqDiff.dtd>.
|
|
28
|
|
29 =head1 REQUIREMENTS
|
|
30
|
|
31 To use this code you need the module XML::Twig which creates an
|
|
32 interface to XML::Parser to read XML and modules XML::Writer and
|
|
33 IO::String to write XML out.
|
|
34
|
|
35 =head1 FEEDBACK
|
|
36
|
|
37 =head2 Mailing Lists
|
|
38
|
|
39 User feedback is an integral part of the evolution of this and other
|
|
40 Bioperl modules. Send your comments and suggestions preferably to the
|
|
41 Bioperl mailing lists Your participation is much appreciated.
|
|
42
|
|
43 bioperl-l@bioperl.org - General discussion
|
|
44 http://bio.perl.org/MailList.html - About the mailing lists
|
|
45
|
|
46 =head2 Reporting Bugs
|
|
47
|
|
48 report bugs to the Bioperl bug tracking system to help us keep track
|
|
49 the bugs and their resolution. Bug reports can be submitted via
|
|
50 email or the web:
|
|
51
|
|
52 bioperl-bugs@bio.perl.org
|
|
53 http://bugzilla.bioperl.org/
|
|
54
|
|
55 =head1 AUTHOR - Heikki Lehvaslaiho
|
|
56
|
|
57 Email: heikki@ebi.ac.uk
|
|
58 Address:
|
|
59
|
|
60 EMBL Outstation, European Bioinformatics Institute
|
|
61 Wellcome Trust Genome Campus, Hinxton
|
|
62 Cambs. CB10 1SD, United Kingdom
|
|
63
|
|
64 =head1 APPENDIX
|
|
65
|
|
66 The rest of the documentation details each of the object
|
|
67 methods. Internal methods are usually preceded with a _
|
|
68
|
|
69 =cut
|
|
70
|
|
71 # Let the code begin...
|
|
72
|
|
73 package Bio::Variation::IO::xml;
|
|
74 my $VERSION=1.1;
|
|
75 use vars qw(@ISA $seqdiff $var $prevdnaobj $prevrnaobj $prevaaobj);
|
|
76 use strict;
|
|
77
|
|
78 use XML::Twig;
|
|
79 use XML::Writer 0.4;
|
|
80 use IO::String;
|
|
81 use Bio::Variation::IO;
|
|
82 use Bio::Variation::SeqDiff;
|
|
83 use Bio::Variation::DNAMutation;
|
|
84 use Bio::Variation::RNAChange;
|
|
85 use Bio::Variation::AAChange;
|
|
86 use Bio::Variation::Allele;
|
|
87
|
|
88 # new() is inherited from Bio::Root::Object
|
|
89 @ISA = qw( Bio::Variation::IO );
|
|
90
|
|
91 # _initialize is where the heavy stuff will happen when new is called
|
|
92
|
|
93 sub new {
|
|
94 my ($class,@args) = @_;
|
|
95 my $self = bless {}, $class;
|
|
96 $self->_initialize(@args);
|
|
97 return $self;
|
|
98 }
|
|
99
|
|
100 sub _initialize {
|
|
101 my($self,@args) = @_;
|
|
102 return unless $self->SUPER::_initialize(@args);
|
|
103 }
|
|
104
|
|
105 =head2 next
|
|
106
|
|
107 Title : next
|
|
108 Usage : $haplo = $stream->next()
|
|
109 Function: returns the next seqDiff in the stream
|
|
110 Returns : Bio::Variation::SeqDiff object
|
|
111 Args : NONE
|
|
112
|
|
113 =cut
|
|
114
|
|
115
|
|
116 sub _seqDiff {
|
|
117 my ($t, $term)= @_;
|
|
118 $seqdiff->id( $term->att('id') );
|
|
119 $seqdiff->alphabet( $term->att('moltype') );
|
|
120 $seqdiff->offset( $term->att('offset') );
|
|
121
|
|
122 foreach my $child ($term->children) {
|
|
123 _variant($t, $child);
|
|
124 }
|
|
125 }
|
|
126
|
|
127 sub _variant {
|
|
128 my ($t, $term)= @_;
|
|
129 my $var;
|
|
130 my $att = $term->atts();
|
|
131 my ($variation_number, $change_number) = split /\./, $att->{number};
|
|
132
|
|
133 # if more than two alleles
|
|
134 if ($variation_number and $change_number and $change_number > 1 ) {
|
|
135 my $a3 = Bio::Variation::Allele->new;
|
|
136 $a3->seq( $term->first_child_text('allele_mut') )
|
|
137 if $term->first_child_text('allele_mut');
|
|
138 if ($term->gi eq 'DNA') {
|
|
139 $prevdnaobj->add_Allele($a3);
|
|
140 }
|
|
141 elsif ($term->gi eq 'RNA') {
|
|
142 $prevrnaobj->add_Allele($a3);
|
|
143 } else { # AA
|
|
144 $prevaaobj->add_Allele($a3);
|
|
145 }
|
|
146 } else { # create new variants
|
|
147 if ($term->gi eq 'DNA') {
|
|
148 $var = new Bio::Variation::DNAMutation;
|
|
149 }
|
|
150 elsif ($term->gi eq 'RNA') {
|
|
151 $var = new Bio::Variation::RNAChange;
|
|
152 } else { # AA
|
|
153 $var = new Bio::Variation::AAChange;
|
|
154 }
|
|
155
|
|
156 # these are always present
|
|
157 $var->start( $att->{start} );
|
|
158 $var->end( $att->{end});
|
|
159 $var->length($att->{len});
|
|
160 $var->mut_number( $att->{number});
|
|
161 $var->upStreamSeq($term->first_child_text('upFlank'));
|
|
162 $var->dnStreamSeq($term->first_child_text('dnFlank'));
|
|
163 $var->proof($term->first_child_text('proof'));
|
|
164
|
|
165 # region
|
|
166 my $region = $term->first_child('region');
|
|
167 if ($region) {
|
|
168 $var->region($region->text);
|
|
169 my $region_atts = $region->atts;
|
|
170 $var->region_value( $region_atts->{value} )
|
|
171 if $region_atts->{value};
|
|
172 $var->region_dist( $region_atts->{dist} )
|
|
173 if $region_atts->{dist};
|
|
174 }
|
|
175
|
|
176 # alleles
|
|
177 my $a1 = Bio::Variation::Allele->new;
|
|
178 $a1->seq($term->first_child_text('allele_ori') )
|
|
179 if $term->first_child_text('allele_ori');
|
|
180 $var->allele_ori($a1);
|
|
181 my $a2 = Bio::Variation::Allele->new;
|
|
182 $a2->seq($term->first_child_text('allele_mut') )
|
|
183 if $term->first_child_text('allele_mut');
|
|
184 $var->isMutation(1) if $term->att('isMutation');
|
|
185 $var->allele_mut($a2);
|
|
186 $var->add_Allele($a2);
|
|
187 $var->length( $term->att('length') );
|
|
188 $seqdiff->add_Variant($var);
|
|
189
|
|
190 # variant specific code
|
|
191 if ($term->gi eq 'DNA') {
|
|
192 $prevdnaobj = $var;
|
|
193 }
|
|
194 elsif ($term->gi eq 'RNA') {
|
|
195 my $codon = $term->first_child('codon');
|
|
196 if ($codon) {
|
|
197 my $codon_atts = $codon->atts;
|
|
198 $var->codon_table( $codon->att('codon_table') )
|
|
199 if $codon_atts->{codon_table} and $codon_atts->{codon_table} != 1;
|
|
200 $var->codon_pos( $codon->att('codon_pos') )
|
|
201 if $codon_atts->{codon_pos};
|
|
202 }
|
|
203 $prevdnaobj->RNAChange($var);
|
|
204 $var->DNAMutation($prevdnaobj);
|
|
205 $prevrnaobj = $var;
|
|
206 } else {
|
|
207 $prevrnaobj->AAChange($var);
|
|
208 $var->RNAChange($prevrnaobj);
|
|
209 $prevaaobj = $var;
|
|
210 }
|
|
211 }
|
|
212 }
|
|
213
|
|
214 sub next {
|
|
215 my( $self ) = @_;
|
|
216
|
|
217 local $/ = "</seqDiff>\n";
|
|
218 return unless my $entry = $self->_readline;
|
|
219 # print STDERR "|$entry|";
|
|
220 return unless $entry =~ /^\W*<seqDiff/;
|
|
221
|
|
222 $seqdiff = Bio::Variation::SeqDiff->new;
|
|
223
|
|
224 # create new parser object
|
|
225 my $twig_handlers = {'seqDiff' => \&_seqDiff };
|
|
226 my $t = new XML::Twig ( TwigHandlers => $twig_handlers,
|
|
227 KeepEncoding => 1 );
|
|
228 $t->parse($entry);
|
|
229
|
|
230 return $seqdiff;
|
|
231 }
|
|
232
|
|
233 =head2 write
|
|
234
|
|
235 Title : write
|
|
236 Usage : $stream->write(@haplos)
|
|
237 Function: writes the $seqDiff objects into the stream
|
|
238 Returns : 1 for success and 0 for error
|
|
239 Args : Bio::Variation::SeqDiff object
|
|
240
|
|
241 =cut
|
|
242
|
|
243 sub write {
|
|
244 my ($self,@h) = @_;
|
|
245
|
|
246 if( ! defined $h[0] ) {
|
|
247 $self->throw("Attempting to write with no information!");
|
|
248 }
|
|
249 my $str;
|
|
250 my $output = IO::String->new($str);
|
|
251 my $w = new XML::Writer(OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 4 );
|
|
252 foreach my $h (@h) {
|
|
253 #
|
|
254 # seqDiff
|
|
255 #
|
|
256 $h->alphabet || $self->throw("Moltype of the reference sequence is not set!");
|
|
257 my $hasAA = 0;
|
|
258 foreach my $mut ($h->each_Variant) {
|
|
259 $hasAA = 1 if $mut->isa('Bio::Variation::AAChange');
|
|
260 }
|
|
261 if ($hasAA) {
|
|
262 $w->startTag("seqDiff",
|
|
263 "id" => $h->id,
|
|
264 "moltype" => $h->alphabet,
|
|
265 "offset" => $h->offset,
|
|
266 "sysname" => $h->sysname,
|
|
267 "trivname" => $h->trivname
|
|
268 );
|
|
269 } else {
|
|
270 $w->startTag("seqDiff",
|
|
271 "id" => $h->id,
|
|
272 "moltype" => $h->alphabet,
|
|
273 "offset" => $h->offset,
|
|
274 "sysname" => $h->sysname
|
|
275 );
|
|
276 }
|
|
277 my @allvariants = $h->each_Variant;
|
|
278 #print "allvars:", scalar @allvariants, "\n";
|
|
279 my %variants = ();
|
|
280 foreach my $mut ($h->each_Variant) {
|
|
281 #print STDERR $mut->mut_number, "\t", $mut, "\t",
|
|
282 #$mut->proof, "\t", scalar $mut->each_Allele, "\n";
|
|
283 push @{$variants{$mut->mut_number} }, $mut;
|
|
284 }
|
|
285 foreach my $var (sort keys %variants) {
|
|
286 foreach my $mut (@{$variants{$var}}) {
|
|
287 #
|
|
288 # DNA
|
|
289 #
|
|
290 if( $mut->isa('Bio::Variation::DNAMutation') ) {
|
|
291 $mut->isMutation(0) if not $mut->isMutation;
|
|
292 my @alleles = $mut->each_Allele;
|
|
293 my $count = 0;
|
|
294 foreach my $allele (@alleles) {
|
|
295 $count++;
|
|
296 my ($variation_number, $change_number) = split /\./, $mut->mut_number;
|
|
297 if ($change_number and $change_number != $count){
|
|
298 $mut->mut_number("$change_number.$count");
|
|
299 }
|
|
300 $mut->allele_mut($allele);
|
|
301 $w->startTag("DNA",
|
|
302 "number" => $mut->mut_number,
|
|
303 "start" => $mut->start,
|
|
304 "end" => $mut->end,
|
|
305 "length" => $mut->length,
|
|
306 "isMutation" => $mut->isMutation
|
|
307 );
|
|
308 if ($mut->label) {
|
|
309 foreach my $label (split ', ', $mut->label) {
|
|
310 $w->startTag("label");
|
|
311 $w->characters($label);
|
|
312 $w->endTag;
|
|
313 }
|
|
314 }
|
|
315 if ($mut->proof) {
|
|
316 $w->startTag("proof");
|
|
317 $w->characters($mut->proof );
|
|
318 $w->endTag;
|
|
319 }
|
|
320 if ($mut->upStreamSeq) {
|
|
321 $w->startTag("upFlank");
|
|
322 $w->characters($mut->upStreamSeq );
|
|
323 $w->endTag;
|
|
324 }
|
|
325 #if ( $mut->isMutation) {
|
|
326 #if ($mut->allele_ori) {
|
|
327 $w->startTag("allele_ori");
|
|
328 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ;
|
|
329 $w->endTag;
|
|
330 #}
|
|
331 #if ($mut->allele_mut) {
|
|
332 $w->startTag("allele_mut");
|
|
333 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq;
|
|
334 $w->endTag;
|
|
335 #}
|
|
336 #}
|
|
337 if ($mut->dnStreamSeq) {
|
|
338 $w->startTag("dnFlank");
|
|
339 $w->characters($mut->dnStreamSeq );
|
|
340 $w->endTag;
|
|
341 }
|
|
342 if ($mut->restriction_changes) {
|
|
343 $w->startTag("restriction_changes");
|
|
344 $w->characters($mut->restriction_changes);
|
|
345 $w->endTag;
|
|
346 }
|
|
347 if ($mut->region) {
|
|
348 if($mut->region_value and $mut->region_dist) {
|
|
349 $w->startTag("region",
|
|
350 "value" => $mut->region_value,
|
|
351 "dist" => $mut->region_dist
|
|
352 );
|
|
353 }
|
|
354 elsif($mut->region_value) {
|
|
355 $w->startTag("region",
|
|
356 "value" => $mut->region_value
|
|
357 );
|
|
358 }
|
|
359 elsif($mut->region_dist) {
|
|
360 $w->startTag("region",
|
|
361 "dist" => $mut->region_dist
|
|
362 );
|
|
363 } else {
|
|
364 $w->startTag("region");
|
|
365 }
|
|
366 $w->characters($mut->region );
|
|
367 $w->endTag;
|
|
368 }
|
|
369 $w->endTag; #DNA
|
|
370 }
|
|
371 }
|
|
372 #
|
|
373 # RNA
|
|
374 #
|
|
375 elsif( $mut->isa('Bio::Variation::RNAChange') ) {
|
|
376 $mut->isMutation(0) if not $mut->isMutation;
|
|
377 my @alleles = $mut->each_Allele;
|
|
378 my $count = 0;
|
|
379 foreach my $allele (@alleles) {
|
|
380 $count++;
|
|
381 my ($variation_number, $change_number) = split /\./, $mut->mut_number;
|
|
382 if ($change_number and $change_number != $count){
|
|
383 $mut->mut_number("$change_number.$count");
|
|
384 }
|
|
385 $mut->allele_mut($allele);
|
|
386 $w->startTag("RNA",
|
|
387 "number" => $mut->mut_number,
|
|
388 "start" => $mut->start,
|
|
389 "end" => $mut->end,
|
|
390 "length" => $mut->length,
|
|
391 "isMutation" => $mut->isMutation
|
|
392 );
|
|
393
|
|
394 if ($mut->label) {
|
|
395 foreach my $label (split ', ', $mut->label) {
|
|
396 $w->startTag("label");
|
|
397 $w->characters($label );
|
|
398 $w->endTag;
|
|
399 }
|
|
400 }
|
|
401 if ($mut->proof) {
|
|
402 $w->startTag("proof");
|
|
403 $w->characters($mut->proof );
|
|
404 $w->endTag;
|
|
405 }
|
|
406 if ($mut->upStreamSeq) {
|
|
407 $w->startTag("upFlank");
|
|
408 $w->characters($mut->upStreamSeq );
|
|
409 $w->endTag;
|
|
410 }
|
|
411 #if ( $mut->isMutation) {
|
|
412 if ($mut->allele_ori) {
|
|
413 $w->startTag("allele_ori");
|
|
414 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ;
|
|
415 $w->endTag;
|
|
416 }
|
|
417 if ($mut->allele_mut) {
|
|
418 $w->startTag("allele_mut");
|
|
419 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq ;
|
|
420 $w->endTag;
|
|
421 }
|
|
422 #}
|
|
423 if ($mut->dnStreamSeq) {
|
|
424 $w->startTag("dnFlank");
|
|
425 $w->characters($mut->dnStreamSeq );
|
|
426 $w->endTag;
|
|
427 }
|
|
428 if ($mut->region eq 'coding') {
|
|
429 if (! $mut->codon_mut) {
|
|
430 $w->startTag("codon",
|
|
431 "codon_ori" => $mut->codon_ori,
|
|
432 "codon_pos" => $mut->codon_pos
|
|
433 );
|
|
434 } else {
|
|
435 $w->startTag("codon",
|
|
436 "codon_ori" => $mut->codon_ori,
|
|
437 "codon_mut" => $mut->codon_mut,
|
|
438 "codon_pos" => $mut->codon_pos
|
|
439 );
|
|
440 }
|
|
441 $w->endTag;
|
|
442 }
|
|
443 if ($mut->codon_table != 1) {
|
|
444 $w->startTag("codon_table");
|
|
445 $w->characters($mut->codon_table);
|
|
446 $w->endTag;
|
|
447 }
|
|
448
|
|
449 if ($mut->restriction_changes) {
|
|
450 $w->startTag("restriction_changes");
|
|
451 $w->characters($mut->restriction_changes);
|
|
452 $w->endTag;
|
|
453 }
|
|
454 if ($mut->region) {
|
|
455 if($mut->region_value and $mut->region_dist) {
|
|
456 $w->startTag("region",
|
|
457 "value" => $mut->region_value,
|
|
458 "dist" => $mut->region_dist
|
|
459 );
|
|
460 }
|
|
461 elsif($mut->region_value) {
|
|
462 $w->startTag("region",
|
|
463 "value" => $mut->region_value
|
|
464 );
|
|
465 }
|
|
466 elsif($mut->region_dist) {
|
|
467 $w->startTag("region",
|
|
468 "dist" => $mut->region_dist
|
|
469 );
|
|
470 } else {
|
|
471 $w->startTag("region");
|
|
472 }
|
|
473 $w->characters($mut->region );
|
|
474 $w->endTag;
|
|
475 }
|
|
476 $w->endTag; #RNA
|
|
477 }
|
|
478 }
|
|
479 #
|
|
480 # AA
|
|
481 #
|
|
482 elsif( $mut->isa('Bio::Variation::AAChange') ) {
|
|
483 $mut->isMutation(0) if not $mut->isMutation;
|
|
484 my @alleles = $mut->each_Allele;
|
|
485 my $count = 0;
|
|
486 foreach my $allele (@alleles) {
|
|
487 $count++;
|
|
488 my ($variation_number, $change_number) = split /\./, $mut->mut_number;
|
|
489 if ($change_number and $change_number != $count){
|
|
490 $mut->mut_number("$change_number.$count");
|
|
491 }
|
|
492 $mut->allele_mut($allele);
|
|
493 $w->startTag("AA",
|
|
494 "number" => $mut->mut_number,
|
|
495 "start" => $mut->start,
|
|
496 "end" => $mut->end,
|
|
497 "length" => $mut->length,
|
|
498 "isMutation" => $mut->isMutation
|
|
499 );
|
|
500
|
|
501 if ($mut->label) {
|
|
502 foreach my $label (split ', ', $mut->label) {
|
|
503 $w->startTag("label");
|
|
504 $w->characters($label );
|
|
505 $w->endTag;
|
|
506 }
|
|
507 }
|
|
508 if ($mut->proof) {
|
|
509 $w->startTag("proof");
|
|
510 $w->characters($mut->proof );
|
|
511 $w->endTag;
|
|
512 }
|
|
513 #if ( $mut->isMutation) {
|
|
514 if ($mut->allele_ori) {
|
|
515 $w->startTag("allele_ori");
|
|
516 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq;
|
|
517 $w->endTag;
|
|
518 }
|
|
519 if ($mut->allele_mut) {
|
|
520 $w->startTag("allele_mut");
|
|
521 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq;
|
|
522 $w->endTag;
|
|
523 }
|
|
524 #}
|
|
525 if ($mut->region) {
|
|
526 if($mut->region_value and $mut->region_dist) {
|
|
527 $w->startTag("region",
|
|
528 "value" => $mut->region_value,
|
|
529 "dist" => $mut->region_dist
|
|
530 );
|
|
531 }
|
|
532 elsif($mut->region_value) {
|
|
533 $w->startTag("region",
|
|
534 "value" => $mut->region_value
|
|
535 );
|
|
536 }
|
|
537 elsif($mut->region_dist) {
|
|
538 $w->startTag("region",
|
|
539 "dist" => $mut->region_dist
|
|
540 );
|
|
541 } else {
|
|
542 $w->startTag("region");
|
|
543 }
|
|
544 $w->characters($mut->region );
|
|
545 $w->endTag;
|
|
546 }
|
|
547 $w->endTag; #AA
|
|
548 }
|
|
549 }
|
|
550 }
|
|
551 }
|
|
552 }
|
|
553 $w->endTag;
|
|
554
|
|
555 $w->end;
|
|
556 $self->_print($str);
|
|
557 $output = undef;
|
|
558 return 1;
|
|
559 }
|
|
560
|
|
561 1;
|