Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Variation/IO/xml.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: xml.pm,v 1.12.2.1 2003/03/01 17:23:43 jason Exp $ | |
2 # BioPerl module for Bio::Variation::IO::xml | |
3 # | |
4 # Cared for by Heikki Lehvaslaiho <Heikki@ebi.ac.uk> | |
5 # | |
6 # Copyright Heikki Lehvaslaiho | |
7 # | |
8 # You may distribute this module under the same terms as perl itself | |
9 # | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Variation::IO::xml - XML sequence variation input/output stream | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 Do not use this module directly. Use it via the Bio::Variation::IO class. | |
20 | |
21 =head1 DESCRIPTION | |
22 | |
23 This object can transform Bio::Variation::SeqDiff objects to and from XML | |
24 file databases. | |
25 | |
26 The XML format, although consistent, is still evolving. The current | |
27 DTD for it is at L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>mutationsE<sol>DTDE<sol>seqDiff.dtd>. | |
28 | |
29 =head1 REQUIREMENTS | |
30 | |
31 To use this code you need the module XML::Twig which creates an | |
32 interface to XML::Parser to read XML and modules XML::Writer and | |
33 IO::String to write XML out. | |
34 | |
35 =head1 FEEDBACK | |
36 | |
37 =head2 Mailing Lists | |
38 | |
39 User feedback is an integral part of the evolution of this and other | |
40 Bioperl modules. Send your comments and suggestions preferably to the | |
41 Bioperl mailing lists Your participation is much appreciated. | |
42 | |
43 bioperl-l@bioperl.org - General discussion | |
44 http://bio.perl.org/MailList.html - About the mailing lists | |
45 | |
46 =head2 Reporting Bugs | |
47 | |
48 report bugs to the Bioperl bug tracking system to help us keep track | |
49 the bugs and their resolution. Bug reports can be submitted via | |
50 email or the web: | |
51 | |
52 bioperl-bugs@bio.perl.org | |
53 http://bugzilla.bioperl.org/ | |
54 | |
55 =head1 AUTHOR - Heikki Lehvaslaiho | |
56 | |
57 Email: heikki@ebi.ac.uk | |
58 Address: | |
59 | |
60 EMBL Outstation, European Bioinformatics Institute | |
61 Wellcome Trust Genome Campus, Hinxton | |
62 Cambs. CB10 1SD, United Kingdom | |
63 | |
64 =head1 APPENDIX | |
65 | |
66 The rest of the documentation details each of the object | |
67 methods. Internal methods are usually preceded with a _ | |
68 | |
69 =cut | |
70 | |
71 # Let the code begin... | |
72 | |
73 package Bio::Variation::IO::xml; | |
74 my $VERSION=1.1; | |
75 use vars qw(@ISA $seqdiff $var $prevdnaobj $prevrnaobj $prevaaobj); | |
76 use strict; | |
77 | |
78 use XML::Twig; | |
79 use XML::Writer 0.4; | |
80 use IO::String; | |
81 use Bio::Variation::IO; | |
82 use Bio::Variation::SeqDiff; | |
83 use Bio::Variation::DNAMutation; | |
84 use Bio::Variation::RNAChange; | |
85 use Bio::Variation::AAChange; | |
86 use Bio::Variation::Allele; | |
87 | |
88 # new() is inherited from Bio::Root::Object | |
89 @ISA = qw( Bio::Variation::IO ); | |
90 | |
91 # _initialize is where the heavy stuff will happen when new is called | |
92 | |
93 sub new { | |
94 my ($class,@args) = @_; | |
95 my $self = bless {}, $class; | |
96 $self->_initialize(@args); | |
97 return $self; | |
98 } | |
99 | |
100 sub _initialize { | |
101 my($self,@args) = @_; | |
102 return unless $self->SUPER::_initialize(@args); | |
103 } | |
104 | |
105 =head2 next | |
106 | |
107 Title : next | |
108 Usage : $haplo = $stream->next() | |
109 Function: returns the next seqDiff in the stream | |
110 Returns : Bio::Variation::SeqDiff object | |
111 Args : NONE | |
112 | |
113 =cut | |
114 | |
115 | |
116 sub _seqDiff { | |
117 my ($t, $term)= @_; | |
118 $seqdiff->id( $term->att('id') ); | |
119 $seqdiff->alphabet( $term->att('moltype') ); | |
120 $seqdiff->offset( $term->att('offset') ); | |
121 | |
122 foreach my $child ($term->children) { | |
123 _variant($t, $child); | |
124 } | |
125 } | |
126 | |
127 sub _variant { | |
128 my ($t, $term)= @_; | |
129 my $var; | |
130 my $att = $term->atts(); | |
131 my ($variation_number, $change_number) = split /\./, $att->{number}; | |
132 | |
133 # if more than two alleles | |
134 if ($variation_number and $change_number and $change_number > 1 ) { | |
135 my $a3 = Bio::Variation::Allele->new; | |
136 $a3->seq( $term->first_child_text('allele_mut') ) | |
137 if $term->first_child_text('allele_mut'); | |
138 if ($term->gi eq 'DNA') { | |
139 $prevdnaobj->add_Allele($a3); | |
140 } | |
141 elsif ($term->gi eq 'RNA') { | |
142 $prevrnaobj->add_Allele($a3); | |
143 } else { # AA | |
144 $prevaaobj->add_Allele($a3); | |
145 } | |
146 } else { # create new variants | |
147 if ($term->gi eq 'DNA') { | |
148 $var = new Bio::Variation::DNAMutation; | |
149 } | |
150 elsif ($term->gi eq 'RNA') { | |
151 $var = new Bio::Variation::RNAChange; | |
152 } else { # AA | |
153 $var = new Bio::Variation::AAChange; | |
154 } | |
155 | |
156 # these are always present | |
157 $var->start( $att->{start} ); | |
158 $var->end( $att->{end}); | |
159 $var->length($att->{len}); | |
160 $var->mut_number( $att->{number}); | |
161 $var->upStreamSeq($term->first_child_text('upFlank')); | |
162 $var->dnStreamSeq($term->first_child_text('dnFlank')); | |
163 $var->proof($term->first_child_text('proof')); | |
164 | |
165 # region | |
166 my $region = $term->first_child('region'); | |
167 if ($region) { | |
168 $var->region($region->text); | |
169 my $region_atts = $region->atts; | |
170 $var->region_value( $region_atts->{value} ) | |
171 if $region_atts->{value}; | |
172 $var->region_dist( $region_atts->{dist} ) | |
173 if $region_atts->{dist}; | |
174 } | |
175 | |
176 # alleles | |
177 my $a1 = Bio::Variation::Allele->new; | |
178 $a1->seq($term->first_child_text('allele_ori') ) | |
179 if $term->first_child_text('allele_ori'); | |
180 $var->allele_ori($a1); | |
181 my $a2 = Bio::Variation::Allele->new; | |
182 $a2->seq($term->first_child_text('allele_mut') ) | |
183 if $term->first_child_text('allele_mut'); | |
184 $var->isMutation(1) if $term->att('isMutation'); | |
185 $var->allele_mut($a2); | |
186 $var->add_Allele($a2); | |
187 $var->length( $term->att('length') ); | |
188 $seqdiff->add_Variant($var); | |
189 | |
190 # variant specific code | |
191 if ($term->gi eq 'DNA') { | |
192 $prevdnaobj = $var; | |
193 } | |
194 elsif ($term->gi eq 'RNA') { | |
195 my $codon = $term->first_child('codon'); | |
196 if ($codon) { | |
197 my $codon_atts = $codon->atts; | |
198 $var->codon_table( $codon->att('codon_table') ) | |
199 if $codon_atts->{codon_table} and $codon_atts->{codon_table} != 1; | |
200 $var->codon_pos( $codon->att('codon_pos') ) | |
201 if $codon_atts->{codon_pos}; | |
202 } | |
203 $prevdnaobj->RNAChange($var); | |
204 $var->DNAMutation($prevdnaobj); | |
205 $prevrnaobj = $var; | |
206 } else { | |
207 $prevrnaobj->AAChange($var); | |
208 $var->RNAChange($prevrnaobj); | |
209 $prevaaobj = $var; | |
210 } | |
211 } | |
212 } | |
213 | |
214 sub next { | |
215 my( $self ) = @_; | |
216 | |
217 local $/ = "</seqDiff>\n"; | |
218 return unless my $entry = $self->_readline; | |
219 # print STDERR "|$entry|"; | |
220 return unless $entry =~ /^\W*<seqDiff/; | |
221 | |
222 $seqdiff = Bio::Variation::SeqDiff->new; | |
223 | |
224 # create new parser object | |
225 my $twig_handlers = {'seqDiff' => \&_seqDiff }; | |
226 my $t = new XML::Twig ( TwigHandlers => $twig_handlers, | |
227 KeepEncoding => 1 ); | |
228 $t->parse($entry); | |
229 | |
230 return $seqdiff; | |
231 } | |
232 | |
233 =head2 write | |
234 | |
235 Title : write | |
236 Usage : $stream->write(@haplos) | |
237 Function: writes the $seqDiff objects into the stream | |
238 Returns : 1 for success and 0 for error | |
239 Args : Bio::Variation::SeqDiff object | |
240 | |
241 =cut | |
242 | |
243 sub write { | |
244 my ($self,@h) = @_; | |
245 | |
246 if( ! defined $h[0] ) { | |
247 $self->throw("Attempting to write with no information!"); | |
248 } | |
249 my $str; | |
250 my $output = IO::String->new($str); | |
251 my $w = new XML::Writer(OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 4 ); | |
252 foreach my $h (@h) { | |
253 # | |
254 # seqDiff | |
255 # | |
256 $h->alphabet || $self->throw("Moltype of the reference sequence is not set!"); | |
257 my $hasAA = 0; | |
258 foreach my $mut ($h->each_Variant) { | |
259 $hasAA = 1 if $mut->isa('Bio::Variation::AAChange'); | |
260 } | |
261 if ($hasAA) { | |
262 $w->startTag("seqDiff", | |
263 "id" => $h->id, | |
264 "moltype" => $h->alphabet, | |
265 "offset" => $h->offset, | |
266 "sysname" => $h->sysname, | |
267 "trivname" => $h->trivname | |
268 ); | |
269 } else { | |
270 $w->startTag("seqDiff", | |
271 "id" => $h->id, | |
272 "moltype" => $h->alphabet, | |
273 "offset" => $h->offset, | |
274 "sysname" => $h->sysname | |
275 ); | |
276 } | |
277 my @allvariants = $h->each_Variant; | |
278 #print "allvars:", scalar @allvariants, "\n"; | |
279 my %variants = (); | |
280 foreach my $mut ($h->each_Variant) { | |
281 #print STDERR $mut->mut_number, "\t", $mut, "\t", | |
282 #$mut->proof, "\t", scalar $mut->each_Allele, "\n"; | |
283 push @{$variants{$mut->mut_number} }, $mut; | |
284 } | |
285 foreach my $var (sort keys %variants) { | |
286 foreach my $mut (@{$variants{$var}}) { | |
287 # | |
288 # DNA | |
289 # | |
290 if( $mut->isa('Bio::Variation::DNAMutation') ) { | |
291 $mut->isMutation(0) if not $mut->isMutation; | |
292 my @alleles = $mut->each_Allele; | |
293 my $count = 0; | |
294 foreach my $allele (@alleles) { | |
295 $count++; | |
296 my ($variation_number, $change_number) = split /\./, $mut->mut_number; | |
297 if ($change_number and $change_number != $count){ | |
298 $mut->mut_number("$change_number.$count"); | |
299 } | |
300 $mut->allele_mut($allele); | |
301 $w->startTag("DNA", | |
302 "number" => $mut->mut_number, | |
303 "start" => $mut->start, | |
304 "end" => $mut->end, | |
305 "length" => $mut->length, | |
306 "isMutation" => $mut->isMutation | |
307 ); | |
308 if ($mut->label) { | |
309 foreach my $label (split ', ', $mut->label) { | |
310 $w->startTag("label"); | |
311 $w->characters($label); | |
312 $w->endTag; | |
313 } | |
314 } | |
315 if ($mut->proof) { | |
316 $w->startTag("proof"); | |
317 $w->characters($mut->proof ); | |
318 $w->endTag; | |
319 } | |
320 if ($mut->upStreamSeq) { | |
321 $w->startTag("upFlank"); | |
322 $w->characters($mut->upStreamSeq ); | |
323 $w->endTag; | |
324 } | |
325 #if ( $mut->isMutation) { | |
326 #if ($mut->allele_ori) { | |
327 $w->startTag("allele_ori"); | |
328 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ; | |
329 $w->endTag; | |
330 #} | |
331 #if ($mut->allele_mut) { | |
332 $w->startTag("allele_mut"); | |
333 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq; | |
334 $w->endTag; | |
335 #} | |
336 #} | |
337 if ($mut->dnStreamSeq) { | |
338 $w->startTag("dnFlank"); | |
339 $w->characters($mut->dnStreamSeq ); | |
340 $w->endTag; | |
341 } | |
342 if ($mut->restriction_changes) { | |
343 $w->startTag("restriction_changes"); | |
344 $w->characters($mut->restriction_changes); | |
345 $w->endTag; | |
346 } | |
347 if ($mut->region) { | |
348 if($mut->region_value and $mut->region_dist) { | |
349 $w->startTag("region", | |
350 "value" => $mut->region_value, | |
351 "dist" => $mut->region_dist | |
352 ); | |
353 } | |
354 elsif($mut->region_value) { | |
355 $w->startTag("region", | |
356 "value" => $mut->region_value | |
357 ); | |
358 } | |
359 elsif($mut->region_dist) { | |
360 $w->startTag("region", | |
361 "dist" => $mut->region_dist | |
362 ); | |
363 } else { | |
364 $w->startTag("region"); | |
365 } | |
366 $w->characters($mut->region ); | |
367 $w->endTag; | |
368 } | |
369 $w->endTag; #DNA | |
370 } | |
371 } | |
372 # | |
373 # RNA | |
374 # | |
375 elsif( $mut->isa('Bio::Variation::RNAChange') ) { | |
376 $mut->isMutation(0) if not $mut->isMutation; | |
377 my @alleles = $mut->each_Allele; | |
378 my $count = 0; | |
379 foreach my $allele (@alleles) { | |
380 $count++; | |
381 my ($variation_number, $change_number) = split /\./, $mut->mut_number; | |
382 if ($change_number and $change_number != $count){ | |
383 $mut->mut_number("$change_number.$count"); | |
384 } | |
385 $mut->allele_mut($allele); | |
386 $w->startTag("RNA", | |
387 "number" => $mut->mut_number, | |
388 "start" => $mut->start, | |
389 "end" => $mut->end, | |
390 "length" => $mut->length, | |
391 "isMutation" => $mut->isMutation | |
392 ); | |
393 | |
394 if ($mut->label) { | |
395 foreach my $label (split ', ', $mut->label) { | |
396 $w->startTag("label"); | |
397 $w->characters($label ); | |
398 $w->endTag; | |
399 } | |
400 } | |
401 if ($mut->proof) { | |
402 $w->startTag("proof"); | |
403 $w->characters($mut->proof ); | |
404 $w->endTag; | |
405 } | |
406 if ($mut->upStreamSeq) { | |
407 $w->startTag("upFlank"); | |
408 $w->characters($mut->upStreamSeq ); | |
409 $w->endTag; | |
410 } | |
411 #if ( $mut->isMutation) { | |
412 if ($mut->allele_ori) { | |
413 $w->startTag("allele_ori"); | |
414 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ; | |
415 $w->endTag; | |
416 } | |
417 if ($mut->allele_mut) { | |
418 $w->startTag("allele_mut"); | |
419 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq ; | |
420 $w->endTag; | |
421 } | |
422 #} | |
423 if ($mut->dnStreamSeq) { | |
424 $w->startTag("dnFlank"); | |
425 $w->characters($mut->dnStreamSeq ); | |
426 $w->endTag; | |
427 } | |
428 if ($mut->region eq 'coding') { | |
429 if (! $mut->codon_mut) { | |
430 $w->startTag("codon", | |
431 "codon_ori" => $mut->codon_ori, | |
432 "codon_pos" => $mut->codon_pos | |
433 ); | |
434 } else { | |
435 $w->startTag("codon", | |
436 "codon_ori" => $mut->codon_ori, | |
437 "codon_mut" => $mut->codon_mut, | |
438 "codon_pos" => $mut->codon_pos | |
439 ); | |
440 } | |
441 $w->endTag; | |
442 } | |
443 if ($mut->codon_table != 1) { | |
444 $w->startTag("codon_table"); | |
445 $w->characters($mut->codon_table); | |
446 $w->endTag; | |
447 } | |
448 | |
449 if ($mut->restriction_changes) { | |
450 $w->startTag("restriction_changes"); | |
451 $w->characters($mut->restriction_changes); | |
452 $w->endTag; | |
453 } | |
454 if ($mut->region) { | |
455 if($mut->region_value and $mut->region_dist) { | |
456 $w->startTag("region", | |
457 "value" => $mut->region_value, | |
458 "dist" => $mut->region_dist | |
459 ); | |
460 } | |
461 elsif($mut->region_value) { | |
462 $w->startTag("region", | |
463 "value" => $mut->region_value | |
464 ); | |
465 } | |
466 elsif($mut->region_dist) { | |
467 $w->startTag("region", | |
468 "dist" => $mut->region_dist | |
469 ); | |
470 } else { | |
471 $w->startTag("region"); | |
472 } | |
473 $w->characters($mut->region ); | |
474 $w->endTag; | |
475 } | |
476 $w->endTag; #RNA | |
477 } | |
478 } | |
479 # | |
480 # AA | |
481 # | |
482 elsif( $mut->isa('Bio::Variation::AAChange') ) { | |
483 $mut->isMutation(0) if not $mut->isMutation; | |
484 my @alleles = $mut->each_Allele; | |
485 my $count = 0; | |
486 foreach my $allele (@alleles) { | |
487 $count++; | |
488 my ($variation_number, $change_number) = split /\./, $mut->mut_number; | |
489 if ($change_number and $change_number != $count){ | |
490 $mut->mut_number("$change_number.$count"); | |
491 } | |
492 $mut->allele_mut($allele); | |
493 $w->startTag("AA", | |
494 "number" => $mut->mut_number, | |
495 "start" => $mut->start, | |
496 "end" => $mut->end, | |
497 "length" => $mut->length, | |
498 "isMutation" => $mut->isMutation | |
499 ); | |
500 | |
501 if ($mut->label) { | |
502 foreach my $label (split ', ', $mut->label) { | |
503 $w->startTag("label"); | |
504 $w->characters($label ); | |
505 $w->endTag; | |
506 } | |
507 } | |
508 if ($mut->proof) { | |
509 $w->startTag("proof"); | |
510 $w->characters($mut->proof ); | |
511 $w->endTag; | |
512 } | |
513 #if ( $mut->isMutation) { | |
514 if ($mut->allele_ori) { | |
515 $w->startTag("allele_ori"); | |
516 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq; | |
517 $w->endTag; | |
518 } | |
519 if ($mut->allele_mut) { | |
520 $w->startTag("allele_mut"); | |
521 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq; | |
522 $w->endTag; | |
523 } | |
524 #} | |
525 if ($mut->region) { | |
526 if($mut->region_value and $mut->region_dist) { | |
527 $w->startTag("region", | |
528 "value" => $mut->region_value, | |
529 "dist" => $mut->region_dist | |
530 ); | |
531 } | |
532 elsif($mut->region_value) { | |
533 $w->startTag("region", | |
534 "value" => $mut->region_value | |
535 ); | |
536 } | |
537 elsif($mut->region_dist) { | |
538 $w->startTag("region", | |
539 "dist" => $mut->region_dist | |
540 ); | |
541 } else { | |
542 $w->startTag("region"); | |
543 } | |
544 $w->characters($mut->region ); | |
545 $w->endTag; | |
546 } | |
547 $w->endTag; #AA | |
548 } | |
549 } | |
550 } | |
551 } | |
552 } | |
553 $w->endTag; | |
554 | |
555 $w->end; | |
556 $self->_print($str); | |
557 $output = undef; | |
558 return 1; | |
559 } | |
560 | |
561 1; |