comparison variant_effect_predictor/Bio/Variation/IO/xml.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: xml.pm,v 1.12.2.1 2003/03/01 17:23:43 jason Exp $
2 # BioPerl module for Bio::Variation::IO::xml
3 #
4 # Cared for by Heikki Lehvaslaiho <Heikki@ebi.ac.uk>
5 #
6 # Copyright Heikki Lehvaslaiho
7 #
8 # You may distribute this module under the same terms as perl itself
9 #
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Variation::IO::xml - XML sequence variation input/output stream
16
17 =head1 SYNOPSIS
18
19 Do not use this module directly. Use it via the Bio::Variation::IO class.
20
21 =head1 DESCRIPTION
22
23 This object can transform Bio::Variation::SeqDiff objects to and from XML
24 file databases.
25
26 The XML format, although consistent, is still evolving. The current
27 DTD for it is at L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>mutationsE<sol>DTDE<sol>seqDiff.dtd>.
28
29 =head1 REQUIREMENTS
30
31 To use this code you need the module XML::Twig which creates an
32 interface to XML::Parser to read XML and modules XML::Writer and
33 IO::String to write XML out.
34
35 =head1 FEEDBACK
36
37 =head2 Mailing Lists
38
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to the
41 Bioperl mailing lists Your participation is much appreciated.
42
43 bioperl-l@bioperl.org - General discussion
44 http://bio.perl.org/MailList.html - About the mailing lists
45
46 =head2 Reporting Bugs
47
48 report bugs to the Bioperl bug tracking system to help us keep track
49 the bugs and their resolution. Bug reports can be submitted via
50 email or the web:
51
52 bioperl-bugs@bio.perl.org
53 http://bugzilla.bioperl.org/
54
55 =head1 AUTHOR - Heikki Lehvaslaiho
56
57 Email: heikki@ebi.ac.uk
58 Address:
59
60 EMBL Outstation, European Bioinformatics Institute
61 Wellcome Trust Genome Campus, Hinxton
62 Cambs. CB10 1SD, United Kingdom
63
64 =head1 APPENDIX
65
66 The rest of the documentation details each of the object
67 methods. Internal methods are usually preceded with a _
68
69 =cut
70
71 # Let the code begin...
72
73 package Bio::Variation::IO::xml;
74 my $VERSION=1.1;
75 use vars qw(@ISA $seqdiff $var $prevdnaobj $prevrnaobj $prevaaobj);
76 use strict;
77
78 use XML::Twig;
79 use XML::Writer 0.4;
80 use IO::String;
81 use Bio::Variation::IO;
82 use Bio::Variation::SeqDiff;
83 use Bio::Variation::DNAMutation;
84 use Bio::Variation::RNAChange;
85 use Bio::Variation::AAChange;
86 use Bio::Variation::Allele;
87
88 # new() is inherited from Bio::Root::Object
89 @ISA = qw( Bio::Variation::IO );
90
91 # _initialize is where the heavy stuff will happen when new is called
92
93 sub new {
94 my ($class,@args) = @_;
95 my $self = bless {}, $class;
96 $self->_initialize(@args);
97 return $self;
98 }
99
100 sub _initialize {
101 my($self,@args) = @_;
102 return unless $self->SUPER::_initialize(@args);
103 }
104
105 =head2 next
106
107 Title : next
108 Usage : $haplo = $stream->next()
109 Function: returns the next seqDiff in the stream
110 Returns : Bio::Variation::SeqDiff object
111 Args : NONE
112
113 =cut
114
115
116 sub _seqDiff {
117 my ($t, $term)= @_;
118 $seqdiff->id( $term->att('id') );
119 $seqdiff->alphabet( $term->att('moltype') );
120 $seqdiff->offset( $term->att('offset') );
121
122 foreach my $child ($term->children) {
123 _variant($t, $child);
124 }
125 }
126
127 sub _variant {
128 my ($t, $term)= @_;
129 my $var;
130 my $att = $term->atts();
131 my ($variation_number, $change_number) = split /\./, $att->{number};
132
133 # if more than two alleles
134 if ($variation_number and $change_number and $change_number > 1 ) {
135 my $a3 = Bio::Variation::Allele->new;
136 $a3->seq( $term->first_child_text('allele_mut') )
137 if $term->first_child_text('allele_mut');
138 if ($term->gi eq 'DNA') {
139 $prevdnaobj->add_Allele($a3);
140 }
141 elsif ($term->gi eq 'RNA') {
142 $prevrnaobj->add_Allele($a3);
143 } else { # AA
144 $prevaaobj->add_Allele($a3);
145 }
146 } else { # create new variants
147 if ($term->gi eq 'DNA') {
148 $var = new Bio::Variation::DNAMutation;
149 }
150 elsif ($term->gi eq 'RNA') {
151 $var = new Bio::Variation::RNAChange;
152 } else { # AA
153 $var = new Bio::Variation::AAChange;
154 }
155
156 # these are always present
157 $var->start( $att->{start} );
158 $var->end( $att->{end});
159 $var->length($att->{len});
160 $var->mut_number( $att->{number});
161 $var->upStreamSeq($term->first_child_text('upFlank'));
162 $var->dnStreamSeq($term->first_child_text('dnFlank'));
163 $var->proof($term->first_child_text('proof'));
164
165 # region
166 my $region = $term->first_child('region');
167 if ($region) {
168 $var->region($region->text);
169 my $region_atts = $region->atts;
170 $var->region_value( $region_atts->{value} )
171 if $region_atts->{value};
172 $var->region_dist( $region_atts->{dist} )
173 if $region_atts->{dist};
174 }
175
176 # alleles
177 my $a1 = Bio::Variation::Allele->new;
178 $a1->seq($term->first_child_text('allele_ori') )
179 if $term->first_child_text('allele_ori');
180 $var->allele_ori($a1);
181 my $a2 = Bio::Variation::Allele->new;
182 $a2->seq($term->first_child_text('allele_mut') )
183 if $term->first_child_text('allele_mut');
184 $var->isMutation(1) if $term->att('isMutation');
185 $var->allele_mut($a2);
186 $var->add_Allele($a2);
187 $var->length( $term->att('length') );
188 $seqdiff->add_Variant($var);
189
190 # variant specific code
191 if ($term->gi eq 'DNA') {
192 $prevdnaobj = $var;
193 }
194 elsif ($term->gi eq 'RNA') {
195 my $codon = $term->first_child('codon');
196 if ($codon) {
197 my $codon_atts = $codon->atts;
198 $var->codon_table( $codon->att('codon_table') )
199 if $codon_atts->{codon_table} and $codon_atts->{codon_table} != 1;
200 $var->codon_pos( $codon->att('codon_pos') )
201 if $codon_atts->{codon_pos};
202 }
203 $prevdnaobj->RNAChange($var);
204 $var->DNAMutation($prevdnaobj);
205 $prevrnaobj = $var;
206 } else {
207 $prevrnaobj->AAChange($var);
208 $var->RNAChange($prevrnaobj);
209 $prevaaobj = $var;
210 }
211 }
212 }
213
214 sub next {
215 my( $self ) = @_;
216
217 local $/ = "</seqDiff>\n";
218 return unless my $entry = $self->_readline;
219 # print STDERR "|$entry|";
220 return unless $entry =~ /^\W*<seqDiff/;
221
222 $seqdiff = Bio::Variation::SeqDiff->new;
223
224 # create new parser object
225 my $twig_handlers = {'seqDiff' => \&_seqDiff };
226 my $t = new XML::Twig ( TwigHandlers => $twig_handlers,
227 KeepEncoding => 1 );
228 $t->parse($entry);
229
230 return $seqdiff;
231 }
232
233 =head2 write
234
235 Title : write
236 Usage : $stream->write(@haplos)
237 Function: writes the $seqDiff objects into the stream
238 Returns : 1 for success and 0 for error
239 Args : Bio::Variation::SeqDiff object
240
241 =cut
242
243 sub write {
244 my ($self,@h) = @_;
245
246 if( ! defined $h[0] ) {
247 $self->throw("Attempting to write with no information!");
248 }
249 my $str;
250 my $output = IO::String->new($str);
251 my $w = new XML::Writer(OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 4 );
252 foreach my $h (@h) {
253 #
254 # seqDiff
255 #
256 $h->alphabet || $self->throw("Moltype of the reference sequence is not set!");
257 my $hasAA = 0;
258 foreach my $mut ($h->each_Variant) {
259 $hasAA = 1 if $mut->isa('Bio::Variation::AAChange');
260 }
261 if ($hasAA) {
262 $w->startTag("seqDiff",
263 "id" => $h->id,
264 "moltype" => $h->alphabet,
265 "offset" => $h->offset,
266 "sysname" => $h->sysname,
267 "trivname" => $h->trivname
268 );
269 } else {
270 $w->startTag("seqDiff",
271 "id" => $h->id,
272 "moltype" => $h->alphabet,
273 "offset" => $h->offset,
274 "sysname" => $h->sysname
275 );
276 }
277 my @allvariants = $h->each_Variant;
278 #print "allvars:", scalar @allvariants, "\n";
279 my %variants = ();
280 foreach my $mut ($h->each_Variant) {
281 #print STDERR $mut->mut_number, "\t", $mut, "\t",
282 #$mut->proof, "\t", scalar $mut->each_Allele, "\n";
283 push @{$variants{$mut->mut_number} }, $mut;
284 }
285 foreach my $var (sort keys %variants) {
286 foreach my $mut (@{$variants{$var}}) {
287 #
288 # DNA
289 #
290 if( $mut->isa('Bio::Variation::DNAMutation') ) {
291 $mut->isMutation(0) if not $mut->isMutation;
292 my @alleles = $mut->each_Allele;
293 my $count = 0;
294 foreach my $allele (@alleles) {
295 $count++;
296 my ($variation_number, $change_number) = split /\./, $mut->mut_number;
297 if ($change_number and $change_number != $count){
298 $mut->mut_number("$change_number.$count");
299 }
300 $mut->allele_mut($allele);
301 $w->startTag("DNA",
302 "number" => $mut->mut_number,
303 "start" => $mut->start,
304 "end" => $mut->end,
305 "length" => $mut->length,
306 "isMutation" => $mut->isMutation
307 );
308 if ($mut->label) {
309 foreach my $label (split ', ', $mut->label) {
310 $w->startTag("label");
311 $w->characters($label);
312 $w->endTag;
313 }
314 }
315 if ($mut->proof) {
316 $w->startTag("proof");
317 $w->characters($mut->proof );
318 $w->endTag;
319 }
320 if ($mut->upStreamSeq) {
321 $w->startTag("upFlank");
322 $w->characters($mut->upStreamSeq );
323 $w->endTag;
324 }
325 #if ( $mut->isMutation) {
326 #if ($mut->allele_ori) {
327 $w->startTag("allele_ori");
328 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ;
329 $w->endTag;
330 #}
331 #if ($mut->allele_mut) {
332 $w->startTag("allele_mut");
333 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq;
334 $w->endTag;
335 #}
336 #}
337 if ($mut->dnStreamSeq) {
338 $w->startTag("dnFlank");
339 $w->characters($mut->dnStreamSeq );
340 $w->endTag;
341 }
342 if ($mut->restriction_changes) {
343 $w->startTag("restriction_changes");
344 $w->characters($mut->restriction_changes);
345 $w->endTag;
346 }
347 if ($mut->region) {
348 if($mut->region_value and $mut->region_dist) {
349 $w->startTag("region",
350 "value" => $mut->region_value,
351 "dist" => $mut->region_dist
352 );
353 }
354 elsif($mut->region_value) {
355 $w->startTag("region",
356 "value" => $mut->region_value
357 );
358 }
359 elsif($mut->region_dist) {
360 $w->startTag("region",
361 "dist" => $mut->region_dist
362 );
363 } else {
364 $w->startTag("region");
365 }
366 $w->characters($mut->region );
367 $w->endTag;
368 }
369 $w->endTag; #DNA
370 }
371 }
372 #
373 # RNA
374 #
375 elsif( $mut->isa('Bio::Variation::RNAChange') ) {
376 $mut->isMutation(0) if not $mut->isMutation;
377 my @alleles = $mut->each_Allele;
378 my $count = 0;
379 foreach my $allele (@alleles) {
380 $count++;
381 my ($variation_number, $change_number) = split /\./, $mut->mut_number;
382 if ($change_number and $change_number != $count){
383 $mut->mut_number("$change_number.$count");
384 }
385 $mut->allele_mut($allele);
386 $w->startTag("RNA",
387 "number" => $mut->mut_number,
388 "start" => $mut->start,
389 "end" => $mut->end,
390 "length" => $mut->length,
391 "isMutation" => $mut->isMutation
392 );
393
394 if ($mut->label) {
395 foreach my $label (split ', ', $mut->label) {
396 $w->startTag("label");
397 $w->characters($label );
398 $w->endTag;
399 }
400 }
401 if ($mut->proof) {
402 $w->startTag("proof");
403 $w->characters($mut->proof );
404 $w->endTag;
405 }
406 if ($mut->upStreamSeq) {
407 $w->startTag("upFlank");
408 $w->characters($mut->upStreamSeq );
409 $w->endTag;
410 }
411 #if ( $mut->isMutation) {
412 if ($mut->allele_ori) {
413 $w->startTag("allele_ori");
414 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ;
415 $w->endTag;
416 }
417 if ($mut->allele_mut) {
418 $w->startTag("allele_mut");
419 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq ;
420 $w->endTag;
421 }
422 #}
423 if ($mut->dnStreamSeq) {
424 $w->startTag("dnFlank");
425 $w->characters($mut->dnStreamSeq );
426 $w->endTag;
427 }
428 if ($mut->region eq 'coding') {
429 if (! $mut->codon_mut) {
430 $w->startTag("codon",
431 "codon_ori" => $mut->codon_ori,
432 "codon_pos" => $mut->codon_pos
433 );
434 } else {
435 $w->startTag("codon",
436 "codon_ori" => $mut->codon_ori,
437 "codon_mut" => $mut->codon_mut,
438 "codon_pos" => $mut->codon_pos
439 );
440 }
441 $w->endTag;
442 }
443 if ($mut->codon_table != 1) {
444 $w->startTag("codon_table");
445 $w->characters($mut->codon_table);
446 $w->endTag;
447 }
448
449 if ($mut->restriction_changes) {
450 $w->startTag("restriction_changes");
451 $w->characters($mut->restriction_changes);
452 $w->endTag;
453 }
454 if ($mut->region) {
455 if($mut->region_value and $mut->region_dist) {
456 $w->startTag("region",
457 "value" => $mut->region_value,
458 "dist" => $mut->region_dist
459 );
460 }
461 elsif($mut->region_value) {
462 $w->startTag("region",
463 "value" => $mut->region_value
464 );
465 }
466 elsif($mut->region_dist) {
467 $w->startTag("region",
468 "dist" => $mut->region_dist
469 );
470 } else {
471 $w->startTag("region");
472 }
473 $w->characters($mut->region );
474 $w->endTag;
475 }
476 $w->endTag; #RNA
477 }
478 }
479 #
480 # AA
481 #
482 elsif( $mut->isa('Bio::Variation::AAChange') ) {
483 $mut->isMutation(0) if not $mut->isMutation;
484 my @alleles = $mut->each_Allele;
485 my $count = 0;
486 foreach my $allele (@alleles) {
487 $count++;
488 my ($variation_number, $change_number) = split /\./, $mut->mut_number;
489 if ($change_number and $change_number != $count){
490 $mut->mut_number("$change_number.$count");
491 }
492 $mut->allele_mut($allele);
493 $w->startTag("AA",
494 "number" => $mut->mut_number,
495 "start" => $mut->start,
496 "end" => $mut->end,
497 "length" => $mut->length,
498 "isMutation" => $mut->isMutation
499 );
500
501 if ($mut->label) {
502 foreach my $label (split ', ', $mut->label) {
503 $w->startTag("label");
504 $w->characters($label );
505 $w->endTag;
506 }
507 }
508 if ($mut->proof) {
509 $w->startTag("proof");
510 $w->characters($mut->proof );
511 $w->endTag;
512 }
513 #if ( $mut->isMutation) {
514 if ($mut->allele_ori) {
515 $w->startTag("allele_ori");
516 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq;
517 $w->endTag;
518 }
519 if ($mut->allele_mut) {
520 $w->startTag("allele_mut");
521 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq;
522 $w->endTag;
523 }
524 #}
525 if ($mut->region) {
526 if($mut->region_value and $mut->region_dist) {
527 $w->startTag("region",
528 "value" => $mut->region_value,
529 "dist" => $mut->region_dist
530 );
531 }
532 elsif($mut->region_value) {
533 $w->startTag("region",
534 "value" => $mut->region_value
535 );
536 }
537 elsif($mut->region_dist) {
538 $w->startTag("region",
539 "dist" => $mut->region_dist
540 );
541 } else {
542 $w->startTag("region");
543 }
544 $w->characters($mut->region );
545 $w->endTag;
546 }
547 $w->endTag; #AA
548 }
549 }
550 }
551 }
552 }
553 $w->endTag;
554
555 $w->end;
556 $self->_print($str);
557 $output = undef;
558 return 1;
559 }
560
561 1;