diff variant_effect_predictor/Bio/Variation/IO/xml.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Variation/IO/xml.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,561 @@
+# $Id: xml.pm,v 1.12.2.1 2003/03/01 17:23:43 jason Exp $
+# BioPerl module for Bio::Variation::IO::xml
+#
+# Cared for by Heikki Lehvaslaiho <Heikki@ebi.ac.uk>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Variation::IO::xml - XML sequence variation input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::Variation::IO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Variation::SeqDiff objects to and from XML
+file databases.
+
+The XML format, although consistent, is still evolving. The current
+DTD for it is at L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>mutationsE<sol>DTDE<sol>seqDiff.dtd>.
+
+=head1 REQUIREMENTS
+
+To use this code you need the module XML::Twig which creates an
+interface to XML::Parser to read XML and modules XML::Writer and
+IO::String to write XML out.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                         - General discussion
+  http://bio.perl.org/MailList.html             - About the mailing lists
+
+=head2 Reporting Bugs
+
+report bugs to the Bioperl bug tracking system to help us keep track
+ the bugs and their resolution.  Bug reports can be submitted via
+ email or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki@ebi.ac.uk
+Address:
+
+     EMBL Outstation, European Bioinformatics Institute
+     Wellcome Trust Genome Campus, Hinxton
+     Cambs. CB10 1SD, United Kingdom
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Variation::IO::xml;
+my $VERSION=1.1;
+use vars qw(@ISA $seqdiff $var $prevdnaobj $prevrnaobj $prevaaobj);
+use strict;
+
+use XML::Twig;
+use XML::Writer 0.4;
+use IO::String;
+use Bio::Variation::IO;
+use Bio::Variation::SeqDiff;
+use Bio::Variation::DNAMutation;
+use Bio::Variation::RNAChange;
+use Bio::Variation::AAChange;
+use Bio::Variation::Allele;
+
+# new() is inherited from Bio::Root::Object
+@ISA = qw( Bio::Variation::IO );
+
+# _initialize is where the heavy stuff will happen when new is called
+
+sub new {
+    my ($class,@args) = @_;
+    my $self = bless {}, $class;
+    $self->_initialize(@args);
+    return $self;
+}
+
+sub _initialize {
+  my($self,@args) = @_;
+  return unless $self->SUPER::_initialize(@args);
+}
+
+=head2 next
+
+ Title   : next
+ Usage   : $haplo = $stream->next()
+ Function: returns the next seqDiff in the stream
+ Returns : Bio::Variation::SeqDiff object
+ Args    : NONE
+
+=cut
+
+
+sub _seqDiff {
+    my ($t, $term)= @_;
+    $seqdiff->id( $term->att('id') );
+    $seqdiff->alphabet( $term->att('moltype') );
+    $seqdiff->offset( $term->att('offset') );
+
+    foreach my $child ($term->children) {
+	 _variant($t, $child);
+    }
+}
+
+sub _variant {
+    my ($t, $term)= @_;
+    my $var;
+    my $att = $term->atts();
+    my ($variation_number, $change_number) = split /\./,  $att->{number}; 
+    
+    # if more than two alleles
+    if ($variation_number and $change_number and $change_number > 1 ) {
+	my $a3 = Bio::Variation::Allele->new;
+	$a3->seq( $term->first_child_text('allele_mut') ) 
+	    if $term->first_child_text('allele_mut');
+	if ($term->gi eq 'DNA') {
+	    $prevdnaobj->add_Allele($a3);
+	}
+	elsif ($term->gi eq 'RNA') {
+	    $prevrnaobj->add_Allele($a3);
+	} else { # AA
+	    $prevaaobj->add_Allele($a3);
+	}
+    } else { # create new variants
+	if ($term->gi eq 'DNA') {
+	    $var = new Bio::Variation::DNAMutation;
+	}
+	elsif ($term->gi eq 'RNA') {
+	    $var = new Bio::Variation::RNAChange;
+	} else { # AA
+	    $var = new Bio::Variation::AAChange;
+	}
+
+	# these are always present
+	$var->start( $att->{start} );
+	$var->end( $att->{end});
+	$var->length($att->{len});
+	$var->mut_number( $att->{number});
+	$var->upStreamSeq($term->first_child_text('upFlank'));
+	$var->dnStreamSeq($term->first_child_text('dnFlank'));
+	$var->proof($term->first_child_text('proof'));
+
+	# region
+	my $region = $term->first_child('region');
+	if ($region) {
+	    $var->region($region->text);
+	    my $region_atts = $region->atts;
+	    $var->region_value( $region_atts->{value} ) 
+		if $region_atts->{value};
+	    $var->region_dist( $region_atts->{dist} ) 
+		if $region_atts->{dist};
+	}
+	
+	# alleles
+	my $a1 = Bio::Variation::Allele->new;
+	$a1->seq($term->first_child_text('allele_ori') ) 
+	    if $term->first_child_text('allele_ori');
+	$var->allele_ori($a1);
+	my $a2 = Bio::Variation::Allele->new;
+	$a2->seq($term->first_child_text('allele_mut') ) 
+	    if $term->first_child_text('allele_mut');
+	$var->isMutation(1) if $term->att('isMutation');
+	$var->allele_mut($a2);	
+	$var->add_Allele($a2);
+	$var->length( $term->att('length') );
+	$seqdiff->add_Variant($var);
+
+	# variant specific code
+	if ($term->gi eq 'DNA') {
+	    $prevdnaobj = $var;
+	}
+	elsif ($term->gi eq 'RNA') {
+	    my $codon = $term->first_child('codon');
+	    if ($codon) {
+		my $codon_atts = $codon->atts;
+		$var->codon_table( $codon->att('codon_table') ) 
+		    if $codon_atts->{codon_table} and $codon_atts->{codon_table} != 1;
+		$var->codon_pos( $codon->att('codon_pos') ) 
+		    if $codon_atts->{codon_pos};
+	    }
+	    $prevdnaobj->RNAChange($var);
+	    $var->DNAMutation($prevdnaobj);
+	    $prevrnaobj = $var;
+	} else {
+	    $prevrnaobj->AAChange($var);
+	    $var->RNAChange($prevrnaobj);
+	    $prevaaobj = $var;
+	}
+    }
+}
+
+sub next {
+    my( $self ) = @_;
+
+    local $/ = "</seqDiff>\n";
+    return unless my $entry = $self->_readline;
+#    print  STDERR "|$entry|";
+    return unless $entry =~ /^\W*<seqDiff/;
+
+    $seqdiff = Bio::Variation::SeqDiff->new;
+
+    # create new parser object
+    my $twig_handlers = {'seqDiff' =>  \&_seqDiff };
+    my $t = new XML::Twig ( TwigHandlers => $twig_handlers,
+			    KeepEncoding => 1 );
+    $t->parse($entry);
+
+    return $seqdiff;
+}
+
+=head2 write
+
+ Title   : write
+ Usage   : $stream->write(@haplos)
+ Function: writes the $seqDiff objects into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Variation::SeqDiff object
+
+=cut
+
+sub write {
+    my ($self,@h) = @_;
+
+    if( ! defined $h[0] ) {
+        $self->throw("Attempting to write with no information!");
+    }
+    my $str;
+    my $output = IO::String->new($str);
+    my $w = new XML::Writer(OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 4 );
+    foreach my $h (@h) {
+	#
+	# seqDiff
+	#
+	$h->alphabet || $self->throw("Moltype of the reference sequence is not set!");
+	my $hasAA = 0;
+	foreach my $mut ($h->each_Variant) {	
+	    $hasAA = 1 if  $mut->isa('Bio::Variation::AAChange');
+	}
+	if ($hasAA) {
+	    $w->startTag("seqDiff",
+			 "id" => $h->id,
+			 "moltype" => $h->alphabet,
+			 "offset" => $h->offset,
+			 "sysname" => $h->sysname,
+			 "trivname" => $h->trivname
+			 );
+	} else {
+	    $w->startTag("seqDiff",
+			 "id" => $h->id,
+			 "moltype" => $h->alphabet,
+			 "offset" => $h->offset,
+			 "sysname" => $h->sysname
+			 );
+	}
+	my @allvariants = $h->each_Variant;
+	#print "allvars:", scalar @allvariants, "\n";
+	my %variants = ();
+	foreach my $mut ($h->each_Variant) {
+	    #print STDERR  $mut->mut_number, "\t", $mut, "\t",
+	    #$mut->proof, "\t", scalar $mut->each_Allele,  "\n";	
+	    push @{$variants{$mut->mut_number} }, $mut;
+	}
+	foreach my $var (sort keys %variants) {
+	    foreach my $mut (@{$variants{$var}}) {
+		#
+		# DNA
+		#
+		if( $mut->isa('Bio::Variation::DNAMutation') ) {
+		    $mut->isMutation(0) if not $mut->isMutation;
+		    my @alleles = $mut->each_Allele;
+		    my $count = 0;
+		    foreach my $allele (@alleles) {
+			$count++;
+			my ($variation_number, $change_number) = split /\./, $mut->mut_number;
+			if ($change_number and $change_number != $count){
+			    $mut->mut_number("$change_number.$count");
+			}
+			$mut->allele_mut($allele);
+			$w->startTag("DNA",
+				     "number" => $mut->mut_number,
+				     "start"  => $mut->start,
+				     "end"    => $mut->end,
+				     "length" => $mut->length,
+				     "isMutation" => $mut->isMutation
+				     );
+			if ($mut->label) {
+			    foreach my $label (split ', ', $mut->label) {
+				$w->startTag("label");
+				$w->characters($label);
+				$w->endTag;
+			    }
+			}	
+			if ($mut->proof) {
+			    $w->startTag("proof");
+			    $w->characters($mut->proof );
+			    $w->endTag;
+			}	
+			if ($mut->upStreamSeq) {
+			    $w->startTag("upFlank");
+			    $w->characters($mut->upStreamSeq );
+			    $w->endTag;
+			}
+			#if ( $mut->isMutation) {
+			#if ($mut->allele_ori) {
+			$w->startTag("allele_ori");
+			$w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ;
+			$w->endTag;
+			#}	
+			#if ($mut->allele_mut) {
+			$w->startTag("allele_mut");
+			$w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq;
+			$w->endTag;
+			#}	
+			#}
+			if ($mut->dnStreamSeq) {
+			    $w->startTag("dnFlank");
+			    $w->characters($mut->dnStreamSeq );
+			    $w->endTag;
+			}
+			if ($mut->restriction_changes) {
+			    $w->startTag("restriction_changes");
+			    $w->characters($mut->restriction_changes);
+			    $w->endTag;
+			}	
+			if ($mut->region) {
+			    if($mut->region_value and $mut->region_dist) {
+				$w->startTag("region",
+					     "value" => $mut->region_value,
+					     "dist" => $mut->region_dist
+					     );
+			    }
+			    elsif($mut->region_value) {
+				$w->startTag("region",
+					     "value" => $mut->region_value
+					     );
+			    }
+			    elsif($mut->region_dist) {
+				$w->startTag("region",
+					     "dist" => $mut->region_dist
+					     );
+			    } else {
+				$w->startTag("region");
+			    }
+			    $w->characters($mut->region );
+			    $w->endTag;
+			}
+			$w->endTag; #DNA
+		    }
+		}
+		#
+		# RNA
+		#
+		elsif(  $mut->isa('Bio::Variation::RNAChange') ) {
+		    $mut->isMutation(0) if not $mut->isMutation;
+		    my @alleles = $mut->each_Allele;
+		    my $count = 0;
+		    foreach my $allele (@alleles) {
+			$count++;
+			my ($variation_number, $change_number) = split /\./, $mut->mut_number;
+			if ($change_number and $change_number != $count){
+			    $mut->mut_number("$change_number.$count");
+			}
+			$mut->allele_mut($allele);
+			$w->startTag("RNA",
+				     "number" => $mut->mut_number,
+				     "start"  => $mut->start,
+				     "end"    => $mut->end,
+				     "length" => $mut->length,
+				     "isMutation" => $mut->isMutation
+				     );
+
+			if ($mut->label) {
+			    foreach my $label (split ', ', $mut->label) {
+				$w->startTag("label");
+				$w->characters($label );
+				$w->endTag;
+			    }
+			}	
+			if ($mut->proof) {
+			    $w->startTag("proof");
+			    $w->characters($mut->proof );
+			    $w->endTag;
+			}	
+			if ($mut->upStreamSeq) {
+			    $w->startTag("upFlank");
+			    $w->characters($mut->upStreamSeq );
+			    $w->endTag;
+			}	
+			#if ( $mut->isMutation) {
+			if ($mut->allele_ori) {
+			    $w->startTag("allele_ori");
+			    $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ;
+			    $w->endTag;
+			}	
+			if ($mut->allele_mut) {
+			    $w->startTag("allele_mut");
+			    $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq ;
+			    $w->endTag;
+			}	
+			#}
+			if ($mut->dnStreamSeq) {
+			    $w->startTag("dnFlank");
+			    $w->characters($mut->dnStreamSeq );
+			    $w->endTag;
+			}
+			if ($mut->region eq 'coding') {
+			    if (! $mut->codon_mut) {
+				$w->startTag("codon",
+					     "codon_ori" => $mut->codon_ori,
+					     "codon_pos" => $mut->codon_pos
+					     );
+			    } else {
+				$w->startTag("codon",
+					     "codon_ori" => $mut->codon_ori,
+					     "codon_mut" => $mut->codon_mut,
+					     "codon_pos" => $mut->codon_pos
+					     );
+			    }
+			    $w->endTag;
+			}
+			if ($mut->codon_table != 1) {
+			    $w->startTag("codon_table");
+			    $w->characters($mut->codon_table);
+			    $w->endTag;
+			}	
+			
+			if ($mut->restriction_changes) {
+			    $w->startTag("restriction_changes");
+			    $w->characters($mut->restriction_changes);
+			    $w->endTag;
+			}	
+			if ($mut->region) {
+			    if($mut->region_value and $mut->region_dist) {
+				$w->startTag("region",
+					     "value" => $mut->region_value,
+					     "dist" => $mut->region_dist
+					     );
+			    }
+			    elsif($mut->region_value) {
+				$w->startTag("region",
+					     "value" => $mut->region_value
+					     );
+			    }
+			    elsif($mut->region_dist) {
+				$w->startTag("region",
+					     "dist" => $mut->region_dist
+					     );
+			    } else {
+				$w->startTag("region");
+			    }
+			    $w->characters($mut->region );
+			    $w->endTag;
+			}
+			$w->endTag; #RNA
+		    }
+		}
+		#
+		# AA
+		#
+		elsif(  $mut->isa('Bio::Variation::AAChange') ) {
+		    $mut->isMutation(0) if not $mut->isMutation;		
+		    my @alleles = $mut->each_Allele;
+		    my $count = 0;
+		    foreach my $allele (@alleles) {
+			$count++;
+			my ($variation_number, $change_number) = split /\./, $mut->mut_number;
+			if ($change_number and $change_number != $count){
+			    $mut->mut_number("$change_number.$count");
+			}
+			$mut->allele_mut($allele);
+			$w->startTag("AA",
+				     "number" => $mut->mut_number,
+				     "start"  => $mut->start,
+				     "end"    => $mut->end,
+				     "length" => $mut->length,
+				     "isMutation" => $mut->isMutation
+				     );
+
+			if ($mut->label) {
+			    foreach my $label (split ', ', $mut->label) {
+				$w->startTag("label");
+				$w->characters($label );
+				$w->endTag;
+			    }
+			}	
+			if ($mut->proof) {
+			    $w->startTag("proof");
+			    $w->characters($mut->proof );
+			    $w->endTag;
+			}	
+			#if ( $mut->isMutation) {
+			if ($mut->allele_ori) {
+			    $w->startTag("allele_ori");
+			    $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq;
+			    $w->endTag;
+			}	
+			if ($mut->allele_mut) {
+			    $w->startTag("allele_mut");
+			    $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq;
+			    $w->endTag;
+			}	
+			#}
+			if ($mut->region) {
+			    if($mut->region_value and $mut->region_dist) {
+				$w->startTag("region",
+					     "value" => $mut->region_value,
+					     "dist" => $mut->region_dist
+					     );
+			    }
+			    elsif($mut->region_value) {
+				$w->startTag("region",
+					     "value" => $mut->region_value
+					     );
+			    }
+			    elsif($mut->region_dist) {
+				$w->startTag("region",
+					     "dist" => $mut->region_dist
+					     );
+			    } else {
+				$w->startTag("region");
+			    }
+			    $w->characters($mut->region );
+			    $w->endTag;
+			}
+			$w->endTag; #AA
+		    }
+		}
+	    }
+	}
+    }
+    $w->endTag;
+
+    $w->end;
+    $self->_print($str);
+    $output = undef;
+    return 1;
+}
+
+1;