| 
0
 | 
     1 
 | 
| 
 | 
     2 #
 | 
| 
 | 
     3 # BioPerl module for Bio::Tools::Sim4::Results
 | 
| 
 | 
     4 #
 | 
| 
 | 
     5 # Cared for by Ewan Birney <birney@sanger.ac.uk>
 | 
| 
 | 
     6 #          and Hilmar Lapp <hlapp@gmx.net>
 | 
| 
 | 
     7 #
 | 
| 
 | 
     8 # Copyright Ewan Birney and Hilmar Lapp
 | 
| 
 | 
     9 #
 | 
| 
 | 
    10 # You may distribute this module under the same terms as perl itself
 | 
| 
 | 
    11 
 | 
| 
 | 
    12 # POD documentation - main docs before the code
 | 
| 
 | 
    13 
 | 
| 
 | 
    14 =head1 NAME
 | 
| 
 | 
    15 
 | 
| 
 | 
    16 Bio::Tools::Sim4::Results - Results of one Sim4 run
 | 
| 
 | 
    17 
 | 
| 
 | 
    18 =head1 SYNOPSIS
 | 
| 
 | 
    19 
 | 
| 
 | 
    20    # to preset the order of EST and genomic file as given on the sim4 
 | 
| 
 | 
    21    # command line:
 | 
| 
 | 
    22    my $sim4 = Bio::Tools::Sim4::Results->new(-file => 'result.sim4',
 | 
| 
 | 
    23                                              -estfirst => 1);
 | 
| 
 | 
    24    # to let the order be determined automatically (by length comparison):
 | 
| 
 | 
    25    $sim4 = Bio::Tools::Sim4::Results->new( -file => 'sim4.results' );
 | 
| 
 | 
    26    # filehandle:
 | 
| 
 | 
    27    $sim4 = Bio::Tools::Sim4::Results->new( -fh   => \*INPUT );
 | 
| 
 | 
    28 
 | 
| 
 | 
    29    # parse the results
 | 
| 
 | 
    30    while(my $exonset = $sim4->next_exonset()) {
 | 
| 
 | 
    31        # $exonset is-a Bio::SeqFeature::Generic with Bio::Tools::Sim4::Exons
 | 
| 
 | 
    32        # as sub features
 | 
| 
 | 
    33        print "Delimited on sequence ", $exonset->seq_id(), 
 | 
| 
 | 
    34              "from ", $exonset->start(), " to ", $exonset->end(), "\n";
 | 
| 
 | 
    35        foreach my $exon ( $exonset->sub_SeqFeature() ) {
 | 
| 
 | 
    36 	  # $exon is-a Bio::SeqFeature::FeaturePair
 | 
| 
 | 
    37 	  print "Exon from ", $exon->start, " to ", $exon->end, 
 | 
| 
 | 
    38                 " on strand ", $exon->strand(), "\n";
 | 
| 
 | 
    39           # you can get out what it matched using the est_hit attribute
 | 
| 
 | 
    40           my $homol = $exon->est_hit();
 | 
| 
 | 
    41           print "Matched to sequence ", $homol->seq_id, 
 | 
| 
 | 
    42                 " at ", $homol->start," to ", $homol->end, "\n";
 | 
| 
 | 
    43       }
 | 
| 
 | 
    44    }
 | 
| 
 | 
    45 
 | 
| 
 | 
    46    # essential if you gave a filename at initialization (otherwise the file
 | 
| 
 | 
    47    # stays open)
 | 
| 
 | 
    48    $sim4->close();
 | 
| 
 | 
    49 
 | 
| 
 | 
    50 =head1 DESCRIPTION
 | 
| 
 | 
    51 
 | 
| 
 | 
    52 The sim4 module provides a parser and results object for sim4 output. The
 | 
| 
 | 
    53 sim4 results are specialised types of SeqFeatures, meaning you can add them
 | 
| 
 | 
    54 to AnnSeq objects fine, and manipulate them in the "normal" seqfeature manner.
 | 
| 
 | 
    55 
 | 
| 
 | 
    56 The sim4 Exon objects are Bio::SeqFeature::FeaturePair inherited objects. The 
 | 
| 
 | 
    57 $esthit = $exon-E<gt>est_hit() is the alignment as a feature on the matching 
 | 
| 
 | 
    58 object (normally, an EST), in which the start/end points are where the hit
 | 
| 
 | 
    59 lies.
 | 
| 
 | 
    60 
 | 
| 
 | 
    61 To make this module work sensibly you need to run
 | 
| 
 | 
    62 
 | 
| 
 | 
    63      sim4 genomic.fasta est.database.fasta
 | 
| 
 | 
    64 or
 | 
| 
 | 
    65      sim4 est.fasta genomic.database.fasta
 | 
| 
 | 
    66 
 | 
| 
 | 
    67 To get the sequence identifiers recorded for the first sequence, too, use
 | 
| 
 | 
    68 A=4 as output option for sim4.
 | 
| 
 | 
    69 
 | 
| 
 | 
    70 One fiddle here is that there are only two real possibilities to the matching
 | 
| 
 | 
    71 criteria: either one sequence needs reversing or not. Because of this, it
 | 
| 
 | 
    72 is impossible to tell whether the match is in the forward or reverse strand
 | 
| 
 | 
    73 of the genomic DNA. We solve this here by assuming that the genomic DNA is
 | 
| 
 | 
    74 always forward. As a consequence, the strand attribute of the matching EST is
 | 
| 
 | 
    75 unknown, and the strand attribute of the genomic DNA (i.e., the Exon object)
 | 
| 
 | 
    76 will reflect the direction of the hit.
 | 
| 
 | 
    77 
 | 
| 
 | 
    78 See the documentation of parse_next_alignment() for abilities of the parser
 | 
| 
 | 
    79 to deal with the different output format options of sim4.
 | 
| 
 | 
    80 
 | 
| 
 | 
    81 =head1 FEEDBACK
 | 
| 
 | 
    82 
 | 
| 
 | 
    83 =head2 Mailing Lists
 | 
| 
 | 
    84 
 | 
| 
 | 
    85 User feedback is an integral part of the evolution of this and other
 | 
| 
 | 
    86 Bioperl modules. Send your comments and suggestions preferably to one
 | 
| 
 | 
    87 of the Bioperl mailing lists.  Your participation is much appreciated.
 | 
| 
 | 
    88 
 | 
| 
 | 
    89   bioperl-l@bioperl.org          - General discussion
 | 
| 
 | 
    90   http://bio.perl.org/MailList.html             - About the mailing lists
 | 
| 
 | 
    91 
 | 
| 
 | 
    92 =head2 Reporting Bugs
 | 
| 
 | 
    93 
 | 
| 
 | 
    94 Report bugs to the Bioperl bug tracking system to help us keep track
 | 
| 
 | 
    95 the bugs and their resolution.  Bug reports can be submitted via email
 | 
| 
 | 
    96 or the web:
 | 
| 
 | 
    97 
 | 
| 
 | 
    98   bioperl-bugs@bio.perl.org
 | 
| 
 | 
    99   http://bugzilla.bioperl.org/
 | 
| 
 | 
   100 
 | 
| 
 | 
   101 =head1 AUTHOR - Ewan Birney, Hilmar Lapp
 | 
| 
 | 
   102 
 | 
| 
 | 
   103 Email birney@sanger.ac.uk
 | 
| 
 | 
   104       hlapp@gmx.net (or hilmar.lapp@pharma.novartis.com)
 | 
| 
 | 
   105 
 | 
| 
 | 
   106 Describe contact details here
 | 
| 
 | 
   107 
 | 
| 
 | 
   108 =head1 APPENDIX
 | 
| 
 | 
   109 
 | 
| 
 | 
   110 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
 | 
| 
 | 
   111 
 | 
| 
 | 
   112 =cut
 | 
| 
 | 
   113 
 | 
| 
 | 
   114 
 | 
| 
 | 
   115 # Let the code begin...
 | 
| 
 | 
   116 
 | 
| 
 | 
   117 
 | 
| 
 | 
   118 package Bio::Tools::Sim4::Results;
 | 
| 
 | 
   119 use vars qw(@ISA);
 | 
| 
 | 
   120 use strict;
 | 
| 
 | 
   121 
 | 
| 
 | 
   122 # Object preamble - inherits from Bio::Root::Object
 | 
| 
 | 
   123 
 | 
| 
 | 
   124 use File::Basename;
 | 
| 
 | 
   125 use Bio::Root::Root;
 | 
| 
 | 
   126 use Bio::Tools::AnalysisResult;
 | 
| 
 | 
   127 use Bio::Tools::Sim4::Exon;
 | 
| 
 | 
   128 
 | 
| 
 | 
   129 @ISA = qw(Bio::Tools::AnalysisResult);
 | 
| 
 | 
   130 
 | 
| 
 | 
   131 
 | 
| 
 | 
   132 sub _initialize_state {
 | 
| 
 | 
   133     my($self,@args) = @_;
 | 
| 
 | 
   134 
 | 
| 
 | 
   135     # call the inherited method first
 | 
| 
 | 
   136     my $make = $self->SUPER::_initialize_state(@args);
 | 
| 
 | 
   137 
 | 
| 
 | 
   138     my ($est_is_first) = $self->_rearrange([qw(ESTFIRST)], @args);
 | 
| 
 | 
   139 
 | 
| 
 | 
   140     delete($self->{'_est_is_first'});
 | 
| 
 | 
   141     $self->{'_est_is_first'} = $est_is_first if(defined($est_is_first));
 | 
| 
 | 
   142     $self->analysis_method("Sim4");
 | 
| 
 | 
   143 }
 | 
| 
 | 
   144 
 | 
| 
 | 
   145 =head2 analysis_method
 | 
| 
 | 
   146 
 | 
| 
 | 
   147  Usage     : $sim4->analysis_method();
 | 
| 
 | 
   148  Purpose   : Inherited method. Overridden to ensure that the name matches
 | 
| 
 | 
   149              /sim4/i.
 | 
| 
 | 
   150  Returns   : String
 | 
| 
 | 
   151  Argument  : n/a
 | 
| 
 | 
   152 
 | 
| 
 | 
   153 =cut
 | 
| 
 | 
   154 
 | 
| 
 | 
   155 #-------------
 | 
| 
 | 
   156 sub analysis_method { 
 | 
| 
 | 
   157 #-------------
 | 
| 
 | 
   158     my ($self, $method) = @_;  
 | 
| 
 | 
   159     if($method && ($method !~ /sim4/i)) {
 | 
| 
 | 
   160 	$self->throw("method $method not supported in " . ref($self));
 | 
| 
 | 
   161     }
 | 
| 
 | 
   162     return $self->SUPER::analysis_method($method);
 | 
| 
 | 
   163 }
 | 
| 
 | 
   164 
 | 
| 
 | 
   165 =head2 parse_next_alignment
 | 
| 
 | 
   166 
 | 
| 
 | 
   167  Title   : parse_next_alignment
 | 
| 
 | 
   168  Usage   : @exons = $sim4_result->parse_next_alignment;
 | 
| 
 | 
   169            foreach $exon (@exons) {
 | 
| 
 | 
   170                # do something
 | 
| 
 | 
   171            }
 | 
| 
 | 
   172  Function: Parses the next alignment of the Sim4 result file and returns the
 | 
| 
 | 
   173            found exons as an array of Bio::Tools::Sim4::Exon objects. Call
 | 
| 
 | 
   174            this method repeatedly until an empty array is returned to get the
 | 
| 
 | 
   175            results for all alignments.
 | 
| 
 | 
   176 
 | 
| 
 | 
   177            The $exon->seq_id() attribute will be set to the identifier of the
 | 
| 
 | 
   178            respective sequence for both sequences if A=4 was used in the sim4
 | 
| 
 | 
   179            run, and otherwise for the second sequence only. If the output does
 | 
| 
 | 
   180            not contain the identifier, the filename stripped of path and 
 | 
| 
 | 
   181            extension is used instead. In addition, the full filename 
 | 
| 
 | 
   182            will be recorded for both features ($exon inherits off 
 | 
| 
 | 
   183            Bio::SeqFeature::SimilarityPair) as tag 'filename'. The length
 | 
| 
 | 
   184            is accessible via the seqlength() attribute of $exon->query() and
 | 
| 
 | 
   185            $exon->est_hit().
 | 
| 
 | 
   186 
 | 
| 
 | 
   187            Note that this method is capable of dealing with outputs generated
 | 
| 
 | 
   188            with format 0,1,3, and 4 (via the A=n option to sim4). It
 | 
| 
 | 
   189            automatically determines which of the two sequences has been 
 | 
| 
 | 
   190            reversed, and adjusts the coordinates for that sequence. It will
 | 
| 
 | 
   191            also detect whether the EST sequence(s) were given as first or as
 | 
| 
 | 
   192            second file to sim4, unless this has been specified at creation
 | 
| 
 | 
   193            time of the object.
 | 
| 
 | 
   194 
 | 
| 
 | 
   195  Example :
 | 
| 
 | 
   196  Returns : An array of Bio::Tools::Sim4::Exon objects
 | 
| 
 | 
   197  Args    :
 | 
| 
 | 
   198 
 | 
| 
 | 
   199 
 | 
| 
 | 
   200 =cut
 | 
| 
 | 
   201 
 | 
| 
 | 
   202 sub parse_next_alignment {
 | 
| 
 | 
   203    my ($self) = @_;
 | 
| 
 | 
   204    my @exons = ();
 | 
| 
 | 
   205    my %seq1props = ();
 | 
| 
 | 
   206    my %seq2props = ();
 | 
| 
 | 
   207    # we refer to the properties of each seq by reference
 | 
| 
 | 
   208    my ($estseq, $genomseq, $to_reverse);
 | 
| 
 | 
   209    my $started = 0;
 | 
| 
 | 
   210    my $hit_direction = 1;
 | 
| 
 | 
   211    my $output_fmt = 3; # same as 0 and 1 (we cannot deal with A=2 produced
 | 
| 
 | 
   212                        # output yet)
 | 
| 
 | 
   213    
 | 
| 
 | 
   214    while(defined($_ = $self->_readline())) {
 | 
| 
 | 
   215        #chomp();
 | 
| 
 | 
   216 
 | 
| 
 | 
   217        #
 | 
| 
 | 
   218        # bascially, each sim4 'hit' starts with seq1...
 | 
| 
 | 
   219        #
 | 
| 
 | 
   220        /^seq1/ && do {
 | 
| 
 | 
   221 	   if($started) {
 | 
| 
 | 
   222 	       $self->_pushback($_);
 | 
| 
 | 
   223 	       last;
 | 
| 
 | 
   224 	   }
 | 
| 
 | 
   225 	   $started = 1;
 | 
| 
 | 
   226 
 | 
| 
 | 
   227 	   # filename and length of seq 1
 | 
| 
 | 
   228 	   /^seq1\s+=\s+(\S+)\,\s+(\d+)/ ||
 | 
| 
 | 
   229 	       $self->throw("Sim4 parsing error on seq1 [$_] line. Sorry!");
 | 
| 
 | 
   230 	   $seq1props{'filename'} = $1;
 | 
| 
 | 
   231 	   $seq1props{'length'} = $2;
 | 
| 
 | 
   232 	   next;
 | 
| 
 | 
   233        };
 | 
| 
 | 
   234        /^seq2/ && do {
 | 
| 
 | 
   235 	   # the second hit has also the database name in the >name syntax 
 | 
| 
 | 
   236 	   # (in brackets).
 | 
| 
 | 
   237 	   /^seq2\s+=\s+(\S+)\s+\(>?(\S+)\s*\)\,\s+(\d+)/||
 | 
| 
 | 
   238 	       $self->throw("Sim4 parsing error on seq2 [$_] line. Sorry!");
 | 
| 
 | 
   239 	   $seq2props{'filename'} = $1;
 | 
| 
 | 
   240 	   $seq2props{'seqname'} = $2;
 | 
| 
 | 
   241 	   $seq2props{'length'} = $3;
 | 
| 
 | 
   242 	   next;
 | 
| 
 | 
   243        };
 | 
| 
 | 
   244        if(/^>(\S+)\s*(.*)$/) {
 | 
| 
 | 
   245 	   # output option was A=4, which not only gives the complete
 | 
| 
 | 
   246 	   # description lines, but also causes the longer sequence to be
 | 
| 
 | 
   247 	   # reversed if the second file contained one (genomic) sequence
 | 
| 
 | 
   248 	   $seq1props{'seqname'} = $1;
 | 
| 
 | 
   249 	   $seq1props{'description'} = $2 if $2;
 | 
| 
 | 
   250 	   $output_fmt = 4;
 | 
| 
 | 
   251 	   # we handle seq1 and seq2 both here
 | 
| 
 | 
   252 	   if(defined($_ = $self->_readline()) && (/^>(\S+)\s*(.*)$/)) {
 | 
| 
 | 
   253 	       $seq2props{'seqname'} = $1; # redundant, since already set above
 | 
| 
 | 
   254 	       $seq2props{'description'} = $2 if $2;
 | 
| 
 | 
   255 	   }
 | 
| 
 | 
   256 	   next;
 | 
| 
 | 
   257        }
 | 
| 
 | 
   258        /^\(complement\)/ && do {
 | 
| 
 | 
   259 	   $hit_direction = -1;
 | 
| 
 | 
   260 	   next;
 | 
| 
 | 
   261        };
 | 
| 
 | 
   262        # this matches
 | 
| 
 | 
   263        # start-end (start-end) pctid%
 | 
| 
 | 
   264        if(/(\d+)-(\d+)\s+\((\d+)-(\d+)\)\s+(\d+)%/) {
 | 
| 
 | 
   265  	   $seq1props{'start'} = $1;
 | 
| 
 | 
   266  	   $seq1props{'end'} = $2;
 | 
| 
 | 
   267  	   $seq2props{'start'} = $3;
 | 
| 
 | 
   268  	   $seq2props{'end'} = $4;
 | 
| 
 | 
   269 	   my $pctid   = $5;
 | 
| 
 | 
   270 	   
 | 
| 
 | 
   271 	   if(! defined($estseq)) {
 | 
| 
 | 
   272 	       # for the first time here: need to set the references referring
 | 
| 
 | 
   273 	       # to seq1 and seq2 
 | 
| 
 | 
   274 	       if(! exists($self->{'_est_is_first'})) {
 | 
| 
 | 
   275 		   # detect which one is the EST by looking at the lengths,
 | 
| 
 | 
   276 		   # and assume that this holds throughout the entire result
 | 
| 
 | 
   277 		   # file (i.e., when this method is called for the next
 | 
| 
 | 
   278 		   # alignment, this will not be checked again)
 | 
| 
 | 
   279 		   if($seq1props{'length'} > $seq2props{'length'}) {
 | 
| 
 | 
   280 		       $self->{'_est_is_first'} = 0;
 | 
| 
 | 
   281 		   } else {
 | 
| 
 | 
   282 		       $self->{'_est_is_first'} = 1;
 | 
| 
 | 
   283 		   }
 | 
| 
 | 
   284 	       }
 | 
| 
 | 
   285 	       if($self->{'_est_is_first'}) {
 | 
| 
 | 
   286 		   $estseq = \%seq1props;
 | 
| 
 | 
   287 		   $genomseq = \%seq2props;
 | 
| 
 | 
   288 		   # if the EST is given first, A=4 selects the genomic
 | 
| 
 | 
   289 		   # seq for being reversed (reversing the EST is default)
 | 
| 
 | 
   290 		   $to_reverse = ($output_fmt == 4) ? $genomseq : $estseq;
 | 
| 
 | 
   291 	       } else {
 | 
| 
 | 
   292 		   $estseq = \%seq2props;
 | 
| 
 | 
   293 		   $genomseq = \%seq1props;
 | 
| 
 | 
   294 		   # if the EST is the second, A=4 does not change the
 | 
| 
 | 
   295 		   # seq being reversed (always the EST is reversed)
 | 
| 
 | 
   296 		   $to_reverse = $estseq;
 | 
| 
 | 
   297 	       }
 | 
| 
 | 
   298 	   }
 | 
| 
 | 
   299 	   if($hit_direction == -1) {
 | 
| 
 | 
   300 	       # we have to reverse the coordinates of one of both seqs
 | 
| 
 | 
   301 	       my $tmp = $to_reverse->{'start'};
 | 
| 
 | 
   302 	       $to_reverse->{'start'} =
 | 
| 
 | 
   303 		   $to_reverse->{'length'} - $to_reverse->{'end'} + 1;
 | 
| 
 | 
   304 	       $to_reverse->{'end'} = $to_reverse->{'length'} - $tmp + 1;
 | 
| 
 | 
   305 	   }
 | 
| 
 | 
   306 	   # create and initialize the exon object
 | 
| 
 | 
   307 	   my $exon = Bio::Tools::Sim4::Exon->new(
 | 
| 
 | 
   308 					    '-start' => $genomseq->{'start'},
 | 
| 
 | 
   309 					    '-end'   => $genomseq->{'end'},
 | 
| 
 | 
   310 					    '-strand' => $hit_direction);
 | 
| 
 | 
   311 	   if(exists($genomseq->{'seqname'})) {
 | 
| 
 | 
   312 	       $exon->seq_id($genomseq->{'seqname'});
 | 
| 
 | 
   313 	   } else {
 | 
| 
 | 
   314 	       # take filename stripped of path as fall back
 | 
| 
 | 
   315 	       my ($basename) = &File::Basename::fileparse($genomseq->{'filename'}, '\..*');
 | 
| 
 | 
   316 	       $exon->seq_id($basename);
 | 
| 
 | 
   317 	   }
 | 
| 
 | 
   318 	   $exon->feature1()->add_tag_value('filename',
 | 
| 
 | 
   319 					    $genomseq->{'filename'});
 | 
| 
 | 
   320 	   # feature1 is supposed to be initialized to a Similarity object,
 | 
| 
 | 
   321            # but we provide a safety net
 | 
| 
 | 
   322 	   if($exon->feature1()->can('seqlength')) {
 | 
| 
 | 
   323 	       $exon->feature1()->seqlength($genomseq->{'length'});
 | 
| 
 | 
   324 	   } else {
 | 
| 
 | 
   325 	       $exon->feature1()->add_tag_value('SeqLength',
 | 
| 
 | 
   326 						$genomseq->{'length'});
 | 
| 
 | 
   327 	   }
 | 
| 
 | 
   328 	   # create and initialize the feature wrapping the 'hit' (the EST)
 | 
| 
 | 
   329 	   my $fea2 = Bio::SeqFeature::Similarity->new(
 | 
| 
 | 
   330                                             '-start' => $estseq->{'start'},
 | 
| 
 | 
   331 					    '-end'   => $estseq->{'end'},
 | 
| 
 | 
   332 					    '-strand' => 0,
 | 
| 
 | 
   333 					    '-primary' => "aligning_EST");
 | 
| 
 | 
   334 	   if(exists($estseq->{'seqname'})) {
 | 
| 
 | 
   335 	       $fea2->seq_id($estseq->{'seqname'});
 | 
| 
 | 
   336 	   } else {
 | 
| 
 | 
   337 	       # take filename stripped of path as fall back
 | 
| 
 | 
   338 	       my ($basename) =
 | 
| 
 | 
   339 		   &File::Basename::fileparse($estseq->{'filename'}, '\..*');
 | 
| 
 | 
   340 	       $fea2->seq_id($basename);
 | 
| 
 | 
   341 	   }
 | 
| 
 | 
   342 	   $fea2->add_tag_value('filename', $estseq->{'filename'});
 | 
| 
 | 
   343 	   $fea2->seqlength($estseq->{'length'});
 | 
| 
 | 
   344 	   # store
 | 
| 
 | 
   345 	   $exon->est_hit($fea2);	   
 | 
| 
 | 
   346 	   # general properties
 | 
| 
 | 
   347 	   $exon->source_tag($self->analysis_method());
 | 
| 
 | 
   348 	   $exon->percentage_id($pctid);
 | 
| 
 | 
   349 	   $exon->score($exon->percentage_id());
 | 
| 
 | 
   350 	   # push onto array
 | 
| 
 | 
   351 	   push(@exons, $exon);
 | 
| 
 | 
   352 	   next; # back to while loop
 | 
| 
 | 
   353        }
 | 
| 
 | 
   354    }
 | 
| 
 | 
   355    return @exons;
 | 
| 
 | 
   356 }
 | 
| 
 | 
   357 
 | 
| 
 | 
   358 =head2 next_exonset
 | 
| 
 | 
   359 
 | 
| 
 | 
   360  Title   : next_exonset
 | 
| 
 | 
   361  Usage   : $exonset = $sim4_result->parse_next_exonset;
 | 
| 
 | 
   362            print "Exons start at ", $exonset->start(), 
 | 
| 
 | 
   363                  "and end at ", $exonset->end(), "\n";
 | 
| 
 | 
   364            foreach $exon ($exonset->sub_SeqFeature()) {
 | 
| 
 | 
   365                # do something
 | 
| 
 | 
   366            }
 | 
| 
 | 
   367  Function: Parses the next alignment of the Sim4 result file and returns the
 | 
| 
 | 
   368            set of exons as a container of features. The container is itself
 | 
| 
 | 
   369            a Bio::SeqFeature::Generic object, with the Bio::Tools::Sim4::Exon
 | 
| 
 | 
   370            objects as sub features. Start, end, and strand of the container
 | 
| 
 | 
   371            will represent the total region covered by the exons of this set.
 | 
| 
 | 
   372 
 | 
| 
 | 
   373            See the documentation of parse_next_alignment() for further
 | 
| 
 | 
   374            reference about parsing and how the information is stored.
 | 
| 
 | 
   375 
 | 
| 
 | 
   376  Example : 
 | 
| 
 | 
   377  Returns : An Bio::SeqFeature::Generic object holding Bio::Tools::Sim4::Exon
 | 
| 
 | 
   378            objects as sub features.
 | 
| 
 | 
   379  Args    :
 | 
| 
 | 
   380 
 | 
| 
 | 
   381 =cut
 | 
| 
 | 
   382 
 | 
| 
 | 
   383 sub next_exonset {
 | 
| 
 | 
   384     my $self = shift;
 | 
| 
 | 
   385     my $exonset;
 | 
| 
 | 
   386 
 | 
| 
 | 
   387     # get the next array of exons
 | 
| 
 | 
   388     my @exons = $self->parse_next_alignment();
 | 
| 
 | 
   389     return if($#exons < 0);
 | 
| 
 | 
   390     # create the container of exons as a feature object itself, with the
 | 
| 
 | 
   391     # data of the first exon for initialization
 | 
| 
 | 
   392     $exonset = Bio::SeqFeature::Generic->new('-start' => $exons[0]->start(),
 | 
| 
 | 
   393 					     '-end' => $exons[0]->end(),
 | 
| 
 | 
   394 					     '-strand' => $exons[0]->strand(),
 | 
| 
 | 
   395 					     '-primary' => "ExonSet");
 | 
| 
 | 
   396     $exonset->source_tag($exons[0]->source_tag());
 | 
| 
 | 
   397     $exonset->seq_id($exons[0]->seq_id());
 | 
| 
 | 
   398     # now add all exons as sub features, with enabling EXPANsion of the region
 | 
| 
 | 
   399     # covered in total
 | 
| 
 | 
   400     foreach my $exon (@exons) {
 | 
| 
 | 
   401 	$exonset->add_sub_SeqFeature($exon, 'EXPAND');
 | 
| 
 | 
   402     }
 | 
| 
 | 
   403     return $exonset;
 | 
| 
 | 
   404 }
 | 
| 
 | 
   405 
 | 
| 
 | 
   406 =head2 next_feature
 | 
| 
 | 
   407 
 | 
| 
 | 
   408  Title   : next_feature
 | 
| 
 | 
   409  Usage   : while($exonset = $sim4->next_feature()) {
 | 
| 
 | 
   410                   # do something
 | 
| 
 | 
   411            }
 | 
| 
 | 
   412  Function: Does the same as L<next_exonset()>. See there for documentation of
 | 
| 
 | 
   413            the functionality. Call this method repeatedly until FALSE is
 | 
| 
 | 
   414            returned.
 | 
| 
 | 
   415 
 | 
| 
 | 
   416            The returned object is actually a SeqFeatureI implementing object.
 | 
| 
 | 
   417            This method is required for classes implementing the
 | 
| 
 | 
   418            SeqAnalysisParserI interface, and is merely an alias for 
 | 
| 
 | 
   419            next_exonset() at present.
 | 
| 
 | 
   420 
 | 
| 
 | 
   421  Example :
 | 
| 
 | 
   422  Returns : A Bio::SeqFeature::Generic object.
 | 
| 
 | 
   423  Args    :
 | 
| 
 | 
   424 
 | 
| 
 | 
   425 =cut
 | 
| 
 | 
   426 
 | 
| 
 | 
   427 sub next_feature {
 | 
| 
 | 
   428     my ($self,@args) = @_;
 | 
| 
 | 
   429     # even though next_exonset doesn't expect any args (and this method
 | 
| 
 | 
   430     # does neither), we pass on args in order to be prepared if this changes
 | 
| 
 | 
   431     # ever
 | 
| 
 | 
   432     return $self->next_exonset(@args);
 | 
| 
 | 
   433 }
 | 
| 
 | 
   434 
 | 
| 
 | 
   435 1;
 |