| 0 | 1 # BioPerl module for Bio::Tools::RepeatMasker | 
|  | 2 # | 
|  | 3 # Cared for by Shawn Hoon <shawnh@fugu-sg.org> | 
|  | 4 # | 
|  | 5 # Copyright Shawn Hoon | 
|  | 6 # | 
|  | 7 # You may distribute this module under the same terms as perl itself | 
|  | 8 | 
|  | 9 # POD documentation - main docs before the code | 
|  | 10 | 
|  | 11 =head1 NAME | 
|  | 12 | 
|  | 13 Bio::Tools::RepeatMasker - a parser for RepeatMasker output | 
|  | 14 | 
|  | 15 =head1 SYNOPSIS | 
|  | 16 | 
|  | 17     use Bio::Tools::RepeatMasker; | 
|  | 18     my $parser = Bio::Tools::RepeatMasker->new(-file => 'seq.fa.out'); | 
|  | 19     while( my $result = $parser->next_result ) { | 
|  | 20 | 
|  | 21     } | 
|  | 22 | 
|  | 23 =head1 DESCRIPTION | 
|  | 24 | 
|  | 25 A parser for RepeatMasker output; each repeat is returned as a Bio::SeqFeature::FeaturePair. | 
|  | 26 | 
|  | 27 =head1 FEEDBACK | 
|  | 28 | 
|  | 29 =head2 Mailing Lists | 
|  | 30 | 
|  | 31 User feedback is an integral part of the evolution of this and other | 
|  | 32 Bioperl modules. Send your comments and suggestions preferably to | 
|  | 33 the Bioperl mailing list.  Your participation is much appreciated. | 
|  | 34 | 
|  | 35   bioperl-l@bioperl.org              - General discussion | 
|  | 36   http://bioperl.org/MailList.shtml  - About the mailing lists | 
|  | 37 | 
|  | 38 =head2 Reporting Bugs | 
|  | 39 | 
|  | 40 Report bugs to the Bioperl bug tracking system to help us keep track | 
|  | 41 of the bugs and their resolution. Bug reports can be submitted via | 
|  | 42 email or the web: | 
|  | 43 | 
|  | 44   bioperl-bugs@bioperl.org | 
|  | 45   http://bugzilla.bioperl.org/ | 
|  | 46 | 
|  | 47 =head1 AUTHOR - Shawn Hoon | 
|  | 48 | 
|  | 49 Email shawnh@fugu-sg.org | 
|  | 50 | 
|  | 51 Describe contact details here | 
|  | 52 | 
|  | 53 =head1 CONTRIBUTORS | 
|  | 54 | 
|  | 55 Additional contributors names and emails here | 
|  | 56 | 
|  | 57 =head1 APPENDIX | 
|  | 58 | 
|  | 59 The rest of the documentation details each of the object methods. | 
|  | 60 Internal methods are usually preceded with a _ | 
|  | 61 | 
|  | 62 =cut | 
|  | 63 | 
|  | 64 | 
|  | 65 # Let the code begin... | 
|  | 66 | 
|  | 67 | 
|  | 68 package Bio::Tools::RepeatMasker; | 
|  | 69 use vars qw(@ISA); | 
|  | 70 use strict; | 
|  | 71 | 
|  | 72 use Bio::Root::Root; | 
|  | 73 use Bio::SeqFeature::FeaturePair; | 
|  | 74 use Bio::Root::IO; | 
|  | 75 | 
|  | 76 @ISA = qw(Bio::Root::Root Bio::Root::IO ); | 
|  | 77 | 
|  | 78 =head2 new | 
|  | 79 | 
|  | 80  Title   : new | 
|  | 81  Usage   : my $obj = new Bio::Tools::RepeatMasker(); | 
|  | 82  Function: Builds a new Bio::Tools::RepeatMasker object | 
|  | 83  Returns : Bio::Tools::RepeatMasker | 
|  | 84  Args    : -fh/-file => $val, # for initing input, see Bio::Root::IO | 
|  | 85 | 
|  | 86 | 
|  | 87 =cut | 
|  | 88 | 
# Constructor: builds the object through the parent class constructor and
# then hands the same argument list to _initialize_io so that the
# -fh / -file options can set up the input stream.
#
# Args    : the caller's argument list, forwarded unchanged to both
#           SUPER::new and _initialize_io
# Returns : the newly constructed object
sub new {
  my $class = shift;

  # After the shift, @_ holds exactly the caller-supplied arguments.
  my $self = $class->SUPER::new(@_);
  $self->_initialize_io(@_);

  return $self;
}
|  | 97 | 
|  | 98 =head2 next_result | 
|  | 99 | 
|  | 100  Title   : next_result | 
|  | 101  Usage   : my $r = $rpt_masker->next_result | 
|  | 102  Function: Get the next result set from parser data | 
|  | 103  Returns : L<Bio::SeqFeature::FeaturePair> | 
|  | 104  Args    : none | 
|  | 105 | 
|  | 106 | 
|  | 107 =cut | 
|  | 108 | 
# next_result: read lines from the input stream until one describes a
# repeat, and return that repeat as a feature pair.
#
# Args    : none (lines are pulled through $self->_readline)
# Returns : a Bio::SeqFeature::FeaturePair in which feature1 carries the
#           query-sequence coordinates and feature2 the coordinates on the
#           repeat; both features share score, strand, source tag and
#           primary tag (the repeat class).
#           An empty return (undef in scalar context) when the input is
#           exhausted or when the "no repetitive sequences detected"
#           notice is encountered.
sub next_result {
    my ($self) = @_;

    # A lexical line buffer leaves the caller's $_ untouched, and the
    # defined() test keeps a false-valued line (such as a bare "0") from
    # being mistaken for end of input.
    while ( defined( my $line = $self->_readline() ) ) {
        if ( $line =~ /no repetitive sequences detected/ ) {
            print STDERR "RepeatMasker didn't find any repetitive sequences\n";
            return;
        }

        # Lines without any digit are introductory/header lines.
        next unless $line =~ /\d+/;

        # Split once on whitespace and reuse the fields everywhere below.
        my @fields = split ' ', $line;

        # Columns up to index 13 are read below, so a shorter row cannot
        # be a complete record.
        next if @fields < 14;

        # Ignore features with negatives in any of the three repeat
        # position columns.  (The previous subscript "11-13" was evaluated
        # arithmetically as -2 and so inspected a single column only.)
        next if grep { /-/ } @fields[ 11 .. 13 ];

        my ( $score, $query_name, $query_start, $query_end, $strand,
             $repeat_name, $repeat_class ) = @fields[ 0, 4, 5, 6, 8, 9, 10 ];

        # A strand column other than '+' or 'C' leaves the hit coordinates
        # undefined and $strand as read from the file.
        my ( $hit_start, $hit_end );
        if ( $strand eq '+' ) {
            ( $hit_start, $hit_end ) = @fields[ 11, 12 ];
            $strand = 1;
        }
        elsif ( $strand eq 'C' ) {
            # Per the RepeatMasker output format, complement rows list the
            # larger repeat coordinate first (column 12) and the smaller
            # one second (column 13); assign them so that start <= end.
            ( $hit_end, $hit_start ) = @fields[ 12, 13 ];
            $strand = -1;
        }

        # Feature on the query sequence.
        my $query_feat = Bio::SeqFeature::Generic->new;
        $query_feat->seq_id($query_name);
        $query_feat->score($score);
        $query_feat->start($query_start);
        $query_feat->end($query_end);
        $query_feat->strand($strand);
        $query_feat->source_tag("RepeatMasker");
        $query_feat->primary_tag($repeat_class);

        # Feature on the repeat.  Its primary tag is set on this object;
        # the earlier code set it on the query feature a second time and
        # left this one without a primary tag.
        my $repeat_feat = Bio::SeqFeature::Generic->new;
        $repeat_feat->seq_id($repeat_name);
        $repeat_feat->score($score);
        $repeat_feat->start($hit_start);
        $repeat_feat->end($hit_end);
        $repeat_feat->strand($strand);
        $repeat_feat->source_tag("RepeatMasker");
        $repeat_feat->primary_tag($repeat_class);

        return Bio::SeqFeature::FeaturePair->new(
            -feature1 => $query_feat,
            -feature2 => $repeat_feat,
        );
    }

    # Input exhausted without finding another repeat row.
    return;
}
|  | 156 | 
|  | 157 1; |