Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/Est2Genome.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: Est2Genome.pm,v 1.11 2002/12/05 13:46:36 heikki Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::Tools::Est2Genome | |
| 4 # | |
| 5 # Cared for by Jason Stajich <jason@bioperl.org> | |
| 6 # | |
| 7 # Copyright Jason Stajich | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::Tools::Est2Genome - Parse est2genome output, makes simple Bio::SeqFeature::Generic objects | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 use Bio::Tools::Est2Genome; | |
| 20 | |
| 21 my $featureiter = new Bio::Tools::Est2Genome(-file => 'output.est2genome'); | |
| 22 | |
| 23 # This is going to be fixed to use the SeqAnalysisI next_feature | |
| 24 # Method eventually when we have the objects to put the data in | |
| 25 # properly | |
| 26 while( my $f = $featureiter->parse_next_gene ) { | |
| 27 # process Bio::SeqFeature::Generic objects here | |
| 28 } | |
| 29 | |
| 30 =head1 DESCRIPTION | |
| 31 | |
| 32 This module is a parser for est2genome [EMBOSS] alignments of est/cdna | |
| 33 sequence to genomic DNA. This is generally accepted as the best | |
| 34 program for predicting splice sites based on est/cdnas*. | |
| 35 | |
| 36 This module currently does not try pull out the ungapped alignments | |
| 37 (Segment) but may in the future. | |
| 38 | |
| 39 | |
| 40 * AFAIK | |
| 41 | |
| 42 =head1 FEEDBACK | |
| 43 | |
| 44 =head2 Mailing Lists | |
| 45 | |
| 46 User feedback is an integral part of the evolution of this and other | |
| 47 Bioperl modules. Send your comments and suggestions preferably to | |
| 48 the Bioperl mailing list. Your participation is much appreciated. | |
| 49 | |
| 50 bioperl-l@bioperl.org - General discussion | |
| 51 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 52 | |
| 53 =head2 Reporting Bugs | |
| 54 | |
| 55 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 56 of the bugs and their resolution. Bug reports can be submitted via | |
| 57 email or the web: | |
| 58 | |
| 59 bioperl-bugs@bioperl.org | |
| 60 http://bugzilla.bioperl.org/ | |
| 61 | |
| 62 =head1 AUTHOR - Jason Stajich | |
| 63 | |
| 64 Email jason@bioperl.org | |
| 65 | |
| 66 Describe contact details here | |
| 67 | |
| 68 =head1 CONTRIBUTORS | |
| 69 | |
| 70 Additional contributors names and emails here | |
| 71 | |
| 72 =head1 APPENDIX | |
| 73 | |
| 74 The rest of the documentation details each of the object methods. | |
| 75 Internal methods are usually preceded with a _ | |
| 76 | |
| 77 =cut | |
| 78 | |
| 79 | |
| 80 # Let the code begin... | |
| 81 | |
| 82 | |
| 83 package Bio::Tools::Est2Genome; | |
| 84 use vars qw(@ISA); | |
| 85 use strict; | |
| 86 | |
| 87 # Object preamble - inherits from Bio::Root::Root | |
| 88 | |
| 89 use Bio::Root::Root; | |
| 90 use Bio::Tools::AnalysisResult; | |
| 91 use Bio::SeqFeature::Gene::Exon; | |
| 92 use Bio::SeqFeature::Gene::Intron; | |
| 93 use Bio::SeqFeature::Gene::GeneStructure; | |
| 94 use Bio::SeqFeature::SimilarityPair; | |
| 95 | |
| 96 @ISA = qw(Bio::Tools::AnalysisResult ); | |
| 97 | |
| 98 =head2 new | |
| 99 | |
| 100 Title : new | |
| 101 Usage : my $obj = new Bio::Tools::Est2Genome(); | |
| 102 Function: Builds a new Bio::Tools::Est2Genome object | |
| 103 Returns : an instance of Bio::Tools::Est2Genome | |
| 104 Args : -file => 'output.est2genome' or | |
| 105 -fh => \*EST2GENOMEOUTPUT | |
| 106 -genomefirst => 1 # genome was the first input (not standard) | |
| 107 | |
| 108 =cut | |
| 109 | |
| 110 sub _initialize_state { | |
| 111 my($self,@args) = @_; | |
| 112 | |
| 113 # call the inherited method first | |
| 114 my $make = $self->SUPER::_initialize_state(@args); | |
| 115 | |
| 116 my ($genome_is_first) = $self->_rearrange([qw(GENOMEFIRST)], @args); | |
| 117 | |
| 118 delete($self->{'_genome_is_first'}); | |
| 119 $self->{'_genome_is_first'} = $genome_is_first if(defined($genome_is_first)); | |
| 120 $self->analysis_method("est2genome"); | |
| 121 } | |
| 122 | |
| 123 =head2 analysis_method | |
| 124 | |
| 125 Usage : $sim4->analysis_method(); | |
| 126 Purpose : Inherited method. Overridden to ensure that the name matches | |
| 127 /est2genome/i. | |
| 128 Returns : String | |
| 129 Argument : n/a | |
| 130 | |
| 131 =cut | |
| 132 | |
| 133 #------------- | |
| 134 sub analysis_method { | |
| 135 #------------- | |
| 136 my ($self, $method) = @_; | |
| 137 if($method && ($method !~ /est2genome/i)) { | |
| 138 $self->throw("method $method not supported in " . ref($self)); | |
| 139 } | |
| 140 return $self->SUPER::analysis_method($method); | |
| 141 } | |
| 142 | |
| 143 =head2 parse_next_gene | |
| 144 | |
| 145 Title : parse_next_gene | |
| 146 Usage : @gene = $est2genome_result->parse_next_gene; | |
| 147 foreach $exon (@exons) { | |
| 148 # do something | |
| 149 } | |
| 150 | |
| 151 Function: Parses the next alignments of the est2genome result file and | |
| 152 returns the found exons as an array of | |
| 153 Bio::SeqFeature::SimilarityPair objects. Call | |
| 154 this method repeatedly until an empty array is returned to get the | |
| 155 results for all alignments. | |
| 156 | |
| 157 The $exon->seq_id() attribute will be set to the identifier of the | |
| 158 respective sequence for both sequences. | |
| 159 The length is accessible via the seqlength() | |
| 160 attribute of $exon->query() and | |
| 161 $exon->est_hit(). | |
| 162 Returns : An array (or array reference) of Bio::SeqFeature::SimilarityPair and Bio::SeqFeature::Generic objects | |
| 163 Args : none | |
| 164 | |
| 165 | |
| 166 =cut | |
| 167 | |
| 168 sub parse_next_gene { | |
| 169 my ($self) = @_; | |
| 170 my $seensegment = 0; | |
| 171 my @features; | |
| 172 my ($qstrand,$hstrand) = (1,1); | |
| 173 my $lasthseqname; | |
| 174 while( defined($_ = $self->_readline) ) { | |
| 175 if( /Note Best alignment is between (reversed|forward) est and (reversed|forward) genome, (but|and) splice\s+sites imply\s+(forward gene|REVERSED GENE)/) { | |
| 176 if( $seensegment ) { | |
| 177 $self->_pushback($_); | |
| 178 return wantarray ? @features : \@features; | |
| 179 } | |
| 180 $hstrand = -1 if $1 eq 'reversed'; | |
| 181 $qstrand = -1 if $4 eq 'REVERSED GENE'; | |
| 182 $self->debug( "1=$1, 2=$2, 4=$4\n"); | |
| 183 } | |
| 184 elsif( /^Exon/ ) { | |
| 185 my ($name,$len,$score,$qstart,$qend,$qseqname, | |
| 186 $hstart,$hend, $hseqname) = split; | |
| 187 $lasthseqname = $hseqname; | |
| 188 my $query = new Bio::SeqFeature::Similarity(-primary => $name, | |
| 189 -source => $self->analysis_method, | |
| 190 -seq_id => $qseqname, # FIXME WHEN WE REDO THE GENERIC NAME CHANGE | |
| 191 -start => $qstart, | |
| 192 -end => $qend, | |
| 193 -strand => $qstrand, | |
| 194 -score => $score, | |
| 195 -tag => { | |
| 196 # 'Location' => "$hstart..$hend", | |
| 197 'Sequence' => "$hseqname", | |
| 198 } | |
| 199 ); | |
| 200 my $hit = new Bio::SeqFeature::Similarity(-primary => 'exon_hit', | |
| 201 -source => $self->analysis_method, | |
| 202 -seq_id => $hseqname, | |
| 203 -start => $hstart, | |
| 204 -end => $hend, | |
| 205 -strand => $hstrand, | |
| 206 -score => $score, | |
| 207 -tag => { | |
| 208 # 'Location' => "$qstart..$qend", | |
| 209 'Sequence' => "$qseqname", | |
| 210 | |
| 211 } | |
| 212 ); | |
| 213 push @features, new Bio::SeqFeature::SimilarityPair | |
| 214 (-query => $query, | |
| 215 -hit => $hit, | |
| 216 -source => $self->analysis_method); | |
| 217 } elsif( /^([\-\+\?])(Intron)/) { | |
| 218 my ($name,$len,$score,$qstart,$qend,$qseqname) = split; | |
| 219 push @features, new Bio::SeqFeature::Generic(-primary => $2, | |
| 220 -source => $self->analysis_method, | |
| 221 -start => $qstart, | |
| 222 -end => $qend, | |
| 223 -strand => $qstrand, | |
| 224 -score => $score, | |
| 225 -seq_id => $qseqname, | |
| 226 -tag => { | |
| 227 'Sequence' => $lasthseqname}); | |
| 228 } elsif( /^Span/ ) { | |
| 229 } elsif( /^Segment/ ) { | |
| 230 $seensegment = 1; | |
| 231 } elsif( /^\s+$/ ) { # do nothing | |
| 232 } else { | |
| 233 $self->warn( "unknown line $_\n"); | |
| 234 } | |
| 235 } | |
| 236 return undef unless( @features ); | |
| 237 return wantarray ? @features : \@features; | |
| 238 } | |
| 239 | |
| 240 =head2 next_feature | |
| 241 | |
| 242 Title : next_feature | |
| 243 Usage : $seqfeature = $obj->next_feature(); | |
| 244 Function: Returns the next feature available in the analysis result, or | |
| 245 undef if there are no more features. | |
| 246 Example : | |
| 247 Returns : A Bio::SeqFeatureI implementing object, or undef if there are no | |
| 248 more features. | |
| 249 Args : none | |
| 250 | |
| 251 =cut | |
| 252 | |
| 253 sub next_feature { | |
| 254 my ($self) = shift; | |
| 255 $self->throw("We haven't really done this right, yet, use parse_next_gene"); | |
| 256 } | |
| 257 | |
| 258 | |
| 259 1; |
