annotate variant_effect_predictor/Bio/Tools/BPlite/Iteration.pm @ 2:a5976b2dce6f

changing defualt values for ensembl database
author mahtabm
date Thu, 11 Apr 2013 17:15:42 +1000
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: Iteration.pm,v 1.15 2002/06/19 00:27:49 jason Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 # Bioperl module Bio::Tools::BPlite::Iteration
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # based closely on the Bio::Tools::BPlite modules
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 # Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # Lorenz Pollak (lorenz@ist.org, bioperl port)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 # Copyright Peter Schattner
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 # _history
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 # October 20, 2000
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 # POD documentation - main docs before the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 # Added to get a simple_align object for a psiblast run with the -m 6 flag /AE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 Bio::Tools::BPlite::Iteration - object for parsing single iteration
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 of a PSIBLAST report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 use Bio::Tools:: BPpsilite;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 open FH, "t/psiblastreport.out";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 $report = Bio::Tools::BPpsilite->new(-fh=>\*FH);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 # determine number of iterations executed by psiblast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 $total_iterations = $report->number_of_iterations;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 $last_iteration = $report->round($total_iterations);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 # Process only hits found in last iteration ...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 $oldhitarray_ref = $last_iteration->oldhits;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 HIT: while($sbjct = $last_iteration->nextSbjct) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 $id = $sbjct->name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 $is_old = grep /\Q$id\E/, @$oldhitarray_ref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 if ($is_old ){next HIT;}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 # do something with new hit...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 =head2 ALIGNMENTS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 # This assumed that you have $db pointing to a database, $out to an output file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 # $slxdir to a directory and $psiout
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 # note the alignments can only be obtained if the flag "-m 6" is run.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 # It might also be necessary to use the flag -v to get all alignments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 my @psiparams = ('database' => $db , 'output' => $out, 'j' => 3, 'm' => 6,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 'h' => 1.e-3 , 'F' => 'T' , 'Q' => $psiout );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 my $factory = Bio::Tools::Run::StandAloneBlast->new(@psiparams);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 my $report = $factory->blastpgp($seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 my $total_iterations = $report->number_of_iterations();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 my $last_iteration = $report->round($total_iterations);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 my $align=$last_iteration->Align;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 my $slxfile=$slxdir.$id.".slx";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 my $slx = Bio::AlignIO->new('-format' => 'selex','-file' => ">".$slxfile );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 $slx->write_aln($align);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 See the documentation for BPpsilite.pm for a description of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 Iteration.pm module.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 =head1 AUTHORS - Peter Schattner
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 Email: schattner@alum.mit.edu
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 =head1 CONTRIBUTORS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 Jason Stajich, jason@cgt.mc.duke.edu
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 =head1 ACKNOWLEDGEMENTS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 Based on work of:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 Lorenz Pollak (lorenz@ist.org, bioperl port)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 =head1 COPYRIGHT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 BPlite.pm is copyright (C) 1999 by Ian Korf.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 =head1 DISCLAIMER
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 This software is provided "as is" without warranty of any kind.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 package Bio::Tools::BPlite::Iteration;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 use Bio::Root::Root; # root object to inherit from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 use Bio::Tools::BPlite; #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 use Bio::Tools::BPlite::Sbjct;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 @ISA = qw(Bio::Root::Root);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 sub new {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 my ($class, @args) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 my $self = $class->SUPER::new(@args);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 ($self->{'PARENT'},$self->{'ROUND'}) =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 $self->_rearrange([qw(PARENT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 ROUND
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 )],@args);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 $self->{'QUERY'} = $self->{'PARENT'}->{'QUERY'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 $self->{'LENGTH'} = $self->{'PARENT'}->{'LENGTH'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 if($self->_parseHeader) {$self->{'REPORT_DONE'} = 0} # there are alignments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 else {$self->{'REPORT_DONE'} = 1} # empty report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 return $self; # success - we hope!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 =head2 query
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 Title : query
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 Usage : $query = $obj->query();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 Function : returns the query object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 Returns : query object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 sub query {shift->{'QUERY'}}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 =head2 qlength
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 Title : qlength
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 Usage : $len = $obj->qlength();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 Returns : length of query
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 sub qlength {shift->{'LENGTH'}}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 =head2 newhits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 Title : newhits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 Usage : $newhits = $obj->newhits();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 Returns : reference to an array listing all the hits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 from the current iteration which were not identified
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 in the previous iteration
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 sub newhits {shift->{'NEWHITS'}}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 =head2 oldhits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 Title : oldhits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 Usage : $oldhits = $obj->oldhits();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 Returns : reference to an array listing all the hits from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 the current iteration which were identified and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 above threshold in the previous iteration
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 sub oldhits {shift->{'OLDHITS'}}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 =head2 nextSbjct
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 Title : nextSbjct
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 Usage : $sbjct = $obj->nextSbjct();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 Function : Method of iterating through all the Sbjct retrieved
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 from parsing the report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 #Example : while ( my $sbjct = $obj->nextSbjct ) {}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 Returns : next Sbjct object or undef if finished
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 sub nextSbjct {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 $self->_fastForward or return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 #######################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 # get all sbjct lines #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 #######################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 my $def = $self->_readline();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 while(defined ($_ = $self->_readline) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 if ($_ !~ /\w/) {next}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 elsif ($_ =~ /Strand HSP/) {next} # WU-BLAST non-data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 elsif ($_ =~ /^\s{0,2}Score/) {$self->_pushback( $_); last}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 elsif ($_ =~ /^(\d+) .* \d+$/) { # This is not correct at all
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 $self->_pushback($_); # 1: HSP does not work for -m 6 flag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 $def = $1; # 2: length/name are incorrect
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 my $length = undef; # 3: Names are repeated many times.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 '-length'=>$length,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 '-parent'=>$self);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 return $sbjct;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 } # m-6
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 elsif ($_ =~ /^Parameters|^\s+Database:|^\s+Posted date:/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 $self->_pushback( $_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 } else {$def .= $_}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 $def =~ s/\s+/ /g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 $def =~ s/\s+$//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 $def =~ s/Length = ([\d,]+)$//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 my $length = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 return 0 unless $def =~ /^>/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 $def =~ s/^>//;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 ####################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 # the Sbjct object #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 ####################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 '-length'=>$length,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 '-parent'=>$self);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 return $sbjct;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 # This is added by /AE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 =head2 Align
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 Title : Align
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 Usage : $SimpleAlign = $obj->Align();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 Function : Method to obtain a simpleAlign object from psiblast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 Example : $SimpleAlign = $obj->Align();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 Returns : SimpleAlign object or undef if not found.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 BUG : Only works if psiblast has been run with m 6 flag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 sub Align {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 use Bio::SimpleAlign;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 $self->_fastForward or return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 my $lastline = $self->_readline();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 return undef unless $lastline =~ /^QUERY/; # If psiblast not run correctly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 my (%sequence,%first,%last,$num);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 if ( $lastline =~ /^QUERY\s+(\d*)\s*([-\w]+)\s*(\d*)\s*$/){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 my $name='QUERY';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 my $start=$1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 my $seq=$2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 my $stop=$3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 $seq =~ s/-/\./g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 $start =~ s/ //g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 $stop =~ s/ //g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 $sequence{$name} .= $seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 if ($first{$name} eq ''){$first{$name}=$start;}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 if ($stop ne ''){$last{$name}=$stop;}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 # print "FOUND:\t$seq\t$start\t$stop\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 $num=0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 while(defined($_ = $self->_readline()) ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 chomp($_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 if ( $_ =~ /^QUERY\s+(\d+)\s*([\-A-Z]+)\s*(\+)\s*$/){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 my $name='QUERY';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 my $start=$1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 my $seq=$2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 my $stop=$3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 $seq =~ s/-/\./g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 $start =~ s/ //g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 $stop =~ s/ //g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 $sequence{$name} .= $seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 if ($first{$name} eq '') { $first{$name} = $start;}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 if ($stop ne '') { $last{$name}=$stop;}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 $num=0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 } elsif ( $_ =~ /^(\d+)\s+(\d+)\s*([\-A-Z]+)\s*(\d+)\s*$/ ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 my $name=$1.".".$num;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 my $start=$2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 my $seq=$3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 my $stop=$4;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 $seq =~ s/-/\./g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 $start =~ s/ //g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 $stop =~ s/ //g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 $sequence{$name} .= $seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 if ($first{$name} eq ''){$first{$name}=$start;}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 if ($stop ne ''){$last{$name}=$stop;}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 $num++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 my $align = new Bio::SimpleAlign();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 my @keys=sort keys(%sequence);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 foreach my $name (@keys){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 my $nse = $name."/".$first{$name}."-".$last{$name};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 my $seqobj = Bio::LocatableSeq->new( -seq => $sequence{$name},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 -id => $name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 -name => $nse,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 -start => $first{$name},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 -end => $last{$name}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 $align->add_seq($seqobj);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 return $align;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 # Start of internal subroutines.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 sub _parseHeader {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 my (@old_hits, @new_hits);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 my $newhits_true = ($self->{'ROUND'} < 2) ? 1 : 0 ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 while(defined($_ = $self->_readline()) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 if ($_ =~ /(\w\w|.*|\w+.*)\s\s+(\d+)\s+([-\.e\d]+)$/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 my $id = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 my $score= $2; #not used currently
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 my $evalue= $3; #not used currently
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 if ($newhits_true) { push ( @new_hits, $id);}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 else { push (@old_hits, $id);}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 elsif ($_ =~ /^Sequences not found previously/) {$newhits_true = 1 ;}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 # This is changed for "-m 6" option /AE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 elsif ($_ =~ /^>/ || $_ =~ /^QUERY/)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 $self->_pushback($_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 $self->{'OLDHITS'} = \@old_hits;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 $self->{'NEWHITS'} = \@new_hits;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 elsif ($_ =~ /^Parameters|^\s+Database:|^\s*Results from round\s+(d+)/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 $self->_pushback($_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 return 0; # no sequences found in this iteration
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 return 0; # no sequences found in this iteration
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 sub _fastForward {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 return 0 if $self->{'REPORT_DONE'}; # empty report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 while(defined($_ = $self->_readline()) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 if( $_ =~ /^>/ ||
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 $_ =~ /^QUERY|^\d+ .* \d+$/ ) { # Changed to also handle "-m 6" /AE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 $self->_pushback($_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 # print "FASTFORWARD",$_,"\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 if ($_ =~ /^>|^Parameters|^\s+Database:/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 $self->_pushback($_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 $self->warn("Possible error (2) while parsing BLAST report!");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 =head2 _readline
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 Title : _readline
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 Usage : $obj->_readline
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 Function: Reads a line of input.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 Note that this method implicitely uses the value of $/ that is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 in effect when called.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 Note also that the current implementation does not handle pushed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 back input correctly unless the pushed back input ends with the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 value of $/.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 Returns :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 sub _readline{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 return $self->{'PARENT'}->_readline();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 =head2 _pushback
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 Title : _pushback
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 Usage : $obj->_pushback($newvalue)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 Function: puts a line previously read with _readline back into a buffer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 Returns :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 Args : newvalue
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 sub _pushback {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 my ($self, $arg) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 return $self->{'PARENT'}->_pushback($arg);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 __END__