annotate variant_effect_predictor/Bio/Tools/BPpsilite.pm @ 2:a5976b2dce6f

changing defualt values for ensembl database
author mahtabm
date Thu, 11 Apr 2013 17:15:42 +1000
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: BPpsilite.pm,v 1.22 2002/10/22 07:38:45 lapp Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 # Bioperl module Bio::Tools::BPpsilite
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 ############################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 # based closely on the Bio::Tools::BPlite modules
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 # Lorenz Pollak (lorenz@ist.org, bioperl port)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 # Copyright Peter Schattner
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 # You may distribute this module under the same terms as perl itself
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 # _history
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 # October 20, 2000
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 # POD documentation - main docs before the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 Bio::Tools::BPpsilite - Lightweight BLAST parser for (iterated) psiblast reports
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 use Bio::Tools::BPpsilite;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 open FH, "t/psiblastreport.out";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 $report = Bio::Tools::BPpsilite->new(-fh=>\*FH);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 # determine number of iterations executed by psiblast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 $total_iterations = $report->number_of_iterations;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 $last_iteration = $report->round($total_iterations);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 # Process only hits found in last iteration ...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 $oldhitarray_ref = $last_iteration->oldhits;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 HIT: while($sbjct = $last_iteration->nextSbjct) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 $id = $sbjct->name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 $is_old = grep /\Q$id\E/, @$oldhitarray_ref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 if ($is_old ){next HIT;}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 # do something with new hit...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 BPpsilite is a package for parsing multiple iteration PSIBLAST
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 reports. It is based closely on Ian Korf's BPlite.pm module for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 parsing single iteration BLAST reports (as modified by Lorenz Pollak).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 Two of the four basic objects of BPpsilite.pm are identical to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 corresponding objects in BPlite - the "HSP.pm" and "Sbjct.pm" objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 This DESCRIPTION documents only the one new object, the "iteration",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 as well as the additional methods that are implemented in BPpsilite
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 that are not in BPlite. See the BPlite documentation for information
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 on the BPlite, SBJCT and HSP objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 The essential difference between PSIBLAST and the other BLAST programs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 (in terms of report parsing) is that PSIBLAST performs multiple
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 iterations of the BLASTing of the database and the results of all of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 these iterations are stored in a single PSIBLAST report. (For general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 information on PSIBLAST see the README.bla file in the standalone
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 BLAST distribution and references therein). PSIBLAST's use of multiple
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 iterations imposes additional demands on the report parser: * There
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 are several iterations of hits. Many of those hits will be repeated
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 in more than one iteration. Often only the last iteration will be of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 interest. * Each iteration will list two different kinds of hits -
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 repeated hits that were used in the model and newly identified hits -
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 which may need to be processed in different manners * The total number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 of iterations performed is not displayed in the report until (almost)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 the very end of the report. (The user can specify a maximum number of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 iterations for the PSIBLAST search, but the program may perform fewer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 iterations if convergence is reached)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 BPpsilite addresses these issues by offering the following methods:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 * The total number of iterations used is given by the method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 number_of_iterations as in:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 $total_iterations = $report->number_of_iterations;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 * Results from an arbitrary iteration round can be accessed by using
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 the 'round' method:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 $iteration3_report = $report->round(3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 * The ids of the sequences which passed the significance threshold for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 the first time in the "nth" iteration can be identified by using the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 newhits method. Previously identified hits are identified by using
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 the oldhits method, as in:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 $oldhitarray_ref = $iteration3_report->oldhits;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 $newhitarray_ref = $iteration3_report->newhits;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 BPpsilite.pm should work equally well on reports generated by the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 StandAloneBlast.pm local BLAST module as with reports generated by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 remote psiblast searches. For examples of usage of BPpsilite.pm, the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 user is referred to the BPpsilite.t script in the "t" directory.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 User feedback is an integral part of the evolution of this and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 Bioperl modules. Send your comments and suggestions preferably to one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 of the Bioperl mailing lists. Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 http://bio.perl.org/MailList.html - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 of the bugs and their resolution. Bug reports can be submitted via email
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 =head1 AUTHOR - Peter Schattner
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 Email: schattner@alum.mit.edu
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 =head1 CONTRIBUTORS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 Jason Stajich, jason@cgt.mc.duke.edu
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 =head1 ACKNOWLEDGEMENTS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 Based on work of:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 Lorenz Pollak (lorenz@ist.org, bioperl port)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 =head1 COPYRIGHT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 BPlite.pm is copyright (C) 1999 by Ian Korf.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 =head1 DISCLAIMER
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 This software is provided "as is" without warranty of any kind.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 package Bio::Tools::BPpsilite;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 use Bio::Tools::BPlite::Iteration; #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 use Bio::Tools::BPlite::Sbjct; # Debug code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 use Bio::Root::Root; # root interface to inherit from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 use Bio::Root::IO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 use Bio::Tools::BPlite;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 @ISA = qw(Bio::Root::Root Bio::Root::IO);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Constructor.
#
# Builds the object through the parent class constructor, attaches the
# input stream via _initialize_io(@args), and seeds the parser state:
#   _tempdir               - directory obtained from $self->tempdir; used
#                            elsewhere in this module for the per-iteration
#                            files (presumably removed automatically because
#                            of CLEANUP => 1 -- confirm against Bio::Root::IO)
#   QPATLOCATION           - array ref of query pattern locations (PHIBLAST)
#   NEXT_ITERATION_NUMBER  - starts at 1
#   TOTAL_ITERATION_NUMBER - -1 means "preprocessing not yet done"
#   REPORT_DONE            - 0 when _parseHeader found alignments,
#                            1 for an empty report
#
# Args    : same named arguments as the parent constructors (e.g. -fh)
# Returns : the new Bio::Tools::BPpsilite object
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    # initialize IO
    $self->_initialize_io(@args);
    $self->{'_tempdir'} = $self->tempdir('CLEANUP' => 1);

    # Parser state; must be in place before the header is parsed because
    # _parseHeader pushes onto QPATLOCATION.
    @{$self}{ 'QPATLOCATION', 'NEXT_ITERATION_NUMBER', 'TOTAL_ITERATION_NUMBER' }
        = ( [], 1, -1 );

    # A true header parse means there are alignments still to be read.
    $self->{'REPORT_DONE'} = $self->_parseHeader ? 0 : 1;

    return $self;    # success - we hope!
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 =head2 query
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 Title : query
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 Usage : $query = $obj->query();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 Function : returns the query object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 Returns : query object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Accessor: returns whatever is stored under the 'QUERY' key of the object.
sub query {
    my ($self) = @_;
    return $self->{'QUERY'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 =head2 qlength
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 Title : qlength
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 Usage : $len = $obj->qlength();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 Function : returns the length of the query
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 Returns : length of query
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Accessor: returns whatever is stored under the 'LENGTH' key of the object.
sub qlength {
    my ($self) = @_;
    return $self->{'LENGTH'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 =head2 database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 Title : database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 Usage : $db = $obj->database();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 Function : returns the database used in this search
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 Returns : database used for search
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Accessor: returns whatever is stored under the 'DATABASE' key of the object.
sub database {
    my ($self) = @_;
    return $self->{'DATABASE'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 =head2 number_of_iterations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 Title : number_of_iterations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 Usage : $total_iterations = $obj-> number_of_iterations();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 Function : returns the total number of iterations used in this search
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 Returns : total number of iterations used for search
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 =head2 pattern
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 Title : pattern
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 Usage : $pattern = $obj->pattern();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 Function : returns the pattern used in a PHIBLAST search
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Accessor: returns whatever is stored under the 'PATTERN' key of the object.
sub pattern {
    my ($self) = @_;
    return $self->{'PATTERN'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 =head2 query_pattern_location
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 Title : query_pattern_location
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 Usage : $qpl = $obj->query_pattern_location();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 Function : returns reference to array of locations in the query sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 of pattern used in a PHIBLAST search
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Accessor: returns the value stored under the 'QPATLOCATION' key of the
# object (the same reference that is stored, not a copy).
sub query_pattern_location {
    my ($self) = @_;
    return $self->{'QPATLOCATION'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Returns the total number of iterations recorded for this report.
#
# The count is computed lazily: while TOTAL_ITERATION_NUMBER still holds
# the sentinel -1, _preprocess is run first to fill it in.
#
# Args    : none
# Returns : value of TOTAL_ITERATION_NUMBER after preprocessing
sub number_of_iterations {
    my ($self) = @_;

    _preprocess($self) if $self->{'TOTAL_ITERATION_NUMBER'} == -1;

    return $self->{'TOTAL_ITERATION_NUMBER'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 =head2 round
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 Title : round
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 Usage : $Iteration3 = $report->round(3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 Function : Method of retrieving data from a specific iteration
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 Returns : reference to requested Iteration object or null if argument
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 is greater than total number of iterations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 Args : number of the requested iteration
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Returns an Iteration object for one PSIBLAST round.
#
# The data for round N is read from "iterationN.tmp" inside
# $self->{'_tempdir'}.  Those files are only written by _preprocess, so
# preprocessing is triggered here (through number_of_iterations, which is a
# no-op once the count is known) instead of relying on the caller to have
# asked for the iteration count first.
#
# Note: the object's IO is re-pointed at the per-iteration file by
# _initialize_io, so the returned Iteration reads through $self.
#
# Args    : number of the requested iteration
# Returns : Bio::Tools::BPlite::Iteration object for that round
# Throws  : if no filehandle is available after re-initialising IO
#           (a non-existent round presumably also surfaces as an error
#           from the IO layer -- confirm against Bio::Root::IO)
sub round {
    my ($self, $iter_num) = @_;

    # Make sure the per-iteration temp files exist before opening one.
    $self->number_of_iterations;

    $self->_initialize_io(-file => Bio::Root::IO->catfile
                          ($self->{'_tempdir'}, "iteration".$iter_num.".tmp"));
    if ( !$self->_fh ) {
        $self->throw("unable to re-open iteration file for round ".$iter_num);
    }

    return Bio::Tools::BPlite::Iteration->new(-round  => $iter_num,
                                              -parent => $self);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 # begin private routines
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Internal: consumes the report header up to the first alignment section.
#
# Reads lines through $self->_readline and records:
#   QUERY        - text following "Query=", continuation lines joined and
#                  whitespace runs collapsed to single spaces
#   LENGTH       - the number inside the trailing "(N letters)"-style
#                  parenthesis of the query text, or undef if absent
#   DATABASE     - text following "Database:" at the start of a line
#   PATTERN      - pattern of a PHIBLAST report
#   QPATLOCATION - each PHIBLAST pattern position is pushed onto this array
#
# The line that ends the header is pushed back so the next reader sees it.
#
# Returns : 1 when a hit line (">") or "Results from round 1" is reached,
#           0 when "Parameters" / an indented "Database:" line is reached
#           (nothing in the report) or when input runs out.
sub _parseHeader {
    my ($self) = @_;

    # Lexical line variables are used so the caller's $_ is left untouched.
    while (defined(my $line = $self->_readline)) {
        if ($line =~ /^Query=\s+([^\(]+)/) {
            my $query = $1;

            # The query description may continue over several lines and is
            # terminated by a blank line (which is consumed here).
            while (defined(my $continuation = $self->_readline)) {
                last if $continuation !~ /\S/;
                $query .= $continuation;
            }
            $query =~ s/\s+/ /g;
            $query =~ s/^>//;

            # List-context capture: $length is undef when the pattern does
            # not match, rather than picking up a stale $1 from an earlier
            # successful match.
            my ($length) = $query =~ /\((\d+)\s+\S+\)\s*$/;

            $self->{'QUERY'}  = $query;
            $self->{'LENGTH'} = $length;
        }
        elsif ($line =~ /^Database:\s+(.+)/) {
            $self->{'DATABASE'} = $1;
        }
        elsif ($line =~ /^\s*pattern\s+(\S+).*position\s+(\d+)\D/) {
            # For PHIBLAST reports
            $self->{'PATTERN'} = $1;
            push @{ $self->{'QPATLOCATION'} }, $2;
        }
        elsif ($line =~ /^>|^Results from round 1/) {
            $self->_pushback($line);
            return 1;
        }
        elsif ($line =~ /^Parameters|^\s+Database:/) {
            $self->_pushback($line);
            return 0;    # there's nothing in the report
        }
    }

    # Ran out of input without reaching any alignment section.
    return 0;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 =head2 _preprocess
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 Title : _preprocess
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 Usage : internal routine, not called directly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 Function : determines number of iterations in report and prepares
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 data so individual iterations can be parsed in non-sequential
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 order
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 Returns : nothing. Sets TOTAL_ITERATION_NUMBER in object's hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 Args : reference to calling object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 #'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Internal: splits the remainder of the report into one file per iteration
# and records how many iterations there are.
#
# Every line read from $self->_readline is copied to
# "iteration<N>.tmp" inside $self->{'_tempdir'}, where N is the current
# round.  Output starts in round 1; each "Results from round N" line
# switches to (and truncates) the file for round N, and that line itself
# is the first line written to the new file.
#
# Args    : the calling object
# Returns : nothing; sets $self->{'TOTAL_ITERATION_NUMBER'}
# Throws  : if an iteration file cannot be opened, written or closed
sub _preprocess {
    my $self = shift;

    # Opens the output file for the given round with a lexical handle and
    # three-argument open, and hands back both handle and path.
    my $open_round_file = sub {
        my ($round_number) = @_;
        my $currentfile = Bio::Root::IO->catfile($self->{'_tempdir'},
                                                 "iteration$round_number.tmp");
        open(my $handle, '>', $currentfile)
            or $self->throw("cannot open filehandle to write to file $currentfile");
        return ($handle, $currentfile);
    };

    # open output file for data from iteration round #1
    my $round = 1;
    my ($out, $outfile) = $open_round_file->($round);

    while (defined(my $currentline = $self->_readline())) {
        if ($currentline =~ /^Results from round\s+(\d+)/) {
            $round = $1;

            # Finish the previous file before starting the next one; a
            # failed close on a write handle means data was lost.
            close($out)
                or $self->throw("cannot close file $outfile: $!");
            ($out, $outfile) = $open_round_file->($round);
        }
        elsif ($currentline =~ /CONVERGED/) {
            # This is a fix for psiblast parsing with -m 6 /AE
            $round--;
        }

        print {$out} $currentline
            or $self->throw("cannot write to file $outfile: $!");
    }

    $self->{'TOTAL_ITERATION_NUMBER'} = $round;

    # It is necessary to close the filehandle, otherwise the whole
    # file will not be read later !!
    close($out)
        or $self->throw("cannot close file $outfile: $!");

    return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 __END__