annotate variant_effect_predictor/Bio/Tools/Blast.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 #----------------------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 # PACKAGE : Bio::Tools::Blast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # PURPOSE : To encapsulate code for running, parsing, and analyzing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 # BLAST reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # AUTHOR : Steve Chervitz (sac@bioperl.org)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 # CREATED : March 1996
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 # REVISION: $Id: Blast.pm,v 1.30 2002/11/04 09:12:50 heikki Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 # STATUS : Alpha
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 # For the latest version and documentation, visit:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 # http://bio.perl.org/Projects/Blast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 # To generate documentation, run this module through pod2html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 # (preferably from Perl v5.004 or better).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 # Copyright (c) 1996-2000 Steve Chervitz. All Rights Reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 # This module is free software; you can redistribute it and/or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 # modify it under the same terms as Perl itself.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 #----------------------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 package Bio::Tools::Blast;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 use Exporter;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 use Bio::Tools::SeqAnal;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 use Bio::Root::Global qw(:std);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 use Bio::Root::Utilities qw(:obj);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 require 5.002;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 $ID $VERSION $Blast @Blast_programs $Revision $Newline);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 @ISA = qw( Bio::Tools::SeqAnal Exporter);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 @EXPORT = qw();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 @EXPORT_OK = qw($VERSION $Blast);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 %EXPORT_TAGS = ( obj => [qw($Blast)],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 std => [qw($Blast)]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 $ID = 'Bio::Tools::Blast';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 $VERSION = 0.09;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 $Revision = '$Id: Blast.pm,v 1.30 2002/11/04 09:12:50 heikki Exp $'; #'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 ## Static Blast object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 $Blast = {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 bless $Blast, $ID;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 $Blast->{'_name'} = "Static Blast object";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 @Blast_programs = qw(blastp blastn blastx tblastn tblastx);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 use vars qw($DEFAULT_MATRIX $DEFAULT_SIGNIF);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 my $DEFAULT_MATRIX = 'BLOSUM62';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 my $DEFAULT_SIGNIF = 999;# Value used as significance cutoff if none supplied.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 my $MAX_HSP_OVERLAP = 2; # Used when tiling multiple HSPs.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 ## POD Documentation:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 Bio::Tools::Blast - Bioperl BLAST sequence analysis object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 =head2 Parsing Blast reports
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 Parse an existing Blast report from file:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 use Bio::Tools::Blast;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 $blastObj = Bio::Tools::Blast->new( -file => '/tmp/blast.out',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 -parse => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 -signif => '1e-10',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 Parse an existing Blast report from STDIN:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 $blastObj = Bio::Tools::Blast->new( -parse => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 -signif => '1e-10',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 Then send a Blast report to your script via STDIN.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 Full parameters for parsing Blast reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 %blastParam = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 -run => \%runParam,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 -file => '',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 -parse => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 -signif => 1e-5,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 -filt_func => \&my_filter,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 -min_len => 15,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 -check_all_hits => 0,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 -strict => 0,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 -stats => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 -best => 0,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 -share => 0,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 -exec_func => \&process_blast,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 -save_array => \@blast_objs, # not used if -exec_func defined.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 See L<parse()|parse> for a description of parameters and see L<USAGE | USAGE> for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 more examples including how to parse streams containing multiple Blast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 reports (see L<Using the Static $Blast Object>).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 See L<Memory Usage Issues> for information about how to make Blast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 parsing more memory efficient.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 =head2 Running Blast reports
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 Run a new Blast2 at NCBI and then parse it:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 %runParam = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 -method => 'remote',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 -prog => 'blastp',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 -database => 'swissprot',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 -seqs => [ $seq ], # Bio::Seq.pm objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 $blastObj = Bio::Tools::Blast->new( -run => \%runParam,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 -parse => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 -signif => '1e-10',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 -strict => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 Full parameters for running Blasts at NCBI using Webblast.pm:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 %runParam = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 -method => 'remote',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 -prog => 'blastp',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 -version => 2, # BLAST2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 -database =>'swissprot',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 -html => 0,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 -seqs => [ $seqObject ], # Bio::Seq.pm object(s)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 -descr => 250,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 -align => 250,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 -expect => 10,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 -gap => 'on',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 -matrix => 'PAM250',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 -email => undef, # don't send report via e-mail if parsing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 -filter => undef, # use default
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 -gap_c => undef, # use default
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 -gap_e => undef, # use default
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 -word => undef, # use default
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 -min_len => undef, # use default
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 See L<run()|run> and L<USAGE | USAGE> for more information about running Blasts.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 =head2 HTML-formatting Blast reports
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 Print an HTML-formatted version of a Blast report:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 use Bio::Tools::Blast qw(:obj);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 $Blast->to_html($filename);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 $Blast->to_html(-file => $filename,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 -header => "<H1>Blast Results</H1>");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 $Blast->to_html(-file => $filename,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 -out => \@array); # store output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 $Blast->to_html(); # use STDIN
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 Results are sent directly to STDOUT unless an C<-out =E<gt> array_ref>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 parameter is supplied. See L<to_html()|to_html> for details.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 =head1 INSTALLATION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 This module is included with the central Bioperl distribution:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 http://bio.perl.org/Core/Latest
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 ftp://bio.perl.org/pub/DIST
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 Follow the installation instructions included in the README file.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 The Bio::Tools::Blast.pm module encapsulates data and methods for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 running, parsing, and analyzing pre-existing BLAST reports. This
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 module defines an application programming interface (API) for working
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 with Blast reports. A Blast object is constructed from raw Blast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 output and encapsulates the Blast results which can then be accessed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 via the interface defined by the Blast object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 The ways in which researchers use Blast data are many and varied. This
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 module attempts to be general and flexible enough to accommodate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 different uses. The Blast module API is still at an early stage of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 evolution and I expect it to continue to evolve as new uses for Blast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 data are developed. Your L<FEEDBACK | FEEDBACK> is welcome.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 B<FEATURES:>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 =over 2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 =item * Supports NCBI Blast1.x, Blast2.x, and WashU-Blast2.x, gapped
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 and ungapped.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 Can parse HTML-formatted as well as non-HTML-formatted reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 =item * Launch new Blast analyses remotely or locally.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 Blast objects can be constructed directly from the results of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 run. See L<run()|run>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 =item * Construct Blast objects from pre-existing files or from a new run.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 Build a Blast object from a single file or build multiple Blast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 objects from an input stream containing multiple reports. See
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 L<parse()|parse>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 =item * Add hypertext links from a BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 See L<to_html()|to_html>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 =item * Generate sequence and sequence alignment objects from HSP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 If you have Bio::Seq.pm and Bio::UnivAln.pm installed on your system,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 they can be used for working with high-scoring segment pair (HSP)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 sequences in the Blast alignment. (A new version of Bio::Seq.pm is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 included in the distribution, see L<INSTALLATION | INSTALLATION>). For more
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 information about them, see:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 http://bio.perl.org/Projects/Sequence/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 http://bio.perl.org/Projects/SeqAlign/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 A variety of different data can be extracted from the Blast report by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 querying the Blast.pm object. Some basic examples are given in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 L<USAGE | USAGE> section. For some working scripts, see the links provided in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 L<the DEMO SCRIPTS section | DEMO>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 As a part of the incipient Bioperl framework, the Bio::Tools::Blast.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 module inherits from B<Bio::Tools::SeqAnal.pm>, which provides some
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 generic functionality for biological sequence analysis. See the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 documentation for that module for details
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 (L<Links to related modules>).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 =head2 The BLAST Program
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 BLAST (Basic Local Alignment Search Tool) is a widely used algorithm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 for performing rapid sequence similarity searches between a single DNA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 or protein sequence and a large dataset of sequences. BLAST analyses
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 are typically performed by dedicated remote servers, such as the ones
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 at the NCBI. Individual groups may also run the program on local
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 machines.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 The Blast family includes 5 different programs:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 Query Seq Database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 ------------ ----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 blastp -- protein protein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 blastn -- nucleotide nucleotide
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 blastx -- nucleotide* protein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 tblastn -- protein nucleotide*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 tblastx -- nucleotide* nucleotide*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 * = dynamically translated in all reading frames, both strands
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 See L<References & Information about the BLAST program>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 =head2 Versions Supported
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 BLAST reports generated by different application front ends are similar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 but not exactly the same. Blast reports are not intended to be exchange formats,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 making parsing software susceptible to obsolescence. This module aims to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 support BLAST reports generated by different implementations:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 Implementation Latest version tested
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 -------------- --------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 NCBI Blast1 1.4.11 [24-Nov-97]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 NCBI Blast2 2.0.8 [Jan-5-1999]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 WashU-BLAST2 2.0a19MP [05-Feb-1998]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 GCG 1.4.8 [1-Feb-95]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 Support for both gapped and ungapped versions is included. Currently, there
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 is only rudimentary support for PSI-BLAST in that these reports can be parsed but
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 there is no special treatment of separate iteration rounds (they are all
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 merged together).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 =head2 References & Information about the BLAST program
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 B<WEBSITES:>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 http://www.ncbi.nlm.nih.gov/BLAST/ - Homepage at NCBI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 http://www.ncbi.nlm.nih.gov/BLAST/blast_help.html - Help manual
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 http://blast.wustl.edu/ - WashU-Blast2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 B<PUBLICATIONS:> (with PubMed links)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 Altschul S.F., Gish W., Miller W., Myers E.W., Lipman D.J. (1990).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 "Basic local alignment search tool", J Mol Biol 215: 403-410.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?uid=2231712&form=6&db=m&Dopt=r
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 "Gapped BLAST and PSI-BLAST: a new generation of protein database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 search programs", Nucleic Acids Res. 25:3389-3402.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?uid=9254694&form=6&db=m&Dopt=r
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302 Karlin, Samuel and Stephen F. Altschul (1990). Methods for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 assessing the statistical significance of molecular sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 features by using general scoring schemes. Proc. Natl. Acad.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 Sci. USA 87:2264-68.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?uid=2315319&form=6&db=m&Dopt=b
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 Karlin, Samuel and Stephen F. Altschul (1993). Applications
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 and statistics for multiple high-scoring segments in molecu-
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 lar sequences. Proc. Natl. Acad. Sci. USA 90:5873-7.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?uid=8390686&form=6&db=m&Dopt=b
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 =head1 USAGE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 =head2 Creating Blast objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 A Blast object can be constructed from the contents of a Blast report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 using a set of named parameters that specify significance criteria for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 parsing. The report data can be read in from an existing file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 specified with the C<-file =E<gt> 'filename'> parameter or from a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 STDIN stream containing potentially multiple Blast reports. If the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 C<-file> parameter does not contain a valid filename, STDIN will be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 used. Separate Blast objects will be created for each report in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 stream.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 To parse the report, you must include a C<-parse =E<gt> 1> parameter
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 in addition to any other parsing parameters.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 See L<parse()|parse> for a full description of parsing parameters.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 To run a new report and then parse it, include a C<-run =E<gt> \%runParams>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 parameter containing a reference to a hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 that holds the parameters required by the L<run()|run> method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 The constructor for Blast objects is inherited from Bio::Tools::SeqAnal.pm.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 See the B<_initialize>() method of that package for general information
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 relevant to creating Blast objects. (The B<new>() method, inherited from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 B<Bio::Root::Object.pm>, calls B<_initialize>(). See L<Links to related modules>).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 The Blast object can read compressed (gzipped) Blast report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 files. Compression/decompression uses the gzip or compress programs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 that are standard on Unix systems and should not require special
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 configuration. If you can't or don't want to use gzip as the file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 compression tool, either pre-uncompress your files before parsing with
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 this module or modify B<Bio::Root::Utilities.pm> to your liking.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 Blast objects can be generated either by direct instantiation as in:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 use Bio::Tools::Blast;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 $blast = new Bio::Tools::Blast (%parameters);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 =head2 Using the Static $Blast Object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 use Bio::Tools::Blast qw(:obj);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 This exports the static $Blast object into your namespace. "Static"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 refers to the fact that it has class scope and there is one of these
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 created when you use this module. The static $Blast object is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 basically an empty object that is provided for convenience and is also
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 used for various internal chores.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 It is exported by this module and can be used for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 parsing and running reports as well as HTML-formatting without having
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364 to first create an empty Blast object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 Using the static $Blast object for parsing a STDIN stream of Blast reports:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 use Bio::Tools::Blast qw(:obj);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 sub process_blast {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 my $blastObj = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 print $blastObj->table();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 $blastObj->destroy;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 $Blast->parse( -parse => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 -signif => '1e-10',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 -exec_func => \&process_blast,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 Then pipe a stream of Blast reports into your script via STDIN. For
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 each Blast report extracted from the input stream, the parser will
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 generate a new Blast object and pass it to the function specified by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 C<-exec_func>. The destroy() call tells Perl to free the memory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 associated with the object, important if you are crunching through
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 many reports. This method is inherited from B<Bio::Root::Object.pm>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 (see L<Links to related modules>). See L<parse()|parse> for a full
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 description of parameters and L<the DEMO SCRIPTS section | DEMO> for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 additional examples.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 =head2 Running Blasts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 To run a Blast, create a new Blast object with a C<-run =E<gt>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 \%runParams> parameter. Remote Blasts are performed by including a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 C<-method =E<gt> 'remote'> parameter; local Blasts are performed by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396 including a C<-method =E<gt> 'local'> parameter. See
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 L<Running Blast reports> as well as
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 L<the DEMO SCRIPTS section | DEMO> for examples.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 Note that running local Blasts is not yet supported; see below.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 Note that the C<-seqs =E<gt> [ $seqs ]> run parameter must contain a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 reference to an array of B<Bio::Seq.pm> objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 (L<Links to related modules>). Encapsulating the sequence in an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 object makes sequence information much easier to handle as it can
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 be supplied in a variety of formats. Bio::Seq.pm is included with
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 this distribution (L<INSTALLATION | INSTALLATION>).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 Remote Blasts are implemented using the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 B<Bio::Tools::Blast::Run::Webblast.pm> module. Local Blasts require
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 that you customize the B<Bio::Tools::Blast::Run::LocalBlast.pm>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 module. The version of LocalBlast.pm included with this distribution
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 provides the basic framework for running local Blasts.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 See L<Links to related modules>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 =head2 Significance screening
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 A C<-signif> parameter can be used to screen out all hits with
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 P-values (or Expect values) above a certain cutoff. For example, to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419 exclude all hits with Expect values above 1.0e-10: C<-signif =E<gt>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 1e-10>. Providing a C<-signif> cutoff can speed up processing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 tremendously, since only a small fraction of the report need be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 parsed. This is because the C<-signif> value is used to screen hits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 based on the data in the "Description" section of the Blast report:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 For NCBI BLAST2 reports:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 Score E
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 Sequences producing significant alignments: (bits) Value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 sp|P31376|YAB1_YEAST HYPOTHETICAL 74.1 KD PROTEIN IN CYS3-MDM10... 957 0.0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 For BLAST1 or WashU-BLAST2 reports:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 Smallest
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 Sum
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 High Probability
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 Sequences producing High-scoring Segment Pairs: Score P(N) N
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 PDB:3PRK_E Proteinase K complexed with inhibitor ........... 504 1.8e-50 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 Thus, the C<-signif> parameter will screen based on Expect values for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 BLAST2 reports and based on P-values for BLAST1/WashU-BLAST2 reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 To screen based on other criteria, you can supply a C<-filt_func>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 parameter containing a function reference that takes a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 B<Bio::Tools::Blast::Sbjct.pm> object as an argument and returns a boolean,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 true if the hit is to be kept (hits returning false are screened out). See example below for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 L<Screening hits using arbitrary criteria>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 =head2 Get the best hit.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 $hit = $blastObj->hit;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 A "hit" is contained by a B<Bio::Tools::Blast::Sbjct.pm> object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 =head2 Get the P-value or Expect value of the most significant hit.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 $p = $blastObj->lowest_p;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 $e = $blastObj->lowest_expect;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 Alternatively:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 $p = $blastObj->hit->p;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 $e = $blastObj->hit->expect;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 Note that P-values are not reported in NCBI Blast2 reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 =head2 Iterate through all the hits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 foreach $hit ($blastObj->hits) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 printf "%s\t %.1e\t %d\t %.2f\t %d\n",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 $hit->name, $hit->expect, $hit->num_hsps,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 $hit->frac_identical, $hit->gaps;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 Refer to the documentation for B<Bio::Tools::Blast::Sbjct.pm>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 for other ways to work with hit objects (L<Links to related modules>).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 =head2 Screening hits using arbitrary criteria
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 sub filter { $hit=shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 return ($hit->gaps == 0 and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 $hit->frac_conserved > 0.5); }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 $blastObj = Bio::Tools::Blast->new( -file => '/tmp/blast.out',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 -parse => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 -filt_func => \&filter );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 While the Blast object is parsing the report, each hit is checked by calling
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 &filter($hit). All hits that generate false return values from &filter
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 are screened out and will not be added to the Blast object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 Note that the Blast object will normally stop parsing the report after
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 the first non-significant hit or the first hit that does not pass the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 filter function. To force the Blast object to check all hits,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495 include a C<-check_all_hits =E<gt> 1> parameter.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496 Refer to the documentation for B<Bio::Tools::Blast::Sbjct.pm>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497 for other ways to work with hit objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 =item Hit start, end coordinates.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 print $sbjct->start('query');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504 print $sbjct->end('sbjct');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 In array context, you can get information for both query and sbjct with one call:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 ($qstart, $sstart) = $sbjct->start();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 ($qend, $send) = $sbjct->end();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 For important information regarding coordinate information, see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 the L<HSP start, end, and strand> section below.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 Also check out documentation for the start and end methods in B<Bio::Tools::Blast::Sbjct.pm>,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514 which explains what happens if there is more than one HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 =head2 Working with HSPs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522 =item Iterate through all the HSPs of every hit
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 foreach $hit ($blastObj->hits) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 foreach $hsp ($hit->hsps) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 printf "%.1e\t %d\t %.1f\t %.2f\t %.2f\t %d\t %d\n",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 $hsp->expect, $hsp->score, $hsp->bits,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 $hsp->frac_identical, $hsp->frac_conserved,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 $hsp->gaps('query'), $hsp->gaps('sbjct');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 } }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532 Refer to the documentation for B<Bio::Tools::Blast::HSP.pm>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 for other ways to work with HSP objects (L<Links to related modules>).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539 =item Extract HSP sequence data as strings or sequence objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 Get the first HSP of the first hit and the sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542 of the query and sbjct as strings.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544 $hsp = $blastObj->hit->hsp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 $query_seq = $hsp->seq_str('query');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 $hsp_seq = $hsp->seq_str('sbjct');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548 Get the indices of identical and conserved positions in the HSP query seq.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550 @query_iden_indices = $hsp->seq_inds('query', 'identical');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 @query_cons_indices = $hsp->seq_inds('query', 'conserved');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553 Similarly for the sbjct sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 @sbjct_iden_indices = $hsp->seq_inds('sbjct', 'identical');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556 @sbjct_cons_indices = $hsp->seq_inds('sbjct', 'conserved');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 print "Query in Fasta format:\n", $hsp->seq('query')->layout('fasta');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559 print "Sbjct in Fasta format:\n", $hsp->seq('sbjct')->layout('fasta');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561 See the B<Bio::Seq.pm> package for more information about using these sequence objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562 (L<Links to related modules>).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568 =item Create sequence alignment objects using HSP sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570 $aln = $hsp->get_aln;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571 print " consensus:\n", $aln->consensus();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572 print $hsp->get_aln->layout('fasta');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574 $ENV{READSEQ_DIR} = '/home/users/sac/bin/solaris';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575 $ENV{READSEQ} = 'readseq';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576 print $hsp->get_aln->layout('msf');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578 MSF formatted layout requires Don Gilbert's ReadSeq program (not included).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579 See B<Bio::UnivAln.pm> for more information about using these alignment objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580 (L<Links to related modules>).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586 =item HSP start, end, and strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588 To facilitate HSP processing, endpoint data for each HSP sequence are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589 normalized so that B<start is always less than end>. This affects TBLASTN
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 and TBLASTX HSPs on the reverse complement or "Minus" strand.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592 Some examples of obtaining start, end coordinates for HSP objects:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594 print $hsp->start('query');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595 print $hsp->end('sbjct');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596 ($qstart, $sstart) = $hsp->start();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597 ($qend, $send) = $hsp->end();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599 Strandedness of the HSP can be assessed using the strand() method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600 on the HSP object:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602 print $hsp->strand('query');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603 print $hsp->strand('sbjct');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605 These will return 'Minus' or 'Plus'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606 Or, to get strand information for both query and sbjct with a single call:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608 ($qstrand, $sstrand) = $hsp->strand();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612 =head2 Report Generation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616 =item Generate a tab-delimited table of all results.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618 print $blastObj->table;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619 print $blastObj->table(0); # don't include hit descriptions.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620 print $blastObj->table_tiled;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622 The L<table()|table> method returns data for each B<HSP> of each hit listed one per
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623 line. The L<table_tiled()|table_tiled> method returns data for each B<hit, i.e., Sbjct>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624 listed one per line; data from multiple HSPs are combined after tiling to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625 reduce overlaps. See B<Bio::Tools::Blast::Sbjct.pm> for more information about
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 HSP tiling. These methods generate stereotypical, tab-delimited data for each
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627 hit of the Blast report. The output is suitable for importation into
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 spreadsheets or database tables. Feel free to roll your own table function if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629 you need a custom table.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631 For either table method, descriptions of each hit can be included if a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632 single, true argument is supplied (e.g., $blastObj-E<gt>table(1)). The description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633 will be added as the last field. This will significantly increase the size of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634 the table. Labels for the table columns can be obtained with L<table_labels()|table_labels>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 and L<table_labels_tiled()|table_labels_tiled>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 =item Print a summary of the Blast report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643 $blastObj->display();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644 $blastObj->display(-show=>'hits');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646 L<display()|display> prints various statistics extracted from the Blast report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647 such as database name, database size, matrix used, etc. The
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648 C<display(-show=E<gt>'hits')> call prints a non-tab-delimited table
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649 attempting to line the data up into more readable columns. The output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650 generated is similar to L<table_tiled()|table_tiled>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
653
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
654 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
655
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656 =item HTML-format an existing report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658 use Bio::Tools::Blast qw(:obj);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660 # Going straight from a non HTML report file to HTML output using
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
661 # the static $Blast object exported by Bio::Tools::Blast.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
662 $Blast->to_html(-file => '/usr/people/me/blast.output.txt',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
663 -header => qq|<H1>BLASTP Results</H1><A HREF="home.html">Home</A>|
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
664 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
665
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
666 # You can also use a specific Blast object created previously.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
667 $blastObj->to_html;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669 L<to_html()|to_html> will send HTML output, line-by-line, directly to STDOUT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
670 unless an C<-out =E<gt> array_ref> parameter is supplied (e.g., C<-out
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
671 =E<gt> \@array>), in which case the HTML will be stored in @array, one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672 line per array element. The direct outputting permits faster response
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
673 time since Blast reports can be huge. The -header tag can contain a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674 string containing any HTML that you want to appear at the top of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675 Blast report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679 =head1 DEMO SCRIPTS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681 Sample Scripts are included in the central bioperl distribution in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682 'examples/blast/' directory (see L<INSTALLATION | INSTALLATION>):
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684 =head2 Handy library for working with Bio::Tools::Blast.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686 examples/blast/blast_config.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688 =head2 Parsing Blast reports one at a time.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690 examples/blast/parse_blast.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691 examples/blast/parse_blast2.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
692 examples/blast/parse_positions.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694 =head2 Parsing sets of Blast reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696 examples/blast/parse_blast.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697 examples/blast/parse_multi.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699 B<Warning:> See note about L<Memory Usage Issues>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701 =head2 Running Blast analyses one at a time.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703 examples/blast/run_blast_remote.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
704
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705 =head2 Running Blast analyses given a set of sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707 examples/blast/blast_seq.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709 =head2 HTML-formatting Blast reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711 examples/blast/html.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713 =head1 TECHNICAL DETAILS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
715 =head2 Blast Modes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
716
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717 A BLAST object may be created using one of three different modes as
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718 defined by the B<Bio::Tools::SeqAnal.pm> package
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719 (See L<Links to related modules>):
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 -- parse - Load a BLAST report and parse it, storing parsed data in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722 Blast.pm object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723 -- run - Run the BLAST program to generate a new report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724 -- read - Load a BLAST report into the Blast object without parsing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726 B<Run mode support has recently been added>. The module
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727 B<Bio::Tools::Blast::Run::Webblast.pm> is a modularized adaptation of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728 the webblast script by Alex Dong Li:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730 http://www.genet.sickkids.on.ca/bioinfo_resources/software.html#webblast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732 for running remote Blast analyses and saving the results locally. Run
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
733 mode can be combined with a parse mode to generate a Blast report and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
734 then build the Blast object from the parsed results of this report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
735 (see L<run()|run> and L<SYNOPSIS | SYNOPSIS>).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
736
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
737 In read mode, the BLAST report is read in by the Blast object but is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
738 not parsed. This could be used to internalize a Blast report but not
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
739 parse it for results (e.g., generating HTML formatted output).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
740
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
741 =head2 Significant Hits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
742
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
743 This module permits the screening of hits on the basis of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
744 user-specified criteria for significance. Currently, Blast reports can
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745 be screened based on:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
747 CRITERIA PARAMETER VALUE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
748 ---------------------------------- --------- ----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749 1) the best Expect (or P) value -signif float or sci-notation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750 2) the length of the query sequence -min_length integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
751 3) arbitrary criteria -filt_func function reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753 The parameters are used for construction of the BLAST object or when
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754 running the L<parse()|parse> method on the static $Blast object. The
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
755 -SIGNIF value represents the number listed in the description section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
756 at the top of the Blast report. For Blast2, this is an Expect value,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
757 for Blast1 and WashU-Blast2, this is a P-value. The idea behind the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
758 C<-filt_func> parameter is that the hit has to pass through a filter
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759 to be considered significant. Refer to the documentation for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760 B<Bio::Tools::Blast::Sbjct.pm> for ways to work with hit objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762 Using a C<-signif> parameter allows for the following:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764 =over 2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766 =item Faster parsing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768 Each hit can be screened by examination of the description line alone
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769 without fully parsing the HSP alignment section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771 =item Flexibility.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773 The C<-signif> tag provides a more semantic-free way to specify the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
774 value to be used as a basis for screening hits. Thus, C<-signif> can
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
775 be used for screening Blast1 or Blast2 reports. It is up to the user
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
776 to understand whether C<-signif> represents a P-value or an Expect
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
777 value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
778
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
779 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
781 Any hit not meeting the significance criteria will not be added to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
782 "hit list" of the BLAST object. Also, a BLAST object without any hits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
783 meeting the significance criteria will throw an exception during
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
784 object construction (a fatal event).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
785
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
786 =head2 Statistical Parameters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
787
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
788 There are numerous parameters which define the behavior of the BLAST
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
789 program and which are useful for interpreting the search
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
790 results. These parameters are extracted from the Blast report:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
791
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792 filter -- for masking out low-complexity sequences or short repeats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793 matrix -- name of the substitution scoring matrix (e.g., BLOSUM62)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794 E -- Expect filter (screens out frequent scores)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
795 S -- Cutoff score for segment pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
796 W -- Word length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
797 T -- Threshold score for word pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
798 Lambda, -- Karlin-Altschul "sum" statistical parameters dependent on
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
799 K, H sequence composition.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
800 G -- Gap creation penalty.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801 E -- Gap extension penalty.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
802
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
803 These parameters are not always needed. Extraction may be turned off
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
804 explicitly by including a C<-stats =E<gt> 0> parameter during object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
805 construction. Support for all statistical parameters is not complete.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
806
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
807 For more about the meaning of parameters, check out the NCBI URLs given above.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
808
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
809 =head2 Module Organization
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
810
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
811 The modules that comprise this Bioperl Blast distribution are located in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
812 Bio:: hierarchy as shown in the diagram below.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
813
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
814 Bio/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
815 |
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
816 +--------------------------+
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
817 | |
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
818 Bio::Tools Bio::Root
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
819 | |
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
820 +----------------------+ Object.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
821 | | |
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
822 SeqAnal.pm Blast.pm Blast/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
823 |
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
824 +---------+---------+------------+
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
825 | | | |
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
826 Sbjct.pm HSP.pm HTML.pm Run/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
827 |
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
828 +------------+
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
829 | |
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
830 Webblast.pm LocalBlast.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
831
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
832 Bio::Tools::Blast.pm is a concrete class that inherits from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
833 B<Bio::Tools::SeqAnal.pm> and relies on other modules for parsing and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
834 managing BLAST data. Worth mentioning about this hierarchy is the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
835 lack of a "Parse.pm" module. Since parsing is considered central to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
836 the purpose of the Bioperl Blast module (and Bioperl in general), it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
837 seems somewhat unnatural to segregate out all parsing code. This
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
838 segregation could also lead to inefficiencies and harder to maintain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
839 code. I consider this issue still open for debate.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
840
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
841 Bio::Tools::Blast.pm, B<Bio::Tools::Blast::Sbjct.pm>, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
842 B<Bio::Tools::Blast::HSP.pm> are mostly dedicated to parsing and all
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
843 can be used to instantiate objects. Blast.pm is the main "command and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
844 control" module, inheriting some basic behaviors from SeqAnal.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
845 (things that are not specific to Blast I<per se>).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
846
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
847 B<Bio::Tools::Blast::HTML.pm> contains functions dedicated to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
848 generating HTML-formatted Blast reports and does not generate objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
849
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
850 =head2 Running Blasts: Details
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
851
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
852 B<Bio::Tools::Blast::Run::Webblast.pm> contains a set of functions for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
853 running Blast analyses at a remote server and also does not
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
854 instantiate objects. It uses a helper script called postclient.pl,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
855 located in the Run directory. The proposed LocalBlast.pm module would
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
856 be used for running Blast reports on local machines and thus would be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
857 customizable for different sites. It would operate in a parallel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
858 fashion to Webblast.pm (i.e., being a collection of functions, taking
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
859 in sequence objects or files, returning result files).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
860
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
861 The Run modules are considered experimental. In particular,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
862 Webblast.pm captures an HTML-formatted version of the Blast report from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
863 the NCBI server and strips out the HTML in preparation for parsing. A
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864 more direct approach would be to capture the Blast results directly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
865 from the server using an interface to the NCBI toolkit. This approach
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
866 was recently proposed on the Bioperl mailing list:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
867 http://www.uni-bielefeld.de/mailinglists/BCD/vsns-bcd-perl/9805/0000.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
868
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
869 =head2 Memory Usage Issues
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
870
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
871 Parsing large numbers of Blast reports (a few thousand or so) with
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
872 Bio::Tools::Blast.pm may lead to unacceptable memory usage situations.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
873 This is somewhat dependent on the size and complexity of the reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
874
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
875 While this problem is under investigation, here are some workarounds
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
876 that fix the memory usage problem:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
877
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
878 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
879
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
880 =item 1 Don't specify a -signif criterion when calling L<parse()|parse>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
881
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
882 The C<-signif> value is used for imposing an upper limit to the expect- or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
883 P-value for Blast hits to be parsed. For reasons that are still under
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
884 investigation, specifying a value for C<-signif> in the L<parse()|parse>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
885 method prevents Blast objects from being fully
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
886 garbage collected. When using the B<parse_blast.pl> or B<parse_multi.pl>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
887 scripts (in C<examples/blast/> of the bioperl distribution), don't supply
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
888 a C<-signif> command-line parameter.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
889
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
890 =item 2 If you want to impose a -signif criterion, put it inside a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
891 -filt_func.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
892
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
893 For the L<parse()|parse> method, a -signif =E<gt> 1e-5 parameter is equivalent
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
894 to using a filter function parameter of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
895
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
896 -filt_func => sub { my $hit = shift; return $hit->signif <= 1e-5; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
897
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
898 Using the B<examples/blast/parse_multi.pl> script, you can supply a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
899 command-line argument of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
900
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
901 -filt_func '$hit->signif <= 1e-5'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
902
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
903 For more information, see L<parse()|parse> and the section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
904 L<Screening hits using arbitrary criteria>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
905
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
906 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
907
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
908 =head1 TODO
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
909
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
910 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
911
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
912 =item * Develop a functional, prototype Bio::Tools::Blast::Run::LocalBlast.pm module.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
913
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
914 =item * Add support for PSI-BLAST and PHI-BLAST
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
915
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
916 =item * Parse histogram of expectations and retrieve gif image in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
917 Blast report (if present).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
918
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
919 =item * Further investigate memory leak that occurs when parsing Blast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
920 streams when supplying a -signif parameter to L<parse()|parse>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
921
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
922 =item * Access Blast results directly from the NCBI server using a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
923 Perl interface to the NCBI toolkit or XML-formatted Blast reports (when
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
924 available).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
925
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
926 =item * Further exploit Bio::UnivAln.pm and multiple-sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
927 alignment programs using HSP sequence data. Some of this may best go
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
928 into a separate, dedicated module or script as opposed to burdening
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
929 Blast.pm, Sbjct.pm, and HSP.pm with additional functionality that is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
930 not always required.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
931
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
932 =item * Add an example script for parsing Blast reports containing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
933 HTML formatting.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
934
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
935 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
936
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
937 =head1 VERSION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
938
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
939 Bio::Tools::Blast.pm, 0.09
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
940
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
941 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
942
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
943 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
944
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
945 User feedback is an integral part of the evolution of this and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
946 Bioperl modules. Send your comments and suggestions preferably to one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
947 of the Bioperl mailing lists. Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
948
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
949 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
950 http://bio.perl.org/MailList.html - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
951
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
952 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
953
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
954 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
955 of the bugs and their resolution. Bug reports can be submitted via email
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
956 or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
957
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
958 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
959 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
960
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
961 =head1 AUTHOR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
962
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
963 Steve Chervitz, sac@bioperl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
964
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
965 See the L<FEEDBACK | FEEDBACK> section for where to send bug reports and comments.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
966
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
967 =head1 ACKNOWLEDGEMENTS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
968
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
969 This module was developed under the auspices of the Saccharomyces Genome
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
970 Database:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
971 http://genome-www.stanford.edu/Saccharomyces
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
972
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
973 Other contributors include: Alex Dong Li (webblast), Chris Dagdigian
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
974 (Seq.pm), Steve Brenner (Seq.pm), Georg Fuellen (Seq.pm, UnivAln.pm),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
975 and untold others who have offered comments (noted in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
976 Bio/Tools/Blast/CHANGES file of the distribution).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
977
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
978 =head1 COPYRIGHT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
979
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
980 Copyright (c) 1996-98 Steve Chervitz. All Rights Reserved. This
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
981 module is free software; you can redistribute it and/or modify it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
982 under the same terms as Perl itself.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
983
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
984 =head1 SEE ALSO
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
985
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
986 Bio::Tools::SeqAnal.pm - Sequence analysis object base class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
987 Bio::Tools::Blast::Sbjct.pm - Blast hit object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
988 Bio::Tools::Blast::HSP.pm - Blast HSP object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
989 Bio::Tools::Blast::HTML.pm - Blast HTML-formatting utility class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
990 Bio::Tools::Blast::Run::Webblast.pm - Utility module for running Blasts remotely.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
991 Bio::Tools::Blast::Run::LocalBlast.pm - Utility module for running Blasts locally.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
992 Bio::Seq.pm - Biosequence object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
993 Bio::UnivAln.pm - Biosequence alignment object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
994 Bio::Root::Object.pm - Proposed base class for all Bioperl objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
995
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
996 =head2 Links to related modules
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
997
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
998 Bio::Tools::SeqAnal.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
999 http://bio.perl.org/Core/POD/Bio/Tools/SeqAnal.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1000
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1001 Bio::Tools::Blast::Sbjct.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1002 http://bio.perl.org/Core/POD/Bio/Tools/Blast/Sbjct.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1003
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1004 Bio::Tools::Blast::HSP.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1005 http://bio.perl.org/Core/POD/Bio/Tools/Blast/HSP.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1006
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1007 Bio::Tools::Blast::HTML.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1008 http://bio.perl.org/Core/POD/Bio/Tools/Blast/HTML.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1009
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1010 Bio::Tools::Blast::Run::Webblast.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1011 http://bio.perl.org/Core/POD/Bio/Tools/Blast/Run/Webblast.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1012
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1013 Bio::Tools::Blast::Run::LocalBlast.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1014 http://bio.perl.org/Core/POD/Bio/Tools/Blast/Run/LocalBlast.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1015
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1016 Bio::Seq.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1017 http://bio.perl.org/Core/POD/Seq.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1018
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1019 Bio::UnivAln.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1020 http://bio.perl.org/Projects/SeqAlign/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1021 Europe: http://www.techfak.uni-bielefeld.de/bcd/Perl/Bio/#univaln
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1022
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1023 Bio::Root::Object.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1024 http://bio.perl.org/Core/POD/Root/Object.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1025
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1026 http://bio.perl.org/Projects/modules.html - Online module documentation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1027 http://bio.perl.org/Projects/Blast/ - Bioperl Blast Project
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1028 http://bio.perl.org/ - Bioperl Project Homepage
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1029
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1030 L<References & Information about the BLAST program>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1031
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1032 =head1 KNOWN BUGS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1033
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1034 There is a memory leak that occurs when parsing streams
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1035 containing large numbers of Blast reports (a few thousand or so) and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1036 specifying a -signif parameter to the L<parse()|parse> method. For a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1037 workaround, see L<Memory Usage Issues>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1038
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1039 Not sharing statistical parameters between different Blast objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1040 when parsing a multi-report stream has not been completely tested and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1041 may be a little buggy.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1042
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1043 Documentation inconsistencies or inaccuracies may exist since this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1044 module underwent a fair bit of re-working going from 0.75 to 0.80
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1045 (corresponds to versions 0.04.4 to 0.05 of the bioperl distribution).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1046
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1047 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1048
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1049 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1050 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1051 ###
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1052 #### END of main POD documentation.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1053 ###
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1054 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1055 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1056
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1057 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1058
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1059 Methods beginning with a leading underscore are considered private and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1060 are intended for internal use by this module. They are B<not>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1061 considered part of the public interface and are described here for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1062 documentation purposes only.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1063
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1064 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1065
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1066 ##############################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1067 ## CONSTRUCTOR ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1068 ##############################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1069
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1070
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub new {
    # Constructor. Issues a deprecation notice through the class's warn()
    # method, then hands all remaining arguments, unchanged, to the
    # superclass constructor and returns whatever it returns.
    my $class     = shift;
    my @ctor_args = @_;

    $class->warn("Bio::Tools::BLAST is deprecated, use Bio::SearchIO system or Bio::Tools::BPlite");

    return $class->SUPER::new(@ctor_args);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1076
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1077 ## The Blast.pm object relies on the superclass constructor:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1078 ## Bio::Tools::SeqAnal::_initialize(). See that module for details.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1079
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1080 #-------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub destroy {
#-------------
    # Explicit teardown for a Blast object: destroys every hit object it
    # holds, clears the '_hits' member, and then lets the superclass run
    # its own destroy().
    my ($self) = @_;

    # Trace output is emitted only at debug level 2.
    print STDERR "DESTROYING $self ${\$self->name}" if $DEBUG == 2;

    if ($self->{'_hits'}) {
        # The loop variable aliases each element returned by hits(), so
        # undef-ing it clears that element after the hit is destroyed.
        for my $hit ($self->hits) {
            $hit->destroy;
            undef $hit;
        }
        undef $self->{'_hits'};
        #$self->{'_hits'}->remove_all;  ## When and if this member becomes a vector.
    }

    $self->SUPER::destroy;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1096
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1097 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1098 ## ACCESSORS ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1099 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1101 =head2 run
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1103 Usage : $object->run( %named_parameters )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1104 Purpose : Run a local or remote Blast analysis on one or more sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1105 Returns : String containing name of Blast output file if a single Blast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1106 : is run.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1107 : -- OR --
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1108 : List of Blast objects if multiple Blasts are being run as a group.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1109 Argument : Named parameters: (PARAMETER TAGS CAN BE UPPER OR LOWER CASE).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1110 : -METHOD => 'local' or 'remote' (default = remote),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1111 : -PARSE => boolean, (true if the results are to be parsed after the run)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1112 : -STRICT => boolean, the strict mode to use for the resulting Blast objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1113 : ADDITIONAL PARAMETERS:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1114 : See methods _run_remote() and _run_local() for required
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1115 : parameters necessary for running the blast report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1116 Throws : Exception if no Blast output file was obtained.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1117 Comments : This method is called automatically during construction of a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1118 : Blast.pm object when run parameters are sent to the constructor:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1119 : $blastObj = new Bio::Tools::Blast (-RUN =>\%runParam,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1120 : %parseParam );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1121 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1122 : The specific run methods (local or remote) called by run()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1123 : must return a list containing the file name(s) with the Blast output.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1124 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1125 : The run() method can perform single or multiple Blast runs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1126 : (analogous to the way parse() works) depending on how many
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1127 : sequences are submitted. However, the running of multiple
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1128 : Blasts is probably better handled at the script level. See notes in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1129 : the "TODO" section below.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1130 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1131 : As for what to do with the Blast result file, that decision is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1132 : left for the user who can direct the Blast object to delete, compress,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1133 : or leave it alone.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1134 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1135 : This method does not worry about load balancing, which
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1136 : is probably best handled at the server level.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1137 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1138 TODO: : Support for running+parsing multiple Blast analyses with a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1139 : single run() call is incomplete. One can generate multiple
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1140 : reports by placing more than one sequence object in the -seqs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1141 : reference parameter. This saves some overhead in the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1142 : that executes the Blasts since all options are configured once.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1143 : (This is analogous to parsing using the static $Blast object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1144 : see parse() and _parse_stream()).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1145 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1146 : The trouble is that Blast objects for all runs are constructed,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1147 : parsed (if necessary), and then returned as a group.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1148 : This can require lots of memory when run+parsing many Blasts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1149 : but should be fine if you just want to run a bunch of Blasts.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1150 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1151 : For now, when running+parsing Blasts, stick to running one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1152 : Blast at a time, building the Blast object with the results
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1153 : of that report, and processing as necessary.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1154 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1155 : Support for running PSI-Blast is not complete.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1157 See Also: L<_run_remote()|_run_remote>, L<_run_local()|_run_local>, L<parse()|parse>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1159 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1160
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1161 #---------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub run {
#---------
    # Runs a local or remote Blast analysis.
    #
    # Arguments (named, tags may be upper or lower case):
    #   -METHOD => 'local' or 'remote' (anything not matching /loc/i,
    #              including an omitted value, selects the remote run)
    #   -PARSE  => boolean, forwarded to Blast objects built for
    #              multi-report runs
    #   -STRICT => boolean, strict mode for this and any created objects
    #   All parameters are also passed through to _run_local()/_run_remote().
    #
    # Returns:
    #   single output file  : the result of $self->file($filename)
    #   several output files: a list of new Bio::Tools::Blast objects
    #
    # Throws if no output file was produced, or if the only "file" is the
    # marker 'email' (the report will arrive by e-mail and cannot be used).
    my ($self, %param) = @_;
    my ($method, $parse, $strict) =
        $self->_rearrange([qw(METHOD PARSE STRICT)], %param);

    $strict = $self->strict($strict) if $strict;

    # -METHOD is optional. Test for definedness first so that omitting it
    # silently selects the remote run instead of raising an
    # "uninitialized value in pattern match" warning.
    my $run_locally = defined $method && $method =~ /loc/i;

    my @files = $run_locally
        ? $self->_run_local(%param)
        : $self->_run_remote(%param);

    $self->throw("Run Blast failed: no Blast output created.") if !@files;

    if (@files == 1) {
        # If there was just one Blast output file, prepare to incorporate it
        # into the current Blast object. run() is called before parse() in the
        # SeqAnal.pm constructor.

        # Can't do anything with a report that is delivered by e-mail.
        return $self->throw("Blast report to be sent via e-mail.")
            if $files[0] eq 'email';

        return $self->file($files[0]);
    }

    # If there are multiple report files, build individual Blast objects foreach.
    # In this situation, the static $Blast object is being used to run
    # a set of related Blasts, similar to the way parse() can be used.
    # This strategy is not optimal since all reports are generated first
    # before any are parsed.
    # Untested.
    my @objs;
    foreach my $file (@files) {
        push @objs, Bio::Tools::Blast->new(-FILE   => $file,
                                           -PARSE  => $parse || 0,
                                           -STRICT => $strict,
                                          );
    }
    return @objs;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1209
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1210 =head2 _run_remote
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1211
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1212 Usage : n/a; internal method called by run()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1213 : $object->_run_remote( %named_parameters )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1214 Purpose : Run Blast on a remote server.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1215 Argument : Named parameters:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1216 : See documentation for function &blast_remote in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1217 : Bio::Tools::Blast::Run::Webblast.pm for description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1218 : of parameters.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1219 Comments : This method requires the Bio::Tools::Blast::Run::Webblast.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1220 : which conforms to this minimal API:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1221 : * export a method called &blast_remote that accepts a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1222 : Bio::Tools::Blast.pm object + named parameters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1223 : (specified in the Webblast.pm module).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1224 : * return a list of names of files containing the raw Blast reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1225 : (When building a Blast object, this list would contain a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1226 : single file from which the Blast object is to be constructed).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1228 See Also : L<run()|run>, L<_run_local()|_run_local>, B<Bio::Tools::Blast::Run::Webblast.pm::blast_remote()>, L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1229
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1230 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1231
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1232 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _run_remote {
#----------------
    # Internal helper for run(): loads the Webblast runner module on
    # demand, imports its blast_remote function into this package, and
    # returns whatever blast_remote returns when given this object plus
    # the caller's named parameters.
    my $self  = shift;
    my %param = @_;

    # Loaded lazily so the module is only needed when a remote run
    # is actually requested.
    require Bio::Tools::Blast::Run::Webblast;
    Bio::Tools::Blast::Run::Webblast->import(qw(&blast_remote));

    return &blast_remote($self, %param);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1242
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1243 =head2 _run_local
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1244
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1245 Usage : n/a; internal method called by run()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1246 : $object->_run_local(%named_parameters)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1247 Purpose : Run Blast on a local machine.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1248 Argument : Named parameters:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1249 : See documentation for function &blast_local in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1250 : Bio::Tools::Blast::Run::LocalBlast.pm for description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1251 : of parameters.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1252 Comments : This method requires the Bio::Tools::Blast::Run::LocalBlast.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1253 : module which should be customized for your site. This module would
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1254 : contain all the commands, paths, environment variables, and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1255 : data necessary to run Blast commands on a local machine, but should
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1256 : not contain any semantics for specific query sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1257 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1258 : LocalBlast.pm should also conform to this minimal API:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1259 : * export a method called &blast_local that accepts a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1260 : Bio::Tools::Blast.pm object + named parameters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1261 : (specified in the LocalBlast.pm module).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1262 : * return a list of names of files containing the raw Blast reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1263 : (When building a Blast object, this list would contain a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1264 : single file from which the Blast object is to be constructed).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1265
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1266 See Also : L<run()|run>, L<_run_remote()|_run_remote>, B<Bio::Tools::Blast::Run::LocalBlast::blast_local()>, L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1268 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1269
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1270 #--------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _run_local {
#--------------
    # Internal helper for run(): runs Blast on the local machine.
    #
    # Loads Bio::Tools::Blast::Run::LocalBlast on demand, imports its
    # blast_local function into this package, and returns whatever
    # blast_local returns when given this object plus the caller's
    # named parameters.
    my ($self, %param) = @_;

    # blast_local is provided by the LocalBlast runner module (the same
    # module db_local() reads its database lists from), not by Webblast,
    # which is the remote runner. Loading Webblast here left blast_local
    # undefined at call time.
    require Bio::Tools::Blast::Run::LocalBlast;
    Bio::Tools::Blast::Run::LocalBlast->import(qw(&blast_local));

    return &blast_local($self, %param);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1280
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1281 =head2 db_remote
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1282
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1283 Usage : @dbs = $Blast->db_remote( [seq_type] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1284 Purpose : Get a list of available sequence databases for remote Blast analysis.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1285 Returns : Array of strings
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1286 Argument : seq_type = 'p' or 'n'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1287 : 'p' = Gets databases for peptide searches (default)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1288 : 'n' = Gets databases for nucleotide searches
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1289 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1290 Comments : Peptide databases are a subset of the nucleotide databases.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1291 : It is convenient to call this method on the static $Blast object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1292 : as shown in Usage.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1293
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1294 See Also : L<db_local()|db_local>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1296 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1297
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1298 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub db_remote {
#----------------
    # Returns the database names the Webblast module lists for remote
    # Blast searches. A sequence type that starts with 'p' or contains
    # 'amino' (case-insensitive) selects the peptide list; anything else
    # selects the nucleotide list. The type defaults to 'p' when omitted
    # or false.
    my $self     = shift;
    my $seq_type = shift || 'p';

    require Bio::Tools::Blast::Run::Webblast;
    Bio::Tools::Blast::Run::Webblast->import(qw(@Blast_dbp_remote
                                                 @Blast_dbn_remote));

    # The arrays are referenced by their fully qualified names; the
    # run-time import above does not make the short names usable here.
    my @dbs = $seq_type =~ /^p|amino/i
        ? @Bio::Tools::Blast::Run::Webblast::Blast_dbp_remote
        : @Bio::Tools::Blast::Run::Webblast::Blast_dbn_remote;

    return @dbs;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1318
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1319 =head2 db_local
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1320
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1321 Usage : @dbs = $Blast->db_local( [seq_type] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1322 Purpose : Get a list of available sequence databases for local Blast analysis.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1323 Returns : Array of strings
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1324 Argument : seq_type = 'p' or 'n'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1325 : 'p' = Gets databases for peptide searches (default)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1326 : 'n' = Gets databases for nucleotide searches
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1327 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1328 Comments : Peptide databases are a subset of the nucleotide databases.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1329 : It is convenient to call this method on the static $Blast object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1330 : as shown in Usage.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1331
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1332 See Also : L<db_remote()|db_remote>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1333
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1334 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1335
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1336 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub db_local {
#----------------
    # Returns the database names the LocalBlast module lists for local
    # Blast searches. A sequence type that starts with 'p' or contains
    # 'amino' (case-insensitive) selects the peptide list; anything else
    # selects the nucleotide list. The type defaults to 'p' when omitted
    # or false.
    my $self     = shift;
    my $seq_type = shift || 'p';

    require Bio::Tools::Blast::Run::LocalBlast;
    Bio::Tools::Blast::Run::LocalBlast->import(qw(@Blast_dbp_local
                                                   @Blast_dbn_local));

    # The arrays are referenced by their fully qualified names; the
    # run-time import above does not make the short names usable here.
    my @dbs = $seq_type =~ /^p|amino/i
        ? @Bio::Tools::Blast::Run::LocalBlast::Blast_dbp_local
        : @Bio::Tools::Blast::Run::LocalBlast::Blast_dbn_local;

    return @dbs;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1357 =head2 parse
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1358
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1359 Usage : $blast_object->parse( %named_parameters )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1360 Purpose : Parse a Blast report from a file or STDIN.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1361 : * Parses raw BLAST data, populating Blast object with report data.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1362 : * Sets the significance cutoff.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1363 : * Extracts statistical parameters about the BLAST run.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1364 : * Handles both single files and streams containing multiple reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1365 Returns : integer (number of Blast reports parsed)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1366 Argument : <named parameters>: (PARAMETER TAGS CAN BE UPPER OR LOWER CASE).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1367 : -FILE => string (name of file containing raw Blast output.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1368 : Optional. If a valid file is not supplied,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1369 : STDIN will be used).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1370 : -SIGNIF => number (float or scientific notation number to be used
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1371 : as a P- or Expect value cutoff;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1372 : default = $DEFAULT_SIGNIF (999)).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1373 : -FILT_FUNC => func_ref (reference to a function to be used for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1374 : filtering out hits based on arbitrary criteria.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1375 : This function should take a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1376 : Bio::Tools::Blast::Sbjct.pm object as its first
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1377 : argument and return a boolean value,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1378 : true if the hit should be filtered out).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1379 : Sample filter function:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1380 : -FILT_FUNC => sub { $hit = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1381 : $hit->gaps == 0; },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1382 : -CHECK_ALL_HITS => boolean (check all hits for significance against
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1383 : significance criteria. Default = false.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1384 : If false, stops processing hits after the first
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1385 : non-significant hit or the first hit that fails
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1386 : the filt_func call. This speeds parsing,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1387 : taking advantage of the fact that the hits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1388 : are processed in the order they are ranked.)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1389 : -MIN_LEN => integer (to be used as a minimum query sequence length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1390 : sequences below this length will not be processed;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1391 : default = no minimum length).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1392 : -STATS => boolean (collect stats for report: matrix, filters, etc.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1393 : default = false).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1394 : -BEST => boolean (only process the best hit of each report;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1395 : default = false).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1396 : -OVERLAP => integer (the amount of overlap to permit between
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1397 : adjacent HSPs when tiling HSPs,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1398 : Default = $MAX_HSP_OVERLAP (2))
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1399 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1400 : PARAMETERS USED WHEN PARSING MULTI-REPORT STREAMS:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1401 : --------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1402 : -SHARE => boolean (set this to true if all reports in stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1403 : share the same stats. Default = true)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1404 : Must be set to false when parsing both Blast1 and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1405 : Blast2 reports in the same run or if you need
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1406 : statistical params for each report, Lambda, K, H).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1407 : -STRICT => boolean (use strict mode for all Blast objects created.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1408 : Increases sensitivity to errors. For single
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1409 : Blasts, this parameter is sent to new().)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1410 : -EXEC_FUNC => func_ref (reference to a function for processing each
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1411 : Blast object after it is parsed. Should accept a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1412 : Blast object as its sole argument. Return value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1413 : is ignored. If an -EXEC_FUNC parameter is supplied,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1414 : the -SAVE_ARRAY parameter will be ignored.)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1415 : -SAVE_ARRAY => array_ref (reference to an array for storing all
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1416 : Blast objects as they are created.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1417 : Experimental. Not recommended.)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1418 : -SIGNIF_FMT => string (either 'exp' or 'parts'). Sets the format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1419 : for reporting P/Expect values. 'exp' reports
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1420 : only the exponent portion. 'parts' reports
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1421 : them as a 2 element list. See signif_fmt().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1422 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1423 Throws : Exception if BLAST report contains a FATAL: error.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1424 : Propagates any exception thrown by read().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1425 : Propagates any exception thrown by called parsing methods.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1426 Comments : This method can be called either directly using the static $Blast object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1427 : or indirectly (by Bio::Tools::SeqAnal.pm) during construction of an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1428 : individual Blast object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1429 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1430 : HTML-formatted reports can be parsed as well. No special flag is required
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1431 : since it is detected automatically. The presence of HTML-formatting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1432 : will result in slower performance, however, since it must be removed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1433 : prior to parsing. Parsing HTML-formatted reports is highly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1434 : error prone and is generally not recommended.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1435 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1436 : If one has an HTML report, do NOT remove the HTML from it by using the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1437 : "Save As" option of a web browser to save it as text. This renders the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1438 : report unparsable.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1439 : HTML-formatted reports can be parsed after running through the strip_html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1440 : function of Blast::HTML.pm as in:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1441 : require Bio::Tools::Blast::HTML;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1442 : Bio::Tools::Blast::HTML->import(&strip_html);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1443 : &strip_html(\$data);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1444 : # where data contains full contents of an HTML-formatted report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1445 : TODO: write a demo script that does this.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1446
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1447 See Also : L<_init_parse_params()|_init_parse_params>, L<_parse_blast_stream()|_parse_blast_stream>, L<overlap()|overlap>, L<signif_fmt()|signif_fmt>, B<Bio::Root::Object::read()>, B<Bio::Tools::Blast::HTML.pm::strip_html()>, L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1448
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1449 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1450
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1451 #---------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1452 sub parse {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1453 #---------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1454 # $self might be the static $Blast object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1455 my ($self, @param) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1456
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1457 my($signif, $filt_func, $min_len, $check_all, $overlap, $stats,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1458 $share, $strict, $best, $signif_fmt, $no_aligns) =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1459 $self->_rearrange([qw(SIGNIF FILT_FUNC MIN_LEN CHECK_ALL_HITS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1460 OVERLAP STATS SHARE STRICT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1461 BEST EXPONENT NO_ALIGNS )], @param);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1462
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1463 ## Initialize the static Blast object with parameters that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1464 ## apply to all Blast objects within a parsing session.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1465
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1466 &_init_parse_params($share, $filt_func, $check_all,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1467 $signif, $min_len, $strict,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1468 $best, $signif_fmt, $stats, $no_aligns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1469 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1470
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1471 my $count = $self->_parse_blast_stream(@param);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1472
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1473 # print STDERR "\nDONE PARSING STREAM.\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1474
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1475 if($Blast->{'_blast_errs'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1476 my @errs = @{$Blast->{'_blast_errs'}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1477 printf STDERR "\n*** %d BLAST REPORTS HAD FATAL ERRORS:\n", scalar(@errs);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1478 foreach(@errs) { print STDERR "$_\n"; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1479 @{$Blast->{'_blast_errs'}} = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1480 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1481
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1482 return $count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1483 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1484
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1485 =head2 _init_parse_params
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1486
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1487 Title : _init_parse_params
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1488 Usage : n/a; called automatically by parse()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1489 Purpose : Initializes parameters used during parsing of Blast reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1490 : This is a static method used by the $Blast object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1491 : Calls _set_signif().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1492 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1493 Returns : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1494 Args : Args extracted by parse().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1495
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1496 See Also: L<parse()|parse>, L<_set_signif()|_set_signif>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1497
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1498 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1499
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1500 #----------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1501 sub _init_parse_params {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1502 #----------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1503 my ($share, $filt_func, $check_all,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1504 $signif, $min_len, $strict,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1505 $best, $signif_fmt, $stats, $no_aligns) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1506
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1507 ## Default is to share stats.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1508 $Blast->{'_share'} = defined($share) ? $share : 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1509 $Blast->{'_filt_func'} = $filt_func || 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1510 $Blast->{'_check_all'} = $check_all || 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1511 $Blast->{'_signif_fmt'} ||= $signif_fmt || '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1512 $Blast->{'_no_aligns'} = $no_aligns || 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1513
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1514 &_set_signif($signif, $min_len, $filt_func);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1515 $Blast->strict($strict) if defined $strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1516 $Blast->best($best) if $best;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1517 $Blast->{'_blast_count'} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1518
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1519 ## If $stats is false, miscellaneous statistical and other parameters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1520 ## are NOT extracted from the Blast report (e.g., matrix name, filter used, etc.).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1521 ## This can speed processing when crunching tons of Blast reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1522 ## Default is to NOT get stats.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1523 $Blast->{'_get_stats'} = defined($stats) ? $stats : 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1524
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1525 # Clear any errors from previous parse.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1526 undef $Blast->{'_blast_errs'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1527 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1528
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1529 =head2 _set_signif
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1530
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1531 Usage : n/a; called automatically by _init_parse_params()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1532 : This is now a "static" method used only by $Blast.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1533 : _set_signif($signif, $min_len, $filt_func);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1534 Purpose : Sets significance criteria for the BLAST object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1535 Argument : Obligatory three arguments:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1536 : $signif = float or sci-notation number or undef
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1537 : $min_len = integer or undef
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1538 : $filt_func = function reference or undef
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1539 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1540 : If $signif is undefined, a default value is set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1541 : (see $DEFAULT_SIGNIF; min_length = not set).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1542 Throws : Exception if significance value is defined but appears
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1543 : out of range or invalid.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1544 : Exception if $filt_func is defined and is not a func ref.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1545 Comments : The significance of a BLAST report can be based on
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1546 : the P (or Expect) value and/or the length of the query sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1547 : P (or Expect) values GREATER than '_significance' are not significant.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1548 : Query sequence lengths LESS than '_min_length' are not significant.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1549 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1550 : Hits can also be screened using arbitrary significance criteria
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1551 : as discussed in the parse() method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1552 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1553 : If no $signif is defined, the '_significance' level is set to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1554 : $Bio::Tools::Blast::DEFAULT_SIGNIF (999).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1555
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1556 See Also : L<signif()|signif>, L<min_length()|min_length>, L<_init_parse_params()|_init_parse_params>, L<parse()|parse>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1557
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1558 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1559
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1560 #-----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1561 sub _set_signif {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1562 #-----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1563 my( $sig, $len, $func ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1564
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1565 if(defined $sig) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1566 $Blast->{'_confirm_significance'} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1567 if( $sig =~ /[^\d.e-]/ or $sig <= 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1568 $Blast->throw("Invalid significance value: $sig",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1569 "Must be greater than zero.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1570 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1571 $Blast->{'_significance'} = $sig;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1572 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1573 $Blast->{'_significance'} = $DEFAULT_SIGNIF;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1574 $Blast->{'_check_all'} = 1 if not $Blast->{'_filt_func'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1575 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1576
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1577 if(defined $len) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1578 if($len =~ /\D/ or $len <= 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1579 $Blast->warn("Invalid minimum length value: $len",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1580 "Value must be an integer > 0. Value not set.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1581 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1582 $Blast->{'_min_length'} = $len;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1583 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1584 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1585
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1586 if(defined $func) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1587 $Blast->{'_confirm_significance'} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1588 if($func and not ref $func eq 'CODE') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1589 $Blast->throw("Not a function reference: $func",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1590 "The -filt_func parameter must be function reference.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1591 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1592 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1593 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1594
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1595 =head2 _parse_blast_stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1596
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1597 Usage : n/a. Internal method called by parse()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1598 Purpose : Obtains the function to be used during parsing and calls read().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1599 Returns : Integer (the number of blast reports read)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1600 Argument : Named parameters (forwarded from parse())
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1601 Throws : Propagates any exception thrown by _get_parse_blast_func() and read().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1602
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1603 See Also : L<_get_parse_blast_func()|_get_parse_blast_func>, B<Bio::Root::Object::read()>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1604
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1605 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1606
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1607 #----------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1608 sub _parse_blast_stream {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1609 #----------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1610 my ($self, %param) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1611
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1612 my $func = $self->_get_parse_blast_func(%param);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1613 # my $func = sub { my $data = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1614 # printf STDERR "Chunk length = %d\n", length($data);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1615 # sleep(3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1616 # };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1617
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1618 # Only setting the newline character once per session.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1619 $Newline ||= $Util->get_newline(-client => $self, %param);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1620
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1621 $self->read(-REC_SEP =>"$Newline>",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1622 -FUNC => $func,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1623 %param);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1624
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1625 return $Blast->{'_blast_count'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1626 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1627
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1628 =head2 _get_parse_blast_func
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1629
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1630 Usage : n/a; internal method used by _parse_blast_stream()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1631 : $func_ref = $blast_object->_get_parse_blast_func()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1632 Purpose : Generates a function ref to be used as a closure for parsing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1633 : raw data as it is being loaded by Bio::Root::IOManager::read().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1634 Returns : Function reference (closure).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1635 Comments : The function reference contains a fair bit of logic
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1636 : at present. It could perhaps be split up into separate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1637 : functions to make it more 'digestible'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1638
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1639 See Also : L<_parse_blast_stream()|_parse_blast_stream>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1640
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1641 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1642
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1643 #--------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1644 sub _get_parse_blast_func {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1645 #--------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1646 my ($self, @param) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1647
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1648 my ($save_a, $exec_func) =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1649 $self->_rearrange([qw(SAVE_ARRAY EXEC_FUNC)], @param);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1650
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1651 # $MONITOR && print STDERR "\nParsing Blast stream (5/dot, 250/line)\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1652 my $count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1653 my $strict = $self->strict();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1654
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1655 # Some parameter validation.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1656 # Remember, all Blast parsing will use this function now.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1657 # You won't need an exec_func or save_array when just creating a Blast object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1658 # as in: $blast = new Bio::Tools::Blast();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1659 if($exec_func and not ref($exec_func) eq 'CODE') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1660 $self->throw("The -EXEC_FUNC parameter must be function reference.",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1661 "exec_func = $exec_func");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1662
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1663 } elsif($save_a and not ref($save_a) eq 'ARRAY') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1664 $self->throw("The -SAVE_ARRAY parameter must supply an array reference".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1665 "when not using an -EXEC_FUNC parameter.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1666 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1667
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1668 ## Might consider breaking this closure up if possible.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1669
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1670 return sub {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1671 my ($data) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1672 ## $data should contain one of three possible fragment types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1673 ## from a Blast report:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1674 ## 1. Header with description section,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1675 ## 2. An alignment section for a single hit, or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1676 ## 3. The final alignment section plus the footer section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1677 ## (record separator = "Newline>").
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1678
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1679 # print STDERR "\n(BLAST) DATA CHUNK: $data\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1680
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1681 my ($current_blast, $current_prog, $current_vers, $current_db);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1682 my $prev_blast;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1683 my $contains_translation = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1684
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1685 ### steve --- Wed Mar 15 02:48:07 2000
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1686 ### In the process of addressing bug PR#95. Tricky.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1687 ### Using the $contains_translation to do so. Not complete
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1688 ### and possibly won't fix. We'll see.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1689
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1690 # Check for header section. Start a new Blast object and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1691 # parse the description section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1692 # if ($data =~ /\sQuery\s?=/s || ($contains_translation && $data =~ /Database:/s)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1693 if ($data =~ /\sQuery\s?=/s) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1694 $Blast->{'_blast_count'}++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1695 print STDERR ".", $Blast->{'_blast_count'} % 50 ? '' : "\n" if $MONITOR;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1696
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1697 if($data =~ /$Newline\s+Translating/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1698 print STDERR "\nCONTAINS TRANSLATION\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1699 $contains_translation = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1700 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1701
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1702 # If we're parsing a stream containing multiple reports,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1703 # all subsequent header sections will contain the last hit of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1704 # the previous report which needs to be parsed and added to that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1705 # report if significant. It also contains the run parameters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1706 # at the bottom of the Blast report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1707 # if($Blast->{'_blast_count'} > 1 || $contains_translation) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1708 if($Blast->{'_blast_count'} > 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1709 # print STDERR "\nMULTI-BLAST STREAM.\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1710 $Blast->{'_multi_stream'} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1711
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1712 if($data =~ /(.+?)$Newline(<\w+>)?(T?BLAST[NPX])\s+(.+?)$Newline(.+)/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1713 ($current_prog, $current_vers, $data) = ($3, $4, $5);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1714 # Final chunk containing last hit and last footer.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1715 $Blast->{'_current_blast'}->_parse_alignment($1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1716 $prev_blast = $Blast->{'_current_blast'}; # finalized.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1717 # } elsif($contains_translation) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1718 # $data =~ /(T?BLAST[NPX])\s+(.+?)$Newline(.+)/so;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1719 # ($current_prog, $current_vers, $data) = ($1, $2, $3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1720 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1721 $Blast->throw("Can't determine program type from BLAST report.",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1722 "Checked for: @Blast_programs.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1723 # This has important implications for how to handle interval
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1724 # information for HSPs. TBLASTN uses nucleotides in query HSP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1725 # but amino acids in the sbjct HSP sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1726 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1727
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1728 if($data =~ m/Database:\s+(.+?)$Newline/so ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1729 $current_db = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1730 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1731 # In some reports, the Database is only listed at end.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1732 #$Blast->warn("Can't determine database name from BLAST report.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1733 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1734
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1735 # Incyte_Fix: Nasty Invisible Bug.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1736 # Records in blast report are delimited by '>', but... when
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1737 # there are no hits for a query, there won't be a '>'. That
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1738 # causes several blast reports to run together in the data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1739 # passed to this routine. Need to get rid of non-hits in data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1740 if ($data =~ /.+(No hits? found.+Sequences.+)/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1741 $data = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1742 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1743 # End Incyte_Fix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1744
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1745 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1746
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1747 # Determine if we need to create a new Blast object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1748 # or use the $self object for this method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1749
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1750 if($Blast->{'_multi_stream'} or $self->name eq 'Static Blast object') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1751 # Strict mode is not object-specific but may be someday.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1752 # print STDERR "\nCreating new Blast object.\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1753 $current_blast = new Bio::Tools::Blast(-STRICT => $strict);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1754 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1755 $current_blast = $self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1756 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1757 $Blast->{'_current_blast'} = $current_blast;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1758
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1759 # If we're not sharing stats, set data on current blast object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1760 if(defined $current_prog and not $Blast->{'_share'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1761 $current_blast->program($current_prog);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1762 $current_blast->program_version($current_vers);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1763 $current_blast->database($current_db);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1764 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1765
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1766 # print STDERR "CURRENT BLAST = ", $current_blast->name, "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1767 $current_blast->_parse_header($data);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1768
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1769 # If there were any descriptions in the header,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1770 # we know if there are any significant hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1771 # No longer throwing exception if there were no significant hits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1772 # and a -signif parameter was specified. Doing so prevents the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1773 # construction of a Blast object, which could still be useful.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1774 # if($current_blast->{'_has_descriptions'} and $Blast->{'_confirm_significance'} and not $current_blast->is_signif) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1775 # $current_blast->throw("No significant BLAST hits for ${\$current_blast->name}");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1776
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1777 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1778
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1779 } # Done parsing header/description section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1780
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1781 ### For use with $contains_translation - not right - breaks regular report parsing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1782 # elsif(ref $Blast->{'_current_blast'} && $data !~ /\s*\w*\s*/s) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1783 elsif(ref $Blast->{'_current_blast'} ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1784 # Process an alignment section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1785 $current_blast = $Blast->{'_current_blast'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1786 # print STDERR "\nCONTINUING PROCESSING ALN WITH ", $current_blast->name, "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1787 # print STDERR "DATA: $data\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1788 eval {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1789 $current_blast->_parse_alignment($data);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1790 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1791 if($@) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1792 # push @{$self->{'_blast_errs'}}, $@;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1793 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1794 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1795
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1796 # If the current Blast object has been completely parsed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1797 # (occurs with a single Blast stream), or if there is a previous
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1798 # Blast object (occurs with a multi Blast stream),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1799 # execute a supplied function on it or store it in a supplied array.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1800
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1801 if( defined $prev_blast or $current_blast->{'_found_params'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1802 my $finished_blast = defined($prev_blast) ? $prev_blast : $current_blast;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1803
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1804 $finished_blast->_report_errors();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1805 # print STDERR "\nNEW BLAST OBJECT: ${\$finished_blast->name}\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1806
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1807 if($exec_func) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1808 # print STDERR " RUNNING EXEC_FUNC...\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1809 &$exec_func($finished_blast); # ignoring any return value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1810 # Report processed, no longer need object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1811 $finished_blast->destroy;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1812 undef $finished_blast;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1813 } elsif($save_a) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1814 # print STDERR " SAVING IN ARRAY...\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1815 # We've already verified that if there is no exec_func
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1816 # then there must be a $save_array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1817 push @$save_a, $finished_blast;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1818 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1819 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1820 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1821 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1822 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1823
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1824 =head2 _report_errors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1825
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1826 Title : _report_errors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1827 Usage : n/a; Internal method called by _get_parse_blast_func().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1828 Purpose : Throw or warn about any errors encountered.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1829 Returns : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1830 Args : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1831 Throws : If all hits generated exceptions, raise exception
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1832 : (a fatal event for the Blast object.)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1833 : If some hits were okay but some were bad, generate a warning
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1834 : (a few bad apples should not spoil the bunch).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1835 : This usually indicates a limiting B-value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1836 : When the parsing code fails, it is either all or nothing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1837
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1838 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1839
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1840 #-------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _report_errors {
#-------------------
    # Reports the exceptions trapped while parsing hits for this Blast object.
    #
    #   - Returns quietly when no error list exists on the object or when
    #     the list is empty.
    #   - Throws when the object has no significant hits recorded
    #     ('_num_hits_significant' is false).
    #   - Otherwise issues a warning.
    #
    # The object's '_blast_errs' list is emptied before anything is reported.
    my ($self) = @_;

    my $trapped = $self->{'_blast_errs'};
    return unless ref($trapped);

    # Report from a private copy so the list on the object can be cleared.
    my @errs = @$trapped;

    if (@errs) {
        my $n_errs  = scalar @errs;
        my $is_many = $n_errs > 2;

        @$trapped = ();    # clear the errs on the object.

        # When there are many errors they usually share one cause, so only
        # the first one is shown in full.
        my $summary;
        if ($is_many) {
            $summary = "SHOWING FIRST EXCEPTION ONLY:\n$errs[0]";
            # Also clear the object's existing set of errors (conserve
            # memory). Per the original author's note this is only needed
            # when the -RECORD_ERR => 1 constructor option was used.
            $self->clear_err();
        }
        else {
            $summary = join("\n", @errs);
        }

        my $detail = "\n\nTRAPPED EXCEPTION(S):\n$summary\nEND TRAPPED EXCEPTION(S)\n";

        if ($self->{'_num_hits_significant'}) {
            # Some hits were fine: a few bad ones should not spoil the bunch.
            $self->warn(sprintf("Some potential hits were not parsed (n=%d).", $n_errs),
                        $is_many ? "This may be due to a limiting B value (max alignment listings)." : "",
                        $detail
                       );
        }
        else {
            # Nothing usable was parsed: fatal for this Blast object.
            $self->throw(sprintf("Failed to parse any hit data (n=%d).", $n_errs),
                         $detail
                        );
        }
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1877
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1878 =head2 _parse_header
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1879
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1880 Usage : n/a; called automatically by the _get_parse_blast_func().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1881 Purpose : Parses the header section of a BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1882 Argument : String containing the header+description section of a BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1883 Throws : Exception if description data cannot be parsed properly.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1884 : Exception if there is a 'FATAL' error in the Blast report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1885 : Warning if there is a 'WARNING' in the Blast report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1886 : Warning if there are no significant hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1887 Comments : Description section contains a single line for each hit listing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1888 : the seq id, description, score, Expect or P-value, etc.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1889
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1890 See Also : L<_get_parse_blast_func()|_get_parse_blast_func>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1891
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1892 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1893
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1894 #----------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _parse_header {
#----------------------
    # Parses the header (and, when requested, the description lines) of one
    # BLAST report chunk and records the results on $self and on the shared
    # $Blast object.
    my ($self, $data) = @_;

    # print STDERR "\n$ID: PARSING HEADER\n"; #$data\n";

    # Strip leading whitespace, and trailing whitespace optionally
    # followed by a '>'.
    $data =~ s/^\s+|\s+>?$//sg;

    # HTML-formatted reports are not supported by this parsing strategy,
    # since the whole report is not available in one chunk.
    if ($data =~ /<HTML/i) {
        $self->throw("Can't parse HTML-formatted BLAST reports.");
    }

    # Messages embedded in the report. Warnings are relayed only in
    # strict mode; a FATAL message always raises an exception.
    if ($self->strict) {
        $self->warn("$1") if $data =~ /WARNING: (.+?)$Newline$Newline/so;
    }
    if ($data =~ /FATAL: (.+?)$Newline$Newline/so) {
        $self->throw("FATAL BLAST ERROR = $1");
    }
    # No longer throwing exception when no hits were found. Still reporting it.
    if ($self->strict) {
        $self->warn("No hits were found.") if $data =~ /No hits? found/i;
    }

    # If this is the first Blast, the program, version, and database info
    # pertain to it. Otherwise, they are for the previous report and have
    # already been parsed out.
    # Data is stored in the static Blast object. Data for subsequent reports
    # will be stored in separate objects if the -share parameter is not set.
    # See _get_parse_blast_func().
    if ($Blast->{'_blast_count'} == 1) {
        if ($data =~ /(<\w+>)?(T?BLAST[NPX])\s+(.+?)$Newline/so) {
            $Blast->program($2);
            $Blast->program_version($3);
        }
        else {
            # The program type has important implications for how to handle
            # interval information for HSPs. TBLASTN uses nucleotides in
            # query HSP but amino acids in the sbjct HSP sequence.
            $self->throw("Can't determine program type from BLAST report.",
                         "Checked for: @Blast_programs.");
        }

        # In some reports, the Database is only listed at end, so a missing
        # database line is not reported here.
        $Blast->database($1) if $data =~ m/Database:\s+(.+?)$Newline/so;
    }

    ## For efficiency reasons, we want to avoid using $' and $`.
    ## Therefore using single-line mode pattern matching.

    # Default: the whole chunk is header and there are no descriptions.
    # Blast reports can legally lack a description section. No need to warn.
    my $header          = $data;
    my $descriptions;
    my $has_description = 0;

    if ($data =~ /(.+?)\nSequences producing.+?\n(.+)/s ) {
        $header          = $1;
        $descriptions    = $2;
        $has_description = 1;
    }
    $self->{'_has_descriptions'} = $has_description;

    $self->_set_query($header);  # The name of the sequence will appear in error report.
    # print STDERR "\nQUERY = ", $Blast->{'_current_blast'}->query, "\n";

    if ($Blast->{'_get_stats'}) {
        $self->_set_date($header);
    }
    $self->_set_length($header);

    # Setting the absolute max and min significance levels.
    $self->{'_highestSignif'} = 0;
    $self->{'_lowestSignif'}  = $DEFAULT_SIGNIF;

    # Descriptions are parsed only when significance must be confirmed or
    # when alignments are not being parsed; otherwise the report is simply
    # flagged as significant.
    if ($Blast->{'_confirm_significance'} || $Blast->{'_no_aligns'}) {
        $self->_parse_descriptions($descriptions) if $descriptions;
    }
    else {
        $self->{'_is_significant'} = 1;
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1982
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1983 #-----------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _parse_descriptions {
#-----------------------
    # Walks the description lines of a BLAST report (one line per hit),
    # extracts the significance value (P- or Expect value) from each and
    # feeds it to _process_significance().
    #
    # NOTE: This method will not be called if the report lacks
    #       a description section.
    my ($self, $desc) = @_;

    # print STDERR "\nPARSING DESCRIPTION DATA\n";

    my @desc_lines  = split( $Newline, $desc);
    my $cutoff      = $self->signif;
    my $have_layout = $Blast->{'_layout'} || 0;

    # Checking only the first hit would be enough to decide whether the
    # report can be skipped, but data is collected for all hits anyway so
    # that the min/max significance is known.
  DESC_LINE:
    for my $entry (@desc_lines) {
        last DESC_LINE if $entry =~ / NONE |End of List/;
        next DESC_LINE if $entry =~ /^\s*$/;
        next DESC_LINE if $entry =~ /^\.\./;

        # Guess the layout from the trailing columns of the line.
        # These regexps need testing on a variety of reports.
        # NOTE(review): the first pattern is not anchored on the left, so
        # it can also match the tail of a three-column line (e.g. the
        # "50  1" at the end of "1e-50  1") -- confirm the intended order.
        my $layout = $entry =~ /\d+\s{1,5}[\de.-]+\s*$/          ? 2
                   : $entry =~ /\d+\s{1,5}[\de.-]+\s{1,}\d+\s*$/ ? 1
                   :                                               undef;

        unless (defined $layout) {
            $self->warn("Can't parse significance data in description line $entry");
            next DESC_LINE;
        }

        # Record the layout once, unless it was already known.
        unless ($have_layout) {
            $self->_layout($layout);
            $have_layout = 1;
        }

        my $sig = &_parse_signif( $entry, $layout );

        # print STDERR "  Parsed signif ($layout) = $sig\n";

        # Stop at the first hit beyond the cutoff unless all hits are
        # being checked.
        last DESC_LINE if ($sig > $cutoff and not $Blast->{'_check_all'});
        $self->_process_significance($sig, $cutoff);
    }

    # printf "\n%d SIGNIFICANT HITS.\nDONE PARSING DESCRIPTIONS.\n", $self->{'_num_hits_significant'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2038
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _process_significance {
    # Folds one hit's significance value into the running statistics kept
    # on the object.
    #
    #   $sig       : significance value of the hit being processed.
    #   $my_signif : significance cutoff; a hit counts as significant when
    #                $sig <= $my_signif.
    my ($self, $sig, $my_signif) = @_;

    # Track the largest and smallest significance values seen so far.
    my $highest = $self->{'_highestSignif'};
    $self->{'_highestSignif'} = $sig > $highest ? $sig : $highest;

    my $lowest = $self->{'_lowestSignif'};
    $self->{'_lowestSignif'} = $sig < $lowest ? $sig : $lowest;

    # Significance value assessment.
    $self->{'_num_hits_significant'}++ if $sig <= $my_signif;
    $self->{'_num_hits'}++;

    # The report is significant as soon as one hit meets the cutoff.
    $self->{'_is_significant'} = 1 if $self->{'_num_hits_significant'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2054
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2055 =head2 _parse_alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2056
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2057 Usage : n/a; called automatically by the _get_parse_blast_func().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2058 Purpose : Parses a single alignment section of a BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2059 Argument : String containing the alignment section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2060 Throws : n/a; All errors are trapped while parsing the hit data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2061 : and are processed as a group when the report is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2062 : completely processed (See _report_errors()).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2063 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2064 Comments : Alignment section contains all HSPs for a hit.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2065 : Requires Bio::Tools::Blast::Sbjct.pm.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2066 : Optionally calls a filter function to screen the hit on arbitrary
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2067 : criteria. If the filter function returns true for a given hit,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2068 : that hit will be skipped.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2069 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2070 : If the Blast object was created with -check_all_hits set to true,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2071 : all hits will be checked for significance and processed if necessary.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2072 : If this field is false, the parsing will stop after the first
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2073 : non-significant hit.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2074 : See parse() for description of parsing parameters.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2075
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2076 See Also : L<parse()|parse>, L<_get_parse_blast_func()|_get_parse_blast_func>, L<_report_errors()|_report_errors>, B<Bio::Tools::Blast::Sbjct()>, L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2077
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2078 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2079
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2080 #----------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2081 sub _parse_alignment {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2082 #----------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2083 # This method always needs to detect whether the $data argument
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2084 # contains the footer of a Blast report, indicating the last chunk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2085 # of a single Blast stream.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2086
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2087 my( $self, $data ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2088
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2089 # printf STDERR "\nPARSING ALIGNMENT DATA for %s $self.\n", $self->name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2090
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2091 # NOTE: $self->{'_current_hit'} is an instance variable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2092 # The $Blast object will not have this member.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2093
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2094 # If all of the significant hits have been parsed,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2095 # return if we're not checking all hits and we don't need to get
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2096 # the Blast stats (parameters at footer of report).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2097 if(defined $self->{'_current_hit'} and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2098 defined $self->{'_num_hits_significant'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2099 return if $self->{'_current_hit'} >= $self->{'_num_hits_significant'} and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2100 not ($Blast->{'_check_all'} or $Blast->{'_get_stats'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2101 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2103 # Check for the presence of the Blast footer section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2104 # _parse_footer returns the alignment section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2105 $data = $self->_parse_footer($data);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2106
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2107 # Return if we're only interested in the best hit.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2108 # This has to occur after checking for the parameters section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2109 # in the footer (since we may still be interested in them).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2110 return if $Blast->best and ( defined $self->{'_current_hit'} and $self->{'_current_hit'} >=1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2111
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2112 # print "RETURNED FROM _parse_footer (", $self->to_string, ")";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2113 # print "\n --> FOUND PARAMS.\n" if $self->{'_found_params'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2114 # print "\n --> DID NOT FIND PARAMS.\n" unless $self->{'_found_params'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2115
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2116 require Bio::Tools::Blast::Sbjct;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2118 $data =~ s/^\s+|\s+>?$//sg;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2119 $data =~ s/$Newline$Newline/$Newline/sog; # remove blank lines.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2120 my @data = split($Newline, $data);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2121 push @data, 'end';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2123 # print STDERR "\nALIGNMENT DATA:\n$data\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2124
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2125 my $prog = $self->program;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2126 my $check_all = $Blast->{'_check_all'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2127 my $filt_func = $Blast->{'_filt_func'} || 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2128 my $signif_fmt = $Blast->{'_signif_fmt'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2129 my $my_signif = $self->signif;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2130 my $err;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2132 # Now construct the Sbjct objects from the alignment section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2134 # debug(1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2136 $self->{'_current_hit'}++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2137
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2138 # If not confirming significance, _parse_descriptions will not have been run,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2139 # so we need to count the total number of hits here.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2140 if( not $Blast->{'_confirm_significance'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2141 $self->{'_num_hits'}++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2142 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2143
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2144 if($Blast->{'_no_aligns'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2145 # printf STDERR "\nNOT PARSING ALIGNMENT DATA\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2146 return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2147 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2148
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2149 my $hit; # Must be my'ed within hit_loop.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2150 eval {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2151 $hit = new Bio::Tools::Blast::Sbjct (-DATA =>\@data,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2152 -PARENT =>$self,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2153 -NAME =>$self->{'_current_hit'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2154 -RANK =>$self->{'_current_hit'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2155 -RANK_BY =>'order',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2156 -PROGRAM =>$prog,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2157 -SIGNIF_FMT=>$signif_fmt,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2158 -OVERLAP =>$Blast->{'_overlap'} || $MAX_HSP_OVERLAP,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2159 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2160 # printf STDERR "NEW HIT: %s, SIGNIFICANCE = %g\n", $hit->name, $hit->expect; <STDIN>;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2161 # The BLAST report may not have had a description section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2162 if(not $self->{'_has_descriptions'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2163 $self->_process_significance($hit->signif, $my_signif);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2164 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2165 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2167 if($@) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2168 # Throwing lots of errors can slow down the code substantially.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2169 # Error handling code is not that efficient.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2170 #print STDERR "\nERROR _parse_alignment: $@\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2171 push @{$self->{'_blast_errs'}}, $@;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2172 $hit->destroy if ref $hit;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2173 undef $hit;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2174 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2175 # Collect overall signif data if we don't already have it,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2176 # (as occurs if no -signif parameter is supplied).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2177 my $hit_signif = $hit->signif;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2178
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2179 if (not $Blast->{'_confirm_significance'} ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2180 $self->{'_highestSignif'} = ($hit_signif > $self->{'_highestSignif'})
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2181 ? $hit_signif : $self->{'_highestSignif'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2182
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2183 $self->{'_lowestSignif'} = ($hit_signif < $self->{'_lowestSignif'})
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2184 ? $hit_signif : $self->{'_lowestSignif'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2185 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2186
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2187 # Test significance using custom function (if supplied)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2188 if($filt_func) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2189 if(&$filt_func($hit)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2190 push @{$self->{'_hits'}}, $hit;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2191 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2192 $hit->destroy; undef $hit;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2193 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2194 } elsif($hit_signif <= $my_signif) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2195 push @{$self->{'_hits'}}, $hit;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2196 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2197 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2198
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2199 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2200
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2201 =head2 _parse_footer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2203 Usage : n/a; internal function. called by _parse_alignment()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2204 Purpose : Extracts statistical and other parameters from the BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2205 : Sets various key elements such as the program and version,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2206 : gapping, and the layout for the report (blast1 or blast2).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2207 Argument : Data to be parsed.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2208 Returns : String containing an alignment section for processing by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2209 : _parse_alignment().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2210 Throws : Exception if cannot find the parameters section of report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2211 : Warning if cannot determine if gapping was used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2212 : Warning if cannot determine the scoring matrix used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2213 Comments : This method must always get called, even if the -STATS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2214 : parse() parameter is false. The reason is that the layout
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2215 : of the report and the presence of gapping must always be set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2216 : The determination whether to set additional stats is made
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2217 : by methods called by _parse_footer().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2218
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2219 See Also : L<parse()|parse>, L<_parse_alignment()|_parse_alignment>, L<_set_database()|_set_database>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2220
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2221 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2222
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2223 #---------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2224 sub _parse_footer {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2225 #---------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2226 # Basic strategy:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2227 # 1. figure out if we're supposed to get the stats,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2228 # 2. figure out if the stats are to be shared. some, not all can be shared
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2229 # (e.g., db info and matrix can be shared; Karlin-Altschul params cannot.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2230 # However, this method assumes they are all sharable.)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2231 # 3. Parse the stats.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2232 # 4. return the block before the parameters section if the supplied data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2233 # contains a footer parameters section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2234
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2235 my ($self, $data) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2236 my ($client, $last_align, $params);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2237
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2238 # printf STDERR "\nPARSING PARAMETERS for %s $self.\n", $self->name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2239
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2240 # Should the parameters be shared?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2241 # If so, set $self to be the static $Blast object and return if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2242 # the parameters were already set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2243 # Before returning, we need to extract the last alignment section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2244 # from the parameter section, if any.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2245
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2246 if ($Blast->{'_share'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2247 $client = $self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2248 $self = $Blast if $Blast->{'_share'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2249 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2250
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2251 my $get_stats = $Blast->{'_get_stats'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2252
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2253 if( $data =~ /(.+?)${Newline}CPU time: (.*)/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2254 # NCBI-Blast2 format (v2.04).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2255 ($last_align, $params) = ($1, $2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2256 return $last_align if $client->{'_found_params'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2257 $self->_set_blast2_stats($params);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2258
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2259 } elsif( $data =~ /(.+?)${Newline}Parameters:(.*)/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2260 # NCBI-Blast1 or WashU-Blast2 format.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2261 ($last_align, $params) = ($1, $2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2262 return $last_align if $client->{'_found_params'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2263 $self->_set_blast1_stats($params);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2264
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2265 } elsif( $data =~ /(.+?)$Newline\s+Database:(.*)/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2266 # Gotta watch out for confusion with the Database: line in the header
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2267 # which will be present in the last hit of an internal Blast report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2268 # in a multi-report stream.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2269
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2270 # NCBI-Blast2 format (v2.05).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2271 ($last_align, $params) = ($1, $2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2272 return $last_align if $client->{'_found_params'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2273 $self->_set_blast2_stats($params);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2274
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2275 } elsif( $data =~ /(.+?)$Newline\s*Searching/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2276 # trying to detect a Searching at the end of a PSI-blast round.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2277 # Gotta watch out for confusion with the Searching line in the header
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2278 # which will be present in the last hit of an internal Blast report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2279 # in a multi-report, non-PSI-blast stream.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2280
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2281 # PSI-Blast format (v2.08).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2282 ($last_align) = ($1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2283 return $last_align; # if $client->{'_found_params'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2284 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2285
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2286 # If parameter section was found, set a boolean,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2287 # otherwise return original data.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2288
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2289 if (defined($params)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2290 $client->{'_found_params'} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2291 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2292 return $data;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2293 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2294
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2295 $self->_set_database($params) if $get_stats;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2296
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2297 # The {'_gapped'} member should be set in the _set_blast?_stats() call.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2298 # This is a last minute attempt to deduce it.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2299
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2300 if(!defined($self->{'_gapped'})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2301 if($self->program_version() =~ /^1/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2302 $self->{'_gapped'} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2303 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2304 if($self->strict > 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2305 $self->warn("Can't determine if gapping was used. Assuming gapped.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2306 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2307 $self->{'_gapped'} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2308 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2309 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2310
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2311 return $last_align;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2312 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2314 =head2 _set_blast2_stats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2315
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2316 Usage : n/a; internal function called by _parse_footer()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2317 Purpose : Extracts statistical and other parameters from BLAST2 report footer.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2318 : Stats collected: database release, gapping,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2319 : posted date, matrix used, filter used, Karlin-Altschul parameters,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2320 : E, S, T, X, W.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2321 Throws : Exception if cannot get "Parameters" section of Blast report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2322
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2323 See Also : L<parse()|parse>, L<_parse_footer()|_parse_footer>, L<_set_database()|_set_database>, B<Bio::Tools::SeqAnal::set_date()>,L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2324
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2325 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2326
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2327 #---------------------'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2328 sub _set_blast2_stats {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2329 #---------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2330 my ($self, $data) = (@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2331
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2332 if($data =~ /$Newline\s*Gapped/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2333 $self->{'_gapped'} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2334 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2335 $self->{'_gapped'} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2336 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2337
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2338 # Other stats are not always essential.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2339 return unless $Blast->{'_get_stats'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2341 # Blast2 doesn't report what filter was used in the parameters section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2342 # It just gives a warning that *some* filter was used in the header.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2343 # You just have to know the defaults (currently: protein = SEG, nucl = DUST).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2344 if($data =~ /\bfiltered\b/si) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2345 $self->{'_filter'} = 'DEFAULT FILTER';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2346 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2347 $self->{'_filter'} = 'NONE';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2348 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2349
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2350 if($data =~ /Gapped$Newline\s*Lambda +K +H$Newline +(.+?)$Newline/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2351 my ($l, $k, $h) = split(/\s+/, $1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2352 $self->{'_lambda'} = $l || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2353 $self->{'_k'} = $k || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2354 $self->{'_h'} = $h || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2355 } elsif($data =~ /Lambda +K +H$Newline +(.+?)$Newline/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2356 my ($l, $k, $h) = split(/\s+/, $1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2357 $self->{'_lambda'} = $l || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2358 $self->{'_k'} = $k || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2359 $self->{'_h'} = $h || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2360 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2361
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2362 if($data =~ /$Newline\s*Matrix: (.+?)$Newline/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2363 $self->{'_matrix'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2364 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2365 $self->{'_matrix'} = $DEFAULT_MATRIX.'?';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2366 if($self->strict > 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2367 $self->warn("Can't determine scoring matrix. Assuming $DEFAULT_MATRIX.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2368 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2369 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2370
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2371 if($data =~ /$Newline\s*Gap Penalties: Existence: +(\d+), +Extension: (\d+)$Newline/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2372 $self->{'_gapCreation'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2373 $self->{'_gapExtension'} = $2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2374 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2375 if($data =~ /sequences better than (\d+):/s) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2376 $self->{'_expect'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2377 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2378
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2379 if($data =~ /$Newline\s*T: (\d+)/o) { $self->{'_word_size'} = $1; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2380 if($data =~ /$Newline\s*A: (\d+)/o) { $self->{'_a'} = $1; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2381 if($data =~ /$Newline\s*S1: (\d+)/o) { $self->{'_s'} = $1; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2382 if($data =~ /$Newline\s*S2: (\d+)/o) { $self->{'_s'} .= ", $1"; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2383 if($data =~ /$Newline\s*X1: (\d+)/o) { $self->{'_x1'} = $1; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2384 if($data =~ /$Newline\s*X2: (\d+)/o) { $self->{'_x2'} = $1; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2385 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2386
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2387 =head2 _set_blast1_stats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2388
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2389 Usage : n/a; internal function called by _parse_footer()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2390 Purpose : Extracts statistical and other parameters from BLAST 1.x style reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2391 : Handles NCBI Blast1 and WashU-Blast2 formats.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2392 : Stats collected: database release, gapping,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2393 : posted date, matrix used, filter used, Karlin-Altschul parameters,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2394 : E, S, T, X, W.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2395
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2396 See Also : L<parse()|parse>, L<_parse_footer()|_parse_footer>, L<_set_database()|_set_database>, B<Bio::Tools::SeqAnal::set_date()>,L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2397
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2398 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2399
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2400 #----------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2401 sub _set_blast1_stats {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2402 #----------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2403 my ($self, $data) = (@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2404
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2405 if(!$self->{'_gapped'} and $self->program_version() =~ /^2[\w\-\.]+WashU/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2406 $self->_set_gapping_wu($data);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2407 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2408 $self->{'_gapped'} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2409 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2410
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2411 # Other stats are not always essential.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2412 return unless $Blast->{'_get_stats'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2413
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2414 if($data =~ /filter=(.+?)$Newline/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2415 $self->{'_filter'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2416 } elsif($data =~ /filter$Newline +(.+?)$Newline/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2417 $self->{'_filter'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2418 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2419 $self->{'_filter'} = 'NONE';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2420 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2421
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2422 if($data =~ /$Newline\s*E=(\d+)$Newline/so) { $self->{'_expect'} = $1; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2423
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2424 if($data =~ /$Newline\s*M=(\w+)$Newline/so) { $self->{'_matrix'} = $1; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2425
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2426 if($data =~ /\s*Frame MatID Matrix name .+?$Newline +(.+?)$Newline/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2427 ## WU-Blast2.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2428 my ($fr, $mid, $mat, $lu, $ku, $hu, $lc, $kc, $hc) = split(/\s+/,$1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2429 $self->{'_matrix'} = $mat || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2430 $self->{'_lambda'} = $lu || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2431 $self->{'_k'} = $ku || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2432 $self->{'_h'} = $hu || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2433
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2434 } elsif($data =~ /Lambda +K +H$Newline +(.+?)$Newline/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2435 ## NCBI-Blast1.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2436 my ($l, $k, $h) = split(/\s+/, $1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2437 $self->{'_lambda'} = $l || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2438 $self->{'_k'} = $k || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2439 $self->{'_h'} = $h || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2440 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2441
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2442 if($data =~ /E +S +W +T +X.+?$Newline +(.+?)$Newline/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2443 # WashU-Blast2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2444 my ($fr, $mid, $len, $elen, $e, $s, $w, $t, $x, $e2, $s2) = split(/\s+/,$1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2445 $self->{'_expect'} ||= $e || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2446 $self->{'_s'} = $s || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2447 $self->{'_word_size'} = $w || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2448 $self->{'_t'} = $t || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2449 $self->{'_x'} = $x || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2450
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2451 } elsif($data =~ /E +S +T1 +T2 +X1 +X2 +W +Gap$Newline +(.+?)$Newline/so) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2452 ## NCBI-Blast1.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2453 my ($e, $s, $t1, $t2, $x1, $x2, $w, $gap) = split(/\s+/,$1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2454 $self->{'_expect'} ||= $e || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2455 $self->{'_s'} = $s || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2456 $self->{'_word_size'} = $w || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2457 $self->{'_t1'} = $t1 || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2458 $self->{'_t2'} = $t2 || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2459 $self->{'_x1'} = $x1 || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2460 $self->{'_x2'} = $x2 || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2461 $self->{'_gap'} = $gap || 'UNKNOWN';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2462 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2463
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2464 if(!$self->{'_matrix'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2465 $self->{'_matrix'} = $DEFAULT_MATRIX.'?';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2466 if($self->strict > 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2467 $self->warn("Can't determine scoring matrix. Assuming $DEFAULT_MATRIX.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2468 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2469 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2470 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2471
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2472 =head2 _set_gapping_wu
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2473
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2474 Usage : n/a; internal function called by _set_blast1_stats()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2475 Purpose : Determine if gapping was on for WashU Blast reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2476 Comments : In earlier versions, gapping was always specified
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2477 : but in the current version (2.0a19MP), gapping is on by default
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2478 : and there is no positive "gapping" indicator in the Parameters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2479 : section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2480
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2481 See Also : L<_set_blast1_stats()|_set_blast1_stats>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2482
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2483 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2484
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2485 #--------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _set_gapping_wu {
    # Records in $self->{'_gapped'} whether the report text indicates a
    # gapped search: 0 when "nogap"/"nogaps" is immediately followed by
    # $Newline somewhere in $data, 1 in every other case (including when
    # no gapping indicator is present at all).
    my ($self, $data) = @_;

    # Text matching /nogaps?$Newline/ necessarily also matches
    # /gaps?$Newline/, so checking the negative indicator alone is enough.
    my $gapping_disabled = ($data =~ /nogaps?$Newline/so);

    $self->{'_gapped'} = $gapping_disabled ? 0 : 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2496
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2497 =head2 _set_date
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2498
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2499 Usage : n/a; internal function called by _parse_footer()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2500 Purpose : Determine the date on which the Blast analysis was performed.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2501 Comments : Date information is not consistently added to Blast output.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2502 : Uses superclass method set_date() to set date from the file,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2503 : (if any).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2504
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2505 See Also : L<_parse_footer()|_parse_footer>, B<Bio::Tools::SeqAnal::set_date()>,L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2506
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2507 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2508
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2509 #--------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _set_date {
    # Looks for a date stamp in the report text and hands the first one
    # found to set_date(). The patterns are tried in a fixed order; when
    # none matches, set_date() is called without arguments, but only if
    # $self->file is true.
    my ($self, $data) = @_;

    # "Start: <date> End:" block.
    return $self->set_date($1) if $data =~ /Start:\s+(.+?)\s+End:/s;

    # E-mailed reports have a Date: field.
    return $self->set_date($1) if $data =~ /Date:\s+(.*?)$Newline/so;

    # "done at <date>" line.
    return $self->set_date($1) if $data =~ /done\s+at (.+?)$Newline/so;

    # Network BLAST reports from NCBI are time stamped as follows:
    # Fri Apr 18 15:55:41 EDT 1997, Up 1 day, 19 mins, 1 user, load: 19.54, 19.13, 17.77
    return $self->set_date($1) if $data =~ /$Newline([\w:, ]+), Up \d+/so;

    # No date in the report itself: let set_date() work without an
    # explicit value (the original comments say it uses the file date).
    $self->set_date() if $self->file;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2533
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2534 =head2 _set_length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2535
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2536 Usage : n/a; called automatically during Blast report parsing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2537 Purpose : Sets the length of the query sequence (extracted from report).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2538 Returns : integer (length of the query sequence)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2539 Throws : Exception if cannot determine the query sequence length from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2540 : the BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2541 : Exception if the length is below the min_length cutoff (if any).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2542 Comments : The logic here is a bit different from the other _set_XXX()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2543 : methods since the significance of the BLAST report is assessed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2544 : if MIN_LENGTH is set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2545
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2546 See Also : B<Bio::Tools::SeqAnal::length()>, L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2547
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2548 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2549
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2550 #---------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _set_length {
    # Extracts the query sequence length from the report text and stores
    # it via $self->length().
    #
    # Argument : $data - report text containing a "(N letters)" or
    #            "(N letters;" fragment on a line of its own.
    # Returns  : whatever $self->length($length) returns.
    # Throws   : Exception if no length can be found in $data.
    #          : Exception if $Blast->{'_min_length'} is defined and the
    #            length is below it.
    my ($self, $data) = @_;

    my $length;
    # NOTE(review): the '|' inside the character class is a literal pipe,
    # which is probably unintended; left as-is so the same inputs match.
    if ($data =~ m/$Newline\s+\(([\d|,]+) letters[\);]/so) {
        $length = $1;
        $length =~ s/,//g;    # "1,234" -> "1234"
    }
    else {
        $self->throw("Can't determine sequence length from BLAST report.");
    }

    # Use one key for both the guard and the comparison. Comparing against
    # a different, never-set key made the cutoff silently ineffective
    # (undef compares as 0 with warnings switched off).
    my $min_length = $Blast->{'_min_length'};
    if (defined $min_length) {
        local $^W = 0;    # stay quiet if either value is not strictly numeric
        if ($length < $min_length) {
            $self->throw("Query sequence too short for ${\$self->name} ($length)",
                         "Minimum length is $min_length");
        }
    }

    $self->length($length);    # defined in superclass.
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2575
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2576 =head2 _set_database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2577
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2578 Usage : n/a; called automatically during Blast report parsing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2579 Purpose : Sets the name of the database used by the BLAST analysis.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2580 : Extracted from raw BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2581 Throws : Exception if the name of the database cannot be determined.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2582 Comments : The database name is used by methods or related objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2583 : for database-specific parsing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2584
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2585 See Also : L<parse()|parse>, B<Bio::Tools::SeqAnal::database()>,B<Bio::Tools::SeqAnal::_set_db_stats()>,L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2586
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2587 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2588
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2589 #------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _set_database {
    # Collects database name, release date, letter count and sequence
    # count from the report footer text and passes them on to
    # _set_db_stats(). Missing items produce warnings; apart from the
    # missing-name warning (issued when $self->database is also false),
    # warnings are only issued when $self->strict is greater than 0.
    my ($self, $data) = @_;

    my $strict = $self->strict > 0;

    # This is fail-safe since the DB name usually gets set in
    # _parse_header(); in some reports the database is only listed at the
    # bottom (NCBI 2.0.8).
    my ($db_name) = $data =~ m/Database: +(.+?)$Newline/so;
    if (!defined $db_name and !$self->database) {
        $self->warn("Can't determine database name from BLAST report.");
    }

    # "Posted date:" takes precedence over "Release date:".
    my ($release) = $data =~ m/Posted date: +(.+?)$Newline/so;
    ($release) = $data =~ m/Release date: +(.+?)$Newline/so
        unless defined $release;
    if (!defined $release and $strict) {
        $self->warn("Can't determine database release date.");
    }

    # Total letters: two alternative labels, first one wins.
    my ($letters) = $data =~ m/letters in database: +([\d,]+)/si;
    ($letters) = $data =~ m/length of database: +([\d,]+)/si
        unless defined $letters;
    if (!defined $letters and $strict) {
        $self->warn("Can't determine number of letters in database.\n$data\n");
    }

    # Total sequences: two alternative labels, first one wins.
    my ($seq_count) = $data =~ m/sequences in database: +([\d,]+)/si;
    ($seq_count) = $data =~ m/number of sequences: +([\d,]+)/si
        unless defined $seq_count;
    if (!defined $seq_count and $strict) {
        $self->warn("Can't determine number of sequences in database.\n$data\n");
    }

    $self->_set_db_stats(
        -NAME    => $db_name,
        -RELEASE => $release   || '',
        -LETTERS => $letters   || '',
        -SEQS    => $seq_count || '',
    );
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2636
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2637 =head2 _set_query
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2638
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2639 Usage : n/a; called automatically during Blast report parsing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2640 Purpose : Set the name of the query and the query description.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2641 : Extracted from the raw BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2642 Returns : String containing name of query extracted from report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2643 Throws : Warning if the name of the query cannot be obtained.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2644
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2645 See Also : B<Bio::Tools::SeqAnal::query_desc()>,L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2646
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2647 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2648
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2649 #---------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _set_query {
    # Extracts the query name and description from the "Query=" line of
    # the report text.
    #
    # Argument : $data - report text containing a line starting "Query=".
    # Effects  : calls $self->query_desc() with the description ('' when
    #            there is none) and $self->name() with the first
    #            whitespace-delimited word ('UNKNOWN' when there is none).
    # Returns  : result of $self->name(...) on success, otherwise the
    #            result of $self->warn(...).
    my ($self, $data) = @_;

    if ($data =~ m/${Newline}Query= *(.+?)$Newline/so) {
        my $info = $1;
        $info =~ s/TITLE //;

        # Split the query line into name and description, capturing into
        # lexicals. Reading $1/$2 after an unchecked match picks up stale
        # or undefined captures when the line has no description part.
        my ($name, $desc) = $info =~ /(\S+)(?:\s(.*))?/;

        $self->query_desc(defined $desc ? $desc : '');

        # Set name of Blast object and return. Test definedness/length
        # rather than truth so a name of "0" is kept.
        return $self->name(defined $name && length $name ? $name : 'UNKNOWN');
    }

    $self->warn("Can't determine query sequence name from BLAST report.");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2669
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2670 =head2 _parse_signif
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2671
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2672 Usage : &_parse_signif(string, layout, gapped);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2673 : This is a class function.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2674 Purpose : Extracts the P- or Expect value from a single line of a BLAST description section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2675 Example : &_parse_signif("PDB_UNIQUEP:3HSC_ heat-shock cognate ... 799 4.0e-206 2", 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2676 : &_parse_signif("gi|758803 (U23828) peritrophin-95 precurs 38 0.19", 2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2677 Argument : string = line from BLAST description section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2678 : layout = integer (1 or 2)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2679 : gapped = boolean (true if gapped Blast).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2680 Returns : Float (0.001 or 1e-03)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2681 Status : Static
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2682
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2683 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2684
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2685 #------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _parse_signif {
    # Class function: picks the P/Expect value out of one description
    # line. The value is taken from the last whitespace-separated field,
    # or from the one before it when $layout is 1 or $gapped is false.
    # If the chosen field contains neither '.' nor '-', the other of the
    # two positions is used instead. A value starting with 'e' (e.g.
    # "e-50") gets a leading "1".
    my ($line, $layout, $gapped) = @_;

    my @fields = split ' ', $line;

    # When processing both Blast1 and Blast2 reports in the same run,
    # the position has to be worked out for every call.
    my $from_end = ($layout == 1 || !$gapped) ? 1 : 0;
    my $signif   = $fields[ $#fields - $from_end ];

    # Fail-safe check: fall back to the alternative position.
    unless ($signif =~ /[.-]/) {
        $from_end = $from_end ? 0 : 1;
        $signif   = $fields[ $#fields - $from_end ];
    }

    if ($signif =~ /^e/i) {
        $signif = "1$signif";
    }
    return $signif;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2710
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2711 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2712 ## BEGIN ACCESSOR METHODS THAT INCORPORATE THE STATIC $Blast OBJECT.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2713 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2714
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub program {
    # Overridden accessor that incorporates the static $Blast object.
    # With arguments: forwards them to the superclass method on $self.
    # Without: returns $self's superclass value, or, when that is false,
    # the value obtained by calling the same method on $Blast.
    my $self = shift;

    if (@_) {
        return $self->SUPER::program(@_);    # set
    }
    return $self->SUPER::program() || $Blast->SUPER::program();    # get
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2721
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub program_version {
    # Overridden accessor that incorporates the static $Blast object.
    # With arguments: forwards them to the superclass method on $self.
    # Without: returns $self's superclass value, or, when that is false,
    # the value obtained by calling the same method on $Blast.
    my $self = shift;

    if (@_) {
        return $self->SUPER::program_version(@_);    # set
    }
    return $self->SUPER::program_version()
        || $Blast->SUPER::program_version();         # get
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2728
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub database {
    # Overridden accessor that incorporates the static $Blast object.
    # With arguments: forwards them to the superclass method on $self.
    # Without: returns $self's superclass value, or, when that is false,
    # the value obtained by calling the same method on $Blast.
    my $self = shift;

    if (@_) {
        return $self->SUPER::database(@_);    # set
    }
    return $self->SUPER::database() || $Blast->SUPER::database();    # get
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2735
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub database_letters {
    # Overridden accessor that incorporates the static $Blast object.
    # With arguments: forwards them to the superclass method on $self.
    # Without: returns $self's superclass value, or, when that is false,
    # the value obtained by calling the same method on $Blast.
    my $self = shift;

    if (@_) {
        return $self->SUPER::database_letters(@_);    # set
    }
    return $self->SUPER::database_letters()
        || $Blast->SUPER::database_letters();         # get
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2742
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub database_release {
    # Overridden accessor that incorporates the static $Blast object.
    # With arguments: forwards them to the superclass method on $self.
    # Without: returns $self's superclass value, or, when that is false,
    # the value obtained by calling the same method on $Blast.
    my $self = shift;

    if (@_) {
        return $self->SUPER::database_release(@_);    # set
    }
    return $self->SUPER::database_release()
        || $Blast->SUPER::database_release();         # get
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2749
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub database_seqs {
    # Overridden accessor that incorporates the static $Blast object.
    # With arguments: forwards them to the superclass method on $self.
    # Without: returns $self's superclass value, or, when that is false,
    # the value obtained by calling the same method on $Blast.
    my $self = shift;

    if (@_) {
        return $self->SUPER::database_seqs(@_);    # set
    }
    return $self->SUPER::database_seqs()
        || $Blast->SUPER::database_seqs();         # get
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2756
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub date {
    # Overridden accessor that incorporates the static $Blast object.
    # With arguments: forwards them to the superclass method on $self.
    # Without: returns $self's superclass value, or, when that is false,
    # the value obtained by calling the same method on $Blast.
    my $self = shift;

    if (@_) {
        return $self->SUPER::date(@_);    # set
    }
    return $self->SUPER::date() || $Blast->SUPER::date();    # get
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2763
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub best {
    # Overridden accessor that incorporates the static $Blast object.
    # Unlike the sibling accessors, both the set and the get form call
    # the superclass method on $Blast; the invocant is only removed from
    # the argument list and not otherwise used.
    my $self = shift;

    # An empty argument list is the "get" form, a non-empty one the "set"
    # form; either way the remaining arguments go to $Blast unchanged.
    return $Blast->SUPER::best(@_);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2770
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2771 =head2 signif
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2772
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2773 Usage : $blast->signif();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2774 Purpose : Gets the P or Expect value used as significance screening cutoff.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2775 Returns : Scientific notation number with this format: 1.0e-05.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2776 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2777 Comments : Screening of significant hits uses the data provided on the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2778 : description line. For Blast1 and WU-Blast2, this data is P-value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2779 : for Blast2 it is an Expect value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2780 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2781 : Obtains info from the static $Blast object if it has not been set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2782 : for the current object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2783
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2784 See Also : L<_set_signif()|_set_signif>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2785
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2786 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2787
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2788 #-----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub signif {
    # Returns the significance cutoff formatted with "%.1e" (for example
    # 1.0e-05). Uses this object's '_significance' value when it is true,
    # otherwise the one stored in the static $Blast object.
    my ($self) = @_;

    my $cutoff = $self->{'_significance'};
    $cutoff = $Blast->{'_significance'} unless $cutoff;

    return sprintf("%.1e", $cutoff);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2795
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2796 =head2 is_signif
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2797
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2798 Usage : $blast->is_signif();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2799 Purpose : Determine if the BLAST report contains significant hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2800 Returns : Boolean
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2801 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2802 Comments : BLAST reports without significant hits but with defined
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2803 : significance criteria will throw exceptions during construction.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2804 : This obviates the need to check significant() for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2805 : such objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2806
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2807 See Also : L<_set_signif()|_set_signif>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2808
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2809 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2810
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2811 #------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2812 sub is_signif { my $self = shift; return $self->{'_is_significant'}; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2813 #------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2814
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2815 # is_signif() doesn't incorporate the static $Blast object but is included
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2816 # here to be with the other 'signif' methods.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2817
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2818 =head2 signif_fmt
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2819
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2820 Usage : $blast->signif_fmt( [FMT] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2821 Purpose : Allows retrieval of the P/Expect exponent values only
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2822 : or as a two-element list (mantissa, exponent).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2823 Usage : $blast_obj->signif_fmt('exp');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2824 : $blast_obj->signif_fmt('parts');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2825 Returns : String or '' if not set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2826 Argument : String, FMT = 'exp' (return the exponent only)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2827 : = 'parts' (return mantissa + exponent in 2-elem list)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2828 : = undefined (return the raw value)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2829 Comments : P/Expect values are still stored internally as the full,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2830 : scientific notation value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2831 : This method uses the static $Blast object since this issue
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2832 : will pertain to all Blast reports within a given set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2833 : This setting is propagated to Bio::Tools::Blast::Sbjct.pm.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2834
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2835 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2836
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2837 #-------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2838 sub signif_fmt {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2839 #-------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2840 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2841 if(@_) { $Blast->{'_signif_fmt'} = shift; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2842 $Blast->{'_signif_fmt'} || '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2843 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2844
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2845 =head2 min_length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2846
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2847 Usage : $blast->min_length();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2848 Purpose : Gets the query sequence length used as significance screening criteria.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2849 Returns : Integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2850 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2851 Comments : Obtains info from the static $Blast object if it has not been set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2852 : for the current object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2853
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2854 See Also : L<_set_signif()|_set_signif>, L<signif()|signif>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2855
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2856 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2857
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2858 #--------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2859 sub min_length {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2860 #--------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2861 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2862 $self->{'_min_length'} || $Blast->{'_min_length'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2863 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2864
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2865 =head2 gapped
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2866
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2867 Usage : $blast->gapped();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2868 Purpose : Set/Get boolean indicator for gapped BLAST.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2869 Returns : Boolean
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2870 Argument : Boolean (optional). If supplied, sets the indicator for this object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2871 Comments : Obtains info from the static $Blast object if it has not been set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2872 : for the current object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2873
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2874 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2875
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2876 #-----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2877 sub gapped {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2878 #-----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2879 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2880 if(@_) { $self->{'_gapped'} = shift; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2881 $self->{'_gapped'} || $Blast->{'_gapped'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2882 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2883
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2884 =head2 _get_stats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2885
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2886 Usage : n/a; internal method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2887 Purpose : Get indicator for collecting full statistics from report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2888 Returns : Boolean (0 | 1)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2889 Comments : Obtains info from the static $Blast object which gets set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2890 : by _init_parse_params().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2891
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2892 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2893
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2894 #---------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2895 sub _get_stats {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2896 #---------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2897 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2898 $Blast->{'_get_stats'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2899 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2900
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2901 =head2 _layout
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2902
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2903 Usage : n/a; internal method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2904 Purpose : Set/Get indicator for the layout of the report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2905 Returns : Integer (1 | 2)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2906 : Defaults to 2 if not set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2907 Comments : Blast1 and WashU-Blast2 have a layout = 1.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2908 : This is intended for internal use by this and closely
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2909 : allied modules like Sbjct.pm and HSP.pm.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2910 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2911 : Obtains info from the static $Blast object if it has not been set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2912 : for the current object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2913
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2914 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2915
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2916 #------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2917 sub _layout {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2918 #------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2919 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2920 if(@_) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2921 # Optimization if we know all reports share the same stats.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2922 if($Blast->{'_share'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2923 $Blast->{'_layout'} = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2924 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2925 $self->{'_layout'} = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2926 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2927 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2928 $self->{'_layout'} || $Blast->{'_layout'} || 2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2929 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2930
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2931 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2932 ## END ACCESSOR METHODS THAT INCORPORATE THE STATIC $Blast OBJECT.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2933 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2934
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2935 =head2 hits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2936
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2937 Usage : $blast->hits();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2938 Purpose : Get a list containing all BLAST hit (Sbjct) objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2939 : Get the number of significant hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2940 Examples : @hits = $blast->hits();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2941 : $num_signif = $blast->hits();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2942 Returns : List context : list of Bio::Tools::Blast::Sbjct.pm objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2943 : or an empty list if there are no hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2944 : Scalar context: integer (number of significant hits)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2945 : or zero if there are no hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2946 : (Equivalent to num_hits()).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2947 Argument : n/a. Relies on wantarray.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2948 Throws : n/a.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2949 : Not throwing exception because the absence of hits may have
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2950 : resulted from stringent significance criteria, not a failure
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2951 : to set the hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2952
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2953 See Also : L<hit()|hit>, L<num_hits()|num_hits>, L<is_signif()|is_signif>, L<_set_signif()|_set_signif>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2954
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2955 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2956
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2957 #----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2958 sub hits {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2959 #----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2960 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2961
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2962 if(wantarray) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2963 my @ary = ref($self->{'_hits'}) ? @{$self->{'_hits'}} : ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2964 return @ary;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2965 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2966 return $self->num_hits();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2967 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2968
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2969 # my $num = ref($self->{'_hits'}) ? scalar(@{$self->{'_hits'}}) : 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2970 # my @ary = ref($self->{'_hits'}) ? @{$self->{'_hits'}} : ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2971 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2972 # return wantarray
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2973 # # returning list containing all hits or empty list.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2974 # ? $self->{'_is_significant'} ? @ary : ()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2975 # # returning number of hits or 0.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2976 # : $self->{'_is_significant'} ? $num : 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2977 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2978
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2979 =head2 hit
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2980
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2981 Example : $blast_obj->hit( [class] )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2982 Purpose : Get a specific hit object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2983 : Provides some syntactic sugar for the hits() method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2984 Usage : $hitObj = $blast->hit();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2985 : $hitObj = $blast->hit('best');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2986 : $hitObj = $blast->hit('worst');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2987 : $hitObj = $blast->hit( $name );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2988 Returns : Object reference for a Bio::Tools::Blast::Sbjct.pm object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2989 : undef if there are no hit (Sbjct) objects defined.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2990 Argument : Class (or no argument).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2991 : No argument (default) = highest scoring hit (same as 'best').
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2992 : 'best' or 'first' = highest scoring hit.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2993 : 'worst' or 'last' = lowest scoring hit.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2994 : $name = retrieve a hit by seq id (case-insensitive).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2995 Throws : Exception if the Blast object has no significant hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2996 : Exception if a hit cannot be found when supplying a specific
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2997 : hit sequence identifier as an argument.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2998 Comments : 'best' = lowest significance value (P or Expect) among significant hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2999 : 'worst' = highest significance value (P or Expect) among significant hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3000
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3001 See Also : L<hits()|hits>, L<num_hits()|num_hits>, L<is_signif()|is_signif>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3002
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3003 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3004
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3005 #---------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3006 sub hit {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3007 #---------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3008 my( $self, $option) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3009 $option ||= 'best';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3010
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3011 if($Blast->{'_no_aligns'} || ! ref($self->{'_hits'})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3012 return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3013 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3014
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3015 $self->{'_is_significant'} or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3016 $self->throw("There were no significant hits.",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3017 "Use num_hits(), hits(), is_signif() to check.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3018
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3019 my @hits = @{$self->{'_hits'}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3020
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3021 return $hits[0] if $option =~ /^(best|first|1)$/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3022 return $hits[$#hits] if $option =~ /^(worst|last)$/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3023
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3024 # Get hit by name.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3025 foreach ( @hits ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3026 return $_ if $_->name() =~ /$option/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3027 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3028
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3029 $self->throw("Can't get hit for: $option");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3030 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3031
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3032 =head2 num_hits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3033
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3034 Usage : $blast->num_hits( ['total'] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3035 Purpose : Get number of significant hits or number of total hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3036 Examples : $num_signif = $blast->num_hits;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3037 : $num_total = $blast->num_hits('total');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3038 Returns : Integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3039 Argument : String = 'total' (or no argument).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3040 : No argument (Default) = return number of significant hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3041 : 'total' = number of total hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3042 Throws : n/a.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3043 : Not throwing exception because the absence of hits may have
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3044 : resulted from stringent significance criteria, not a failure
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3045 : to set the hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3046 Comments : A significant hit is defined as a hit with an expect value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3047 : (or P value for WU-Blast) at or below the -signif parameter
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3048 : used when parsing the report. Additionally, if a filter function
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3049 : was supplied, the significant hit must also pass that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3050 : criteria.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3051
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3052 See Also : L<hits()|hits>, L<hit()|hit>, L<is_signif()|is_signif>, L<_set_signif()|_set_signif>, L<parse()|parse>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3053
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3054 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3055
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3056 #-------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3057 sub num_hits {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3058 #-------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3059 my( $self, $option) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3060 $option ||= '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3061
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3062 $option =~ /total/i and return $self->{'_num_hits'} || 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3063
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3064 # Default: returning number of significant hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3065 # return $self->{'_num_hits_significant'} || 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3066 # return 0 if not ref $self->{'_hits'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3067
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3068 if(ref $self->{'_hits'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3069 return scalar(@{$self->{'_hits'}});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3070 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3071 return $self->{'_num_hits_significant'} || 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3072 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3073 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3074
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3075 =head2 lowest_p
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3076
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3077 Usage : $blast->lowest_p()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3078 Purpose : Get the lowest P-value among all hits in a BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3079 : Syntactic sugar for $blast->hit('best')->p().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3080 Returns : Float or scientific notation number.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3081 : Returns -1.0 if lowest_p has not been set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3082 Argument : n/a.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3083 Throws : Exception if the Blast report does not report P-values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3084 : (as is the case for NCBI Blast2).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3085 Comments : A value is returned regardless of whether or not there were
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3086 : significant hits ($DEFAULT_SIGNIF, currently 999).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3087
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3088 See Also : L<lowest_expect()|lowest_expect>, L<lowest_signif()|lowest_signif>, L<highest_p()|highest_p>, L<signif_fmt()|signif_fmt>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3089
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3090 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3091
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3092 #------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3093 sub lowest_p {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3094 #------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3095 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3096
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3097 # Layout 2 = NCBI Blast 2.x does not report P-values.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3098 $self->_layout == 2 and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3099 $self->throw("Can't get P-value with BLAST2.",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3100 "Use lowest_signif() or lowest_expect()");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3101
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3102 return $self->{'_lowestSignif'} || -1.0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3103 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3104
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3105 =head2 lowest_expect
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3106
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3107 Usage : $blast->lowest_expect()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3108 Purpose : Get the lowest Expect value among all hits in a BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3109 : Syntactic sugar for $blast->hit('best')->expect()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3110 Returns : Float or scientific notation number.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3111 : Returns -1.0 if lowest_expect has not been set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3112 Argument : n/a.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3113 Throws : Exception if there were no significant hits and the report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3114 : does not have Expect values on the description lines
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3115 : (i.e., Blast1, WashU-Blast2).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3117 See Also : L<lowest_p()|lowest_p>, L<lowest_signif()|lowest_signif>, L<highest_expect()|highest_expect>, L<signif_fmt()|signif_fmt>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3118
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3119 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3120
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3121 #------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3122 sub lowest_expect {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3123 #------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3124 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3125
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3126 if ($self->_layout == 2) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3127 return $self->{'_lowestSignif'} || -1.0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3128 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3130 if($self->{'_is_significant'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3131 my $bestHit = $self->{'_hits'}->[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3132 return $bestHit->expect();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3133 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3134 $self->throw("Can't get lowest expect value: no significant hits ",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3135 "The format of this report requires expect values to be extracted$Newline".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3136 "from the hits themselves.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3137 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3138 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3140 =head2 highest_p
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3141
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3142 Example : $blast->highest_p( ['overall'])
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3143 Purpose : Get the highest P-value among all hits in a BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3144 : Syntactic sugar for $blast->hit('worst')->p()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3145 : Can also get the highest P-value overall (not just among signif hits).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3146 Usage : $p_signif = $blast->highest_p();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3147 : $p_all = $blast->highest_p('overall');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3148 Returns : Float or scientific notation number.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3149 : Returns -1.0 if highest_p has not been set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3150 Argument : String 'overall' or no argument.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3151 : No argument = get highest P-value among significant hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3152 Throws : Exception if object is created from a Blast2 report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3153 : (which does not report P-values).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3155 See Also : L<highest_signif()|highest_signif>, L<lowest_p()|lowest_p>, L<_set_signif()|_set_signif>, L<signif_fmt()|signif_fmt>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3157 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3159 #---------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3160 sub highest_p {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3161 #---------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3162 my ($self, $overall) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3164 # Layout 2 = NCBI Blast 2.x does not report P-values.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3165 $self->_layout == 2 and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3166 $self->throw("Can't get P-value with BLAST2.",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3167 "Use highest_signif() or highest_expect()");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3169 $overall and return $self->{'_highestSignif'} || -1.0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3170 $self->hit('worst')->p();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3171 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3172
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3173 =head2 highest_expect
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3174
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3175 Usage : $blast_object->highest_expect( ['overall'])
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3176 Purpose : Get the highest Expect value among all significant hits in a BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3177 : Syntactic sugar for $blast->hit('worst')->expect()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3178 Examples : $e_sig = $blast->highest_expect();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3179 : $e_all = $blast->highest_expect('overall');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3180 Returns : Float or scientific notation number.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3181 : Returns -1.0 if highest_expect has not been set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3182 Argument : String 'overall' or no argument.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3183 : No argument = get highest Expect-value among significant hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3184 Throws : Exception if there were no significant hits and the report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3185 : does not have Expect values on the description lines
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3186 : (i.e., Blast1, WashU-Blast2).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3187
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3188 See Also : L<lowest_expect()|lowest_expect>, L<highest_signif()|highest_signif>, L<signif_fmt()|signif_fmt>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3189
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3190 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3192 #-------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3193 sub highest_expect {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3194 #-------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3195 my ($self, $overall) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3197 if ( $overall and $self->_layout == 2) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3198 return $self->{'_highestSignif'} || -1.0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3199 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3200
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3201 if($self->{'_is_significant'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3202 return $self->hit('worst')->expect;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3203 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3204 $self->throw("Can't get highest expect value: no significant hits ",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3205 "The format of this report requires expect values to be extracted$Newline".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3206 "from the hits themselves.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3207 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3208 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3209
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3210 =head2 lowest_signif
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3211
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3212 Usage : $blast_obj->lowest_signif();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3213 : Syntactic sugar for $blast->hit('best')->signif()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3214 Purpose : Get the lowest P or Expect value among all hits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3215 : in a BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3216 : This method is syntactic sugar for $blast->hit('best')->signif()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3217 : The value returned is the one which is reported in the description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3218 : section of the Blast report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3219 : For Blast1 and WU-Blast2, this is a P-value,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3220 : for NCBI Blast2, it is an Expect value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3221 Example : $blast->lowest_signif();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3222 Returns : Float or scientific notation number.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3223 : Returns -1.0 if lowest_signif has not been set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3224 Argument : n/a.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3225 Throws : n/a.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3226 Status : Deprecated. Use lowest_expect() or lowest_p().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3227 Comments : The signif() method provides a way to deal with the fact that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3228 : Blast1 and Blast2 formats differ in what is reported in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3229 : description lines of each hit in the Blast report. The signif()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3230 : method frees any client code from having to know if this is a P-value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3231 : or an Expect value, making it easier to write code that can process
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3232 : both Blast1 and Blast2 reports. This is not necessarily a good thing, since
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3233 : one should always know when one is working with P-values or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3234 : Expect values (hence the deprecated status).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3235 : Use of lowest_expect() is recommended since all hits will have an Expect value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3236
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3237 See Also : L<lowest_p()|lowest_p>, L<lowest_expect()|lowest_expect>, L<signif()|signif>, L<signif_fmt()|signif_fmt>, L<_set_signif()|_set_signif>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3238
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3239 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3240
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3241 #------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3242 sub lowest_signif {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3243 #------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3244 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3245
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3246 return $self->{'_lowestSignif'} || -1.0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3247 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3248
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3249 =head2 highest_signif
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3250
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3251 Usage : $blast_obj->highest_signif('overall');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3252 : Syntactic sugar for $blast->hit('worst')->signif()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3253 Purpose : Get the highest P or Expect value among all hits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3254 : in a BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3255 : The value returned is the one which is reported in the description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3256 : section of the Blast report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3257 : For Blast1 and WU-Blast2, this is a P-value,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3258 : for NCBI Blast2, it is an Expect value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3259 Example : $blast->highest_signif();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3260 Returns : Float or scientific notation number.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3261 : Returns -1.0 if highest_signif has not been set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3262 Argument : Optional string 'overall' to get the highest overall significance value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3263 Throws : n/a.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3264 Status : Deprecated. Use highest_expect() or highest_p().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3265 Comments : Analogous to lowest_signif(), q.v.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3266
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3267 See Also : L<lowest_signif()|lowest_signif>, L<lowest_p()|lowest_p>, L<lowest_expect()|lowest_expect>, L<signif()|signif>, L<signif_fmt()|signif_fmt>, L<_set_signif()|_set_signif>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3268
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3269 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3271 #---------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3272 sub highest_signif {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3273 #---------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3274 my ($self, $overall) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3275
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3276 $overall and return $self->{'_highestSignif'} || -1.0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3277
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3278 if($self->{'_is_significant'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3279 my $worst_hit = $self->hit('worst');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3280 if(defined $worst_hit) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3281 return $worst_hit->signif;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3282 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3283 return $self->{'_highestSignif'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3284 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3285 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3286 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3287
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3288 =head2 matrix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3289
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3290 Usage : $blast_object->matrix();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3291 Purpose : Get the name of the scoring matrix used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3292 : This is extracted from the report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3293 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3294 Returns : string or undef if not defined
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3296 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3297
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3298 #------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3299 sub matrix { my $self = shift; $self->{'_matrix'} || $Blast->{'_matrix'}; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3300 #------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3301
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3302 =head2 filter
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3303
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3304 Usage : $blast_object->filter();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3305 Purpose : Get the name of the low-complexity sequence filter used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3306 : (SEG, SEG+XNU, DUST, NONE).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3307 : This is extracted from the report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3308 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3309 Returns : string or undef if not defined
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3310
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3311 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3312
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3313 #----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3314 sub filter { my $self = shift; $self->{'_filter'} || $Blast->{'_filter'}; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3315 #----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3316
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3317 =head2 expect
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3318
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3319 Usage : $blast_object->expect();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3320 Purpose : Get the expect parameter (E) used for the Blast analysis.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3321 : This is extracted from the report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3322 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3323 Returns : string or undef if not defined.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3324
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3325 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3326
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3327 #-----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3328 sub expect { my $self = shift; $self->{'_expect'} || $Blast->{'_expect'}; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3329 #-----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3330
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3331 =head2 karlin_altschul
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3333 Usage : $blast_object->karlin_altschul();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3334 Purpose : Get the Karlin_Altschul sum statistics (Lambda, K, H)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3335 : These are extracted from the report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3336 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3337 Returns : list of three floats (Lambda, K, H)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3338 : (If not defined, returns a list of three zeros.)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3339
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3340 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3341
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3342 #---------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3343 sub karlin_altschul {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3344 #---------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3345 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3346 if(defined($self->{'_lambda'})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3347 ($self->{'_lambda'}, $self->{'_k'}, $self->{'_h'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3348 } elsif(defined($Blast->{'_lambda'})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3349 ($Blast->{'_lambda'}, $Blast->{'_k'}, $Blast->{'_h'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3350 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3351 (0, 0, 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3352 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3353 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3355 =head2 word_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3357 Usage : $blast_object->word_size();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3358 Purpose : Get the word_size used during the Blast analysis.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3359 : This is extracted from the report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3360 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3361 Returns : integer or undef if not defined.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3362
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3363 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3364
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3365 #--------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3366 sub word_size {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3367 #--------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3368 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3369 $self->{'_word_size'} || $Blast->{'_word_size'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3370 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3371
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3372 =head2 s
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3373
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3374 Usage : $blast_object->s();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3375 Purpose : Get the s statistic for the Blast analysis.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3376 : This is extracted from the report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3377 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3378 Returns : integer or undef if not defined.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3379
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3380 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3381
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3382 #------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3383 sub s { my $self = shift; $self->{'_s'} || $Blast->{'_s'}; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3384 #------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3385
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3386 =head2 gap_creation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3388 Usage : $blast_object->gap_creation();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3389 Purpose : Get the gap creation penalty used for a gapped Blast analysis.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3390 : This is extracted from the report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3391 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3392 Returns : integer or undef if not defined.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3393
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3394 See Also : L<gap_extension()|gap_extension>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3395
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3396 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3397
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3398 #-----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3399 sub gap_creation {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3400 #-----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3401 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3402 $self->{'_gapCreation'} || $Blast->{'_gapCreation'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3403 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3404
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3405 =head2 gap_extension
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3406
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3407 Usage : $blast_object->gap_extension();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3408 Purpose : Get the gap extension penalty used for a gapped Blast analysis.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3409 : This is extracted from the report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3410 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3411 Returns : integer or undef if not defined.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3412
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3413 See Also : L<gap_creation()|gap_creation>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3414
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3415 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3416
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3417 #-------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3418 sub gap_extension {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3419 #-------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3420 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3421 $self->{'_gapExtension'} || $Blast->{'_gapExtension'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3422 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3423
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3424 =head2 ambiguous_aln
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3425
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3426 Usage : $blast_object->ambiguous_aln();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3427 Purpose : Test all hits and determine if any have an ambiguous alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3428 Example : print "ambiguous" if $blast->ambiguous_aln();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3429 Returns : Boolean (true if ANY significant hit has an ambiguous alignment)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3430 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3431 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3432 Status : Experimental
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3433 Comments : An ambiguous BLAST alignment is defined as one where two or more
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3434 : different HSPs have significantly overlapping sequences such
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3435 : that it is not possible to create a unique alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3436 : by simply concatenating HSPs. This may indicate the presence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3437 : of multiple domains in one sequence relative to another.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3438 : This method only indicates the presence of ambiguity in at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3439 : least one significant hit. To determine the nature of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3440 : ambiguity, each hit must be examined.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3441
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3442 See Also : B<Bio::Tools::Blast::Sbjct::ambiguous_aln()>,L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3443
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3444 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3445
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3446 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3447 sub ambiguous_aln {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3448 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3449 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3450 foreach($self->hits()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3451 return 1 if ($_->ambiguous_aln() ne '-');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3452 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3453 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3454 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3455
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3456 =head2 overlap
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3458 Usage : $blast_object->overlap([integer]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3459 Purpose : Set/Get the number of overlapping residues allowed when tiling multiple HSPs.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3460 : Delegates to Bio::Tools::Blast::Sbjct::overlap().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3461 Throws : Exception if there are no significant hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3462 Status : Experimental
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3463
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3464 See Also : B<Bio::Tools::Blast::Sbjct::overlap()>,L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3465
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3466 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3467
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3468 #------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3469 sub overlap {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3470 #------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3471 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3472 if(not $self->hits) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3473 $self->throw("Can't get overlap data without significant hits.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3474 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3475 $self->hit->overlap();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3476 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3477
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3478 =head2 homol_data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3479
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3480 Usage : @data = $blast_object->homol_data( %named_params );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3481 Purpose : Gets specific similarity data about each significant hit.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3482 Returns : Array of strings:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3483 : "Homology data" for each HSP is in the format:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3484 : "<integer> <start> <stop>"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3485 : Data for different HSPs are tab-delimited.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3486 Argument : named parameters passed along to the hit objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3487 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3488 Status : Experimental
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3489 Comments : This is a very experimental method used for obtaining an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3490 : indication of:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3491 : 1) how many HSPs are in a Blast alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3492 : 2) how strong the similarity is between sequences in the HSP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3493 : 3) the endpoints of the alignment (sequence monomer numbers)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3494
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3495 See Also : B<Bio::Tools::Blast::Sbjct::homol_data()>,L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3496
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3497 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3498
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3499 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3500 sub homol_data {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3501 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3502
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3503 my ($self, %param) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3504 my @hits = $self->hits();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3505 my @data = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3506
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3507 ## Note: Homology data can be either for the query sequence or the hit
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3508 ## (Sbjct) sequence. Default is for sbjct. This is specifiable via
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3509 ## $param{-SEQ}='sbjct' || 'query'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3510
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3511 foreach ( @hits ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3512 push @data, $_->homol_data(%param);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3513 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3514 @data;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3515 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3516
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3517 =head1 REPORT GENERATING METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3518
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3519 =head2 table
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3520
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3521 Usage : $blast_obj->table( [get_desc]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3522 Purpose : Output data for each HSP of each hit in tab-delimited format.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3523 Example : print $blast->table;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3524 : print $blast->table(0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3525 : # Call table_labels() to print labels.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3526 Argument : get_desc = boolean, if false the description of each hit is not included.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3527 : Default: true (if not defined, include description column).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3528 Returns : String containing tab-delimited set of data for each HSP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3529 : of each significant hit. Different HSPs are separated by newlines.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3530 : Left-to-Right order of fields:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3531 : 1 QUERY_NAME # Sequence identifier of the query.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3532 : 2 QUERY_LENGTH # Full length of the query sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3533 : 3 SBJCT_NAME # Sequence identifier of the sbjct ("hit").
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3534 : 4 SBJCT_LENGTH # Full length of the sbjct sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3535 : 5 EXPECT # Expect value for the alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3536 : 6 SCORE # Blast score for the alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3537 : 7 BITS # Bit score for the alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3538 : 8 NUM_HSPS # Number of HSPs (not the "N" value).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3539 : 9 HSP_FRAC_IDENTICAL # fraction of identical substitutions.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3540 : 10 HSP_FRAC_CONSERVED # fraction of conserved ("positive") substitutions.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3541 : 11 HSP_QUERY_ALN_LENGTH # Length of the aligned portion of the query sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3542 : 12 HSP_SBJCT_ALN_LENGTH # Length of the aligned portion of the sbjct sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3543 : 13 HSP_QUERY_GAPS # Number of gaps in the aligned query sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3544 : 14 HSP_SBJCT_GAPS # Number of gaps in the aligned sbjct sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3545 : 15 HSP_QUERY_START # Starting coordinate of the query sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3546 : 16 HSP_QUERY_END # Ending coordinate of the query sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3547 : 17 HSP_SBJCT_START # Starting coordinate of the sbjct sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3548 : 18 HSP_SBJCT_END # Ending coordinate of the sbjct sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3549 : 19 HSP_QUERY_STRAND # Strand of the query sequence (TBLASTN/X only)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3550 : 20 HSP_SBJCT_STRAND # Strand of the sbjct sequence (TBLASTN/X only)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3551 : 21 HSP_FRAME # Frame for the sbjct translation (TBLASTN/X only)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3552 : 22 SBJCT_DESCRIPTION (optional) # Full description of the sbjct sequence from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3553 : # the alignment section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3554 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3555 Comments : This method does not collect data based on tiling of the HSPs.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3556 : The table will contain redundant information since the hit name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3557 : id, and other info for the hit are listed for each HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3558 : If you need more flexibility in the output format than this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3559 : method provides, design a custom function.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3560
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3561 See Also : L<table_tiled()|table_tiled>, L<table_labels()|table_labels>, L<_display_hits()|_display_hits>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3562
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3563 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3564
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3565 #-----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub table {
    my ($self, $get_desc) = @_;

    # The description column is included unless the caller passes a
    # defined false value.
    $get_desc = 1 unless defined $get_desc;

    # The 'parts' significance format is not allowed in this table;
    # substitute 'exp' for it.
    my $sigfmt = $self->signif_fmt();
    $sigfmt = 'exp' if $sigfmt eq 'parts';
    my $sig_conv = ($sigfmt eq 'exp') ? 'd' : '.1e';

    # One conversion per output column (22 in total), tab-separated and
    # terminated by the file-level $Newline.
    my $row_fmt = join("\t",
        '%s', '%d', '%s', '%d', "%$sig_conv",
        '%d', '%d', '%d', '%.2f', '%.2f',
        ('%d') x 8,
        ('%s') x 4,
    ) . $Newline;

    my $rows = '';
    foreach my $hit ($self->hits) {
        foreach my $hsp ($hit->hsps) {
            # Note: range() returns a 2-element list; gaps('list') is
            # likewise expected to fill two columns (the format reserves
            # eight %d slots for the lengths, gaps and ranges).
            $rows .= sprintf $row_fmt,
                $self->name, $self->length, $hit->name, $hit->length,
                $hit->expect($sigfmt), $hit->score, $hit->bits,
                $hit->num_hsps, $hsp->frac_identical, $hsp->frac_conserved,
                $hsp->length('query'), $hsp->length('sbjct'),
                $hsp->gaps('list'),
                $hsp->range('query'), $hsp->range('sbjct'),
                $hsp->strand('query'), $hsp->strand('sbjct'), $hsp->frame,
                ($get_desc ? $hit->desc : '');
        }
    }

    # An empty final column leaves a dangling tab before the newline;
    # remove it from every row.
    $rows =~ s/\t$Newline/$Newline/gs;
    return $rows;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3596
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3597 =head2 table_labels
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3598
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3599 Usage : print $blast_obj->table_labels( [get_desc] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3600 Purpose : Get column labels for table().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3601 Returns : String containing column labels. Tab-delimited.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3602 Argument : get_desc = boolean, if false the description column is not included.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3603 : Default: true (if not defined, include description column).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3604 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3605
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3606 See Also : L<table()|table>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3607
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3608 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3609
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3610 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub table_labels {
    my ($self, $get_desc) = @_;

    # The description column is included unless the caller passes a
    # defined false value.
    $get_desc = 1 unless defined $get_desc;

    # Labels for the 21 fixed columns, in the order table() emits them.
    my @col_names = qw(
        QUERY Q_LEN SBJCT S_LEN EXPCT SCORE BITS HSPS
        IDEN  CONSV Q_ALN S_ALN Q_GAP S_GAP
        Q_BEG Q_END S_BEG S_END Q_STR S_STR FRAM
    );
    my @col_rules = ('-----') x scalar(@col_names);

    # Both lines always carry 22 fields; the last one is empty when the
    # description column is switched off.
    my $line_fmt = join("\t", ('%s') x 22) . $Newline;
    my $header   = sprintf($line_fmt, @col_names, ($get_desc ? 'DESC'  : ''));
    $header     .= sprintf($line_fmt, @col_rules, ($get_desc ? '-----' : ''));

    # Record on the object that labels have been generated.
    $self->{'_labels'} = 1;

    # Drop the dangling tab left by an empty final field.
    $header =~ s/\t$Newline/$Newline/gs;
    return $header;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3631
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3632 =head2 table_tiled
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3633
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3634 Purpose : Get data from tiled HSPs in tab-delimited format.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3635 : Allows only minimal flexibility in the output format.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3636 : If you need more flexibility, design a custom function.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3637 Usage : $blast_obj->table_tiled( [get_desc]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3638 Example : print $blast->table_tiled;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3639 : print $blast->table_tiled(0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3640 : # Call table_labels_tiled() if you want labels.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3641 Argument : get_desc = boolean, if false the description of each hit is not included.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3642 : Default: true (include description).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3643 Returns : String containing tab-delimited set of data for the tiled HSPs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3644 : of each significant hit. Multiple hits are separated by newlines.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3645 : Left-to-Right order of fields:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3646 : 1 QUERY_NAME # Sequence identifier of the query.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3647 : 2 QUERY_LENGTH # Full length of the query sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3648 : 3 SBJCT_NAME # Sequence identifier of the sbjct ("hit").
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3649 : 4 SBJCT_LENGTH # Full length of the sbjct sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3650 : 5 EXPECT # Expect value for the alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3651 : 6 SCORE # Blast score for the alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3652 : 7 BITS # Bit score for the alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3653 : 8 NUM_HSPS # Number of HSPs (not the "N" value).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3654 : 9 FRAC_IDENTICAL* # fraction of identical substitutions.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3655 : 10 FRAC_CONSERVED* # fraction of conserved ("positive") substitutions.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3656 : 11 FRAC_ALN_QUERY* # fraction of the query sequence that is aligned.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3657 : 12 FRAC_ALN_SBJCT* # fraction of the sbjct sequence that is aligned.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3658 : 13 QUERY_ALN_LENGTH* # Length of the aligned portion of the query sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3659 : 14 SBJCT_ALN_LENGTH* # Length of the aligned portion of the sbjct sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3660 : 15 QUERY_GAPS* # Number of gaps in the aligned query sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3661 : 16 SBJCT_GAPS* # Number of gaps in the aligned sbjct sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3662 : 17 QUERY_START* # Starting coordinate of the query sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3663 : 18 QUERY_END* # Ending coordinate of the query sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3664 : 19 SBJCT_START* # Starting coordinate of the sbjct sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3665 : 20 SBJCT_END* # Ending coordinate of the sbjct sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3666 : 21 AMBIGUOUS_ALN # Ambiguous alignment indicator ('qs', 'q', 's').
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3667 : 22 SBJCT_DESCRIPTION (optional) # Full description of the sbjct sequence from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3668 : # the alignment section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3669 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3670 : * Items marked with a "*" report data summed across all HSPs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3671 : after tiling them to avoid counting data from overlapping regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3672 : multiple times.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3673 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3674 Comments : This function relies on tiling of the HSPs since it calls
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3675 : frac_identical() etc. on the hit as opposed to each HSP individually.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3676
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3677 See Also : L<table()|table>, L<table_labels_tiled()|table_labels_tiled>, B<Bio::Tools::Blast::Sbjct::"HSP Tiling and Ambiguous Alignments">, L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3678
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3679 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3680
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3681 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub table_tiled {
    my ($self, $get_desc) = @_;

    # The description column is included unless the caller passes a
    # defined false value.
    $get_desc = 1 unless defined $get_desc;

    # The 'parts' significance format is not allowed in this table;
    # substitute 'exp' for it.
    my $sigfmt = $self->signif_fmt();
    $sigfmt = 'exp' if $sigfmt eq 'parts';
    my $sig_conv = ($sigfmt eq 'exp') ? 'd' : '.1e';

    # One conversion per output column (22 in total), tab-separated and
    # terminated by the file-level $Newline.
    my $row_fmt = join("\t",
        '%s', '%d', '%s', '%d', "%$sig_conv",
        '%d', '%d', '%d',
        ('%.2f') x 4,
        ('%d') x 8,
        ('%s') x 2,
    ) . $Newline;

    # One row per hit: every value is requested from the hit object
    # rather than from its individual HSPs.
    my $rows = '';
    foreach my $hit ($self->hits) {
        $rows .= sprintf $row_fmt,
            $self->name, $self->length, $hit->name, $hit->length,
            $hit->expect($sigfmt), $hit->score, $hit->bits,
            $hit->num_hsps, $hit->frac_identical, $hit->frac_conserved,
            $hit->frac_aligned_query, $hit->frac_aligned_hit,
            $hit->length_aln('query'), $hit->length_aln('sbjct'),
            $hit->gaps('list'), $hit->range('query'), $hit->range('sbjct'),
            $hit->ambiguous_aln, ($get_desc ? $hit->desc : '');
    }

    # An empty final column leaves a dangling tab before the newline;
    # remove it from every row.
    $rows =~ s/\t$Newline/$Newline/gs;
    return $rows;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3707
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3708 =head2 table_labels_tiled
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3709
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3710 Usage : print $blast_obj->table_labels_tiled( [get_desc] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3711 Purpose : Get column labels for table_tiled().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3712 Returns : String containing column labels. Tab-delimited.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3713 Argument : get_desc = boolean, if false the description column is not included.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3714 : Default: true (include description column).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3715 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3716
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3717 See Also : L<table_tiled()|table_tiled>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3718
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3719 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3720
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3721 #---------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub table_labels_tiled {
    my ($self, $get_desc) = @_;

    # Documented default: include the description column when the
    # argument is omitted. This matches table_tiled() and table_labels(),
    # so a bare call yields a header with the same number of columns as
    # the data rows.
    $get_desc = 1 unless defined $get_desc;

    # Labels for the 21 fixed columns, in the order table_tiled() emits
    # them, and the rule drawn under each one.
    my @labels = qw(
        QUERY Q_LEN SBJCT  S_LEN  EXPCT SCORE BITS
        HSPS  FR_ID FR_CN  FR_ALQ FR_ALS Q_ALN
        S_ALN Q_GAP S_GAP  Q_BEG  Q_END S_BEG S_END
        AMBIG
    );
    # The rule under SBJCT has always been six dashes; kept as-is so the
    # emitted text does not change.
    my @rules = ('-----', '-----', '------', ('-----') x 18);

    if ($get_desc) {
        push @labels, 'DESC';
        push @rules,  '-----';
    }

    # Joining the lists keeps the label line and the rule line in step
    # with each other and never leaves a trailing tab to strip.
    my $str = join("\t", @labels) . $Newline
            . join("\t", @rules)  . $Newline;

    # Record on the object that tiled labels have been generated.
    $self->{'_labels_tiled'} = 1;
    return $str;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3744
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3745 =head2 display
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3746
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3747 Usage : $blast_object->display( %named_parameters );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3748 Purpose : Display information about Bio::Tools::Blast.pm data members,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3749 : E.g., parameters of the report, data for each hit, etc.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3750 : Overrides Bio::Root::Object::display().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3751 Example : $object->display(-SHOW=>'stats');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3752 : $object->display(-SHOW=>'stats,hits');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3753 Argument : Named parameters: (TAGS CAN BE UPPER OR LOWER CASE)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3754 : -SHOW => 'file' | 'hits' | 'homol'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3755 : -WHERE => filehandle (default = STDOUT)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3756 Returns : n/a (print/printf is called)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3757 Status : Experimental
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3758 Comments : For tab-delimited output, see table().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3759
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3760 See Also : L<_display_homol()|_display_homol>, L<_display_hits()|_display_hits>, L<_display_stats()|_display_stats>, L<table()|table>, B<Bio::Root::Tools::SeqAnal::display()>,L<Links to related modules>,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3761
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3762 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3763
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3764 #--------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub display {
    my ($self, %param) = @_;

    # Run the inherited display first, then fetch the output handle.
    $self->SUPER::display(%param);
    my $OUT = $self->fh();

    # show() is matched case-insensitively; each section is printed only
    # when its keyword appears in that string.
    if ($self->show =~ /homol/i) {
        $self->_display_homol($OUT);
    }
    if ($self->show =~ /hits/i) {
        $self->_display_hits( %param );
    }

    return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3776
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3777 =head2 _display_homol
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3778
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3779 Usage : n/a; called automatically by display()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3780 Purpose : Print homology data for hits in the BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3781 Example : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3782 Argument : one argument = filehandle object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3783 Returns : printf call.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3784 Status : Experimental
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3785
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3786 See Also : L<homol_data()|homol_data>, L<display()|display>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3787
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3788 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3789
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3790 #-------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _display_homol {
    my ($self, $OUT) = @_;

    # Banner naming the query, followed by a 40-character rule.
    print {$OUT} "${Newline}BLAST HOMOLOGY DATA FOR: ${\$self->name()}$Newline";
    print {$OUT} '-' x 40, "$Newline";

    # One output line per element returned by homol_data().
    foreach my $homol_line ($self->homol_data()) {
        print {$OUT} "$homol_line$Newline";
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3802
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3803 =head2 _display_stats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3804
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3805 Usage : n/a; called automatically by display()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3806 Purpose : Display information about the Blast report "meta" data.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3807 : Overrides Bio::Tools::SeqAnal::_display_stats() calling it first.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3808 Example : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3809 Argument : one argument = filehandle object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3810 Returns : printf call.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3811 Status : Experimental
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3812
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3813 See Also : L<display()|display>, B<Bio::Tools::SeqAnal::_display_stats()>,L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3814
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3815 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3816
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3817 #--------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _display_stats {
    my ($self, $OUT) = @_;

    # Delegate to the parent implementation before adding the
    # Blast-specific rows.
    $self->SUPER::_display_stats($OUT);

    # Shared row layouts: a left-justified 15-character label followed by
    # a string or an integer value.
    my $text_row  = "%-15s: %s$Newline";
    my $count_row = "%-15s: %d$Newline";

    # $Blast is not declared in this sub; it is a file-level object
    # defined elsewhere in the module.
    printf {$OUT} $text_row,  "GAPPED",      ($self->gapped ? 'YES' : 'NO');
    printf {$OUT} $count_row, "TOTAL HITS",  $self->num_hits('total');
    printf {$OUT} $text_row,  "CHECKED ALL", ($Blast->{'_check_all'} ? 'YES' : 'NO');
    printf {$OUT} $text_row,  "FILT FUNC",   ($Blast->{'_filt_func'} ? 'YES' : 'NO');

    # The minimum-length row is printed only when a true value is set.
    if ($self->min_length) {
        printf {$OUT} "%-15s: Length >= %s$Newline", "MIN_LENGTH", $self->min_length;
    }

    my $signif_hit_count = $self->num_hits;

    # Blast1: signif = P-value, Blast2: signif = Expect value.
    my $signif_label = ($self->_layout == 1) ? 'P' : 'EXPECT';

    printf {$OUT} $count_row, "SIGNIF HITS", $signif_hit_count;
    printf {$OUT} "%-15s: %s (${signif_label}-VALUE)$Newline", "SIGNIF CUTOFF", $self->signif;
    printf {$OUT} $text_row, "LOWEST $signif_label",  $self->lowest_signif();
    printf {$OUT} $text_row, "HIGHEST $signif_label", $self->highest_signif();
    printf {$OUT} "%-15s: %s (OVERALL)$Newline", "HIGHEST $signif_label", $self->highest_signif('overall');

    # Search-parameter rows are printed only when $Blast->_get_stats is true.
    if ($Blast->_get_stats) {
        # Tag appended to the LAMBDA/K/H and S rows when $Blast->{'_share'} is set.
        my $shared_note = ($Blast->{'_share'}) ? '(SHARED STATS)' : '';

        printf {$OUT} $text_row, "MATRIX", $self->matrix() || 'UNKNOWN';
        printf {$OUT} $text_row, "FILTER", $self->filter() || 'UNKNOWN';
        printf {$OUT} $text_row, "EXPECT", $self->expect() || 'UNKNOWN';
        # karlin_altschul() is expected to supply the three values for
        # the LAMBDA, K and H slots.
        printf {$OUT} "%-15s: %s, %s, %s %s$Newline", "LAMBDA, K, H", $self->karlin_altschul(), $shared_note;
        printf {$OUT} $text_row, "WORD SIZE", $self->word_size() || 'UNKNOWN';
        printf {$OUT} "%-15s: %s %s$Newline", "S", $self->s() || 'UNKNOWN', $shared_note;

        if ($self->gapped) {
            printf {$OUT} $text_row, "GAP CREATION",  $self->gap_creation()  || 'UNKNOWN';
            printf {$OUT} $text_row, "GAP EXTENSION", $self->gap_extension() || 'UNKNOWN';
        }
    }

    print {$OUT} "$Newline";
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3858
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3859 =head2 _display_hits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3860
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3861 Usage : n/a; called automatically by display()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3862 Purpose : Display data for each hit. Not tab-delimited.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3863 Example : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3864 Argument : one argument = filehandle object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3865 Returns : printf call.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3866 Status : Experimental
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3867 Comments : For tab-delimited output, see table().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3868
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3869 See Also : L<display()|display>, B<Bio::Tools::Blast::Sbjct::display()>, L<table()|table>, L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3870
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3871 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3872
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _display_hits {
    # Print a summary table of all hits and, when -INTERACTIVE is true and
    # the user agrees, the full HSP data for each hit.
    #
    # Arguments : %param - named parameters. -INTERACTIVE enables the
    #             prompt for full HSP data; the whole hash (with -SHOW set
    #             to 'hsp') is forwarded to each hit's display() method.
    # Returns   : 1 in all cases.
    # Output    : report text goes to the filehandle returned by
    #             $self->fh(); the yes/no prompt goes to STDOUT and replies
    #             are read from STDIN.
    my ( $self, %param ) = @_;
    my $OUT  = $self->fh();
    my @hits = $self->hits();

    ## You need a wide screen to see this properly.
    # Header. Everything that belongs to the report is sent to $OUT so the
    # header, the note, and the table all end up in the same place.
    print {$OUT} "${Newline}BLAST HITS FOR: ${\$self->name()} length = ${\$self->length}$Newline";
    print {$OUT} "(This table requires a wide display.)$Newline";
    print {$OUT} '-' x 80, "$Newline";

    print {$OUT} $self->table_labels_tiled(0);
    print {$OUT} $self->table_tiled(0);

    ## Doing this interactively since there is potentially a lot of data here.
    ## Not quite satisfied with this approach.
    return 1 unless $param{'-INTERACTIVE'};

    print "${Newline}DISPLAY FULL HSP DATA? (y/n): [n] ";
    my $reply = <STDIN>;

    # Honor the advertised default of "n": only continue on an explicit
    # answer starting with "y". EOF on STDIN (undef) also counts as "no".
    return 1 unless defined $reply and $reply =~ /^y/i;

    # Every hit is displayed with the same settings, so set this once.
    $param{'-SHOW'} = 'hsp';

    my $count = 0;
    foreach my $hit (@hits) {
        $count++;
        print {$OUT} "$Newline$Newline", '-' x 80, "$Newline";
        print {$OUT} "HSP DATA FOR HIT #$count (hit <RETURN>)";
        print {$OUT} "$Newline", '-' x 80;
        <STDIN>;    # Pause until the user presses <RETURN>.
        $hit->display(%param);
    }
    return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3911
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3912 =head2 to_html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3913
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3914 Usage : $blast_object->to_html( [%named_parameters] )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3915 Purpose : To produce an HTML-formatted version of a BLAST report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3916 : for efficient navigation of the report using a web browser.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3917 Example : # Using the static Blast object:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3918 : # Can read from STDIN or from a designated file:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3919 : $Blast->to_html($file);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3920 : $Blast->to_html(-FILE=>$file, -HEADER=>$header);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3921 : (if no file is supplied, STDIN will be used).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3922 : # saving HTML to an array:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3923 : $Blast->to_html(-FILE=>$file, -OUT =>\@out);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3924 : # Using a pre-existing blast object (must have been built from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3925 : # a file, not STDIN:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3926 : $blastObj->to_html();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3927 Returns : n/a, either prints report to STDOUT or saves to a supplied array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3928 : if an '-OUT' parameter is defined (see below).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3929 Argument : %named_parameters: (TAGS ARE CASE INSENSITIVE).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3930 : -FILE => string containing name of a file to be processed.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3931 : If not a valid file or undefined, STDIN will be used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3932 : Can skip the -FILE tag if supplying a filename
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3933 : as a single argument.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3934 : -HEADER => string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3935 : This should be an HTML-formatted string to be used
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3936 : as a header for the page, typically describing query sequence,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3937 : database searched, the date of the analysis, and any
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3938 : additional links.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3939 : If not supplied, no special header is used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3940 : Regardless of whether a header is supplied, the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3941 : standard info at the top of the report is highlighted.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3942 : This should include the <HEADER></HEADER> section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3943 : of the page as well.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3944 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3945 : -IN => array reference containing a raw Blast report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3946 : each line in a separate element in the array.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3947 : If -IN is not supplied, read() is called
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3948 : and data is then read either from STDIN or a file.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3949 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3950 : -OUT => array reference to hold the HTML output.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3951 : If not supplied, output is sent to STDOUT.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3952 Throws : Exception is propagated from $HTML::get_html_func()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3953 : and Bio::Root::Object::read().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3954 Comments : The code that does the actual work is located in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3955 : Bio::Tools::Blast::HTML::get_html_func().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3956 Bugs : Some hypertext links to external databases may not be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3957 : correct. This is due in part to the dynamic nature of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3958 : the web.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3959 : Hypertext links are not added to hits without database ids.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3960 TODO : Possibly create a function to produce fancy default header
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3961 : using data extracted from the report (requires some parsing).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3962 : For example, it would be nice to always include a date
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3963
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3964 See Also : B<Bio::Tools::Blast::HTML::get_html_func()>, B<Bio::Root::Object::read()>, L<Links to related modules>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3965
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3966 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3967
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3968 #------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub to_html {
#------------
    # Produce an HTML-formatted version of a BLAST report.
    #
    # Arguments : named parameters (or a single filename):
    #               -FILE   => name of the report file to process,
    #               -HEADER => HTML string emitted before the report,
    #               -IN     => array ref holding the raw report lines,
    #               -OUT    => array ref that collects the HTML output.
    # Output    : pushed onto the -OUT array when one is supplied,
    #             otherwise printed to STDOUT.
    # Throws    : via $self->throw() when -OUT is supplied but is not an
    #             array reference, and for any error raised while
    #             obtaining or running the HTML-conversion function. A
    #             report that is already HTML formatted only produces a
    #             message on STDERR.
    my ($self, @param) = @_;

    # Permits syntax such as: $blast->to_html($filename);
    my ($file, $header_html, $in_aref, $out_aref) =
        $self->_rearrange([qw(FILE HEADER IN OUT)], @param);

    $self->file($file) if $file;

    # Only setting the newline character once for efficiency.
    $Newline ||= $Util->get_newline(-client => $self, @param);

    # Decide once where the output goes so that the header, the report
    # body, and the trailer cannot end up in different places. A value
    # that cannot be used as an array is rejected before anything is
    # emitted instead of failing part-way through the report.
    my $to_array = ( ref($out_aref) and UNIVERSAL::isa($out_aref, 'ARRAY') ) ? 1 : 0;
    if ( $out_aref and not $to_array ) {
        $self->throw("The -OUT parameter must be an array reference.");
    }
    my $emit = $to_array
        ? sub { push @$out_aref, @_ }
        : sub { print @_ };

    # The header is stored as-is in the array but gets a trailing newline
    # when printed.
    $header_html ||= '';
    $emit->( $to_array ? $header_html : "$header_html$Newline" );

    require Bio::Tools::Blast::HTML;
    Bio::Tools::Blast::HTML->import(qw(&get_html_func));

    my $func = eval { get_html_func($out_aref) };
    if ($@) {
        # Copy $@ right away; it is a global that later evals can clobber.
        my $err = $@;
        $self->throw($err);
    }

    eval {
        # Without a caller-supplied header, open the document ourselves.
        $emit->("<html><body>$Newline") unless $header_html;

        if ( ref($in_aref) =~ /ARRAY/ ) {
            # If data is being supplied, process it.
            $func->($_) foreach @$in_aref;
        }
        else {
            # Otherwise, read it, processing as we go.
            $self->read(-FUNC => $func, @param);
        }
        $emit->("$Newline</pre></body></html>");
    };

    if ($@) {
        # Check for trivial error (report already HTML formatted).
        if ( $@ =~ /HTML formatted/ ) {
            print STDERR "\a${Newline}Blast report appears to be HTML formatted already.$Newline$Newline";
        }
        else {
            my $err = $@;
            $self->throw($err);
        }
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4023
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4024 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4025 __END__
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4026
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4027 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4028 # END OF CLASS #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4029 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4030
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4031 =head1 FOR DEVELOPERS ONLY
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4032
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4033 =head2 Data Members
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4034
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4035 Information about the various data members of this module is provided for those
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4036 wishing to modify or understand the code. Two things to bear in mind:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4037
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4038 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4039
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4040 =item 1 Do NOT rely on these in any code outside of this module.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4041
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4042 All data members are prefixed with an underscore to signify that they are private.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4043 Always use accessor methods. If the accessor doesn't exist or is inadequate,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4044 create or modify an accessor (and let me know, too!). (An exception to this might
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4045 be for Sbjct.pm or HSP.pm which are more tightly coupled to Blast.pm and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4046 may access Blast data members directly for efficiency purposes, but probably
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4047 should not).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4048
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4049 =item 2 This documentation may be incomplete and out of date.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4050
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4051 It is easy for these data member descriptions to become obsolete as
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4052 this module is still evolving. Always double check this info and search
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4053 for members not described here.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4054
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4055 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4056
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4057 An instance of Bio::Tools::Blast.pm is a blessed reference to a hash containing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4058 all or some of the following fields:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4059
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4060 FIELD VALUE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4061 --------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4062 _significance P-value or Expect value cutoff (depends on Blast version:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4063 Blast1/WU-Blast2 = P-value; Blast2 = Expect value).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4064 Values GREATER than this are deemed not significant.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4065
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4066 _significant Boolean. True if the query has one or more significant hits.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4067
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4068 _min_length Integer. Query sequences less than this will be skipped.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4069
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4070 _confirm_significance Boolean. True if client has supplied significance criteria.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4071
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4072 _gapped Boolean. True if BLAST analysis has gapping turned on.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4073
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4074 _hits List of Sbjct.pm objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4075
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4076 _num_hits Number of hits obtained from the BLAST report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4077
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4078 _num_hits_significant Number of significant hits based on Significant data members.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4079
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4080 _highestSignif Highest P or Expect value overall (not just what is stored in _hits).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4081
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4082 _lowestSignif Lowest P or Expect value overall (not just what is stored in _hits).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4083
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4084 The static $Blast object has a special set of members:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4085
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4086 _errs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4087 _share
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4088 _stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4089 _get_stats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4090 _gapped
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4091 _filt_func
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4092
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4093 Miscellaneous statistical parameters:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4094 -------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4095 _filter, _matrix, _word_size, _expect, _gapCreation, _gapExtension, _s,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4096 _lambda, _k, _h
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4097
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4098 INHERITED DATA MEMBERS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4099 -----------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4100 (See Bio::Tools::SeqAnal.pm for inherited data members.)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4101
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4102 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4104 1;