annotate variant_effect_predictor/Bio/Tools/Blast/HTML.pm @ 2:a5976b2dce6f

changing defualt values for ensembl database
author mahtabm
date Thu, 11 Apr 2013 17:15:42 +1000
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 #-------------------------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 # PACKAGE : Bio::Tools::Blast::HTML
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # PURPOSE : To encapsulate code for HTML formatting BLAST reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 # AUTHOR : Steve Chervitz (sac@bioperl.org)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # CREATED : 28 Apr 1998
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 # STATUS : Alpha
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 # REVISION: $Id: HTML.pm,v 1.15 2002/11/04 09:12:51 heikki Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 # For the latest version and documentation, visit the distribution site:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 # http://bio.perl.org/Projects/Blast/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 # To generate documentation, run this module through pod2html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 # (preferably from Perl v5.004 or better).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 # CUSTOMIZATION NOTE:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 # If your Blast reports are not getting marked up correctly, add or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 # modify the regexps in _markup_report() to accommodate the format of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 # your reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 # Copyright (c) 1996-98 Steve Chervitz. All Rights Reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 # This module is free software; you can redistribute it and/or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 # modify it under the same terms as Perl itself.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 #-------------------------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 package Bio::Tools::Blast::HTML;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 use Exporter;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 use Bio::Tools::WWW qw(:obj);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 $ID %DbUrl %SGDUrl $Revision
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 $Acc $Pir_acc $Word $Signif $Int $Descrip);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 @ISA = qw(Exporter);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 @EXPORT = qw();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 @EXPORT_OK = qw(&get_html_func &strip_html);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 %EXPORT_TAGS = ( std => [qw(&get_html_func &strip_html)] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 $ID = 'Bio::Tools::Blast::HTML';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 $Revision = '$Id: HTML.pm,v 1.15 2002/11/04 09:12:51 heikki Exp $'; #'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 my $_set_markup = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 my $_gi_link = '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 ## POD Documentation:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 Bio::Tools::Blast::HTML - Bioperl Utility module for HTML formatting Blast reports
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 =head2 Adding HTML-formatting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 use Bio::Tools::Blast::HTML qw(&get_html_func);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 $func = &get_html_func();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 # Now as each line of the report is read, pass it to &$func($line).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 See L<get_html_func()|get_html_func> for details.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 Also see B<Bio::Tools::Blast::to_html> for an example of usage.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 =head2 Removing HTML-formatting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 use Bio::Tools::Blast::HTML qw(&strip_html);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 &strip_html(\$blast_report_string)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 See L<strip_html()|strip_html> for details.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 =head1 INSTALLATION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 This module is included with the central Bioperl distribution:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 http://bio.perl.org/Core/Latest
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 ftp://bio.perl.org/pub/DIST
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 Follow the installation instructions included in the README file.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 This module can be used to add HTML formatting to or remove HTML
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 formatting from a raw Blast sequence analysis report. Hypertext links
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 to the appropriate database are added for each hit sequence (GenBank,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 Swiss-Prot, PIR, PDB, SGD).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 This module is intended for use by Bio::Tools::Blast.pm and related modules,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 which provide a front-end to the methods in Bio::Tools::Blast::HTML.pm.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 =head1 DEPENDENCIES
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 Bio::Tools::Blast::HTML.pm does not inherit from any other class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 besides Exporter. It is used by B<Bio::Tools::Blast.pm> only. This
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 class relies on B<Bio::Tools::WWW.pm> to provide key URLS for adding
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 links in the Blast report to specific databases.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 The greatest dependency comes from the dynamic state of the web. URLs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 are likely to change in the future, so all links cannot be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 guaranteed to work indefinitely. Feel free to report broken or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 incorrect database links (L<FEEDBACK | FEEDBACK>). Thanks!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 =head1 SEE ALSO
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 Bio::Tools::Blast.pm - Blast object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 Bio::Tools::WWW.pm - URL repository.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 http://bio.perl.org/Projects/modules.html - Online module documentation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 http://bio.perl.org/Projects/Blast/ - Bioperl Blast Project
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 http://bio.perl.org/ - Bioperl Project Homepage
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 User feedback is an integral part of the evolution of this and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 Bioperl modules. Send your comments and suggestions preferably to one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 of the Bioperl mailing lists. Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 http://bio.perl.org/MailList.html - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 Report bugs to the Bioperl bug tracking system to help us keep
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 track of the bugs and their resolution. Bug reports can be submitted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 via email or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 =head1 AUTHOR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 Steve Chervitz, E<lt>sac@bioperl.orgE<gt>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 =head1 COPYRIGHT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 Copyright (c) 1998-2000 Steve Chervitz. All Rights Reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 This module is free software; you can redistribute it and/or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 modify it under the same terms as Perl itself.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 ###
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 #### END of main POD documentation.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 ###
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 #'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 ###################### BEGIN FUNCTIONS ########################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 Methods beginning with a leading underscore are considered private
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 and are intended for internal use by this module. They are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 B<not> considered part of the public interface and are described here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 for documentation purposes only.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 =head2 get_html_func
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 Usage : $func_ref = &get_html_func( [array_ref] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 : This method is exported.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 Purpose : Provides a function that adds HTML formatting to a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 : raw Blast report line-by-line.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 : Utility method used by to_html() in Bio::Tools::Blast.pm.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 Returns : Reference to an anonymous function to be used while reading in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 : the raw report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 : The function itself operates on the Blast report line-by-line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 : HTML-ifying it and printing it to STDOUT (or saving in the supplied
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 : array ref) as it goes:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 : foreach( @raw_report ) { &$func_ref($_); }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 Argument : array ref (optional) for storing the HTML-formatted report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 : If no argument is supplied, HTML output is sent to STDOUT.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 Throws : Croaks if an argument is supplied and is not an array ref.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 : The anonymous function returned by this method croaks if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 : the Blast output appears to be HTML-formatted already.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 Comments : Adapted from a script by Keith Robison November 1993
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 : krobison@nucleus.harvard.edu; http://golgi.harvard.edu/gilbert.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 : Modified extensively by Steve Chervitz and Mike Cherry.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 : Some modifications are customizations for BLAST reports served up
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 : by the Saccharomyces Genome Database.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 : Feel free to modify or replace portions of this code as necessary
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 : to accommodate new BLAST datasets or changes to the Blast format.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 See Also : B<Bio::Tools::Blast::to_html()>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 #--------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub get_html_func {
#--------------------
    # Purpose  : Build and return a closure that adds HTML formatting to a raw
    #            Blast report, one line per call.
    # Argument : $out_aref - optional ARRAY ref. When supplied, every piece of
    #            generated HTML is pushed onto it; otherwise output is printed
    #            to the currently selected filehandle.
    # Returns  : CODE ref. Call it as $func->($line); it returns 1.
    # Throws   : Croaks if $out_aref is defined but is not an ARRAY ref.
    #            The returned closure croaks if a line contains <HTML>,
    #            <TITLE> or <PRE> (report looks HTML-formatted already).
    my ($out_aref) = @_;

    # Carp is not imported at the top of this file, so load it here and call
    # croak fully qualified; otherwise the error paths below would themselves
    # die with "Undefined subroutine".
    require Carp;

    ## Key booleans used in parsing.
    my $found_table     = 0;  # Located the table at top of report (a.k.a. 'descriptions').
    my $found_data      = 0;  # Nothing is done until this is true
    my $skip            = 0;  # Skipping various items in the report header
    my $ref_skip        = 0;  # so we can include nice HTML versions
                              # (e.g., references for the BLAST program).
    my $getNote         = 0;  # Echoing the continuation lines of a WARNING/NOTICE.
    my $getGenBankAlert = 0;  # Emit the GenBank alert once the hit table starts.
    my $gi_link         = \$_gi_link;  # Shared with the rest of the module.
    my $prog            = '';          # Program name, e.g. BLASTN, TBLASTX.

    if( defined($out_aref) and not ref($out_aref) eq 'ARRAY') {
        Carp::croak("Argument must be an ARRAY ref not a ${\ref $out_aref}.");
    }

    # Explicit empty argument lists: the bare "&name;" form would silently
    # forward the current @_ to the callee.
    my $refs = _prog_ref_html();

    _set_markup_data() if not $_set_markup;

    # Single place that decides between collecting output and printing it.
    my $emit = sub {
        my ($text) = @_;
        if ($out_aref) {
            push @{$out_aref}, $text;
        }
        else {
            print $text;
        }
        return;
    };

    return sub {
        # $_ contains a single line from a Blast report.
        local $_ = shift;

        Carp::croak("Report appears to be HTML formatted already.")
            if m/<HTML>|<TITLE>|<PRE>/i;

        if (not $found_table) {
            if ($ref_skip) {
                # The report's own reference text is being suppressed (the
                # HTML version in $refs is emitted later); a blank line ends it.
                $ref_skip = 0 if /^\s+$/;
            }

            if ($getNote) {
                ## SAC: created this test since we are no longer reading from STDIN.
                $emit->($_);
                $getNote = 0 if m/^\s+$/;
            }
            elsif ( m/(.*), Up \d.*/ or /Date: +(.+)/ or /Start: +(.+?) +End:/ ) {
                ### Network BLAST reports from NCBI are time stamped as follows:
                #Fri Apr 18 15:55:41 EDT 1997, Up 1 day, 19 mins, 1 user, load: 19.54, 19.13, 17.77
                $emit->("<b>BLASTed on:</b> $1<p>\n");
            }
            elsif ( /^(?:<\w+>)?(T?BLAST[NPX])\s+/ ) {
                # Save the program name NOW: the substitution below resets the
                # capture variables, so reading them afterwards would yield
                # the remainder of the line instead of the program name.
                $prog       = $1;
                $found_data = 1;

                # Output text is unchanged: "Program: <first word> <rest> <br>".
                s#(\S+)\s+(.*)#<P><B>Program:</B> $1 $2 <br>#;
                $emit->($_);
                $skip = 1;

                if ($prog =~ /BLASTN/) {
                    ## Prevent the error at Entrez when you ask for a nucl
                    ## entry with a protein GI number.
                    $$gi_link = $DbUrl{'gb_n'};   # nucleotide
                }
                else {
                    # NOTE(review): TBLASTX also lands here; confirm whether
                    # it should use the nucleotide link as well.
                    $$gi_link = $DbUrl{'gb_p'};   # protein
                }
            }
            elsif ( m/^Query=/ ) {
                # Keeping the "Query=" format to keep it parsable by Blast.pm
                # (after stripping HTML).
                s#Query= *(.*)#<title>$1</title>\n<p><b>Query=</b> $1#;
                $emit->($_);
                $skip = 1;
            }
            elsif ( /Reference:/ ) {
                $ref_skip = 1;
            }
            elsif ( /^Database:/ ) {
                &_markup_database(\$_);
                $emit->($_);
                if ( /non-redundant genbank/i and $prog =~ /TBLAST[NX]/i ) {
                    $getGenBankAlert = 1;
                }
                $skip = 1;
            }
            elsif ( /sequences;/ ) {
                $emit->("$_<p>");
            }
            elsif ( /^\s+\(\d+ letters\)\s+/ ) {
                $emit->("<br>&nbsp&nbsp&nbsp&nbsp$_");
            }
            elsif ( /^(WARNING|NOTICE):/i ) {
                # Highlight the label (WARNING or NOTICE) and follow it with
                # the message text.
                s#^(WARNING|NOTICE): *(.*)#<p><b><font color="red">$1:</font></b> $2#i;
                $emit->($_);
                $getNote = 1;
            }
            elsif ( /Score +E\s*$/ or /Probability\s*$/ ) {
                # Put the last HTML-formatted lines before the main body of report.
                $found_table = 1;
                $skip        = 0;
                $emit->($refs);
                if ($getGenBankAlert) {
                    $emit->( _genbank_alert() );
                }
                $emit->("\n<p><pre>");
            }
        }
        else {
            # Past the header: every line goes through the report markup.
            &_markup_report(\$_);
        }

        # Lines already emitted by a header branch set $skip; suppressed
        # reference text sets $ref_skip. Everything else is passed through.
        if ($found_data and not($skip or $ref_skip)) {
            $emit->($_);
        }
        1;
    } # end sub {}
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 =head2 _set_markup_data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 Usage : n/a; utility method used by get_html_func()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 Purpose : Sets various hashes and regexps used for adding HTML
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 : to raw Blast output.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 Returns : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 Comments : These items need be set only once.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 See Also : L<get_html_func()|get_html_func>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 #-------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _set_markup_data {
#-------------------
    # One-time initialisation of the package-level lookup tables and regexp
    # fragments. get_html_func() calls this only while $_set_markup is false,
    # and the flag is raised at the end so the work is not repeated.

    # URL tables supplied by the Bio::Tools::WWW object.
    %DbUrl  = $BioWWW->search_url('all');
    %SGDUrl = $BioWWW->sgd_url('all');

    # Regexp fragments, kept as plain strings.
    # (presumably interpolated into patterns by _markup_report() -- confirm)
    $Word    = q([\w_.]+);            # A word. Dot included for version suffixes.
    $Pir_acc = q([A-Z][A-Z0-9]{5,});  # PIR accession number.
    $Acc     = q([A-Z][\d.]+);        # GB/EMBL/DDJB/SP accession number.
    $Descrip = q( +.* {2,}?);         # A description line.
    $Int     = q( *\d\d*);            # An integer.
    $Signif  = q([\de.-]{3,});        # A P-value or Expect value.

    $_set_markup = 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 =head2 _markup_database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 Usage : n/a; utility method used by get_html_func()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 Purpose : Converts a cryptic database ID into a readable name.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 Returns : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 Comments : This is used for converting local database IDs into
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 : understandable terms. At present, it only recognizes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 : databases used locally at SGD.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 See Also : L<get_html_func()|get_html_func>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 #---------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _markup_database {
#---------------------
    # Rewrites, in place, the "Database:" line referenced by $line_ref:
    # local SGD database IDs are expanded to readable names, then the
    # "Database:" label is wrapped in HTML.
    my ($line_ref) = @_;
    my $text = ${$line_ref};

    # Applied in this order; each ID is replaced at most once.
    my @readable_name_for = (
        [ qr/YeastN/     => '<i>S. cerevisiae</i> GenBank Data Set; ' ],
        [ qr/YeastP/     => 'Non-Redundant <i>S. cerevisiae</i> Protein Data Set; ' ],
        [ qr/genoSC/     => 'Complete DNA Sequence for the S. cerevisiae Genome; ' ],
        [ qr/YeastORF-P/ => 'Translation of all Standard S.c. ORFs; ' ],
        [ qr/YeastORF-N/ => 'Coding Sequence of all Standard S.c. ORFs; ' ],
    );

    for my $entry (@readable_name_for) {
        my ($id_re, $label) = @{$entry};
        $text =~ s/$id_re/$label/;
    }

    # Bold the label; the rest of the line is carried over unchanged.
    $text =~ s{Database: *(.*)}{<p><b>Database:</b> $1};

    ${$line_ref} = $text;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 =head2 _markup_report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 Usage : n/a; utility function used by get_html_func()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 Purpose : Adds HTML links to aid navigation of raw Blast output.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 Returns : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 Comments : HTML-formatting is dependent on the Blast server that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 : provided the Blast report. Currently, this function can handle reports
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 : produced by NCBI and SGD. Feel free to modify this function
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 : to accommodate reports produced by other servers/sites.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 : This function is simply a collection of substitution regexps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 : that recognize and modify the relevant lines of the Blast report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 : All non-header lines of the report are passed through this function,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 : only the ones that match will get modified.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 : The general scheme for adding links is as follows:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 : (Some of the SGD markups do not follow this scheme precisely
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 : but this is the general trend.)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 : For description lines in the summary table at the top of report:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 : DB:SEQUENCE_ID DESCRIPTION SIGNIF_VAL
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 : DB = links to the indicated database (if not Gen/Embl/Ddbj).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 : SEQUENCE_ID = links to GenBank entry for the sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 : SIGNIF_VAL = internal link to relevant alignment section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 : For the alignment sections in the body of the report:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 : DB:SEQUENCE_ID (Back | Top) DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 : DB = links to the indicated database (if not Gen/Embl/Ddbj).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 : SEQUENCE_ID = links to GenBank entry for the sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 : SIGNIF_VAL = internal link to alignment section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 : Back = internal link to description line in summary section.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 : Top = internal link to top of page.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 : 'DB' links are created for PDB, PIR, and SwissProt sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 : RE_PARSING HTML-FORMATTED REPORTS:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 : ----------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 : HTML-formatted reports generated by this module, as well as reports
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 : obtained from the NCBI servers, should be parsable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 : by Bio::Tools::Blast.pm. Parsing HTML-formatted reports is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 : slow, however, since the HTML must be removed prior to parsing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 : Parsing HTML-formatted reports is dependent on the specific structure
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 : of the HTML and is generally not recommended.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 : Note that since URLs can change without notice, links will need updating.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 : The links are obtained from Bio::Tools::WWW.pm; updating that module
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419 : will update this as well.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 Bugs : Some links to external databases are incorrect
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 : (in particular, for 'bbs' and 'prf' databases on NCBI Blast reports).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 : Some links may fail as a result of the dynamic nature of the web.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424 : Hypertext links are not added to hits without database ids.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426 See Also : L<get_html_func()|get_html_func>, B<Bio::Tools::WWW.pm>, L<strip_html()|strip_html>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 #--------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 sub _markup_report {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 #--------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 my $line_ref = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 local $_ = $$line_ref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 ## REGEXPS FOR ALIGNMENT SECTIONS (within the body of the report,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 ## the text above the list of HSPs).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 ## If the HSP alignment sections don't start with a '>' we have no way
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 ## of finding them. This occurs with reports saved from HTML-formatted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 ## web pages, which we shouldn't be processing here anyway.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 ## To facilitate parsing of HTML-formatted reports by Bio::Tools::Blast.pm,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 ## the <a name=...> anchors should be added at the BEGINNING of the HSP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 ## alignment section lines and at the END of the description section lines.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 # Removing " ! " added by GCG.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 s/ ! / /;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 ### NCBI-specific markups for HSP alignment section lines:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 local($^W) = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 # GenBank/EMBL, DDBJ hits (GenBank Format):
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 s@^>(gb|emb|dbj|ref)\|($Word)(\|$Word)?(.*)$@<a name=$2_A></a><b>$1:<a href="$_gi_link$2">$2$3</a></b>$4<br>(<a href="\#$2_H">Back|<a href="\#top">Top</a>)@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457 s@^>(gb|emb|dbj|ref)\|($Word)(\| \(?$Word\)?)(.*)$@<a name=$2_A></a><b>$1:<a href="$_gi_link$2">$2</a></b>$3$4<br>(<a href="\#$2_H">Back|<a href="\#top">Top</a>)@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 # PIR hits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 s@^>pir\|\|($Word)( .*)$@<a name=$1_A></a><b><a href=\"$DbUrl{'pir_acc'}$1\">pir</a>:<a href="$DbUrl{'gb_p'}$1">$1</a></b> $2 <br>(<a href="\#$1_H">Back|<a href="\#top">Top</a>)@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 # GI hits (GenBank Format): using a nested (())
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 s@^>(gi)\|($Word)( +\(($Word)\))( .*)$@<a name=$4_A></a><b>$1:<a href="$_gi_link$4">$2</a></b>$3$5<br>(<a href="\#$4_H">Back|<a href="\#top">Top</a>)@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 # GNL PID hits (GenBank Format):
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 s@^>(gnl)\|($Word)?(\|$Word) +\(($Word)\)( .*)$@<a name=$4_A></a><b>$1:<a href="$_gi_link$4">$2$3</a></b>($4)$5<br>(<a href="\#$4_H">Back|<a href="\#top">Top</a>)@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 # BBS and PRF hits (what db?) (GenBank Format):
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 s@^>(bbs|prf)\|\|?($Word)( .*)$@<a name=$2_A></a><b>$1:<a href="$_gi_link$2">$2</a></b>$3<br>(<a href="\#$2_H">Back|<a href="\#top">Top</a>)@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 # SwissProt hits:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 s@^>sp\|($Word)\|($Word)?( .*)$@<a name=$1_A></a><b><a href="$DbUrl{'swpr'}$1">sp</a>:<a href="$DbUrl{'gb_p'}$1">$1|$2</a></b>$3<br>(<a href="\#$1_H">Back|<a href="\#top">Top</a>)@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 ## PDB ids with or without a chain identifier (GenBank format)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 s@^>pdb\|(\d\w{3})\|[\w ] (.*)$@<a name=$1_A></A><b><a href=\"$DbUrl{'3db'}$1\">pdb</A>:<a href="$DbUrl{'gb_struct'}$1">$1</a></b> (<a href="\#$1_H">Back</a>|<a href="\#top">Top</a>) $2@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 ### SGD-specific markups for HSP alignment section lines:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 ## PDB ids without chain identifier
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 s@^>PDB_UNIQUEP:(\d\w{3})_ (.*)$@<a name=$1_A></A><b><A HREF="$DbUrl{'3db'}$1">PDB</a>:<A HREF="$DbUrl{'gb_struct'}$1">$1</A></b> (<a href="\#$1_H">Back</a>|<a href="\#top">Top</a>) $2@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 ## PDB ids with chain identifier
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 s@^>PDB_UNIQUEP:(\d\w{3})_([\w ]{1})(.*)$@<a name=$1_A></A><b><A HREF="$DbUrl{'3db'}$1">PDB</a>:<A HREF="$DbUrl{'gb_struct'}$1">$1</A></b> Chain:$2, (<a href="\#$1_H">Back</a>|<a href="\#top">Top</a>) $3@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 s@^>($Word)PEPT:GI_(\d+)(.*)$@<a name=$2_A></a><b>$1:<a href="$DbUrl{'gb_p'}$2">GI_$2</a></b> $3 <br>(<a href="\#$2_H">Back|<a href="\#top">Top</a>)@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 # The gcg blast dataset generating tools up-case all sbjct sequence IDs.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 # This is fine for yeast but not worm. This is considered a hack here.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 s@WORMPEPT:(\w+\.)(\S+)@WORMPEPT:$1\L$2\E@;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 s@^>WORMPEPT:(\S+)(.*)$@<a name=$1_A></a><b>WORMPEP:<A HREF="$DbUrl{'wormace'}$1">$1</a></b> $2 <br>(<a href="\#$1_H">Back|<a href="\#top">Top</a>)@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495 s#^>(GB_$Word):($Word) ($Acc) (.*$)#<a name=$2_$3_A></A><a href=\#$2_$3_H>$2|$3</A>$4\t<b>[<A HREF=$_gi_link$3>GenBank</A> / <A HREF=$DbUrl{'embl'}$3>EMBL</A> / <A HREF=\"$SGDUrl{'seq_an'}$2\*\">SGD</A>]</b> #o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497 # Sac's version: ORF name is an external link into SGD:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498 s@^>ORFP:(\S*) +([\w-]+)(.*$)@<a name=$1_A></A>ORFP:<a href=\"$SGDUrl{'locus'}$2\">$1 $2</A>$3<br>&nbsp&nbsp&nbsp&nbsp&nbsp<b>[<A HREF=\"$SGDUrl{'seq_an'}$2\">Gene/Sequence Resources</a> / <a href=\"$SGDUrl{'map_orf'}$2\">ORF Map</a></b>] <a href="\#$1_H">Back</a>|<a href="\#top">Top</a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500 # Mike's version:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 # s#^>ORFP:(\S*) (.*$)#<a name=$1_A></A><a href=\#$1_H>ORFP:$1</A> $2\t<b>[<A HREF=\"$SGDUrl{'seq_an'}$1\">Gene/Sequence Resources</a> / <a href=\"$SGDUrl{'map_orf'}$1\">ORF Map</a>]</b> #o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 s#^>ORFN:(\S*) (.*$)#<a name=$1_A></A><a href=\#$1_H>ORFN:$1</A> $2\t<b>[<A HREF=\"$SGDUrl{'seq_an'}$1\">Gene/Sequence Resources</a>] / <a href=\"$SGDUrl{'map_orf'}$1\">ORF Map</a></b> #o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 s#^>NR_SC:GP-\S* gi\|(\w+)([\w\|]*) (.*$)#<a name=$1_A></A><a href=\#$1_H>GenPept|$1</A> gp|$2 $3\t<b>[<A HREF=$DbUrl{'gb_p'}$1>GenPept</A> / <A HREF=\"$SGDUrl{'gi'}$1\*\">SGD</A>]</b> #o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 s#^>NR_SC:SW-$Word SW:($Word) ($Acc) (.*$)#<a name=$1_A></A><a href=\#$1_H>SWISS|$1 $2</A> $3\t<b>[<a href=$DbUrl{'swpr'}$2>SwissProt</a> / <A HREF=$DbUrl{'gb_p'}$2>Entrez</A>]</b>#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 s#^>NR_SC:PIR-$Word PIR:($Word) (.*$)#<a name=$1_A> </A><a href=\#$1_H>PIR|$1</A> $2\t<b>[<a href=$DbUrl{'pir_uid'}$1>PIR</a> / <A HREF=$DbUrl{'gb_p'}$1>Entrez</A>]</b>#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 s#^>CHRS:([A-Z][0-9]*) (.*)$#<a name=$1_A></a><a href=\#$1_H>$1</A> $2: [<b><a href=$SGDUrl{'seq_an'}$1>Gene/Sequence Resources</A> / <a href=\"$SGDUrl{'map_chr'}$1\">ORF Map</a></b>]#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 s#^>NOT:([A-Z]_[0-9]*-[0-9]*)( *)Chromosome ([0-9]*) from ([0-9]*) to ([0-9]*)$#<a name=$1_A></a><a href=\#$1_H>$1</A> $2Chromosome $3 from $4 to $5 [<b><a href=$SGDUrl{'chr'}$3\&beg=$4\&end=$5>Gene/Sequence Resources</a> / <a href=\"$SGDUrl{'map_chr'}$3\&beg=$4\&end=$5\">ORF Map</a> / <a href=\"$SGDUrl{'chr_old'}$3\&beg=$4\&end=$5\">Retrieve DNA</a></b>]#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 s#^>UTR5_SC_[0-9]*:(\S*) 5' untranslated region, chr(\S*) ([0-9]*) - ([0-9]*)(.*$)#<a name=$1_A></A><a href=\#$1_H>UTR5:$1</A> $1 5' untranslated region, chr$2 $3 - $4, $5\t<b>[<A HREF=\"$SGDUrl{'chr'}$2&beg=$3&end=$4\">Gene/Sequence Resources</A> / <a href=\"$SGDUrl{'map_chr'}$2\&beg=$3\&end=$4\">ORF Map</a>]</b>#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 # Hits without a db identifier.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 # If any of the previous regexps succeed, the leading '>' will be removed.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 # Otherwise, this regexp could cause trouble.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 s@^>($Word)(.*)$@<a name=$1_A></a>$1 $2<br>(<a href="\#$1_H">Back|<a href="\#top">Top</a>)@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 ## REGEXPS FOR SUMMARY TABLE LINES AT TOP OF REPORT (a.k.a. 'descriptions')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 ## (table of sequence id, description, score, P/Expect value, n)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 ## Not using bold face to highlight the sequence id's since this can throw off
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 ## formatting of the line when the IDs are different lengths. This led to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 ## the scores and P/Expect values not lining up properly.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 ### NCBI-specific markups for description lines:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532 # GenBank/EMBL, DDBJ hits (GenBank Format):
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 s@^ ?(gb|emb|dbj|ref)\|($Word)(\|$Word)?($Descrip)($Int +)($Signif)(.*)$@$1:<a href="$_gi_link$2">$2$3</a>$4$5<A href="\#$2_A">$6</a>$7<a name="$2_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 s@^ ?(gb|emb|dbj|ref)\|($Word)(\| \(?$Word\)?)($Descrip)($Int +)($Signif)(.*)$@$1:<a href="$_gi_link$2">$2</a>$3$4$5<A href="\#$2_A">$6</a>$7<a name="$2_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 # Missing inner ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538 s@^ ?pir\|\|($Word)?($Descrip)($Int) ($Signif)(.*)$@<a href="$DbUrl{'pir_acc'}$1">pir</a>:<a href="$DbUrl{'gb_p'}$1">$1</a> $2$3 <A href="\#$1_A">$4</a>$5<a name="$1_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540 # GI hits (GenBank Format): using a nested (())
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 s@^ ?gi\|($Word)( +\(($Word)\))($Descrip)($Int) ($Signif)(.*)$@gi:<a href="$_gi_link$3">$1</a>$2$4$5 <A href="\#$3_A">$6</a>$7<a name="$3_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543 s@^ ?(gnl)\|($Word)?(\|$Word +)\(($Word)\)($Descrip)($Int) ($Signif)(.*)$@$1:<a href="$_gi_link$4">$2$3</a>($4)$5$6 <A href="\#$4_A">$7</a>$8<a name="$4_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 s@^ ?(bbs|prf)\|\|?($Word)($Descrip)($Int) ($Signif)(.*)$@$1:<a href="$_gi_link$2">$2</a> $3$4 <A href="\#$2_A">$5</a>$6<a name="$2_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 ## SwissProt accessions (GenBank format)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550 s@^ ?sp\|($Word)(\|$Word)?($Descrip)($Int) ($Signif)(.*)$@<a href="$DbUrl{'swpr'}$1">sp</a>:<a href="$DbUrl{'gb_p'}$1">$1$2</a>$3$4 <a href="\#$1_A">$5</a>$6<a name="$1_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552 ## PDB ids with or without a chain ID (GenBank format)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553 s@^ ?pdb\|($Word)\|($Word)?($Descrip)($Int) ($Signif)(.*)$@<a href="$DbUrl{'3db'}$1">pdb</a>:<a href="$DbUrl{'gb_struct'}$1">$1_$2</a>$3$4 <a href="\#$1_A">$5</a>$6<a name="$1_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556 ### SGD-specific markups for description lines:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 ## PDB ids without chain identifier
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559 s@^ ?PDB_UNIQUEP:(\d\w{3})_($Descrip)($Int) ($Signif)(.*)$@<a href="$DbUrl{'3db'}$1">PDB</a>:<A HREF="$DbUrl{'gb_struct'}$1">$1</A> $2$3 <a href="\#$1_A">$4</a>$5<a name="$1_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562 ## PDB ids with chain identifier
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563 s@^ ?PDB_UNIQUEP:(\d\w{3})_(\w)($Descrip)($Int) ($Signif)(.*)$@<a href="$DbUrl{'3db'}$1">PDB</a>:<A HREF="$DbUrl{'gb_struct'}$1">$1</A> Chain:$2$3$4 <a href="\#$1_A">$5</a>$6<a name="$1_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566 s@^ ?($Word)PEPT:GI_(\d+)($Descrip)($Int) ($Signif)(.*)$@$1:<A HREF="$DbUrl{'gb_p'}$2">GI_$2</A> $3 $4 <a href="\#$2_A">$5</a> $6<a name="$2_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568 s@^ *WORMPEPT:(\S+)($Descrip)($Int) ($Signif)(.*)$@WORMPEP:<A HREF="$DbUrl{'wormace'}$1">$1</a> $2 $3 <a href="\#$1_A">$4</a>$5<a name="$1_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570 ## Mike Cherry's markups. SAC note: added back database name to allow
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571 ## the HTML-formatted version to be parsable by Blast.pm.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573 s#^ ?(GB_$Word:)($Word)( *)($Acc)($Descrip)($Int) ( *$Signif) ( *\d*)$#GenBank\|<a href="$_gi_link$4">$2</A>\|$4 $3$5$6 <a href="\#$2_$4_A">$7</A> $8<a name="$2_$4_H"></A>#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575 # Mike's version:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576 # s#^ ?(ORFP:)(\S*)($Descrip)($Int) ($Signif) ($Int)$#$1<b>$2</b> $3 $4 <a href="\#$2_A">$5</a> $6<a name="$2_H"></a>#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578 # My modification:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579 s@^ ?ORFP:(\S*) +([\w-]+)(.*[ ]{2,3})($Int) ($Signif) ($Int)$@ORFP:<A HREF=\"$SGDUrl{'locus'}$2\">$1 $2</A>$3$4 <a href="\#$1_A">$5</a> $6<a name="$1_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581 s#^ ?(ORFN:)(\S*)($Descrip)($Int) ($Signif) ($Int)$#$1$2 $3 $4 <a href="\#$2_A">$5</a> $6<a name="$2_H"></a>#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583 s#^ ?(NR_SC:GP-)(\S*) ( *)gi\|(\w+)([\w\|]*)($Descrip)($Int) ($Signif) ($Int)$#GenPept\|<a href="$DbUrl{'gb_p'}$4">$4</A>$3 gp|$2 $5$6$7 <a href="\#$4_A">$8</A> $9<a name="$4_H"></A>#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585 s#^ ?(NR_SC:SW-)$Word ( *)SW:($Word) ($Acc)($Descrip)($Int) ($Signif) ($Int)$#SWISS\|<a href="$DbUrl{'swpr'}$4">$3</A> SW:$3 $4 $5$6 <a href="\#$3_A">$7</A> $8<a name="$3_H"></A>#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587 s#^ ?(NR_SC:PIR-)$Word ( *)PIR:($Word)($Descrip)($Int) ($Signif) ($Int)$#PIR\|<a href="$DbUrl{'pir_uid'}$3">$3</A> $2 PIR:$3 $4$5 <a href="\#$3_A">$6</A> $7<a name="$3_H"></A>#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589 s#^ ?(CHRS:)([A-Z][0-9]*)($Descrip)($Int) ($Signif) ($Int)$#$1Segment:$2 $3 $4 <a href="\#$2_A">$5</a> $6<a name="$2_H"></a>#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591 s#^ ?(CHR[0-9]*)($Descrip)($Int) ($Signif) ($Int)$#$1 $2 $3 <a href="\#$1_A">$4</a> $5<a name="$1_H"></a>#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 s#^ ?(NOT:)([A-Z]_[0-9]*-[0-9]*)($Descrip)($Int) ($Signif) ($Int)$#$1$2 $3 $4 <a href="\#$2_A">$5</a> $6<a name="$2_H"></a>#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595 s#^ ?(UTR5_SC_[0-9]*:)(\S*)($Descrip)($Int) ($Signif) ($Int)$#UTR5:$2 $3 $4 <a href="\#$2_A">$5</a> $6<a name="$2_H"></a>#o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597 # Hits without a db identifier.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598 s@^ ?($Word)($Descrip)($Int) ($Signif)(.*)$@$1$2$3 <A href="\#$1_A">$4</a>$5<a name="$1_H"></a>@o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600 $$line_ref = $_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606 =head2 _prog_ref_html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608 Usage : n/a; utility method used by get_html_func().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609 Purpose : Get the HTML-formatted list of BLAST literature references and program description links.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610 Returns : string with HTML
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612 See Also : L<get_html_func()|get_html_func>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616 #------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617 sub _prog_ref_html {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618 #------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619 return <<"QQ_REF_QQ";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620 <p>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621 <small>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622 <b>References:</b>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623 <ol>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624 <li>Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W. Myers, and David J. Lipman (1990).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625 Basic local alignment search tool.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 <a href="http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?uid=2231712&form=6&db=m&Dopt=r">J. Mol. Biol. 215: 403-10</a>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627 <li>Altschul et al. (1997), Gapped BLAST and PSI-BLAST:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 a new generation of protein database search programs.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629 <a href="http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?uid=9254694&form=6&db=m&Dopt=r">Nucl. Acids Res. 25: 3389-3402</a>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 <li><b>Program Descriptions</b>:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631 <a href="http://www.ncbi.nlm.nih.gov/BLAST/newblast.html">BLAST2</a> |
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632 <a href="http://blast.wustl.edu/">WU-BLAST2</a> |
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633 <a href="http://www.ncbi.nlm.nih.gov/BLAST/blast_help.html">Help Manual</a>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634 </ol>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 <small>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636 HTML formatting provided by the <a href="${\$BioWWW->home_url('bioperl')}Projects/Blast/">Bioperl Blast module</a>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637 </small>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638 </small>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639 <p>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 QQ_REF_QQ
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643 # Not really a reference for the Blast algorithm itself but an interesting usage.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644 #<li>Gish, Warren, and David J. States (1993). Identification of protein coding regions by database similarity search.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645 #<a href="http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?uid=8485583&form=6&db=m&Dopt=r">Nature Genetics 3:266-72</a>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650 =head2 _genbank_alert
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652 Usage : n/a; utility method used by get_html_func().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
653 Purpose : Get a special alert for BLAST reports against all of GenBank/EMBL.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
654 Returns : string with HTML
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
655
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656 See Also : L<get_html_func()|get_html_func>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660 #------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _genbank_alert {
#------------------
    # Builds the caution banner used for BLAST reports run against all of
    # GenBank/EMBL. Reads no arguments and returns the HTML fragment as a
    # single string.
    #
    # The banner text is static (nothing to interpolate), so it is kept in
    # a non-interpolating here-document.
    my $alert_html = <<'QQ_GENBANK_ALERT_QQ';
<p><b><font color="red">CAUTION: Hits reported on this page may be derived from DNA sequences
that contain more than one gene.
</font>To avoid mis-interpretation, always check database entries
for any sequence of interest to verify that the similarity
occurs within the described sequence. (E.g., A DNA sequence
for gene X as reported in GenBank may contain a 5' or 3'
fragment of coding sequence for a neighboring gene Y, yet will
be listed as gene X, since gene Y had not yet been identified). </b>
QQ_GENBANK_ALERT_QQ

    return $alert_html;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677 =head2 strip_html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679 Usage : $boolean = &strip_html( string_ref );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680 : This method is exported.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681 Purpose : Removes HTML formatting from a supplied string.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682 : Attempts to restore the Blast report to enable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683 : parsing by Bio::Tools::Blast.pm.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684 Returns : Boolean: true if string was stripped, false if not.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685 Argument : string_ref = reference to a string containing the whole Blast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686 : report.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687 Throws : Croaks if the argument is not a scalar reference.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688 Comments : Based on code originally written by Alex Dong Li
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689 : (ali@genet.sickkids.on.ca).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690 : This method does some Blast-specific stripping
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691 : (adds back a '>' character in front of each HSP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
692 : alignment listing).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694 : THIS METHOD IS HIGHLY ERROR-PRONE!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696 : Removal of the HTML tags and accurate reconstitution of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697 : non-HTML-formatted report is highly dependent on structure of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698 : the HTML-formatted version. For example, it assumes that first
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699 : line of each alignment section (HSP listing) starts with a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700 : <a name=..> anchor tag. This permits the reconstruction of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701 : original report in which these lines begin with a ">".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702 : This is required for parsing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
704 : If the structure of the Blast report itself is not intended to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705 : be a standard, the structure of the HTML-formatted version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706 : is even less so. Therefore, the use of this method to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707 : reconstitute parsable Blast reports from HTML-format versions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708 : should be considered a temporary solution.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710 See Also : B<Bio::Tools::Blast::parse()>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714 #---------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub strip_html {
#---------------
    # Purpose  : Remove HTML markup from a Blast report so that the text can
    #            be handled as a plain-text report. The string is modified
    #            in place through the supplied reference.
    # Argument : reference to a scalar holding the report text.
    # Returns  : 1 if at least one substitution was made, 0 otherwise.
    # Throws   : croaks if the argument is not a plain SCALAR reference.
    #
    # This may not be the best way to remove HTML tags. However, it is simple.
    # It won't work under the following conditions:
    #  1) if a quoted '>' appears in a tag (does this ever happen?)
    #  2) if a tag is split over multiple lines and this method is
    #     used to process one line at a time.

    my $string_ref = shift;

    if (ref($string_ref) ne 'SCALAR') {
        my $got = ref($string_ref) || 'non-reference';
        croak("Can't strip HTML: ".
              "Argument should be a SCALAR reference, not a $got");
    }

    # Nothing to strip; returning early avoids "uninitialized" warnings
    # from the substitutions below.
    return 0 unless defined $$string_ref;

    my $str      = $$string_ref;
    my $stripped = 0;

    # Replace a line-initial "<a name =...>" anchor with the '>' character
    # that introduces an HSP alignment listing. The captured line start ($1)
    # is put back so the '>' stays at the beginning of its own line instead
    # of being glued onto the end of the previous one.
    $str =~ s/(\A|\n)<a name ?=[^>]+> ?/$1>/gi and $stripped = 1;

    # Remove all remaining "<...>" tags and non-breaking-space entities.
    # The ';' terminating the entity is consumed as well, so no stray
    # semicolons are left behind.
    $str =~ s/<[^>]+>|&nbsp;?//gi and $stripped = 1;

    # Re-unite any lone '>' characters with the text that follows them,
    # set off from the preceding text by a blank line.
    $str =~ s/(\A|\n)>\s+/\n\n>/g and $stripped = 1;

    $$string_ref = $str;
    return $stripped;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
747 __END__
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
748
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750 # END OF CLASS #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
751 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754