0
|
1 #-----------------------------------------------------------------------------
|
|
2 # PACKAGE : Bio::Tools::WWW
|
|
3 # PURPOSE : To encapsulate commonly used URLs for key websites in bioinformatics.
|
|
4 # AUTHOR : Steve Chervitz
|
|
5 # CREATED : 27 Aug 1996
|
|
6 # REVISION: $Id: WWW.pm,v 1.12 2002/10/22 07:38:46 lapp Exp $
|
|
7 #
|
|
8 # For documentation, run this module through pod2html
|
|
9 # (preferably from Perl v5.004 or better).
|
|
10 #
|
|
11 # MODIFIED:
|
|
12 # 0.014, sac --- Mon Aug 31 19:41:44 1998
|
|
13 # * Updated and added a few URLs.
|
|
14 # * Added method strip_html().
|
|
15 # * Documentation changes.
|
|
16 #
|
|
17 #-----------------------------------------------------------------------------
|
|
18
|
|
19 package Bio::Tools::WWW;
|
|
20 use strict;
|
|
21 use Bio::Root::Root;
|
|
22 use Exporter ();
|
|
23 use vars qw(@ISA @EXPORT_OK %EXPORT_TAGS $ID $VERSION $BioWWW $Revision
|
|
24 $AUTHORITY);
|
|
25 $AUTHORITY = 'nobody@localhost';
|
|
26 @ISA = qw( Bio::Root::Root Exporter);
|
|
27 @EXPORT_OK = qw($BioWWW);
|
|
28 %EXPORT_TAGS = ( obj => [qw($BioWWW)],
|
|
29 std => [qw($BioWWW)]);
|
|
30
|
|
# Package identity, version number and RCS revision string.
$ID       = 'Bio::Tools::WWW';
$VERSION  = 0.014;
$Revision = '$Id: WWW.pm,v 1.12 2002/10/22 07:38:46 lapp Exp $';

# Static, shared instance (exportable as $BioWWW): a hash blessed into the
# class named by $ID that carries only a descriptive '_name' entry.
$BioWWW = bless { '_name' => "Static $ID object" }, $ID;
|
|
39
|
|
40
|
|
41 ## POD Documentation:
|
|
42
|
|
43 =head1 NAME
|
|
44
|
|
45 Bio::Tools::WWW - Bioperl manager for web resources related to biology.
|
|
46
|
|
47 =head1 SYNOPSIS
|
|
48
|
|
49 =head2 Object Creation
|
|
50
|
|
51 use Bio::Tools::WWW qw(:obj);
|
|
52
|
|
53 $pdb = $BioWWW->home_url('pdb');
|
|
54
|
|
55 There is no need to create a new Bio::Tools::WWW.pm object when the
|
|
56 C<:obj> tag is used. This tag will import the static $BioWWW object
|
|
57 created by Bio::Tools::WWW.pm into your name space. This saves you
|
|
58 from having to call C<new Bio::Tools::WWW>.
|
|
59
|
|
60 You are free to not use the :obj tag and create the object as you
|
|
61 like, but a Bio::Tools::WWW object is not configurable; any given
|
|
62 script only needs a single copy.
|
|
63
|
|
64 =head1 INSTALLATION
|
|
65
|
|
66 This module is included with the central Bioperl distribution:
|
|
67
|
|
68 http://bio.perl.org/Core/Latest
|
|
69 ftp://bio.perl.org/pub/DIST
|
|
70
|
|
71 You also need to define URLs for the following variables in this package:
|
|
72
|
|
73 $Not_found_url : Generic page to show in place of a 404 error.
|
|
74 $Tmp_url : Web-accessible site that is used by scripts that
|
|
75 need to generate temporary, web-accessible files.
|
|
76 The files need not necessarily be HTML files, but
|
|
77 being on the same disk as the server will permit
|
|
78 faster IO from server scripts.
|
|
79
|
|
80 =head1 DESCRIPTION
|
|
81
|
|
82 Bio::Tools::WWW is primarily a URL broker for a select set
|
|
83 of sites related to bioinformatics/genome analysis. It
|
|
84 definitely represents a biased, unexhaustive set.
|
|
85 It might be more accurate to call this module
|
|
86 "Bio::Tools::URL.pm". But this module does handle some non-URL
|
|
87 things and it may do more of this in the future. Having one
|
|
88 module to cover all biologically relevant web utilities
|
|
89 makes it more convenient, especially at this early stage
|
|
90 of development.
|
|
91
|
|
92 Maintaining accurate URLs over time can be challenging as
|
|
93 new web sites spring up and old sites are re-organized. Because
|
|
94 of this fact, the URLs in this module are not guaranteed to be
|
|
95 correct or exhaustive and will require periodic updating.
|
|
96
|
|
97 =head2 URL Management
|
|
98
|
|
99 By keeping URL management within Bio::Tools::WWW.pm, other generic
|
|
100 modules can easily access a variety of different web sites without
|
|
101 having to know about a potential multitude of specific modules
|
|
102 specialized for one database or another. An alternative approach would
|
|
103 be to have addresses defined within modules specialized for different
|
|
104 web sites. This, however, may create maintenance headaches when updating
|
|
105 these addresses.
|
|
106
|
|
107 =head2 Complex Websites
|
|
108
|
|
109 Websites with complex datasets may require special treatment
|
|
110 within this module. As an example,
|
|
111 URLs for the Saccharomyces Genome Database are clustered
|
|
112 separately in this module, due to (1) the different ways to
|
|
113 access information at this database and (2) the familiarity
|
|
114 of the developer with this database. The Bio::SGD::WWW.pm inherits from
|
|
115 Bio::Tools::WWW.pm to permit access to the URLs provided by Bio::Tools::WWW.pm
|
|
116 and to SGD-specific HTML and images.
|
|
117
|
|
118 The organization of Bio::Tools::WWW.pm is expected to evolve as
|
|
119 websites get born, die, and mutate their APIs.
|
|
120
|
|
121 =head1 SEE ALSO
|
|
122
|
|
123 http://bio.perl.org/Projects/modules.html - Online module documentation
|
|
124 http://bio.perl.org/ - Bioperl Project Homepage
|
|
125
|
|
126 =head1 FEEDBACK
|
|
127
|
|
128 =head2 Mailing Lists
|
|
129
|
|
130 User feedback is an integral part of the evolution of this and other Bioperl modules.
|
|
131 Send your comments and suggestions preferably to one of the Bioperl mailing lists.
|
|
132 Your participation is much appreciated.
|
|
133
|
|
134 bioperl-l@bioperl.org - General discussion
|
|
135 http://www.bioperl.org/MailList.shtml - About the mailing lists
|
|
136
|
|
137 =head2 Reporting Bugs
|
|
138
|
|
139 Report bugs to the Bioperl bug tracking system to help us keep track of the bugs and
|
|
140 their resolution. Bug reports can be submitted via email or the web:
|
|
141
|
|
142 bioperl-bugs@bio.perl.org
|
|
143 http://bugzilla.bioperl.org/
|
|
144
|
|
145 =head1 AUTHOR
|
|
146
|
|
147 Steve Chervitz, sac@bioperl.org
|
|
148
|
|
149 =head1 VERSION
|
|
150
|
|
151 Bio::Tools::WWW.pm, 0.014
|
|
152
|
|
153 =head1 COPYRIGHT
|
|
154
|
|
155 Copyright (c) 1996-98 Steve Chervitz. All Rights Reserved.
|
|
156 This module is free software; you can redistribute it and/or
|
|
157 modify it under the same terms as Perl itself.
|
|
158
|
|
159
|
|
160 =cut
|
|
161
|
|
162
|
|
163 #
|
|
164 ##
|
|
165 ###
|
|
166 #### END of main POD documentation.
|
|
167 ###
|
|
168 ##
|
|
169 #
|
|
170
|
|
171
|
|
172 ############################ DATA ##################################
|
|
173
|
|
174 ### Database homepage links.
|
|
# Homepage URL for each supported database or resource, keyed by short name.
# Looked up by home_url() and reused below as the base of many search stems.
my %Home_url = (
    'bioperl'          => 'http://bio.perl.org/',
    'bioperl-stanford' => 'http://genome-www.stanford.edu/perlOOP/bioperl/',
    'bioperl-schema'   => 'http://bio.perl.org/Projects/Schema/',
    'biomoo'           => 'http://bioinformatics.weizmann.ac.il/BioMOO/',
    'blast_ncbi'       => 'http://www.ncbi.nlm.nih.gov/BLAST/',
    'blast_wu'         => 'http://blast.wustl.edu/',
    'bsm'              => 'http://www.biochem.ucl.ac.uk/bsm/',
    'clustal'          => 'http://www.csc.fi/molbio/progs/clustalw/clustalw.html',
    'ebi'              => 'http://www.ebi.ac.uk/',
    'emotif'           => 'http://motif.Stanford.EDU/emotif',
    'entrez'           => 'http://www3.ncbi.nlm.nih.gov/Entrez/',
    'expasy'           => 'http://www.expasy.ch/',
    # R.I.P. (Jan 1998); site still functional
    'gdb'              => 'http://www.gdb.org/',
    'mips'             => 'http://speedy.mips.biochem.mpg.de/',
    'mmdb'             => 'http://www.ncbi.nlm.nih.gov/Structure/',
    'modbase'          => 'http://guitar.rockefeller.edu/',
    'ncbi'             => 'http://www.ncbi.nlm.nih.gov/',
    'pedant'           => 'http://pedant.mips.biochem.mpg.de',
    'phylip'           => 'http://evolution.genetics.washington.edu/phylip.html',
    'pir'              => 'http://www-nbrf.georgetown.edu/pir/',
    'pfam'             => 'http://pfam.wustl.edu/',
    'pfam_uk'          => 'http://www.sanger.ac.uk/Software/Pfam/',
    'pfam_us'          => 'http://pfam.wustl.edu/',
    'pdb'              => 'http://www.pdb.bnl.gov/',
    'presage'          => 'http://presage.stanford.edu/',
    'geneQuiz'         => 'http://www.sander.ebi.ac.uk/genequiz/genomes/sc/',
    'molMov'           => 'http://bioinfo.mbb.yale.edu/MolMovDB/',
    # 'protMot'        => 'http://bioinfo.mbb.yale.edu/ProtMotDB/',  # old, use molMov instead
    'pubmed'           => 'http://www.ncbi.nlm.nih.gov/PubMed/',
    'sacch3d'          => 'http://genome-www.stanford.edu/Sacch3D/',
    'sgd'              => 'http://genome-www.stanford.edu/Saccharomyces/',
    # 'scop'           => 'http://www.pdb.bnl.gov/scop/',
    'scop'             => 'http://scop.stanford.edu/scop/',
    'swissProt'        => 'http://www.expasy.ch/sprot/sprot-top.html',
    'webmol'           => 'http://genome-www.stanford.edu/structure/webmol/',
    'ypd'              => 'http://quest7.proteome.com/YPDhome.html',
);
|
|
214
|
|
215 ### Database access CGI stems. (For some DBs the home URL can be used as the CGI stem)
|
|
# Minimal CGI stems for databases whose scripts do not live under the
# homepage URL. Looked up by stem_url() and used to build search stems.
my %Stem_url = (
    'emotif'  => 'http://dna.Stanford.EDU/cgi-bin/emotif/',
    'entrez'  => 'http://www3.ncbi.nlm.nih.gov/htbin-post/Entrez/query?',
    'pdb'     => 'http://www.pdb.bnl.gov/pdb-bin/',
    'pfam_uk' => 'http://www.sanger.ac.uk/cgi-bin/Pfam/',
    'pfam_us' => 'http://pfam.wustl.edu/cgi-bin/',
    'pir'     => 'http://www-nbrf.georgetown.edu/cgi-bin/nbrfget?',
);
|
|
225
|
|
226
|
|
227 ### Database access stems/links.
|
|
# Search stems: each value is a URL prefix to which the caller appends a
# database ID, sequence or query string. Looked up by search_url().
# Values are built from %Home_url and %Stem_url, so both must be populated
# before this table is initialized.
my %Search_url = (
    #'3db'          => 'http://pdb.pdb.bnl.gov/cgi-bin/pdbids?3DB_ID=',  # Former stem
    '3db'           => $Stem_url{'pdb'}.'opdbshort?oPDBid=',  # New stem (aug 1997)
    'embl'          => $Home_url{'ebi'}.'htbin/emblfetch?',
    'expasy'        => $Home_url{'expasy'}.'cgi-bin/',  # program name and query string must be supplied.
    'cath'          => $Home_url{'bsm'}.'cath/CATHSrch.pl?type=PDB&query=',
    'cog_seq'       => $Home_url{'ncbi'}.'cgi-bin/COG/nph-cognitor?seq=',  # add sequence
    # To cog_orf, append ORF name ('YAL005c'). Case-sensitive! YAL005C won't work!
    'cog_orf'       => $Home_url{'ncbi'}.'cgi-bin/COG/cogeseq?',
    'ec1'           => $Home_url{'gdb'}.'bin/bio/wais_q-bio?object_class_key=30&jhu_id=',
    'ec2'           => $Home_url{'bsm'}.'enzymes/',
    'ec3'           => $Home_url{'expasy'}.'cgi-bin/get-enzyme-entry?',
    'emotif_id'     => $Stem_url{'emotif'}.'nph-identify?sequence=',
    'entrez'        => $Stem_url{'entrez'}."db=p_r?db=1&choseninfo=ORF_NAME%20[Gene%20Name]\@1\@1&form=4&field=Gene%20Name&mode=0&retrievestring=ORF_NAME%20[Gene%20Name]",
    'gb_n'          => $Stem_url{'entrez'}."db=n&form=6&dopt=g&uid=",
    'gb_p'          => $Stem_url{'entrez'}."db=p&form=6&dopt=g&uid=",
    'gb_struct'     => $Stem_url{'entrez'}."db=t&form=6&dopt=s&uid=",
    # An earlier 'pdb' entry used to sit here:
    #   'pdb'       => $Stem_url{'pdb'}.'send-text?filename=',
    # It was dead: the second 'pdb' key further down overwrote it when the
    # list was assigned to the hash. Disabled so the table shows what
    # search_url('pdb') really returns; the resulting hash is unchanged.
    'medline'       => $Stem_url{'entrez'}.'form=6&db=m&Dopt=r&uid=',
    'mmdb'          => $Stem_url{'entrez'}.'db=t&form=6&Dopt=s&uid=',
    'modbase_orf'   => $Home_url{'modbase'}.'gm-cgi-bin/orf_page.cgi?pg1=0.5&pg2=1.0&orf=',
    # To the modbase_model, append yeast ORF name &pdb=<4-LETTER_CODE>&chain=<UPCASE LETTER, IF ANY>
    'modbase_model' => $Home_url{'modbase'}.'gm-cgi-bin/model_page.cgi?pg1=0.5&pg2=1.0&orf=',
    'molMov'        => $Home_url{'molMov'}.'search.cgi?pdb=',
    'pdb'           => $Stem_url{'pdb'}.'opdbshort?oPDBid=',  # same as 3db
    'pdb_coord'     => $Stem_url{'pdb'}.'send-pdb?filename=',  # retrieves full coordinate file
    'pfam'          => $Home_url{'pfam'}.'cgi-bin/nph-hmm_search?evalue=1.0&protseq=',  # default: seq search, US
    'pfam_sp_uk'    => $Stem_url{'pfam_uk'}.'swisspfamget.pl?name=',
    'pfam_seq_uk'   => $Stem_url{'pfam_uk'}.'nph-search.cgi?evalue=1.0&type=normal&protseq=',
    'pfam_sp_us'    => $Stem_url{'pfam_us'}.'getswisspfam?key=',
    'pfam_seq_us'   => $Stem_url{'pfam_us'}.'nph-hmm_search?evalue=1.0&protseq=',
    'pfam_form'     => $Home_url{'pfam'}.'cgi-bin/hmm_page.cgi',  # interactive search form
    'pir_id'        => $Stem_url{'pir'}.'fmt=c&xref=0&id=',
    'pir_acc'       => $Stem_url{'pir'}.'fmt=c&xref=1&id=',
    'pir_uid'       => $Stem_url{'pir'}.'uid=',
    'pdbSum'        => $Home_url{'bsm'}.'cath/GetPDBSUMCODE.pl?code=',
    # 'protMot'     => $Home_url{'protMot'}.'search.cgi?pdb=',  # old, use molMov instead
    'presage_sp'    => $Home_url{'presage'}.'search.cgi?spac=',
    'swpr'          => $Home_url{'expasy'}.'cgi-bin/get-sprot-entry?',
    'swModel'       => $Home_url{'expasy'}.'cgi-bin/sprot-swmodel-sub?',
    'swprSearch'    => $Home_url{'expasy'}.'cgi-bin/sprot-search-ful?',

    ### SCOP tlev options can be appended to the stem after adding a PDB ID.
    ### tlev options are: 'dm'(domain), 'sf'(superfamily), 'fa'(family), 'cf'(common fold), 'cl'(class)
    ### E.g., search.cgi?pdb=1ARD;tlev=dm

    'scop'          => $Home_url{'scop'}.'search.cgi?pdb=',  ### better to use scop_pdb.
    'scop_pdb'      => $Home_url{'scop'}.'search.cgi?pdb=',
    'scop_data'     => $Home_url{'scop'}.'data/scop.',  ### Deprecated: frequent changes.

    ## Search URLs for SGD/Sacch3D are contained in %SGD_url and %S3d_url (below).

    # For wormpep, the query string MUST end with "&keyword=" (after appending a sequence ID)
    'wormpep'       => 'http://www.sanger.ac.uk/cgi-bin/wormpep_fetch.pl?entry=',
    'wormace'       => 'http://webace.sanger.ac.uk/cgi-bin/webace?db=wormace&class=Sequence&text=yes&object=',

    ### YPD: You must use a valid gene name or ORF name (IFF there is no gene name).
    ### For this reason it is most convenient to use SGD's Protein_Info link
    ### which can accept either and will provide a proper link to YPD.
    'ypd'           => 'http://quest7.proteome.com/YPD/',
);
|
|
289
|
|
290
|
|
291
|
|
292 ### CGI stems for SGD and Sacch3D.
|
|
# CGI stems for the SGD and Sacch3D servers at Stanford.
# Looked up by sgd_stem_url() and used to build %SGD_url, %S3d_url and
# %Viewer_url.
my %SGD_stem_url = (
    'stanford' => 'http://genome-www.stanford.edu/',
    'sgd'      => 'http://genome-www.stanford.edu/cgi-bin/SGD/',
    'sgd2'     => 'http://genome-www2.stanford.edu/cgi-bin/SGD/',
    's3d'      => 'http://genome-www.stanford.edu/cgi-bin/SGD/Sacch3D/',
    's3d2'     => 'http://genome-www2.stanford.edu/cgi-bin/SGD/Sacch3D/',
    's3d3'     => 'http://genome-www3.stanford.edu/cgi-bin/SGD/Sacch3D/',
    'sacchdb'  => 'http://genome-www.stanford.edu/cgi-bin/dbrun/SacchDB?',
);
|
|
302
|
|
303 ### SGD stems and links.
|
|
# SGD pages and search stems, looked up by sgd_url().
# The bases are read once from %Home_url / %SGD_stem_url inside the do-block
# so that no extra file-scope variables are introduced.
my %SGD_url = do {
    my $sgd_home = $Home_url{'sgd'};
    my $sacchdb  = $SGD_stem_url{'sacchdb'};
    my $sgd_cgi  = $SGD_stem_url{'sgd'};
    my $sgd_cgi2 = $SGD_stem_url{'sgd2'};
    my $stanford = $SGD_stem_url{'stanford'};

    (
        'home'       => $sgd_home,
        'help'       => $sgd_home . 'help/',
        'mammal'     => $sgd_home . 'mammal/',
        'worm'       => $sgd_home . 'worm/',
        'gene'       => $sacchdb  . 'find+Locus+',
        'locus'      => $sacchdb  . 'find+Locus+',
        'orf'        => $sacchdb  . 'find+Locus+',
        'mipsorf'    => $sgd_cgi  . 'mips-orfs?',
        'gene_info'  => $sacchdb  . 'find+Gene_Info+',
        'prot_info'  => $sacchdb  . 'find+Protein_Info+',
        'seq'        => $sgd_cgi  . 'seqDisplay?seq=',
        'gi'         => $sacchdb  . 'find+Sequence+Database+=+GenPept+AND+NEXT+=+',
        'chr'        => $sgd_cgi2 . 'seqTools?chr=',
        'chr_old'    => $sgd_cgi  . 'dnaredir?chr=',
        'seq_an'     => $sgd_cgi2 . 'seqTools?seqname=',
        'seq_an_old' => $sgd_cgi  . 'dnaredir?seqname=',
        'map_chr'    => $sgd_cgi  . 'ORFMAP/ORFmap?chr=',
        'map_orf'    => $sgd_cgi  . 'ORFMAP/ORFmap?seq=',
        # 'chr'      => $sgd_cgi2 . 'seqform?chr=',
        # 'seg'      => $sgd_cgi2 . 'seqform?seg=',
        # 'fea'      => $sgd_cgi2 . 'featureform?seg=',
        'feature'    => $sgd_cgi2 . 'featureform?chr=',  # complete with "5&beg=100&end=400"
        'search'     => $sgd_cgi  . 'search?',
        'images'     => $stanford . 'images/',
        'suggest'    => $stanford . 'forms/sgd-suggestion.html',
        'tmp'        => $stanford . 'tmp/',
    );
};
|
|
332
|
|
333
|
|
334 ### Sacch3D stems and links.
|
|
# Sacch3D pages and search stems, looked up by s3d_url().
# The bases are read once from %Home_url / %SGD_stem_url inside the do-block
# so that no extra file-scope variables are introduced.
my %S3d_url = do {
    my $s3d_home = $Home_url{'sacch3d'};
    my $s3d_cgi  = $SGD_stem_url{'s3d'};
    my $s3d_cgi2 = $SGD_stem_url{'s3d2'};
    my $s3d_cgi3 = $SGD_stem_url{'s3d3'};

    (
        'home'             => $s3d_home,
        'search'           => $s3d_home . 'search.html',
        'help'             => $s3d_home . 'help/',
        'new'              => $s3d_home . 'new/',
        'chrm'             => $s3d_home . 'data/chr',
        'domains'          => $s3d_home . 'domains/',
        'genequiz'         => $s3d_home . 'genequiz/',
        'analysis'         => $s3d_home . 'analysis/',
        'scop'             => $s3d_cgi3 . 'getscop?data=',
        'scop_fold'        => $s3d_cgi3 . 'getscop?type=fold&data=',
        'scop_class'       => $s3d_cgi3 . 'getscop?type=class&data=',
        'scop_gene'        => $s3d_cgi3 . 'getscop?type=gene&data=',
        'gene'             => $s3d_cgi  . 'get?class=gene&item=',
        'orf'              => $s3d_cgi  . 'get?class=orf&item=',
        'text'             => $s3d_cgi  . 'get?class=text&item=',
        'pdb'              => $s3d_cgi  . 'get?class=pdb&item=',
        'pdb_coord'        => $s3d_cgi  . 'pdbcoord.pl?id=',
        'dsc'              => $s3d_cgi  . 'dsc.pl?gene=',
        'emotif'           => $s3d_cgi  . 'seq_search.pl?db=emotif&gene=',
        'pfam'             => $s3d_cgi  . 'seq_search.pl?db=pfam&gene=',
        'pfam_uk'          => $s3d_cgi  . 'seq_search.pl?db=pfam&loc=uk&gene=',
        'pfam_us'          => $s3d_cgi  . 'seq_search.pl?db=pfam&loc=us&gene=',
        'blast_pdb'        => $s3d_cgi  . 'getblast?db=pdb&name=',
        'blast_nr'         => $s3d_cgi  . 'getblast?db=nr&name=',
        'blast_est'        => $s3d_cgi  . 'getblast?db=est&name=',
        'blast_mammal'     => $s3d_cgi  . 'getblast?db=mammal&name=',
        'blast_human'      => $s3d_cgi  . 'getblast?db=human&name=',
        'blast_worm'       => $s3d_cgi  . 'getblast?db=worm&name=',
        'blast_yeast'      => $s3d_cgi  . 'getblast?db=yeast&name=',
        'blast_worm_yeast' => $s3d_cgi  . 'getblast?db=worm&query=worm&name=',
        'patmatch'         => $s3d_cgi2 . 'grepmatch?',  ## deprecated
        'grepmatch'        => $s3d_cgi2 . 'grepmatch?',
        'pdb_neighbors'    => $s3d_cgi  . 'pdb_neighbors?id=CHAIN&gene=ORF_NAME',
    );
};
|
|
370
|
|
371
|
|
372 ### 3D viewer stems.
|
|
# URL stems for 3D structure viewers, looked up by viewer_url().
# The bases are read once from %SGD_stem_url / %Stem_url inside the do-block.
my %Viewer_url = do {
    my $sgd_cgi  = $SGD_stem_url{'sgd'};
    my $pdb_stem = $Stem_url{'pdb'};

    (
        # 'java'   => $sgd_cgi . 'Sacch3D/pdbViewer.pl?pdbCode=PDB&orf=',
        'java'     => $sgd_cgi  . 'Sacch3D/pdbViewer.pl?pdbCode=',  # Default java viewer
        'webmol'   => $sgd_cgi  . 'Sacch3D/pdbViewer.pl?pdbCode=',
        'codebase' => $SGD_stem_url{'stanford'} . 'structure/webmol/lib',
        'rasmol'   => $pdb_stem . 'send-ras?filename=',
        'chime'    => $pdb_stem . 'ccpeek?id=',
        'cn3d'     => $Stem_url{'entrez'} . 'db=t&form=6&Dopt=i&Complexity=Cn3D+Subset&uid=',
        'kinemage' => 'http://prosci.org/Kinemage',
    );
};
|
|
384
|
|
385
|
|
386 ### Stock HTML
|
|
387 # The error reporting HTML strings represent some experiments in human psychology:
|
|
388 # how do you induce users to report errors that you should know about yet not
|
|
389 # get flooded with trivial problems caused by novices?
|
|
# Stock HTML fragments returned by html(). Every mailto: link points at
# $AUTHORITY. The heredoc bodies start at column 0 so that no leading
# whitespace ends up in the emitted HTML.
my %Html = do {
    my $mailto = "mailto:$AUTHORITY";

    (
        'authority' => qq|<A HREF="$mailto"><b>$AUTHORITY</b></A>|,

        'trouble'   => <<"END_TROUBLE_HTML",
<p>If this problem persists, <A HREF="$mailto"><b>please notify us.</b></A>
Include a copy of this error page with your message. Thanks.<p>
END_TROUBLE_HTML

        'notify'    => <<"END_NOTIFY_HTML",
<A HREF="$mailto"><b>Please notify us.</b></A>
Include a copy of this error page with your message. Thanks.<p>
END_NOTIFY_HTML

        'ourFault'  => <<"END_FAULT_HTML",
<p><b>This is our fault!</b> There is apparently a problem with our software
that we may not know about. <A HREF="$mailto"><b>Please notify us!</b></A>
Include a copy of this error page with your message. Thanks.<p>
END_FAULT_HTML

        'techDiff'  => <<"END_TECH_HTML",
<p><big>We are experiencing technical difficulties now.<br>
We will have the problem fixed soon. Sorry for any inconvenience.</big><p>
END_TECH_HTML
    );
};
|
|
411
|
|
412
|
|
413 ### Miscellaneous URLs. Configure as desired for your site.
|
|
# Site-specific URLs; edit both for your installation.
my ($Not_found_url, $Tmp_url) = (
    'http://genome-www.stanford.edu/Sacch3D/notfound.html',  # returned when a key cannot be resolved
    'http://genome-www.stanford.edu/tmp/',                   # returned by tmp_url()
);
|
|
416
|
|
417
|
|
418
|
|
419 =head1 APPENDIX
|
|
420
|
|
421 Methods beginning with a leading underscore are considered private
|
|
422 and are intended for internal use by this module. They are
|
|
423 B<not> considered part of the public interface and are described here
|
|
424 for documentation purposes only.
|
|
425
|
|
426 =cut
|
|
427
|
|
428 #########################################################################
|
|
429 ## ACCESSOR METHODS
|
|
430 #########################################################################
|
|
431
|
|
432
|
|
433 =head2 home_url
|
|
434
|
|
435 Usage : $BioWWW->home_url(<string>)
|
|
436 Purpose : To obtain the homepage URL for a biological database or resource.
|
|
437 Returns : String containing the URL (including "http://")
|
|
438 Argument : String
|
|
439 : Currently acceptable arguments are:
|
|
440 : bioperl bioperl-schema biomoo bsm ebi emotif entrez
|
|
441 : expasy mips mmdb ncbi pir pfam pdb geneQuiz
|
|
442 : molMov pubmed sacch3d sgd scop swissProt webmol ypd
|
|
443 Throws : Warns if argument cannot be resolved to a URL.
|
|
444 Comments : The URLs listed here do not represent a complete list.
|
|
445 : Expect this to evolve and grow with time.
|
|
446
|
|
447 See Also : L<search_url>()
|
|
448
|
|
449 =cut
|
|
450
|
|
451 #-------------
|
|
#-------------
# Resolve a resource name to its homepage URL.
#   $arg : a key of %Home_url, or 'all' to get the whole table back
#          (a key/value list when called in list context).
# Returns the URL string. For an unknown or undefined key it warns via
# $self->warn() and returns $Not_found_url.
sub home_url {
#-------------
    my ($self, $arg) = @_;
    $arg = '' unless defined $arg;    # keep undef out of eq, the lookup and the message

    return %Home_url       if $arg eq 'all';
    return $Home_url{$arg} if exists $Home_url{$arg};

    # Warn first, then return exactly one value. The previous comma expression
    # (warn(...), $Not_found_url) put warn()'s return value in front of the
    # URL whenever the method was called in list context.
    $self->warn("Can't resolve argument to URL: $arg");
    return $Not_found_url;
}
|
|
460
|
|
461
|
|
462
|
|
463 =head2 search_url
|
|
464
|
|
465 Usage : $BioWWW->search_url(<string>)
|
|
466 Purpose : To provide a URL stem for a search engine at a biological database
|
|
467 : or resource.
|
|
468 Returns : String containing the URL (including "http://")
|
|
469 Argument : String
|
|
470 : Currently acceptable arguments are:
|
|
471 : 3db embl cath ec1 ec2 ec3 emotif_id entrez gb_n gb_p
|
|
472 : gb_struct pdb medline mmdb pdb_coord pfam pir_acc
|
|
473 : pdbSum molMov swpr swModel swprSearch scop scop_pdb scop_data
|
|
474 : ypd
|
|
475 Throws : Warns if argument cannot be resolved to a URL.
|
|
476 Comments : Unlike the homepage URLs, this method does not return a complete
|
|
477 : URL but a stem which must be further modified, typically by
|
|
478 : appending data to it, before it can be used. The data appended
|
|
479 : depends on the specific URL; typically, it is a database ID or
|
|
480 : other unique identifier.
|
|
481 : The requirements for each URL will be described here eventually.
|
|
482 :
|
|
483 : The URLs listed here do not represent a complete list.
|
|
484 : Expect this to evolve and grow with time.
|
|
485 :
|
|
486 : Given this complexity, it may be useful to provide special methods
|
|
487 : for these different URLs. This would however result in an
|
|
488 : explosion of methods that might make this module less
|
|
489 : maintainable and harder to use.
|
|
490
|
|
491 See Also : L<home_url>()
|
|
492
|
|
493 =cut
|
|
494
|
|
495 #--------------
|
|
#--------------
# Resolve a resource name to its search URL stem.
#   $arg : a key of %Search_url, or 'all' to get the whole table back
#          (a key/value list when called in list context).
# Returns the stem string, to which the caller appends an ID or query.
# For an unknown or undefined key it warns via $self->warn() and returns
# $Not_found_url.
sub search_url {
#--------------
    my ($self, $arg) = @_;
    $arg = '' unless defined $arg;    # keep undef out of eq, the lookup and the message

    return %Search_url       if $arg eq 'all';
    return $Search_url{$arg} if exists $Search_url{$arg};

    # Warn first, then return exactly one value. The previous comma expression
    # (warn(...), $Not_found_url) put warn()'s return value in front of the
    # URL whenever the method was called in list context.
    $self->warn("Can't resolve argument to URL: $arg");
    return $Not_found_url;
}
|
|
504
|
|
505
|
|
506
|
|
507 =head2 stem_url
|
|
508
|
|
509 Usage : $BioWWW->stem_url(<string>)
|
|
510 Purpose : To obtain the minimal stem URL for searching a biological database or resource.
|
|
511 Returns : String containing the URL (including "http://")
|
|
512 Argument : String
|
|
513 : Currently acceptable arguments are:
|
|
514 : emotif entrez pdb pfam_uk pfam_us pir
|
|
515 Throws : Warns if argument cannot be resolved to a URL.
|
|
516 Comments : The URL stems returned by this method are much more minimal than
|
|
517 : those provided by search_url(). Use of these stems requires knowledge
|
|
518 : of the CGI scripts which they invoke.
|
|
519
|
|
520 See Also : L<search_url>()
|
|
521
|
|
522 =cut
|
|
523
|
|
524 #--------------
|
|
#--------------
# Resolve a resource name to its minimal CGI stem URL.
#   $arg : a key of %Stem_url, or 'all' to get the whole table back
#          (a key/value list when called in list context).
# Returns the stem string. For an unknown or undefined key it warns via
# $self->warn() and returns $Not_found_url.
sub stem_url {
#--------------
    my ($self, $arg) = @_;
    $arg = '' unless defined $arg;    # keep undef out of eq, the lookup and the message

    return %Stem_url       if $arg eq 'all';
    return $Stem_url{$arg} if exists $Stem_url{$arg};

    # Warn first, then return exactly one value. The previous comma expression
    # (warn(...), $Not_found_url) put warn()'s return value in front of the
    # URL whenever the method was called in list context.
    $self->warn("Can't resolve argument to URL: $arg");
    return $Not_found_url;
}
|
|
533
|
|
534
|
|
535
|
|
536 =head2 viewer_url
|
|
537
|
|
538 Usage : $BioWWW->viewer_url(<string>)
|
|
539 Purpose : To obtain the stem URL for a 3D viewer (RasMol, WebMol, Cn3D)
|
|
540 Returns : String containing the URL (including "http://")
|
|
541 Argument : String
|
|
542 : Currently acceptable arguments are:
|
|
543 : rasmol webmol cn3d java (java is an alias for webmol)
|
|
544 Throws : Warns if argument cannot be resolved to a URL.
|
|
545 Comments : The 4-letter Brookhaven PDB identifier must be appended to the
|
|
546 : URL provided by this method.
|
|
547 : The URLs listed here do not represent a complete list.
|
|
548 : Expect this to evolve and grow with time.
|
|
549
|
|
550 =cut
|
|
551
|
|
552 #---------------
|
|
#---------------
# Resolve a viewer name to the URL stem for that 3D structure viewer.
#   $arg : a key of %Viewer_url, or 'all' to get the whole table back
#          (a key/value list when called in list context).
# Returns the stem string. For an unknown or undefined key it warns via
# $self->warn() and returns $Not_found_url.
sub viewer_url {
#---------------
    my ($self, $arg) = @_;
    $arg = '' unless defined $arg;    # keep undef out of eq, the lookup and the message

    return %Viewer_url       if $arg eq 'all';
    return $Viewer_url{$arg} if exists $Viewer_url{$arg};

    # Warn first, then return exactly one value. The previous comma expression
    # (warn(...), $Not_found_url) put warn()'s return value in front of the
    # URL whenever the method was called in list context.
    $self->warn("Can't resolve argument to URL: $arg");
    return $Not_found_url;
}
|
|
561
|
|
562
|
|
563
|
|
564 =head2 not_found_url
|
|
565
|
|
566 Usage : $BioWWW->not_found_url()
|
|
567 Purpose : To obtain the URL for a web page to be shown in place of a 404 error.
|
|
568 Returns : String containing the URL (including "http://")
|
|
569 Argument : n/a
|
|
570 Throws : n/a
|
|
571 Comments : This URL should be customized as desired.
|
|
572
|
|
573 =cut
|
|
574
|
|
575 #-----------------
|
|
# Accessor for the file-level $Not_found_url; takes no arguments besides
# the invocant.
sub not_found_url {
    my ($self) = @_;
    return $Not_found_url;
}
|
|
577 #-----------------
|
|
578
|
|
579
|
|
580 =head2 tmp_url
|
|
581
|
|
582 Usage : $BioWWW->tmp_url()
|
|
583 Purpose : To obtain the URL for a temporary, web-accessible directory.
|
|
584 Returns : String containing the URL (including "http://")
|
|
585 Argument : n/a
|
|
586 Throws : n/a
|
|
587 Comments : This URL should be customized as desired.
|
|
588
|
|
589 =cut
|
|
590
|
|
591 #-----------
|
|
# Accessor for the file-level $Tmp_url; takes no arguments besides the
# invocant.
sub tmp_url {
    my ($self) = @_;
    return $Tmp_url;
}
|
|
593 #-----------
|
|
594
|
|
595
|
|
596
|
|
597 =head2 search_link
|
|
598
|
|
599 Usage : $BioWWW->search_link(<site>, <value>, <text>)
|
|
600 Purpose : Wrapper for search_url() that returns the URL within an HTML anchor.
|
|
601 Returns : String containing the HTML anchor ( qq|<A HREF="http://...">text</A>| )
|
|
602 Argument : <site> = string to be used as argument for search_url()
|
|
603 : <value> = string to be appended to the search URL stem.
|
|
604 : <text> = string to be shown as the link text (default = <value>).
|
|
605 Throws : n/a
|
|
606 Status : Experimental
|
|
607
|
|
608 See Also : L<search_url>()
|
|
609
|
|
610 =cut
|
|
611
|
|
612 #---------------
|
|
#---------------
# Wrap a search URL in an HTML anchor.
#   $arg   : key passed to search_url() to obtain the URL stem
#   $value : string appended to the stem
#   $text  : link text; $value is used when $text is false
# Returns the anchor as a string. Nothing is escaped.
sub search_link {
#---------------
    my $self = shift;
    my ($arg, $value, $text) = @_;

    my $href  = $self->search_url($arg) . $value;
    my $label = $text || $value;

    return qq|<A HREF="$href">$label</A>|;
}
|
|
620
|
|
621
|
|
622
|
|
623 =head2 viewer_link
|
|
624
|
|
625 Usage : $BioWWW->viewer_link(<site>, <value>, <text>)
|
|
626 Purpose : Wrapper for viewer_url() that returns the complete URL within an HTML anchor.
|
|
627 Returns : String containing the HTML anchor ( qq|<A HREF="http://...">text</A>| )
|
|
628 Argument : <site> = string to be used as argument for viewer_url()
|
|
629 : <value> = string to be appended to the viewer URL stem.
|
|
630 : <text> = string to be shown as the link text (default = <value>).
|
|
631 Throws : n/a
|
|
632 Status : Experimental
|
|
633
|
|
634 See Also : L<viewer_url>()
|
|
635
|
|
636 =cut
|
|
637
|
|
638 #----------------
|
|
#----------------
# Wrap a viewer URL in an HTML anchor.
#   $arg   : key passed to viewer_url() to obtain the URL stem
#   $value : string appended to the stem
#   $text  : link text; $value is used when $text is false
# Returns the anchor as a string. Nothing is escaped.
sub viewer_link {
#----------------
    my $self = shift;
    my ($arg, $value, $text) = @_;

    my $href  = $self->viewer_url($arg) . $value;
    my $label = $text || $value;

    return qq|<A HREF="$href">$label</A>|;
}
|
|
646
|
|
647
|
|
648
|
|
649 =head2 html
|
|
650
|
|
651 Usage : $BioWWW->html(<string>)
|
|
652 Purpose : To obtain HTML-formatted text for frequently needed web-page messages.
|
|
653 Returns : String containing the requested HTML fragment.
|
|
654 Argument : String.
|
|
655 : Currently acceptable arguments are:
|
|
656 : authority (mailto: link for webmaster; shows e-mail address as link)
|
|
657 : notify (wraps mailto:authority link with text for link "please notify us")
|
|
658 : ourFault ("this problem is our fault. If it persists <notify-link>")
|
|
659 : trouble (same as ourFault but doesn't blame us for the problem)
|
|
660 : techDiff ("we are experiencing technical difficulties. Please stand by.")
|
|
661 Throws : n/a
|
|
662 Comments : The authority (webmaster) address is taken from the package
|
|
663 : variable $AUTHORITY, which is set near the top of this module.
|
|
664 : Change the value there to customize the mailto: links.
|
|
665
|
|
666 =cut
|
|
667
|
|
668 #----------
|
|
#----------
# Fetch a stock HTML fragment by name.
#   $arg : a key of %Html, or 'all' to get the whole table back
# Returns the fragment, or a <pre> placeholder naming the missing key.
sub html {
#----------
    my ($self, $arg) = @_;

    if ($arg eq 'all') {
        return %Html;
    }

    return $Html{$arg} if exists $Html{$arg};
    return "<pre>(missing HTML for \"$arg\")</pre>";
}
|
|
675
|
|
676
|
|
677 ###
|
|
678 ### Below are accessors specialized for the Saccharomyces Genome Database
|
|
679 ### It is possible that they will be moved to Bio::SGD::WWW.pm in the future.
|
|
680 ###
|
|
681
|
|
682
|
|
683 =head2 sgd_url
|
|
684
|
|
685 Usage : $BioWWW->sgd_url(<string>)
|
|
686 Purpose : To obtain the webpage URL or search stem for SGD.
|
|
687 Returns : String containing the URL (including "http://")
|
|
688 Argument : String
|
|
689 : Currently acceptable arguments (TODO).
|
|
690 Throws : Warns if argument cannot be resolved to a URL.
|
|
691 Comments : This accessor is specialized for the Saccharomyces Genome Database.
|
|
692 : It is possible that it will be moved to SGD::WWW.pm in the future.
|
|
693
|
|
694 See Also : L<search_url>()
|
|
695
|
|
696 =cut
|
|
697
|
|
698 #------------
|
|
#------------
# Resolve a name to an SGD page URL or search stem.
#   $arg : a key of %SGD_url, or 'all' to get the whole table back
#          (a key/value list when called in list context).
# Returns the URL string. For an unknown or undefined key it warns via
# $self->warn() and returns $Not_found_url.
sub sgd_url {
#------------
    my ($self, $arg) = @_;
    $arg = '' unless defined $arg;    # keep undef out of eq, the lookup and the message

    return %SGD_url       if $arg eq 'all';
    return $SGD_url{$arg} if exists $SGD_url{$arg};

    # Warn first, then return exactly one value. The previous comma expression
    # (warn(...), $Not_found_url) put warn()'s return value in front of the
    # URL whenever the method was called in list context.
    $self->warn("Can't resolve argument to URL: $arg");
    return $Not_found_url;
}
|
|
707
|
|
708
|
|
709
|
|
710 =head2 s3d_url
|
|
711
|
|
712 Usage : $BioWWW->s3d_url(<string>)
|
|
713 Purpose : To obtain the webpage URL or search stem for Sacch3D.
|
|
714 Returns : String containing the URL (including "http://")
|
|
715 Argument : String
|
|
716 : Currently acceptable arguments (TODO).
|
|
717 Throws : Warns if argument cannot be resolved to a URL.
|
|
718 Comments : This accessor is specialized for the Saccharomyces Genome Database.
|
|
719 : It is possible that it will be moved to SGD::WWW.pm in the future.
|
|
720
|
|
721 See Also : L<search_url>()
|
|
722
|
|
723 =cut
|
|
724
|
|
725 #-----------
|
|
#-----------
# Resolve a name to a Sacch3D page URL or search stem.
#   $arg : a key of %S3d_url, or 'all' to get the whole table back
#          (a key/value list when called in list context).
# Returns the URL string. For an unknown or undefined key it warns via
# $self->warn() and returns $Not_found_url.
sub s3d_url {
#-----------
    my ($self, $arg) = @_;
    $arg = '' unless defined $arg;    # keep undef out of eq, the lookup and the message

    return %S3d_url       if $arg eq 'all';
    return $S3d_url{$arg} if exists $S3d_url{$arg};

    # Warn first, then return exactly one value. The previous comma expression
    # (warn(...), $Not_found_url) put warn()'s return value in front of the
    # URL whenever the method was called in list context.
    $self->warn("Can't resolve argument to URL: $arg");
    return $Not_found_url;
}
|
|
734
|
|
735
|
|
736
|
|
737 =head2 sgd_stem_url
|
|
738
|
|
739 Usage : $BioWWW->sgd_stem_url(<string>)
|
|
740 Purpose : To obtain the minimal stem URL for a SGD/Sacch3D CGI script.
|
|
741 Returns : String containing the URL (including "http://")
|
|
742 Argument : String
|
|
743 : Currently acceptable arguments (TODO).
|
|
744 Throws : Warns if argument cannot be resolved to a URL.
|
|
745 Comments : This accessor is specialized for the Saccharomyces Genome Database.
|
|
746 : It is possible that it will be moved to SGD::WWW.pm in the future.
|
|
747
|
|
748 See Also : L<search_url>()
|
|
749
|
|
750 =cut
|
|
751
|
|
752 #-----------------
|
|
sub sgd_stem_url {
    # Look up a URL by key in the file-level %SGD_stem_url table.
    #
    #   $arg : lookup key, or the literal string 'all'.
    #
    # Returns the whole %SGD_stem_url hash when $arg is 'all', the stored
    # value when the key exists, and otherwise warns through $self->warn()
    # and returns $Not_found_url.
    my ($self, $arg) = @_;

    # A missing argument is treated as the empty key so the comparisons and
    # the warning text below never operate on undef.
    $arg = '' unless defined $arg;

    return %SGD_stem_url       if $arg eq 'all';
    return $SGD_stem_url{$arg} if exists $SGD_stem_url{$arg};

    # Warn first, then return exactly one value.  The previous
    # "(warn(...), $Not_found_url)" comma expression also handed warn()'s
    # return value to callers that invoked this method in list context.
    $self->warn("Can't resolve argument to URL: $arg");
    return $Not_found_url;
}
|
|
761
|
|
762
|
|
763
|
|
764 =head2 s3d_link
|
|
765
|
|
766 Usage : $BioWWW->s3d_link(<site>, <value>, <text>)
|
|
767 Purpose : Wrapper for s3d_url() that returns the complete URL within an HTML anchor.
|
|
768 Returns : String containing an HTML anchor element whose HREF is the complete URL.
|
|
769 Argument : <site> = string to be used as argument for s3d_url()
|
|
770 : <value> = string to be appended to the s3d URL stem.
|
|
771 : <text> = string to be shown as the link text (default = <value>).
|
|
772 Throws : n/a
|
|
773 Status : Experimental
|
|
774 Comments : This accessor is specialized for the Saccharomyces Genome Database.
|
|
775 : It is possible that it will be moved to SGD::WWW.pm in the future.
|
|
776
|
|
777 See Also : L<s3d_url>(), L<sgd_link>()
|
|
778
|
|
779 =cut
|
|
780
|
|
781 #--------------
|
|
sub s3d_link {
    # Build an HTML anchor whose target is the s3d_url() result for $arg
    # with $value appended.
    #
    #   $arg   : passed unchanged to $self->s3d_url().
    #   $value : appended to the URL returned by s3d_url().
    #   $text  : link text; any false value falls back to $value.
    #
    # Returns the anchor as a string.
    my $self  = shift;
    my $arg   = shift;
    my $value = shift;
    my $text  = shift;

    my $stem = $self->s3d_url($arg);
    $text = $value unless $text;

    return '<A HREF="' . $stem . $value . '">' . $text . '</A>';
}
|
|
789
|
|
790
|
|
791
|
|
792 =head2 sgd_link
|
|
793
|
|
794 Usage : $BioWWW->sgd_link(<site>, <value>, <text>)
|
|
795 Purpose : Wrapper for sgd_url() that returns the complete URL within an HTML anchor.
|
|
796 Returns : String containing an HTML anchor element whose HREF is the complete URL.
|
|
797 Argument : <site> = string to be used as argument for sgd_url()
|
|
798 : <value> = string to be appended to the sgd URL stem.
|
|
799 : <text> = string to be shown as the link text (default = <value>).
|
|
800 Throws : n/a
|
|
801 Status : Experimental
|
|
802 Comments : This accessor is specialized for the Saccharomyces Genome Database.
|
|
803 : It is possible that it will be moved to SGD::WWW.pm in the future.
|
|
804
|
|
805 See Also : L<sgd_url>(), L<s3d_link>()
|
|
806
|
|
807 =cut
|
|
808
|
|
809 #--------------
|
|
sub sgd_link {
    # Build an HTML anchor whose target is the sgd_url() result for $arg
    # with $value appended.
    #
    #   $arg   : passed unchanged to $self->sgd_url().
    #   $value : appended to the URL returned by sgd_url().
    #   $text  : link text; any false value falls back to $value.
    #
    # Returns the anchor as a string.
    my $self  = shift;
    my $arg   = shift;
    my $value = shift;
    my $text  = shift;

    my $stem = $self->sgd_url($arg);
    $text = $value unless $text;

    return '<A HREF="' . $stem . $value . '">' . $text . '</A>';
}
|
|
817
|
|
818
|
|
819 #########################################################################
|
|
820 ## INSTANCE METHODS
|
|
821 #########################################################################
|
|
822
|
|
823 ## Note that similar functions to those presented below are also available
|
|
824 ## via L. Stein's CGI.pm. These are more experimental versions.
|
|
825
|
|
826 =head2 start_html
|
|
827
|
|
828 Usage : $BioWWW->start_html()
|
|
829 Purpose : Prints the "Content-type: text/html\n\n<HTML>\n" header.
|
|
830 Returns : n/a; This method prints the Content-type string shown above.
|
|
831 Argument : n/a
|
|
832 Throws : n/a
|
|
833 Status : Experimental
|
|
834 Comments : This method prevents redundant invocations thus avoiding the
|
|
835 : accidental printing of the "content-type..." on the page.
|
|
836 : If using L. Stein's CGI.pm, this is similar to $query->header()
|
|
837 : (Does CGI.pm prevent redundant invocation?)
|
|
838
|
|
839 =cut
|
|
840
|
|
841 #---------------'
|
|
sub start_html {
    # Print the CGI content-type header followed by the opening <HTML> tag,
    # but only the first time this is called on a given object.  The
    # '_started_html' field records that the header has already gone out.
    my ($self) = @_;

    my $first_call = !$self->{'_started_html'};
    if ($first_call) {
        print "Content-type: text/html\n\n<HTML>\n";
        $self->{'_started_html'} = 1;
    }
}
|
|
850
|
|
851
|
|
852 =head2 redirect
|
|
853
|
|
854 Usage : $BioWWW->redirect(<string>)
|
|
855 Purpose : Prints the header needed to redirect a web browser to a supplied URL.
|
|
856 Returns : n/a; Prints the redirection header.
|
|
857 Argument : String containing the URL to be redirected to.
|
|
858 Throws : n/a
|
|
859 Status : Experimental
|
|
860
|
|
861 =cut
|
|
862
|
|
863 #-------------
|
|
sub redirect {
    # Print a "Location:" header for $url followed by a text/html
    # content-type header and the blank line that ends the header block.
    my $self = shift;
    my $url  = shift;

    my $location_header = "Location: $url\n";
    my $content_header  = "Content-type: text/html\n\n";

    print $location_header;
    print $content_header;
}
|
|
871
|
|
872
|
|
873
|
|
874 =head2 pre
|
|
875
|
|
876 Usage : $BioWWW->pre("text to be pre-formatted");
|
|
877 Purpose : To produce HTML for text that is not to be formatted by the browser.
|
|
878 Returns : String containing the "<pre>" formatted html.
|
|
879 Argument : String containing the text to be enclosed in PRE tags.
|
|
880 Throws : n/a
|
|
881 Status : Experimental
|
|
882
|
|
883 =cut
|
|
884
|
|
885 #--------
|
|
sub pre {
    # Wrap the supplied text in <PRE> ... </PRE>, with a newline between
    # each tag and the text.  Returns the resulting string.
    my ($self, $text) = @_;

    return join("\n", '<PRE>', $text, '</PRE>');
}
|
|
891
|
|
892
|
|
893 #----------------
|
|
sub html_footer {
    # Build the closing markup for a page: a rule, a "Top" link, a custom
    # link, an optional "Last modified" line, a mailto link, and the closing
    # </body></html> tags.
    #
    # Named parameters (resolved through $self->_rearrange()):
    #   LINKTO   : HREF of the custom link (any false value becomes '').
    #   LINKTEXT : text of the custom link.
    #   MODIFIED : if true, shown in a centered "Last modified:" line.
    #   MAIL     : address used in the mailto: link.
    #   MAILTEXT : text of the mailto: link.
    #   TOP      : accepted for compatibility; its value is not used.
    #
    # Returns the markup as a string.
    my ($self, @param) = @_;

    my ($linkTo, $linkText, $modified, $mail, $mailText) =
        $self->_rearrange([qw(LINKTO LINKTEXT MODIFIED MAIL MAILTEXT TOP)], @param);

    $modified = $modified
        ? qq|<center><small><b>Last modified: $modified </b></small></center>|
        : '';

    $linkTo ||= '';

    # The remaining values are interpolated below as well, so give them the
    # same empty-string default; otherwise an omitted parameter triggers
    # "uninitialized value" warnings while producing the same output.
    foreach my $optional ($linkText, $mail, $mailText) {
        $optional = '' unless defined $optional;
    }

    # The "Top" link is always emitted, regardless of the TOP parameter.
    # It relies on browsers jumping to the top of the page for a name
    # anchor that does not exist.
    my $top = qq|<a href="#top">Top</a>|;

    return <<"HTML";
<p>
<hr size=3 noshade width=95%>
$top | <a href="$linkTo"> $linkText</a><br>
$modified
<small><i><a href="mailto:$mail">$mailText</a></i></small>
</body></html>

HTML
}
|
|
922
|
|
923
|
|
924 =head2 strip_html
|
|
925
|
|
926 Usage : $boolean = $BioWWW->strip_html( string_ref, [fast] );
|
|
927 Purpose : Removes HTML formatting from a supplied string.
|
|
928 Returns : Boolean: true if string was stripped, false if not.
|
|
929 Argument : string_ref = reference to a string containing the whole
|
|
930 : web page to be stripped.
|
|
931 : fast = a non-zero value. Optional. If set, a faster
|
|
932 : but perhaps less thorough procedure is used for
|
|
933 : stripping. Default = not fast.
|
|
934 Throws : Exception if the argument is not a scalar reference.
|
|
935 Comments : Based on code originally written by Alex Dong Li
|
|
936 : (ali@genet.sickkids.on.ca).
|
|
937 : This is a more generic version of the function that appears
|
|
938 : in Bio::Tools::Blast::HTML.pm
|
|
939 : This version does not perform any Blast-specific stripping.
|
|
940 :
|
|
941 : This employs a simple method for removing tags that
|
|
942 : will fail under following conditions:
|
|
943 : 1) if quoted > appears in a tag (does this ever happen?)
|
|
944 : 2) if a tag is split over multiple lines and this method is
|
|
945 : used to process one line at a time.
|
|
946 :
|
|
947 : Without fast mode, large HTML files can take exceedingly long times to
|
|
948 : strip (e.g., 1Meg file with many tags can take 10 minutes versus 5 seconds
|
|
949 : in fast mode. Try the swissprot yeast table). If you know the HTML to be
|
|
950 : well-behaved (i.e., tags are not split across multiple lines), use fast
|
|
951 : mode for large, dense files.
|
|
952
|
|
953 =cut
|
|
954
|
|
955 #---------------
|
|
sub strip_html {
    # Remove HTML tags and &nbsp; entities from the string that $string_ref
    # points to, modifying it in place.
    #
    #   $string_ref : reference to the scalar holding the text.
    #   $fast       : if true, the text is processed one line at a time;
    #                 tags that span several lines are then not removed.
    #
    # Returns 1 if any substitution was made, 0 otherwise.
    # Calls $self->throw() if $string_ref is not a SCALAR reference.
    my ($self, $string_ref, $fast) = @_;

    ref($string_ref) eq 'SCALAR'
        or $self->throw("Can't strip HTML: "
            . "Argument should be a SCALAR reference, not "
            . (ref($string_ref)
                ? "a " . ref($string_ref) . " reference"
                : "a non-reference value"));

    my $stripped = 0;

    # NOTE(review): the second alternative of the tag pattern reached this
    # file as a bare space, which deleted every space from the text.  It is
    # restored here as the "&nbsp" entity (optional semicolon) -- confirm
    # against the upstream module.

    if ($fast) {
        # Line-at-a-time mode.  The -1 limit keeps trailing empty fields so
        # that trailing newlines survive the split/join round trip.
        my @lines = split(/\n/, $$string_ref, -1);

        foreach my $line (@lines) {
            $line =~ s/<[^>]+>|&nbsp;?//gi and $stripped = 1;

            # A single line holds no newline, so only the start-of-string
            # case of the "lone '>'" rule can apply here.
            $line =~ s/\A>\s+/\n\n>/ and $stripped = 1;
        }

        $$string_ref = join("\n", @lines);

    } else {
        # Whole-string mode: handles multi-line tags, but can be slow on
        # long strings with many substitutions.
        my $text = $$string_ref;

        # Remove all "<...>" tags and &nbsp; entities.
        $text =~ s/<[^>]+>|&nbsp;?//gi and $stripped = 1;

        # Set off any '>' left at the start of a line with a blank line.
        $text =~ s/(\A|\n)>\s+/\n\n>/g and $stripped = 1;

        $$string_ref = $text;
    }

    return $stripped;
}
|
|
997
|
|
998
|
|
999 1;
|
|
1000 __END__
|
|
1001
|
|
1002 ########################################################################
|
|
1003 ## END OF CLASS
|
|
1004 ########################################################################
|
|
1005
|
|
1006 =head1 FOR DEVELOPERS ONLY
|
|
1007
|
|
1008 =head2 Data Members
|
|
1009
|
|
1010 An instance of Bio::Tools::WWW.pm is a blessed reference to a hash containing
|
|
1011 all or some of the following fields:
|
|
1012
|
|
1013 FIELD VALUE
|
|
1014 --------------------------------------------------------------
|
|
1015 _started_html Defined on the initial invocation of start_html()
|
|
1016 to avoid duplicate printing out the "Content-type..." header.
|
|
1017
|
|
1018
|
|
1019 =cut
|
|
1020
|
|
1021 1;
|
|
1022
|
|
1023
|