0
|
1 # $Id: pdb.pm,v 1.9.2.2 2003/08/29 16:24:14 birney Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Structure::IO::pdb
|
|
4 #
|
|
5 # Cared for by Kris Boulez <kris.boulez@algonomics.com>
|
|
6 #
|
|
7 # Copyright 2001, 2002 Kris Boulez
|
|
8 #
|
|
9 # Framework is a copy of Bio::SeqIO::embl.pm
|
|
10 #
|
|
11 # You may distribute this module under the same terms as perl itself
|
|
12
|
|
13 # POD documentation - main docs before the code
|
|
14
|
|
15 =head1 NAME
|
|
16
|
|
17 Bio::Structure::IO::pdb - PDB input/output stream
|
|
18
|
|
19 =head1 SYNOPSIS
|
|
20
|
|
21 It is probably best not to use this object directly, but
|
|
22 rather go through the Bio::Structure::IO handler system. Go:
|
|
23
|
|
24 $stream = Bio::Structure::IO->new(-file => $filename,
|
|
25 -format => 'PDB');
|
|
26
|
|
27 while ( (my $structure = $stream->next_structure()) ) {
|
|
28 # do something with $structure
|
|
29 }
|
|
30
|
|
31 =head1 DESCRIPTION
|
|
32
|
|
33 This object can transform Bio::Structure objects to and from PDB flat
|
|
34 file databases. The working is similar to that of the Bio::SeqIO handlers.
|
|
35
|
|
36 =head1 FEEDBACK
|
|
37
|
|
38 =head2 Mailing Lists
|
|
39
|
|
40 User feedback is an integral part of the evolution of this
|
|
41 and other Bioperl modules. Send your comments and suggestions preferably
|
|
42 to one of the Bioperl mailing lists.
|
|
43 Your participation is much appreciated.
|
|
44
|
|
45 bioperl-l@bioperl.org - General discussion
|
|
46 http://www.bioperl.org/MailList.shtml - About the mailing lists
|
|
47
|
|
48 =head2 Reporting Bugs
|
|
49
|
|
50 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
51 the bugs and their resolution.
|
|
52 Bug reports can be submitted via email or the web:
|
|
53
|
|
54 bioperl-bugs@bio.perl.org
|
|
55 http://bugzilla.bioperl.org/
|
|
56
|
|
57 =head1 AUTHOR - Kris Boulez
|
|
58
|
|
59 Email kris.boulez@algonomics.com
|
|
60
|
|
61 =head1 APPENDIX
|
|
62
|
|
63 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
64
|
|
65 =cut
|
|
66
|
|
67
|
|
68 # Let the code begin...
|
|
69
|
|
70
|
|
71 package Bio::Structure::IO::pdb;
|
|
72 use vars qw(@ISA);
|
|
73 use strict;
|
|
74 use Bio::Structure::IO;
|
|
75 use Bio::Structure::Entry;
|
|
76 #use Bio::Structure::Model;
|
|
77 #use Bio::Structure::Chain;
|
|
78 #use Bio::Structure::Residue;
|
|
79 use Bio::Structure::Atom;
|
|
80 use Bio::SeqFeature::Generic;
|
|
81 use Bio::Annotation::Reference;
|
|
82
|
|
83 @ISA = qw(Bio::Structure::IO);
|
|
84
|
|
85 sub _initialize {
|
|
86 my($self,@args) = @_;
|
|
87
|
|
88 $self->SUPER::_initialize(@args);
|
|
89
|
|
90 my ($noheader, $noatom) =
|
|
91 $self->_rearrange([qw(
|
|
92 NOHEADER
|
|
93 NOATOM
|
|
94 )],
|
|
95 @args);
|
|
96 $noheader && $self->_noheader($noheader);
|
|
97 $noatom && $self->_noatom($noatom);
|
|
98 }
|
|
99
|
|
100
|
|
101 =head2 next_structure;
|
|
102
|
|
103 Title : next_structure
|
|
104 Usage : $struc = $stream->next_structure()
|
|
105 Function: returns the next structure in the stream
|
|
106 Returns : Bio::Structure object
|
|
107 Args :
|
|
108
|
|
109
|
|
110 =cut
|
|
111
|
|
112 sub next_structure {
|
|
113 my ($self,@args) = @_;
|
|
114 my ($line);
|
|
115 my ($obslte, $title, $caveat, $compnd, $source, $keywds,
|
|
116 $expdta, $author, %revdat, $revdat, $sprsde, $jrnl, %remark, $dbref,
|
|
117 $seqadv, $seqres, $modres, $het, $hetnam, $hetsyn, $formul, $helix,
|
|
118 $sheet, $turn, $ssbond, $link, $hydbnd, $sltbrg, $cispep,
|
|
119 $site, $cryst1, $tvect,);
|
|
120 my $struc = Bio::Structure::Entry->new(-id => 'created from pdb.pm');
|
|
121 my $all_headers = ( !$self->_noheader ); # we'll parse all headers and store as annotation
|
|
122 my %header; # stores all header RECORDs an is stored as annotations when ATOM is reached
|
|
123
|
|
124
|
|
125 $line = $self->_readline; # This needs to be before the first eof() test
|
|
126
|
|
127 if( !defined $line ) {
|
|
128 return undef; # no throws - end of file
|
|
129 }
|
|
130
|
|
131 if( $line =~ /^\s+$/ ) {
|
|
132 while( defined ($line = $self->_readline) ) {
|
|
133 $line =~/\S/ && last;
|
|
134 }
|
|
135 }
|
|
136 if( !defined $line ) {
|
|
137 return undef; # end of file
|
|
138 }
|
|
139 $line =~ /^HEADER\s+\S+/ || $self->throw("PDB stream with no HEADER. Not pdb in my book");
|
|
140 my($header_line) = unpack "x10 a56", $line;
|
|
141 $header{'header'} = $header_line;
|
|
142 my($class, $depdate, $idcode) = unpack "x10 a40 a9 x3 a4", $line;
|
|
143 $idcode =~ s/^\s*(\S+)\s*$/$1/;
|
|
144 $struc->id($idcode);
|
|
145 $self->debug("PBD c $class d $depdate id $idcode\n"); # XXX KB
|
|
146
|
|
147 my $buffer = $line;
|
|
148
|
|
149 BEFORE_COORDINATES :
|
|
150 until( !defined $buffer ) {
|
|
151 $_ = $buffer;
|
|
152
|
|
153 # Exit at start of coordinate section
|
|
154 last if /^(MODEL|ATOM|HETATM)/;
|
|
155
|
|
156 # OBSLTE line(s)
|
|
157 if (/^OBSLTE / && $all_headers) {
|
|
158 $obslte = $self->_read_PDB_singlecontline("OBSLTE","12-70",\$buffer);
|
|
159 $header{'obslte'} = $obslte;
|
|
160 }
|
|
161
|
|
162 # TITLE line(s)
|
|
163 if (/^TITLE / && $all_headers) {
|
|
164 $title = $self->_read_PDB_singlecontline("TITLE","11-70",\$buffer);
|
|
165 $header{'title'} = $title;
|
|
166 }
|
|
167
|
|
168 # CAVEAT line(s)
|
|
169 if (/^CAVEAT / && $all_headers) {
|
|
170 $caveat = $self->_read_PDB_singlecontline("CAVEAT","12-70",\$buffer);
|
|
171 $header{'caveat'} = $caveat;
|
|
172 }
|
|
173
|
|
174 # COMPND line(s)
|
|
175 if (/^COMPND / && $all_headers) {
|
|
176 $compnd = $self->_read_PDB_singlecontline("COMPND","11-70",\$buffer);
|
|
177 $header{'compnd'} = $compnd;
|
|
178 $self->debug("get COMPND $compnd\n");
|
|
179 }
|
|
180
|
|
181 # SOURCE line(s)
|
|
182 if (/^SOURCE / && $all_headers) {
|
|
183 $source = $self->_read_PDB_singlecontline("SOURCE","11-70",\$buffer);
|
|
184 $header{'source'} = $source;
|
|
185 }
|
|
186
|
|
187 # KEYWDS line(s)
|
|
188 if (/^KEYWDS / && $all_headers) {
|
|
189 $keywds = $self->_read_PDB_singlecontline("KEYWDS","11-70",\$buffer);
|
|
190 $header{'keywds'} = $keywds;
|
|
191 }
|
|
192
|
|
193 # EXPDTA line(s)
|
|
194 if (/^EXPDTA / && $all_headers) {
|
|
195 $expdta = $self->_read_PDB_singlecontline("EXPDTA","11-70",\$buffer);
|
|
196 $header{'expdta'} = $expdta;
|
|
197 }
|
|
198
|
|
199 # AUTHOR line(s)
|
|
200 if (/^AUTHOR / && $all_headers) {
|
|
201 $author = $self->_read_PDB_singlecontline("AUTHOR","11-70",\$buffer);
|
|
202 $header{'author'} = $author;
|
|
203 }
|
|
204
|
|
205 # REVDAT line(s)
|
|
206 # a bit more elaborate as we also store the modification number
|
|
207 if (/^REVDAT / && $all_headers) {
|
|
208 ##my($modnum,$rol) = unpack "x7 A3 x3 A53", $_;
|
|
209 ##$modnum =~ s/\s+//; # remove spaces
|
|
210 ##$revdat{$modnum} .= $rol;
|
|
211 my ($rol) = unpack "x7 a59", $_;
|
|
212 $revdat .= $rol;
|
|
213 $header{'revdat'} = $revdat;
|
|
214 }
|
|
215
|
|
216 # SPRSDE line(s)
|
|
217 if (/^SPRSDE / && $all_headers) {
|
|
218 $sprsde = $self->_read_PDB_singlecontline("SPRSDE","12-70",\$buffer);
|
|
219 $header{'sprsde'} = $sprsde;
|
|
220 }
|
|
221
|
|
222 # jRNL line(s)
|
|
223 if (/^JRNL / && $all_headers) {
|
|
224 $jrnl = $self->_read_PDB_jrnl(\$buffer);
|
|
225 $struc->annotation->add_Annotation('reference',$jrnl);
|
|
226 $header{'jrnl'} = 1; # when writing out, we need a way to check there was a JRNL record (not mandatory)
|
|
227 }
|
|
228
|
|
229 # REMARK line(s)
|
|
230 # we only parse the "REMARK 1" lines (additional references)
|
|
231 # thre rest is stored in %remark (indexed on remarkNum) (pack does space-padding)
|
|
232 if (/^REMARK\s+(\d+)\s*/ && $all_headers) {
|
|
233 my $remark_num = $1;
|
|
234 if ($remark_num == 1) {
|
|
235 my @refs = $self->_read_PDB_remark_1(\$buffer);
|
|
236 # How can we find the primary reference when writing (JRNL record) XXX KB
|
|
237 foreach my $ref (@refs) {
|
|
238 $struc->annotation->add_Annotation('reference', $ref);
|
|
239 }
|
|
240 # $_ still holds the REMARK_1 line, $buffer now contains the first non
|
|
241 # REMARK_1 line. We need to parse it in this pass (so no else block)
|
|
242 $_ = $buffer;
|
|
243 }
|
|
244 # for the moment I don't see a better solution (other then using goto)
|
|
245 if (/^REMARK\s+(\d+)\s*/) {
|
|
246 my $r_num = $1;
|
|
247 if ($r_num != 1) { # other remarks, we store literlly at the moment
|
|
248 my ($rol) = unpack "x11 a59", $_;
|
|
249 $remark{$r_num} .= $rol;
|
|
250 }
|
|
251 }
|
|
252 } # REMARK
|
|
253
|
|
254 # DBREF line(s)
|
|
255 # references to sequences in other databases
|
|
256 # we store as 'dblink' annotations and whole line as simple annotation (round-trip)
|
|
257 if (/^DBREF / && $all_headers) {
|
|
258 my ($rol) = unpack "x7 a61", $_;
|
|
259 $dbref .= $rol;
|
|
260 $header{'dbref'} = $dbref;
|
|
261 my ($db, $acc) = unpack "x26 a6 x1 a8", $_;
|
|
262 $db =~ s/\s*$//;
|
|
263 $acc =~ s/\s*$//;
|
|
264 my $link = Bio::Annotation::DBLink->new;
|
|
265 $link->database($db);
|
|
266 $link->primary_id($acc);
|
|
267 $struc->annotation->add_Annotation('dblink', $link);
|
|
268 } # DBREF
|
|
269
|
|
270 # SEQADV line(s)
|
|
271 if (/^SEQADV / && $all_headers) {
|
|
272 my ($rol) = unpack "x7 a63", $_;
|
|
273 $seqadv .= $rol;
|
|
274 $header{'seqadv'} = $seqadv;
|
|
275 } # SEQADV
|
|
276
|
|
277 # SEQRES line(s)
|
|
278 # this is (I think) the sequence of macromolecule that was analysed
|
|
279 # this will be returned when doing $struc->seq
|
|
280 if (/^SEQRES / && $all_headers) {
|
|
281 my ($rol) = unpack "x8 a62", $_;
|
|
282 $seqres .= $rol;
|
|
283 $header{'seqres'} = $seqres;
|
|
284 } # SEQRES
|
|
285
|
|
286 # MODRES line(s)
|
|
287 if (/^MODRES / && $all_headers) {
|
|
288 my ($rol) = unpack "x7 a63", $_;
|
|
289 $modres .= $rol;
|
|
290 $header{'modres'} = $modres;
|
|
291 } # MODRES
|
|
292
|
|
293 # HET line(s)
|
|
294 if (/^HET / && $all_headers) {
|
|
295 my ($rol) = unpack "x7 a63", $_;
|
|
296 $het .= $rol;
|
|
297 $header{'het'} = $het;
|
|
298 } # HET
|
|
299
|
|
300 # HETNAM line(s)
|
|
301 if (/^HETNAM / && $all_headers) {
|
|
302 my ($rol) = unpack "x8 a62", $_;
|
|
303 $hetnam .= $rol;
|
|
304 $header{'hetnam'} = $hetnam;
|
|
305 } # HETNAM
|
|
306
|
|
307 # HETSYN line(s)
|
|
308 if (/^HETSYN / && $all_headers) {
|
|
309 my ($rol) = unpack "x8 a62", $_;
|
|
310 $hetsyn .= $rol;
|
|
311 $header{'hetsyn'} = $hetsyn;
|
|
312 } # HETSYN
|
|
313
|
|
314 # FORMUL line(s)
|
|
315 if (/^FORMUL / && $all_headers) {
|
|
316 my ($rol) = unpack "x8 a62", $_;
|
|
317 $formul .= $rol;
|
|
318 $header{'formul'} = $formul;
|
|
319 } # FORMUL
|
|
320
|
|
321 # HELIX line(s)
|
|
322 # store as specific object ??
|
|
323 if (/^HELIX / && $all_headers) {
|
|
324 my ($rol) = unpack "x7 a69", $_;
|
|
325 $helix .= $rol;
|
|
326 $header{'helix'} = $helix;
|
|
327 } # HELIX
|
|
328
|
|
329 # SHEET line(s)
|
|
330 # store as specific object ??
|
|
331 if (/^SHEET / && $all_headers) {
|
|
332 my ($rol) = unpack "x7 a63", $_;
|
|
333 $sheet .= $rol;
|
|
334 $header{'sheet'} = $sheet;
|
|
335 } # SHEET
|
|
336
|
|
337 # TURN line(s)
|
|
338 # store as specific object ??
|
|
339 if (/^TURN / && $all_headers) {
|
|
340 my ($rol) = unpack "x7 a63", $_;
|
|
341 $turn .= $rol;
|
|
342 $header{'turn'} = $turn;
|
|
343 } # TURN
|
|
344
|
|
345 # SSBOND line(s)
|
|
346 # store in connection-like object (see parsing of CONECT record)
|
|
347 if (/^SSBOND / && $all_headers) {
|
|
348 my ($rol) = unpack "x7 a65", $_;
|
|
349 $ssbond .= $rol;
|
|
350 $header{'ssbond'} = $ssbond;
|
|
351 } # SSBOND
|
|
352
|
|
353 # LINK
|
|
354 # store like SSBOND ?
|
|
355 if (/^LINK / && $all_headers) {
|
|
356 my ($rol) = unpack "x12 a60", $_;
|
|
357 $link .= $rol;
|
|
358 $header{'link'} = $link;
|
|
359 } # LINK
|
|
360
|
|
361 # HYDBND
|
|
362 # store like SSBOND
|
|
363 if (/^HYDBND / && $all_headers) {
|
|
364 my ($rol) = unpack "x12 a60", $_;
|
|
365 $hydbnd .= $rol;
|
|
366 $header{'hydbnd'} = $hydbnd;
|
|
367 } # HYDBND
|
|
368
|
|
369 # SLTBRG
|
|
370 # store like SSBOND ?
|
|
371 if (/^SLTBRG / && $all_headers) {
|
|
372 my ($rol) = unpack "x12 a60",$_;
|
|
373 $sltbrg .= $rol;
|
|
374 $header{'sltbrg'} = $sltbrg;
|
|
375 } # SLTBRG
|
|
376
|
|
377 # CISPEP
|
|
378 # store like SSBOND ?
|
|
379 if (/^CISPEP / && $all_headers) {
|
|
380 my ($rol) = unpack "x7 a52", $_;
|
|
381 $cispep .= $rol;
|
|
382 $header{'cispep'} = $cispep;
|
|
383 }
|
|
384
|
|
385 # SITE line(s)
|
|
386 if (/^SITE / && $all_headers) {
|
|
387 my ($rol) = unpack "x7 a54", $_;
|
|
388 $site .= $rol;
|
|
389 $header{'site'} = $site;
|
|
390 } # SITE
|
|
391
|
|
392 # CRYST1 line
|
|
393 # store in some crystallographic subobject ?
|
|
394 if (/^CRYST1/ && $all_headers) {
|
|
395 my ($rol) = unpack "x6 a64", $_;
|
|
396 $cryst1 .= $rol;
|
|
397 $header{'cryst1'} = $cryst1;
|
|
398 } # CRYST1
|
|
399
|
|
400 # ORIGXn line(s) (n=1,2,3)
|
|
401 if (/^(ORIGX\d) / && $all_headers) {
|
|
402 my $origxn = lc($1);
|
|
403 my ($rol) = unpack "x10 a45", $_;
|
|
404 $header{$origxn} .= $rol;
|
|
405 } # ORIGXn
|
|
406
|
|
407 # SCALEn line(s) (n=1,2,3)
|
|
408 if (/^(SCALE\d) / && $all_headers) {
|
|
409 my $scalen = lc($1);
|
|
410 my ($rol) = unpack "x10 a45", $_;
|
|
411 $header{$scalen} .= $rol;
|
|
412 } # SCALEn
|
|
413
|
|
414 # MTRIXn line(s) (n=1,2,3)
|
|
415 if (/^(MTRIX\d) / && $all_headers) {
|
|
416 my $mtrixn = lc($1);
|
|
417 my ($rol) = unpack "x7 a53", $_;
|
|
418 $header{$mtrixn} .= $rol;
|
|
419 } # MTRIXn
|
|
420
|
|
421 # TVECT line(s)
|
|
422 if (/^TVECT / && $all_headers) {
|
|
423 my ($rol) = unpack "x7 a63", $_;
|
|
424 $tvect .= $rol;
|
|
425 $header{'tvect'} = $tvect;
|
|
426 }
|
|
427
|
|
428 # Get next line.
|
|
429 $buffer = $self->_readline;
|
|
430 }
|
|
431
|
|
432 # store %header entries a annotations
|
|
433 if (%header) {
|
|
434 for my $record (keys %header) {
|
|
435 my $sim = Bio::Annotation::SimpleValue->new();
|
|
436 $sim->value($header{$record});
|
|
437 $struc->annotation->add_Annotation($record, $sim);
|
|
438 }
|
|
439 }
|
|
440 # store %remark entries as annotations
|
|
441 if (%remark) {
|
|
442 for my $remark_num (keys %remark) {
|
|
443 my $sim = Bio::Annotation::SimpleValue->new();
|
|
444 $sim->value($remark{$remark_num});
|
|
445 $struc->annotation->add_Annotation("remark_$remark_num", $sim);
|
|
446 }
|
|
447 }
|
|
448
|
|
449 # Coordinate section, the real meat
|
|
450 #
|
|
451 # $_ contains a line beginning with (ATOM|MODEL)
|
|
452
|
|
453 $buffer = $_;
|
|
454
|
|
455
|
|
456 if (defined($buffer) && $buffer =~ /^(ATOM |MODEL |HETATM)/ ) { # can you have an entry without ATOM ?
|
|
457 until( !defined ($buffer) ) { # (yes : 1a7z )
|
|
458 # read in one model at a time
|
|
459 my $model = $self->_read_PDB_coordinate_section(\$buffer, $struc);
|
|
460 # add this to $struc
|
|
461 $struc->add_model($struc, $model);
|
|
462
|
|
463 if ($buffer !~ /^MODEL /) { # if we get here we have multiple MODELs
|
|
464 last;
|
|
465 }
|
|
466 }
|
|
467 }
|
|
468 else {
|
|
469 $self->throw("Could not find a coordinate section in this record\n");
|
|
470 }
|
|
471
|
|
472
|
|
473 until( !defined $buffer ) {
|
|
474 $_ = $buffer;
|
|
475
|
|
476 # CONNECT records
|
|
477 if (/^CONECT/) {
|
|
478 # do not differentiate between different type of connect (column dependant)
|
|
479 my $conect_unpack = "x6 a5 a5 a5 a5 a5 a5 a5 a5 a5 a5 a5";
|
|
480 my (@conect) = unpack $conect_unpack, $_;
|
|
481 for my $k (0 .. $#conect) {
|
|
482 $conect[$k] =~ s/\s//g;
|
|
483 }
|
|
484 my $source = shift @conect;
|
|
485 my $type;
|
|
486 for my $k (0 .. 9) {
|
|
487 next unless ($conect[$k] =~ /^\d+$/);
|
|
488 # 0..3 bond
|
|
489 if( $k <= 3 ) {
|
|
490 $type = "bond";
|
|
491 }
|
|
492 # 4..5,7..8 hydrogen bonded
|
|
493 elsif( ($k >= 4 && $k <= 5) || ($k >= 7 && $k <= 8) ) {
|
|
494 $type = "hydrogen";
|
|
495 }
|
|
496 # 6, 9 salt bridged
|
|
497 elsif( $k == 6 || $k == 9 ) {
|
|
498 $type = "saltbridged";
|
|
499 } else {
|
|
500 $self->throw("k has impossible value ($k), check brain");
|
|
501 }
|
|
502 $struc->conect($source, $conect[$k], $type);
|
|
503 }
|
|
504 }
|
|
505
|
|
506 # MASTER record
|
|
507 if (/^MASTER /) {
|
|
508 # the numbers in here a checksums, we should use them :)
|
|
509 my ($rol) = unpack "x10 a60", $_;
|
|
510 $struc->master($rol);
|
|
511 }
|
|
512
|
|
513 if (/^END/) {
|
|
514 # this it the end ...
|
|
515 }
|
|
516
|
|
517 $buffer = $self->_readline;
|
|
518 }
|
|
519
|
|
520
|
|
521 return $struc;
|
|
522 }
|
|
523
|
|
524 =head2 write_structure
|
|
525
|
|
526 Title : write_structure
|
|
527 Usage : $stream->write_structure($struc)
|
|
528 Function: writes the $struc object (must be a Bio::Structure) to the stream
|
|
529 Returns : 1 for success and 0 for error
|
|
530 Args : Bio::Structure object
|
|
531
|
|
532
|
|
533 =cut
|
|
534
|
|
535 sub write_structure {
|
|
536 my ($self, $struc) = @_;
|
|
537 if( !defined $struc ) {
|
|
538 $self->throw("Attempting to write with no structure!");
|
|
539 }
|
|
540
|
|
541 if( ! ref $struc || ! $struc->isa('Bio::Structure::StructureI') ) {
|
|
542 $self->throw(" $struc is not a StructureI compliant module.");
|
|
543 }
|
|
544 my ($ann, $string, $output_string, $key);
|
|
545 # HEADER
|
|
546 ($ann) = $struc->annotation->get_Annotations("header");
|
|
547 if ($ann) {
|
|
548 $string = $ann->as_text;
|
|
549 $string =~ s/^Value: //;
|
|
550 $output_string = pack ("A10 A56", "HEADER", $string);
|
|
551 } else { # not read in via read_structure, create HEADER line
|
|
552 my $id = $struc->id;
|
|
553 if (!$id) {
|
|
554 $id = "UNK1";
|
|
555 }
|
|
556 if (length($id) > 4) {
|
|
557 $id = substr($id,0,4);
|
|
558 }
|
|
559 my $classification = "DEFAULT CLASSIFICATION";
|
|
560 my $dep_date = "24-JAN-70";
|
|
561 $output_string = pack ("A10 A40 A12 A4", "HEADER", $classification, $dep_date, $id);
|
|
562 }
|
|
563 $output_string .= " " x (80 - length($output_string) );
|
|
564 $self->_print("$output_string\n");
|
|
565
|
|
566 my (%header);
|
|
567 for $key ($struc->annotation->get_all_annotation_keys) {
|
|
568 $header{$key} = 1;;
|
|
569 }
|
|
570
|
|
571 exists $header{'obslte'} && $self->_write_PDB_simple_record(-name => "OBSLTE ", -cont => "9-10",
|
|
572 -annotation => $struc->annotation->get_Annotations("obslte"), -rol => "11-70");
|
|
573
|
|
574 exists $header{'title'} && $self->_write_PDB_simple_record(-name => "TITLE ", -cont => "9-10",
|
|
575 -annotation => $struc->annotation->get_Annotations("title"), -rol => "11-70");
|
|
576
|
|
577 exists $header{'caveat'} && $self->_write_PDB_simple_record(-name => "CAVEAT ", -cont => "9-10",
|
|
578 -annotation => $struc->annotation->get_Annotations("caveat"), -rol => "12-70");
|
|
579
|
|
580 exists $header{'compnd'} && $self->_write_PDB_simple_record(-name => "COMPND ", -cont => "9-10",
|
|
581 -annotation => $struc->annotation->get_Annotations("compnd"), -rol => "11-70");
|
|
582
|
|
583 exists $header{'source'} && $self->_write_PDB_simple_record(-name => "SOURCE ", -cont => "9-10",
|
|
584 -annotation => $struc->annotation->get_Annotations("source"), -rol => "11-70");
|
|
585
|
|
586 exists $header{'keywds'} && $self->_write_PDB_simple_record(-name => "KEYWDS ", -cont => "9-10",
|
|
587 -annotation => $struc->annotation->get_Annotations("keywds"), -rol => "11-70");
|
|
588
|
|
589 exists $header{'expdta'} && $self->_write_PDB_simple_record(-name => "EXPDTA ", -cont => "9-10",
|
|
590 -annotation => $struc->annotation->get_Annotations("expdta"), -rol => "11-70");
|
|
591
|
|
592 exists $header{'author'} && $self->_write_PDB_simple_record(-name => "AUTHOR ", -cont => "9-10",
|
|
593 -annotation => $struc->annotation->get_Annotations("author"), -rol => "11-70");
|
|
594
|
|
595 exists $header{'revdat'} && $self->_write_PDB_simple_record(-name => "REVDAT ",
|
|
596 -annotation => $struc->annotation->get_Annotations("revdat"), -rol => "8-66");
|
|
597
|
|
598 exists $header{'sprsde'} && $self->_write_PDB_simple_record(-name => "SPRSDE ", -cont => "9-10",
|
|
599 -annotation => $struc->annotation->get_Annotations("sprsde"), -rol => "12-70");
|
|
600
|
|
601 # JRNL en REMARK 1
|
|
602 my ($jrnl_done, $remark_1_counter);
|
|
603 if ( !exists $header{'jrnl'} ) {
|
|
604 $jrnl_done = 1;
|
|
605 }
|
|
606 foreach my $ref ($struc->annotation->get_Annotations('reference') ) {
|
|
607 if( !$jrnl_done ) { # JRNL record
|
|
608 $ref->authors && $self->_write_PDB_simple_record(-name => "JRNL AUTH",
|
|
609 -cont => "17-18", -rol => "20-70", -string => $ref->authors );
|
|
610 $ref->title && $self->_write_PDB_simple_record(-name => "JRNL TITL",
|
|
611 -cont => "17-18", -rol => "20-70", -string => $ref->title );
|
|
612 $ref->editors && $self->_write_PDB_simple_record(-name => "JRNL EDIT",
|
|
613 -cont => "17-18", -rol => "20-70", -string => $ref->editors );
|
|
614 $ref->location && $self->_write_PDB_simple_record(-name => "JRNL REF ",
|
|
615 -cont => "17-18", -rol => "20-70", -string => $ref->location );
|
|
616 $ref->editors && $self->_write_PDB_simple_record(-name => "JRNL EDIT",
|
|
617 -cont => "17-18", -rol => "20-70", -string => $ref->editors );
|
|
618 $ref->encoded_ref && $self->_write_PDB_simple_record(-name => "JRNL REFN",
|
|
619 -cont => "17-18", -rol => "20-70", -string => $ref->encoded_ref );
|
|
620 $jrnl_done = 1;
|
|
621 } else { # REMARK 1
|
|
622 if (!$remark_1_counter) { # header line
|
|
623 my $remark_1_header_line = "REMARK 1" . " " x 70;
|
|
624 $self->_print("$remark_1_header_line\n");
|
|
625 $remark_1_counter = 1;
|
|
626 }
|
|
627 # per reference header
|
|
628 my $rem_line = "REMARK 1 REFERENCE " . $remark_1_counter;
|
|
629 $rem_line .= " " x (80 - length($rem_line) );
|
|
630 $self->_print($rem_line,"\n");
|
|
631 $ref->authors && $self->_write_PDB_simple_record(-name => "REMARK 1 AUTH",
|
|
632 -cont => "17-18", -rol => "20-70", -string => $ref->authors );
|
|
633 $ref->title && $self->_write_PDB_simple_record(-name => "REMARK 1 TITL",
|
|
634 -cont => "17-18", -rol => "20-70", -string => $ref->title );
|
|
635 $ref->editors && $self->_write_PDB_simple_record(-name => "REMARK 1 EDIT",
|
|
636 -cont => "17-18", -rol => "20-70", -string => $ref->editors );
|
|
637 $ref->location && $self->_write_PDB_simple_record(-name => "REMARK 1 REF ",
|
|
638 -cont => "17-18", -rol => "20-70", -string => $ref->location );
|
|
639 $ref->editors && $self->_write_PDB_simple_record(-name => "REMARK 1 EDIT",
|
|
640 -cont => "17-18", -rol => "20-70", -string => $ref->editors );
|
|
641 $ref->encoded_ref && $self->_write_PDB_simple_record(-name => "REMARK 1 REFN",
|
|
642 -cont => "17-18", -rol => "20-70", -string => $ref->encoded_ref );
|
|
643 $remark_1_counter++;
|
|
644 }
|
|
645 }
|
|
646 if (! defined $remark_1_counter ) { # no remark 1 record written yet
|
|
647 my $remark_1_header_line = "REMARK 1" . " " x 70;
|
|
648 $self->_print("$remark_1_header_line\n"); # write dummy (we need this line)
|
|
649 }
|
|
650
|
|
651 # REMARK's (not 1 at the moment, references)
|
|
652 my (%remarks, $remark_num);
|
|
653 for $key (keys %header) {
|
|
654 next unless ($key =~ /^remark_(\d+)$/);
|
|
655 next if ($1 == 1);
|
|
656 $remarks{$1} = 1;
|
|
657 }
|
|
658 for $remark_num (sort {$a <=> $b} keys %remarks) {
|
|
659 $self->_write_PDB_remark_record($struc, $remark_num);
|
|
660 }
|
|
661
|
|
662 exists $header{'dbref'} && $self->_write_PDB_simple_record(-name => "DBREF ",
|
|
663 -annotation => $struc->annotation->get_Annotations("dbref"), -rol => "8-68");
|
|
664 exists $header{'seqadv'} && $self->_write_PDB_simple_record(-name => "SEQADV ",
|
|
665 -annotation => $struc->annotation->get_Annotations("seqadv"), -rol => "8-70");
|
|
666 exists $header{'seqres'} && $self->_write_PDB_simple_record(-name => "SEQRES ",
|
|
667 -annotation => $struc->annotation->get_Annotations("seqres"), -rol => "9-70");
|
|
668 exists $header{'modres'} && $self->_write_PDB_simple_record(-name => "MODRES ",
|
|
669 -annotation => $struc->annotation->get_Annotations("modres"), -rol => "8-70");
|
|
670 exists $header{'het'} && $self->_write_PDB_simple_record(-name => "HET ",
|
|
671 -annotation => $struc->annotation->get_Annotations("het"), -rol => "8-70");
|
|
672 exists $header{'hetnam'} && $self->_write_PDB_simple_record(-name => "HETNAM ",
|
|
673 -annotation => $struc->annotation->get_Annotations("hetnam"), -rol => "9-70");
|
|
674 exists $header{'hetsyn'} && $self->_write_PDB_simple_record(-name => "HETSYN ",
|
|
675 -annotation => $struc->annotation->get_Annotations("hetsyn"), -rol => "9-70");
|
|
676 exists $header{'formul'} && $self->_write_PDB_simple_record(-name => "FORMUL ",
|
|
677 -annotation => $struc->annotation->get_Annotations("formul"), -rol => "9-70");
|
|
678 exists $header{'helix'} && $self->_write_PDB_simple_record(-name => "HELIX ",
|
|
679 -annotation => $struc->annotation->get_Annotations("helix"), -rol => "8-76");
|
|
680 exists $header{'sheet'} && $self->_write_PDB_simple_record(-name => "SHEET ",
|
|
681 -annotation => $struc->annotation->get_Annotations("sheet"), -rol => "8-70");
|
|
682 exists $header{'turn'} && $self->_write_PDB_simple_record(-name => "TURN ",
|
|
683 -annotation => $struc->annotation->get_Annotations("turn"), -rol => "8-70");
|
|
684 exists $header{'ssbond'} && $self->_write_PDB_simple_record(-name => "SSBOND ",
|
|
685 -annotation => $struc->annotation->get_Annotations("ssbond"), -rol => "8-72");
|
|
686 exists $header{'link'} && $self->_write_PDB_simple_record(-name => "LINK ",
|
|
687 -annotation => $struc->annotation->get_Annotations("link"), -rol => "13-72");
|
|
688 exists $header{'hydbnd'} && $self->_write_PDB_simple_record(-name => "HYDBND ",
|
|
689 -annotation => $struc->annotation->get_Annotations("hydbnd"), -rol => "13-72");
|
|
690 exists $header{'sltbrg'} && $self->_write_PDB_simple_record(-name => "SLTBRG ",
|
|
691 -annotation => $struc->annotation->get_Annotations("sltbrg"), -rol => "13-72");
|
|
692 exists $header{'cispep'} && $self->_write_PDB_simple_record(-name => "CISPEP ",
|
|
693 -annotation => $struc->annotation->get_Annotations("cispep"), -rol => "8-59");
|
|
694 exists $header{'site'} && $self->_write_PDB_simple_record(-name => "SITE ",
|
|
695 -annotation => $struc->annotation->get_Annotations("site"), -rol => "8-61");
|
|
696 exists $header{'cryst1'} && $self->_write_PDB_simple_record(-name => "CRYST1",
|
|
697 -annotation => $struc->annotation->get_Annotations("cryst1"), -rol => "7-70");
|
|
698 for my $k (1..3) {
|
|
699 my $origxn = "origx".$k;
|
|
700 my $ORIGXN = uc($origxn)." ";
|
|
701 exists $header{$origxn} && $self->_write_PDB_simple_record(-name => $ORIGXN,
|
|
702 -annotation => $struc->annotation->get_Annotations($origxn), -rol => "11-55");
|
|
703 }
|
|
704 for my $k (1..3) {
|
|
705 my $scalen = "scale".$k;
|
|
706 my $SCALEN = uc($scalen)." ";
|
|
707 exists $header{$scalen} && $self->_write_PDB_simple_record(-name => $SCALEN,
|
|
708 -annotation => $struc->annotation->get_Annotations($scalen), -rol => "11-55");
|
|
709 }
|
|
710 for my $k (1..3) {
|
|
711 my $mtrixn = "mtrix".$k;
|
|
712 my $MTRIXN = uc($mtrixn)." ";
|
|
713 exists $header{$mtrixn} && $self->_write_PDB_simple_record(-name => $MTRIXN,
|
|
714 -annotation => $struc->annotation->get_Annotations($mtrixn), -rol => "8-60");
|
|
715 }
|
|
716 exists $header{'tvect'} && $self->_write_PDB_simple_record(-name => "TVECT ",
|
|
717 -annotation => $struc->annotation->get_Annotations("tvect"), -rol => "8-70");
|
|
718
|
|
719 # write out coordinate section
|
|
720 #
|
|
721 my %het_res; # hetero residues
|
|
722 $het_res{'HOH'} = 1; # water is default
|
|
723 if (exists $header{'het'}) {
|
|
724 my ($het_line) = ($struc->annotation->get_Annotations("het"))[0]->as_text;
|
|
725 $het_line =~ s/^Value: //;
|
|
726 for ( my $k = 0; $k <= length $het_line ; $k += 63) {
|
|
727 my $l = substr $het_line, $k, 63;
|
|
728 $l =~ s/^\s*(\S+)\s+.*$/$1/;
|
|
729 $het_res{$l} = 1;
|
|
730 }
|
|
731 }
|
|
732 for my $model ($struc->get_models) {
|
|
733 # more then one model ?
|
|
734 if ($struc->get_models > 1) {
|
|
735 my $model_line = sprintf("MODEL %4d", $model->id);
|
|
736 $model_line .= " " x (80 - length($model_line) );
|
|
737 $self->_print($model_line, "\n");
|
|
738 }
|
|
739 for my $chain ($struc->get_chains($model)) {
|
|
740 my ($residue, $atom, $resname, $resnum, $atom_line, $atom_serial, $atom_icode, $chain_id);
|
|
741 my ($prev_resname, $prev_resnum, $prev_atomicode); # need these for TER record
|
|
742 my $wr_ter = 0; # have we already written out a TER for this chain
|
|
743 $chain_id = $chain->id;
|
|
744 if ( $chain_id eq "default" ) {
|
|
745 $chain_id = " ";
|
|
746 }
|
|
747 $self->debug("model_id: $model->id chain_id: $chain_id\n");
|
|
748 for $residue ($struc->get_residues($chain)) {
|
|
749 ($resname, $resnum) = split /-/, $residue->id;
|
|
750 for $atom ($struc->get_atoms($residue)) {
|
|
751 if ($het_res{$resname}) { # HETATM
|
|
752 if ( ! $wr_ter && $resname ne "HOH" ) { # going from ATOM -> HETATM, we have to write TER
|
|
753 my $ter_line = "TER ";
|
|
754 $ter_line .= sprintf("%5d", $atom_serial + 1);
|
|
755 $ter_line .= " ";
|
|
756 $ter_line .= sprintf("%3s ", $prev_resname);
|
|
757 $ter_line .= $chain_id;
|
|
758 $ter_line .= sprintf("%4d", $prev_resnum);
|
|
759 $ter_line .= $atom_icode ? $prev_atomicode : " "; # 27
|
|
760 $ter_line .= " " x (80 - length $ter_line); # extend to 80 chars
|
|
761 $self->_print($ter_line,"\n");
|
|
762 $wr_ter = 1;
|
|
763 }
|
|
764 $atom_line = "HETATM";
|
|
765 } else {
|
|
766 $atom_line = "ATOM ";
|
|
767 }
|
|
768 $atom_line .= sprintf("%5d ", $atom->serial);
|
|
769 $atom_serial = $atom->serial; # we need it for TER record
|
|
770 $atom_icode = $atom->icode;
|
|
771 # remember some stuff if next iteration needs writing TER
|
|
772 $prev_resname = $resname;
|
|
773 $prev_resnum = $resnum;
|
|
774 $prev_atomicode = $atom_icode;
|
|
775 # getting the name of the atom correct is subtrivial
|
|
776 my $atom_id = $atom->id;
|
|
777 # is pdb_atomname set, then use this (most probably set when
|
|
778 # reading in the PDB record)
|
|
779 my $pdb_atomname = $atom->pdb_atomname;
|
|
780 if( defined $pdb_atomname ) {
|
|
781 $atom_line .= sprintf("%-4s", $pdb_atomname);
|
|
782 } else {
|
|
783 # start (educated) guessing
|
|
784 my $element = $atom->element;
|
|
785 if( defined $element && $element ne "H") {
|
|
786 # element should be at first two positions (right justified)
|
|
787 # ie. Calcium should be "CA "
|
|
788 # C alpha should be " CA "
|
|
789 if( length($element) == 2 ) {
|
|
790 $atom_line .= sprintf("%-4s", $atom->id);
|
|
791 } else {
|
|
792 $atom_line .= sprintf(" %-3s", $atom->id);
|
|
793 }
|
|
794 } else { # old behaviour do a best guess
|
|
795 if ($atom->id =~ /^\dH/) { # H: four positions, left justified
|
|
796 $atom_line .= sprintf("%-4s", $atom->id);
|
|
797 } elsif (length($atom_id) == 4) {
|
|
798 if ($atom_id =~ /^(H\d\d)(\d)$/) { # turn H123 into 3H12
|
|
799 $atom_line .= $2.$1;
|
|
800 } else { # no more guesses, no more alternatives
|
|
801 $atom_line .= $atom_id;
|
|
802 }
|
|
803 } else { # if we get here and it is not correct let me know
|
|
804 $atom_line .= sprintf(" %-3s", $atom->id);
|
|
805 }
|
|
806 }
|
|
807 }
|
|
808 # we don't do alternate location at this moment
|
|
809 $atom_line .= " "; # 17
|
|
810 $atom_line .= sprintf("%3s",$resname); # 18-20
|
|
811 $atom_line .= " ".$chain_id; # 21, 22
|
|
812 $atom_line .= sprintf("%4d", $resnum); # 23-26
|
|
813 $atom_line .= $atom->icode ? $atom->icode : " "; # 27
|
|
814 $atom_line .= " "; # 28-30
|
|
815 $atom_line .= sprintf("%8.3f", $atom->x); # 31-38
|
|
816 $atom_line .= sprintf("%8.3f", $atom->y); # 39-46
|
|
817 $atom_line .= sprintf("%8.3f", $atom->z); # 47-54
|
|
818 $atom_line .= sprintf("%6.2f", $atom->occupancy); # 55-60
|
|
819 $atom_line .= sprintf("%6.2f", $atom->tempfactor); # 61-66
|
|
820 $atom_line .= " "; # 67-72
|
|
821 $atom_line .= $atom->segID ? # segID 73-76
|
|
822 sprintf("%-4s", $atom->segID) :
|
|
823 " ";
|
|
824 $atom_line .= $atom->element ?
|
|
825 sprintf("%2s", $atom->element) :
|
|
826 " ";
|
|
827 $atom_line .= $atom->charge ?
|
|
828 sprintf("%2s", $atom->charge) :
|
|
829 " ";
|
|
830
|
|
831 $self->_print($atom_line,"\n");
|
|
832 }
|
|
833 }
|
|
834 # write out TER record if it hasn't been written yet
|
|
835 if ( $resname ne "HOH" && ! $wr_ter ) {
|
|
836 my $ter_line = "TER ";
|
|
837 $ter_line .= sprintf("%5d", $atom_serial + 1);
|
|
838 $ter_line .= " ";
|
|
839 $ter_line .= sprintf("%3s ", $resname);
|
|
840 $ter_line .= $chain_id;
|
|
841 $ter_line .= sprintf("%4d", $resnum);
|
|
842 $ter_line .= $atom_icode ? $atom_icode : " "; # 27
|
|
843 $ter_line .= " " x (80 - length $ter_line); # extend to 80 chars
|
|
844 $self->_print($ter_line,"\n");
|
|
845 $wr_ter = 1;
|
|
846 }
|
|
847 }
|
|
848 if ($struc->get_models > 1) { # we need ENDMDL
|
|
849 my $endmdl_line = "ENDMDL" . " " x 74;
|
|
850 $self->_print($endmdl_line, "\n");
|
|
851 }
|
|
852 } # for my $model
|
|
853
|
|
854 # CONECT
|
|
855 my @sources = $struc->get_all_conect_source;
|
|
856 my ($conect_line,@conect, @bond, @hydbond, @saltbridge, $to, $type);
|
|
857 for my $source (@sources) {
|
|
858 # get all conect's
|
|
859 my @conect = $struc->conect($source);
|
|
860 # classify
|
|
861 for my $con (@conect) {
|
|
862 ($to, $type) = split /_/, $con;
|
|
863 if($type eq "bond") {
|
|
864 push @bond, $to;
|
|
865 } elsif($type eq "hydrogenbonded") {
|
|
866 push @hydbond, $to;
|
|
867 } elsif($type eq "saltbridged") {
|
|
868 push @saltbridge, $to;
|
|
869 } else {
|
|
870 $self->throw("type $type is unknown for conect");
|
|
871 }
|
|
872 }
|
|
873 # and write out CONECT lines as long as there is something
|
|
874 # in one of the arrays
|
|
875 while ( @bond || @hydbond || @saltbridge) {
|
|
876 my ($b, $hb, $sb);
|
|
877 $conect_line = "CONECT". sprintf("%5d", $source);
|
|
878 for my $k (0..3) {
|
|
879 $b = shift @bond;
|
|
880 $conect_line .= $b ? sprintf("%5d", $b) : " ";
|
|
881 }
|
|
882 for my $k (4..5) {
|
|
883 $hb = shift @hydbond;
|
|
884 $conect_line .= $hb ? sprintf("%5d", $hb) : " ";
|
|
885 }
|
|
886 $sb = shift @saltbridge;
|
|
887 $conect_line .= $sb ? sprintf("%5d", $sb) : " ";
|
|
888 for my $k (7..8) {
|
|
889 $hb = shift @hydbond;
|
|
890 $conect_line .= $hb ? sprintf("%5d", $hb) : " ";
|
|
891 }
|
|
892 $sb = shift @saltbridge;
|
|
893 $conect_line .= $sb ? sprintf("%5d", $sb) : " ";
|
|
894
|
|
895 $conect_line .= " " x (80 - length($conect_line) );
|
|
896 $self->_print($conect_line, "\n");
|
|
897 }
|
|
898 }
|
|
899
|
|
900
|
|
901 # MASTER line contains checksums, we should calculate them of course :)
|
|
902 my $master_line = "MASTER " . $struc->master;
|
|
903 $master_line .= " " x (80 - length($master_line) );
|
|
904 $self->_print($master_line, "\n");
|
|
905
|
|
906 my $end_line = "END" . " " x 77;
|
|
907 $self->_print($end_line,"\n");
|
|
908
|
|
909 #$self->throw("write_structure is not yet implemented, start holding your breath\n");
|
|
910 }
|
|
911
|
|
912 =head2 _filehandle
|
|
913
|
|
914 Title : _filehandle
|
|
915 Usage : $obj->_filehandle($newval)
|
|
916 Function:
|
|
917 Example :
|
|
918 Returns : value of _filehandle
|
|
919 Args : newvalue (optional)
|
|
920
|
|
921
|
|
922 =cut
|
|
923
|
|
924 sub _filehandle{
|
|
925 my ($obj,$value) = @_;
|
|
926 if( defined $value) {
|
|
927 $obj->{'_filehandle'} = $value;
|
|
928 }
|
|
929 return $obj->{'_filehandle'};
|
|
930
|
|
931 }
|
|
932
|
|
933 =head2 _noatom
|
|
934
|
|
935 Title : _noatom
|
|
936 Usage : $obj->_noatom($newval)
|
|
937 Function:
|
|
938 Example :
|
|
939 Returns : value of _noatom
|
|
940 Args : newvalue (optional)
|
|
941
|
|
942
|
|
943 =cut
|
|
944
|
|
945 sub _noatom{
|
|
946 my ($obj,$value) = @_;
|
|
947 if( defined $value) {
|
|
948 $obj->{'_noatom'} = $value;
|
|
949 }
|
|
950 return $obj->{'_noatom'};
|
|
951
|
|
952 }
|
|
953
|
|
954 =head2 _noheader
|
|
955
|
|
956 Title : _noheader
|
|
957 Usage : $obj->_noheader($newval)
|
|
958 Function:
|
|
959 Example :
|
|
960 Returns : value of _noheader
|
|
961 Args : newvalue (optional)
|
|
962
|
|
963
|
|
964 =cut
|
|
965
|
|
966 sub _noheader{
|
|
967 my ($obj,$value) = @_;
|
|
968 if( defined $value) {
|
|
969 $obj->{'_noheader'} = $value;
|
|
970 }
|
|
971 return $obj->{'_noheader'};
|
|
972
|
|
973 }
|
|
974
|
|
975 =head2 _read_PDB_singlecontline
|
|
976
|
|
977 Title : _read_PDB_singlecontline
|
|
978 Usage : $obj->_read_PDB_singlecontline($record, $fromto, $buffer))
|
|
979 Function: read single continued record from PDB
|
|
980 Returns : concatenated record entry (between $fromto columns)
|
|
981 Args : record, colunm delimiters, buffer
|
|
982
|
|
983 =cut
|
|
984
|
|
985 sub _read_PDB_singlecontline {
|
|
986 my ($self, $record, $fromto, $buffer) = @_;
|
|
987 my $concat_line;
|
|
988
|
|
989 my ($begin, $end) = (split (/-/, $fromto));
|
|
990 my $unpack_string = "x8 a2 ";
|
|
991 if($begin == 12) { # one additional space
|
|
992 $unpack_string .= "x1 a59";
|
|
993 } else {
|
|
994 $unpack_string .= "a60";
|
|
995 }
|
|
996 $_ = $$buffer;
|
|
997 while (defined( $_ ||= $self->_readline ) ) {
|
|
998 if ( /^$record/ ) {
|
|
999 my($cont, $rol) = unpack $unpack_string, $_;
|
|
1000 if($cont =~ /\d$/ && $begin == 11) { # continuation line
|
|
1001 # and text normally at pos 11
|
|
1002 $rol =~ s/^\s//; # strip leading space
|
|
1003 }
|
|
1004 ## no space (store litteraly) $concat_line .= $rol . " ";
|
|
1005 $concat_line .= $rol;
|
|
1006 } else {
|
|
1007 last;
|
|
1008 }
|
|
1009
|
|
1010 $_ = undef;
|
|
1011 }
|
|
1012 $concat_line =~ s/\s$//; # remove trailing space
|
|
1013 $$buffer = $_;
|
|
1014
|
|
1015 return $concat_line;
|
|
1016 }
|
|
1017
|
|
1018
|
|
1019 =head2 _read_PDB_jrnl
|
|
1020
|
|
1021 Title : _read_PDB_jrnl
|
|
1022 Usage : $obj->_read_PDB_jrnl($\buffer))
|
|
1023 Function: read jrnl record from PDB
|
|
1024 Returns : Bio::Annotation::Reference object
|
|
1025 Args :
|
|
1026
|
|
1027 =cut
|
|
1028
|
|
1029 sub _read_PDB_jrnl {
|
|
1030 my ($self, $buffer) = @_;
|
|
1031
|
|
1032 $_ = $$buffer;
|
|
1033 my ($auth, $titl,$edit,$ref,$publ,$refn);
|
|
1034 while (defined( $_ ||= $self->_readline )) {
|
|
1035 if (/^JRNL /) {
|
|
1036 # this code belgons in a seperate method (shared with
|
|
1037 # remark 1 parsing)
|
|
1038 my ($rec, $subr, $cont, $rol) = unpack "A6 x6 A4 A2 x1 A51", $_;
|
|
1039 $auth = $self->_concatenate_lines($auth,$rol) if ($subr eq "AUTH");
|
|
1040 $titl = $self->_concatenate_lines($titl,$rol) if ($subr eq "TITL");
|
|
1041 $edit = $self->_concatenate_lines($edit,$rol) if ($subr eq "EDIT");
|
|
1042 $ref = $self->_concatenate_lines($ref ,$rol) if ($subr eq "REF");
|
|
1043 $publ = $self->_concatenate_lines($publ,$rol) if ($subr eq "PUBL");
|
|
1044 $refn = $self->_concatenate_lines($refn,$rol) if ($subr eq "REFN");
|
|
1045 } else {
|
|
1046 last;
|
|
1047 }
|
|
1048
|
|
1049 $_ = undef; # trigger reading of next line
|
|
1050 } # while
|
|
1051
|
|
1052 $$buffer = $_;
|
|
1053 my $jrnl_ref = Bio::Annotation::Reference->new;
|
|
1054
|
|
1055 $jrnl_ref->authors($auth);
|
|
1056 $jrnl_ref->title($titl);
|
|
1057 $jrnl_ref->location($ref);
|
|
1058 $jrnl_ref->publisher($publ);
|
|
1059 $jrnl_ref->editors($edit);
|
|
1060 $jrnl_ref->encoded_ref($refn);
|
|
1061
|
|
1062 return $jrnl_ref;
|
|
1063 } # sub _read_PDB_jrnl
|
|
1064
|
|
1065
|
|
1066 =head2 _read_PDB_remark_1
|
|
1067
|
|
1068 Title : _read_PDB_remark_1
|
|
1069 Usage : $obj->_read_PDB_remark_1($\buffer))
|
|
1070 Function: read "remark 1" record from PDB
|
|
1071 Returns : array of Bio::Annotation::Reference objects
|
|
1072 Args :
|
|
1073
|
|
1074 =cut
|
|
1075
|
|
1076 sub _read_PDB_remark_1 {
|
|
1077 my ($self, $buffer) = @_;
|
|
1078
|
|
1079 $_ = $$buffer;
|
|
1080 my ($auth, $titl,$edit,$ref,$publ,$refn,$refnum);
|
|
1081 my @refs;
|
|
1082
|
|
1083 while (defined( $_ ||= $self->_readline )) {
|
|
1084 if (/^REMARK 1 /) {
|
|
1085 if (/^REMARK 1\s+REFERENCE\s+(\d+)\s*/) {
|
|
1086 $refnum = $1;
|
|
1087 if ($refnum != 1) { # this is first line of a reference
|
|
1088 my $rref = Bio::Annotation::Reference->new;
|
|
1089 $rref->authors($auth);
|
|
1090 $rref->title($titl);
|
|
1091 $rref->location($ref);
|
|
1092 $rref->publisher($publ);
|
|
1093 $rref->editors($edit);
|
|
1094 $rref->encoded_ref($refn);
|
|
1095 $auth = $titl = $edit = $ref = $publ = $refn = undef;
|
|
1096 push @refs, $rref;
|
|
1097 }
|
|
1098 } else {
|
|
1099 # this code belgons in a seperate method (shared with
|
|
1100 # remark 1 parsing)
|
|
1101 my ($rec, $subr, $cont, $rol) = unpack "A6 x6 A4 A2 x1 A51", $_;
|
|
1102 $auth = $self->_concatenate_lines($auth,$rol) if ($subr eq "AUTH");
|
|
1103 $titl = $self->_concatenate_lines($titl,$rol) if ($subr eq "TITL");
|
|
1104 $edit = $self->_concatenate_lines($edit,$rol) if ($subr eq "EDIT");
|
|
1105 $ref = $self->_concatenate_lines($ref ,$rol) if ($subr eq "REF");
|
|
1106 $publ = $self->_concatenate_lines($publ,$rol) if ($subr eq "PUBL");
|
|
1107 $refn = $self->_concatenate_lines($refn,$rol) if ($subr eq "REFN");
|
|
1108 }
|
|
1109 } else {
|
|
1110 # have we seen any reference at all (could be single REMARK 1 line
|
|
1111 if ( ! defined ($refnum) ) {
|
|
1112 last; # get out of while()
|
|
1113 }
|
|
1114
|
|
1115 # create last reference
|
|
1116 my $rref = Bio::Annotation::Reference->new;
|
|
1117 $rref->authors($auth);
|
|
1118 $rref->title($titl);
|
|
1119 $rref->location($ref);
|
|
1120 $rref->publisher($publ);
|
|
1121 $rref->editors($edit);
|
|
1122 $rref->encoded_ref($refn);
|
|
1123 push @refs, $rref;
|
|
1124 last;
|
|
1125 }
|
|
1126
|
|
1127 $_ = undef; # trigger reading of next line
|
|
1128 } # while
|
|
1129
|
|
1130 $$buffer = $_;
|
|
1131
|
|
1132 return @refs;
|
|
1133 } # sub _read_PDB_jrnl
|
|
1134
|
|
1135
|
|
1136 =head2 _read_PDB_coordinate_section
|
|
1137
|
|
1138 Title : _read_PDB_coordinate_section
|
|
1139 Usage : $obj->_read_PDB_coordinate_section($\buffer))
|
|
1140 Function: read one model from a PDB
|
|
1141 Returns : Bio::Structure::Model object
|
|
1142 Args :
|
|
1143
|
|
1144 =cut
|
|
1145
|
|
1146 sub _read_PDB_coordinate_section {
|
|
1147 my ($self, $buffer, $struc) = @_;
|
|
1148 my ($model_num, $chain_name, $residue_name, $atom_name); # to keep track of state
|
|
1149 $model_num = "";
|
|
1150 $chain_name = "";
|
|
1151 $residue_name = "";
|
|
1152 $atom_name = "";
|
|
1153
|
|
1154 my $atom_unpack = "x6 a5 x1 a4 a1 a3 x1 a1 a4 a1 x3 a8 a8 a8 a6 a6 x6 a4 a2 a2";
|
|
1155 my $anisou_unpack = "x6 a5 x1 a4 a1 a3 x1 a1 a4 a1 x1 a7 a7 a7 a7 a7 a7 a4 a2 a2";
|
|
1156
|
|
1157 my $model = Bio::Structure::Model->new;
|
|
1158 $model->id('default');
|
|
1159 my $noatom = $self->_noatom;
|
|
1160 my ($chain, $residue, $atom, $old);
|
|
1161 my (%_ch_in_model); # which chains are already in this model
|
|
1162
|
|
1163 $_ = $$buffer;
|
|
1164 while (defined( $_ ||= $self->_readline )) {
|
|
1165 # start of a new model
|
|
1166 if (/^MODEL\s+(\d+)/) {
|
|
1167 $model_num = $1;
|
|
1168 $self->debug("_read_PDB_coor: parsing model $model_num\n");
|
|
1169 $model->id($model_num);
|
|
1170 if (/^MODEL\s+\d+\s+\S+/) { # old format (pre 2.1)
|
|
1171 $old = 1;
|
|
1172 }
|
|
1173 }
|
|
1174 # old hier ook setten XXX
|
|
1175 # ATOM lines, if first set chain
|
|
1176 if (/^(ATOM |HETATM|SIGATM)/) {
|
|
1177 my @line_elements = unpack $atom_unpack, $_;
|
|
1178 my $pdb_atomname = $line_elements[1]; # need to get this before removing spaces
|
|
1179 for my $k (0 .. $#line_elements) {
|
|
1180 $line_elements[$k] =~ s/^\s+//; # remove leading space
|
|
1181 $line_elements[$k] =~ s/\s+$//; # remove trailing space
|
|
1182 $line_elements[$k] = undef if ($line_elements[$k] =~ /^\s*$/);
|
|
1183 }
|
|
1184 my ($serial, $atomname, $altloc, $resname, $chainID, $resseq, $icode, $x, $y, $z,
|
|
1185 $occupancy, $tempfactor, $segID, $element, $charge) = @line_elements;
|
|
1186 $chainID = 'default' if ( !defined $chainID );
|
|
1187 if ($chainID ne $chain_name) { # possibly a new chain
|
|
1188 # fix for bug #1187
|
|
1189 # we can have ATOM/HETATM of an already defined chain (A B A B)
|
|
1190 # e.g. 1abm
|
|
1191
|
|
1192 if (exists $_ch_in_model{$chainID} ) { # we have already seen this chain in this model
|
|
1193 $chain = $_ch_in_model{$chainID};
|
|
1194 } else { # we create a new chain
|
|
1195 $chain = Bio::Structure::Chain->new;
|
|
1196 $struc->add_chain($model,$chain);
|
|
1197 $chain->id($chainID);
|
|
1198 $_ch_in_model{$chainID} = $chain;
|
|
1199 }
|
|
1200 $chain_name = $chain->id;
|
|
1201 }
|
|
1202
|
|
1203 # fix from bug 1485, by dhoworth@mrc-lmb.cam.ac.uk
|
|
1204 # passes visual inspection by Ewan and tests are ok.
|
|
1205 # (bug fix was to add $icode here to make unique)
|
|
1206 # original looked like
|
|
1207 # my $res_name_num = $resname."-".$resseq;
|
|
1208
|
|
1209 # to get around warning, set icode to "" if not defined
|
|
1210 if( !defined $icode ) {
|
|
1211 $icode = "";
|
|
1212 }
|
|
1213
|
|
1214 my $res_name_num = $resname."-".$resseq.$icode;
|
|
1215 if ($res_name_num ne $residue_name) { # new residue
|
|
1216 $residue = Bio::Structure::Residue->new;
|
|
1217 $struc->add_residue($chain,$residue);
|
|
1218 $residue->id($res_name_num);
|
|
1219 $residue_name = $res_name_num;
|
|
1220 $atom_name = ""; # only needed inside a residue
|
|
1221 }
|
|
1222 # get out of here if we don't want the atom objects
|
|
1223 if ($noatom) {
|
|
1224 $_ = undef;
|
|
1225 next;
|
|
1226 }
|
|
1227 # alternative location: only take first one
|
|
1228 if ( $altloc && ($altloc =~ /\S+/) && ($atomname eq $atom_name) ) {
|
|
1229 $_ = undef; # trigger reading next line
|
|
1230 next;
|
|
1231 }
|
|
1232 if (/^(ATOM |HETATM)/) { # ATOM / HETATM
|
|
1233 $atom_name = $atomname;
|
|
1234 $atom = Bio::Structure::Atom->new;
|
|
1235 $struc->add_atom($residue,$atom);
|
|
1236 $atom->id($atomname);
|
|
1237 $atom->pdb_atomname($pdb_atomname); # store away PDB atomname for writing out
|
|
1238 $atom->serial($serial);
|
|
1239 $atom->icode($icode);
|
|
1240 $atom->x($x);
|
|
1241 $atom->y($y);
|
|
1242 $atom->z($z);
|
|
1243 $atom->occupancy($occupancy);
|
|
1244 $atom->tempfactor($tempfactor);
|
|
1245 $atom->segID($segID); # deprecated but used by people
|
|
1246 if (! $old ) {
|
|
1247 $atom->element($element);
|
|
1248 $atom->charge($charge);
|
|
1249 }
|
|
1250 }
|
|
1251 else { # SIGATM
|
|
1252 my $sigx = $x;
|
|
1253 my $sigy = $y;
|
|
1254 my $sigz = $z;
|
|
1255 my $sigocc = $occupancy;
|
|
1256 my $sigtemp = $tempfactor;
|
|
1257 if ($atom_name ne $atomname) { # something wrong with PDB file
|
|
1258 $self->throw("A SIGATM record should have the same $atomname as the previous record $atom_name\n");
|
|
1259 }
|
|
1260 $atom->sigx($sigx);
|
|
1261 $atom->sigy($sigy);
|
|
1262 $atom->sigz($sigz);
|
|
1263 $atom->sigocc($sigocc);
|
|
1264 $atom->sigtemp($sigtemp);
|
|
1265
|
|
1266 }
|
|
1267 } # ATOM|HETARM|SIGATM
|
|
1268
|
|
1269 # ANISOU | SIGUIJ lines
|
|
1270 if (/^(ANISOU|SIGUIJ)/) {
|
|
1271 if ($noatom) {
|
|
1272 $_ = undef;
|
|
1273 next;
|
|
1274 }
|
|
1275 my @line_elements = unpack $anisou_unpack, $_;
|
|
1276 for my $k (0 .. $#line_elements) {
|
|
1277 $line_elements[$k] =~ s/^\s+//; # remove leading space
|
|
1278 $line_elements[$k] =~ s/\s+$//; # remove trailing space
|
|
1279 $line_elements[$k] = undef if ($line_elements[$k] =~ /^\s*$/);
|
|
1280 }
|
|
1281 my ($serial, $atomname, $altloc, $resname, $chainID, $resseq, $icode,
|
|
1282 $u11,$u22, $u33, $u12, $u13, $u23, $segID, $element, $charge) = @line_elements;
|
|
1283 $self->debug("read_PDB_coor: parsing ANISOU record: $serial $atomname\n");
|
|
1284 if ( $altloc && ($altloc =~ /\S+/) && ($atomname eq $atom_name) ) {
|
|
1285 $_ = undef;
|
|
1286 next;
|
|
1287 }
|
|
1288 if (/^ANISOU/) {
|
|
1289 if ($atom_name ne $atomname) { # something wrong with PDB file
|
|
1290 $self->throw("A ANISOU record should have the same $atomname as the previous record $atom_name\n");
|
|
1291 }
|
|
1292 $atom->aniso("u11",$u11);
|
|
1293 $atom->aniso("u22",$u22);
|
|
1294 $atom->aniso("u33",$u33);
|
|
1295 $atom->aniso("u12",$u12);
|
|
1296 $atom->aniso("u13",$u13);
|
|
1297 $atom->aniso("u23",$u23);
|
|
1298 }
|
|
1299 else { # SIGUIJ
|
|
1300 if ($atom_name ne $atomname) { # something wrong with PDB file
|
|
1301 $self->throw("A SIGUIJ record should have the same $atomname as the previous record $atom_name\n");
|
|
1302 }
|
|
1303 # could use different variable names, but hey ...
|
|
1304 $atom->aniso("sigu11",$u11);
|
|
1305 $atom->aniso("sigu22",$u22);
|
|
1306 $atom->aniso("sigu33",$u33);
|
|
1307 $atom->aniso("sigu12",$u12);
|
|
1308 $atom->aniso("sigu13",$u13);
|
|
1309 $atom->aniso("sigu23",$u23);
|
|
1310 }
|
|
1311 } # ANISOU | SIGUIJ
|
|
1312
|
|
1313 if (/^TER /) {
|
|
1314 $_ = undef;
|
|
1315 next;
|
|
1316 }
|
|
1317
|
|
1318 if (/^ENDMDL/) {
|
|
1319 $_ = $self->_readline;
|
|
1320 last;
|
|
1321 }
|
|
1322
|
|
1323 if (/^(CONECT|MASTER)/) { # get out of here
|
|
1324 # current line is OK
|
|
1325 last;
|
|
1326 }
|
|
1327 $_ = undef;
|
|
1328
|
|
1329 } # while
|
|
1330
|
|
1331 $$buffer = $_;
|
|
1332
|
|
1333 return $model;
|
|
1334 } # _read_PDB_coordinate_section
|
|
1335
|
|
1336
|
|
1337 sub _write_PDB_simple_record {
|
|
1338 my ($self, @args) = @_;
|
|
1339 my ($name, $cont , $annotation, $rol, $string) =
|
|
1340 $self->_rearrange([qw(
|
|
1341 NAME
|
|
1342 CONT
|
|
1343 ANNOTATION
|
|
1344 ROL
|
|
1345 STRING
|
|
1346 )],
|
|
1347 @args);
|
|
1348 if (defined $string && defined $annotation) {
|
|
1349 $self->throw("you can only supply one of -annoation or -string");
|
|
1350 }
|
|
1351 my ($output_string, $ann_string, $t_string);
|
|
1352 my ($rol_begin, $rol_end) = $rol =~ /^(\d+)-(\d+)$/;
|
|
1353 my $rol_length = $rol_end - $rol_begin +1;
|
|
1354 if ($string) {
|
|
1355 if (length $string > $rol_length) {
|
|
1356 # we might need to split $string in multiple lines
|
|
1357 while (length $string > $rol_length) {
|
|
1358 # other option might be to go for a bunch of substr's
|
|
1359 my @c = split//,$string;
|
|
1360 my $t = $rol_length; # index into @c
|
|
1361 while ($c[$t] ne " ") { # find first space, going backwards
|
|
1362 $self->debug("c[t]: $c[$t] $t\n");
|
|
1363 $t--;
|
|
1364 if ($t == 0) { $self->throw("Found no space for $string\n"); }
|
|
1365 }
|
|
1366 $self->debug("t: $t rol_length: $rol_length\n");
|
|
1367 $ann_string .= substr($string, 0, $t);
|
|
1368 $self->debug("ann_string: $ann_string\n");
|
|
1369 $ann_string .= " " x ($rol_length - $t );
|
|
1370 $string = substr($string, $t+1);
|
|
1371 $string =~ s/^\s+//;
|
|
1372 $self->debug("ann_string: $ann_string~~\nstring: $string~~\n");
|
|
1373 }
|
|
1374 $ann_string .= $string;
|
|
1375 } else {
|
|
1376 $ann_string = $string;
|
|
1377 }
|
|
1378 } else {
|
|
1379 $ann_string = $annotation->as_text;
|
|
1380 $ann_string =~ s/^Value: //;
|
|
1381 }
|
|
1382 # ann_string contains the thing to write out, writing out happens below
|
|
1383 my $ann_length = length $ann_string;
|
|
1384
|
|
1385 $self->debug("ann_string: $ann_string\n");
|
|
1386 if ($cont) {
|
|
1387 my ($c_begin, $c_end) = $cont =~ /^(\d+)-(\d+)$/;
|
|
1388 if ( $ann_length > $rol_length ) { # we need to continuation lines
|
|
1389 my $first_line = 1;
|
|
1390 my $cont_number = 2;
|
|
1391 my $out_line;
|
|
1392 my $num_pos = $rol_length;
|
|
1393 my $i = 0;
|
|
1394 while( $i < $ann_length ) {
|
|
1395 $t_string = substr($ann_string, $i, $num_pos);
|
|
1396 $self->debug("t_string: $t_string~~$i $num_pos\n");
|
|
1397 if ($first_line) {
|
|
1398 $out_line = $name . " " x ($rol_begin - $c_begin) . $t_string;
|
|
1399 $out_line .= " " x (80 - length($out_line) ) . "\n";
|
|
1400 $first_line = 0;
|
|
1401 $output_string = $out_line;
|
|
1402 $i += $num_pos; # first do counter
|
|
1403 if ($rol_begin - $c_end == 1) { # next line one character less
|
|
1404 $num_pos--;
|
|
1405 }
|
|
1406 } else {
|
|
1407 $out_line = $name . sprintf("%2d",$cont_number);
|
|
1408 # a space after continuation number
|
|
1409 if ($rol_begin - $c_end == 1) { # one space after cont number
|
|
1410 $out_line .= " ";
|
|
1411 $out_line .= $t_string;
|
|
1412 } else {
|
|
1413 $out_line .= " " x ($rol_begin - $c_end - 1) . $t_string;
|
|
1414 }
|
|
1415 $out_line .= " " x (80 -length($out_line) ) . "\n";
|
|
1416 $cont_number++;
|
|
1417 $output_string .= $out_line;
|
|
1418 $i += $num_pos;
|
|
1419 }
|
|
1420 }
|
|
1421 } else { # no continuation
|
|
1422 my $spaces = $rol_begin - $c_begin; # number of spaces need to insert
|
|
1423 $output_string = $name . " " x $spaces . $ann_string;
|
|
1424 $output_string .= " " x (80 - length($output_string) );
|
|
1425 }
|
|
1426 } else { # no contintuation lines
|
|
1427 if ($ann_length < $rol_length) {
|
|
1428 $output_string = $name . $ann_string;
|
|
1429 $output_string .= " " x (80 - length($output_string) );
|
|
1430 } else {
|
|
1431 for (my $i = 0; $i < $ann_length; $i += $rol_length) {
|
|
1432 my $out_line;
|
|
1433 $t_string = substr($ann_string, $i, $rol_length);
|
|
1434 $out_line = $name . $t_string;
|
|
1435 $out_line .= " " x (80 -length($out_line) ) . "\n";
|
|
1436 $output_string .= $out_line;
|
|
1437 }
|
|
1438 }
|
|
1439 }
|
|
1440 $output_string =~ s/\n$//; # remove trailing newline
|
|
1441 $self->_print("$output_string\n");
|
|
1442
|
|
1443 }
|
|
1444
|
|
1445 sub _write_PDB_remark_record {
|
|
1446 my ($self, $struc, $remark_num) = @_;
|
|
1447 my ($ann) = $struc->annotation->get_Annotations("remark_$remark_num");
|
|
1448 my $name = sprintf("REMARK %3d ",$remark_num);
|
|
1449 $self->_write_PDB_simple_record(-name => $name, -annotation => $ann, -rol => "12-70");
|
|
1450 }
|
|
1451
|
|
1452 1;
|