0
|
1 #!/usr/bin/env python
|
|
2 # -*- coding: utf-8 -*-
|
|
3 #
|
|
4 # Copyright 2020:
|
|
5 # Francesco Ambrosetti
|
|
6 #
|
|
7
|
|
8 """
|
|
9 Formats the antibody to fit the HADDOCK requirements with the
|
|
10 specified chain id and returns the list of residues belonging
|
|
11 to the HV loops defined according to the HADDOCK friendly format.
|
|
12
|
|
13 *** The antibody has to be numbered according to the Chothia scheme ***
|
|
14
|
|
15 Usage:
|
|
16 python haddock-format.py <chothia numbered antibody> <output .pdb file> <chain_id>
|
|
17
|
|
18 Example:
|
|
19 python 4G6K_ch.pdb 4G6K-HADDOCK.pdb A
|
|
20
|
|
21 Author: {0}
|
|
22 Email: {1}
|
|
23 """
|
|
24
|
|
25 import argparse
|
|
26 import biopandas.pdb as bp
|
|
27 import copy as cp
|
|
28 import os
|
|
29 import sys
|
|
30
|
|
31 __author__ = "Francesco Ambrosetti"
|
|
32 __email__ = "ambrosetti.francesco@gmail.com"
|
|
33 USAGE = __doc__.format(__author__, __email__)
|
|
34
|
|
35
|
|
36 def check_input():
|
|
37 """
|
|
38 Check and collect the script inputs
|
|
39 Returns:
|
|
40 args.pdb (str): path to the pdb-file
|
|
41 args.chain (str): chain id to use for the HADDOCK-formatted structure
|
|
42 """
|
|
43
|
|
44 # Parse command line arguments
|
|
45 parser = argparse.ArgumentParser(
|
|
46 description=USAGE,
|
|
47 formatter_class=argparse.RawTextHelpFormatter)
|
|
48 parser.add_argument('pdb', help='Path to the Chothia numbered antibody PDB structure', type=str)
|
|
49 parser.add_argument('out', help='Path to the output PDB file', type=str)
|
|
50 parser.add_argument('chain', help='Chain id to use for the HADDOCK-formatted PDB structure', type=str)
|
|
51
|
|
52 args = parser.parse_args()
|
|
53
|
|
54 if not os.path.isfile(args.pdb):
|
|
55 emsg = 'ERROR!! File {0} not found or not readable\n'.format(args.pdb)
|
|
56 sys.stderr.write(emsg)
|
|
57 sys.exit(1)
|
|
58
|
|
59 if not args.pdb.endswith(".pdb"):
|
|
60 emsg = 'ERROR!! File {0} not recognize as a PDB file\n'.format(args.pdb)
|
|
61 sys.stderr.write(emsg)
|
|
62 sys.exit(1)
|
|
63
|
|
64 return args.pdb, args.out, args.chain
|
|
65
|
|
66
|
|
67 def unique(sequence):
|
|
68 seen = set()
|
|
69 return [x for x in sequence if not (x in seen or seen.add(x))]
|
|
70
|
|
71
|
|
72 class AbHaddockFormat:
|
|
73 """Class to renumber a Chothia antibody to make it HADDOCK-ready"""
|
|
74
|
|
75 # Loops
|
|
76 # L chain loops
|
|
77 l1 = ['26_L', '27_L', '28_L', '29_L', '30_L', '30A_L', '30B_L', '30C_L', '30D_L', '30E_L', '30F_L', '31_L', '32_L']
|
|
78 l2 = ['50_L', '50A_L', '50B_L', '50C_L', '50D_L', '50E_L', '50F_L', '51_L', '52_L']
|
|
79 l3 = ['91_L', '92_L', '93_L', '94_L', '95_L', '95A_L', '95B_L', '95C_L', '95D_L', '95E_L', '95F_L', '96_L']
|
|
80 loops_l = l1 + l2 + l3
|
|
81
|
|
82 # H chain loops
|
|
83 h1 = ['26_H', '27_H', '28_H', '29_H', '30_H', '31_H', '31A_H', '31B_H', '31C_H', '31D_H', '31E_H', '31F_H', '32_H']
|
|
84 h2 = ['52A_H', '52B_H', '52C_H', '52D_H', '52E_H', '52F_H', '53_H', '54_H', '55_H']
|
|
85 h3 = ['96_H', '97_H', '98_H', '99_H', '100_H', '100A_H', '100B_H', '100C_H', '100D_H', '100E_H', '100F_H', '100G_H',
|
|
86 '100H_H', '100I_H', '100J_H', '100K_H', '101_H']
|
|
87 loops_h = h1 + h2 + h3
|
|
88
|
|
89 def __init__(self, pdbfile, chain):
|
|
90 """
|
|
91 Constructor for the AbHaddockFormat class
|
|
92 Args:
|
|
93 pdbfile (str): path to the antibody .pdb file
|
|
94 chain (str): chain id to use for the HADDOCK-ready structure
|
|
95 """
|
|
96 self.file = pdbfile
|
|
97 self.pdb = bp.PandasPdb().read_pdb(self.file)
|
|
98 self.chain = chain
|
|
99
|
|
100 def check_chain(self):
|
|
101 """
|
|
102 Check if the antibody contains the light and heavy chain
|
|
103 Returns:
|
|
104 0
|
|
105 """
|
|
106 chain_ids = self.pdb.df['ATOM']['chain_id'].values
|
|
107
|
|
108 if 'H' not in chain_ids:
|
|
109 emsg = 'ERROR!! File {0} does not contain the heavy chain\n'.format(self.file)
|
|
110 sys.stderr.write(emsg)
|
|
111 sys.exit(1)
|
|
112
|
|
113 elif 'L' not in chain_ids:
|
|
114 emsg = 'ERROR!! File {0} does not contain the light chain\n'.format(self.file)
|
|
115 sys.stderr.write(emsg)
|
|
116 sys.exit(1)
|
|
117
|
|
118 return 0
|
|
119
|
|
120 def ab_format(self):
|
|
121 """
|
|
122 Renumbers the antibody and extract the HV residues
|
|
123
|
|
124 Returns:
|
|
125 hv_list (list): list of the HV residue numbers
|
|
126 new_pdb (biopandas.pdb.pandas_pdb.PandasPdb): HADDOCK-ready pdb
|
|
127 """
|
|
128
|
|
129 # Check antibody chain ids
|
|
130 self.check_chain()
|
|
131
|
|
132 # Modify resno to include insertions and chain id
|
|
133 resno = self.pdb.df['ATOM']['residue_number'].values
|
|
134 ins = self.pdb.df['ATOM']['insertion'].values
|
|
135 chain = self.pdb.df['ATOM']['chain_id'].values
|
|
136 ch_resno = ['{0}{1}_{2}'.format(i, j, c) for i, j, c in zip(resno, ins, chain)]
|
|
137
|
|
138 # Create new resno
|
|
139 count = 0
|
|
140 prev_resid = None
|
|
141 new_resno = []
|
|
142
|
|
143 # Renumber
|
|
144 for r in ch_resno:
|
|
145 if r != prev_resid:
|
|
146 count += 1
|
|
147 new_resno.append(count)
|
|
148 prev_resid = r
|
|
149 elif r == prev_resid:
|
|
150 new_resno.append(count)
|
|
151 prev_resid = r
|
|
152
|
|
153 # Update pdb
|
|
154 new_pdb = cp.deepcopy(self.pdb)
|
|
155 new_pdb.df['ATOM']['chain_id'] = self.chain
|
|
156 new_pdb.df['ATOM']['residue_number'] = new_resno
|
|
157 new_pdb.df['ATOM']['insertion'] = '' # Remove insertions
|
|
158
|
|
159 # Create dictionary with old and new numbering
|
|
160 resno_dict = dict(zip(unique(ch_resno), unique(new_resno)))
|
|
161
|
|
162 # Collect HV residues with the new numbering
|
|
163 hv_list = []
|
|
164
|
|
165 # Heavy chain
|
|
166 for hv_heavy in self.loops_h:
|
|
167 if hv_heavy in resno_dict.keys():
|
|
168 hv_list.append(resno_dict[hv_heavy])
|
|
169
|
|
170 # Light chain
|
|
171 for hv_light in self.loops_l:
|
|
172 if hv_light in resno_dict.keys():
|
|
173 hv_list.append(resno_dict[hv_light])
|
|
174
|
|
175 hv_list.sort()
|
|
176 return hv_list, new_pdb
|
|
177
|
|
178
|
|
179 if __name__ == '__main__':
|
|
180
|
|
181 # Get inputs
|
|
182 pdb_file, out_file, chain_id = check_input()
|
|
183
|
|
184 # Renumber pdb file and get HV residues
|
|
185 pdb_format = AbHaddockFormat(pdb_file, chain_id)
|
|
186 hv_resno, pdb_ren = pdb_format.ab_format()
|
|
187
|
|
188 # Write pdb into a file
|
|
189 pdb_ren.to_pdb(path=out_file, records=['ATOM'], append_newline=True)
|
|
190
|
|
191 # Print HV residues
|
|
192 print(','.join(map(str, hv_resno)))
|