Mercurial > repos > p.lucas > abhaddock
comparison ab_haddock_format.py @ 0:6a4d5446c123 draft
Uploaded python script
| author | p.lucas |
|---|---|
| date | Thu, 06 May 2021 08:46:53 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:6a4d5446c123 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 # | |
| 4 # Copyright 2020: | |
| 5 # Francesco Ambrosetti | |
| 6 # | |
| 7 | |
| 8 """ | |
| 9 Formats the antibody to fit the HADDOCK requirements with the | |
| 10 specified chain id and returns the list of residues belonging | |
| 11 to the HV loops defined according to the HADDOCK friendly format. | |
| 12 | |
| 13 *** The antibody has to be numbered according to the Chothia scheme *** | |
| 14 | |
| 15 Usage: | |
| 16 python haddock-format.py <chothia numbered antibody> <output .pdb file> <chain_id> | |
| 17 | |
| 18 Example: | |
| 19 python 4G6K_ch.pdb 4G6K-HADDOCK.pdb A | |
| 20 | |
| 21 Author: {0} | |
| 22 Email: {1} | |
| 23 """ | |
| 24 | |
| 25 import argparse | |
| 26 import biopandas.pdb as bp | |
| 27 import copy as cp | |
| 28 import os | |
| 29 import sys | |
| 30 | |
| 31 __author__ = "Francesco Ambrosetti" | |
| 32 __email__ = "ambrosetti.francesco@gmail.com" | |
| 33 USAGE = __doc__.format(__author__, __email__) | |
| 34 | |
| 35 | |
| 36 def check_input(): | |
| 37 """ | |
| 38 Check and collect the script inputs | |
| 39 Returns: | |
| 40 args.pdb (str): path to the pdb-file | |
| 41 args.chain (str): chain id to use for the HADDOCK-formatted structure | |
| 42 """ | |
| 43 | |
| 44 # Parse command line arguments | |
| 45 parser = argparse.ArgumentParser( | |
| 46 description=USAGE, | |
| 47 formatter_class=argparse.RawTextHelpFormatter) | |
| 48 parser.add_argument('pdb', help='Path to the Chothia numbered antibody PDB structure', type=str) | |
| 49 parser.add_argument('out', help='Path to the output PDB file', type=str) | |
| 50 parser.add_argument('chain', help='Chain id to use for the HADDOCK-formatted PDB structure', type=str) | |
| 51 | |
| 52 args = parser.parse_args() | |
| 53 | |
| 54 if not os.path.isfile(args.pdb): | |
| 55 emsg = 'ERROR!! File {0} not found or not readable\n'.format(args.pdb) | |
| 56 sys.stderr.write(emsg) | |
| 57 sys.exit(1) | |
| 58 | |
| 59 if not args.pdb.endswith(".pdb"): | |
| 60 emsg = 'ERROR!! File {0} not recognize as a PDB file\n'.format(args.pdb) | |
| 61 sys.stderr.write(emsg) | |
| 62 sys.exit(1) | |
| 63 | |
| 64 return args.pdb, args.out, args.chain | |
| 65 | |
| 66 | |
| 67 def unique(sequence): | |
| 68 seen = set() | |
| 69 return [x for x in sequence if not (x in seen or seen.add(x))] | |
| 70 | |
| 71 | |
| 72 class AbHaddockFormat: | |
| 73 """Class to renumber a Chothia antibody to make it HADDOCK-ready""" | |
| 74 | |
| 75 # Loops | |
| 76 # L chain loops | |
| 77 l1 = ['26_L', '27_L', '28_L', '29_L', '30_L', '30A_L', '30B_L', '30C_L', '30D_L', '30E_L', '30F_L', '31_L', '32_L'] | |
| 78 l2 = ['50_L', '50A_L', '50B_L', '50C_L', '50D_L', '50E_L', '50F_L', '51_L', '52_L'] | |
| 79 l3 = ['91_L', '92_L', '93_L', '94_L', '95_L', '95A_L', '95B_L', '95C_L', '95D_L', '95E_L', '95F_L', '96_L'] | |
| 80 loops_l = l1 + l2 + l3 | |
| 81 | |
| 82 # H chain loops | |
| 83 h1 = ['26_H', '27_H', '28_H', '29_H', '30_H', '31_H', '31A_H', '31B_H', '31C_H', '31D_H', '31E_H', '31F_H', '32_H'] | |
| 84 h2 = ['52A_H', '52B_H', '52C_H', '52D_H', '52E_H', '52F_H', '53_H', '54_H', '55_H'] | |
| 85 h3 = ['96_H', '97_H', '98_H', '99_H', '100_H', '100A_H', '100B_H', '100C_H', '100D_H', '100E_H', '100F_H', '100G_H', | |
| 86 '100H_H', '100I_H', '100J_H', '100K_H', '101_H'] | |
| 87 loops_h = h1 + h2 + h3 | |
| 88 | |
| 89 def __init__(self, pdbfile, chain): | |
| 90 """ | |
| 91 Constructor for the AbHaddockFormat class | |
| 92 Args: | |
| 93 pdbfile (str): path to the antibody .pdb file | |
| 94 chain (str): chain id to use for the HADDOCK-ready structure | |
| 95 """ | |
| 96 self.file = pdbfile | |
| 97 self.pdb = bp.PandasPdb().read_pdb(self.file) | |
| 98 self.chain = chain | |
| 99 | |
| 100 def check_chain(self): | |
| 101 """ | |
| 102 Check if the antibody contains the light and heavy chain | |
| 103 Returns: | |
| 104 0 | |
| 105 """ | |
| 106 chain_ids = self.pdb.df['ATOM']['chain_id'].values | |
| 107 | |
| 108 if 'H' not in chain_ids: | |
| 109 emsg = 'ERROR!! File {0} does not contain the heavy chain\n'.format(self.file) | |
| 110 sys.stderr.write(emsg) | |
| 111 sys.exit(1) | |
| 112 | |
| 113 elif 'L' not in chain_ids: | |
| 114 emsg = 'ERROR!! File {0} does not contain the light chain\n'.format(self.file) | |
| 115 sys.stderr.write(emsg) | |
| 116 sys.exit(1) | |
| 117 | |
| 118 return 0 | |
| 119 | |
| 120 def ab_format(self): | |
| 121 """ | |
| 122 Renumbers the antibody and extract the HV residues | |
| 123 | |
| 124 Returns: | |
| 125 hv_list (list): list of the HV residue numbers | |
| 126 new_pdb (biopandas.pdb.pandas_pdb.PandasPdb): HADDOCK-ready pdb | |
| 127 """ | |
| 128 | |
| 129 # Check antibody chain ids | |
| 130 self.check_chain() | |
| 131 | |
| 132 # Modify resno to include insertions and chain id | |
| 133 resno = self.pdb.df['ATOM']['residue_number'].values | |
| 134 ins = self.pdb.df['ATOM']['insertion'].values | |
| 135 chain = self.pdb.df['ATOM']['chain_id'].values | |
| 136 ch_resno = ['{0}{1}_{2}'.format(i, j, c) for i, j, c in zip(resno, ins, chain)] | |
| 137 | |
| 138 # Create new resno | |
| 139 count = 0 | |
| 140 prev_resid = None | |
| 141 new_resno = [] | |
| 142 | |
| 143 # Renumber | |
| 144 for r in ch_resno: | |
| 145 if r != prev_resid: | |
| 146 count += 1 | |
| 147 new_resno.append(count) | |
| 148 prev_resid = r | |
| 149 elif r == prev_resid: | |
| 150 new_resno.append(count) | |
| 151 prev_resid = r | |
| 152 | |
| 153 # Update pdb | |
| 154 new_pdb = cp.deepcopy(self.pdb) | |
| 155 new_pdb.df['ATOM']['chain_id'] = self.chain | |
| 156 new_pdb.df['ATOM']['residue_number'] = new_resno | |
| 157 new_pdb.df['ATOM']['insertion'] = '' # Remove insertions | |
| 158 | |
| 159 # Create dictionary with old and new numbering | |
| 160 resno_dict = dict(zip(unique(ch_resno), unique(new_resno))) | |
| 161 | |
| 162 # Collect HV residues with the new numbering | |
| 163 hv_list = [] | |
| 164 | |
| 165 # Heavy chain | |
| 166 for hv_heavy in self.loops_h: | |
| 167 if hv_heavy in resno_dict.keys(): | |
| 168 hv_list.append(resno_dict[hv_heavy]) | |
| 169 | |
| 170 # Light chain | |
| 171 for hv_light in self.loops_l: | |
| 172 if hv_light in resno_dict.keys(): | |
| 173 hv_list.append(resno_dict[hv_light]) | |
| 174 | |
| 175 hv_list.sort() | |
| 176 return hv_list, new_pdb | |
| 177 | |
| 178 | |
| 179 if __name__ == '__main__': | |
| 180 | |
| 181 # Get inputs | |
| 182 pdb_file, out_file, chain_id = check_input() | |
| 183 | |
| 184 # Renumber pdb file and get HV residues | |
| 185 pdb_format = AbHaddockFormat(pdb_file, chain_id) | |
| 186 hv_resno, pdb_ren = pdb_format.ab_format() | |
| 187 | |
| 188 # Write pdb into a file | |
| 189 pdb_ren.to_pdb(path=out_file, records=['ATOM'], append_newline=True) | |
| 190 | |
| 191 # Print HV residues | |
| 192 print(','.join(map(str, hv_resno))) |
