Mercurial > repos > p.lucas > abhaddock
changeset 3:4a71ada763cd draft
Deleted selected files
author | p.lucas |
---|---|
date | Thu, 06 May 2021 09:01:46 +0000 |
parents | a8cda1a4b9ea |
children | 863db9ddab44 |
files | ab_haddock_format.py |
diffstat | 1 files changed, 0 insertions(+), 192 deletions(-) [+] |
line wrap: on
line diff
--- a/ab_haddock_format.py Thu May 06 09:01:30 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,192 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright 2020: -# Francesco Ambrosetti -# - -""" -Formats the antibody to fit the HADDOCK requirements with the -specified chain id and returns the list of residues belonging -to the HV loops defined according to the HADDOCK friendly format. - -*** The antibody has to be numbered according to the Chothia scheme *** - -Usage: - python haddock-format.py <chothia numbered antibody> <output .pdb file> <chain_id> - -Example: - python 4G6K_ch.pdb 4G6K-HADDOCK.pdb A - -Author: {0} -Email: {1} -""" - -import argparse -import biopandas.pdb as bp -import copy as cp -import os -import sys - -__author__ = "Francesco Ambrosetti" -__email__ = "ambrosetti.francesco@gmail.com" -USAGE = __doc__.format(__author__, __email__) - - -def check_input(): - """ - Check and collect the script inputs - Returns: - args.pdb (str): path to the pdb-file - args.chain (str): chain id to use for the HADDOCK-formatted structure - """ - - # Parse command line arguments - parser = argparse.ArgumentParser( - description=USAGE, - formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument('pdb', help='Path to the Chothia numbered antibody PDB structure', type=str) - parser.add_argument('out', help='Path to the output PDB file', type=str) - parser.add_argument('chain', help='Chain id to use for the HADDOCK-formatted PDB structure', type=str) - - args = parser.parse_args() - - if not os.path.isfile(args.pdb): - emsg = 'ERROR!! File {0} not found or not readable\n'.format(args.pdb) - sys.stderr.write(emsg) - sys.exit(1) - - if not args.pdb.endswith(".pdb"): - emsg = 'ERROR!! File {0} not recognize as a PDB file\n'.format(args.pdb) - sys.stderr.write(emsg) - sys.exit(1) - - return args.pdb, args.out, args.chain - - -def unique(sequence): - seen = set() - return [x for x in sequence if not (x in seen or seen.add(x))] - - -class AbHaddockFormat: - """Class to renumber a Chothia antibody to make it HADDOCK-ready""" - - # Loops - # L chain loops - l1 = ['26_L', '27_L', '28_L', '29_L', '30_L', '30A_L', '30B_L', '30C_L', '30D_L', '30E_L', '30F_L', '31_L', '32_L'] - l2 = ['50_L', '50A_L', '50B_L', '50C_L', '50D_L', '50E_L', '50F_L', '51_L', '52_L'] - l3 = ['91_L', '92_L', '93_L', '94_L', '95_L', '95A_L', '95B_L', '95C_L', '95D_L', '95E_L', '95F_L', '96_L'] - loops_l = l1 + l2 + l3 - - # H chain loops - h1 = ['26_H', '27_H', '28_H', '29_H', '30_H', '31_H', '31A_H', '31B_H', '31C_H', '31D_H', '31E_H', '31F_H', '32_H'] - h2 = ['52A_H', '52B_H', '52C_H', '52D_H', '52E_H', '52F_H', '53_H', '54_H', '55_H'] - h3 = ['96_H', '97_H', '98_H', '99_H', '100_H', '100A_H', '100B_H', '100C_H', '100D_H', '100E_H', '100F_H', '100G_H', - '100H_H', '100I_H', '100J_H', '100K_H', '101_H'] - loops_h = h1 + h2 + h3 - - def __init__(self, pdbfile, chain): - """ - Constructor for the AbHaddockFormat class - Args: - pdbfile (str): path to the antibody .pdb file - chain (str): chain id to use for the HADDOCK-ready structure - """ - self.file = pdbfile - self.pdb = bp.PandasPdb().read_pdb(self.file) - self.chain = chain - - def check_chain(self): - """ - Check if the antibody contains the light and heavy chain - Returns: - 0 - """ - chain_ids = self.pdb.df['ATOM']['chain_id'].values - - if 'H' not in chain_ids: - emsg = 'ERROR!! File {0} does not contain the heavy chain\n'.format(self.file) - sys.stderr.write(emsg) - sys.exit(1) - - elif 'L' not in chain_ids: - emsg = 'ERROR!! File {0} does not contain the light chain\n'.format(self.file) - sys.stderr.write(emsg) - sys.exit(1) - - return 0 - - def ab_format(self): - """ - Renumbers the antibody and extract the HV residues - - Returns: - hv_list (list): list of the HV residue numbers - new_pdb (biopandas.pdb.pandas_pdb.PandasPdb): HADDOCK-ready pdb - """ - - # Check antibody chain ids - self.check_chain() - - # Modify resno to include insertions and chain id - resno = self.pdb.df['ATOM']['residue_number'].values - ins = self.pdb.df['ATOM']['insertion'].values - chain = self.pdb.df['ATOM']['chain_id'].values - ch_resno = ['{0}{1}_{2}'.format(i, j, c) for i, j, c in zip(resno, ins, chain)] - - # Create new resno - count = 0 - prev_resid = None - new_resno = [] - - # Renumber - for r in ch_resno: - if r != prev_resid: - count += 1 - new_resno.append(count) - prev_resid = r - elif r == prev_resid: - new_resno.append(count) - prev_resid = r - - # Update pdb - new_pdb = cp.deepcopy(self.pdb) - new_pdb.df['ATOM']['chain_id'] = self.chain - new_pdb.df['ATOM']['residue_number'] = new_resno - new_pdb.df['ATOM']['insertion'] = '' # Remove insertions - - # Create dictionary with old and new numbering - resno_dict = dict(zip(unique(ch_resno), unique(new_resno))) - - # Collect HV residues with the new numbering - hv_list = [] - - # Heavy chain - for hv_heavy in self.loops_h: - if hv_heavy in resno_dict.keys(): - hv_list.append(resno_dict[hv_heavy]) - - # Light chain - for hv_light in self.loops_l: - if hv_light in resno_dict.keys(): - hv_list.append(resno_dict[hv_light]) - - hv_list.sort() - return hv_list, new_pdb - - -if __name__ == '__main__': - - # Get inputs - pdb_file, out_file, chain_id = check_input() - - # Renumber pdb file and get HV residues - pdb_format = AbHaddockFormat(pdb_file, chain_id) - hv_resno, pdb_ren = pdb_format.ab_format() - - # Write pdb into a file - pdb_ren.to_pdb(path=out_file, records=['ATOM'], append_newline=True) - - # Print HV residues - print(','.join(map(str, hv_resno)))