Mercurial > repos > agpetit > calculate_diameter
view calculate_pore_diameter_aqp.py @ 3:b120a98cf623 draft
"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/aubi_piaf commit 48a10de1b21f94ab8019d9d0e4a43e0bd9d0c31e-dirty"
author | agpetit |
---|---|
date | Fri, 27 May 2022 07:36:52 +0000 |
parents | e227d80e53be |
children | a4097272ade0 |
line wrap: on
line source
#!/usr/bin/env python3 # coding: utf-8 """ This script allows to launch a calculation of the pore diameter of an aquaporin as a function of time from a .gro file and an .xtc file. At the end of the script, a table containing the evolution of the distance over time is created. # USAGE : calculate_pore_diameter_aqp.py -xtc xtc_file -gro gro_file -npao npa1 Pattern (optional) -npas npa2 Pattern (optional) -arro arR1 Pattern on H5 (optional) -arrs arR2 Pattern on HE (optional) -v verbose (optional) -w work_directory (optional) -o output_directory (optional) --gro_file_ext extension of gro file (optional) --xtc_file_ext extension of xtc file (optional) -log name of log file (optional) -o name of output file (optional). """ __all__ = [ "calculate_length_proto", "search_pattern", "run_one_frame_npa_arr", "run_one_frame_arr", "run_all_frames_npa_arr", "run_all_frames_arr", "create_config_file", ] __author__ = "Agnès-Elisabeth Petit" __date__ = "29/04/2021" __version__ = "1.0" __copyright__ = "(c) 2022 CC-BY-NC-SA" # Library import import argparse import configparser import logging import os import re import sys import MDAnalysis as Mda import numpy as np import pandas as pd # Global variables # Functions def test_setup(): global args args = parse_arguments() args.verbose = True def parse_arguments(): parser = argparse.ArgumentParser( description="This script allows to launch a calculation of the pore" + " diameter of an aquaporin as a function of time from" + " a .gro file and an .xtc file.", formatter_class=argparse.ArgumentDefaultsHelpFormatter, prefix_chars="-", add_help=True, ) parser.add_argument( "-v", "--verbose", action="store_true", help="""Information messages to stderr""", ) parser.add_argument( "-gro", "--gro_file", type=str, nargs=1, help="""My input .gro file""" ) parser.add_argument( "-xtc", "--xtc_file", type=str, nargs=1, help="""My input .xtc file""" ) parser.add_argument( "--gro_file_ext", type=str, nargs=1, help="""My input .gro file""" ) parser.add_argument( "--xtc_file_ext", type=str, nargs=1, help="""My input .xtc file""" ) parser.add_argument( "-w", "--work_directory", type=str, nargs=1, help="""It's work Directory""" ) parser.add_argument( "-od", "--output_directory", default="stdout", type=str, nargs=1, help="""It's output Directory""", ) parser.add_argument( "-npao", "--npa_one", type=str, nargs=1, help="""3-letter pattern of the 3 amino acids of npa1""", ) parser.add_argument( "-npas", "--npa_second", type=str, nargs=1, help="""3-letter pattern of the 3 amino acids of npa2""", ) parser.add_argument( "-arro", "--arr_one", type=str, default="", nargs=1, help="""3-letter pattern of the 4 amino acids of first ArR on H5""", ) parser.add_argument( "-arrs", "--arr_second", type=str, default="", nargs=1, help="""3-letter pattern of the 4 amino acids of second ArR on HE""", ) parser.add_argument("-o", "--output", help="""Output for files""") parser.add_argument("-log", "--log_output", help="""Output for files""") return parser.parse_args() def search_amino_acids_pattern(list_pattern, uni): """ Description:Creates a list that contains two lists. The first one contains the residue numbers and the second one the residue names for each amino acid in the pattern. param list_pattern: list of the names of each amino acid of the pattern. param uni: MDAnalysis universe. return: list containing two lists. One containing residue numbers and the other containing residue names. """ if args.verbose: logging.info("\nFunction search_amino_acids_pattern") logging.info("The input list is " + str(list_pattern)) logging.info("The input universe is " + str(uni)) if len(list_pattern) == 3: slct_pattern = ( "(resname " + list_pattern[0] + " or resname " + list_pattern[1] + " or resname " + list_pattern[2] + ") and name CA" ) else: slct_pattern = ( "(resname " + list_pattern[0] + " or resname " + list_pattern[1] + " or resname " + list_pattern[2] + " or resname " + list_pattern[3] + ") and name CA" ) if args.verbose: logging.info("The selected pattern is : " + slct_pattern) list_atm_npa = uni.select_atoms(slct_pattern).residues list_all_num_resid = list_atm_npa.resids if args.verbose: logging.info( "The list containing all the residue numbers associated" + " with the pattern is : " + str(list_all_num_resid) ) list_all_name_resid = list_atm_npa.resnames if args.verbose: logging.info( "The list containing all the residue names associated with the " + "pattern is : " + str(list_all_name_resid) ) list_all_resid = [list_all_num_resid, list_all_name_resid] if args.verbose: logging.info( "The list containing the lists of numbers and names of residues" + " associated with the pattern is : " + str(list_all_resid) ) return list_all_resid def calculate_length_proto(list_num_resid): """ Description : calculate the length of each protomer according to the number of amino acids corresponding to the pattern. param list_num_resid: list of amino acid numbers corresponding to the pattern in the protein. return: list of the length of each protomer. """ if args.verbose: logging.info("\nFunction calculate_length_proto") logging.info("The input list is " + str(list_num_resid)) list_length_proto = [1, 0, 0, 0, 0, 0, 0, 0] ind_list_length_proto = 0 for i, v in enumerate(list_num_resid[1:]): if int(v) < int(list_num_resid[i]): ind_list_length_proto += 1 list_length_proto[ind_list_length_proto] += 1 else: list_length_proto[ind_list_length_proto] += 1 if args.verbose: for i, v in enumerate(list_length_proto): logging.info( "The length of the protomer " + str(i + 1) + " is " + str(v) + " residues" ) return list_length_proto def create_config_file(list_length_proto): """ Description : creates a Configparser object that describes the start and end indices of each protomer. param list_length_proto: list of the length of each protomer. return: Configparser object that describes the start and end indices of each protomer. """ if args.verbose: logging.info("\nFunction create_config_file") logging.info("The input list is " + str(list_length_proto)) config = configparser.ConfigParser() proto_prev = "" for aqp in range(1, 3): for p in range(1, 5): if aqp == 1 and p == 1: proto_current = "AQP" + str(aqp) + ",P" + str(p) config[proto_current] = {} config.set(proto_current, "start", str(0)) config.set(proto_current, "end", str(list_length_proto[0] - 1)) proto_prev = proto_current if args.verbose: logging.info(proto_current + " starts at index " + str(config["AQP1,P1"]["start"]) + " and finish at index " + str(config["AQP1,P1"]["end"])) else: proto_current = "AQP" + str(aqp) + ",P" + str(p) config[proto_current] = {} if aqp == 1 and p == 2: config.set(proto_current, "start", str(int(config[proto_prev]["start"]) + list_length_proto[p-2] - 1)) else: config.set(proto_current, "start", str(int(config[proto_prev]["start"]) + list_length_proto[p-2])) config.set(proto_current, "end", str(int(config[proto_prev]["end"]) + list_length_proto[p-1])) proto_prev = proto_current if args.verbose: logging.info( proto_current + " starts at index " + config[proto_current]["start"] + " and finish at index " + config[proto_current]["end"] ) if args.verbose: logging.info( "The sections of the settings object are : " + str(config.sections()) ) logging.info( "The keys for each section of the settings object are: " + str(list(config["AQP1,P3"].keys())) ) return config def search_pattern(list_all_resid, config, list_pattern, pattern_resid, uni): """ Description : creates the dictionary that contains the name, number and position of the residue and the key for the following dictionary. param list_all_resid: list containing two lists. One containing residue numbers and the other containing residue names. param config: Configparser object that describes the start and end indices of each protomer. param list_pattern: list of the names of each amino acid of the pattern. param pattern_resid: string that gives the pattern (NPA1,NPA2,ArR1 ou ArR2) in which we are. param uni: MDAnalysis universe return: dictionary which contains as key the name of the aquaporin, the protomer and the residue, as well as the residue number. The value contains the name, the number and the position of the residue, as well as the key of the next dictionary. """ if args.verbose: logging.info("\nFunction search_pattern") logging.info("The first input argument is " + str(list_all_resid)) logging.info("The second input argument is a configParser object") logging.info("The third input argument is " + str(list_pattern)) logging.info("The fourth input argument is " + str(pattern_resid)) logging.info("The fourth input argument is " + str(uni)) num_ca_npa = 0 num_ca_arr = 0 dict_resid_pattern = {} str_pattern = "".join(list_pattern) for aqp in range(1, 3): for p in range(1, 5): list_ind_temp_resid = [] list_ind_final_resid = [] proto = "AQP" + str(aqp) + ",P" + str(p) ss_name_list = list( list_all_resid[1][ int(config[proto]["start"]): int(config[proto]["end"]) ] ) ss_name_str = "".join(ss_name_list) pattern = re.compile(str_pattern) for v in pattern.finditer(ss_name_str): if args.verbose: logging.info( "The pattern " + str(pattern) + " is searched in " + ss_name_str ) if aqp == 1 and p == 1: list_ind_temp_resid.append(int(v.start() / 3)) else: list_ind_temp_resid.append( int(v.start() / 3) + int(config[proto]["start"]) ) if args.verbose: logging.info("The temporary index list is " + str(list_ind_temp_resid)) for ind_current in list_ind_temp_resid: val_one = list_all_resid[0][ind_current] val_two = list_all_resid[0][ind_current + 1] val_three = list_all_resid[0][ind_current + 2] if val_one + 1 == val_two and val_one + 2 == val_three: if pattern_resid == "ArR1" or pattern_resid == "ArR2": val_four = list_all_resid[0][ind_current + 3] if val_one + 3 == val_four: list_ind_final_resid.append(ind_current) else: list_ind_final_resid.append(ind_current) list_ind_final_resid.append(ind_current + 1) list_ind_final_resid.append(ind_current + 2) if len(list_ind_final_resid) == 0: if args.verbose: logging.critical("The pattern is not found " + "in the protein sequence") print("The pattern is not found in the protein sequence") sys.exit(1) if args.verbose: logging.info("The final index list is " + str(list_ind_final_resid)) ind_list = [] for ind, val_ind in enumerate(list_ind_final_resid): ind_list.append(ind) key = ( proto + "," + list_all_resid[1][val_ind] + "," + str(list_all_resid[0][val_ind]) ) if args.verbose: logging.info("The key of dictionary is " + key) slct_resid = ( "(resid " + str(list_all_resid[0][val_ind]) + ") and name CA" ) if args.verbose: logging.info("The selected residue is : " + slct_resid) resid = uni.select_atoms(slct_resid) if args.verbose: logging.info("The AtomGroup is " + str(resid)) if pattern_resid == "ArR1": new_key = "ArR1," + proto if args.verbose: logging.info("The key of next dictionary is " + new_key) position_ca = resid.positions[num_ca_arr] num_ca_arr += 1 elif pattern_resid == "ArR2": new_key = "ArR2," + proto if args.verbose: logging.info("The key of next dictionary is " + new_key) position_ca = resid.positions[num_ca_arr] num_ca_arr += 1 else: if (ind - 3) in ind_list or pattern_resid == "NPA2": new_key = "NPA2," + proto if args.verbose: logging.info("The key of next dictionary is " + new_key) position_ca = resid.positions[num_ca_npa] else: new_key = "NPA1," + proto if args.verbose: logging.info("The key of next dictionary is " + new_key) position_ca = resid.positions[num_ca_npa] dict_resid_pattern[key] = [ list_all_resid[1][val_ind], list_all_resid[0][val_ind], position_ca, new_key, ] if args.verbose: logging.info( "The value associated at the key " + key + " is " + str(dict_resid_pattern[key]) ) if len(list_ind_final_resid) > 1: num_ca_npa += 1 if args.verbose: logging.info("The dictionary contains" + str(dict_resid_pattern)) return dict_resid_pattern def search_resid_positions(dict_resid_pattern, uni): """ Description : creates the dictionary that contains the position of the barycenter of the alpha carbons of NPA and ArR. param dict_resid_pattern: dictionary which contains as key the name of the aquaporin, the protomer and the residue, as well as the residue number. The value contains the name, the number and the position of the residue, as well as the key of the next dictionary. param uni: MDAnalysis universe. return: dictionary which contains as key the name of the pattern, the aquaporin and the protomer. The value associated with the key contain the position of the barycenter of the pattern. """ if args.verbose: logging.info("\nFunction search_resid_positions") logging.info("The input dictionary is " + str(dict_resid_pattern)) logging.info("The input universe is " + str(uni)) dict_pos_resid = {} for npa in range(1, 3): num_proto = 0 for aqp in range(1, 3): for p in range(1, 5): list_atm_npa = [] key_npa = "NPA" + str(npa) + ",AQP" + str(aqp) + ",P" + str(p) key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p) key_arr2 = "ArR2,AQP" + str(aqp) + ",P" + str(p) for k, v in dict_resid_pattern.items(): if key_npa == v[3]: list_atm_npa.append(v[1]) elif key_arr1 == v[3]: dict_pos_resid[key_arr1] = v[2] elif key_arr2 == v[3]: dict_pos_resid[key_arr2] = v[2] if len(list_atm_npa) > 0: slct_pattern = ( "(resid " + str(list_atm_npa[0]) + " or resid " + str(list_atm_npa[1]) + " or resid " + str(list_atm_npa[2]) + ") and name CA" ) if args.verbose: logging.info("The selected pattern is " + slct_pattern) atom_group_pattern = uni.select_atoms(slct_pattern) if args.verbose: logging.info(str(atom_group_pattern)) deb = 0 + 3 * num_proto fin = +3 + 3 * num_proto atm_proto_aqp = atom_group_pattern[deb:fin] if args.verbose: logging.info("deb : " + str(deb) + " fin : " + str(fin)) logging.info("The selected AtomGroup is " + str(atm_proto_aqp)) dict_pos_resid[key_npa] = atm_proto_aqp.centroid() num_proto += 1 if args.verbose: logging.info("The dictionary contains " + str(dict_pos_resid)) return dict_pos_resid def calculate_euclidean_distance(dict_pos_resid, choice_measure): """ Description : creates the dictionary that contains the Euclidean distance between ArR and NPA1 or NPA2. param dict_pos_resid: dictionary which contains as key the name of the pattern, the aquaporin and the protomer. The value contains the position of the barycenter of the pattern. param choice_measure: variable that contains ArR-ArR or NPA-ArR. return: dictionary which contains as key the name of the couple, the aquaporin and the protomer. The value contains the euclidean distance between the barycenters of the two motifs of the couple. """ if args.verbose: logging.info("\nFunction calculate_euclidean_distance") logging.info("The input dictionary is " + str(dict_pos_resid)) logging.info("The distance will be calculated between" + choice_measure) dict_pair_dist = {} if choice_measure == "NPA-ArR": for npa in range(1, 3): for aqp in range(1, 3): for p in range(1, 5): key_npa = "NPA" + str(npa) + ",AQP" + str(aqp) \ + ",P" + str(p) key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p) new_key = "ArR-NPA" + str(npa) + ",AQP" + str(aqp) \ + ",P" + str(p) dict_pair_dist[new_key] = np.linalg.norm( dict_pos_resid[key_npa] - dict_pos_resid[key_arr1] ) if args.verbose: logging.info( "Key NPA : " + key_npa + " value : " + str(dict_pos_resid[key_npa]) ) logging.info( "Key ArR : " + key_arr1 + " value : " + str(dict_pos_resid[key_arr1]) ) logging.info( "The value associated at the key " + new_key + " is " + str(dict_pair_dist[new_key]) ) else: for aqp in range(1, 3): for p in range(1, 5): key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p) key_arr2 = "ArR2,AQP" + str(aqp) + ",P" + str(p) new_key = "ArR1-ArR2,AQP" + str(aqp) + ",P" + str(p) dict_pair_dist[new_key] = np.linalg.norm( dict_pos_resid[key_arr1] - dict_pos_resid[key_arr2] ) if args.verbose: logging.info( "Key ArR1 : " + key_arr1 + " value : " + str(dict_pos_resid[key_arr1]) ) logging.info( "Key ArR2 : " + key_arr2 + " value : " + str(dict_pos_resid[key_arr2]) ) logging.info( "The value associated at the key " + new_key + " is " + str(dict_pair_dist[new_key]) ) if args.verbose: logging.info("The dictionary contains " + str(dict_pair_dist)) return dict_pair_dist def run_one_frame_npa_arr(list_resid_npa1, list_resid_npa2, list_resid_arr, uni): """ Description: compiles the different functions to create the dict_pair_dist dictionary for one frame. param list_resid_npa1: list containing the names of the residues constituting the NPA1. param list_resid_npa2: list containing the names of the residues constituting the NPA2. param list_resid_arr: list containing the names of the residues constituting the ArR. param uni: MDAnalysis universe. return: dictionary which contains as key the name of the couple, the aquaporin and the protomer. The value contains the euclidean distance between the barycenters of the two motifs of the couple. """ if args.verbose: logging.info("\nFunction runForOneFrame") logging.info("The first input parameter is " + str(list_resid_npa1)) logging.info("The second input parameter is " + str(list_resid_npa2)) logging.info("The third input parameter is " + str(list_resid_arr)) logging.info("The fourth input parameter is " + str(uni)) choice_measure = "NPA-ArR" if list_resid_npa1 == list_resid_npa2: if args.verbose: logging.info("Pattern NPA1 identical to pattern NPA2") pattern = "NPA1" list_all_resid_npa = search_amino_acids_pattern(list_resid_npa1, uni) list_length_proto_npa = \ calculate_length_proto(list(list_all_resid_npa[0])) config_npa = create_config_file(list_length_proto_npa) dict_resid_pattern = search_pattern( list_all_resid_npa, config_npa, list_resid_npa1, pattern, uni ) else: if args.verbose: logging.info("Pattern NPA1 different from the NPA2 pattern") # NPA1 pattern = "NPA1" list_all_resid_npa = search_amino_acids_pattern(list_resid_npa1, uni) list_length_proto_npa = \ calculate_length_proto(list(list_all_resid_npa[0])) config_npa1 = create_config_file(list_length_proto_npa) dict_resid_pattern = search_pattern( list_all_resid_npa, config_npa1, list_resid_npa1, pattern, uni ) # NPA2 pattern = "NPA2" list_all_resid_npa = search_amino_acids_pattern(list_resid_npa2, uni) list_length_proto_npa = \ calculate_length_proto(list(list_all_resid_npa[0])) config_npa2 = create_config_file(list_length_proto_npa) dict_resid_pattern.update( search_pattern( list_all_resid_npa, config_npa2, list_resid_npa2, pattern, uni ) ) # ArR pattern = "ArR1" list_all_resid_arr = search_amino_acids_pattern(list_resid_arr, uni) list_length_proto_arr = \ calculate_length_proto(list(list_all_resid_arr[0])) config_arr = create_config_file(list_length_proto_arr) dict_resid_pattern.update( search_pattern(list_all_resid_arr, config_arr, list_resid_arr, pattern, uni) ) # dict position dict_pos_resid = search_resid_positions(dict_resid_pattern, uni) # dict distance dict_pair_dist = \ calculate_euclidean_distance(dict_pos_resid, choice_measure) return dict_pair_dist def run_one_frame_arr(list_resid_arr1, list_resid_arr2, uni): """ Description: compiles the different functions to create the dict_pair_dist dictionary for one frame. param list_resid_arr1: list containing the names of the residues constituting the ArR1. param list_resid_arr2: list_resid_arr2, list containing the names of the residues constituting the ArR2. param uni: MDAnalysis universe. return: dictionary which contains as key the name of the couple, the aquaporin and the protomer. The value contains the euclidean distance between the barycenters of the two motifs of the couple. """ if args.verbose: logging.info("\nFunction runForOneFrame") logging.info("The first input parameter is " + str(list_resid_arr1)) logging.info("The second input parameter is " + str(list_resid_arr2)) logging.info("The third input parameter is " + str(uni)) dict_resid_pattern = {} # ArR1 pattern = "ArR1" list_all_resid_arr1 = search_amino_acids_pattern(list_resid_arr1, uni) list_length_proto_arr1 = \ calculate_length_proto(list(list_all_resid_arr1[0])) config_arr1 = create_config_file(list_length_proto_arr1) dict_resid_pattern.update( search_pattern(list_all_resid_arr1, config_arr1, list_resid_arr1, pattern, uni) ) # ArR2 pattern = "ArR2" list_all_resid_arr2 = search_amino_acids_pattern(list_resid_arr2, uni) list_length_proto_arr2 = \ calculate_length_proto(list(list_all_resid_arr2[0])) config_arr2 = create_config_file(list_length_proto_arr2) dict_resid_pattern.update( search_pattern(list_all_resid_arr2, config_arr2, list_resid_arr2, pattern, uni) ) # dict position dict_pos_resid = search_resid_positions(dict_resid_pattern, uni) # dict distance choice_measure = "ArR-ArR" dict_pair_dist = \ calculate_euclidean_distance(dict_pos_resid, choice_measure) return dict_pair_dist def run_all_frames_npa_arr( list_resid_npa1, list_resid_npa2, list_resid_arr, col_number, uni ): """ Description: creates the table containing the pore diameter of an aquaporin over time. param list_resid_npa1: list containing the names of the residues constituting the NPA1. param list_resid_npa2: list containing the names of the residues constituting the NPA2. param list_resid_arr: list containing the names of the residues constituting the ArR. param col_number: number of columns in the table param uni: MDAnalysis universe. return: list containing an array and a key list. The table containing the pore diameter of an aquaporin over time. """ if args.verbose: logging.info("\nFunction runForAllFrame") logging.info("The first input parameter is " + str(list_resid_npa1)) logging.info("The second input parameter is " + str(list_resid_npa2)) logging.info("The third input parameter is " + str(list_resid_arr)) logging.info("The fourth input parameter is " + str(col_number)) logging.info("The fifth input parameter is " + str(uni)) nbr_frame = len(uni.trajectory) if args.verbose: logging.info("The number of frame is " + str(nbr_frame)) tab_pair_dist = np.empty((nbr_frame, col_number)) if args.verbose: logging.info( "The dimensions of the table of means and standard deviation are" + " as follows " + str(tab_pair_dist.shape) ) list_key = [] pattern_arr = "" pattern_npa1 = "" pattern_npa2 = "" dict_prot = { "ALA": "A", "CYS": "C", "ASP": "D", "GLU": "E", "PHE": "F", "GLY": "G", "HIS": "H", "ILE": "I", "LYS": "K", "LEU": "L", "MET": "M", "ASN": "N", "PRO": "P", "GLN": "Q", "ARG": "R", "SER": "S", "THR": "T", "VAL": "V", "TRP": "W", "TYR": "Y", "HSD": "H", "HSP": "H", } for aa in list_resid_arr: pattern_arr += dict_prot[aa] for aa in list_resid_npa1: pattern_npa1 += dict_prot[aa] for aa in list_resid_npa2: pattern_npa2 += dict_prot[aa] for ts in uni.trajectory: if args.verbose: logging.info("Frame " + str(uni.trajectory.frame)) logging.info(str(ts)) dict_pair_dist = run_one_frame_npa_arr( list_resid_npa1, list_resid_npa2, list_resid_arr, uni ) tab_pair_dist[uni.trajectory.frame][0] = uni.trajectory.time list_key = ["Time in ps"] num_npa_proto = 0 for npa in range(1, 3): if npa == 1: pattern_complete = "," + pattern_arr + "-" + pattern_npa1 + "1" else: pattern_complete = "," + pattern_arr + "-" + pattern_npa2 + "2" for aqp in range(1, 3): for p in range(1, 5): num_npa_proto += 1 key1 = "ArR-NPA" + str(npa) + ",AQP" + str(aqp)\ + ",P" + str(p) key2 = "AQP" + str(aqp) + ",P" + str(p) + pattern_complete\ + " (Angstroms)" tab_pair_dist[uni.trajectory.frame][num_npa_proto] = \ dict_pair_dist[key1] list_key.append(key2) list_out = [tab_pair_dist, list_key] return list_out def run_all_frames_arr(list_resid_arr1, list_resid_arr2, col_number, uni): """ Description: creates the table containing the pore diameter of an aquaporin over time. param list_resid_arr1: list containing the names of the residues constituting the ArR1. param list_resid_arr2: list containing the names of the residues constituting the ArR2. param col_number: number of columns in the table param uni: MDAnalysis universe. return: list containing an array and a key list. The table containing the pore diameter of an aquaporin over time. """ if args.verbose: logging.info("\nFunction runForAllFrame") logging.info("The first input parameter is " + str(list_resid_arr1)) logging.info("The second input parameter is " + str(list_resid_arr2)) logging.info("The third input parameter is " + str(col_number)) logging.info("The fourth input parameter is " + str(uni)) nbr_frame = len(uni.trajectory) if args.verbose: logging.info("The number of frame is " + str(nbr_frame)) tab_pair_dist = np.empty((nbr_frame, col_number)) if args.verbose: logging.info( "The dimensions of the table of means and standard deviation" + " are as follows " + str(tab_pair_dist.shape) ) list_key = [] pattern1 = "" pattern2 = "" dict_prot = { "ALA": "A", "CYS": "C", "ASP": "D", "GLU": "E", "PHE": "F", "GLY": "G", "HIS": "H", "ILE": "I", "LYS": "K", "LEU": "L", "MET": "M", "ASN": "N", "PRO": "P", "GLN": "Q", "ARG": "R", "SER": "S", "THR": "T", "VAL": "V", "TRP": "W", "TYR": "Y", "HSD": "H", "HSP": "H", } for aa in list_resid_arr1: pattern1 += dict_prot[aa] for aa in list_resid_arr2: pattern2 += dict_prot[aa] for ts in uni.trajectory: if args.verbose: logging.info("Frame " + str(uni.trajectory.frame)) logging.info(str(ts)) dict_pair_dist = run_one_frame_arr(list_resid_arr1, list_resid_arr2, uni) tab_pair_dist[uni.trajectory.frame][0] = uni.trajectory.time list_key = ["Time (ps)"] num_proto = 0 for aqp in range(1, 3): for p in range(1, 5): num_proto += 1 key1 = "ArR1-ArR2,AQP" + str(aqp) + ",P" + str(p) key2 = ( "AQP" + str(aqp) + ",P" + str(p) + "," + pattern1 + "-" + pattern2 + " (Angstroms)" ) tab_pair_dist[uni.trajectory.frame][num_proto] = \ dict_pair_dist[key1] list_key.append(key2) list_out = [tab_pair_dist, list_key] return list_out def create_final_dataframe( list_resid_arr1, list_resid_arr2, list_resid_npa1, list_resid_npa2, uni ): """ Description: creates the final dataframe. param list_resid_arr1: list containing the arr1 pattern param list_resid_arr2: list containing the arr2 pattern param list_resid_npa1: list containing the npa1 pattern param list_resid_npa2: list containing the npa2 pattern param uni: MDAnalysis universe return: final dataframe """ if args.verbose: logging.info("\nFunction runForAllFrame") logging.info("The first input parameter is " + str(list_resid_arr1)) logging.info("The second input parameter is " + str(list_resid_arr2)) logging.info("The third input parameter is " + str(list_resid_npa1)) logging.info("The fourth input parameter is " + str(list_resid_npa2)) logging.info("The fifth input parameter is " + str(uni)) if list_resid_npa1 is None and list_resid_npa2 is None: if len(list_resid_arr1) >= 4 and len(list_resid_arr2) >= 4: columns_number = 9 list_out = run_all_frames_arr( list_resid_arr1, list_resid_arr2, columns_number, uni ) else: if args.verbose: if len(list_resid_arr1) < 4 <= len(list_resid_arr2): logging.critical("Pattern1 must be provided") elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4: logging.critical("Pattern1 and Pattern2 must be provided") elif len(list_resid_arr1) >= 4 > len(list_resid_arr2): logging.critical("Pattern2 must be provided") print("Pattern error : please read the log file or help") sys.exit(2) elif list_resid_npa1 is None and list_resid_npa2 is not None: if args.verbose: if len(list_resid_arr1) < 4 <= len(list_resid_arr2): logging.critical("PatternArr1 must be provided") logging.critical("Please, do not put patternArr2") logging.critical("PatternNPA1 must be provided") elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4: logging.critical("PatternArr1 must be provided") logging.critical("PatternNPA1 must be provided") elif len(list_resid_arr1) >= 4 > len(list_resid_arr2): logging.critical("PatternNPA1 must be provided") else: logging.critical("Please, do not put patternArr2") logging.critical("PatternNPA1 must be provided") print("Pattern error : please read the log file or help") sys.exit(2) elif list_resid_npa1 is not None and list_resid_npa2 is None: if args.verbose: if len(list_resid_arr1) < 4 <= len(list_resid_arr2): logging.critical("PatternArr1 must be provided") logging.critical("Please, do not put patternArr2") logging.critical("PatternNPA2 must be provided") elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4: logging.critical("PatternArr1 must be provided") logging.critical("PatternNPA2 must be provided") elif len(list_resid_arr1) >= 4 > len(list_resid_arr2): logging.critical("PatternNPA1 must be provided") else: logging.critical("Please, do not put patternArr2") logging.critical("PatternNPA2 must be provided") print("Pattern error : please read the log file or help") sys.exit(2) else: if ( list_resid_npa1 is not None and list_resid_npa2 is not None and len(list_resid_arr1) >= 4 > len(list_resid_arr2) ): columns_number = 17 list_out = run_all_frames_npa_arr( list_resid_npa1, list_resid_npa2, list_resid_arr1, columns_number, uni ) else: if len(list_resid_arr1) < 4 <= len(list_resid_arr2): logging.critical("PatternArr1 must be provided") logging.critical("Please, do not put patternArr2") elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4: logging.critical("Pattern1 must be provided") print("Pattern error : please read the log file or help") sys.exit(2) list_std = list(np.std(list_out[0], axis=0)) list_mean = list(np.mean(list_out[0], axis=0)) np_mean_around = list(np.around(list_mean, decimals=3)) np_std_around = list(np.around(list_std, decimals=3)) np_mean_around[0] = "Mean" np_std_around[0] = "Std" np_mean_around = np.array([np_mean_around]) np_std_around = np.array([np_std_around]) np_mean_std_around = np.concatenate((np_mean_around, np_std_around), axis=0) tab_pair_dist = np.concatenate((np_mean_std_around, list_out[0]), axis=0) df_dist = pd.DataFrame(tab_pair_dist, columns=list_out[1]) return df_dist # Class if __name__ == "__main__": args = parse_arguments() out_file_name = "test" file_name_log = out_file_name + ".log" if args.verbose: if args.log_output: if os.path.isfile(args.log_output): os.remove(args.log_output) logging.basicConfig( filename=args.log_output, format="%(levelname)s - %(message)s", level=logging.INFO, ) else: if os.path.isfile(file_name_log): os.remove(file_name_log) logging.basicConfig( filename=file_name_log, format="%(levelname)s - %(message)s", level=logging.INFO, ) logging.info("verbose mode on") gro_file = '' xtc_file = '' if args.gro_file: gro_file = os.path.basename(args.gro_file[0]) else: logging.critical("A .gro file must be provided") if args.xtc_file: xtc_file = os.path.basename(args.xtc_file[0]) else: logging.critical("A .xtc file must be provided") if args.work_directory: work_directory = args.work_directory[0] else: work_directory = os.path.dirname(args.gro_file[0]) if not work_directory.endswith("/"): work_directory += "/" output_directory = args.output_directory[0] if args.output: output = args.output else: output = "" os.chdir(work_directory) if args.gro_file_ext and args.xtc_file_ext: u = Mda.Universe( args.gro_file[0], args.xtc_file[0], topology_format=args.gro_file_ext[0], format=args.xtc_file_ext[0], ) else: u = Mda.Universe(args.gro_file[0], args.xtc_file[0]) if args.verbose: logging.info("Load universe : " + str(u)) if args.npa_one: resid_npa1 = args.npa_one[0].split() else: resid_npa1 = args.npa_one if args.npa_second: resid_npa2 = args.npa_second[0].split() else: resid_npa2 = args.npa_second if args.arr_one: resid_arr1 = args.arr_one[0].split() else: resid_arr1 = args.arr_one if args.arr_second: resid_arr2 = args.arr_second[0].split() else: resid_arr2 = args.arr_second df_final = create_final_dataframe(resid_arr1, resid_arr2, resid_npa1, resid_npa2, u) if len(output_directory) > 2: if not os.path.exists(output_directory): os.makedirs(output_directory) os.chdir(output_directory) if len(output) > 2: df_final.to_csv(output, sep="\t", index=False) else: file_name_tabular = out_file_name + "PairDist.tabular" df_final.to_csv(file_name_tabular, sep="\t", index=False)