Mercurial > repos > agpetit > calculate_diameter
diff calculate_pore_diameter_aqp.py @ 0:e227d80e53be draft
"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/aubi_piaf commit 48a10de1b21f94ab8019d9d0e4a43e0bd9d0c31e-dirty"
author | agpetit |
---|---|
date | Wed, 25 May 2022 07:17:24 +0000 |
parents | |
children | a4097272ade0 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/calculate_pore_diameter_aqp.py Wed May 25 07:17:24 2022 +0000 @@ -0,0 +1,1064 @@ +#!/usr/bin/env python3 + +# coding: utf-8 +""" +This script allows to launch a calculation of the pore diameter of an aquaporin + as a function of time from a .gro file and an .xtc file. +At the end of the script, a table containing the evolution of the distance over + time is created. +# USAGE : calculate_pore_diameter_aqp.py -xtc xtc_file -gro gro_file + -npao npa1 Pattern (optional) -npas npa2 Pattern (optional) + -arro arR1 Pattern on H5 (optional) -arrs arR2 Pattern on HE (optional) + -v verbose (optional) -w work_directory (optional) + -o output_directory (optional) --gro_file_ext extension of gro file + (optional) --xtc_file_ext extension of xtc file (optional) + -log name of log file (optional) -o name of output file (optional). +""" + + +__all__ = [ + "calculate_length_proto", + "search_pattern", + "run_one_frame_npa_arr", + "run_one_frame_arr", + "run_all_frames_npa_arr", + "run_all_frames_arr", + "create_config_file", +] +__author__ = "Agnès-Elisabeth Petit" +__date__ = "29/04/2021" +__version__ = "1.0" +__copyright__ = "(c) 2022 CC-BY-NC-SA" + +# Library import +import argparse +import configparser +import logging +import os +import re +import sys + +import MDAnalysis as Mda + +import numpy as np + +import pandas as pd + +# Global variables + +# Functions + + +def test_setup(): + global args + args = parse_arguments() + args.verbose = True + + +def parse_arguments(): + parser = argparse.ArgumentParser( + description="This script allows to launch a calculation of the pore" + + " diameter of an aquaporin as a function of time from" + + " a .gro file and an .xtc file.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + prefix_chars="-", + add_help=True, + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="""Information messages to stderr""", + ) + parser.add_argument( + "-gro", "--gro_file", type=str, nargs=1, help="""My input .gro file""" + ) + parser.add_argument( + "-xtc", "--xtc_file", type=str, nargs=1, help="""My input .xtc file""" + ) + parser.add_argument( + "--gro_file_ext", type=str, nargs=1, help="""My input .gro file""" + ) + parser.add_argument( + "--xtc_file_ext", type=str, nargs=1, help="""My input .xtc file""" + ) + parser.add_argument( + "-w", "--work_directory", type=str, nargs=1, + help="""It's work Directory""" + ) + parser.add_argument( + "-od", + "--output_directory", + default="stdout", + type=str, + nargs=1, + help="""It's output Directory""", + ) + parser.add_argument( + "-npao", + "--npa_one", + type=str, + nargs=1, + help="""3-letter pattern of the 3 amino acids of npa1""", + ) + parser.add_argument( + "-npas", + "--npa_second", + type=str, + nargs=1, + help="""3-letter pattern of the 3 amino acids of npa2""", + ) + parser.add_argument( + "-arro", + "--arr_one", + type=str, + default="", + nargs=1, + help="""3-letter pattern of the 4 amino acids of first ArR on H5""", + ) + parser.add_argument( + "-arrs", + "--arr_second", + type=str, + default="", + nargs=1, + help="""3-letter pattern of the 4 amino acids of second ArR on HE""", + ) + parser.add_argument("-o", "--output", help="""Output for files""") + parser.add_argument("-log", "--log_output", help="""Output for files""") + return parser.parse_args() + + +def search_amino_acids_pattern(list_pattern, uni): + """ + Description:Creates a list that contains two lists. + The first one contains the residue numbers and the second one the residue + names for each amino acid in the pattern. + param list_pattern: list of the names of each amino acid of the pattern. + param uni: MDAnalysis universe. + return: list containing two lists. One containing residue numbers + and the other containing residue names. + """ + if args.verbose: + logging.info("\nFunction search_amino_acids_pattern") + logging.info("The input list is " + str(list_pattern)) + logging.info("The input universe is " + str(uni)) + if len(list_pattern) == 3: + slct_pattern = ( + "(resname " + + list_pattern[0] + + " or resname " + + list_pattern[1] + + " or resname " + + list_pattern[2] + + ") and name CA" + ) + else: + slct_pattern = ( + "(resname " + + list_pattern[0] + + " or resname " + + list_pattern[1] + + " or resname " + + list_pattern[2] + + " or resname " + + list_pattern[3] + + ") and name CA" + ) + if args.verbose: + logging.info("The selected pattern is : " + slct_pattern) + list_atm_npa = uni.select_atoms(slct_pattern).residues + list_all_num_resid = list_atm_npa.resids + if args.verbose: + logging.info( + "The list containing all the residue numbers associated" + + " with the pattern is : " + str(list_all_num_resid) + ) + list_all_name_resid = list_atm_npa.resnames + if args.verbose: + logging.info( + "The list containing all the residue names associated with the " + + "pattern is : " + str(list_all_name_resid) + ) + list_all_resid = [list_all_num_resid, list_all_name_resid] + if args.verbose: + logging.info( + "The list containing the lists of numbers and names of residues" + + " associated with the pattern is : " + str(list_all_resid) + ) + return list_all_resid + + +def calculate_length_proto(list_num_resid): + """ + Description : calculate the length of each protomer according to the number + of amino acids corresponding to the pattern. + param list_num_resid: list of amino acid numbers corresponding to the + pattern in the protein. + return: list of the length of each protomer. + """ + if args.verbose: + logging.info("\nFunction calculate_length_proto") + logging.info("The input list is " + str(list_num_resid)) + list_length_proto = [1, 0, 0, 0, 0, 0, 0, 0] + ind_list_length_proto = 0 + for i, v in enumerate(list_num_resid[1:]): + if int(v) < int(list_num_resid[i]): + ind_list_length_proto += 1 + list_length_proto[ind_list_length_proto] += 1 + else: + list_length_proto[ind_list_length_proto] += 1 + if args.verbose: + for i, v in enumerate(list_length_proto): + logging.info( + "The length of the protomer " + + str(i + 1) + + " is " + + str(v) + + " residues" + ) + return list_length_proto + + +def create_config_file(list_length_proto): + """ + Description : creates a Configparser object that describes the start and + end indices of each protomer. + param list_length_proto: list of the length of each protomer. + return: Configparser object that describes the start and end indices + of each protomer. + """ + if args.verbose: + logging.info("\nFunction create_config_file") + logging.info("The input list is " + str(list_length_proto)) + config = configparser.ConfigParser() + proto_prev = "" + for aqp in range(1, 3): + for p in range(1, 5): + if aqp == 1 and p == 1: + proto_current = "AQP" + str(aqp) + ",P" + str(p) + config[proto_current] = {} + config.set(proto_current, "start", str(0)) + config.set(proto_current, "end", str(list_length_proto[0] - 1)) + proto_prev = proto_current + if args.verbose: + logging.info(proto_current + " starts at index " + + str(config["AQP1,P1"]["start"]) + + " and finish at index " + + str(config["AQP1,P1"]["end"])) + else: + proto_current = "AQP" + str(aqp) + ",P" + str(p) + config[proto_current] = {} + if aqp == 1 and p == 2: + config.set(proto_current, "start", + str(int(config[proto_prev]["start"]) + + list_length_proto[p-2] - 1)) + else: + config.set(proto_current, "start", + str(int(config[proto_prev]["start"]) + + list_length_proto[p-2])) + config.set(proto_current, "end", + str(int(config[proto_prev]["end"]) + + list_length_proto[p-1])) + proto_prev = proto_current + if args.verbose: + logging.info( + proto_current + + " starts at index " + + config[proto_current]["start"] + + " and finish at index " + + config[proto_current]["end"] + ) + if args.verbose: + logging.info( + "The sections of the settings object are : " + + str(config.sections()) + ) + logging.info( + "The keys for each section of the settings object are: " + + str(list(config["AQP1,P3"].keys())) + ) + return config + + +def search_pattern(list_all_resid, config, list_pattern, pattern_resid, uni): + """ + Description : creates the dictionary that contains the name, number and + position of the residue and the key for the following dictionary. + param list_all_resid: list containing two lists. One containing residue + numbers and the other containing residue names. + param config: Configparser object that describes the start and end indices + of each protomer. + param list_pattern: list of the names of each amino acid of the pattern. + param pattern_resid: string that gives the pattern (NPA1,NPA2,ArR1 ou ArR2) + in which we are. + param uni: MDAnalysis universe + return: dictionary which contains as key the name of the + aquaporin, the protomer and the residue, as well as the residue number. + The value contains the name, the number and the position of the residue, + as well as the key of the next dictionary. + """ + if args.verbose: + logging.info("\nFunction search_pattern") + logging.info("The first input argument is " + str(list_all_resid)) + logging.info("The second input argument is a configParser object") + logging.info("The third input argument is " + str(list_pattern)) + logging.info("The fourth input argument is " + str(pattern_resid)) + logging.info("The fourth input argument is " + str(uni)) + num_ca_npa = 0 + num_ca_arr = 0 + dict_resid_pattern = {} + str_pattern = "".join(list_pattern) + for aqp in range(1, 3): + for p in range(1, 5): + list_ind_temp_resid = [] + list_ind_final_resid = [] + proto = "AQP" + str(aqp) + ",P" + str(p) + ss_name_list = list( + list_all_resid[1][ + int(config[proto]["start"]): int(config[proto]["end"]) + ] + ) + ss_name_str = "".join(ss_name_list) + pattern = re.compile(str_pattern) + for v in pattern.finditer(ss_name_str): + if args.verbose: + logging.info( + "The pattern " + str(pattern) + " is searched in " + + ss_name_str + ) + if aqp == 1 and p == 1: + list_ind_temp_resid.append(int(v.start() / 3)) + else: + list_ind_temp_resid.append( + int(v.start() / 3) + int(config[proto]["start"]) + ) + if args.verbose: + logging.info("The temporary index list is " + + str(list_ind_temp_resid)) + for ind_current in list_ind_temp_resid: + val_one = list_all_resid[0][ind_current] + val_two = list_all_resid[0][ind_current + 1] + val_three = list_all_resid[0][ind_current + 2] + if val_one + 1 == val_two and val_one + 2 == val_three: + if pattern_resid == "ArR1" or pattern_resid == "ArR2": + val_four = list_all_resid[0][ind_current + 3] + if val_one + 3 == val_four: + list_ind_final_resid.append(ind_current) + else: + list_ind_final_resid.append(ind_current) + list_ind_final_resid.append(ind_current + 1) + list_ind_final_resid.append(ind_current + 2) + if len(list_ind_final_resid) == 0: + if args.verbose: + logging.critical("The pattern is not found " + + "in the protein sequence") + print("The pattern is not found in the protein sequence") + sys.exit(1) + if args.verbose: + logging.info("The final index list is " + + str(list_ind_final_resid)) + ind_list = [] + for ind, val_ind in enumerate(list_ind_final_resid): + ind_list.append(ind) + key = ( + proto + + "," + + list_all_resid[1][val_ind] + + "," + + str(list_all_resid[0][val_ind]) + ) + if args.verbose: + logging.info("The key of dictionary is " + key) + slct_resid = ( + "(resid " + str(list_all_resid[0][val_ind]) + + ") and name CA" + ) + if args.verbose: + logging.info("The selected residue is : " + slct_resid) + resid = uni.select_atoms(slct_resid) + if args.verbose: + logging.info("The AtomGroup is " + str(resid)) + if pattern_resid == "ArR1": + new_key = "ArR1," + proto + if args.verbose: + logging.info("The key of next dictionary is " + + new_key) + position_ca = resid.positions[num_ca_arr] + num_ca_arr += 1 + elif pattern_resid == "ArR2": + new_key = "ArR2," + proto + if args.verbose: + logging.info("The key of next dictionary is " + + new_key) + position_ca = resid.positions[num_ca_arr] + num_ca_arr += 1 + else: + if (ind - 3) in ind_list or pattern_resid == "NPA2": + new_key = "NPA2," + proto + if args.verbose: + logging.info("The key of next dictionary is " + + new_key) + position_ca = resid.positions[num_ca_npa] + else: + new_key = "NPA1," + proto + if args.verbose: + logging.info("The key of next dictionary is " + + new_key) + position_ca = resid.positions[num_ca_npa] + dict_resid_pattern[key] = [ + list_all_resid[1][val_ind], + list_all_resid[0][val_ind], + position_ca, + new_key, + ] + if args.verbose: + logging.info( + "The value associated at the key " + + key + + " is " + + str(dict_resid_pattern[key]) + ) + if len(list_ind_final_resid) > 1: + num_ca_npa += 1 + if args.verbose: + logging.info("The dictionary contains" + str(dict_resid_pattern)) + return dict_resid_pattern + + +def search_resid_positions(dict_resid_pattern, uni): + """ + Description : creates the dictionary that contains the position of the + barycenter of the alpha carbons of NPA and ArR. + param dict_resid_pattern: dictionary which contains as key the name of the + aquaporin, the protomer and the residue, as well as the residue number. + The value contains the name, the number and the position of the residue, + as well as the key of the next dictionary. + param uni: MDAnalysis universe. + return: dictionary which contains as key the name of the pattern, + the aquaporin and the protomer. The value associated with the key contain + the position of the barycenter of the pattern. + """ + if args.verbose: + logging.info("\nFunction search_resid_positions") + logging.info("The input dictionary is " + str(dict_resid_pattern)) + logging.info("The input universe is " + str(uni)) + dict_pos_resid = {} + for npa in range(1, 3): + num_proto = 0 + for aqp in range(1, 3): + for p in range(1, 5): + list_atm_npa = [] + key_npa = "NPA" + str(npa) + ",AQP" + str(aqp) + ",P" + str(p) + key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p) + key_arr2 = "ArR2,AQP" + str(aqp) + ",P" + str(p) + for k, v in dict_resid_pattern.items(): + if key_npa == v[3]: + list_atm_npa.append(v[1]) + elif key_arr1 == v[3]: + dict_pos_resid[key_arr1] = v[2] + elif key_arr2 == v[3]: + dict_pos_resid[key_arr2] = v[2] + if len(list_atm_npa) > 0: + slct_pattern = ( + "(resid " + + str(list_atm_npa[0]) + + " or resid " + + str(list_atm_npa[1]) + + " or resid " + + str(list_atm_npa[2]) + + ") and name CA" + ) + if args.verbose: + logging.info("The selected pattern is " + slct_pattern) + atom_group_pattern = uni.select_atoms(slct_pattern) + if args.verbose: + logging.info(str(atom_group_pattern)) + deb = 0 + 3 * num_proto + fin = +3 + 3 * num_proto + atm_proto_aqp = atom_group_pattern[deb:fin] + if args.verbose: + logging.info("deb : " + str(deb) + " fin : " + + str(fin)) + logging.info("The selected AtomGroup is " + + str(atm_proto_aqp)) + dict_pos_resid[key_npa] = atm_proto_aqp.centroid() + num_proto += 1 + if args.verbose: + logging.info("The dictionary contains " + str(dict_pos_resid)) + return dict_pos_resid + + +def calculate_euclidean_distance(dict_pos_resid, choice_measure): + """ + Description : creates the dictionary that contains the Euclidean distance + between ArR and NPA1 or NPA2. + param dict_pos_resid: dictionary which contains as key the name of the + pattern, the aquaporin and the protomer. + The value contains the position of the barycenter of the pattern. + param choice_measure: variable that contains ArR-ArR or NPA-ArR. + return: dictionary which contains as key the name of the couple, + the aquaporin and the protomer. + The value contains the euclidean distance between the barycenters of + the two motifs of the couple. + """ + if args.verbose: + logging.info("\nFunction calculate_euclidean_distance") + logging.info("The input dictionary is " + str(dict_pos_resid)) + logging.info("The distance will be calculated between" + + choice_measure) + dict_pair_dist = {} + if choice_measure == "NPA-ArR": + for npa in range(1, 3): + for aqp in range(1, 3): + for p in range(1, 5): + key_npa = "NPA" + str(npa) + ",AQP" + str(aqp) \ + + ",P" + str(p) + key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p) + new_key = "ArR-NPA" + str(npa) + ",AQP" + str(aqp) \ + + ",P" + str(p) + dict_pair_dist[new_key] = np.linalg.norm( + dict_pos_resid[key_npa] - dict_pos_resid[key_arr1] + ) + if args.verbose: + logging.info( + "Key NPA : " + + key_npa + + " value : " + + str(dict_pos_resid[key_npa]) + ) + logging.info( + "Key ArR : " + + key_arr1 + + " value : " + + str(dict_pos_resid[key_arr1]) + ) + logging.info( + "The value associated at the key " + + new_key + + " is " + + str(dict_pair_dist[new_key]) + ) + else: + for aqp in range(1, 3): + for p in range(1, 5): + key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p) + key_arr2 = "ArR2,AQP" + str(aqp) + ",P" + str(p) + new_key = "ArR1-ArR2,AQP" + str(aqp) + ",P" + str(p) + dict_pair_dist[new_key] = np.linalg.norm( + dict_pos_resid[key_arr1] - dict_pos_resid[key_arr2] + ) + if args.verbose: + logging.info( + "Key ArR1 : " + + key_arr1 + + " value : " + + str(dict_pos_resid[key_arr1]) + ) + logging.info( + "Key ArR2 : " + + key_arr2 + + " value : " + + str(dict_pos_resid[key_arr2]) + ) + logging.info( + "The value associated at the key " + + new_key + + " is " + + str(dict_pair_dist[new_key]) + ) + if args.verbose: + logging.info("The dictionary contains " + str(dict_pair_dist)) + return dict_pair_dist + + +def run_one_frame_npa_arr(list_resid_npa1, list_resid_npa2, list_resid_arr, + uni): + """ + Description: compiles the different functions to create the dict_pair_dist + dictionary for one frame. + param list_resid_npa1: list containing the names of the residues + constituting the NPA1. + param list_resid_npa2: list containing the names of the residues + constituting the NPA2. + param list_resid_arr: list containing the names of the residues + constituting the ArR. + param uni: MDAnalysis universe. + return: dictionary which contains as key the name of the couple, + the aquaporin and the protomer. + The value contains the euclidean distance between the barycenters of + the two motifs of the couple. + """ + if args.verbose: + logging.info("\nFunction runForOneFrame") + logging.info("The first input parameter is " + str(list_resid_npa1)) + logging.info("The second input parameter is " + str(list_resid_npa2)) + logging.info("The third input parameter is " + str(list_resid_arr)) + logging.info("The fourth input parameter is " + str(uni)) + choice_measure = "NPA-ArR" + if list_resid_npa1 == list_resid_npa2: + if args.verbose: + logging.info("Pattern NPA1 identical to pattern NPA2") + pattern = "NPA1" + list_all_resid_npa = search_amino_acids_pattern(list_resid_npa1, uni) + list_length_proto_npa = \ + calculate_length_proto(list(list_all_resid_npa[0])) + config_npa = create_config_file(list_length_proto_npa) + dict_resid_pattern = search_pattern( + list_all_resid_npa, config_npa, list_resid_npa1, pattern, uni + ) + else: + if args.verbose: + logging.info("Pattern NPA1 different from the NPA2 pattern") + # NPA1 + pattern = "NPA1" + list_all_resid_npa = search_amino_acids_pattern(list_resid_npa1, uni) + list_length_proto_npa = \ + calculate_length_proto(list(list_all_resid_npa[0])) + config_npa1 = create_config_file(list_length_proto_npa) + dict_resid_pattern = search_pattern( + list_all_resid_npa, config_npa1, list_resid_npa1, pattern, uni + ) + # NPA2 + pattern = "NPA2" + list_all_resid_npa = search_amino_acids_pattern(list_resid_npa2, uni) + list_length_proto_npa = \ + calculate_length_proto(list(list_all_resid_npa[0])) + config_npa2 = create_config_file(list_length_proto_npa) + dict_resid_pattern.update( + search_pattern( + list_all_resid_npa, config_npa2, list_resid_npa2, pattern, uni + ) + ) + # ArR + pattern = "ArR1" + list_all_resid_arr = search_amino_acids_pattern(list_resid_arr, uni) + list_length_proto_arr = \ + calculate_length_proto(list(list_all_resid_arr[0])) + config_arr = create_config_file(list_length_proto_arr) + dict_resid_pattern.update( + search_pattern(list_all_resid_arr, config_arr, list_resid_arr, + pattern, uni) + ) + # dict position + dict_pos_resid = search_resid_positions(dict_resid_pattern, uni) + # dict distance + dict_pair_dist = \ + calculate_euclidean_distance(dict_pos_resid, choice_measure) + return dict_pair_dist + + +def run_one_frame_arr(list_resid_arr1, list_resid_arr2, uni): + """ + Description: compiles the different functions to create the dict_pair_dist + dictionary for one frame. + param list_resid_arr1: list containing the names of the residues + constituting the ArR1. + param list_resid_arr2: list_resid_arr2, list containing the names of + the residues constituting the ArR2. + param uni: MDAnalysis universe. + return: dictionary which contains as key the name of the couple, + the aquaporin and the protomer. + The value contains the euclidean distance between the barycenters of + the two motifs of the couple. + """ + if args.verbose: + logging.info("\nFunction runForOneFrame") + logging.info("The first input parameter is " + str(list_resid_arr1)) + logging.info("The second input parameter is " + str(list_resid_arr2)) + logging.info("The third input parameter is " + str(uni)) + dict_resid_pattern = {} + # ArR1 + pattern = "ArR1" + list_all_resid_arr1 = search_amino_acids_pattern(list_resid_arr1, uni) + list_length_proto_arr1 = \ + calculate_length_proto(list(list_all_resid_arr1[0])) + config_arr1 = create_config_file(list_length_proto_arr1) + dict_resid_pattern.update( + search_pattern(list_all_resid_arr1, config_arr1, list_resid_arr1, + pattern, uni) + ) + # ArR2 + pattern = "ArR2" + list_all_resid_arr2 = search_amino_acids_pattern(list_resid_arr2, uni) + list_length_proto_arr2 = \ + calculate_length_proto(list(list_all_resid_arr2[0])) + config_arr2 = create_config_file(list_length_proto_arr2) + dict_resid_pattern.update( + search_pattern(list_all_resid_arr2, config_arr2, list_resid_arr2, + pattern, uni) + ) + # dict position + dict_pos_resid = search_resid_positions(dict_resid_pattern, uni) + # dict distance + choice_measure = "ArR-ArR" + dict_pair_dist = \ + calculate_euclidean_distance(dict_pos_resid, choice_measure) + return dict_pair_dist + + +def run_all_frames_npa_arr( + list_resid_npa1, list_resid_npa2, list_resid_arr, col_number, uni +): + """ + Description: creates the table containing the pore diameter of an aquaporin + over time. + param list_resid_npa1: list containing the names of the residues + constituting the NPA1. + param list_resid_npa2: list containing the names of the residues + constituting the NPA2. + param list_resid_arr: list containing the names of the residues + constituting the ArR. + param col_number: number of columns in the table + param uni: MDAnalysis universe. + return: list containing an array and a key list. The table containing the + pore diameter of an aquaporin over time. + """ + if args.verbose: + logging.info("\nFunction runForAllFrame") + logging.info("The first input parameter is " + str(list_resid_npa1)) + logging.info("The second input parameter is " + str(list_resid_npa2)) + logging.info("The third input parameter is " + str(list_resid_arr)) + logging.info("The fourth input parameter is " + str(col_number)) + logging.info("The fifth input parameter is " + str(uni)) + nbr_frame = len(uni.trajectory) + if args.verbose: + logging.info("The number of frame is " + str(nbr_frame)) + tab_pair_dist = np.empty((nbr_frame, col_number)) + if args.verbose: + logging.info( + "The dimensions of the table of means and standard deviation are" + + " as follows " + str(tab_pair_dist.shape) + ) + list_key = [] + pattern_arr = "" + pattern_npa1 = "" + pattern_npa2 = "" + dict_prot = { + "ALA": "A", + "CYS": "C", + "ASP": "D", + "GLU": "E", + "PHE": "F", + "GLY": "G", + "HIS": "H", + "ILE": "I", + "LYS": "K", + "LEU": "L", + "MET": "M", + "ASN": "N", + "PRO": "P", + "GLN": "Q", + "ARG": "R", + "SER": "S", + "THR": "T", + "VAL": "V", + "TRP": "W", + "TYR": "Y", + "HSD": "H", + "HSP": "H", + } + for aa in list_resid_arr: + pattern_arr += dict_prot[aa] + for aa in list_resid_npa1: + pattern_npa1 += dict_prot[aa] + for aa in list_resid_npa2: + pattern_npa2 += dict_prot[aa] + for ts in uni.trajectory: + if args.verbose: + logging.info("Frame " + str(uni.trajectory.frame)) + logging.info(str(ts)) + dict_pair_dist = run_one_frame_npa_arr( + list_resid_npa1, list_resid_npa2, list_resid_arr, uni + ) + tab_pair_dist[uni.trajectory.frame][0] = uni.trajectory.time + list_key = ["Time in ps"] + num_npa_proto = 0 + for npa in range(1, 3): + if npa == 1: + pattern_complete = "," + pattern_arr + "-" + pattern_npa1 + "1" + else: + pattern_complete = "," + pattern_arr + "-" + pattern_npa2 + "2" + for aqp in range(1, 3): + for p in range(1, 5): + num_npa_proto += 1 + key1 = "ArR-NPA" + str(npa) + ",AQP" + str(aqp)\ + + ",P" + str(p) + key2 = "AQP" + str(aqp) + ",P" + str(p) + pattern_complete\ + + " (Angstroms)" + tab_pair_dist[uni.trajectory.frame][num_npa_proto] = \ + dict_pair_dist[key1] + list_key.append(key2) + list_out = [tab_pair_dist, list_key] + return list_out + + +def run_all_frames_arr(list_resid_arr1, list_resid_arr2, col_number, uni): + """ + Description: creates the table containing the pore diameter of an aquaporin + over time. + param list_resid_arr1: list containing the names of the residues + constituting the ArR1. + param list_resid_arr2: list containing the names of the residues + constituting the ArR2. + param col_number: number of columns in the table + param uni: MDAnalysis universe. + return: list containing an array and a key list. The table containing the + pore diameter of an aquaporin over time. + """ + if args.verbose: + logging.info("\nFunction runForAllFrame") + logging.info("The first input parameter is " + str(list_resid_arr1)) + logging.info("The second input parameter is " + str(list_resid_arr2)) + logging.info("The third input parameter is " + str(col_number)) + logging.info("The fourth input parameter is " + str(uni)) + nbr_frame = len(uni.trajectory) + if args.verbose: + logging.info("The number of frame is " + str(nbr_frame)) + tab_pair_dist = np.empty((nbr_frame, col_number)) + if args.verbose: + logging.info( + "The dimensions of the table of means and standard deviation" + + " are as follows " + str(tab_pair_dist.shape) + ) + list_key = [] + pattern1 = "" + pattern2 = "" + dict_prot = { + "ALA": "A", + "CYS": "C", + "ASP": "D", + "GLU": "E", + "PHE": "F", + "GLY": "G", + "HIS": "H", + "ILE": "I", + "LYS": "K", + "LEU": "L", + "MET": "M", + "ASN": "N", + "PRO": "P", + "GLN": "Q", + "ARG": "R", + "SER": "S", + "THR": "T", + "VAL": "V", + "TRP": "W", + "TYR": "Y", + "HSD": "H", + "HSP": "H", + } + for aa in list_resid_arr1: + pattern1 += dict_prot[aa] + for aa in list_resid_arr2: + pattern2 += dict_prot[aa] + for ts in uni.trajectory: + if args.verbose: + logging.info("Frame " + str(uni.trajectory.frame)) + logging.info(str(ts)) + dict_pair_dist = run_one_frame_arr(list_resid_arr1, list_resid_arr2, + uni) + tab_pair_dist[uni.trajectory.frame][0] = uni.trajectory.time + list_key = ["Time (ps)"] + num_proto = 0 + for aqp in range(1, 3): + for p in range(1, 5): + num_proto += 1 + key1 = "ArR1-ArR2,AQP" + str(aqp) + ",P" + str(p) + key2 = ( + "AQP" + str(aqp) + ",P" + str(p) + "," + pattern1 + + "-" + pattern2 + " (Angstroms)" + ) + tab_pair_dist[uni.trajectory.frame][num_proto] = \ + dict_pair_dist[key1] + list_key.append(key2) + list_out = [tab_pair_dist, list_key] + return list_out + + +def create_final_dataframe( + list_resid_arr1, list_resid_arr2, list_resid_npa1, list_resid_npa2, uni +): + """ + Description: creates the final dataframe. + param list_resid_arr1: list containing the arr1 pattern + param list_resid_arr2: list containing the arr2 pattern + param list_resid_npa1: list containing the npa1 pattern + param list_resid_npa2: list containing the npa2 pattern + param uni: MDAnalysis universe + return: final dataframe + """ + if args.verbose: + logging.info("\nFunction runForAllFrame") + logging.info("The first input parameter is " + str(list_resid_arr1)) + logging.info("The second input parameter is " + str(list_resid_arr2)) + logging.info("The third input parameter is " + str(list_resid_npa1)) + logging.info("The fourth input parameter is " + str(list_resid_npa2)) + logging.info("The fifth input parameter is " + str(uni)) + if list_resid_npa1 is None and list_resid_npa2 is None: + if len(list_resid_arr1) >= 4 and len(list_resid_arr2) >= 4: + columns_number = 9 + list_out = run_all_frames_arr( + list_resid_arr1, list_resid_arr2, columns_number, uni + ) + else: + if args.verbose: + if len(list_resid_arr1) < 4 <= len(list_resid_arr2): + logging.critical("Pattern1 must be provided") + elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4: + logging.critical("Pattern1 and Pattern2 must be provided") + elif len(list_resid_arr1) >= 4 > len(list_resid_arr2): + logging.critical("Pattern2 must be provided") + print("Pattern error : please read the log file or help") + sys.exit(2) + elif list_resid_npa1 is None and list_resid_npa2 is not None: + if args.verbose: + if len(list_resid_arr1) < 4 <= len(list_resid_arr2): + logging.critical("PatternArr1 must be provided") + logging.critical("Please, do not put patternArr2") + logging.critical("PatternNPA1 must be provided") + elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4: + logging.critical("PatternArr1 must be provided") + logging.critical("PatternNPA1 must be provided") + elif len(list_resid_arr1) >= 4 > len(list_resid_arr2): + logging.critical("PatternNPA1 must be provided") + else: + logging.critical("Please, do not put patternArr2") + logging.critical("PatternNPA1 must be provided") + print("Pattern error : please read the log file or help") + sys.exit(2) + elif list_resid_npa1 is not None and list_resid_npa2 is None: + if args.verbose: + if len(list_resid_arr1) < 4 <= len(list_resid_arr2): + logging.critical("PatternArr1 must be provided") + logging.critical("Please, do not put patternArr2") + logging.critical("PatternNPA2 must be provided") + elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4: + logging.critical("PatternArr1 must be provided") + logging.critical("PatternNPA2 must be provided") + elif len(list_resid_arr1) >= 4 > len(list_resid_arr2): + logging.critical("PatternNPA1 must be provided") + else: + logging.critical("Please, do not put patternArr2") + logging.critical("PatternNPA2 must be provided") + print("Pattern error : please read the log file or help") + sys.exit(2) + else: + if ( + list_resid_npa1 is not None + and list_resid_npa2 is not None + and len(list_resid_arr1) >= 4 > len(list_resid_arr2) + ): + columns_number = 17 + list_out = run_all_frames_npa_arr( + list_resid_npa1, list_resid_npa2, list_resid_arr1, + columns_number, uni + ) + else: + if len(list_resid_arr1) < 4 <= len(list_resid_arr2): + logging.critical("PatternArr1 must be provided") + logging.critical("Please, do not put patternArr2") + elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4: + logging.critical("Pattern1 must be provided") + print("Pattern error : please read the log file or help") + sys.exit(2) + list_std = list(np.std(list_out[0], axis=0)) + list_mean = list(np.mean(list_out[0], axis=0)) + np_mean_around = list(np.around(list_mean, decimals=3)) + np_std_around = list(np.around(list_std, decimals=3)) + np_mean_around[0] = "Mean" + np_std_around[0] = "Std" + np_mean_around = np.array([np_mean_around]) + np_std_around = np.array([np_std_around]) + np_mean_std_around = np.concatenate((np_mean_around, np_std_around), + axis=0) + tab_pair_dist = np.concatenate((np_mean_std_around, list_out[0]), axis=0) + df_dist = pd.DataFrame(tab_pair_dist, columns=list_out[1]) + return df_dist + + +# Class + + +if __name__ == "__main__": + args = parse_arguments() + out_file_name = "test" + file_name_log = out_file_name + ".log" + if args.verbose: + if args.log_output: + if os.path.isfile(args.log_output): + os.remove(args.log_output) + logging.basicConfig( + filename=args.log_output, + format="%(levelname)s - %(message)s", + level=logging.INFO, + ) + else: + if os.path.isfile(file_name_log): + os.remove(file_name_log) + logging.basicConfig( + filename=file_name_log, + format="%(levelname)s - %(message)s", + level=logging.INFO, + ) + logging.info("verbose mode on") + gro_file = '' + xtc_file = '' + if args.gro_file: + gro_file = os.path.basename(args.gro_file[0]) + else: + logging.critical("A .gro file must be provided") + if args.xtc_file: + xtc_file = os.path.basename(args.xtc_file[0]) + else: + logging.critical("A .xtc file must be provided") + if args.work_directory: + work_directory = args.work_directory[0] + else: + work_directory = os.path.dirname(args.gro_file[0]) + if not work_directory.endswith("/"): + work_directory += "/" + output_directory = args.output_directory[0] + if args.output: + output = args.output + else: + output = "" + os.chdir(work_directory) + if args.gro_file_ext and args.xtc_file_ext: + u = Mda.Universe( + args.gro_file[0], + args.xtc_file[0], + topology_format=args.gro_file_ext[0], + format=args.xtc_file_ext[0], + ) + else: + u = Mda.Universe(args.gro_file[0], args.xtc_file[0]) + if args.verbose: + logging.info("Load universe : " + str(u)) + if args.npa_one: + resid_npa1 = args.npa_one[0].split() + else: + resid_npa1 = args.npa_one + if args.npa_second: + resid_npa2 = args.npa_second[0].split() + else: + resid_npa2 = args.npa_second + if args.arr_one: + resid_arr1 = args.arr_one[0].split() + else: + resid_arr1 = args.arr_one + if args.arr_second: + resid_arr2 = args.arr_second[0].split() + else: + resid_arr2 = args.arr_second + df_final = create_final_dataframe(resid_arr1, resid_arr2, + resid_npa1, resid_npa2, u) + if len(output_directory) > 2: + if not os.path.exists(output_directory): + os.makedirs(output_directory) + os.chdir(output_directory) + if len(output) > 2: + df_final.to_csv(output, sep="\t", index=False) + else: + file_name_tabular = out_file_name + "PairDist.tabular" + df_final.to_csv(file_name_tabular, sep="\t", index=False)