Mercurial > repos > agpetit > calculate_diameter

#!/usr/bin/env python3

# coding: utf-8
"""
This script allows to launch a calculation of the pore diameter of an aquaporin
  as a function of time from a .gro file and an .xtc file.
At the end of the script, a table containing the evolution of the distance over
  time is created.
# USAGE : calculate_pore_diameter_aqp.py -xtc xtc_file -gro gro_file
  -npao npa1 Pattern (optional) -npas npa2 Pattern (optional)
  -arro arR1 Pattern on H5 (optional) -arrs arR2 Pattern on HE (optional)
  -v verbose (optional) -w work_directory (optional)
  -o output_directory (optional) --gro_file_ext extension of gro file
  (optional) --xtc_file_ext extension of xtc file (optional)
  -log name of log file (optional) -o name of output file (optional).
"""


__all__ = [
    "calculate_length_proto",
    "search_pattern",
    "run_one_frame_npa_arr",
    "run_one_frame_arr",
    "run_all_frames_npa_arr",
    "run_all_frames_arr",
    "create_config_file",
]
__author__ = "Agnès-Elisabeth Petit"
__date__ = "29/04/2021"
__version__ = "1.0"
__copyright__ = "(c) 2022 CC-BY-NC-SA"

# Library import
import argparse
import configparser
import logging
import os
import re
import sys

import MDAnalysis as Mda

import numpy as np

import pandas as pd

# Global variables

# Functions


def test_setup():
    global args
    args = parse_arguments()
    args.verbose = True


def parse_arguments():
    parser = argparse.ArgumentParser(
        description="This script allows to launch a calculation of the pore"
                    + " diameter of an aquaporin as a function of time from"
                    + " a .gro file and an .xtc file.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        prefix_chars="-",
        add_help=True,
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="""Information messages to stderr""",
    )
    parser.add_argument(
        "-gro", "--gro_file", type=str, nargs=1, help="""My input .gro file"""
    )
    parser.add_argument(
        "-xtc", "--xtc_file", type=str, nargs=1, help="""My input .xtc file"""
    )
    parser.add_argument(
        "--gro_file_ext", type=str, nargs=1, help="""My input .gro file"""
    )
    parser.add_argument(
        "--xtc_file_ext", type=str, nargs=1, help="""My input .xtc file"""
    )
    parser.add_argument(
        "-w", "--work_directory", type=str, nargs=1,
        help="""It's work Directory"""
    )
    parser.add_argument(
        "-od",
        "--output_directory",
        default="stdout",
        type=str,
        nargs=1,
        help="""It's output Directory""",
    )
    parser.add_argument(
        "-npao",
        "--npa_one",
        type=str,
        nargs=1,
        help="""3-letter pattern of the 3 amino acids of npa1""",
    )
    parser.add_argument(
        "-npas",
        "--npa_second",
        type=str,
        nargs=1,
        help="""3-letter pattern of the 3 amino acids of npa2""",
    )
    parser.add_argument(
        "-arro",
        "--arr_one",
        type=str,
        default="",
        nargs=1,
        help="""3-letter pattern of the 4 amino acids of first ArR on H5""",
    )
    parser.add_argument(
        "-arrs",
        "--arr_second",
        type=str,
        default="",
        nargs=1,
        help="""3-letter pattern of the 4 amino acids of second ArR on HE""",
    )
    parser.add_argument("-o", "--output", help="""Output for files""")
    parser.add_argument("-log", "--log_output", help="""Output for files""")
    return parser.parse_args()


def search_amino_acids_pattern(list_pattern, uni):
    """
    Description:Creates a list that contains two lists.
      The first one contains the residue numbers and the second one the residue
      names for each amino acid in the pattern.
    param list_pattern: list of the names of each amino acid of the pattern.
    param uni: MDAnalysis universe.
    return: list containing two lists. One containing residue numbers
      and the other containing residue names.
    """
    if args.verbose:
        logging.info("\nFunction search_amino_acids_pattern")
        logging.info("The input list is " + str(list_pattern))
        logging.info("The input universe is " + str(uni))
    if len(list_pattern) == 3:
        slct_pattern = (
            "(resname "
            + list_pattern[0]
            + " or resname "
            + list_pattern[1]
            + " or resname "
            + list_pattern[2]
            + ") and name CA"
        )
    else:
        slct_pattern = (
            "(resname "
            + list_pattern[0]
            + " or resname "
            + list_pattern[1]
            + " or resname "
            + list_pattern[2]
            + " or resname "
            + list_pattern[3]
            + ") and name CA"
        )
    if args.verbose:
        logging.info("The selected pattern is : " + slct_pattern)
    list_atm_npa = uni.select_atoms(slct_pattern).residues
    list_all_num_resid = list_atm_npa.resids
    if args.verbose:
        logging.info(
            "The list containing all the residue numbers associated"
            + " with the pattern is : " + str(list_all_num_resid)
        )
    list_all_name_resid = list_atm_npa.resnames
    if args.verbose:
        logging.info(
            "The list containing all the residue names associated with the "
            + "pattern is : " + str(list_all_name_resid)
        )
    list_all_resid = [list_all_num_resid, list_all_name_resid]
    if args.verbose:
        logging.info(
            "The list containing the lists of numbers and names of residues"
            + " associated with the pattern is : " + str(list_all_resid)
        )
    return list_all_resid


def calculate_length_proto(list_num_resid):
    """
    Description : calculate the length of each protomer according to the number
      of amino acids corresponding to the pattern.
    param list_num_resid: list of amino acid numbers corresponding to the
      pattern in the protein.
    return: list of the length of each protomer.
    """
    if args.verbose:
        logging.info("\nFunction calculate_length_proto")
        logging.info("The input list is " + str(list_num_resid))
    list_length_proto = [1, 0, 0, 0, 0, 0, 0, 0]
    ind_list_length_proto = 0
    for i, v in enumerate(list_num_resid[1:]):
        if int(v) < int(list_num_resid[i]):
            ind_list_length_proto += 1
            list_length_proto[ind_list_length_proto] += 1
        else:
            list_length_proto[ind_list_length_proto] += 1
    if args.verbose:
        for i, v in enumerate(list_length_proto):
            logging.info(
                "The length of the protomer "
                + str(i + 1)
                + " is "
                + str(v)
                + " residues"
            )
    return list_length_proto


def create_config_file(list_length_proto):
    """
    Description : creates a Configparser object that describes the start and
      end indices of each protomer.
    param list_length_proto: list of the length of each protomer.
    return: Configparser object that describes the start and end indices
      of each protomer.
    """
    if args.verbose:
        logging.info("\nFunction create_config_file")
        logging.info("The input list is " + str(list_length_proto))
    config = configparser.ConfigParser()
    proto_prev = ""
    for aqp in range(1, 3):
        for p in range(1, 5):
            if aqp == 1 and p == 1:
                proto_current = "AQP" + str(aqp) + ",P" + str(p)
                config[proto_current] = {}
                config.set(proto_current, "start", str(0))
                config.set(proto_current, "end", str(list_length_proto[0] - 1))
                proto_prev = proto_current
                if args.verbose:
                    logging.info(proto_current + " starts at index "
                                 + str(config["AQP1,P1"]["start"])
                                 + " and finish at index "
                                 + str(config["AQP1,P1"]["end"]))
            else:
                proto_current = "AQP" + str(aqp) + ",P" + str(p)
                config[proto_current] = {}
                if aqp == 1 and p == 2:
                    config.set(proto_current, "start",
                               str(int(config[proto_prev]["start"]) +
                                   list_length_proto[p-2] - 1))
                else:
                    config.set(proto_current, "start",
                               str(int(config[proto_prev]["start"])
                                   + list_length_proto[p-2]))
                config.set(proto_current, "end",
                           str(int(config[proto_prev]["end"])
                               + list_length_proto[p-1]))
                proto_prev = proto_current
                if args.verbose:
                    logging.info(
                        proto_current
                        + " starts at index "
                        + config[proto_current]["start"]
                        + " and finish at index "
                        + config[proto_current]["end"]
                    )
    if args.verbose:
        logging.info(
            "The sections of the settings object are : "
            + str(config.sections())
        )
        logging.info(
            "The keys for each section of the settings object are: "
            + str(list(config["AQP1,P3"].keys()))
        )
    return config


def search_pattern(list_all_resid, config, list_pattern, pattern_resid, uni):
    """
    Description : creates the dictionary that contains the name, number and
      position of the residue and the key for the following dictionary.
    param list_all_resid: list containing two lists. One containing residue
      numbers and the other containing residue names.
    param config: Configparser object that describes the start and end indices
      of each protomer.
    param list_pattern: list of the names of each amino acid of the pattern.
    param pattern_resid: string that gives the pattern (NPA1,NPA2,ArR1 ou ArR2)
      in which we are.
    param uni: MDAnalysis universe
    return: dictionary which contains as key the name of the
      aquaporin, the protomer and the residue, as well as the residue number.
      The value contains the name, the number and the position of the residue,
      as well as the key of the next dictionary.
    """
    if args.verbose:
        logging.info("\nFunction search_pattern")
        logging.info("The first input argument is " + str(list_all_resid))
        logging.info("The second input argument is a configParser object")
        logging.info("The third input argument is " + str(list_pattern))
        logging.info("The fourth input argument is " + str(pattern_resid))
        logging.info("The fourth input argument is " + str(uni))
    num_ca_npa = 0
    num_ca_arr = 0
    dict_resid_pattern = {}
    str_pattern = "".join(list_pattern)
    for aqp in range(1, 3):
        for p in range(1, 5):
            list_ind_temp_resid = []
            list_ind_final_resid = []
            proto = "AQP" + str(aqp) + ",P" + str(p)
            ss_name_list = list(
                list_all_resid[1][
                    int(config[proto]["start"]): int(config[proto]["end"])
                ]
            )
            ss_name_str = "".join(ss_name_list)
            pattern = re.compile(str_pattern)
            for v in pattern.finditer(ss_name_str):
                if args.verbose:
                    logging.info(
                        "The pattern " + str(pattern) + " is searched in "
                        + ss_name_str
                    )
                if aqp == 1 and p == 1:
                    list_ind_temp_resid.append(int(v.start() / 3))
                else:
                    list_ind_temp_resid.append(
                        int(v.start() / 3) + int(config[proto]["start"])
                    )
            if args.verbose:
                logging.info("The temporary index list is "
                             + str(list_ind_temp_resid))
            for ind_current in list_ind_temp_resid:
                val_one = list_all_resid[0][ind_current]
                val_two = list_all_resid[0][ind_current + 1]
                val_three = list_all_resid[0][ind_current + 2]
                if val_one + 1 == val_two and val_one + 2 == val_three:
                    if pattern_resid == "ArR1" or pattern_resid == "ArR2":
                        val_four = list_all_resid[0][ind_current + 3]
                        if val_one + 3 == val_four:
                            list_ind_final_resid.append(ind_current)
                    else:
                        list_ind_final_resid.append(ind_current)
                        list_ind_final_resid.append(ind_current + 1)
                        list_ind_final_resid.append(ind_current + 2)
            if len(list_ind_final_resid) == 0:
                if args.verbose:
                    logging.critical("The pattern is not found "
                                     + "in the protein sequence")
                print("The pattern is not found in the protein sequence")
                sys.exit(1)
            if args.verbose:
                logging.info("The final index list is "
                             + str(list_ind_final_resid))
            ind_list = []
            for ind, val_ind in enumerate(list_ind_final_resid):
                ind_list.append(ind)
                key = (
                    proto
                    + ","
                    + list_all_resid[1][val_ind]
                    + ","
                    + str(list_all_resid[0][val_ind])
                )
                if args.verbose:
                    logging.info("The key of dictionary is " + key)
                slct_resid = (
                    "(resid " + str(list_all_resid[0][val_ind])
                    + ") and name CA"
                )
                if args.verbose:
                    logging.info("The selected residue is : " + slct_resid)
                resid = uni.select_atoms(slct_resid)
                if args.verbose:
                    logging.info("The AtomGroup is " + str(resid))
                if pattern_resid == "ArR1":
                    new_key = "ArR1," + proto
                    if args.verbose:
                        logging.info("The key of next dictionary is "
                                     + new_key)
                    position_ca = resid.positions[num_ca_arr]
                    num_ca_arr += 1
                elif pattern_resid == "ArR2":
                    new_key = "ArR2," + proto
                    if args.verbose:
                        logging.info("The key of next dictionary is "
                                     + new_key)
                    position_ca = resid.positions[num_ca_arr]
                    num_ca_arr += 1
                else:
                    if (ind - 3) in ind_list or pattern_resid == "NPA2":
                        new_key = "NPA2," + proto
                        if args.verbose:
                            logging.info("The key of next dictionary is "
                                         + new_key)
                        position_ca = resid.positions[num_ca_npa]
                    else:
                        new_key = "NPA1," + proto
                        if args.verbose:
                            logging.info("The key of next dictionary is "
                                         + new_key)
                        position_ca = resid.positions[num_ca_npa]
                dict_resid_pattern[key] = [
                    list_all_resid[1][val_ind],
                    list_all_resid[0][val_ind],
                    position_ca,
                    new_key,
                ]
                if args.verbose:
                    logging.info(
                        "The value associated at the key "
                        + key
                        + " is "
                        + str(dict_resid_pattern[key])
                    )
            if len(list_ind_final_resid) > 1:
                num_ca_npa += 1
    if args.verbose:
        logging.info("The dictionary contains" + str(dict_resid_pattern))
    return dict_resid_pattern


def search_resid_positions(dict_resid_pattern, uni):
    """
    Description : creates the dictionary that contains the position of the
      barycenter of the alpha carbons of NPA and ArR.
    param dict_resid_pattern: dictionary which contains as key the name of the
      aquaporin, the protomer and the residue, as well as the residue number.
      The value contains the name, the number and the position of the residue,
      as well as the key of the next dictionary.
    param uni: MDAnalysis universe.
    return: dictionary which contains as key the name of the pattern,
      the aquaporin and the protomer. The value associated with the key contain
      the position of the barycenter of the pattern.
    """
    if args.verbose:
        logging.info("\nFunction search_resid_positions")
        logging.info("The input dictionary is " + str(dict_resid_pattern))
        logging.info("The input universe is " + str(uni))
    dict_pos_resid = {}
    for npa in range(1, 3):
        num_proto = 0
        for aqp in range(1, 3):
            for p in range(1, 5):
                list_atm_npa = []
                key_npa = "NPA" + str(npa) + ",AQP" + str(aqp) + ",P" + str(p)
                key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p)
                key_arr2 = "ArR2,AQP" + str(aqp) + ",P" + str(p)
                for k, v in dict_resid_pattern.items():
                    if key_npa == v[3]:
                        list_atm_npa.append(v[1])
                    elif key_arr1 == v[3]:
                        dict_pos_resid[key_arr1] = v[2]
                    elif key_arr2 == v[3]:
                        dict_pos_resid[key_arr2] = v[2]
                if len(list_atm_npa) > 0:
                    slct_pattern = (
                        "(resid "
                        + str(list_atm_npa[0])
                        + " or resid "
                        + str(list_atm_npa[1])
                        + " or resid "
                        + str(list_atm_npa[2])
                        + ") and name CA"
                    )
                    if args.verbose:
                        logging.info("The selected pattern is " + slct_pattern)
                    atom_group_pattern = uni.select_atoms(slct_pattern)
                    if args.verbose:
                        logging.info(str(atom_group_pattern))
                    deb = 0 + 3 * num_proto
                    fin = +3 + 3 * num_proto
                    atm_proto_aqp = atom_group_pattern[deb:fin]
                    if args.verbose:
                        logging.info("deb : " + str(deb) + "   fin : "
                                     + str(fin))
                        logging.info("The selected AtomGroup is "
                                     + str(atm_proto_aqp))
                    dict_pos_resid[key_npa] = atm_proto_aqp.centroid()
                    num_proto += 1
    if args.verbose:
        logging.info("The dictionary contains " + str(dict_pos_resid))
    return dict_pos_resid


def calculate_euclidean_distance(dict_pos_resid, choice_measure):
    """
    Description : creates the dictionary that contains the Euclidean distance
      between ArR and NPA1 or NPA2.
    param dict_pos_resid: dictionary which contains as key the name of the
      pattern, the aquaporin and the protomer.
      The value contains the position of the barycenter of the pattern.
    param choice_measure: variable that contains ArR-ArR or NPA-ArR.
    return: dictionary which contains as key the name of the couple,
      the aquaporin and the protomer.
      The value contains the euclidean distance between the barycenters of
      the two motifs of the couple.
    """
    if args.verbose:
        logging.info("\nFunction calculate_euclidean_distance")
        logging.info("The input dictionary is " + str(dict_pos_resid))
        logging.info("The distance will be calculated between"
                     + choice_measure)
    dict_pair_dist = {}
    if choice_measure == "NPA-ArR":
        for npa in range(1, 3):
            for aqp in range(1, 3):
                for p in range(1, 5):
                    key_npa = "NPA" + str(npa) + ",AQP" + str(aqp) \
                              + ",P" + str(p)
                    key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p)
                    new_key = "ArR-NPA" + str(npa) + ",AQP" + str(aqp) \
                              + ",P" + str(p)
                    dict_pair_dist[new_key] = np.linalg.norm(
                        dict_pos_resid[key_npa] - dict_pos_resid[key_arr1]
                    )
                    if args.verbose:
                        logging.info(
                            "Key NPA : "
                            + key_npa
                            + "  value : "
                            + str(dict_pos_resid[key_npa])
                        )
                        logging.info(
                            "Key ArR : "
                            + key_arr1
                            + "  value : "
                            + str(dict_pos_resid[key_arr1])
                        )
                        logging.info(
                            "The value associated at the key "
                            + new_key
                            + " is "
                            + str(dict_pair_dist[new_key])
                        )
    else:
        for aqp in range(1, 3):
            for p in range(1, 5):
                key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p)
                key_arr2 = "ArR2,AQP" + str(aqp) + ",P" + str(p)
                new_key = "ArR1-ArR2,AQP" + str(aqp) + ",P" + str(p)
                dict_pair_dist[new_key] = np.linalg.norm(
                    dict_pos_resid[key_arr1] - dict_pos_resid[key_arr2]
                )
                if args.verbose:
                    logging.info(
                        "Key ArR1 : "
                        + key_arr1
                        + "  value : "
                        + str(dict_pos_resid[key_arr1])
                    )
                    logging.info(
                        "Key ArR2 : "
                        + key_arr2
                        + "  value : "
                        + str(dict_pos_resid[key_arr2])
                    )
                    logging.info(
                        "The value associated at the key "
                        + new_key
                        + " is "
                        + str(dict_pair_dist[new_key])
                    )
    if args.verbose:
        logging.info("The  dictionary contains " + str(dict_pair_dist))
    return dict_pair_dist


def run_one_frame_npa_arr(list_resid_npa1, list_resid_npa2, list_resid_arr,
                          uni):
    """
    Description: compiles the different functions to create the dict_pair_dist
      dictionary for one frame.
    param list_resid_npa1: list containing the names of the residues
      constituting the NPA1.
    param list_resid_npa2: list containing the names of the residues
      constituting the NPA2.
    param list_resid_arr: list containing the names of the residues
      constituting the ArR.
    param uni: MDAnalysis universe.
    return: dictionary which contains as key the name of the couple,
      the aquaporin and the protomer.
      The value contains the euclidean distance between the barycenters of
      the two motifs of the couple.
    """
    if args.verbose:
        logging.info("\nFunction runForOneFrame")
        logging.info("The first input parameter is " + str(list_resid_npa1))
        logging.info("The second input parameter is " + str(list_resid_npa2))
        logging.info("The third input parameter is " + str(list_resid_arr))
        logging.info("The fourth input parameter is " + str(uni))
    choice_measure = "NPA-ArR"
    if list_resid_npa1 == list_resid_npa2:
        if args.verbose:
            logging.info("Pattern NPA1 identical to pattern NPA2")
        pattern = "NPA1"
        list_all_resid_npa = search_amino_acids_pattern(list_resid_npa1, uni)
        list_length_proto_npa = \
            calculate_length_proto(list(list_all_resid_npa[0]))
        config_npa = create_config_file(list_length_proto_npa)
        dict_resid_pattern = search_pattern(
            list_all_resid_npa, config_npa, list_resid_npa1, pattern, uni
        )
    else:
        if args.verbose:
            logging.info("Pattern NPA1 different from the NPA2 pattern")
        # NPA1
        pattern = "NPA1"
        list_all_resid_npa = search_amino_acids_pattern(list_resid_npa1, uni)
        list_length_proto_npa = \
            calculate_length_proto(list(list_all_resid_npa[0]))
        config_npa1 = create_config_file(list_length_proto_npa)
        dict_resid_pattern = search_pattern(
            list_all_resid_npa, config_npa1, list_resid_npa1, pattern, uni
        )
        # NPA2
        pattern = "NPA2"
        list_all_resid_npa = search_amino_acids_pattern(list_resid_npa2, uni)
        list_length_proto_npa = \
            calculate_length_proto(list(list_all_resid_npa[0]))
        config_npa2 = create_config_file(list_length_proto_npa)
        dict_resid_pattern.update(
            search_pattern(
                list_all_resid_npa, config_npa2, list_resid_npa2, pattern, uni
            )
        )
    # ArR
    pattern = "ArR1"
    list_all_resid_arr = search_amino_acids_pattern(list_resid_arr, uni)
    list_length_proto_arr = \
        calculate_length_proto(list(list_all_resid_arr[0]))
    config_arr = create_config_file(list_length_proto_arr)
    dict_resid_pattern.update(
        search_pattern(list_all_resid_arr, config_arr, list_resid_arr,
                       pattern, uni)
    )
    # dict position
    dict_pos_resid = search_resid_positions(dict_resid_pattern, uni)
    # dict distance
    dict_pair_dist = \
        calculate_euclidean_distance(dict_pos_resid, choice_measure)
    return dict_pair_dist


def run_one_frame_arr(list_resid_arr1, list_resid_arr2, uni):
    """
    Description: compiles the different functions to create the dict_pair_dist
      dictionary for one frame.
    param list_resid_arr1: list containing the names of the residues
      constituting the ArR1.
    param list_resid_arr2: list_resid_arr2, list containing the names of
      the residues constituting the ArR2.
    param uni: MDAnalysis universe.
    return: dictionary which contains as key the name of the couple,
      the aquaporin and the protomer.
      The value contains the euclidean distance between the barycenters of
      the two motifs of the couple.
    """
    if args.verbose:
        logging.info("\nFunction runForOneFrame")
        logging.info("The first input parameter is " + str(list_resid_arr1))
        logging.info("The second input parameter is " + str(list_resid_arr2))
        logging.info("The third input parameter is " + str(uni))
    dict_resid_pattern = {}
    # ArR1
    pattern = "ArR1"
    list_all_resid_arr1 = search_amino_acids_pattern(list_resid_arr1, uni)
    list_length_proto_arr1 = \
        calculate_length_proto(list(list_all_resid_arr1[0]))
    config_arr1 = create_config_file(list_length_proto_arr1)
    dict_resid_pattern.update(
        search_pattern(list_all_resid_arr1, config_arr1, list_resid_arr1,
                       pattern, uni)
    )
    # ArR2
    pattern = "ArR2"
    list_all_resid_arr2 = search_amino_acids_pattern(list_resid_arr2, uni)
    list_length_proto_arr2 = \
        calculate_length_proto(list(list_all_resid_arr2[0]))
    config_arr2 = create_config_file(list_length_proto_arr2)
    dict_resid_pattern.update(
        search_pattern(list_all_resid_arr2, config_arr2, list_resid_arr2,
                       pattern, uni)
    )
    # dict position
    dict_pos_resid = search_resid_positions(dict_resid_pattern, uni)
    # dict distance
    choice_measure = "ArR-ArR"
    dict_pair_dist = \
        calculate_euclidean_distance(dict_pos_resid, choice_measure)
    return dict_pair_dist


def run_all_frames_npa_arr(
    list_resid_npa1, list_resid_npa2, list_resid_arr, col_number, uni
):
    """
    Description: creates the table containing the pore diameter of an aquaporin
      over time.
    param list_resid_npa1: list containing the names of the residues
      constituting the NPA1.
    param list_resid_npa2: list containing the names of the residues
      constituting the NPA2.
    param list_resid_arr: list containing the names of the residues
      constituting the ArR.
    param col_number: number of columns in the table
    param uni: MDAnalysis universe.
    return: list containing an array and a key list. The table containing the
      pore diameter of an aquaporin over time.
    """
    if args.verbose:
        logging.info("\nFunction runForAllFrame")
        logging.info("The first input parameter is " + str(list_resid_npa1))
        logging.info("The second input parameter is " + str(list_resid_npa2))
        logging.info("The third input parameter is " + str(list_resid_arr))
        logging.info("The fourth input parameter is " + str(col_number))
        logging.info("The fifth input parameter is " + str(uni))
    nbr_frame = len(uni.trajectory)
    if args.verbose:
        logging.info("The number of frame is " + str(nbr_frame))
    tab_pair_dist = np.empty((nbr_frame, col_number))
    if args.verbose:
        logging.info(
            "The dimensions of the table of means and standard deviation are"
            + " as follows " + str(tab_pair_dist.shape)
        )
    list_key = []
    pattern_arr = ""
    pattern_npa1 = ""
    pattern_npa2 = ""
    dict_prot = {
        "ALA": "A",
        "CYS": "C",
        "ASP": "D",
        "GLU": "E",
        "PHE": "F",
        "GLY": "G",
        "HIS": "H",
        "ILE": "I",
        "LYS": "K",
        "LEU": "L",
        "MET": "M",
        "ASN": "N",
        "PRO": "P",
        "GLN": "Q",
        "ARG": "R",
        "SER": "S",
        "THR": "T",
        "VAL": "V",
        "TRP": "W",
        "TYR": "Y",
        "HSD": "H",
        "HSP": "H",
    }
    for aa in list_resid_arr:
        pattern_arr += dict_prot[aa]
    for aa in list_resid_npa1:
        pattern_npa1 += dict_prot[aa]
    for aa in list_resid_npa2:
        pattern_npa2 += dict_prot[aa]
    for ts in uni.trajectory:
        if args.verbose:
            logging.info("Frame " + str(uni.trajectory.frame))
            logging.info(str(ts))
        dict_pair_dist = run_one_frame_npa_arr(
            list_resid_npa1, list_resid_npa2, list_resid_arr, uni
        )
        tab_pair_dist[uni.trajectory.frame][0] = uni.trajectory.time
        list_key = ["Time in ps"]
        num_npa_proto = 0
        for npa in range(1, 3):
            if npa == 1:
                pattern_complete = "," + pattern_arr + "-" + pattern_npa1 + "1"
            else:
                pattern_complete = "," + pattern_arr + "-" + pattern_npa2 + "2"
            for aqp in range(1, 3):
                for p in range(1, 5):
                    num_npa_proto += 1
                    key1 = "ArR-NPA" + str(npa) + ",AQP" + str(aqp)\
                           + ",P" + str(p)
                    key2 = "AQP" + str(aqp) + ",P" + str(p) + pattern_complete\
                           + " (Angstroms)"
                    tab_pair_dist[uni.trajectory.frame][num_npa_proto] = \
                        dict_pair_dist[key1]
                    list_key.append(key2)
    list_out = [tab_pair_dist, list_key]
    return list_out


def run_all_frames_arr(list_resid_arr1, list_resid_arr2, col_number, uni):
    """
    Description: creates the table containing the pore diameter of an aquaporin
      over time.
    param list_resid_arr1: list containing the names of the residues
      constituting the ArR1.
    param list_resid_arr2: list containing the names of the residues
      constituting the ArR2.
    param col_number: number of columns in the table
      param uni: MDAnalysis universe.
    return: list containing an array and a key list. The table containing the
      pore diameter of an aquaporin over time.
    """
    if args.verbose:
        logging.info("\nFunction runForAllFrame")
        logging.info("The first input parameter is " + str(list_resid_arr1))
        logging.info("The second input parameter is " + str(list_resid_arr2))
        logging.info("The third input parameter is " + str(col_number))
        logging.info("The fourth input parameter is " + str(uni))
    nbr_frame = len(uni.trajectory)
    if args.verbose:
        logging.info("The number of frame is " + str(nbr_frame))
    tab_pair_dist = np.empty((nbr_frame, col_number))
    if args.verbose:
        logging.info(
            "The dimensions of the table of means and standard deviation"
            + " are as follows " + str(tab_pair_dist.shape)
        )
    list_key = []
    pattern1 = ""
    pattern2 = ""
    dict_prot = {
        "ALA": "A",
        "CYS": "C",
        "ASP": "D",
        "GLU": "E",
        "PHE": "F",
        "GLY": "G",
        "HIS": "H",
        "ILE": "I",
        "LYS": "K",
        "LEU": "L",
        "MET": "M",
        "ASN": "N",
        "PRO": "P",
        "GLN": "Q",
        "ARG": "R",
        "SER": "S",
        "THR": "T",
        "VAL": "V",
        "TRP": "W",
        "TYR": "Y",
        "HSD": "H",
        "HSP": "H",
    }
    for aa in list_resid_arr1:
        pattern1 += dict_prot[aa]
    for aa in list_resid_arr2:
        pattern2 += dict_prot[aa]
    for ts in uni.trajectory:
        if args.verbose:
            logging.info("Frame " + str(uni.trajectory.frame))
            logging.info(str(ts))
        dict_pair_dist = run_one_frame_arr(list_resid_arr1, list_resid_arr2,
                                           uni)
        tab_pair_dist[uni.trajectory.frame][0] = uni.trajectory.time
        list_key = ["Time (ps)"]
        num_proto = 0
        for aqp in range(1, 3):
            for p in range(1, 5):
                num_proto += 1
                key1 = "ArR1-ArR2,AQP" + str(aqp) + ",P" + str(p)
                key2 = (
                    "AQP" + str(aqp) + ",P" + str(p) + "," + pattern1 +
                    "-" + pattern2 + " (Angstroms)"
                )
                tab_pair_dist[uni.trajectory.frame][num_proto] = \
                    dict_pair_dist[key1]
                list_key.append(key2)
    list_out = [tab_pair_dist, list_key]
    return list_out


def create_final_dataframe(
        list_resid_arr1, list_resid_arr2, list_resid_npa1, list_resid_npa2, uni
):
    """
    Description: creates the final dataframe.
    param list_resid_arr1: list containing the arr1 pattern
    param list_resid_arr2: list containing the arr2 pattern
    param list_resid_npa1: list containing the npa1 pattern
    param list_resid_npa2: list containing the npa2 pattern
    param uni: MDAnalysis universe
    return: final dataframe
    """
    if args.verbose:
        logging.info("\nFunction runForAllFrame")
        logging.info("The first input parameter is " + str(list_resid_arr1))
        logging.info("The second input parameter is " + str(list_resid_arr2))
        logging.info("The third input parameter is " + str(list_resid_npa1))
        logging.info("The fourth input parameter is " + str(list_resid_npa2))
        logging.info("The fifth input parameter is " + str(uni))
    if list_resid_npa1 is None and list_resid_npa2 is None:
        if len(list_resid_arr1) >= 4 and len(list_resid_arr2) >= 4:
            columns_number = 9
            list_out = run_all_frames_arr(
                list_resid_arr1, list_resid_arr2, columns_number, uni
            )
        else:
            if args.verbose:
                if len(list_resid_arr1) < 4 <= len(list_resid_arr2):
                    logging.critical("Pattern1 must be provided")
                elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4:
                    logging.critical("Pattern1 and Pattern2 must be provided")
                elif len(list_resid_arr1) >= 4 > len(list_resid_arr2):
                    logging.critical("Pattern2 must be provided")
            print("Pattern error : please read the log file or help")
            sys.exit(2)
    elif list_resid_npa1 is None and list_resid_npa2 is not None:
        if args.verbose:
            if len(list_resid_arr1) < 4 <= len(list_resid_arr2):
                logging.critical("PatternArr1 must be provided")
                logging.critical("Please, do not put patternArr2")
                logging.critical("PatternNPA1 must be provided")
            elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4:
                logging.critical("PatternArr1 must be provided")
                logging.critical("PatternNPA1 must be provided")
            elif len(list_resid_arr1) >= 4 > len(list_resid_arr2):
                logging.critical("PatternNPA1 must be provided")
            else:
                logging.critical("Please, do not put patternArr2")
                logging.critical("PatternNPA1 must be provided")
        print("Pattern error : please read the log file or help")
        sys.exit(2)
    elif list_resid_npa1 is not None and list_resid_npa2 is None:
        if args.verbose:
            if len(list_resid_arr1) < 4 <= len(list_resid_arr2):
                logging.critical("PatternArr1 must be provided")
                logging.critical("Please, do not put patternArr2")
                logging.critical("PatternNPA2 must be provided")
            elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4:
                logging.critical("PatternArr1 must be provided")
                logging.critical("PatternNPA2 must be provided")
            elif len(list_resid_arr1) >= 4 > len(list_resid_arr2):
                logging.critical("PatternNPA1 must be provided")
            else:
                logging.critical("Please, do not put patternArr2")
                logging.critical("PatternNPA2 must be provided")
        print("Pattern error : please read the log file or help")
        sys.exit(2)
    else:
        if (
            list_resid_npa1 is not None
            and list_resid_npa2 is not None
            and len(list_resid_arr1) >= 4 > len(list_resid_arr2)
        ):
            columns_number = 17
            list_out = run_all_frames_npa_arr(
                list_resid_npa1, list_resid_npa2, list_resid_arr1,
                columns_number, uni
            )
        else:
            if len(list_resid_arr1) < 4 <= len(list_resid_arr2):
                logging.critical("PatternArr1 must be provided")
                logging.critical("Please, do not put patternArr2")
            elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4:
                logging.critical("Pattern1 must be provided")
            print("Pattern error : please read the log file or help")
            sys.exit(2)
    list_std = list(np.std(list_out[0], axis=0))
    list_mean = list(np.mean(list_out[0], axis=0))
    np_mean_around = list(np.around(list_mean, decimals=3))
    np_std_around = list(np.around(list_std, decimals=3))
    np_mean_around[0] = "Mean"
    np_std_around[0] = "Std"
    np_mean_around = np.array([np_mean_around])
    np_std_around = np.array([np_std_around])
    np_mean_std_around = np.concatenate((np_mean_around, np_std_around),
                                        axis=0)
    tab_pair_dist = np.concatenate((np_mean_std_around, list_out[0]), axis=0)
    df_dist = pd.DataFrame(tab_pair_dist, columns=list_out[1])
    return df_dist


# Class


if __name__ == "__main__":
    args = parse_arguments()
    out_file_name = "test"
    file_name_log = out_file_name + ".log"
    if args.verbose:
        if args.log_output:
            if os.path.isfile(args.log_output):
                os.remove(args.log_output)
            logging.basicConfig(
                filename=args.log_output,
                format="%(levelname)s - %(message)s",
                level=logging.INFO,
            )
        else:
            if os.path.isfile(file_name_log):
                os.remove(file_name_log)
            logging.basicConfig(
                filename=file_name_log,
                format="%(levelname)s - %(message)s",
                level=logging.INFO,
            )
        logging.info("verbose mode on")
    gro_file = ''
    xtc_file = ''
    if args.gro_file:
        gro_file = os.path.basename(args.gro_file[0])
    else:
        logging.critical("A .gro file must be provided")
    if args.xtc_file:
        xtc_file = os.path.basename(args.xtc_file[0])
    else:
        logging.critical("A .xtc file must be provided")
    if args.work_directory:
        work_directory = args.work_directory[0]
    else:
        work_directory = os.path.dirname(args.gro_file[0])
        if not work_directory.endswith("/"):
            work_directory += "/"
    output_directory = args.output_directory[0]
    if args.output:
        output = args.output
    else:
        output = ""
    os.chdir(work_directory)
    if args.gro_file_ext and args.xtc_file_ext:
        u = Mda.Universe(
            args.gro_file[0],
            args.xtc_file[0],
            topology_format=args.gro_file_ext[0],
            format=args.xtc_file_ext[0],
        )
    else:
        u = Mda.Universe(args.gro_file[0], args.xtc_file[0])
    if args.verbose:
        logging.info("Load universe : " + str(u))
    if args.npa_one:
        resid_npa1 = args.npa_one[0].split()
    else:
        resid_npa1 = args.npa_one
    if args.npa_second:
        resid_npa2 = args.npa_second[0].split()
    else:
        resid_npa2 = args.npa_second
    if args.arr_one:
        resid_arr1 = args.arr_one[0].split()
    else:
        resid_arr1 = args.arr_one
    if args.arr_second:
        resid_arr2 = args.arr_second[0].split()
    else:
        resid_arr2 = args.arr_second
    df_final = create_final_dataframe(resid_arr1, resid_arr2,
                                      resid_npa1, resid_npa2, u)
    if len(output_directory) > 2:
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)
        os.chdir(output_directory)
    if len(output) > 2:
        df_final.to_csv(output, sep="\t", index=False)
    else:
        file_name_tabular = out_file_name + "PairDist.tabular"
        df_final.to_csv(file_name_tabular, sep="\t", index=False)
author	agpetit
date	Fri, 27 May 2022 07:36:52 +0000
parents	e227d80e53be
children	a4097272ade0