calculate_diameter: calculate_pore_diameter

comparison calculate_pore_diameter_aqp.py @ 0:e227d80e53be draft

"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/aubi_piaf commit 48a10de1b21f94ab8019d9d0e4a43e0bd9d0c31e-dirty"

author	agpetit
date	Wed, 25 May 2022 07:17:24 +0000
parents
children	a4097272ade0

comparison

equal deleted inserted replaced

--1:000000000000
+:e227d80e53be
+#!/usr/bin/env python3
+# coding: utf-8
+"""
+This script allows to launch a calculation of the pore diameter of an aquaporin
+as a function of time from a .gro file and an .xtc file.
+At the end of the script, a table containing the evolution of the distance over
+time is created.
+# USAGE : calculate_pore_diameter_aqp.py -xtc xtc_file -gro gro_file
+-npao npa1 Pattern (optional) -npas npa2 Pattern (optional)
+-arro arR1 Pattern on H5 (optional) -arrs arR2 Pattern on HE (optional)
+-v verbose (optional) -w work_directory (optional)
+-o output_directory (optional) --gro_file_ext extension of gro file
+(optional) --xtc_file_ext extension of xtc file (optional)
+-log name of log file (optional) -o name of output file (optional).
+"""
+__all__ = [
+"calculate_length_proto",
+"search_pattern",
+"run_one_frame_npa_arr",
+"run_one_frame_arr",
+"run_all_frames_npa_arr",
+"run_all_frames_arr",
+"create_config_file",
+]
+__author__ = "Agnès-Elisabeth Petit"
+__date__ = "29/04/2021"
+__version__ = "1.0"
+__copyright__ = "(c) 2022 CC-BY-NC-SA"
+# Library import
+import argparse
+import configparser
+import logging
+import os
+import re
+import sys
+import MDAnalysis as Mda
+import numpy as np
+import pandas as pd
+# Global variables
+# Functions
+def test_setup():
+global args
+args = parse_arguments()
+args.verbose = True
+def parse_arguments():
+parser = argparse.ArgumentParser(
+description="This script allows to launch a calculation of the pore"
++ " diameter of an aquaporin as a function of time from"
++ " a .gro file and an .xtc file.",
+formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+prefix_chars="-",
+add_help=True,
+)
+parser.add_argument(
+"-v",
+"--verbose",
+action="store_true",
+help="""Information messages to stderr""",
+)
+parser.add_argument(
+"-gro", "--gro_file", type=str, nargs=1, help="""My input .gro file"""
+)
+parser.add_argument(
+"-xtc", "--xtc_file", type=str, nargs=1, help="""My input .xtc file"""
+)
+parser.add_argument(
+"--gro_file_ext", type=str, nargs=1, help="""My input .gro file"""
+)
+parser.add_argument(
+"--xtc_file_ext", type=str, nargs=1, help="""My input .xtc file"""
+)
+parser.add_argument(
+"-w", "--work_directory", type=str, nargs=1,
+help="""It's work Directory"""
+)
+parser.add_argument(
+"-od",
+"--output_directory",
+default="stdout",
+type=str,
+nargs=1,
+help="""It's output Directory""",
+)
+parser.add_argument(
+"-npao",
+"--npa_one",
+type=str,
+nargs=1,
+help="""3-letter pattern of the 3 amino acids of npa1""",
+)
+parser.add_argument(
+"-npas",
+"--npa_second",
+type=str,
+nargs=1,
+help="""3-letter pattern of the 3 amino acids of npa2""",
+)
+parser.add_argument(
+"-arro",
+"--arr_one",
+type=str,
+default="",
+nargs=1,
+help="""3-letter pattern of the 4 amino acids of first ArR on H5""",
+)
+parser.add_argument(
+"-arrs",
+"--arr_second",
+type=str,
+default="",
+nargs=1,
+help="""3-letter pattern of the 4 amino acids of second ArR on HE""",
+)
+parser.add_argument("-o", "--output", help="""Output for files""")
+parser.add_argument("-log", "--log_output", help="""Output for files""")
+return parser.parse_args()
+def search_amino_acids_pattern(list_pattern, uni):
+"""
+Description:Creates a list that contains two lists.
+The first one contains the residue numbers and the second one the residue
+names for each amino acid in the pattern.
+param list_pattern: list of the names of each amino acid of the pattern.
+param uni: MDAnalysis universe.
+return: list containing two lists. One containing residue numbers
+and the other containing residue names.
+"""
+if args.verbose:
+logging.info("\nFunction search_amino_acids_pattern")
+logging.info("The input list is " + str(list_pattern))
+logging.info("The input universe is " + str(uni))
+if len(list_pattern) == 3:
+slct_pattern = (
+"(resname "
++ list_pattern[0]
++ " or resname "
++ list_pattern[1]
++ " or resname "
++ list_pattern[2]
++ ") and name CA"
+)
+else:
+slct_pattern = (
+"(resname "
++ list_pattern[0]
++ " or resname "
++ list_pattern[1]
++ " or resname "
++ list_pattern[2]
++ " or resname "
++ list_pattern[3]
++ ") and name CA"
+)
+if args.verbose:
+logging.info("The selected pattern is : " + slct_pattern)
+list_atm_npa = uni.select_atoms(slct_pattern).residues
+list_all_num_resid = list_atm_npa.resids
+if args.verbose:
+logging.info(
+"The list containing all the residue numbers associated"
++ " with the pattern is : " + str(list_all_num_resid)
+)
+list_all_name_resid = list_atm_npa.resnames
+if args.verbose:
+logging.info(
+"The list containing all the residue names associated with the "
++ "pattern is : " + str(list_all_name_resid)
+)
+list_all_resid = [list_all_num_resid, list_all_name_resid]
+if args.verbose:
+logging.info(
+"The list containing the lists of numbers and names of residues"
++ " associated with the pattern is : " + str(list_all_resid)
+)
+return list_all_resid
+def calculate_length_proto(list_num_resid):
+"""
+Description : calculate the length of each protomer according to the number
+of amino acids corresponding to the pattern.
+param list_num_resid: list of amino acid numbers corresponding to the
+pattern in the protein.
+return: list of the length of each protomer.
+"""
+if args.verbose:
+logging.info("\nFunction calculate_length_proto")
+logging.info("The input list is " + str(list_num_resid))
+list_length_proto = [1, 0, 0, 0, 0, 0, 0, 0]
+ind_list_length_proto = 0
+for i, v in enumerate(list_num_resid[1:]):
+if int(v) < int(list_num_resid[i]):
+ind_list_length_proto += 1
+list_length_proto[ind_list_length_proto] += 1
+else:
+list_length_proto[ind_list_length_proto] += 1
+if args.verbose:
+for i, v in enumerate(list_length_proto):
+logging.info(
+"The length of the protomer "
++ str(i + 1)
++ " is "
++ str(v)
++ " residues"
+)
+return list_length_proto
+def create_config_file(list_length_proto):
+"""
+Description : creates a Configparser object that describes the start and
+end indices of each protomer.
+param list_length_proto: list of the length of each protomer.
+return: Configparser object that describes the start and end indices
+of each protomer.
+"""
+if args.verbose:
+logging.info("\nFunction create_config_file")
+logging.info("The input list is " + str(list_length_proto))
+config = configparser.ConfigParser()
+proto_prev = ""
+for aqp in range(1, 3):
+for p in range(1, 5):
+if aqp == 1 and p == 1:
+proto_current = "AQP" + str(aqp) + ",P" + str(p)
+config[proto_current] = {}
+config.set(proto_current, "start", str(0))
+config.set(proto_current, "end", str(list_length_proto[0] - 1))
+proto_prev = proto_current
+if args.verbose:
+logging.info(proto_current + " starts at index "
++ str(config["AQP1,P1"]["start"])
++ " and finish at index "
++ str(config["AQP1,P1"]["end"]))
+else:
+proto_current = "AQP" + str(aqp) + ",P" + str(p)
+config[proto_current] = {}
+if aqp == 1 and p == 2:
+config.set(proto_current, "start",
+str(int(config[proto_prev]["start"]) +
+list_length_proto[p-2] - 1))
+else:
+config.set(proto_current, "start",
+str(int(config[proto_prev]["start"])
++ list_length_proto[p-2]))
+config.set(proto_current, "end",
+str(int(config[proto_prev]["end"])
++ list_length_proto[p-1]))
+proto_prev = proto_current
+if args.verbose:
+logging.info(
+proto_current
++ " starts at index "
++ config[proto_current]["start"]
++ " and finish at index "
++ config[proto_current]["end"]
+)
+if args.verbose:
+logging.info(
+"The sections of the settings object are : "
++ str(config.sections())
+)
+logging.info(
+"The keys for each section of the settings object are: "
++ str(list(config["AQP1,P3"].keys()))
+)
+return config
+def search_pattern(list_all_resid, config, list_pattern, pattern_resid, uni):
+"""
+Description : creates the dictionary that contains the name, number and
+position of the residue and the key for the following dictionary.
+param list_all_resid: list containing two lists. One containing residue
+numbers and the other containing residue names.
+param config: Configparser object that describes the start and end indices
+of each protomer.
+param list_pattern: list of the names of each amino acid of the pattern.
+param pattern_resid: string that gives the pattern (NPA1,NPA2,ArR1 ou ArR2)
+in which we are.
+param uni: MDAnalysis universe
+return: dictionary which contains as key the name of the
+aquaporin, the protomer and the residue, as well as the residue number.
+The value contains the name, the number and the position of the residue,
+as well as the key of the next dictionary.
+"""
+if args.verbose:
+logging.info("\nFunction search_pattern")
+logging.info("The first input argument is " + str(list_all_resid))
+logging.info("The second input argument is a configParser object")
+logging.info("The third input argument is " + str(list_pattern))
+logging.info("The fourth input argument is " + str(pattern_resid))
+logging.info("The fourth input argument is " + str(uni))
+num_ca_npa = 0
+num_ca_arr = 0
+dict_resid_pattern = {}
+str_pattern = "".join(list_pattern)
+for aqp in range(1, 3):
+for p in range(1, 5):
+list_ind_temp_resid = []
+list_ind_final_resid = []
+proto = "AQP" + str(aqp) + ",P" + str(p)
+ss_name_list = list(
+list_all_resid[1][
+int(config[proto]["start"]): int(config[proto]["end"])
+]
+)
+ss_name_str = "".join(ss_name_list)
+pattern = re.compile(str_pattern)
+for v in pattern.finditer(ss_name_str):
+if args.verbose:
+logging.info(
+"The pattern " + str(pattern) + " is searched in "
++ ss_name_str
+)
+if aqp == 1 and p == 1:
+list_ind_temp_resid.append(int(v.start() / 3))
+else:
+list_ind_temp_resid.append(
+int(v.start() / 3) + int(config[proto]["start"])
+)
+if args.verbose:
+logging.info("The temporary index list is "
++ str(list_ind_temp_resid))
+for ind_current in list_ind_temp_resid:
+val_one = list_all_resid[0][ind_current]
+val_two = list_all_resid[0][ind_current + 1]
+val_three = list_all_resid[0][ind_current + 2]
+if val_one + 1 == val_two and val_one + 2 == val_three:
+if pattern_resid == "ArR1" or pattern_resid == "ArR2":
+val_four = list_all_resid[0][ind_current + 3]
+if val_one + 3 == val_four:
+list_ind_final_resid.append(ind_current)
+else:
+list_ind_final_resid.append(ind_current)
+list_ind_final_resid.append(ind_current + 1)
+list_ind_final_resid.append(ind_current + 2)
+if len(list_ind_final_resid) == 0:
+if args.verbose:
+logging.critical("The pattern is not found "
++ "in the protein sequence")
+print("The pattern is not found in the protein sequence")
+sys.exit(1)
+if args.verbose:
+logging.info("The final index list is "
++ str(list_ind_final_resid))
+ind_list = []
+for ind, val_ind in enumerate(list_ind_final_resid):
+ind_list.append(ind)
+key = (
+proto
++ ","
++ list_all_resid[1][val_ind]
++ ","
++ str(list_all_resid[0][val_ind])
+)
+if args.verbose:
+logging.info("The key of dictionary is " + key)
+slct_resid = (
+"(resid " + str(list_all_resid[0][val_ind])
++ ") and name CA"
+)
+if args.verbose:
+logging.info("The selected residue is : " + slct_resid)
+resid = uni.select_atoms(slct_resid)
+if args.verbose:
+logging.info("The AtomGroup is " + str(resid))
+if pattern_resid == "ArR1":
+new_key = "ArR1," + proto
+if args.verbose:
+logging.info("The key of next dictionary is "
++ new_key)
+position_ca = resid.positions[num_ca_arr]
+num_ca_arr += 1
+elif pattern_resid == "ArR2":
+new_key = "ArR2," + proto
+if args.verbose:
+logging.info("The key of next dictionary is "
++ new_key)
+position_ca = resid.positions[num_ca_arr]
+num_ca_arr += 1
+else:
+if (ind - 3) in ind_list or pattern_resid == "NPA2":
+new_key = "NPA2," + proto
+if args.verbose:
+logging.info("The key of next dictionary is "
++ new_key)
+position_ca = resid.positions[num_ca_npa]
+else:
+new_key = "NPA1," + proto
+if args.verbose:
+logging.info("The key of next dictionary is "
++ new_key)
+position_ca = resid.positions[num_ca_npa]
+dict_resid_pattern[key] = [
+list_all_resid[1][val_ind],
+list_all_resid[0][val_ind],
+position_ca,
+new_key,
+]
+if args.verbose:
+logging.info(
+"The value associated at the key "
++ key
++ " is "
++ str(dict_resid_pattern[key])
+)
+if len(list_ind_final_resid) > 1:
+num_ca_npa += 1
+if args.verbose:
+logging.info("The dictionary contains" + str(dict_resid_pattern))
+return dict_resid_pattern
+def search_resid_positions(dict_resid_pattern, uni):
+"""
+Description : creates the dictionary that contains the position of the
+barycenter of the alpha carbons of NPA and ArR.
+param dict_resid_pattern: dictionary which contains as key the name of the
+aquaporin, the protomer and the residue, as well as the residue number.
+The value contains the name, the number and the position of the residue,
+as well as the key of the next dictionary.
+param uni: MDAnalysis universe.
+return: dictionary which contains as key the name of the pattern,
+the aquaporin and the protomer. The value associated with the key contain
+the position of the barycenter of the pattern.
+"""
+if args.verbose:
+logging.info("\nFunction search_resid_positions")
+logging.info("The input dictionary is " + str(dict_resid_pattern))
+logging.info("The input universe is " + str(uni))
+dict_pos_resid = {}
+for npa in range(1, 3):
+num_proto = 0
+for aqp in range(1, 3):
+for p in range(1, 5):
+list_atm_npa = []
+key_npa = "NPA" + str(npa) + ",AQP" + str(aqp) + ",P" + str(p)
+key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p)
+key_arr2 = "ArR2,AQP" + str(aqp) + ",P" + str(p)
+for k, v in dict_resid_pattern.items():
+if key_npa == v[3]:
+list_atm_npa.append(v[1])
+elif key_arr1 == v[3]:
+dict_pos_resid[key_arr1] = v[2]
+elif key_arr2 == v[3]:
+dict_pos_resid[key_arr2] = v[2]
+if len(list_atm_npa) > 0:
+slct_pattern = (
+"(resid "
++ str(list_atm_npa[0])
++ " or resid "
++ str(list_atm_npa[1])
++ " or resid "
++ str(list_atm_npa[2])
++ ") and name CA"
+)
+if args.verbose:
+logging.info("The selected pattern is " + slct_pattern)
+atom_group_pattern = uni.select_atoms(slct_pattern)
+if args.verbose:
+logging.info(str(atom_group_pattern))
+deb = 0 + 3 * num_proto
+fin = +3 + 3 * num_proto
+atm_proto_aqp = atom_group_pattern[deb:fin]
+if args.verbose:
+logging.info("deb : " + str(deb) + "   fin : "
++ str(fin))
+logging.info("The selected AtomGroup is "
++ str(atm_proto_aqp))
+dict_pos_resid[key_npa] = atm_proto_aqp.centroid()
+num_proto += 1
+if args.verbose:
+logging.info("The dictionary contains " + str(dict_pos_resid))
+return dict_pos_resid
+def calculate_euclidean_distance(dict_pos_resid, choice_measure):
+"""
+Description : creates the dictionary that contains the Euclidean distance
+between ArR and NPA1 or NPA2.
+param dict_pos_resid: dictionary which contains as key the name of the
+pattern, the aquaporin and the protomer.
+The value contains the position of the barycenter of the pattern.
+param choice_measure: variable that contains ArR-ArR or NPA-ArR.
+return: dictionary which contains as key the name of the couple,
+the aquaporin and the protomer.
+The value contains the euclidean distance between the barycenters of
+the two motifs of the couple.
+"""
+if args.verbose:
+logging.info("\nFunction calculate_euclidean_distance")
+logging.info("The input dictionary is " + str(dict_pos_resid))
+logging.info("The distance will be calculated between"
++ choice_measure)
+dict_pair_dist = {}
+if choice_measure == "NPA-ArR":
+for npa in range(1, 3):
+for aqp in range(1, 3):
+for p in range(1, 5):
+key_npa = "NPA" + str(npa) + ",AQP" + str(aqp) \
++ ",P" + str(p)
+key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p)
+new_key = "ArR-NPA" + str(npa) + ",AQP" + str(aqp) \
++ ",P" + str(p)
+dict_pair_dist[new_key] = np.linalg.norm(
+dict_pos_resid[key_npa] - dict_pos_resid[key_arr1]
+)
+if args.verbose:
+logging.info(
+"Key NPA : "
++ key_npa
++ "  value : "
++ str(dict_pos_resid[key_npa])
+)
+logging.info(
+"Key ArR : "
++ key_arr1
++ "  value : "
++ str(dict_pos_resid[key_arr1])
+)
+logging.info(
+"The value associated at the key "
++ new_key
++ " is "
++ str(dict_pair_dist[new_key])
+)
+else:
+for aqp in range(1, 3):
+for p in range(1, 5):
+key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p)
+key_arr2 = "ArR2,AQP" + str(aqp) + ",P" + str(p)
+new_key = "ArR1-ArR2,AQP" + str(aqp) + ",P" + str(p)
+dict_pair_dist[new_key] = np.linalg.norm(
+dict_pos_resid[key_arr1] - dict_pos_resid[key_arr2]
+)
+if args.verbose:
+logging.info(
+"Key ArR1 : "
++ key_arr1
++ "  value : "
++ str(dict_pos_resid[key_arr1])
+)
+logging.info(
+"Key ArR2 : "
++ key_arr2
++ "  value : "
++ str(dict_pos_resid[key_arr2])
+)
+logging.info(
+"The value associated at the key "
++ new_key
++ " is "
++ str(dict_pair_dist[new_key])
+)
+if args.verbose:
+logging.info("The  dictionary contains " + str(dict_pair_dist))
+return dict_pair_dist
+def run_one_frame_npa_arr(list_resid_npa1, list_resid_npa2, list_resid_arr,
+uni):
+"""
+Description: compiles the different functions to create the dict_pair_dist
+dictionary for one frame.
+param list_resid_npa1: list containing the names of the residues
+constituting the NPA1.
+param list_resid_npa2: list containing the names of the residues
+constituting the NPA2.
+param list_resid_arr: list containing the names of the residues
+constituting the ArR.
+param uni: MDAnalysis universe.
+return: dictionary which contains as key the name of the couple,
+the aquaporin and the protomer.
+The value contains the euclidean distance between the barycenters of
+the two motifs of the couple.
+"""
+if args.verbose:
+logging.info("\nFunction runForOneFrame")
+logging.info("The first input parameter is " + str(list_resid_npa1))
+logging.info("The second input parameter is " + str(list_resid_npa2))
+logging.info("The third input parameter is " + str(list_resid_arr))
+logging.info("The fourth input parameter is " + str(uni))
+choice_measure = "NPA-ArR"
+if list_resid_npa1 == list_resid_npa2:
+if args.verbose:
+logging.info("Pattern NPA1 identical to pattern NPA2")
+pattern = "NPA1"
+list_all_resid_npa = search_amino_acids_pattern(list_resid_npa1, uni)
+list_length_proto_npa = \
+calculate_length_proto(list(list_all_resid_npa[0]))
+config_npa = create_config_file(list_length_proto_npa)
+dict_resid_pattern = search_pattern(
+list_all_resid_npa, config_npa, list_resid_npa1, pattern, uni
+)
+else:
+if args.verbose:
+logging.info("Pattern NPA1 different from the NPA2 pattern")
+# NPA1
+pattern = "NPA1"
+list_all_resid_npa = search_amino_acids_pattern(list_resid_npa1, uni)
+list_length_proto_npa = \
+calculate_length_proto(list(list_all_resid_npa[0]))
+config_npa1 = create_config_file(list_length_proto_npa)
+dict_resid_pattern = search_pattern(
+list_all_resid_npa, config_npa1, list_resid_npa1, pattern, uni
+)
+# NPA2
+pattern = "NPA2"
+list_all_resid_npa = search_amino_acids_pattern(list_resid_npa2, uni)
+list_length_proto_npa = \
+calculate_length_proto(list(list_all_resid_npa[0]))
+config_npa2 = create_config_file(list_length_proto_npa)
+dict_resid_pattern.update(
+search_pattern(
+list_all_resid_npa, config_npa2, list_resid_npa2, pattern, uni
+)
+)
+# ArR
+pattern = "ArR1"
+list_all_resid_arr = search_amino_acids_pattern(list_resid_arr, uni)
+list_length_proto_arr = \
+calculate_length_proto(list(list_all_resid_arr[0]))
+config_arr = create_config_file(list_length_proto_arr)
+dict_resid_pattern.update(
+search_pattern(list_all_resid_arr, config_arr, list_resid_arr,
+pattern, uni)
+)
+# dict position
+dict_pos_resid = search_resid_positions(dict_resid_pattern, uni)
+# dict distance
+dict_pair_dist = \
+calculate_euclidean_distance(dict_pos_resid, choice_measure)
+return dict_pair_dist
+def run_one_frame_arr(list_resid_arr1, list_resid_arr2, uni):
+"""
+Description: compiles the different functions to create the dict_pair_dist
+dictionary for one frame.
+param list_resid_arr1: list containing the names of the residues
+constituting the ArR1.
+param list_resid_arr2: list_resid_arr2, list containing the names of
+the residues constituting the ArR2.
+param uni: MDAnalysis universe.
+return: dictionary which contains as key the name of the couple,
+the aquaporin and the protomer.
+The value contains the euclidean distance between the barycenters of
+the two motifs of the couple.
+"""
+if args.verbose:
+logging.info("\nFunction runForOneFrame")
+logging.info("The first input parameter is " + str(list_resid_arr1))
+logging.info("The second input parameter is " + str(list_resid_arr2))
+logging.info("The third input parameter is " + str(uni))
+dict_resid_pattern = {}
+# ArR1
+pattern = "ArR1"
+list_all_resid_arr1 = search_amino_acids_pattern(list_resid_arr1, uni)
+list_length_proto_arr1 = \
+calculate_length_proto(list(list_all_resid_arr1[0]))
+config_arr1 = create_config_file(list_length_proto_arr1)
+dict_resid_pattern.update(
+search_pattern(list_all_resid_arr1, config_arr1, list_resid_arr1,
+pattern, uni)
+)
+# ArR2
+pattern = "ArR2"
+list_all_resid_arr2 = search_amino_acids_pattern(list_resid_arr2, uni)
+list_length_proto_arr2 = \
+calculate_length_proto(list(list_all_resid_arr2[0]))
+config_arr2 = create_config_file(list_length_proto_arr2)
+dict_resid_pattern.update(
+search_pattern(list_all_resid_arr2, config_arr2, list_resid_arr2,
+pattern, uni)
+)
+# dict position
+dict_pos_resid = search_resid_positions(dict_resid_pattern, uni)
+# dict distance
+choice_measure = "ArR-ArR"
+dict_pair_dist = \
+calculate_euclidean_distance(dict_pos_resid, choice_measure)
+return dict_pair_dist
+def run_all_frames_npa_arr(
+list_resid_npa1, list_resid_npa2, list_resid_arr, col_number, uni
+):
+"""
+Description: creates the table containing the pore diameter of an aquaporin
+over time.
+param list_resid_npa1: list containing the names of the residues
+constituting the NPA1.
+param list_resid_npa2: list containing the names of the residues
+constituting the NPA2.
+param list_resid_arr: list containing the names of the residues
+constituting the ArR.
+param col_number: number of columns in the table
+param uni: MDAnalysis universe.
+return: list containing an array and a key list. The table containing the
+pore diameter of an aquaporin over time.
+"""
+if args.verbose:
+logging.info("\nFunction runForAllFrame")
+logging.info("The first input parameter is " + str(list_resid_npa1))
+logging.info("The second input parameter is " + str(list_resid_npa2))
+logging.info("The third input parameter is " + str(list_resid_arr))
+logging.info("The fourth input parameter is " + str(col_number))
+logging.info("The fifth input parameter is " + str(uni))
+nbr_frame = len(uni.trajectory)
+if args.verbose:
+logging.info("The number of frame is " + str(nbr_frame))
+tab_pair_dist = np.empty((nbr_frame, col_number))
+if args.verbose:
+logging.info(
+"The dimensions of the table of means and standard deviation are"
++ " as follows " + str(tab_pair_dist.shape)
+)
+list_key = []
+pattern_arr = ""
+pattern_npa1 = ""
+pattern_npa2 = ""
+dict_prot = {
+"ALA": "A",
+"CYS": "C",
+"ASP": "D",
+"GLU": "E",
+"PHE": "F",
+"GLY": "G",
+"HIS": "H",
+"ILE": "I",
+"LYS": "K",
+"LEU": "L",
+"MET": "M",
+"ASN": "N",
+"PRO": "P",
+"GLN": "Q",
+"ARG": "R",
+"SER": "S",
+"THR": "T",
+"VAL": "V",
+"TRP": "W",
+"TYR": "Y",
+"HSD": "H",
+"HSP": "H",
+}
+for aa in list_resid_arr:
+pattern_arr += dict_prot[aa]
+for aa in list_resid_npa1:
+pattern_npa1 += dict_prot[aa]
+for aa in list_resid_npa2:
+pattern_npa2 += dict_prot[aa]
+for ts in uni.trajectory:
+if args.verbose:
+logging.info("Frame " + str(uni.trajectory.frame))
+logging.info(str(ts))
+dict_pair_dist = run_one_frame_npa_arr(
+list_resid_npa1, list_resid_npa2, list_resid_arr, uni
+)
+tab_pair_dist[uni.trajectory.frame][0] = uni.trajectory.time
+list_key = ["Time in ps"]
+num_npa_proto = 0
+for npa in range(1, 3):
+if npa == 1:
+pattern_complete = "," + pattern_arr + "-" + pattern_npa1 + "1"
+else:
+pattern_complete = "," + pattern_arr + "-" + pattern_npa2 + "2"
+for aqp in range(1, 3):
+for p in range(1, 5):
+num_npa_proto += 1
+key1 = "ArR-NPA" + str(npa) + ",AQP" + str(aqp)\
++ ",P" + str(p)
+key2 = "AQP" + str(aqp) + ",P" + str(p) + pattern_complete\
++ " (Angstroms)"
+tab_pair_dist[uni.trajectory.frame][num_npa_proto] = \
+dict_pair_dist[key1]
+list_key.append(key2)
+list_out = [tab_pair_dist, list_key]
+return list_out
+def run_all_frames_arr(list_resid_arr1, list_resid_arr2, col_number, uni):
+"""
+Description: creates the table containing the pore diameter of an aquaporin
+over time.
+param list_resid_arr1: list containing the names of the residues
+constituting the ArR1.
+param list_resid_arr2: list containing the names of the residues
+constituting the ArR2.
+param col_number: number of columns in the table
+param uni: MDAnalysis universe.
+return: list containing an array and a key list. The table containing the
+pore diameter of an aquaporin over time.
+"""
+if args.verbose:
+logging.info("\nFunction runForAllFrame")
+logging.info("The first input parameter is " + str(list_resid_arr1))
+logging.info("The second input parameter is " + str(list_resid_arr2))
+logging.info("The third input parameter is " + str(col_number))
+logging.info("The fourth input parameter is " + str(uni))
+nbr_frame = len(uni.trajectory)
+if args.verbose:
+logging.info("The number of frame is " + str(nbr_frame))
+tab_pair_dist = np.empty((nbr_frame, col_number))
+if args.verbose:
+logging.info(
+"The dimensions of the table of means and standard deviation"
++ " are as follows " + str(tab_pair_dist.shape)
+)
+list_key = []
+pattern1 = ""
+pattern2 = ""
+dict_prot = {
+"ALA": "A",
+"CYS": "C",
+"ASP": "D",
+"GLU": "E",
+"PHE": "F",
+"GLY": "G",
+"HIS": "H",
+"ILE": "I",
+"LYS": "K",
+"LEU": "L",
+"MET": "M",
+"ASN": "N",
+"PRO": "P",
+"GLN": "Q",
+"ARG": "R",
+"SER": "S",
+"THR": "T",
+"VAL": "V",
+"TRP": "W",
+"TYR": "Y",
+"HSD": "H",
+"HSP": "H",
+}
+for aa in list_resid_arr1:
+pattern1 += dict_prot[aa]
+for aa in list_resid_arr2:
+pattern2 += dict_prot[aa]
+for ts in uni.trajectory:
+if args.verbose:
+logging.info("Frame " + str(uni.trajectory.frame))
+logging.info(str(ts))
+dict_pair_dist = run_one_frame_arr(list_resid_arr1, list_resid_arr2,
+uni)
+tab_pair_dist[uni.trajectory.frame][0] = uni.trajectory.time
+list_key = ["Time (ps)"]
+num_proto = 0
+for aqp in range(1, 3):
+for p in range(1, 5):
+num_proto += 1
+key1 = "ArR1-ArR2,AQP" + str(aqp) + ",P" + str(p)
+key2 = (
+"AQP" + str(aqp) + ",P" + str(p) + "," + pattern1 +
+"-" + pattern2 + " (Angstroms)"
+)
+tab_pair_dist[uni.trajectory.frame][num_proto] = \
+dict_pair_dist[key1]
+list_key.append(key2)
+list_out = [tab_pair_dist, list_key]
+return list_out
+def create_final_dataframe(
+list_resid_arr1, list_resid_arr2, list_resid_npa1, list_resid_npa2, uni
+):
+"""
+Description: creates the final dataframe.
+param list_resid_arr1: list containing the arr1 pattern
+param list_resid_arr2: list containing the arr2 pattern
+param list_resid_npa1: list containing the npa1 pattern
+param list_resid_npa2: list containing the npa2 pattern
+param uni: MDAnalysis universe
+return: final dataframe
+"""
+if args.verbose:
+logging.info("\nFunction runForAllFrame")
+logging.info("The first input parameter is " + str(list_resid_arr1))
+logging.info("The second input parameter is " + str(list_resid_arr2))
+logging.info("The third input parameter is " + str(list_resid_npa1))
+logging.info("The fourth input parameter is " + str(list_resid_npa2))
+logging.info("The fifth input parameter is " + str(uni))
+if list_resid_npa1 is None and list_resid_npa2 is None:
+if len(list_resid_arr1) >= 4 and len(list_resid_arr2) >= 4:
+columns_number = 9
+list_out = run_all_frames_arr(
+list_resid_arr1, list_resid_arr2, columns_number, uni
+)
+else:
+if args.verbose:
+if len(list_resid_arr1) < 4 <= len(list_resid_arr2):
+logging.critical("Pattern1 must be provided")
+elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4:
+logging.critical("Pattern1 and Pattern2 must be provided")
+elif len(list_resid_arr1) >= 4 > len(list_resid_arr2):
+logging.critical("Pattern2 must be provided")
+print("Pattern error : please read the log file or help")
+sys.exit(2)
+elif list_resid_npa1 is None and list_resid_npa2 is not None:
+if args.verbose:
+if len(list_resid_arr1) < 4 <= len(list_resid_arr2):
+logging.critical("PatternArr1 must be provided")
+logging.critical("Please, do not put patternArr2")
+logging.critical("PatternNPA1 must be provided")
+elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4:
+logging.critical("PatternArr1 must be provided")
+logging.critical("PatternNPA1 must be provided")
+elif len(list_resid_arr1) >= 4 > len(list_resid_arr2):
+logging.critical("PatternNPA1 must be provided")
+else:
+logging.critical("Please, do not put patternArr2")
+logging.critical("PatternNPA1 must be provided")
+print("Pattern error : please read the log file or help")
+sys.exit(2)
+elif list_resid_npa1 is not None and list_resid_npa2 is None:
+if args.verbose:
+if len(list_resid_arr1) < 4 <= len(list_resid_arr2):
+logging.critical("PatternArr1 must be provided")
+logging.critical("Please, do not put patternArr2")
+logging.critical("PatternNPA2 must be provided")
+elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4:
+logging.critical("PatternArr1 must be provided")
+logging.critical("PatternNPA2 must be provided")
+elif len(list_resid_arr1) >= 4 > len(list_resid_arr2):
+logging.critical("PatternNPA1 must be provided")
+else:
+logging.critical("Please, do not put patternArr2")
+logging.critical("PatternNPA2 must be provided")
+print("Pattern error : please read the log file or help")
+sys.exit(2)
+else:
+if (
+list_resid_npa1 is not None
+and list_resid_npa2 is not None
+and len(list_resid_arr1) >= 4 > len(list_resid_arr2)
+):
+columns_number = 17
+list_out = run_all_frames_npa_arr(
+list_resid_npa1, list_resid_npa2, list_resid_arr1,
+columns_number, uni
+)
+else:
+if len(list_resid_arr1) < 4 <= len(list_resid_arr2):
+logging.critical("PatternArr1 must be provided")
+logging.critical("Please, do not put patternArr2")
+elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4:
+logging.critical("Pattern1 must be provided")
+print("Pattern error : please read the log file or help")
+sys.exit(2)
+list_std = list(np.std(list_out[0], axis=0))
+list_mean = list(np.mean(list_out[0], axis=0))
+np_mean_around = list(np.around(list_mean, decimals=3))
+np_std_around = list(np.around(list_std, decimals=3))
+np_mean_around[0] = "Mean"
+np_std_around[0] = "Std"
+np_mean_around = np.array([np_mean_around])
+np_std_around = np.array([np_std_around])
+np_mean_std_around = np.concatenate((np_mean_around, np_std_around),
+axis=0)
+tab_pair_dist = np.concatenate((np_mean_std_around, list_out[0]), axis=0)
+df_dist = pd.DataFrame(tab_pair_dist, columns=list_out[1])
+return df_dist
+# Class
+if __name__ == "__main__":
+args = parse_arguments()
+out_file_name = "test"
+file_name_log = out_file_name + ".log"
+if args.verbose:
+if args.log_output:
+if os.path.isfile(args.log_output):
+os.remove(args.log_output)
+logging.basicConfig(
+filename=args.log_output,
+format="%(levelname)s - %(message)s",
+level=logging.INFO,
+)
+else:
+if os.path.isfile(file_name_log):
+os.remove(file_name_log)
+logging.basicConfig(
+filename=file_name_log,
+format="%(levelname)s - %(message)s",
+level=logging.INFO,
+)
+logging.info("verbose mode on")
+gro_file = ''
+xtc_file = ''
+if args.gro_file:
+gro_file = os.path.basename(args.gro_file[0])
+else:
+logging.critical("A .gro file must be provided")
+if args.xtc_file:
+xtc_file = os.path.basename(args.xtc_file[0])
+else:
+logging.critical("A .xtc file must be provided")
+if args.work_directory:
+work_directory = args.work_directory[0]
+else:
+work_directory = os.path.dirname(args.gro_file[0])
+if not work_directory.endswith("/"):
+work_directory += "/"
+output_directory = args.output_directory[0]
+if args.output:
+output = args.output
+else:
+output = ""
+os.chdir(work_directory)
+if args.gro_file_ext and args.xtc_file_ext:
+u = Mda.Universe(
+args.gro_file[0],
+args.xtc_file[0],
+topology_format=args.gro_file_ext[0],
+format=args.xtc_file_ext[0],
+)
+else:
+u = Mda.Universe(args.gro_file[0], args.xtc_file[0])
+if args.verbose:
+logging.info("Load universe : " + str(u))
+if args.npa_one:
+resid_npa1 = args.npa_one[0].split()
+else:
+resid_npa1 = args.npa_one
+if args.npa_second:
+resid_npa2 = args.npa_second[0].split()
+else:
+resid_npa2 = args.npa_second
+if args.arr_one:
+resid_arr1 = args.arr_one[0].split()
+else:
+resid_arr1 = args.arr_one
+if args.arr_second:
+resid_arr2 = args.arr_second[0].split()
+else:
+resid_arr2 = args.arr_second
+df_final = create_final_dataframe(resid_arr1, resid_arr2,
+resid_npa1, resid_npa2, u)
+if len(output_directory) > 2:
+if not os.path.exists(output_directory):
+os.makedirs(output_directory)
+os.chdir(output_directory)
+if len(output) > 2:
+df_final.to_csv(output, sep="\t", index=False)
+else:
+file_name_tabular = out_file_name + "PairDist.tabular"
+df_final.to_csv(file_name_tabular, sep="\t", index=False)

Mercurial > repos > agpetit > calculate_diameter

comparison calculate_pore_diameter_aqp.py @ 0:e227d80e53be draft