comparison calculate_pore_diameter_aqp.py @ 0:e227d80e53be draft

"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/aubi_piaf commit 48a10de1b21f94ab8019d9d0e4a43e0bd9d0c31e-dirty"
author agpetit
date Wed, 25 May 2022 07:17:24 +0000
parents
children a4097272ade0
comparison
equal deleted inserted replaced
-1:000000000000 0:e227d80e53be
1 #!/usr/bin/env python3
2
3 # coding: utf-8
4 """
5 This script allows to launch a calculation of the pore diameter of an aquaporin
6 as a function of time from a .gro file and an .xtc file.
7 At the end of the script, a table containing the evolution of the distance over
8 time is created.
9 # USAGE : calculate_pore_diameter_aqp.py -xtc xtc_file -gro gro_file
10 -npao npa1 Pattern (optional) -npas npa2 Pattern (optional)
11 -arro arR1 Pattern on H5 (optional) -arrs arR2 Pattern on HE (optional)
12 -v verbose (optional) -w work_directory (optional)
13 -o output_directory (optional) --gro_file_ext extension of gro file
14 (optional) --xtc_file_ext extension of xtc file (optional)
15 -log name of log file (optional) -o name of output file (optional).
16 """
17
18
19 __all__ = [
20 "calculate_length_proto",
21 "search_pattern",
22 "run_one_frame_npa_arr",
23 "run_one_frame_arr",
24 "run_all_frames_npa_arr",
25 "run_all_frames_arr",
26 "create_config_file",
27 ]
28 __author__ = "Agnès-Elisabeth Petit"
29 __date__ = "29/04/2021"
30 __version__ = "1.0"
31 __copyright__ = "(c) 2022 CC-BY-NC-SA"
32
33 # Library import
34 import argparse
35 import configparser
36 import logging
37 import os
38 import re
39 import sys
40
41 import MDAnalysis as Mda
42
43 import numpy as np
44
45 import pandas as pd
46
47 # Global variables
48
49 # Functions
50
51
52 def test_setup():
53 global args
54 args = parse_arguments()
55 args.verbose = True
56
57
58 def parse_arguments():
59 parser = argparse.ArgumentParser(
60 description="This script allows to launch a calculation of the pore"
61 + " diameter of an aquaporin as a function of time from"
62 + " a .gro file and an .xtc file.",
63 formatter_class=argparse.ArgumentDefaultsHelpFormatter,
64 prefix_chars="-",
65 add_help=True,
66 )
67 parser.add_argument(
68 "-v",
69 "--verbose",
70 action="store_true",
71 help="""Information messages to stderr""",
72 )
73 parser.add_argument(
74 "-gro", "--gro_file", type=str, nargs=1, help="""My input .gro file"""
75 )
76 parser.add_argument(
77 "-xtc", "--xtc_file", type=str, nargs=1, help="""My input .xtc file"""
78 )
79 parser.add_argument(
80 "--gro_file_ext", type=str, nargs=1, help="""My input .gro file"""
81 )
82 parser.add_argument(
83 "--xtc_file_ext", type=str, nargs=1, help="""My input .xtc file"""
84 )
85 parser.add_argument(
86 "-w", "--work_directory", type=str, nargs=1,
87 help="""It's work Directory"""
88 )
89 parser.add_argument(
90 "-od",
91 "--output_directory",
92 default="stdout",
93 type=str,
94 nargs=1,
95 help="""It's output Directory""",
96 )
97 parser.add_argument(
98 "-npao",
99 "--npa_one",
100 type=str,
101 nargs=1,
102 help="""3-letter pattern of the 3 amino acids of npa1""",
103 )
104 parser.add_argument(
105 "-npas",
106 "--npa_second",
107 type=str,
108 nargs=1,
109 help="""3-letter pattern of the 3 amino acids of npa2""",
110 )
111 parser.add_argument(
112 "-arro",
113 "--arr_one",
114 type=str,
115 default="",
116 nargs=1,
117 help="""3-letter pattern of the 4 amino acids of first ArR on H5""",
118 )
119 parser.add_argument(
120 "-arrs",
121 "--arr_second",
122 type=str,
123 default="",
124 nargs=1,
125 help="""3-letter pattern of the 4 amino acids of second ArR on HE""",
126 )
127 parser.add_argument("-o", "--output", help="""Output for files""")
128 parser.add_argument("-log", "--log_output", help="""Output for files""")
129 return parser.parse_args()
130
131
132 def search_amino_acids_pattern(list_pattern, uni):
133 """
134 Description:Creates a list that contains two lists.
135 The first one contains the residue numbers and the second one the residue
136 names for each amino acid in the pattern.
137 param list_pattern: list of the names of each amino acid of the pattern.
138 param uni: MDAnalysis universe.
139 return: list containing two lists. One containing residue numbers
140 and the other containing residue names.
141 """
142 if args.verbose:
143 logging.info("\nFunction search_amino_acids_pattern")
144 logging.info("The input list is " + str(list_pattern))
145 logging.info("The input universe is " + str(uni))
146 if len(list_pattern) == 3:
147 slct_pattern = (
148 "(resname "
149 + list_pattern[0]
150 + " or resname "
151 + list_pattern[1]
152 + " or resname "
153 + list_pattern[2]
154 + ") and name CA"
155 )
156 else:
157 slct_pattern = (
158 "(resname "
159 + list_pattern[0]
160 + " or resname "
161 + list_pattern[1]
162 + " or resname "
163 + list_pattern[2]
164 + " or resname "
165 + list_pattern[3]
166 + ") and name CA"
167 )
168 if args.verbose:
169 logging.info("The selected pattern is : " + slct_pattern)
170 list_atm_npa = uni.select_atoms(slct_pattern).residues
171 list_all_num_resid = list_atm_npa.resids
172 if args.verbose:
173 logging.info(
174 "The list containing all the residue numbers associated"
175 + " with the pattern is : " + str(list_all_num_resid)
176 )
177 list_all_name_resid = list_atm_npa.resnames
178 if args.verbose:
179 logging.info(
180 "The list containing all the residue names associated with the "
181 + "pattern is : " + str(list_all_name_resid)
182 )
183 list_all_resid = [list_all_num_resid, list_all_name_resid]
184 if args.verbose:
185 logging.info(
186 "The list containing the lists of numbers and names of residues"
187 + " associated with the pattern is : " + str(list_all_resid)
188 )
189 return list_all_resid
190
191
192 def calculate_length_proto(list_num_resid):
193 """
194 Description : calculate the length of each protomer according to the number
195 of amino acids corresponding to the pattern.
196 param list_num_resid: list of amino acid numbers corresponding to the
197 pattern in the protein.
198 return: list of the length of each protomer.
199 """
200 if args.verbose:
201 logging.info("\nFunction calculate_length_proto")
202 logging.info("The input list is " + str(list_num_resid))
203 list_length_proto = [1, 0, 0, 0, 0, 0, 0, 0]
204 ind_list_length_proto = 0
205 for i, v in enumerate(list_num_resid[1:]):
206 if int(v) < int(list_num_resid[i]):
207 ind_list_length_proto += 1
208 list_length_proto[ind_list_length_proto] += 1
209 else:
210 list_length_proto[ind_list_length_proto] += 1
211 if args.verbose:
212 for i, v in enumerate(list_length_proto):
213 logging.info(
214 "The length of the protomer "
215 + str(i + 1)
216 + " is "
217 + str(v)
218 + " residues"
219 )
220 return list_length_proto
221
222
223 def create_config_file(list_length_proto):
224 """
225 Description : creates a Configparser object that describes the start and
226 end indices of each protomer.
227 param list_length_proto: list of the length of each protomer.
228 return: Configparser object that describes the start and end indices
229 of each protomer.
230 """
231 if args.verbose:
232 logging.info("\nFunction create_config_file")
233 logging.info("The input list is " + str(list_length_proto))
234 config = configparser.ConfigParser()
235 proto_prev = ""
236 for aqp in range(1, 3):
237 for p in range(1, 5):
238 if aqp == 1 and p == 1:
239 proto_current = "AQP" + str(aqp) + ",P" + str(p)
240 config[proto_current] = {}
241 config.set(proto_current, "start", str(0))
242 config.set(proto_current, "end", str(list_length_proto[0] - 1))
243 proto_prev = proto_current
244 if args.verbose:
245 logging.info(proto_current + " starts at index "
246 + str(config["AQP1,P1"]["start"])
247 + " and finish at index "
248 + str(config["AQP1,P1"]["end"]))
249 else:
250 proto_current = "AQP" + str(aqp) + ",P" + str(p)
251 config[proto_current] = {}
252 if aqp == 1 and p == 2:
253 config.set(proto_current, "start",
254 str(int(config[proto_prev]["start"]) +
255 list_length_proto[p-2] - 1))
256 else:
257 config.set(proto_current, "start",
258 str(int(config[proto_prev]["start"])
259 + list_length_proto[p-2]))
260 config.set(proto_current, "end",
261 str(int(config[proto_prev]["end"])
262 + list_length_proto[p-1]))
263 proto_prev = proto_current
264 if args.verbose:
265 logging.info(
266 proto_current
267 + " starts at index "
268 + config[proto_current]["start"]
269 + " and finish at index "
270 + config[proto_current]["end"]
271 )
272 if args.verbose:
273 logging.info(
274 "The sections of the settings object are : "
275 + str(config.sections())
276 )
277 logging.info(
278 "The keys for each section of the settings object are: "
279 + str(list(config["AQP1,P3"].keys()))
280 )
281 return config
282
283
284 def search_pattern(list_all_resid, config, list_pattern, pattern_resid, uni):
285 """
286 Description : creates the dictionary that contains the name, number and
287 position of the residue and the key for the following dictionary.
288 param list_all_resid: list containing two lists. One containing residue
289 numbers and the other containing residue names.
290 param config: Configparser object that describes the start and end indices
291 of each protomer.
292 param list_pattern: list of the names of each amino acid of the pattern.
293 param pattern_resid: string that gives the pattern (NPA1,NPA2,ArR1 ou ArR2)
294 in which we are.
295 param uni: MDAnalysis universe
296 return: dictionary which contains as key the name of the
297 aquaporin, the protomer and the residue, as well as the residue number.
298 The value contains the name, the number and the position of the residue,
299 as well as the key of the next dictionary.
300 """
301 if args.verbose:
302 logging.info("\nFunction search_pattern")
303 logging.info("The first input argument is " + str(list_all_resid))
304 logging.info("The second input argument is a configParser object")
305 logging.info("The third input argument is " + str(list_pattern))
306 logging.info("The fourth input argument is " + str(pattern_resid))
307 logging.info("The fourth input argument is " + str(uni))
308 num_ca_npa = 0
309 num_ca_arr = 0
310 dict_resid_pattern = {}
311 str_pattern = "".join(list_pattern)
312 for aqp in range(1, 3):
313 for p in range(1, 5):
314 list_ind_temp_resid = []
315 list_ind_final_resid = []
316 proto = "AQP" + str(aqp) + ",P" + str(p)
317 ss_name_list = list(
318 list_all_resid[1][
319 int(config[proto]["start"]): int(config[proto]["end"])
320 ]
321 )
322 ss_name_str = "".join(ss_name_list)
323 pattern = re.compile(str_pattern)
324 for v in pattern.finditer(ss_name_str):
325 if args.verbose:
326 logging.info(
327 "The pattern " + str(pattern) + " is searched in "
328 + ss_name_str
329 )
330 if aqp == 1 and p == 1:
331 list_ind_temp_resid.append(int(v.start() / 3))
332 else:
333 list_ind_temp_resid.append(
334 int(v.start() / 3) + int(config[proto]["start"])
335 )
336 if args.verbose:
337 logging.info("The temporary index list is "
338 + str(list_ind_temp_resid))
339 for ind_current in list_ind_temp_resid:
340 val_one = list_all_resid[0][ind_current]
341 val_two = list_all_resid[0][ind_current + 1]
342 val_three = list_all_resid[0][ind_current + 2]
343 if val_one + 1 == val_two and val_one + 2 == val_three:
344 if pattern_resid == "ArR1" or pattern_resid == "ArR2":
345 val_four = list_all_resid[0][ind_current + 3]
346 if val_one + 3 == val_four:
347 list_ind_final_resid.append(ind_current)
348 else:
349 list_ind_final_resid.append(ind_current)
350 list_ind_final_resid.append(ind_current + 1)
351 list_ind_final_resid.append(ind_current + 2)
352 if len(list_ind_final_resid) == 0:
353 if args.verbose:
354 logging.critical("The pattern is not found "
355 + "in the protein sequence")
356 print("The pattern is not found in the protein sequence")
357 sys.exit(1)
358 if args.verbose:
359 logging.info("The final index list is "
360 + str(list_ind_final_resid))
361 ind_list = []
362 for ind, val_ind in enumerate(list_ind_final_resid):
363 ind_list.append(ind)
364 key = (
365 proto
366 + ","
367 + list_all_resid[1][val_ind]
368 + ","
369 + str(list_all_resid[0][val_ind])
370 )
371 if args.verbose:
372 logging.info("The key of dictionary is " + key)
373 slct_resid = (
374 "(resid " + str(list_all_resid[0][val_ind])
375 + ") and name CA"
376 )
377 if args.verbose:
378 logging.info("The selected residue is : " + slct_resid)
379 resid = uni.select_atoms(slct_resid)
380 if args.verbose:
381 logging.info("The AtomGroup is " + str(resid))
382 if pattern_resid == "ArR1":
383 new_key = "ArR1," + proto
384 if args.verbose:
385 logging.info("The key of next dictionary is "
386 + new_key)
387 position_ca = resid.positions[num_ca_arr]
388 num_ca_arr += 1
389 elif pattern_resid == "ArR2":
390 new_key = "ArR2," + proto
391 if args.verbose:
392 logging.info("The key of next dictionary is "
393 + new_key)
394 position_ca = resid.positions[num_ca_arr]
395 num_ca_arr += 1
396 else:
397 if (ind - 3) in ind_list or pattern_resid == "NPA2":
398 new_key = "NPA2," + proto
399 if args.verbose:
400 logging.info("The key of next dictionary is "
401 + new_key)
402 position_ca = resid.positions[num_ca_npa]
403 else:
404 new_key = "NPA1," + proto
405 if args.verbose:
406 logging.info("The key of next dictionary is "
407 + new_key)
408 position_ca = resid.positions[num_ca_npa]
409 dict_resid_pattern[key] = [
410 list_all_resid[1][val_ind],
411 list_all_resid[0][val_ind],
412 position_ca,
413 new_key,
414 ]
415 if args.verbose:
416 logging.info(
417 "The value associated at the key "
418 + key
419 + " is "
420 + str(dict_resid_pattern[key])
421 )
422 if len(list_ind_final_resid) > 1:
423 num_ca_npa += 1
424 if args.verbose:
425 logging.info("The dictionary contains" + str(dict_resid_pattern))
426 return dict_resid_pattern
427
428
429 def search_resid_positions(dict_resid_pattern, uni):
430 """
431 Description : creates the dictionary that contains the position of the
432 barycenter of the alpha carbons of NPA and ArR.
433 param dict_resid_pattern: dictionary which contains as key the name of the
434 aquaporin, the protomer and the residue, as well as the residue number.
435 The value contains the name, the number and the position of the residue,
436 as well as the key of the next dictionary.
437 param uni: MDAnalysis universe.
438 return: dictionary which contains as key the name of the pattern,
439 the aquaporin and the protomer. The value associated with the key contain
440 the position of the barycenter of the pattern.
441 """
442 if args.verbose:
443 logging.info("\nFunction search_resid_positions")
444 logging.info("The input dictionary is " + str(dict_resid_pattern))
445 logging.info("The input universe is " + str(uni))
446 dict_pos_resid = {}
447 for npa in range(1, 3):
448 num_proto = 0
449 for aqp in range(1, 3):
450 for p in range(1, 5):
451 list_atm_npa = []
452 key_npa = "NPA" + str(npa) + ",AQP" + str(aqp) + ",P" + str(p)
453 key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p)
454 key_arr2 = "ArR2,AQP" + str(aqp) + ",P" + str(p)
455 for k, v in dict_resid_pattern.items():
456 if key_npa == v[3]:
457 list_atm_npa.append(v[1])
458 elif key_arr1 == v[3]:
459 dict_pos_resid[key_arr1] = v[2]
460 elif key_arr2 == v[3]:
461 dict_pos_resid[key_arr2] = v[2]
462 if len(list_atm_npa) > 0:
463 slct_pattern = (
464 "(resid "
465 + str(list_atm_npa[0])
466 + " or resid "
467 + str(list_atm_npa[1])
468 + " or resid "
469 + str(list_atm_npa[2])
470 + ") and name CA"
471 )
472 if args.verbose:
473 logging.info("The selected pattern is " + slct_pattern)
474 atom_group_pattern = uni.select_atoms(slct_pattern)
475 if args.verbose:
476 logging.info(str(atom_group_pattern))
477 deb = 0 + 3 * num_proto
478 fin = +3 + 3 * num_proto
479 atm_proto_aqp = atom_group_pattern[deb:fin]
480 if args.verbose:
481 logging.info("deb : " + str(deb) + " fin : "
482 + str(fin))
483 logging.info("The selected AtomGroup is "
484 + str(atm_proto_aqp))
485 dict_pos_resid[key_npa] = atm_proto_aqp.centroid()
486 num_proto += 1
487 if args.verbose:
488 logging.info("The dictionary contains " + str(dict_pos_resid))
489 return dict_pos_resid
490
491
492 def calculate_euclidean_distance(dict_pos_resid, choice_measure):
493 """
494 Description : creates the dictionary that contains the Euclidean distance
495 between ArR and NPA1 or NPA2.
496 param dict_pos_resid: dictionary which contains as key the name of the
497 pattern, the aquaporin and the protomer.
498 The value contains the position of the barycenter of the pattern.
499 param choice_measure: variable that contains ArR-ArR or NPA-ArR.
500 return: dictionary which contains as key the name of the couple,
501 the aquaporin and the protomer.
502 The value contains the euclidean distance between the barycenters of
503 the two motifs of the couple.
504 """
505 if args.verbose:
506 logging.info("\nFunction calculate_euclidean_distance")
507 logging.info("The input dictionary is " + str(dict_pos_resid))
508 logging.info("The distance will be calculated between"
509 + choice_measure)
510 dict_pair_dist = {}
511 if choice_measure == "NPA-ArR":
512 for npa in range(1, 3):
513 for aqp in range(1, 3):
514 for p in range(1, 5):
515 key_npa = "NPA" + str(npa) + ",AQP" + str(aqp) \
516 + ",P" + str(p)
517 key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p)
518 new_key = "ArR-NPA" + str(npa) + ",AQP" + str(aqp) \
519 + ",P" + str(p)
520 dict_pair_dist[new_key] = np.linalg.norm(
521 dict_pos_resid[key_npa] - dict_pos_resid[key_arr1]
522 )
523 if args.verbose:
524 logging.info(
525 "Key NPA : "
526 + key_npa
527 + " value : "
528 + str(dict_pos_resid[key_npa])
529 )
530 logging.info(
531 "Key ArR : "
532 + key_arr1
533 + " value : "
534 + str(dict_pos_resid[key_arr1])
535 )
536 logging.info(
537 "The value associated at the key "
538 + new_key
539 + " is "
540 + str(dict_pair_dist[new_key])
541 )
542 else:
543 for aqp in range(1, 3):
544 for p in range(1, 5):
545 key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p)
546 key_arr2 = "ArR2,AQP" + str(aqp) + ",P" + str(p)
547 new_key = "ArR1-ArR2,AQP" + str(aqp) + ",P" + str(p)
548 dict_pair_dist[new_key] = np.linalg.norm(
549 dict_pos_resid[key_arr1] - dict_pos_resid[key_arr2]
550 )
551 if args.verbose:
552 logging.info(
553 "Key ArR1 : "
554 + key_arr1
555 + " value : "
556 + str(dict_pos_resid[key_arr1])
557 )
558 logging.info(
559 "Key ArR2 : "
560 + key_arr2
561 + " value : "
562 + str(dict_pos_resid[key_arr2])
563 )
564 logging.info(
565 "The value associated at the key "
566 + new_key
567 + " is "
568 + str(dict_pair_dist[new_key])
569 )
570 if args.verbose:
571 logging.info("The dictionary contains " + str(dict_pair_dist))
572 return dict_pair_dist
573
574
575 def run_one_frame_npa_arr(list_resid_npa1, list_resid_npa2, list_resid_arr,
576 uni):
577 """
578 Description: compiles the different functions to create the dict_pair_dist
579 dictionary for one frame.
580 param list_resid_npa1: list containing the names of the residues
581 constituting the NPA1.
582 param list_resid_npa2: list containing the names of the residues
583 constituting the NPA2.
584 param list_resid_arr: list containing the names of the residues
585 constituting the ArR.
586 param uni: MDAnalysis universe.
587 return: dictionary which contains as key the name of the couple,
588 the aquaporin and the protomer.
589 The value contains the euclidean distance between the barycenters of
590 the two motifs of the couple.
591 """
592 if args.verbose:
593 logging.info("\nFunction runForOneFrame")
594 logging.info("The first input parameter is " + str(list_resid_npa1))
595 logging.info("The second input parameter is " + str(list_resid_npa2))
596 logging.info("The third input parameter is " + str(list_resid_arr))
597 logging.info("The fourth input parameter is " + str(uni))
598 choice_measure = "NPA-ArR"
599 if list_resid_npa1 == list_resid_npa2:
600 if args.verbose:
601 logging.info("Pattern NPA1 identical to pattern NPA2")
602 pattern = "NPA1"
603 list_all_resid_npa = search_amino_acids_pattern(list_resid_npa1, uni)
604 list_length_proto_npa = \
605 calculate_length_proto(list(list_all_resid_npa[0]))
606 config_npa = create_config_file(list_length_proto_npa)
607 dict_resid_pattern = search_pattern(
608 list_all_resid_npa, config_npa, list_resid_npa1, pattern, uni
609 )
610 else:
611 if args.verbose:
612 logging.info("Pattern NPA1 different from the NPA2 pattern")
613 # NPA1
614 pattern = "NPA1"
615 list_all_resid_npa = search_amino_acids_pattern(list_resid_npa1, uni)
616 list_length_proto_npa = \
617 calculate_length_proto(list(list_all_resid_npa[0]))
618 config_npa1 = create_config_file(list_length_proto_npa)
619 dict_resid_pattern = search_pattern(
620 list_all_resid_npa, config_npa1, list_resid_npa1, pattern, uni
621 )
622 # NPA2
623 pattern = "NPA2"
624 list_all_resid_npa = search_amino_acids_pattern(list_resid_npa2, uni)
625 list_length_proto_npa = \
626 calculate_length_proto(list(list_all_resid_npa[0]))
627 config_npa2 = create_config_file(list_length_proto_npa)
628 dict_resid_pattern.update(
629 search_pattern(
630 list_all_resid_npa, config_npa2, list_resid_npa2, pattern, uni
631 )
632 )
633 # ArR
634 pattern = "ArR1"
635 list_all_resid_arr = search_amino_acids_pattern(list_resid_arr, uni)
636 list_length_proto_arr = \
637 calculate_length_proto(list(list_all_resid_arr[0]))
638 config_arr = create_config_file(list_length_proto_arr)
639 dict_resid_pattern.update(
640 search_pattern(list_all_resid_arr, config_arr, list_resid_arr,
641 pattern, uni)
642 )
643 # dict position
644 dict_pos_resid = search_resid_positions(dict_resid_pattern, uni)
645 # dict distance
646 dict_pair_dist = \
647 calculate_euclidean_distance(dict_pos_resid, choice_measure)
648 return dict_pair_dist
649
650
651 def run_one_frame_arr(list_resid_arr1, list_resid_arr2, uni):
652 """
653 Description: compiles the different functions to create the dict_pair_dist
654 dictionary for one frame.
655 param list_resid_arr1: list containing the names of the residues
656 constituting the ArR1.
657 param list_resid_arr2: list_resid_arr2, list containing the names of
658 the residues constituting the ArR2.
659 param uni: MDAnalysis universe.
660 return: dictionary which contains as key the name of the couple,
661 the aquaporin and the protomer.
662 The value contains the euclidean distance between the barycenters of
663 the two motifs of the couple.
664 """
665 if args.verbose:
666 logging.info("\nFunction runForOneFrame")
667 logging.info("The first input parameter is " + str(list_resid_arr1))
668 logging.info("The second input parameter is " + str(list_resid_arr2))
669 logging.info("The third input parameter is " + str(uni))
670 dict_resid_pattern = {}
671 # ArR1
672 pattern = "ArR1"
673 list_all_resid_arr1 = search_amino_acids_pattern(list_resid_arr1, uni)
674 list_length_proto_arr1 = \
675 calculate_length_proto(list(list_all_resid_arr1[0]))
676 config_arr1 = create_config_file(list_length_proto_arr1)
677 dict_resid_pattern.update(
678 search_pattern(list_all_resid_arr1, config_arr1, list_resid_arr1,
679 pattern, uni)
680 )
681 # ArR2
682 pattern = "ArR2"
683 list_all_resid_arr2 = search_amino_acids_pattern(list_resid_arr2, uni)
684 list_length_proto_arr2 = \
685 calculate_length_proto(list(list_all_resid_arr2[0]))
686 config_arr2 = create_config_file(list_length_proto_arr2)
687 dict_resid_pattern.update(
688 search_pattern(list_all_resid_arr2, config_arr2, list_resid_arr2,
689 pattern, uni)
690 )
691 # dict position
692 dict_pos_resid = search_resid_positions(dict_resid_pattern, uni)
693 # dict distance
694 choice_measure = "ArR-ArR"
695 dict_pair_dist = \
696 calculate_euclidean_distance(dict_pos_resid, choice_measure)
697 return dict_pair_dist
698
699
700 def run_all_frames_npa_arr(
701 list_resid_npa1, list_resid_npa2, list_resid_arr, col_number, uni
702 ):
703 """
704 Description: creates the table containing the pore diameter of an aquaporin
705 over time.
706 param list_resid_npa1: list containing the names of the residues
707 constituting the NPA1.
708 param list_resid_npa2: list containing the names of the residues
709 constituting the NPA2.
710 param list_resid_arr: list containing the names of the residues
711 constituting the ArR.
712 param col_number: number of columns in the table
713 param uni: MDAnalysis universe.
714 return: list containing an array and a key list. The table containing the
715 pore diameter of an aquaporin over time.
716 """
717 if args.verbose:
718 logging.info("\nFunction runForAllFrame")
719 logging.info("The first input parameter is " + str(list_resid_npa1))
720 logging.info("The second input parameter is " + str(list_resid_npa2))
721 logging.info("The third input parameter is " + str(list_resid_arr))
722 logging.info("The fourth input parameter is " + str(col_number))
723 logging.info("The fifth input parameter is " + str(uni))
724 nbr_frame = len(uni.trajectory)
725 if args.verbose:
726 logging.info("The number of frame is " + str(nbr_frame))
727 tab_pair_dist = np.empty((nbr_frame, col_number))
728 if args.verbose:
729 logging.info(
730 "The dimensions of the table of means and standard deviation are"
731 + " as follows " + str(tab_pair_dist.shape)
732 )
733 list_key = []
734 pattern_arr = ""
735 pattern_npa1 = ""
736 pattern_npa2 = ""
737 dict_prot = {
738 "ALA": "A",
739 "CYS": "C",
740 "ASP": "D",
741 "GLU": "E",
742 "PHE": "F",
743 "GLY": "G",
744 "HIS": "H",
745 "ILE": "I",
746 "LYS": "K",
747 "LEU": "L",
748 "MET": "M",
749 "ASN": "N",
750 "PRO": "P",
751 "GLN": "Q",
752 "ARG": "R",
753 "SER": "S",
754 "THR": "T",
755 "VAL": "V",
756 "TRP": "W",
757 "TYR": "Y",
758 "HSD": "H",
759 "HSP": "H",
760 }
761 for aa in list_resid_arr:
762 pattern_arr += dict_prot[aa]
763 for aa in list_resid_npa1:
764 pattern_npa1 += dict_prot[aa]
765 for aa in list_resid_npa2:
766 pattern_npa2 += dict_prot[aa]
767 for ts in uni.trajectory:
768 if args.verbose:
769 logging.info("Frame " + str(uni.trajectory.frame))
770 logging.info(str(ts))
771 dict_pair_dist = run_one_frame_npa_arr(
772 list_resid_npa1, list_resid_npa2, list_resid_arr, uni
773 )
774 tab_pair_dist[uni.trajectory.frame][0] = uni.trajectory.time
775 list_key = ["Time in ps"]
776 num_npa_proto = 0
777 for npa in range(1, 3):
778 if npa == 1:
779 pattern_complete = "," + pattern_arr + "-" + pattern_npa1 + "1"
780 else:
781 pattern_complete = "," + pattern_arr + "-" + pattern_npa2 + "2"
782 for aqp in range(1, 3):
783 for p in range(1, 5):
784 num_npa_proto += 1
785 key1 = "ArR-NPA" + str(npa) + ",AQP" + str(aqp)\
786 + ",P" + str(p)
787 key2 = "AQP" + str(aqp) + ",P" + str(p) + pattern_complete\
788 + " (Angstroms)"
789 tab_pair_dist[uni.trajectory.frame][num_npa_proto] = \
790 dict_pair_dist[key1]
791 list_key.append(key2)
792 list_out = [tab_pair_dist, list_key]
793 return list_out
794
795
796 def run_all_frames_arr(list_resid_arr1, list_resid_arr2, col_number, uni):
797 """
798 Description: creates the table containing the pore diameter of an aquaporin
799 over time.
800 param list_resid_arr1: list containing the names of the residues
801 constituting the ArR1.
802 param list_resid_arr2: list containing the names of the residues
803 constituting the ArR2.
804 param col_number: number of columns in the table
805 param uni: MDAnalysis universe.
806 return: list containing an array and a key list. The table containing the
807 pore diameter of an aquaporin over time.
808 """
809 if args.verbose:
810 logging.info("\nFunction runForAllFrame")
811 logging.info("The first input parameter is " + str(list_resid_arr1))
812 logging.info("The second input parameter is " + str(list_resid_arr2))
813 logging.info("The third input parameter is " + str(col_number))
814 logging.info("The fourth input parameter is " + str(uni))
815 nbr_frame = len(uni.trajectory)
816 if args.verbose:
817 logging.info("The number of frame is " + str(nbr_frame))
818 tab_pair_dist = np.empty((nbr_frame, col_number))
819 if args.verbose:
820 logging.info(
821 "The dimensions of the table of means and standard deviation"
822 + " are as follows " + str(tab_pair_dist.shape)
823 )
824 list_key = []
825 pattern1 = ""
826 pattern2 = ""
827 dict_prot = {
828 "ALA": "A",
829 "CYS": "C",
830 "ASP": "D",
831 "GLU": "E",
832 "PHE": "F",
833 "GLY": "G",
834 "HIS": "H",
835 "ILE": "I",
836 "LYS": "K",
837 "LEU": "L",
838 "MET": "M",
839 "ASN": "N",
840 "PRO": "P",
841 "GLN": "Q",
842 "ARG": "R",
843 "SER": "S",
844 "THR": "T",
845 "VAL": "V",
846 "TRP": "W",
847 "TYR": "Y",
848 "HSD": "H",
849 "HSP": "H",
850 }
851 for aa in list_resid_arr1:
852 pattern1 += dict_prot[aa]
853 for aa in list_resid_arr2:
854 pattern2 += dict_prot[aa]
855 for ts in uni.trajectory:
856 if args.verbose:
857 logging.info("Frame " + str(uni.trajectory.frame))
858 logging.info(str(ts))
859 dict_pair_dist = run_one_frame_arr(list_resid_arr1, list_resid_arr2,
860 uni)
861 tab_pair_dist[uni.trajectory.frame][0] = uni.trajectory.time
862 list_key = ["Time (ps)"]
863 num_proto = 0
864 for aqp in range(1, 3):
865 for p in range(1, 5):
866 num_proto += 1
867 key1 = "ArR1-ArR2,AQP" + str(aqp) + ",P" + str(p)
868 key2 = (
869 "AQP" + str(aqp) + ",P" + str(p) + "," + pattern1 +
870 "-" + pattern2 + " (Angstroms)"
871 )
872 tab_pair_dist[uni.trajectory.frame][num_proto] = \
873 dict_pair_dist[key1]
874 list_key.append(key2)
875 list_out = [tab_pair_dist, list_key]
876 return list_out
877
878
879 def create_final_dataframe(
880 list_resid_arr1, list_resid_arr2, list_resid_npa1, list_resid_npa2, uni
881 ):
882 """
883 Description: creates the final dataframe.
884 param list_resid_arr1: list containing the arr1 pattern
885 param list_resid_arr2: list containing the arr2 pattern
886 param list_resid_npa1: list containing the npa1 pattern
887 param list_resid_npa2: list containing the npa2 pattern
888 param uni: MDAnalysis universe
889 return: final dataframe
890 """
891 if args.verbose:
892 logging.info("\nFunction runForAllFrame")
893 logging.info("The first input parameter is " + str(list_resid_arr1))
894 logging.info("The second input parameter is " + str(list_resid_arr2))
895 logging.info("The third input parameter is " + str(list_resid_npa1))
896 logging.info("The fourth input parameter is " + str(list_resid_npa2))
897 logging.info("The fifth input parameter is " + str(uni))
898 if list_resid_npa1 is None and list_resid_npa2 is None:
899 if len(list_resid_arr1) >= 4 and len(list_resid_arr2) >= 4:
900 columns_number = 9
901 list_out = run_all_frames_arr(
902 list_resid_arr1, list_resid_arr2, columns_number, uni
903 )
904 else:
905 if args.verbose:
906 if len(list_resid_arr1) < 4 <= len(list_resid_arr2):
907 logging.critical("Pattern1 must be provided")
908 elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4:
909 logging.critical("Pattern1 and Pattern2 must be provided")
910 elif len(list_resid_arr1) >= 4 > len(list_resid_arr2):
911 logging.critical("Pattern2 must be provided")
912 print("Pattern error : please read the log file or help")
913 sys.exit(2)
914 elif list_resid_npa1 is None and list_resid_npa2 is not None:
915 if args.verbose:
916 if len(list_resid_arr1) < 4 <= len(list_resid_arr2):
917 logging.critical("PatternArr1 must be provided")
918 logging.critical("Please, do not put patternArr2")
919 logging.critical("PatternNPA1 must be provided")
920 elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4:
921 logging.critical("PatternArr1 must be provided")
922 logging.critical("PatternNPA1 must be provided")
923 elif len(list_resid_arr1) >= 4 > len(list_resid_arr2):
924 logging.critical("PatternNPA1 must be provided")
925 else:
926 logging.critical("Please, do not put patternArr2")
927 logging.critical("PatternNPA1 must be provided")
928 print("Pattern error : please read the log file or help")
929 sys.exit(2)
930 elif list_resid_npa1 is not None and list_resid_npa2 is None:
931 if args.verbose:
932 if len(list_resid_arr1) < 4 <= len(list_resid_arr2):
933 logging.critical("PatternArr1 must be provided")
934 logging.critical("Please, do not put patternArr2")
935 logging.critical("PatternNPA2 must be provided")
936 elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4:
937 logging.critical("PatternArr1 must be provided")
938 logging.critical("PatternNPA2 must be provided")
939 elif len(list_resid_arr1) >= 4 > len(list_resid_arr2):
940 logging.critical("PatternNPA1 must be provided")
941 else:
942 logging.critical("Please, do not put patternArr2")
943 logging.critical("PatternNPA2 must be provided")
944 print("Pattern error : please read the log file or help")
945 sys.exit(2)
946 else:
947 if (
948 list_resid_npa1 is not None
949 and list_resid_npa2 is not None
950 and len(list_resid_arr1) >= 4 > len(list_resid_arr2)
951 ):
952 columns_number = 17
953 list_out = run_all_frames_npa_arr(
954 list_resid_npa1, list_resid_npa2, list_resid_arr1,
955 columns_number, uni
956 )
957 else:
958 if len(list_resid_arr1) < 4 <= len(list_resid_arr2):
959 logging.critical("PatternArr1 must be provided")
960 logging.critical("Please, do not put patternArr2")
961 elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4:
962 logging.critical("Pattern1 must be provided")
963 print("Pattern error : please read the log file or help")
964 sys.exit(2)
965 list_std = list(np.std(list_out[0], axis=0))
966 list_mean = list(np.mean(list_out[0], axis=0))
967 np_mean_around = list(np.around(list_mean, decimals=3))
968 np_std_around = list(np.around(list_std, decimals=3))
969 np_mean_around[0] = "Mean"
970 np_std_around[0] = "Std"
971 np_mean_around = np.array([np_mean_around])
972 np_std_around = np.array([np_std_around])
973 np_mean_std_around = np.concatenate((np_mean_around, np_std_around),
974 axis=0)
975 tab_pair_dist = np.concatenate((np_mean_std_around, list_out[0]), axis=0)
976 df_dist = pd.DataFrame(tab_pair_dist, columns=list_out[1])
977 return df_dist
978
979
980 # Class
981
982
983 if __name__ == "__main__":
984 args = parse_arguments()
985 out_file_name = "test"
986 file_name_log = out_file_name + ".log"
987 if args.verbose:
988 if args.log_output:
989 if os.path.isfile(args.log_output):
990 os.remove(args.log_output)
991 logging.basicConfig(
992 filename=args.log_output,
993 format="%(levelname)s - %(message)s",
994 level=logging.INFO,
995 )
996 else:
997 if os.path.isfile(file_name_log):
998 os.remove(file_name_log)
999 logging.basicConfig(
1000 filename=file_name_log,
1001 format="%(levelname)s - %(message)s",
1002 level=logging.INFO,
1003 )
1004 logging.info("verbose mode on")
1005 gro_file = ''
1006 xtc_file = ''
1007 if args.gro_file:
1008 gro_file = os.path.basename(args.gro_file[0])
1009 else:
1010 logging.critical("A .gro file must be provided")
1011 if args.xtc_file:
1012 xtc_file = os.path.basename(args.xtc_file[0])
1013 else:
1014 logging.critical("A .xtc file must be provided")
1015 if args.work_directory:
1016 work_directory = args.work_directory[0]
1017 else:
1018 work_directory = os.path.dirname(args.gro_file[0])
1019 if not work_directory.endswith("/"):
1020 work_directory += "/"
1021 output_directory = args.output_directory[0]
1022 if args.output:
1023 output = args.output
1024 else:
1025 output = ""
1026 os.chdir(work_directory)
1027 if args.gro_file_ext and args.xtc_file_ext:
1028 u = Mda.Universe(
1029 args.gro_file[0],
1030 args.xtc_file[0],
1031 topology_format=args.gro_file_ext[0],
1032 format=args.xtc_file_ext[0],
1033 )
1034 else:
1035 u = Mda.Universe(args.gro_file[0], args.xtc_file[0])
1036 if args.verbose:
1037 logging.info("Load universe : " + str(u))
1038 if args.npa_one:
1039 resid_npa1 = args.npa_one[0].split()
1040 else:
1041 resid_npa1 = args.npa_one
1042 if args.npa_second:
1043 resid_npa2 = args.npa_second[0].split()
1044 else:
1045 resid_npa2 = args.npa_second
1046 if args.arr_one:
1047 resid_arr1 = args.arr_one[0].split()
1048 else:
1049 resid_arr1 = args.arr_one
1050 if args.arr_second:
1051 resid_arr2 = args.arr_second[0].split()
1052 else:
1053 resid_arr2 = args.arr_second
1054 df_final = create_final_dataframe(resid_arr1, resid_arr2,
1055 resid_npa1, resid_npa2, u)
1056 if len(output_directory) > 2:
1057 if not os.path.exists(output_directory):
1058 os.makedirs(output_directory)
1059 os.chdir(output_directory)
1060 if len(output) > 2:
1061 df_final.to_csv(output, sep="\t", index=False)
1062 else:
1063 file_name_tabular = out_file_name + "PairDist.tabular"
1064 df_final.to_csv(file_name_tabular, sep="\t", index=False)