Mercurial > repos > agpetit > calculate_diameter
comparison calculate_pore_diameter_aqp.py @ 0:e227d80e53be draft
"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/aubi_piaf commit 48a10de1b21f94ab8019d9d0e4a43e0bd9d0c31e-dirty"
author | agpetit |
---|---|
date | Wed, 25 May 2022 07:17:24 +0000 |
parents | |
children | a4097272ade0 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e227d80e53be |
---|---|
1 #!/usr/bin/env python3 | |
2 | |
3 # coding: utf-8 | |
4 """ | |
5 This script allows to launch a calculation of the pore diameter of an aquaporin | |
6 as a function of time from a .gro file and an .xtc file. | |
7 At the end of the script, a table containing the evolution of the distance over | |
8 time is created. | |
9 # USAGE : calculate_pore_diameter_aqp.py -xtc xtc_file -gro gro_file | |
10 -npao npa1 Pattern (optional) -npas npa2 Pattern (optional) | |
11 -arro arR1 Pattern on H5 (optional) -arrs arR2 Pattern on HE (optional) | |
12 -v verbose (optional) -w work_directory (optional) | |
13 -o output_directory (optional) --gro_file_ext extension of gro file | |
14 (optional) --xtc_file_ext extension of xtc file (optional) | |
15 -log name of log file (optional) -o name of output file (optional). | |
16 """ | |
17 | |
18 | |
19 __all__ = [ | |
20 "calculate_length_proto", | |
21 "search_pattern", | |
22 "run_one_frame_npa_arr", | |
23 "run_one_frame_arr", | |
24 "run_all_frames_npa_arr", | |
25 "run_all_frames_arr", | |
26 "create_config_file", | |
27 ] | |
28 __author__ = "Agnès-Elisabeth Petit" | |
29 __date__ = "29/04/2021" | |
30 __version__ = "1.0" | |
31 __copyright__ = "(c) 2022 CC-BY-NC-SA" | |
32 | |
33 # Library import | |
34 import argparse | |
35 import configparser | |
36 import logging | |
37 import os | |
38 import re | |
39 import sys | |
40 | |
41 import MDAnalysis as Mda | |
42 | |
43 import numpy as np | |
44 | |
45 import pandas as pd | |
46 | |
47 # Global variables | |
48 | |
49 # Functions | |
50 | |
51 | |
52 def test_setup(): | |
53 global args | |
54 args = parse_arguments() | |
55 args.verbose = True | |
56 | |
57 | |
58 def parse_arguments(): | |
59 parser = argparse.ArgumentParser( | |
60 description="This script allows to launch a calculation of the pore" | |
61 + " diameter of an aquaporin as a function of time from" | |
62 + " a .gro file and an .xtc file.", | |
63 formatter_class=argparse.ArgumentDefaultsHelpFormatter, | |
64 prefix_chars="-", | |
65 add_help=True, | |
66 ) | |
67 parser.add_argument( | |
68 "-v", | |
69 "--verbose", | |
70 action="store_true", | |
71 help="""Information messages to stderr""", | |
72 ) | |
73 parser.add_argument( | |
74 "-gro", "--gro_file", type=str, nargs=1, help="""My input .gro file""" | |
75 ) | |
76 parser.add_argument( | |
77 "-xtc", "--xtc_file", type=str, nargs=1, help="""My input .xtc file""" | |
78 ) | |
79 parser.add_argument( | |
80 "--gro_file_ext", type=str, nargs=1, help="""My input .gro file""" | |
81 ) | |
82 parser.add_argument( | |
83 "--xtc_file_ext", type=str, nargs=1, help="""My input .xtc file""" | |
84 ) | |
85 parser.add_argument( | |
86 "-w", "--work_directory", type=str, nargs=1, | |
87 help="""It's work Directory""" | |
88 ) | |
89 parser.add_argument( | |
90 "-od", | |
91 "--output_directory", | |
92 default="stdout", | |
93 type=str, | |
94 nargs=1, | |
95 help="""It's output Directory""", | |
96 ) | |
97 parser.add_argument( | |
98 "-npao", | |
99 "--npa_one", | |
100 type=str, | |
101 nargs=1, | |
102 help="""3-letter pattern of the 3 amino acids of npa1""", | |
103 ) | |
104 parser.add_argument( | |
105 "-npas", | |
106 "--npa_second", | |
107 type=str, | |
108 nargs=1, | |
109 help="""3-letter pattern of the 3 amino acids of npa2""", | |
110 ) | |
111 parser.add_argument( | |
112 "-arro", | |
113 "--arr_one", | |
114 type=str, | |
115 default="", | |
116 nargs=1, | |
117 help="""3-letter pattern of the 4 amino acids of first ArR on H5""", | |
118 ) | |
119 parser.add_argument( | |
120 "-arrs", | |
121 "--arr_second", | |
122 type=str, | |
123 default="", | |
124 nargs=1, | |
125 help="""3-letter pattern of the 4 amino acids of second ArR on HE""", | |
126 ) | |
127 parser.add_argument("-o", "--output", help="""Output for files""") | |
128 parser.add_argument("-log", "--log_output", help="""Output for files""") | |
129 return parser.parse_args() | |
130 | |
131 | |
132 def search_amino_acids_pattern(list_pattern, uni): | |
133 """ | |
134 Description:Creates a list that contains two lists. | |
135 The first one contains the residue numbers and the second one the residue | |
136 names for each amino acid in the pattern. | |
137 param list_pattern: list of the names of each amino acid of the pattern. | |
138 param uni: MDAnalysis universe. | |
139 return: list containing two lists. One containing residue numbers | |
140 and the other containing residue names. | |
141 """ | |
142 if args.verbose: | |
143 logging.info("\nFunction search_amino_acids_pattern") | |
144 logging.info("The input list is " + str(list_pattern)) | |
145 logging.info("The input universe is " + str(uni)) | |
146 if len(list_pattern) == 3: | |
147 slct_pattern = ( | |
148 "(resname " | |
149 + list_pattern[0] | |
150 + " or resname " | |
151 + list_pattern[1] | |
152 + " or resname " | |
153 + list_pattern[2] | |
154 + ") and name CA" | |
155 ) | |
156 else: | |
157 slct_pattern = ( | |
158 "(resname " | |
159 + list_pattern[0] | |
160 + " or resname " | |
161 + list_pattern[1] | |
162 + " or resname " | |
163 + list_pattern[2] | |
164 + " or resname " | |
165 + list_pattern[3] | |
166 + ") and name CA" | |
167 ) | |
168 if args.verbose: | |
169 logging.info("The selected pattern is : " + slct_pattern) | |
170 list_atm_npa = uni.select_atoms(slct_pattern).residues | |
171 list_all_num_resid = list_atm_npa.resids | |
172 if args.verbose: | |
173 logging.info( | |
174 "The list containing all the residue numbers associated" | |
175 + " with the pattern is : " + str(list_all_num_resid) | |
176 ) | |
177 list_all_name_resid = list_atm_npa.resnames | |
178 if args.verbose: | |
179 logging.info( | |
180 "The list containing all the residue names associated with the " | |
181 + "pattern is : " + str(list_all_name_resid) | |
182 ) | |
183 list_all_resid = [list_all_num_resid, list_all_name_resid] | |
184 if args.verbose: | |
185 logging.info( | |
186 "The list containing the lists of numbers and names of residues" | |
187 + " associated with the pattern is : " + str(list_all_resid) | |
188 ) | |
189 return list_all_resid | |
190 | |
191 | |
192 def calculate_length_proto(list_num_resid): | |
193 """ | |
194 Description : calculate the length of each protomer according to the number | |
195 of amino acids corresponding to the pattern. | |
196 param list_num_resid: list of amino acid numbers corresponding to the | |
197 pattern in the protein. | |
198 return: list of the length of each protomer. | |
199 """ | |
200 if args.verbose: | |
201 logging.info("\nFunction calculate_length_proto") | |
202 logging.info("The input list is " + str(list_num_resid)) | |
203 list_length_proto = [1, 0, 0, 0, 0, 0, 0, 0] | |
204 ind_list_length_proto = 0 | |
205 for i, v in enumerate(list_num_resid[1:]): | |
206 if int(v) < int(list_num_resid[i]): | |
207 ind_list_length_proto += 1 | |
208 list_length_proto[ind_list_length_proto] += 1 | |
209 else: | |
210 list_length_proto[ind_list_length_proto] += 1 | |
211 if args.verbose: | |
212 for i, v in enumerate(list_length_proto): | |
213 logging.info( | |
214 "The length of the protomer " | |
215 + str(i + 1) | |
216 + " is " | |
217 + str(v) | |
218 + " residues" | |
219 ) | |
220 return list_length_proto | |
221 | |
222 | |
223 def create_config_file(list_length_proto): | |
224 """ | |
225 Description : creates a Configparser object that describes the start and | |
226 end indices of each protomer. | |
227 param list_length_proto: list of the length of each protomer. | |
228 return: Configparser object that describes the start and end indices | |
229 of each protomer. | |
230 """ | |
231 if args.verbose: | |
232 logging.info("\nFunction create_config_file") | |
233 logging.info("The input list is " + str(list_length_proto)) | |
234 config = configparser.ConfigParser() | |
235 proto_prev = "" | |
236 for aqp in range(1, 3): | |
237 for p in range(1, 5): | |
238 if aqp == 1 and p == 1: | |
239 proto_current = "AQP" + str(aqp) + ",P" + str(p) | |
240 config[proto_current] = {} | |
241 config.set(proto_current, "start", str(0)) | |
242 config.set(proto_current, "end", str(list_length_proto[0] - 1)) | |
243 proto_prev = proto_current | |
244 if args.verbose: | |
245 logging.info(proto_current + " starts at index " | |
246 + str(config["AQP1,P1"]["start"]) | |
247 + " and finish at index " | |
248 + str(config["AQP1,P1"]["end"])) | |
249 else: | |
250 proto_current = "AQP" + str(aqp) + ",P" + str(p) | |
251 config[proto_current] = {} | |
252 if aqp == 1 and p == 2: | |
253 config.set(proto_current, "start", | |
254 str(int(config[proto_prev]["start"]) + | |
255 list_length_proto[p-2] - 1)) | |
256 else: | |
257 config.set(proto_current, "start", | |
258 str(int(config[proto_prev]["start"]) | |
259 + list_length_proto[p-2])) | |
260 config.set(proto_current, "end", | |
261 str(int(config[proto_prev]["end"]) | |
262 + list_length_proto[p-1])) | |
263 proto_prev = proto_current | |
264 if args.verbose: | |
265 logging.info( | |
266 proto_current | |
267 + " starts at index " | |
268 + config[proto_current]["start"] | |
269 + " and finish at index " | |
270 + config[proto_current]["end"] | |
271 ) | |
272 if args.verbose: | |
273 logging.info( | |
274 "The sections of the settings object are : " | |
275 + str(config.sections()) | |
276 ) | |
277 logging.info( | |
278 "The keys for each section of the settings object are: " | |
279 + str(list(config["AQP1,P3"].keys())) | |
280 ) | |
281 return config | |
282 | |
283 | |
284 def search_pattern(list_all_resid, config, list_pattern, pattern_resid, uni): | |
285 """ | |
286 Description : creates the dictionary that contains the name, number and | |
287 position of the residue and the key for the following dictionary. | |
288 param list_all_resid: list containing two lists. One containing residue | |
289 numbers and the other containing residue names. | |
290 param config: Configparser object that describes the start and end indices | |
291 of each protomer. | |
292 param list_pattern: list of the names of each amino acid of the pattern. | |
293 param pattern_resid: string that gives the pattern (NPA1,NPA2,ArR1 ou ArR2) | |
294 in which we are. | |
295 param uni: MDAnalysis universe | |
296 return: dictionary which contains as key the name of the | |
297 aquaporin, the protomer and the residue, as well as the residue number. | |
298 The value contains the name, the number and the position of the residue, | |
299 as well as the key of the next dictionary. | |
300 """ | |
301 if args.verbose: | |
302 logging.info("\nFunction search_pattern") | |
303 logging.info("The first input argument is " + str(list_all_resid)) | |
304 logging.info("The second input argument is a configParser object") | |
305 logging.info("The third input argument is " + str(list_pattern)) | |
306 logging.info("The fourth input argument is " + str(pattern_resid)) | |
307 logging.info("The fourth input argument is " + str(uni)) | |
308 num_ca_npa = 0 | |
309 num_ca_arr = 0 | |
310 dict_resid_pattern = {} | |
311 str_pattern = "".join(list_pattern) | |
312 for aqp in range(1, 3): | |
313 for p in range(1, 5): | |
314 list_ind_temp_resid = [] | |
315 list_ind_final_resid = [] | |
316 proto = "AQP" + str(aqp) + ",P" + str(p) | |
317 ss_name_list = list( | |
318 list_all_resid[1][ | |
319 int(config[proto]["start"]): int(config[proto]["end"]) | |
320 ] | |
321 ) | |
322 ss_name_str = "".join(ss_name_list) | |
323 pattern = re.compile(str_pattern) | |
324 for v in pattern.finditer(ss_name_str): | |
325 if args.verbose: | |
326 logging.info( | |
327 "The pattern " + str(pattern) + " is searched in " | |
328 + ss_name_str | |
329 ) | |
330 if aqp == 1 and p == 1: | |
331 list_ind_temp_resid.append(int(v.start() / 3)) | |
332 else: | |
333 list_ind_temp_resid.append( | |
334 int(v.start() / 3) + int(config[proto]["start"]) | |
335 ) | |
336 if args.verbose: | |
337 logging.info("The temporary index list is " | |
338 + str(list_ind_temp_resid)) | |
339 for ind_current in list_ind_temp_resid: | |
340 val_one = list_all_resid[0][ind_current] | |
341 val_two = list_all_resid[0][ind_current + 1] | |
342 val_three = list_all_resid[0][ind_current + 2] | |
343 if val_one + 1 == val_two and val_one + 2 == val_three: | |
344 if pattern_resid == "ArR1" or pattern_resid == "ArR2": | |
345 val_four = list_all_resid[0][ind_current + 3] | |
346 if val_one + 3 == val_four: | |
347 list_ind_final_resid.append(ind_current) | |
348 else: | |
349 list_ind_final_resid.append(ind_current) | |
350 list_ind_final_resid.append(ind_current + 1) | |
351 list_ind_final_resid.append(ind_current + 2) | |
352 if len(list_ind_final_resid) == 0: | |
353 if args.verbose: | |
354 logging.critical("The pattern is not found " | |
355 + "in the protein sequence") | |
356 print("The pattern is not found in the protein sequence") | |
357 sys.exit(1) | |
358 if args.verbose: | |
359 logging.info("The final index list is " | |
360 + str(list_ind_final_resid)) | |
361 ind_list = [] | |
362 for ind, val_ind in enumerate(list_ind_final_resid): | |
363 ind_list.append(ind) | |
364 key = ( | |
365 proto | |
366 + "," | |
367 + list_all_resid[1][val_ind] | |
368 + "," | |
369 + str(list_all_resid[0][val_ind]) | |
370 ) | |
371 if args.verbose: | |
372 logging.info("The key of dictionary is " + key) | |
373 slct_resid = ( | |
374 "(resid " + str(list_all_resid[0][val_ind]) | |
375 + ") and name CA" | |
376 ) | |
377 if args.verbose: | |
378 logging.info("The selected residue is : " + slct_resid) | |
379 resid = uni.select_atoms(slct_resid) | |
380 if args.verbose: | |
381 logging.info("The AtomGroup is " + str(resid)) | |
382 if pattern_resid == "ArR1": | |
383 new_key = "ArR1," + proto | |
384 if args.verbose: | |
385 logging.info("The key of next dictionary is " | |
386 + new_key) | |
387 position_ca = resid.positions[num_ca_arr] | |
388 num_ca_arr += 1 | |
389 elif pattern_resid == "ArR2": | |
390 new_key = "ArR2," + proto | |
391 if args.verbose: | |
392 logging.info("The key of next dictionary is " | |
393 + new_key) | |
394 position_ca = resid.positions[num_ca_arr] | |
395 num_ca_arr += 1 | |
396 else: | |
397 if (ind - 3) in ind_list or pattern_resid == "NPA2": | |
398 new_key = "NPA2," + proto | |
399 if args.verbose: | |
400 logging.info("The key of next dictionary is " | |
401 + new_key) | |
402 position_ca = resid.positions[num_ca_npa] | |
403 else: | |
404 new_key = "NPA1," + proto | |
405 if args.verbose: | |
406 logging.info("The key of next dictionary is " | |
407 + new_key) | |
408 position_ca = resid.positions[num_ca_npa] | |
409 dict_resid_pattern[key] = [ | |
410 list_all_resid[1][val_ind], | |
411 list_all_resid[0][val_ind], | |
412 position_ca, | |
413 new_key, | |
414 ] | |
415 if args.verbose: | |
416 logging.info( | |
417 "The value associated at the key " | |
418 + key | |
419 + " is " | |
420 + str(dict_resid_pattern[key]) | |
421 ) | |
422 if len(list_ind_final_resid) > 1: | |
423 num_ca_npa += 1 | |
424 if args.verbose: | |
425 logging.info("The dictionary contains" + str(dict_resid_pattern)) | |
426 return dict_resid_pattern | |
427 | |
428 | |
429 def search_resid_positions(dict_resid_pattern, uni): | |
430 """ | |
431 Description : creates the dictionary that contains the position of the | |
432 barycenter of the alpha carbons of NPA and ArR. | |
433 param dict_resid_pattern: dictionary which contains as key the name of the | |
434 aquaporin, the protomer and the residue, as well as the residue number. | |
435 The value contains the name, the number and the position of the residue, | |
436 as well as the key of the next dictionary. | |
437 param uni: MDAnalysis universe. | |
438 return: dictionary which contains as key the name of the pattern, | |
439 the aquaporin and the protomer. The value associated with the key contain | |
440 the position of the barycenter of the pattern. | |
441 """ | |
442 if args.verbose: | |
443 logging.info("\nFunction search_resid_positions") | |
444 logging.info("The input dictionary is " + str(dict_resid_pattern)) | |
445 logging.info("The input universe is " + str(uni)) | |
446 dict_pos_resid = {} | |
447 for npa in range(1, 3): | |
448 num_proto = 0 | |
449 for aqp in range(1, 3): | |
450 for p in range(1, 5): | |
451 list_atm_npa = [] | |
452 key_npa = "NPA" + str(npa) + ",AQP" + str(aqp) + ",P" + str(p) | |
453 key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p) | |
454 key_arr2 = "ArR2,AQP" + str(aqp) + ",P" + str(p) | |
455 for k, v in dict_resid_pattern.items(): | |
456 if key_npa == v[3]: | |
457 list_atm_npa.append(v[1]) | |
458 elif key_arr1 == v[3]: | |
459 dict_pos_resid[key_arr1] = v[2] | |
460 elif key_arr2 == v[3]: | |
461 dict_pos_resid[key_arr2] = v[2] | |
462 if len(list_atm_npa) > 0: | |
463 slct_pattern = ( | |
464 "(resid " | |
465 + str(list_atm_npa[0]) | |
466 + " or resid " | |
467 + str(list_atm_npa[1]) | |
468 + " or resid " | |
469 + str(list_atm_npa[2]) | |
470 + ") and name CA" | |
471 ) | |
472 if args.verbose: | |
473 logging.info("The selected pattern is " + slct_pattern) | |
474 atom_group_pattern = uni.select_atoms(slct_pattern) | |
475 if args.verbose: | |
476 logging.info(str(atom_group_pattern)) | |
477 deb = 0 + 3 * num_proto | |
478 fin = +3 + 3 * num_proto | |
479 atm_proto_aqp = atom_group_pattern[deb:fin] | |
480 if args.verbose: | |
481 logging.info("deb : " + str(deb) + " fin : " | |
482 + str(fin)) | |
483 logging.info("The selected AtomGroup is " | |
484 + str(atm_proto_aqp)) | |
485 dict_pos_resid[key_npa] = atm_proto_aqp.centroid() | |
486 num_proto += 1 | |
487 if args.verbose: | |
488 logging.info("The dictionary contains " + str(dict_pos_resid)) | |
489 return dict_pos_resid | |
490 | |
491 | |
492 def calculate_euclidean_distance(dict_pos_resid, choice_measure): | |
493 """ | |
494 Description : creates the dictionary that contains the Euclidean distance | |
495 between ArR and NPA1 or NPA2. | |
496 param dict_pos_resid: dictionary which contains as key the name of the | |
497 pattern, the aquaporin and the protomer. | |
498 The value contains the position of the barycenter of the pattern. | |
499 param choice_measure: variable that contains ArR-ArR or NPA-ArR. | |
500 return: dictionary which contains as key the name of the couple, | |
501 the aquaporin and the protomer. | |
502 The value contains the euclidean distance between the barycenters of | |
503 the two motifs of the couple. | |
504 """ | |
505 if args.verbose: | |
506 logging.info("\nFunction calculate_euclidean_distance") | |
507 logging.info("The input dictionary is " + str(dict_pos_resid)) | |
508 logging.info("The distance will be calculated between" | |
509 + choice_measure) | |
510 dict_pair_dist = {} | |
511 if choice_measure == "NPA-ArR": | |
512 for npa in range(1, 3): | |
513 for aqp in range(1, 3): | |
514 for p in range(1, 5): | |
515 key_npa = "NPA" + str(npa) + ",AQP" + str(aqp) \ | |
516 + ",P" + str(p) | |
517 key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p) | |
518 new_key = "ArR-NPA" + str(npa) + ",AQP" + str(aqp) \ | |
519 + ",P" + str(p) | |
520 dict_pair_dist[new_key] = np.linalg.norm( | |
521 dict_pos_resid[key_npa] - dict_pos_resid[key_arr1] | |
522 ) | |
523 if args.verbose: | |
524 logging.info( | |
525 "Key NPA : " | |
526 + key_npa | |
527 + " value : " | |
528 + str(dict_pos_resid[key_npa]) | |
529 ) | |
530 logging.info( | |
531 "Key ArR : " | |
532 + key_arr1 | |
533 + " value : " | |
534 + str(dict_pos_resid[key_arr1]) | |
535 ) | |
536 logging.info( | |
537 "The value associated at the key " | |
538 + new_key | |
539 + " is " | |
540 + str(dict_pair_dist[new_key]) | |
541 ) | |
542 else: | |
543 for aqp in range(1, 3): | |
544 for p in range(1, 5): | |
545 key_arr1 = "ArR1,AQP" + str(aqp) + ",P" + str(p) | |
546 key_arr2 = "ArR2,AQP" + str(aqp) + ",P" + str(p) | |
547 new_key = "ArR1-ArR2,AQP" + str(aqp) + ",P" + str(p) | |
548 dict_pair_dist[new_key] = np.linalg.norm( | |
549 dict_pos_resid[key_arr1] - dict_pos_resid[key_arr2] | |
550 ) | |
551 if args.verbose: | |
552 logging.info( | |
553 "Key ArR1 : " | |
554 + key_arr1 | |
555 + " value : " | |
556 + str(dict_pos_resid[key_arr1]) | |
557 ) | |
558 logging.info( | |
559 "Key ArR2 : " | |
560 + key_arr2 | |
561 + " value : " | |
562 + str(dict_pos_resid[key_arr2]) | |
563 ) | |
564 logging.info( | |
565 "The value associated at the key " | |
566 + new_key | |
567 + " is " | |
568 + str(dict_pair_dist[new_key]) | |
569 ) | |
570 if args.verbose: | |
571 logging.info("The dictionary contains " + str(dict_pair_dist)) | |
572 return dict_pair_dist | |
573 | |
574 | |
575 def run_one_frame_npa_arr(list_resid_npa1, list_resid_npa2, list_resid_arr, | |
576 uni): | |
577 """ | |
578 Description: compiles the different functions to create the dict_pair_dist | |
579 dictionary for one frame. | |
580 param list_resid_npa1: list containing the names of the residues | |
581 constituting the NPA1. | |
582 param list_resid_npa2: list containing the names of the residues | |
583 constituting the NPA2. | |
584 param list_resid_arr: list containing the names of the residues | |
585 constituting the ArR. | |
586 param uni: MDAnalysis universe. | |
587 return: dictionary which contains as key the name of the couple, | |
588 the aquaporin and the protomer. | |
589 The value contains the euclidean distance between the barycenters of | |
590 the two motifs of the couple. | |
591 """ | |
592 if args.verbose: | |
593 logging.info("\nFunction runForOneFrame") | |
594 logging.info("The first input parameter is " + str(list_resid_npa1)) | |
595 logging.info("The second input parameter is " + str(list_resid_npa2)) | |
596 logging.info("The third input parameter is " + str(list_resid_arr)) | |
597 logging.info("The fourth input parameter is " + str(uni)) | |
598 choice_measure = "NPA-ArR" | |
599 if list_resid_npa1 == list_resid_npa2: | |
600 if args.verbose: | |
601 logging.info("Pattern NPA1 identical to pattern NPA2") | |
602 pattern = "NPA1" | |
603 list_all_resid_npa = search_amino_acids_pattern(list_resid_npa1, uni) | |
604 list_length_proto_npa = \ | |
605 calculate_length_proto(list(list_all_resid_npa[0])) | |
606 config_npa = create_config_file(list_length_proto_npa) | |
607 dict_resid_pattern = search_pattern( | |
608 list_all_resid_npa, config_npa, list_resid_npa1, pattern, uni | |
609 ) | |
610 else: | |
611 if args.verbose: | |
612 logging.info("Pattern NPA1 different from the NPA2 pattern") | |
613 # NPA1 | |
614 pattern = "NPA1" | |
615 list_all_resid_npa = search_amino_acids_pattern(list_resid_npa1, uni) | |
616 list_length_proto_npa = \ | |
617 calculate_length_proto(list(list_all_resid_npa[0])) | |
618 config_npa1 = create_config_file(list_length_proto_npa) | |
619 dict_resid_pattern = search_pattern( | |
620 list_all_resid_npa, config_npa1, list_resid_npa1, pattern, uni | |
621 ) | |
622 # NPA2 | |
623 pattern = "NPA2" | |
624 list_all_resid_npa = search_amino_acids_pattern(list_resid_npa2, uni) | |
625 list_length_proto_npa = \ | |
626 calculate_length_proto(list(list_all_resid_npa[0])) | |
627 config_npa2 = create_config_file(list_length_proto_npa) | |
628 dict_resid_pattern.update( | |
629 search_pattern( | |
630 list_all_resid_npa, config_npa2, list_resid_npa2, pattern, uni | |
631 ) | |
632 ) | |
633 # ArR | |
634 pattern = "ArR1" | |
635 list_all_resid_arr = search_amino_acids_pattern(list_resid_arr, uni) | |
636 list_length_proto_arr = \ | |
637 calculate_length_proto(list(list_all_resid_arr[0])) | |
638 config_arr = create_config_file(list_length_proto_arr) | |
639 dict_resid_pattern.update( | |
640 search_pattern(list_all_resid_arr, config_arr, list_resid_arr, | |
641 pattern, uni) | |
642 ) | |
643 # dict position | |
644 dict_pos_resid = search_resid_positions(dict_resid_pattern, uni) | |
645 # dict distance | |
646 dict_pair_dist = \ | |
647 calculate_euclidean_distance(dict_pos_resid, choice_measure) | |
648 return dict_pair_dist | |
649 | |
650 | |
651 def run_one_frame_arr(list_resid_arr1, list_resid_arr2, uni): | |
652 """ | |
653 Description: compiles the different functions to create the dict_pair_dist | |
654 dictionary for one frame. | |
655 param list_resid_arr1: list containing the names of the residues | |
656 constituting the ArR1. | |
657 param list_resid_arr2: list_resid_arr2, list containing the names of | |
658 the residues constituting the ArR2. | |
659 param uni: MDAnalysis universe. | |
660 return: dictionary which contains as key the name of the couple, | |
661 the aquaporin and the protomer. | |
662 The value contains the euclidean distance between the barycenters of | |
663 the two motifs of the couple. | |
664 """ | |
665 if args.verbose: | |
666 logging.info("\nFunction runForOneFrame") | |
667 logging.info("The first input parameter is " + str(list_resid_arr1)) | |
668 logging.info("The second input parameter is " + str(list_resid_arr2)) | |
669 logging.info("The third input parameter is " + str(uni)) | |
670 dict_resid_pattern = {} | |
671 # ArR1 | |
672 pattern = "ArR1" | |
673 list_all_resid_arr1 = search_amino_acids_pattern(list_resid_arr1, uni) | |
674 list_length_proto_arr1 = \ | |
675 calculate_length_proto(list(list_all_resid_arr1[0])) | |
676 config_arr1 = create_config_file(list_length_proto_arr1) | |
677 dict_resid_pattern.update( | |
678 search_pattern(list_all_resid_arr1, config_arr1, list_resid_arr1, | |
679 pattern, uni) | |
680 ) | |
681 # ArR2 | |
682 pattern = "ArR2" | |
683 list_all_resid_arr2 = search_amino_acids_pattern(list_resid_arr2, uni) | |
684 list_length_proto_arr2 = \ | |
685 calculate_length_proto(list(list_all_resid_arr2[0])) | |
686 config_arr2 = create_config_file(list_length_proto_arr2) | |
687 dict_resid_pattern.update( | |
688 search_pattern(list_all_resid_arr2, config_arr2, list_resid_arr2, | |
689 pattern, uni) | |
690 ) | |
691 # dict position | |
692 dict_pos_resid = search_resid_positions(dict_resid_pattern, uni) | |
693 # dict distance | |
694 choice_measure = "ArR-ArR" | |
695 dict_pair_dist = \ | |
696 calculate_euclidean_distance(dict_pos_resid, choice_measure) | |
697 return dict_pair_dist | |
698 | |
699 | |
700 def run_all_frames_npa_arr( | |
701 list_resid_npa1, list_resid_npa2, list_resid_arr, col_number, uni | |
702 ): | |
703 """ | |
704 Description: creates the table containing the pore diameter of an aquaporin | |
705 over time. | |
706 param list_resid_npa1: list containing the names of the residues | |
707 constituting the NPA1. | |
708 param list_resid_npa2: list containing the names of the residues | |
709 constituting the NPA2. | |
710 param list_resid_arr: list containing the names of the residues | |
711 constituting the ArR. | |
712 param col_number: number of columns in the table | |
713 param uni: MDAnalysis universe. | |
714 return: list containing an array and a key list. The table containing the | |
715 pore diameter of an aquaporin over time. | |
716 """ | |
717 if args.verbose: | |
718 logging.info("\nFunction runForAllFrame") | |
719 logging.info("The first input parameter is " + str(list_resid_npa1)) | |
720 logging.info("The second input parameter is " + str(list_resid_npa2)) | |
721 logging.info("The third input parameter is " + str(list_resid_arr)) | |
722 logging.info("The fourth input parameter is " + str(col_number)) | |
723 logging.info("The fifth input parameter is " + str(uni)) | |
724 nbr_frame = len(uni.trajectory) | |
725 if args.verbose: | |
726 logging.info("The number of frame is " + str(nbr_frame)) | |
727 tab_pair_dist = np.empty((nbr_frame, col_number)) | |
728 if args.verbose: | |
729 logging.info( | |
730 "The dimensions of the table of means and standard deviation are" | |
731 + " as follows " + str(tab_pair_dist.shape) | |
732 ) | |
733 list_key = [] | |
734 pattern_arr = "" | |
735 pattern_npa1 = "" | |
736 pattern_npa2 = "" | |
737 dict_prot = { | |
738 "ALA": "A", | |
739 "CYS": "C", | |
740 "ASP": "D", | |
741 "GLU": "E", | |
742 "PHE": "F", | |
743 "GLY": "G", | |
744 "HIS": "H", | |
745 "ILE": "I", | |
746 "LYS": "K", | |
747 "LEU": "L", | |
748 "MET": "M", | |
749 "ASN": "N", | |
750 "PRO": "P", | |
751 "GLN": "Q", | |
752 "ARG": "R", | |
753 "SER": "S", | |
754 "THR": "T", | |
755 "VAL": "V", | |
756 "TRP": "W", | |
757 "TYR": "Y", | |
758 "HSD": "H", | |
759 "HSP": "H", | |
760 } | |
761 for aa in list_resid_arr: | |
762 pattern_arr += dict_prot[aa] | |
763 for aa in list_resid_npa1: | |
764 pattern_npa1 += dict_prot[aa] | |
765 for aa in list_resid_npa2: | |
766 pattern_npa2 += dict_prot[aa] | |
767 for ts in uni.trajectory: | |
768 if args.verbose: | |
769 logging.info("Frame " + str(uni.trajectory.frame)) | |
770 logging.info(str(ts)) | |
771 dict_pair_dist = run_one_frame_npa_arr( | |
772 list_resid_npa1, list_resid_npa2, list_resid_arr, uni | |
773 ) | |
774 tab_pair_dist[uni.trajectory.frame][0] = uni.trajectory.time | |
775 list_key = ["Time in ps"] | |
776 num_npa_proto = 0 | |
777 for npa in range(1, 3): | |
778 if npa == 1: | |
779 pattern_complete = "," + pattern_arr + "-" + pattern_npa1 + "1" | |
780 else: | |
781 pattern_complete = "," + pattern_arr + "-" + pattern_npa2 + "2" | |
782 for aqp in range(1, 3): | |
783 for p in range(1, 5): | |
784 num_npa_proto += 1 | |
785 key1 = "ArR-NPA" + str(npa) + ",AQP" + str(aqp)\ | |
786 + ",P" + str(p) | |
787 key2 = "AQP" + str(aqp) + ",P" + str(p) + pattern_complete\ | |
788 + " (Angstroms)" | |
789 tab_pair_dist[uni.trajectory.frame][num_npa_proto] = \ | |
790 dict_pair_dist[key1] | |
791 list_key.append(key2) | |
792 list_out = [tab_pair_dist, list_key] | |
793 return list_out | |
794 | |
795 | |
796 def run_all_frames_arr(list_resid_arr1, list_resid_arr2, col_number, uni): | |
797 """ | |
798 Description: creates the table containing the pore diameter of an aquaporin | |
799 over time. | |
800 param list_resid_arr1: list containing the names of the residues | |
801 constituting the ArR1. | |
802 param list_resid_arr2: list containing the names of the residues | |
803 constituting the ArR2. | |
804 param col_number: number of columns in the table | |
805 param uni: MDAnalysis universe. | |
806 return: list containing an array and a key list. The table containing the | |
807 pore diameter of an aquaporin over time. | |
808 """ | |
809 if args.verbose: | |
810 logging.info("\nFunction runForAllFrame") | |
811 logging.info("The first input parameter is " + str(list_resid_arr1)) | |
812 logging.info("The second input parameter is " + str(list_resid_arr2)) | |
813 logging.info("The third input parameter is " + str(col_number)) | |
814 logging.info("The fourth input parameter is " + str(uni)) | |
815 nbr_frame = len(uni.trajectory) | |
816 if args.verbose: | |
817 logging.info("The number of frame is " + str(nbr_frame)) | |
818 tab_pair_dist = np.empty((nbr_frame, col_number)) | |
819 if args.verbose: | |
820 logging.info( | |
821 "The dimensions of the table of means and standard deviation" | |
822 + " are as follows " + str(tab_pair_dist.shape) | |
823 ) | |
824 list_key = [] | |
825 pattern1 = "" | |
826 pattern2 = "" | |
827 dict_prot = { | |
828 "ALA": "A", | |
829 "CYS": "C", | |
830 "ASP": "D", | |
831 "GLU": "E", | |
832 "PHE": "F", | |
833 "GLY": "G", | |
834 "HIS": "H", | |
835 "ILE": "I", | |
836 "LYS": "K", | |
837 "LEU": "L", | |
838 "MET": "M", | |
839 "ASN": "N", | |
840 "PRO": "P", | |
841 "GLN": "Q", | |
842 "ARG": "R", | |
843 "SER": "S", | |
844 "THR": "T", | |
845 "VAL": "V", | |
846 "TRP": "W", | |
847 "TYR": "Y", | |
848 "HSD": "H", | |
849 "HSP": "H", | |
850 } | |
851 for aa in list_resid_arr1: | |
852 pattern1 += dict_prot[aa] | |
853 for aa in list_resid_arr2: | |
854 pattern2 += dict_prot[aa] | |
855 for ts in uni.trajectory: | |
856 if args.verbose: | |
857 logging.info("Frame " + str(uni.trajectory.frame)) | |
858 logging.info(str(ts)) | |
859 dict_pair_dist = run_one_frame_arr(list_resid_arr1, list_resid_arr2, | |
860 uni) | |
861 tab_pair_dist[uni.trajectory.frame][0] = uni.trajectory.time | |
862 list_key = ["Time (ps)"] | |
863 num_proto = 0 | |
864 for aqp in range(1, 3): | |
865 for p in range(1, 5): | |
866 num_proto += 1 | |
867 key1 = "ArR1-ArR2,AQP" + str(aqp) + ",P" + str(p) | |
868 key2 = ( | |
869 "AQP" + str(aqp) + ",P" + str(p) + "," + pattern1 + | |
870 "-" + pattern2 + " (Angstroms)" | |
871 ) | |
872 tab_pair_dist[uni.trajectory.frame][num_proto] = \ | |
873 dict_pair_dist[key1] | |
874 list_key.append(key2) | |
875 list_out = [tab_pair_dist, list_key] | |
876 return list_out | |
877 | |
878 | |
879 def create_final_dataframe( | |
880 list_resid_arr1, list_resid_arr2, list_resid_npa1, list_resid_npa2, uni | |
881 ): | |
882 """ | |
883 Description: creates the final dataframe. | |
884 param list_resid_arr1: list containing the arr1 pattern | |
885 param list_resid_arr2: list containing the arr2 pattern | |
886 param list_resid_npa1: list containing the npa1 pattern | |
887 param list_resid_npa2: list containing the npa2 pattern | |
888 param uni: MDAnalysis universe | |
889 return: final dataframe | |
890 """ | |
891 if args.verbose: | |
892 logging.info("\nFunction runForAllFrame") | |
893 logging.info("The first input parameter is " + str(list_resid_arr1)) | |
894 logging.info("The second input parameter is " + str(list_resid_arr2)) | |
895 logging.info("The third input parameter is " + str(list_resid_npa1)) | |
896 logging.info("The fourth input parameter is " + str(list_resid_npa2)) | |
897 logging.info("The fifth input parameter is " + str(uni)) | |
898 if list_resid_npa1 is None and list_resid_npa2 is None: | |
899 if len(list_resid_arr1) >= 4 and len(list_resid_arr2) >= 4: | |
900 columns_number = 9 | |
901 list_out = run_all_frames_arr( | |
902 list_resid_arr1, list_resid_arr2, columns_number, uni | |
903 ) | |
904 else: | |
905 if args.verbose: | |
906 if len(list_resid_arr1) < 4 <= len(list_resid_arr2): | |
907 logging.critical("Pattern1 must be provided") | |
908 elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4: | |
909 logging.critical("Pattern1 and Pattern2 must be provided") | |
910 elif len(list_resid_arr1) >= 4 > len(list_resid_arr2): | |
911 logging.critical("Pattern2 must be provided") | |
912 print("Pattern error : please read the log file or help") | |
913 sys.exit(2) | |
914 elif list_resid_npa1 is None and list_resid_npa2 is not None: | |
915 if args.verbose: | |
916 if len(list_resid_arr1) < 4 <= len(list_resid_arr2): | |
917 logging.critical("PatternArr1 must be provided") | |
918 logging.critical("Please, do not put patternArr2") | |
919 logging.critical("PatternNPA1 must be provided") | |
920 elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4: | |
921 logging.critical("PatternArr1 must be provided") | |
922 logging.critical("PatternNPA1 must be provided") | |
923 elif len(list_resid_arr1) >= 4 > len(list_resid_arr2): | |
924 logging.critical("PatternNPA1 must be provided") | |
925 else: | |
926 logging.critical("Please, do not put patternArr2") | |
927 logging.critical("PatternNPA1 must be provided") | |
928 print("Pattern error : please read the log file or help") | |
929 sys.exit(2) | |
930 elif list_resid_npa1 is not None and list_resid_npa2 is None: | |
931 if args.verbose: | |
932 if len(list_resid_arr1) < 4 <= len(list_resid_arr2): | |
933 logging.critical("PatternArr1 must be provided") | |
934 logging.critical("Please, do not put patternArr2") | |
935 logging.critical("PatternNPA2 must be provided") | |
936 elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4: | |
937 logging.critical("PatternArr1 must be provided") | |
938 logging.critical("PatternNPA2 must be provided") | |
939 elif len(list_resid_arr1) >= 4 > len(list_resid_arr2): | |
940 logging.critical("PatternNPA1 must be provided") | |
941 else: | |
942 logging.critical("Please, do not put patternArr2") | |
943 logging.critical("PatternNPA2 must be provided") | |
944 print("Pattern error : please read the log file or help") | |
945 sys.exit(2) | |
946 else: | |
947 if ( | |
948 list_resid_npa1 is not None | |
949 and list_resid_npa2 is not None | |
950 and len(list_resid_arr1) >= 4 > len(list_resid_arr2) | |
951 ): | |
952 columns_number = 17 | |
953 list_out = run_all_frames_npa_arr( | |
954 list_resid_npa1, list_resid_npa2, list_resid_arr1, | |
955 columns_number, uni | |
956 ) | |
957 else: | |
958 if len(list_resid_arr1) < 4 <= len(list_resid_arr2): | |
959 logging.critical("PatternArr1 must be provided") | |
960 logging.critical("Please, do not put patternArr2") | |
961 elif len(list_resid_arr1) < 4 and len(list_resid_arr2) < 4: | |
962 logging.critical("Pattern1 must be provided") | |
963 print("Pattern error : please read the log file or help") | |
964 sys.exit(2) | |
965 list_std = list(np.std(list_out[0], axis=0)) | |
966 list_mean = list(np.mean(list_out[0], axis=0)) | |
967 np_mean_around = list(np.around(list_mean, decimals=3)) | |
968 np_std_around = list(np.around(list_std, decimals=3)) | |
969 np_mean_around[0] = "Mean" | |
970 np_std_around[0] = "Std" | |
971 np_mean_around = np.array([np_mean_around]) | |
972 np_std_around = np.array([np_std_around]) | |
973 np_mean_std_around = np.concatenate((np_mean_around, np_std_around), | |
974 axis=0) | |
975 tab_pair_dist = np.concatenate((np_mean_std_around, list_out[0]), axis=0) | |
976 df_dist = pd.DataFrame(tab_pair_dist, columns=list_out[1]) | |
977 return df_dist | |
978 | |
979 | |
980 # Class | |
981 | |
982 | |
983 if __name__ == "__main__": | |
984 args = parse_arguments() | |
985 out_file_name = "test" | |
986 file_name_log = out_file_name + ".log" | |
987 if args.verbose: | |
988 if args.log_output: | |
989 if os.path.isfile(args.log_output): | |
990 os.remove(args.log_output) | |
991 logging.basicConfig( | |
992 filename=args.log_output, | |
993 format="%(levelname)s - %(message)s", | |
994 level=logging.INFO, | |
995 ) | |
996 else: | |
997 if os.path.isfile(file_name_log): | |
998 os.remove(file_name_log) | |
999 logging.basicConfig( | |
1000 filename=file_name_log, | |
1001 format="%(levelname)s - %(message)s", | |
1002 level=logging.INFO, | |
1003 ) | |
1004 logging.info("verbose mode on") | |
1005 gro_file = '' | |
1006 xtc_file = '' | |
1007 if args.gro_file: | |
1008 gro_file = os.path.basename(args.gro_file[0]) | |
1009 else: | |
1010 logging.critical("A .gro file must be provided") | |
1011 if args.xtc_file: | |
1012 xtc_file = os.path.basename(args.xtc_file[0]) | |
1013 else: | |
1014 logging.critical("A .xtc file must be provided") | |
1015 if args.work_directory: | |
1016 work_directory = args.work_directory[0] | |
1017 else: | |
1018 work_directory = os.path.dirname(args.gro_file[0]) | |
1019 if not work_directory.endswith("/"): | |
1020 work_directory += "/" | |
1021 output_directory = args.output_directory[0] | |
1022 if args.output: | |
1023 output = args.output | |
1024 else: | |
1025 output = "" | |
1026 os.chdir(work_directory) | |
1027 if args.gro_file_ext and args.xtc_file_ext: | |
1028 u = Mda.Universe( | |
1029 args.gro_file[0], | |
1030 args.xtc_file[0], | |
1031 topology_format=args.gro_file_ext[0], | |
1032 format=args.xtc_file_ext[0], | |
1033 ) | |
1034 else: | |
1035 u = Mda.Universe(args.gro_file[0], args.xtc_file[0]) | |
1036 if args.verbose: | |
1037 logging.info("Load universe : " + str(u)) | |
1038 if args.npa_one: | |
1039 resid_npa1 = args.npa_one[0].split() | |
1040 else: | |
1041 resid_npa1 = args.npa_one | |
1042 if args.npa_second: | |
1043 resid_npa2 = args.npa_second[0].split() | |
1044 else: | |
1045 resid_npa2 = args.npa_second | |
1046 if args.arr_one: | |
1047 resid_arr1 = args.arr_one[0].split() | |
1048 else: | |
1049 resid_arr1 = args.arr_one | |
1050 if args.arr_second: | |
1051 resid_arr2 = args.arr_second[0].split() | |
1052 else: | |
1053 resid_arr2 = args.arr_second | |
1054 df_final = create_final_dataframe(resid_arr1, resid_arr2, | |
1055 resid_npa1, resid_npa2, u) | |
1056 if len(output_directory) > 2: | |
1057 if not os.path.exists(output_directory): | |
1058 os.makedirs(output_directory) | |
1059 os.chdir(output_directory) | |
1060 if len(output) > 2: | |
1061 df_final.to_csv(output, sep="\t", index=False) | |
1062 else: | |
1063 file_name_tabular = out_file_name + "PairDist.tabular" | |
1064 df_final.to_csv(file_name_tabular, sep="\t", index=False) |