Mercurial > repos > agpetit > calculate_diameter
comparison cut_trajectory.py @ 5:e504457035e5 draft
"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/aubi_piaf commit b6488400d4478d46697019485e912c38ea2202a5-dirty"
author | agpetit |
---|---|
date | Mon, 30 May 2022 14:21:51 +0000 |
parents | |
children | f1dd5d99ea2d |
comparison
equal
deleted
inserted
replaced
4:433dba16af7d | 5:e504457035e5 |
---|---|
1 #!/usr/bin/env python3 | |
2 | |
3 # coding: utf-8 | |
4 """ | |
5 The script allows to estimate the number of sub-trajectories to obtain. | |
6 It also allows to split the trajectory into a number of sub-trajectories. | |
7 # USAGE to estimate number of sub-trajectories : | |
8 # USAGE : cut_trajectory.py | |
9 -s : number of desired sub-trajectories (number or file) | |
10 -gro : .gro file -xtc : .xtc file -log : name of log file (optional) | |
11 -d : output directory (optional) -o : name of output file (optional) | |
12 -g : group for output (optional) | |
13 -start : start time of the trajectory (optional) | |
14 -end : end time of the trajectory (optional) | |
15 """ | |
16 | |
17 __all__ = [] | |
18 __author__ = "Agnès-Elisabeth Petit" | |
19 __date__ = "29/04/2022" | |
20 __version__ = "0.8" | |
21 __copyright__ = "(c) 2022 CC-BY-NC-SA" | |
22 | |
23 # Library import | |
24 import argparse | |
25 import logging | |
26 import os | |
27 import subprocess | |
28 import sys | |
29 | |
30 from joblib import Parallel, delayed | |
31 | |
32 | |
33 def test_setup(): | |
34 global args | |
35 args = parse_arguments() | |
36 args.verbose = True | |
37 | |
38 | |
39 def parse_arguments(): | |
40 list_choices = [] | |
41 [list_choices.append(str(x)) for x in range(17)] | |
42 parser = argparse.ArgumentParser( | |
43 description="The script allows to estimate the number of " | |
44 "sub-trajectories to obtain. It also allows to split" | |
45 " the trajectory into a number of sub-trajectories.", | |
46 formatter_class=argparse.ArgumentDefaultsHelpFormatter, | |
47 prefix_chars="-", | |
48 add_help=True, | |
49 ) | |
50 parser.add_argument( | |
51 "-v", | |
52 "--verbose", | |
53 action="store_true", | |
54 help="""Information messages to stderr""", | |
55 ) | |
56 parser.add_argument( | |
57 "-c", | |
58 "--input_check", | |
59 type=str, | |
60 help=""".txt file obtained with gmx check -f. | |
61 It contains information about the trajectory""", | |
62 ) | |
63 parser.add_argument( | |
64 "-s", | |
65 "--nbr_sub_traj", | |
66 type=str, | |
67 help="""numbers of sub_trajectories""", | |
68 ) | |
69 parser.add_argument( | |
70 "-gro", "--gro_file", type=str, help="""My input .gro file""" | |
71 ) | |
72 parser.add_argument( | |
73 "-xtc", "--xtc_file", type=str, help="""My input .xtc file""" | |
74 ) | |
75 parser.add_argument( | |
76 "-log", "--log_output", type=str, | |
77 default="log/cut_trajectory.log", | |
78 help="""Output for log file. Default : | |
79 log/cut_trajectory.log""" | |
80 ) | |
81 parser.add_argument( | |
82 "-d", "--output_directory", type=str, | |
83 default="out/", | |
84 help="""It's output Directory. Default : out/""" | |
85 ) | |
86 parser.add_argument( | |
87 "-g", | |
88 "--group_output", | |
89 type=str, | |
90 default="0", | |
91 choices=list_choices, | |
92 help="""Select group for output. 0 : system, 1: protein, 2: protein-H, | |
93 3: C-alpha, 4: Backbone, 5: MainChain, 6: MainChain+Cb, | |
94 7: MainChain+H, 8: SideChain, 9: SideChain-H, 10: Prot-Masses, | |
95 11: non-Protein, 12: Other, 13: POPC, 14: POT, 15: CLA, | |
96 16: TIP3""", | |
97 ) | |
98 parser.add_argument( | |
99 "-start", "--start_traj", type=str, | |
100 help="""Start of the trajectory to be cut""" | |
101 ) | |
102 parser.add_argument( | |
103 "-end", "--end_traj", type=str, | |
104 help="""End of the trajectory to be cut""" | |
105 ) | |
106 parser.add_argument( | |
107 "-cpus", "--number_cpus", type=int, | |
108 default=1, | |
109 help="""Number of cpus. Default : 1""" | |
110 ) | |
111 return parser.parse_args() | |
112 | |
113 | |
114 def search_nbr_steps_time_step(txt_file): | |
115 """ | |
116 Description : Keeping the number of frames of the complete trajectory and | |
117 the time between each frame. | |
118 param txt_file: file obtained with gmx check. | |
119 return: list that contains the number of frames of the complete trajectory | |
120 and time between each frame. | |
121 """ | |
122 if args.verbose: | |
123 logging.info("\nFunction search_nbr_steps_time_step") | |
124 logging.info("The input file is " + txt_file) | |
125 with open(txt_file, "r") as f: | |
126 len_traj = "" | |
127 time_step = "" | |
128 for li in f: | |
129 li = li.rstrip() | |
130 if li.startswith("Step"): | |
131 li2 = li.split() | |
132 len_traj = int(li2[1]) | |
133 time_step = int(li2[2]) | |
134 if args.verbose: | |
135 logging.info("The length of the trajectory is " + str(len_traj)) | |
136 logging.info( | |
137 "The elapsed time between two steps is : " + str(time_step) + " ps" | |
138 ) | |
139 logging.info("End search_nbr_steps_time_step functions") | |
140 return len_traj, time_step | |
141 | |
142 | |
143 def search_nbr_sub_traj(tsv_nb_sub_traj_file): | |
144 """ | |
145 Description: Obtaining the number of frames of the complete trajectory, | |
146 the number of sub-trajectories to create, the time between each | |
147 frame and the number of frames per sub-trajectory. | |
148 param tsv_nb_sub_traj_file: tsv file obtained with the estimation of | |
149 the number of sub-trajectories | |
150 return: list that contains the number of images of the complete trajectory, | |
151 the number of sub-trajectories to create, the time between each image and | |
152 the number of images per sub-trajectory. | |
153 """ | |
154 if args.verbose: | |
155 logging.info("\nFunction search_nbr_sub_traj") | |
156 logging.info("The input file is " + tsv_nb_sub_traj_file) | |
157 list_number_sub_traj = [] | |
158 with open(tsv_nb_sub_traj_file, "r") as f: | |
159 for li in f: | |
160 li = li.rstrip() | |
161 if not li.startswith("Length"): | |
162 list_number_sub_traj = li.split() | |
163 if args.verbose: | |
164 logging.info("The length of complete trajectory is " | |
165 + list_number_sub_traj[0]) | |
166 logging.info( | |
167 "The number of sub-trajectories required is " | |
168 + list_number_sub_traj[1] | |
169 ) | |
170 logging.info( | |
171 "The elapsed time between two steps is : " | |
172 + list_number_sub_traj[2] + "ps" | |
173 ) | |
174 logging.info( | |
175 "The number of frames per sub-trajectory is " | |
176 + list_number_sub_traj[3] | |
177 ) | |
178 logging.info("End search_nbr_sub_traj function") | |
179 list_number_sub_traj = [int(v) for v in list_number_sub_traj] | |
180 return list_number_sub_traj | |
181 | |
182 | |
183 def launch_cut_traj(list_file, gro_file, xtc_file, out_dir, | |
184 logging_file, n_group, n_cpus): | |
185 """ | |
186 Description: function that allows to split the trajectory | |
187 into a number of sub-trajectories | |
188 param list_file: list that contains the number of images of the complete | |
189 trajectory, the number of sub-trajectories to create, the time between | |
190 each image and the number of images per sub-trajectory. | |
191 param gro_file: .gro file | |
192 param xtc_file: .xtc file | |
193 param out_dir: output directory | |
194 param logging_file: name of log file | |
195 return: None | |
196 output: create a number of sub-trajectories | |
197 """ | |
198 if args.verbose: | |
199 logging.info("\nFunction cut_traj") | |
200 logging.info("The length of the trajectory is " + str(list_file[0]) | |
201 + " frames") | |
202 logging.info( | |
203 "The elapsed time between two steps is : " + str(list_file[2]) | |
204 + " ps" | |
205 ) | |
206 logging.info("The number of sub-trajectories required is " | |
207 + str(list_file[1])) | |
208 logging.info(".gro file is " + gro_file) | |
209 logging.info(".xtc file is " + xtc_file) | |
210 logging.info("Output directory is " + str(out_dir)) | |
211 logging.info("The name of .log file is " + logging_file) | |
212 logging.info("The number of cpus used is " + str(n_cpus)) | |
213 if not os.path.exists(out_dir): | |
214 os.makedirs(out_dir) | |
215 n_sub_traj = list_file[1] | |
216 times_step = list_file[2] | |
217 nb_frame_sub_traj = list_file[3] | |
218 prefix_name_file = os.path.basename(gro_file).rsplit(".", 1)[0] | |
219 start = list_file[4] | |
220 end = list_file[4] - 1 | |
221 dict_sub_traj = {} | |
222 for nb_traj in range(n_sub_traj): | |
223 end += nb_frame_sub_traj | |
224 value = str(start * times_step) + "," + str(end * times_step) | |
225 dict_sub_traj[nb_traj + 1] = value | |
226 start += nb_frame_sub_traj | |
227 if args.verbose: | |
228 for k, v in dict_sub_traj.items(): | |
229 logging.info("Sub_trajectory " + str(k) + " starts at " | |
230 + v.split(",")[0] + " ps and ends at " | |
231 + v.split(",")[1] + " ps") | |
232 logging.info("Launch gmx trjconv\n") | |
233 if args.verbose: | |
234 list_log_files = Parallel(n_jobs=n_cpus)( | |
235 delayed(cut_traj)(out_dir, prefix_name_file, k, v, n_group, | |
236 xtc_file, gro_file, logging_file, args) | |
237 for k, v in dict_sub_traj.items()) | |
238 log_file_complete = open(logging_file, "a") | |
239 for f in list_log_files: | |
240 open_f = open(f, "r") | |
241 [log_file_complete.write(li) for li in open_f.readlines()] | |
242 open_f.close() | |
243 os.remove(f) | |
244 log_file_complete.close() | |
245 else: | |
246 logging_file = "" | |
247 Parallel(n_jobs=n_cpus)(delayed(cut_traj)(out_dir, prefix_name_file, k, | |
248 v, n_group, xtc_file, | |
249 gro_file, logging_file, args) | |
250 for k, v in dict_sub_traj.items()) | |
251 if args.verbose: | |
252 logging.info("End cut_traj function") | |
253 | |
254 | |
255 def cut_traj(out_dir, prefix_name_file, k, v, n_group, xtc_file, gro_file, | |
256 logging_file, arguments): | |
257 out_traj = (str(out_dir) + prefix_name_file + "_traj_" + str(k) + ".xtc") | |
258 bash_command = ("echo " + n_group + " | gmx trjconv -f " + xtc_file | |
259 + " -s " + gro_file + " -b " + str(v.split(",")[0]) | |
260 + " -e " + str(v.split(",")[1]) + " -o " + out_traj) | |
261 if arguments.verbose: | |
262 log_directory = "log/tmp/" | |
263 logging_file = (log_directory | |
264 + logging_file.rsplit("/", 1)[1].split(".")[0] | |
265 + "_" + str(k) + ".log") | |
266 if not os.path.exists(log_directory): | |
267 os.makedirs(log_directory) | |
268 f_log = open(logging_file, "w") | |
269 subprocess.run(bash_command, shell=True, stdout=f_log, stderr=f_log) | |
270 f_log.close() | |
271 return logging_file | |
272 else: | |
273 subprocess.run(bash_command, shell=True) | |
274 | |
275 | |
276 if __name__ == "__main__": | |
277 args = parse_arguments() | |
278 log_file = "" | |
279 if args.output_directory.endswith("/"): | |
280 out_directory = args.output_directory | |
281 else: | |
282 out_directory = args.output_directory + "/" | |
283 if args.verbose: | |
284 if "/" in args.log_output: | |
285 log_dir = args.log_output.rsplit("/", 1)[0] | |
286 else: | |
287 log_dir = "log/" | |
288 log_file = args.log_output | |
289 if not os.path.exists(log_dir): | |
290 os.makedirs(log_dir) | |
291 if os.path.isfile(log_file): | |
292 os.remove(log_file) | |
293 if args.log_output: | |
294 logging.basicConfig( | |
295 filename=log_file, | |
296 format="%(levelname)s - %(message)s", | |
297 level=logging.INFO, | |
298 ) | |
299 else: | |
300 logging.basicConfig( | |
301 filename=log_file, | |
302 format="%(levelname)s - %(message)s", | |
303 level=logging.INFO, | |
304 ) | |
305 logging.info("verbose mode on") | |
306 gro = "" | |
307 xtc = "" | |
308 list_nbr_sub_traj = [] | |
309 if args.verbose: | |
310 logging.info("Start cut trajectory") | |
311 if args.gro_file and ".gro" in args.gro_file: | |
312 gro = os.path.basename(args.gro_file) | |
313 else: | |
314 if args.verbose: | |
315 logging.error("The entry is not a file or is not a .gro file") | |
316 sys.exit(1) | |
317 if args.xtc_file and ".xtc" in args.xtc_file: | |
318 xtc = os.path.basename(args.xtc_file) | |
319 else: | |
320 if args.verbose: | |
321 logging.error("The entry is not a file or is not a .xtc file") | |
322 sys.exit(1) | |
323 if ".tsv" in args.nbr_sub_traj: | |
324 list_nbr_sub_traj = search_nbr_sub_traj(args.nbr_sub_traj) | |
325 nb_sub_traj = list_nbr_sub_traj[1] | |
326 else: | |
327 nb_sub_traj = int(args.nbr_sub_traj) | |
328 nb_steps_time_step = search_nbr_steps_time_step( | |
329 args.input_check) | |
330 if args.start_traj and args.start_traj != "": | |
331 start_trajectory = int(args.start_traj) | |
332 else: | |
333 start_trajectory = 0 | |
334 if args.end_traj and args.end_traj != "": | |
335 end_trajectory = int(args.end_traj) | |
336 else: | |
337 end_trajectory = nb_steps_time_step[0] - 1 | |
338 len_trajectory = end_trajectory - start_trajectory + 1 | |
339 list_nbr_sub_traj = [ | |
340 len_trajectory, | |
341 nb_sub_traj, | |
342 nb_steps_time_step[1], | |
343 len_trajectory // nb_sub_traj, | |
344 start_trajectory, | |
345 end_trajectory, | |
346 ] | |
347 if nb_sub_traj >= (int(list_nbr_sub_traj[0]) // 2): | |
348 if args.verbose: | |
349 logging.error( | |
350 "The number of averages requested is greater than " | |
351 "half the trajectory size" | |
352 ) | |
353 print( | |
354 "Number of requested sub-trajectories too large compared " | |
355 "to the size of the trajectory" | |
356 ) | |
357 sys.exit(2) | |
358 nb_cpus = args.number_cpus | |
359 launch_cut_traj( | |
360 list_nbr_sub_traj, gro, xtc, out_directory, log_file, | |
361 args.group_output, nb_cpus | |
362 ) | |
363 | |
364 |