comparison cut_trajectory.py @ 5:e504457035e5 draft

"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/aubi_piaf commit b6488400d4478d46697019485e912c38ea2202a5-dirty"
author agpetit
date Mon, 30 May 2022 14:21:51 +0000
parents
children f1dd5d99ea2d
comparison
equal deleted inserted replaced
4:433dba16af7d 5:e504457035e5
1 #!/usr/bin/env python3
2
3 # coding: utf-8
4 """
5 The script allows to estimate the number of sub-trajectories to obtain.
6 It also allows to split the trajectory into a number of sub-trajectories.
7 # USAGE to estimate number of sub-trajectories :
8 # USAGE : cut_trajectory.py
9 -s : number of desired sub-trajectories (number or file)
10 -gro : .gro file -xtc : .xtc file -log : name of log file (optional)
11 -d : output directory (optional) -o : name of output file (optional)
12 -g : group for output (optional)
13 -start : start time of the trajectory (optional)
14 -end : end time of the trajectory (optional)
15 """
16
17 __all__ = []
18 __author__ = "Agnès-Elisabeth Petit"
19 __date__ = "29/04/2022"
20 __version__ = "0.8"
21 __copyright__ = "(c) 2022 CC-BY-NC-SA"
22
23 # Library import
24 import argparse
25 import logging
26 import os
27 import subprocess
28 import sys
29
30 from joblib import Parallel, delayed
31
32
33 def test_setup():
34 global args
35 args = parse_arguments()
36 args.verbose = True
37
38
39 def parse_arguments():
40 list_choices = []
41 [list_choices.append(str(x)) for x in range(17)]
42 parser = argparse.ArgumentParser(
43 description="The script allows to estimate the number of "
44 "sub-trajectories to obtain. It also allows to split"
45 " the trajectory into a number of sub-trajectories.",
46 formatter_class=argparse.ArgumentDefaultsHelpFormatter,
47 prefix_chars="-",
48 add_help=True,
49 )
50 parser.add_argument(
51 "-v",
52 "--verbose",
53 action="store_true",
54 help="""Information messages to stderr""",
55 )
56 parser.add_argument(
57 "-c",
58 "--input_check",
59 type=str,
60 help=""".txt file obtained with gmx check -f.
61 It contains information about the trajectory""",
62 )
63 parser.add_argument(
64 "-s",
65 "--nbr_sub_traj",
66 type=str,
67 help="""numbers of sub_trajectories""",
68 )
69 parser.add_argument(
70 "-gro", "--gro_file", type=str, help="""My input .gro file"""
71 )
72 parser.add_argument(
73 "-xtc", "--xtc_file", type=str, help="""My input .xtc file"""
74 )
75 parser.add_argument(
76 "-log", "--log_output", type=str,
77 default="log/cut_trajectory.log",
78 help="""Output for log file. Default :
79 log/cut_trajectory.log"""
80 )
81 parser.add_argument(
82 "-d", "--output_directory", type=str,
83 default="out/",
84 help="""It's output Directory. Default : out/"""
85 )
86 parser.add_argument(
87 "-g",
88 "--group_output",
89 type=str,
90 default="0",
91 choices=list_choices,
92 help="""Select group for output. 0 : system, 1: protein, 2: protein-H,
93 3: C-alpha, 4: Backbone, 5: MainChain, 6: MainChain+Cb,
94 7: MainChain+H, 8: SideChain, 9: SideChain-H, 10: Prot-Masses,
95 11: non-Protein, 12: Other, 13: POPC, 14: POT, 15: CLA,
96 16: TIP3""",
97 )
98 parser.add_argument(
99 "-start", "--start_traj", type=str,
100 help="""Start of the trajectory to be cut"""
101 )
102 parser.add_argument(
103 "-end", "--end_traj", type=str,
104 help="""End of the trajectory to be cut"""
105 )
106 parser.add_argument(
107 "-cpus", "--number_cpus", type=int,
108 default=1,
109 help="""Number of cpus. Default : 1"""
110 )
111 return parser.parse_args()
112
113
114 def search_nbr_steps_time_step(txt_file):
115 """
116 Description : Keeping the number of frames of the complete trajectory and
117 the time between each frame.
118 param txt_file: file obtained with gmx check.
119 return: list that contains the number of frames of the complete trajectory
120 and time between each frame.
121 """
122 if args.verbose:
123 logging.info("\nFunction search_nbr_steps_time_step")
124 logging.info("The input file is " + txt_file)
125 with open(txt_file, "r") as f:
126 len_traj = ""
127 time_step = ""
128 for li in f:
129 li = li.rstrip()
130 if li.startswith("Step"):
131 li2 = li.split()
132 len_traj = int(li2[1])
133 time_step = int(li2[2])
134 if args.verbose:
135 logging.info("The length of the trajectory is " + str(len_traj))
136 logging.info(
137 "The elapsed time between two steps is : " + str(time_step) + " ps"
138 )
139 logging.info("End search_nbr_steps_time_step functions")
140 return len_traj, time_step
141
142
143 def search_nbr_sub_traj(tsv_nb_sub_traj_file):
144 """
145 Description: Obtaining the number of frames of the complete trajectory,
146 the number of sub-trajectories to create, the time between each
147 frame and the number of frames per sub-trajectory.
148 param tsv_nb_sub_traj_file: tsv file obtained with the estimation of
149 the number of sub-trajectories
150 return: list that contains the number of images of the complete trajectory,
151 the number of sub-trajectories to create, the time between each image and
152 the number of images per sub-trajectory.
153 """
154 if args.verbose:
155 logging.info("\nFunction search_nbr_sub_traj")
156 logging.info("The input file is " + tsv_nb_sub_traj_file)
157 list_number_sub_traj = []
158 with open(tsv_nb_sub_traj_file, "r") as f:
159 for li in f:
160 li = li.rstrip()
161 if not li.startswith("Length"):
162 list_number_sub_traj = li.split()
163 if args.verbose:
164 logging.info("The length of complete trajectory is "
165 + list_number_sub_traj[0])
166 logging.info(
167 "The number of sub-trajectories required is "
168 + list_number_sub_traj[1]
169 )
170 logging.info(
171 "The elapsed time between two steps is : "
172 + list_number_sub_traj[2] + "ps"
173 )
174 logging.info(
175 "The number of frames per sub-trajectory is "
176 + list_number_sub_traj[3]
177 )
178 logging.info("End search_nbr_sub_traj function")
179 list_number_sub_traj = [int(v) for v in list_number_sub_traj]
180 return list_number_sub_traj
181
182
183 def launch_cut_traj(list_file, gro_file, xtc_file, out_dir,
184 logging_file, n_group, n_cpus):
185 """
186 Description: function that allows to split the trajectory
187 into a number of sub-trajectories
188 param list_file: list that contains the number of images of the complete
189 trajectory, the number of sub-trajectories to create, the time between
190 each image and the number of images per sub-trajectory.
191 param gro_file: .gro file
192 param xtc_file: .xtc file
193 param out_dir: output directory
194 param logging_file: name of log file
195 return: None
196 output: create a number of sub-trajectories
197 """
198 if args.verbose:
199 logging.info("\nFunction cut_traj")
200 logging.info("The length of the trajectory is " + str(list_file[0])
201 + " frames")
202 logging.info(
203 "The elapsed time between two steps is : " + str(list_file[2])
204 + " ps"
205 )
206 logging.info("The number of sub-trajectories required is "
207 + str(list_file[1]))
208 logging.info(".gro file is " + gro_file)
209 logging.info(".xtc file is " + xtc_file)
210 logging.info("Output directory is " + str(out_dir))
211 logging.info("The name of .log file is " + logging_file)
212 logging.info("The number of cpus used is " + str(n_cpus))
213 if not os.path.exists(out_dir):
214 os.makedirs(out_dir)
215 n_sub_traj = list_file[1]
216 times_step = list_file[2]
217 nb_frame_sub_traj = list_file[3]
218 prefix_name_file = os.path.basename(gro_file).rsplit(".", 1)[0]
219 start = list_file[4]
220 end = list_file[4] - 1
221 dict_sub_traj = {}
222 for nb_traj in range(n_sub_traj):
223 end += nb_frame_sub_traj
224 value = str(start * times_step) + "," + str(end * times_step)
225 dict_sub_traj[nb_traj + 1] = value
226 start += nb_frame_sub_traj
227 if args.verbose:
228 for k, v in dict_sub_traj.items():
229 logging.info("Sub_trajectory " + str(k) + " starts at "
230 + v.split(",")[0] + " ps and ends at "
231 + v.split(",")[1] + " ps")
232 logging.info("Launch gmx trjconv\n")
233 if args.verbose:
234 list_log_files = Parallel(n_jobs=n_cpus)(
235 delayed(cut_traj)(out_dir, prefix_name_file, k, v, n_group,
236 xtc_file, gro_file, logging_file, args)
237 for k, v in dict_sub_traj.items())
238 log_file_complete = open(logging_file, "a")
239 for f in list_log_files:
240 open_f = open(f, "r")
241 [log_file_complete.write(li) for li in open_f.readlines()]
242 open_f.close()
243 os.remove(f)
244 log_file_complete.close()
245 else:
246 logging_file = ""
247 Parallel(n_jobs=n_cpus)(delayed(cut_traj)(out_dir, prefix_name_file, k,
248 v, n_group, xtc_file,
249 gro_file, logging_file, args)
250 for k, v in dict_sub_traj.items())
251 if args.verbose:
252 logging.info("End cut_traj function")
253
254
255 def cut_traj(out_dir, prefix_name_file, k, v, n_group, xtc_file, gro_file,
256 logging_file, arguments):
257 out_traj = (str(out_dir) + prefix_name_file + "_traj_" + str(k) + ".xtc")
258 bash_command = ("echo " + n_group + " | gmx trjconv -f " + xtc_file
259 + " -s " + gro_file + " -b " + str(v.split(",")[0])
260 + " -e " + str(v.split(",")[1]) + " -o " + out_traj)
261 if arguments.verbose:
262 log_directory = "log/tmp/"
263 logging_file = (log_directory
264 + logging_file.rsplit("/", 1)[1].split(".")[0]
265 + "_" + str(k) + ".log")
266 if not os.path.exists(log_directory):
267 os.makedirs(log_directory)
268 f_log = open(logging_file, "w")
269 subprocess.run(bash_command, shell=True, stdout=f_log, stderr=f_log)
270 f_log.close()
271 return logging_file
272 else:
273 subprocess.run(bash_command, shell=True)
274
275
276 if __name__ == "__main__":
277 args = parse_arguments()
278 log_file = ""
279 if args.output_directory.endswith("/"):
280 out_directory = args.output_directory
281 else:
282 out_directory = args.output_directory + "/"
283 if args.verbose:
284 if "/" in args.log_output:
285 log_dir = args.log_output.rsplit("/", 1)[0]
286 else:
287 log_dir = "log/"
288 log_file = args.log_output
289 if not os.path.exists(log_dir):
290 os.makedirs(log_dir)
291 if os.path.isfile(log_file):
292 os.remove(log_file)
293 if args.log_output:
294 logging.basicConfig(
295 filename=log_file,
296 format="%(levelname)s - %(message)s",
297 level=logging.INFO,
298 )
299 else:
300 logging.basicConfig(
301 filename=log_file,
302 format="%(levelname)s - %(message)s",
303 level=logging.INFO,
304 )
305 logging.info("verbose mode on")
306 gro = ""
307 xtc = ""
308 list_nbr_sub_traj = []
309 if args.verbose:
310 logging.info("Start cut trajectory")
311 if args.gro_file and ".gro" in args.gro_file:
312 gro = os.path.basename(args.gro_file)
313 else:
314 if args.verbose:
315 logging.error("The entry is not a file or is not a .gro file")
316 sys.exit(1)
317 if args.xtc_file and ".xtc" in args.xtc_file:
318 xtc = os.path.basename(args.xtc_file)
319 else:
320 if args.verbose:
321 logging.error("The entry is not a file or is not a .xtc file")
322 sys.exit(1)
323 if ".tsv" in args.nbr_sub_traj:
324 list_nbr_sub_traj = search_nbr_sub_traj(args.nbr_sub_traj)
325 nb_sub_traj = list_nbr_sub_traj[1]
326 else:
327 nb_sub_traj = int(args.nbr_sub_traj)
328 nb_steps_time_step = search_nbr_steps_time_step(
329 args.input_check)
330 if args.start_traj and args.start_traj != "":
331 start_trajectory = int(args.start_traj)
332 else:
333 start_trajectory = 0
334 if args.end_traj and args.end_traj != "":
335 end_trajectory = int(args.end_traj)
336 else:
337 end_trajectory = nb_steps_time_step[0] - 1
338 len_trajectory = end_trajectory - start_trajectory + 1
339 list_nbr_sub_traj = [
340 len_trajectory,
341 nb_sub_traj,
342 nb_steps_time_step[1],
343 len_trajectory // nb_sub_traj,
344 start_trajectory,
345 end_trajectory,
346 ]
347 if nb_sub_traj >= (int(list_nbr_sub_traj[0]) // 2):
348 if args.verbose:
349 logging.error(
350 "The number of averages requested is greater than "
351 "half the trajectory size"
352 )
353 print(
354 "Number of requested sub-trajectories too large compared "
355 "to the size of the trajectory"
356 )
357 sys.exit(2)
358 nb_cpus = args.number_cpus
359 launch_cut_traj(
360 list_nbr_sub_traj, gro, xtc, out_directory, log_file,
361 args.group_output, nb_cpus
362 )
363
364