comparison metfrag.py @ 4:eb581a101672 draft

planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 28da8cdf5f9cc45575b614a329da5790803e59c4
author tomnl
date Thu, 01 Aug 2019 09:02:47 -0400
parents 5ee936e570a7
children c53ed894d736
comparison
equal deleted inserted replaced
3:5ee936e570a7 4:eb581a101672
48 parser.add_argument('--pctexplpeak_thrshld', default=0) 48 parser.add_argument('--pctexplpeak_thrshld', default=0)
49 parser.add_argument('--schema') 49 parser.add_argument('--schema')
50 parser.add_argument('--cores_top_level', default=1) 50 parser.add_argument('--cores_top_level', default=1)
51 parser.add_argument('--chunks', default=1) 51 parser.add_argument('--chunks', default=1)
52 parser.add_argument('--meta_select_col', default='name') 52 parser.add_argument('--meta_select_col', default='name')
53 parser.add_argument('--skip_invalid_adducts', action='store_true')
53 54
54 parser.add_argument('--ScoreSuspectLists', default='') 55 parser.add_argument('--ScoreSuspectLists', default='')
55 parser.add_argument('--MetFragScoreTypes', default="FragmenterScore,OfflineMetFusionScore") 56 parser.add_argument('--MetFragScoreTypes', default="FragmenterScore,OfflineMetFusionScore")
56 parser.add_argument('--MetFragScoreWeights', default="1.0,1.0") 57 parser.add_argument('--MetFragScoreWeights', default="1.0,1.0")
57 58
59
58 args = parser.parse_args() 60 args = parser.parse_args()
59 print(args) 61 print(args)
62
63 if os.stat(args.input_pth).st_size == 0:
64 exit('Input file empty')
65
60 66
61 # Create temporary working directory 67 # Create temporary working directory
62 if args.temp_dir: 68 if args.temp_dir:
63 wd = args.temp_dir 69 wd = args.temp_dir
64 else: 70 else:
117 '[M+ACN+H]+': 42.033823, 123 '[M+ACN+H]+': 42.033823,
118 '[M+ACN+Na]+': 64.015765, 124 '[M+ACN+Na]+': 64.015765,
119 '[M+2ACN+H]+': 83.06037, 125 '[M+2ACN+H]+': 83.06037,
120 '[M-H]-': -1.007276, 126 '[M-H]-': -1.007276,
121 '[M+Cl]-': 34.969402, 127 '[M+Cl]-': 34.969402,
128 '[M+HCOO]-': 44.99819,
129 '[M-H+HCOOH]-': 44.99819, # same as above but different style of writing adduct
130 '[M+CH3COO]-': 59.01385,
131 '[M-H+CH3COOH]-': 59.01385 # same as above but different style of writing adduct
122 } 132 }
123 133
124 # function to extract the meta data using the regular expressions 134 # function to extract the meta data using the regular expressions
125 def parse_meta(meta_regex, meta_info={}): 135 def parse_meta(meta_regex, meta_info={}):
126 for k, regexes in six.iteritems(meta_regex): 136 for k, regexes in six.iteritems(meta_regex):
260 outfile.write(p[0] + "\t" + p[1] + "\n") 270 outfile.write(p[0] + "\t" + p[1] + "\n")
261 271
262 # =============== Update param based on MSP metadata ====================== 272 # =============== Update param based on MSP metadata ======================
263 # Replace param details with details from MSP if required 273 # Replace param details with details from MSP if required
264 if 'precursor_type' in meta_info and meta_info['precursor_type'] in adduct_types: 274 if 'precursor_type' in meta_info and meta_info['precursor_type'] in adduct_types:
265 275 nm = float(meta_info['precursor_mz']) - adduct_types[meta_info['precursor_type']]
266 nm = float(meta_info['precursor_mz']) + adduct_types[meta_info['precursor_type']]
267 paramd["PrecursorIonMode"] = int(round(adduct_types[meta_info['precursor_type']], 0)) 276 paramd["PrecursorIonMode"] = int(round(adduct_types[meta_info['precursor_type']], 0))
277 elif not args.skip_invalid_adducts:
278 paramd["PrecursorIonMode"] = paramd['PrecursorIonModeDefault']
279 nm = float(meta_info['precursor_mz']) - paramd['nm_mass_diff_default']
268 else: 280 else:
269 281 print('Skipping {}'.format(paramd["SampleName"]))
270 paramd["PrecursorIonMode"] = paramd['PrecursorIonModeDefault'] 282 return '', ''
271 nm = float(meta_info['precursor_mz']) + paramd['nm_mass_diff_default']
272 283
273 paramd["NeutralPrecursorMass"] = nm 284 paramd["NeutralPrecursorMass"] = nm
274 285
275 # =============== Create CLI cmd for metfrag =============================== 286 # =============== Create CLI cmd for metfrag ===============================
276 cmd = "metfrag" 287 cmd = "metfrag"
277 for k, v in six.iteritems(paramd): 288 for k, v in six.iteritems(paramd):
278 if not k in ['PrecursorIonModeDefault', 'nm_mass_diff_default', 'additional_details']: 289 if not k in ['PrecursorIonModeDefault', 'nm_mass_diff_default', 'additional_details']:
279 cmd += " {}={}".format(str(k), str(v)) 290 cmd += " {}={}".format(str(k), str(v))
280 291
281 # =============== Run metfrag ============================================== 292 # =============== Run metfrag ==============================================
293 print(cmd)
282 # Filter before process with a minimum number of MS/MS peaks 294 # Filter before process with a minimum number of MS/MS peaks
283 if plinesread >= float(args.minMSMSpeaks): 295 if plinesread >= float(args.minMSMSpeaks):
284 296
285 if int(args.cores_top_level) == 1: 297 if int(args.cores_top_level) == 1:
286 os.system(cmd) 298 os.system(cmd)
334 elif plinesread and plinesread == pnumlines: 346 elif plinesread and plinesread == pnumlines:
335 # =============== Get sample name and additional details for output ======= 347 # =============== Get sample name and additional details for output =======
336 spectrac += 1 348 spectrac += 1
337 paramd, cmd = run_metfrag(meta_info, peaklist, args, wd, spectrac, adduct_types) 349 paramd, cmd = run_metfrag(meta_info, peaklist, args, wd, spectrac, adduct_types)
338 350
339 paramds[paramd["SampleName"]] = paramd 351 if paramd:
340 cmds.append(cmd) 352 paramds[paramd["SampleName"]] = paramd
353 cmds.append(cmd)
341 354
342 meta_info = {} 355 meta_info = {}
343 pnumlines = 0 356 pnumlines = 0
344 plinesread = 0 357 plinesread = 0
345 358
346 # end of file. Check if there is a MSP spectra to run metfrag on still 359 # end of file. Check if there is a MSP spectra to run metfrag on still
347 if plinesread and plinesread == pnumlines: 360 if plinesread and plinesread == pnumlines:
348 361
349 paramd, cmd = run_metfrag(meta_info, peaklist, args, wd, spectrac+1, adduct_types) 362 paramd, cmd = run_metfrag(meta_info, peaklist, args, wd, spectrac+1, adduct_types)
350 363
351 paramds[paramd["SampleName"]] = paramd 364 if paramd:
352 cmds.append(cmd) 365 paramds[paramd["SampleName"]] = paramd
353 366 cmds.append(cmd)
354 367
355 368
356 369
357 370
358 # Perform multiprocessing on command line call level 371 # Perform multiprocessing on command line call level