Mercurial > repos > tomnl > metfrag
comparison metfrag.py @ 8:9a3019c609d9 draft
planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit febaaca439248736775db9ad0a857e30463d10aa
| author | tomnl |
|---|---|
| date | Fri, 13 Sep 2019 08:27:59 -0400 |
| parents | 0b3816a7a14b |
| children | 5763234618d4 |
comparison
equal
deleted
inserted
replaced
| 7:0b3816a7a14b | 8:9a3019c609d9 |
|---|---|
| 115 meta_regex['precursor_mz'].extend(regex_msp['precursor_mz']) | 115 meta_regex['precursor_mz'].extend(regex_msp['precursor_mz']) |
| 116 meta_regex['precursor_type'].extend(regex_msp['precursor_type']) | 116 meta_regex['precursor_type'].extend(regex_msp['precursor_type']) |
| 117 meta_regex['num_peaks'].extend(regex_msp['num_peaks']) | 117 meta_regex['num_peaks'].extend(regex_msp['num_peaks']) |
| 118 meta_regex['msp'] = regex_msp['msp'] | 118 meta_regex['msp'] = regex_msp['msp'] |
| 119 | 119 |
| 120 print(meta_regex) | 120 |
| 121 | 121 |
| 122 adduct_types = { | 122 adduct_types = { |
| 123 '[M+H]+': 1.007276, | 123 '[M+H]+': 1.007276, |
| 124 '[M+NH4]+': 18.034374, | 124 '[M+NH4]+': 18.034374, |
| 125 '[M+Na]+': 22.989218, | 125 '[M+Na]+': 22.989218, |
| 133 '[M+HCOO]-': 44.99819, | 133 '[M+HCOO]-': 44.99819, |
| 134 '[M-H+HCOOH]-': 44.99819, # same as above but different style of writing adduct | 134 '[M-H+HCOOH]-': 44.99819, # same as above but different style of writing adduct |
| 135 '[M+CH3COO]-': 59.01385, | 135 '[M+CH3COO]-': 59.01385, |
| 136 '[M-H+CH3COOH]-': 59.01385 # same as above but different style of writing adduct | 136 '[M-H+CH3COOH]-': 59.01385 # same as above but different style of writing adduct |
| 137 } | 137 } |
| 138 inv_adduct_types = {int(round(v, 0)): k for k, v in adduct_types.iteritems()} | |
| 138 | 139 |
| 139 # function to extract the meta data using the regular expressions | 140 # function to extract the meta data using the regular expressions |
| 140 def parse_meta(meta_regex, meta_info={}): | 141 def parse_meta(meta_regex, meta_info={}): |
| 141 for k, regexes in six.iteritems(meta_regex): | 142 for k, regexes in six.iteritems(meta_regex): |
| 142 for reg in regexes: | 143 for reg in regexes: |
| 262 paramd['additional_details'] = meta_info | 263 paramd['additional_details'] = meta_info |
| 263 else: | 264 else: |
| 264 # Just have and index of the spectra in the MSP file | 265 # Just have and index of the spectra in the MSP file |
| 265 paramd['additional_details'] = {'spectra_idx': spectrac} | 266 paramd['additional_details'] = {'spectra_idx': spectrac} |
| 266 | 267 |
| 268 | |
| 267 paramd["SampleName"] = "{}_metfrag_result".format(spectrac) | 269 paramd["SampleName"] = "{}_metfrag_result".format(spectrac) |
| 268 | 270 |
| 269 # =============== Output peaks to txt file ============================== | 271 # =============== Output peaks to txt file ============================== |
| 270 paramd["PeakListPath"] = os.path.join(wd, "{}_tmpspec.txt".format(spectrac)) | 272 paramd["PeakListPath"] = os.path.join(wd, "{}_tmpspec.txt".format(spectrac)) |
| 271 | 273 |
| 275 outfile.write(p[0] + "\t" + p[1] + "\n") | 277 outfile.write(p[0] + "\t" + p[1] + "\n") |
| 276 | 278 |
| 277 # =============== Update param based on MSP metadata ====================== | 279 # =============== Update param based on MSP metadata ====================== |
| 278 # Replace param details with details from MSP if required | 280 # Replace param details with details from MSP if required |
| 279 if 'precursor_type' in meta_info and meta_info['precursor_type'] in adduct_types: | 281 if 'precursor_type' in meta_info and meta_info['precursor_type'] in adduct_types: |
| 282 adduct = meta_info['precursor_type'] | |
| 280 nm = float(meta_info['precursor_mz']) - adduct_types[meta_info['precursor_type']] | 283 nm = float(meta_info['precursor_mz']) - adduct_types[meta_info['precursor_type']] |
| 281 paramd["PrecursorIonMode"] = int(round(adduct_types[meta_info['precursor_type']], 0)) | 284 paramd["PrecursorIonMode"] = int(round(adduct_types[meta_info['precursor_type']], 0)) |
| 282 elif not args.skip_invalid_adducts: | 285 elif not args.skip_invalid_adducts: |
| 286 adduct = inv_adduct_types[int(paramd['PrecursorIonModeDefault'])] | |
| 283 paramd["PrecursorIonMode"] = paramd['PrecursorIonModeDefault'] | 287 paramd["PrecursorIonMode"] = paramd['PrecursorIonModeDefault'] |
| 284 nm = float(meta_info['precursor_mz']) - paramd['nm_mass_diff_default'] | 288 nm = float(meta_info['precursor_mz']) - paramd['nm_mass_diff_default'] |
| 285 else: | 289 else: |
| 286 print('Skipping {}'.format(paramd["SampleName"])) | 290 print('Skipping {}'.format(paramd["SampleName"])) |
| 287 return '', '' | 291 return '', '' |
| 288 | 292 |
| 293 paramd['additional_details']['adduct'] = adduct | |
| 289 paramd["NeutralPrecursorMass"] = nm | 294 paramd["NeutralPrecursorMass"] = nm |
| 290 | 295 |
| 291 # =============== Create CLI cmd for metfrag =============================== | 296 # =============== Create CLI cmd for metfrag =============================== |
| 292 cmd = "metfrag" | 297 cmd = "metfrag" |
| 293 for k, v in six.iteritems(paramd): | 298 for k, v in six.iteritems(paramd): |
| 294 if not k in ['PrecursorIonModeDefault', 'nm_mass_diff_default', 'additional_details']: | 299 if not k in ['PrecursorIonModeDefault', 'nm_mass_diff_default', 'additional_details']: |
| 295 cmd += " {}={}".format(str(k), str(v)) | 300 cmd += " {}={}".format(str(k), str(v)) |
| 296 | 301 |
| 297 # =============== Run metfrag ============================================== | 302 # =============== Run metfrag ============================================== |
| 298 print(cmd) | 303 #print(cmd) |
| 299 # Filter before process with a minimum number of MS/MS peaks | 304 # Filter before process with a minimum number of MS/MS peaks |
| 300 if plinesread >= float(args.minMSMSpeaks): | 305 if plinesread >= float(args.minMSMSpeaks): |
| 301 | 306 |
| 302 if int(args.cores_top_level) == 1: | 307 if int(args.cores_top_level) == 1: |
| 303 os.system(cmd) | 308 os.system(cmd) |
| 309 | |
| 310 | |
| 304 | 311 |
| 305 return paramd, cmd | 312 return paramd, cmd |
| 306 | 313 |
| 307 | 314 |
| 308 def work(cmds): | 315 def work(cmds): |
| 414 | 421 |
| 415 additional_detail_headers = ['sample_name'] | 422 additional_detail_headers = ['sample_name'] |
| 416 for k, paramd in six.iteritems(paramds): | 423 for k, paramd in six.iteritems(paramds): |
| 417 additional_detail_headers = list(set(additional_detail_headers + list(paramd['additional_details'].keys()))) | 424 additional_detail_headers = list(set(additional_detail_headers + list(paramd['additional_details'].keys()))) |
| 418 | 425 |
| 426 # add inchikey if not already present (missing in metchem output) | |
| 427 if 'InChIKey' not in headers: | |
| 428 headers.append('InChIKey') | |
| 429 | |
| 419 headers = additional_detail_headers + sorted(list(set(headers))) | 430 headers = additional_detail_headers + sorted(list(set(headers))) |
| 431 | |
| 420 | 432 |
| 421 | 433 |
| 422 # Sort files nicely | 434 # Sort files nicely |
| 423 outfiles.sort(key = lambda s: int(re.match('^.*/(\d+)_metfrag_result.csv', s).group(1))) | 435 outfiles.sort(key = lambda s: int(re.match('^.*/(\d+)_metfrag_result.csv', s).group(1))) |
| 424 | 436 |
| 460 | 472 |
| 461 # Write the line if it pass all filters | 473 # Write the line if it pass all filters |
| 462 if bewrite: | 474 if bewrite: |
| 463 bfn = os.path.basename(fn) | 475 bfn = os.path.basename(fn) |
| 464 bfn = bfn.replace(".csv", "") | 476 bfn = bfn.replace(".csv", "") |
| 477 line['sample_name'] = paramds[bfn]['SampleName'] | |
| 465 ad = paramds[bfn]['additional_details'] | 478 ad = paramds[bfn]['additional_details'] |
| 479 | |
| 480 if args.MetFragDatabaseType == "MetChem": | |
| 481 # for some reason the metchem database option does not report the full inchikey (at least | |
| 482 # in the Bham setup. This ensures we always get the fully inchikey | |
| 483 line['InChIKey'] = '{}-{}-{}'.format(line['InChIKey1'], line['InChIKey2'], line['InChIKey3']) | |
| 484 | |
| 466 line.update(ad) | 485 line.update(ad) |
| 467 line['sample_name'] = paramds[bfn]['SampleName'] | |
| 468 | |
| 469 dwriter.writerow(line) | 486 dwriter.writerow(line) |
