diff metfrag.py @ 8:9a3019c609d9 draft

planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit febaaca439248736775db9ad0a857e30463d10aa
author tomnl
date Fri, 13 Sep 2019 08:27:59 -0400
parents 0b3816a7a14b
children 5763234618d4
line wrap: on
line diff
--- a/metfrag.py	Thu Sep 05 06:48:28 2019 -0400
+++ b/metfrag.py	Fri Sep 13 08:27:59 2019 -0400
@@ -117,7 +117,7 @@
     meta_regex['num_peaks'].extend(regex_msp['num_peaks'])
     meta_regex['msp'] = regex_msp['msp']
 
-    print(meta_regex)
+
 
 adduct_types = {
     '[M+H]+': 1.007276,
@@ -135,6 +135,7 @@
     '[M+CH3COO]-': 59.01385,
     '[M-H+CH3COOH]-': 59.01385  # same as above but different style of writing adduct
 }
+inv_adduct_types = {int(round(v, 0)): k for k, v in six.iteritems(adduct_types)}  # rounded mass shift -> adduct name (py2/py3 safe; adducts sharing a rounded mass collapse to one entry)
 
 # function to extract the meta data using the regular expressions
 def parse_meta(meta_regex, meta_info={}):
@@ -264,6 +265,7 @@
         # Just have and index of the spectra in the MSP file
         paramd['additional_details'] = {'spectra_idx': spectrac}
 
+
     paramd["SampleName"] = "{}_metfrag_result".format(spectrac)
 
     # =============== Output peaks to txt file  ==============================
@@ -277,15 +279,18 @@
     # =============== Update param based on MSP metadata ======================
     # Replace param details with details from MSP if required
     if 'precursor_type' in meta_info and meta_info['precursor_type'] in adduct_types:
+        adduct = meta_info['precursor_type']
         nm = float(meta_info['precursor_mz']) - adduct_types[meta_info['precursor_type']]
         paramd["PrecursorIonMode"] = int(round(adduct_types[meta_info['precursor_type']], 0))
     elif not args.skip_invalid_adducts:
+        adduct = inv_adduct_types[int(paramd['PrecursorIonModeDefault'])]
         paramd["PrecursorIonMode"] = paramd['PrecursorIonModeDefault']
         nm = float(meta_info['precursor_mz']) - paramd['nm_mass_diff_default']
     else:
         print('Skipping {}'.format(paramd["SampleName"]))
         return '', ''
 
+    paramd['additional_details']['adduct'] = adduct
     paramd["NeutralPrecursorMass"] = nm
 
     # =============== Create CLI cmd for metfrag ===============================
@@ -295,13 +300,15 @@
             cmd += " {}={}".format(str(k), str(v))
 
     # =============== Run metfrag ==============================================
-    print(cmd)
+    #print(cmd)
     # Filter before process with a minimum number of MS/MS peaks
     if plinesread >= float(args.minMSMSpeaks):
 
         if int(args.cores_top_level) == 1:
             os.system(cmd)
 
+
+
     return paramd, cmd
 
 
@@ -416,9 +423,14 @@
 for k, paramd in six.iteritems(paramds):
     additional_detail_headers = list(set(additional_detail_headers + list(paramd['additional_details'].keys())))
 
+# add inchikey if not already present (missing in metchem output)
+if 'InChIKey' not in headers:
+    headers.append('InChIKey')
+
 headers = additional_detail_headers + sorted(list(set(headers)))
 
 
+
 # Sort files nicely
 outfiles.sort(key = lambda s: int(re.match('^.*/(\d+)_metfrag_result.csv', s).group(1)))
 
@@ -462,8 +474,13 @@
                 if bewrite:
                     bfn = os.path.basename(fn)
                     bfn = bfn.replace(".csv", "")
+                    line['sample_name'] = paramds[bfn]['SampleName']
                     ad = paramds[bfn]['additional_details']
+
+                    if  args.MetFragDatabaseType == "MetChem":
+                        # For some reason the MetChem database option does not report the full InChIKey (at least
+                        # in the Bham setup). This ensures we always get the full InChIKey.
+                        line['InChIKey'] = '{}-{}-{}'.format(line['InChIKey1'], line['InChIKey2'], line['InChIKey3'])
+
                     line.update(ad)
-                    line['sample_name'] = paramds[bfn]['SampleName']
-
                     dwriter.writerow(line)