comparison metfrag.py @ 2:d040e27b6225 draft
planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 59631850110e60900d03f642c445cc35a36414b5
| field | value |
|---|---|
| author | tomnl |
| date | Mon, 03 Jun 2019 11:20:55 -0400 |
| parents | c1b168770b68 |
| children | 5ee936e570a7 |
| 1:c1b168770b68 | 2:d040e27b6225 |
|---|---|
| 67 if os.path.exists(wd): | 67 if os.path.exists(wd): |
| 68 shutil.rmtree(wd) | 68 shutil.rmtree(wd) |
| 69 os.makedirs(wd) | 69 os.makedirs(wd) |
| 70 else: | 70 else: |
| 71 os.makedirs(wd) | 71 os.makedirs(wd) |
| 72 | |
| 73 ###################################################################### | |
| 74 # Setup parameter dictionary | |
| 75 ###################################################################### | |
| 76 paramd = defaultdict() | |
| 77 | |
| 78 paramd["MetFragDatabaseType"] = args.MetFragDatabaseType | |
| 79 | |
| 80 if args.MetFragDatabaseType == "LocalCSV": | |
| 81 paramd["LocalDatabasePath"] = args.LocalDatabasePath | |
| 82 elif args.MetFragDatabaseType == "MetChem": | |
| 83 paramd["LocalMetChemDatabase"] = "metchem" | |
| 84 paramd["LocalMetChemDatabasePortNumber"] = 5432 | |
| 85 paramd["LocalMetChemDatabaseServerIp"] = args.LocalMetChemDatabaseServerIp | |
| 86 paramd["LocalMetChemDatabaseUser"] = "metchemro" | |
| 87 paramd["LocalMetChemDatabasePassword"] = "metchemro" | |
| 88 | |
| 89 paramd["FragmentPeakMatchAbsoluteMassDeviation"] = args.FragmentPeakMatchAbsoluteMassDeviation | |
| 90 paramd["FragmentPeakMatchRelativeMassDeviation"] = args.FragmentPeakMatchRelativeMassDeviation | |
| 91 paramd["DatabaseSearchRelativeMassDeviation"] = args.DatabaseSearchRelativeMassDeviation | |
| 92 paramd["SampleName"] = '' | |
| 93 paramd["ResultsPath"] = os.path.join(wd) | |
| 94 | |
| 95 if args.polarity == "pos": | |
| 96 paramd["IsPositiveIonMode"] = True | |
| 97 paramd["PrecursorIonModeDefault"] = "1" | |
| 98 paramd["PrecursorIonMode"] = "1" | |
| 99 paramd["nm_mass_diff_default"] = 1.007276 | |
| 100 else: | |
| 101 paramd["IsPositiveIonMode"] = False | |
| 102 paramd["PrecursorIonModeDefault"] = "-1" | |
| 103 paramd["PrecursorIonMode"] = "-1" | |
| 104 paramd["nm_mass_diff_default"] = -1.007276 | |
| 105 | |
| 106 paramd["MetFragCandidateWriter"] = "CSV" | |
| 107 paramd["NumberThreads"] = args.NumberThreads | |
| 108 | |
| 109 if args.ScoreSuspectLists: | |
| 110 paramd["ScoreSuspectLists"] = args.ScoreSuspectLists | |
| 111 | |
| 112 paramd["MetFragScoreTypes"] = args.MetFragScoreTypes | |
| 113 paramd["MetFragScoreWeights"] = args.MetFragScoreWeights | |
| 114 | |
| 115 dct_filter = defaultdict() | |
| 116 filterh = [] | |
| 117 | |
| 118 if args.UnconnectedCompoundFilter: | |
| 119 filterh.append('UnconnectedCompoundFilter') | |
| 120 | |
| 121 if args.IsotopeFilter: | |
| 122 filterh.append('IsotopeFilter') | |
| 123 | |
| 124 if args.FilterMinimumElements: | |
| 125 filterh.append('MinimumElementsFilter') | |
| 126 dct_filter['FilterMinimumElements'] = args.FilterMinimumElements | |
| 127 | |
| 128 if args.FilterMaximumElements: | |
| 129 filterh.append('MaximumElementsFilter') | |
| 130 dct_filter['FilterMaximumElements'] = args.FilterMaximumElements | |
| 131 | |
| 132 if args.FilterSmartsInclusionList: | |
| 133 filterh.append('SmartsSubstructureInclusionFilter') | |
| 134 dct_filter['FilterSmartsInclusionList'] = args.FilterSmartsInclusionList | |
| 135 | |
| 136 if args.FilterSmartsExclusionList: | |
| 137 filterh.append('SmartsSubstructureExclusionFilter') | |
| 138 dct_filter['FilterSmartsExclusionList'] = args.FilterSmartsExclusionList | |
| 139 | |
| 140 # My understanding is that both 'ElementInclusionExclusiveFilter' and 'ElementExclusionFilter' use | |
| 141 # 'FilterIncludedElements' | |
| 142 if args.FilterIncludedExclusiveElements: | |
| 143 filterh.append('ElementInclusionExclusiveFilter') | |
| 144 dct_filter['FilterIncludedElements'] = args.FilterIncludedExclusiveElements | |
| 145 | |
| 146 if args.FilterIncludedElements: | |
| 147 filterh.append('ElementInclusionFilter') | |
| 148 dct_filter['FilterIncludedElements'] = args.FilterIncludedElements | |
| 149 | |
| 150 if args.FilterExcludedElements: | |
| 151 filterh.append('ElementExclusionFilter') | |
| 152 dct_filter['FilterExcludedElements'] = args.FilterExcludedElements | |
| 153 | |
| 154 if filterh: | |
| 155 fcmds = ','.join(filterh) + ' ' | |
| 156 for k, v in six.iteritems(dct_filter): | |
| 157 fcmds += "{0}={1} ".format(str(k), str(v)) | |
| 158 | |
| 159 paramd["MetFragPreProcessingCandidateFilter"] = fcmds | |
| 160 | |
| 161 print(paramd) | |
| 162 | 72 |
| 163 ###################################################################### | 73 ###################################################################### |
| 164 # Setup regular expressions for MSP parsing dictionary | 74 # Setup regular expressions for MSP parsing dictionary |
| 165 ###################################################################### | 75 ###################################################################### |
| 166 regex_msp = {} | 76 regex_msp = {} |
| 196 meta_regex['num_peaks'].extend(regex_msp['num_peaks']) | 106 meta_regex['num_peaks'].extend(regex_msp['num_peaks']) |
| 197 meta_regex['msp'] = regex_msp['msp'] | 107 meta_regex['msp'] = regex_msp['msp'] |
| 198 | 108 |
| 199 print(meta_regex) | 109 print(meta_regex) |
| 200 | 110 |
| 201 | |
| 202 | |
| 203 # this dictionary will store the meta data results form the MSp file | |
| 204 meta_info = {} | |
| 205 | |
| 206 | |
| 207 # function to extract the meta data using the regular expressions | |
| 208 def parse_meta(meta_regex, meta_info={}): | |
| 209 for k, regexes in six.iteritems(meta_regex): | |
| 210 for reg in regexes: | |
| 211 m = re.search(reg, line, re.IGNORECASE) | |
| 212 if m: | |
| 213 meta_info[k] = '-'.join(m.groups()).strip() | |
| 214 return meta_info | |
| 215 | |
| 216 | |
| 217 adduct_types = { | 111 adduct_types = { |
| 218 '[M+H]+': 1.007276, | 112 '[M+H]+': 1.007276, |
| 219 '[M+NH4]+': 18.034374, | 113 '[M+NH4]+': 18.034374, |
| 220 '[M+Na]+': 22.989218, | 114 '[M+Na]+': 22.989218, |
| 221 '[M+K]+': 38.963158, | 115 '[M+K]+': 38.963158, |
| 225 '[M+2ACN+H]+': 83.06037, | 119 '[M+2ACN+H]+': 83.06037, |
| 226 '[M-H]-': -1.007276, | 120 '[M-H]-': -1.007276, |
| 227 '[M+Cl]-': 34.969402, | 121 '[M+Cl]-': 34.969402, |
| 228 } | 122 } |
| 229 | 123 |
| 124 # function to extract the meta data using the regular expressions | |
| 125 def parse_meta(meta_regex, meta_info={}): | |
| 126 for k, regexes in six.iteritems(meta_regex): | |
| 127 for reg in regexes: | |
| 128 m = re.search(reg, line, re.IGNORECASE) | |
| 129 if m: | |
| 130 meta_info[k] = '-'.join(m.groups()).strip() | |
| 131 return meta_info | |
| 132 | |
| 133 | |
| 134 | |
| 135 ###################################################################### | |
| 136 # Setup parameter dictionary | |
| 137 ###################################################################### | |
| 138 def init_paramd(args): | |
| 139 paramd = defaultdict() | |
| 140 | |
| 141 paramd["MetFragDatabaseType"] = args.MetFragDatabaseType | |
| 142 | |
| 143 if args.MetFragDatabaseType == "LocalCSV": | |
| 144 paramd["LocalDatabasePath"] = args.LocalDatabasePath | |
| 145 elif args.MetFragDatabaseType == "MetChem": | |
| 146 paramd["LocalMetChemDatabase"] = "metchem" | |
| 147 paramd["LocalMetChemDatabasePortNumber"] = 5432 | |
| 148 paramd["LocalMetChemDatabaseServerIp"] = args.LocalMetChemDatabaseServerIp | |
| 149 paramd["LocalMetChemDatabaseUser"] = "metchemro" | |
| 150 paramd["LocalMetChemDatabasePassword"] = "metchemro" | |
| 151 | |
| 152 paramd["FragmentPeakMatchAbsoluteMassDeviation"] = args.FragmentPeakMatchAbsoluteMassDeviation | |
| 153 paramd["FragmentPeakMatchRelativeMassDeviation"] = args.FragmentPeakMatchRelativeMassDeviation | |
| 154 paramd["DatabaseSearchRelativeMassDeviation"] = args.DatabaseSearchRelativeMassDeviation | |
| 155 paramd["SampleName"] = '' | |
| 156 paramd["ResultsPath"] = os.path.join(wd) | |
| 157 | |
| 158 if args.polarity == "pos": | |
| 159 paramd["IsPositiveIonMode"] = True | |
| 160 paramd["PrecursorIonModeDefault"] = "1" | |
| 161 paramd["PrecursorIonMode"] = "1" | |
| 162 paramd["nm_mass_diff_default"] = 1.007276 | |
| 163 else: | |
| 164 paramd["IsPositiveIonMode"] = False | |
| 165 paramd["PrecursorIonModeDefault"] = "-1" | |
| 166 paramd["PrecursorIonMode"] = "-1" | |
| 167 paramd["nm_mass_diff_default"] = -1.007276 | |
| 168 | |
| 169 paramd["MetFragCandidateWriter"] = "CSV" | |
| 170 paramd["NumberThreads"] = args.NumberThreads | |
| 171 | |
| 172 if args.ScoreSuspectLists: | |
| 173 paramd["ScoreSuspectLists"] = args.ScoreSuspectLists | |
| 174 | |
| 175 paramd["MetFragScoreTypes"] = args.MetFragScoreTypes | |
| 176 paramd["MetFragScoreWeights"] = args.MetFragScoreWeights | |
| 177 | |
| 178 dct_filter = defaultdict() | |
| 179 filterh = [] | |
| 180 | |
| 181 if args.UnconnectedCompoundFilter: | |
| 182 filterh.append('UnconnectedCompoundFilter') | |
| 183 | |
| 184 if args.IsotopeFilter: | |
| 185 filterh.append('IsotopeFilter') | |
| 186 | |
| 187 if args.FilterMinimumElements: | |
| 188 filterh.append('MinimumElementsFilter') | |
| 189 dct_filter['FilterMinimumElements'] = args.FilterMinimumElements | |
| 190 | |
| 191 if args.FilterMaximumElements: | |
| 192 filterh.append('MaximumElementsFilter') | |
| 193 dct_filter['FilterMaximumElements'] = args.FilterMaximumElements | |
| 194 | |
| 195 if args.FilterSmartsInclusionList: | |
| 196 filterh.append('SmartsSubstructureInclusionFilter') | |
| 197 dct_filter['FilterSmartsInclusionList'] = args.FilterSmartsInclusionList | |
| 198 | |
| 199 if args.FilterSmartsExclusionList: | |
| 200 filterh.append('SmartsSubstructureExclusionFilter') | |
| 201 dct_filter['FilterSmartsExclusionList'] = args.FilterSmartsExclusionList | |
| 202 | |
| 203 # My understanding is that both 'ElementInclusionExclusiveFilter' and 'ElementExclusionFilter' use | |
| 204 # 'FilterIncludedElements' | |
| 205 if args.FilterIncludedExclusiveElements: | |
| 206 filterh.append('ElementInclusionExclusiveFilter') | |
| 207 dct_filter['FilterIncludedElements'] = args.FilterIncludedExclusiveElements | |
| 208 | |
| 209 if args.FilterIncludedElements: | |
| 210 filterh.append('ElementInclusionFilter') | |
| 211 dct_filter['FilterIncludedElements'] = args.FilterIncludedElements | |
| 212 | |
| 213 if args.FilterExcludedElements: | |
| 214 filterh.append('ElementExclusionFilter') | |
| 215 dct_filter['FilterExcludedElements'] = args.FilterExcludedElements | |
| 216 | |
| 217 if filterh: | |
| 218 fcmds = ','.join(filterh) + ' ' | |
| 219 for k, v in six.iteritems(dct_filter): | |
| 220 fcmds += "{0}={1} ".format(str(k), str(v)) | |
| 221 | |
| 222 paramd["MetFragPreProcessingCandidateFilter"] = fcmds | |
| 223 | |
| 224 return paramd | |
| 225 | |
| 226 | |
| 227 ###################################################################### | |
| 228 # Function to run metfrag when all metainfo and peaks have been parsed | |
| 229 ###################################################################### | |
| 230 def run_metfrag(meta_info, peaklist, args, wd, spectrac, adduct_types): | |
| 231 # Get sample details (if possible to extract, e.g. if created as part of the msPurity pipeline). | |
| 232 # Choose between getting additional details to add as columns: either all metadata from the MSP, just | |
| 233 # details from the record name (i.e. when using msPurity and we have the columns coded into the name), or | |
| 234 # just the spectra index (spectrac) | |
| 235 # Returns the parameters used and the command line call | |
| 236 | |
| 237 paramd = init_paramd(args) | |
| 238 if args.meta_select_col == 'name': | |
| 239 # have additional column of just the name | |
| 240 paramd['additional_details'] = {'name': meta_info['name']} | |
| 241 elif args.meta_select_col == 'name_split': | |
| 242 # have additional columns split by "\|" and then on ":" e.g. MZ:100.2 \| RT:20 \| xcms_grp_id:1 | |
| 243 paramd['additional_details'] = {sm.split(":")[0].strip(): sm.split(":")[1].strip() for sm in | |
| 244 meta_info['name'].split("|")} | |
| 245 elif args.meta_select_col == 'all': | |
| 246 # have additional columns based on all the meta information extracted from the MSP | |
| 247 paramd['additional_details'] = meta_info | |
| 248 else: | |
| 249 # Just have an index of the spectra in the MSP file | |
| 250 paramd['additional_details'] = {'spectra_idx': spectrac} | |
| 251 | |
| 252 paramd["SampleName"] = "{}_metfrag_result".format(spectrac) | |
| 253 | |
| 254 # =============== Output peaks to txt file ============================== | |
| 255 paramd["PeakListPath"] = os.path.join(wd, "{}_tmpspec.txt".format(spectrac)) | |
| 256 | |
| 257 # write spec file | |
| 258 with open(paramd["PeakListPath"], 'w') as outfile: | |
| 259 for p in peaklist: | |
| 260 outfile.write(p[0] + "\t" + p[1] + "\n") | |
| 261 | |
| 262 # =============== Update param based on MSP metadata ====================== | |
| 263 # Replace param details with details from MSP if required | |
| 264 if 'precursor_type' in meta_info and meta_info['precursor_type'] in adduct_types: | |
| 265 | |
| 266 nm = float(meta_info['precursor_mz']) + adduct_types[meta_info['precursor_type']] | |
| 267 paramd["PrecursorIonMode"] = int(round(adduct_types[meta_info['precursor_type']], 0)) | |
| 268 else: | |
| 269 | |
| 270 paramd["PrecursorIonMode"] = paramd['PrecursorIonModeDefault'] | |
| 271 nm = float(meta_info['precursor_mz']) + paramd['nm_mass_diff_default'] | |
| 272 | |
| 273 paramd["NeutralPrecursorMass"] = nm | |
| 274 | |
| 275 # =============== Create CLI cmd for metfrag =============================== | |
| 276 cmd = "metfrag" | |
| 277 for k, v in six.iteritems(paramd): | |
| 278 if k not in ['PrecursorIonModeDefault', 'nm_mass_diff_default', 'additional_details']: | |
| 279 cmd += " {}={}".format(str(k), str(v)) | |
| 280 | |
| 281 # =============== Run metfrag ============================================== | |
| 282 # Filter before process with a minimum number of MS/MS peaks | |
| 283 if plinesread >= float(args.minMSMSpeaks): | |
| 284 | |
| 285 if int(args.cores_top_level) == 1: | |
| 286 os.system(cmd) | |
| 287 | |
| 288 return paramd, cmd | |
| 289 | |
| 290 | |
| 291 def work(cmds): | |
| 292 return [os.system(cmd) for cmd in cmds] | |
| 293 | |
| 294 | |
| 295 | |
| 230 ###################################################################### | 296 ###################################################################### |
| 231 # Parse MSP file and run metfrag CLI | 297 # Parse MSP file and run metfrag CLI |
| 232 ###################################################################### | 298 ###################################################################### |
| 233 # keep list of commands if performing in CLI in parallel | 299 # keep list of commands if performing in CLI in parallel |
| 234 cmds = [] | 300 cmds = [] |
| 235 # keep a dictionary of all params | 301 # keep a dictionary of all params |
| 236 paramds = {} | 302 paramds = {} |
| 237 # keep count of spectra (for uid) | 303 # keep count of spectra (for uid) |
| 238 spectrac = 0 | 304 spectrac = 0 |
| 305 # this dictionary will store the metadata results from the MSP file | |
| 306 meta_info = {} | |
| 239 | 307 |
| 240 with open(args.input_pth, "r") as infile: | 308 with open(args.input_pth, "r") as infile: |
| 241 numlines = 0 | 309 # number of lines for the peaks |
| 310 pnumlines = 0 | |
| 311 # number of lines read for the peaks | |
| 312 plinesread = 0 | |
| 242 for line in infile: | 313 for line in infile: |
| 243 line = line.strip() | 314 line = line.strip() |
| 244 print(line) | 315 |
| 245 if numlines == 0: | 316 if pnumlines == 0: |
| 246 # =============== Extract metadata from MSP ======================== | 317 # =============== Extract metadata from MSP ======================== |
| 247 meta_info = parse_meta(meta_regex, meta_info) | 318 meta_info = parse_meta(meta_regex, meta_info) |
| 248 print(meta_info) | 319 |
| 249 if ('massbank' in meta_info and 'cols' in meta_info) or ('msp' in meta_info and 'num_peaks' in meta_info): | 320 if ('massbank' in meta_info and 'cols' in meta_info) or ('msp' in meta_info and 'num_peaks' in meta_info): |
| 250 print('check') | 321 |
| 251 numlines = int(meta_info['num_peaks']) | 322 pnumlines = int(meta_info['num_peaks']) |
| 252 linesread = 0 | 323 plinesread = 0 |
| 253 peaklist = [] | 324 peaklist = [] |
| 254 | 325 |
| 255 elif linesread < numlines: | 326 elif plinesread < pnumlines: |
| 256 # =============== Extract peaks from MSP ========================== | 327 # =============== Extract peaks from MSP ========================== |
| 257 line = tuple(line.split()) # .split() will split on any empty space (i.e. tab and space) | 328 line = tuple(line.split()) # .split() will split on any empty space (i.e. tab and space) |
| 258 # Keep only m/z and intensity, not relative intensity | 329 # Keep only m/z and intensity, not relative intensity |
| 259 save_line = tuple(line[0].split() + line[1].split()) | 330 save_line = tuple(line[0].split() + line[1].split()) |
| 260 linesread += 1 | 331 plinesread += 1 |
| 261 peaklist.append(save_line) | 332 peaklist.append(save_line) |
| 262 | 333 |
| 263 elif linesread == numlines: | 334 elif plinesread and plinesread == pnumlines: |
| 264 # =============== Get sample name and additional details for output ======= | 335 # =============== Get sample name and additional details for output ======= |
| 265 # use a unique uuid4 to keep track of processing (important for multicore) | |
| 266 #rd = str(uuid.uuid4()) | |
| 267 spectrac += 1 | 336 spectrac += 1 |
| 268 | 337 paramd, cmd = run_metfrag(meta_info, peaklist, args, wd, spectrac, adduct_types) |
| 269 # Get sample details (if possible to extract) e.g. if created as part of the msPurity pipeline) | 338 |
| 270 # choose between getting additional details to add as columns as either all meta data from msp, just | |
| 271 # details from the record name (i.e. when using msPurity and we have the columns coded into the name) or | |
| 272 # just the spectra index (spectrac) | |
| 273 if args.meta_select_col == 'name': | |
| 274 # have additional column of just the name | |
| 275 paramd['additional_details'] = {'name': meta_info['name']} | |
| 276 elif args.meta_select_col == 'name_split': | |
| 277 # have additional columns split by "\|" and then on ":" e.g. MZ:100.2 \| RT:20 \| xcms_grp_id:1 | |
| 278 sampled = {sm.split(":")[0].strip(): sm.split(":")[1].strip() for sm in | |
| 279 meta_info['name'].split("|")} | |
| 280 elif args.meta_select_col == 'all': | |
| 281 # have additional columns based on all the meta information extracted from the MSP | |
| 282 paramd['additional_details'] = meta_info | |
| 283 else: | |
| 284 # Just have and index of the spectra in the MSP file | |
| 285 paramd['additional_details'] = {'spectra_idx': spectrac} | |
| 286 | |
| 287 paramd["SampleName"] = "{}_metfrag_result".format(spectrac) | |
| 288 | |
| 289 # =============== Output peaks to txt file ============================== | |
| 290 numlines = 0 | |
| 291 paramd["PeakListPath"] = os.path.join(wd, "{}_tmpspec.txt".format(spectrac)) | |
| 292 print(paramd["PeakListPath"]) | |
| 293 # write spec file | |
| 294 with open(paramd["PeakListPath"], 'w') as outfile: | |
| 295 for p in peaklist: | |
| 296 outfile.write(p[0] + "\t" + p[1] + "\n") | |
| 297 | |
| 298 # =============== Update param based on MSP metadata ====================== | |
| 299 # Replace param details with details from MSP if required | |
| 300 if 'precursor_type' in meta_info and meta_info['precursor_type'] in adduct_types: | |
| 301 | |
| 302 nm = float(meta_info['precursor_mz']) + adduct_types[meta_info['precursor_type']] | |
| 303 paramd["PrecursorIonMode"] = int(round(adduct_types[meta_info['precursor_type']], 0)) | |
| 304 else: | |
| 305 | |
| 306 paramd["PrecursorIonMode"] = paramd['PrecursorIonModeDefault'] | |
| 307 nm = float(meta_info['precursor_mz']) + paramd['nm_mass_diff_default'] | |
| 308 | |
| 309 paramd["NeutralPrecursorMass"] = nm | |
| 310 | |
| 311 # =============== Create CLI cmd for metfrag =============================== | |
| 312 cmd = "metfrag" | |
| 313 for k, v in six.iteritems(paramd): | |
| 314 if not k in ['PrecursorIonModeDefault', 'nm_mass_diff_default', 'additional_details']: | |
| 315 cmd += " {}={}".format(str(k), str(v)) | |
| 316 paramds[paramd["SampleName"]] = paramd | 339 paramds[paramd["SampleName"]] = paramd |
| 317 | 340 cmds.append(cmd) |
| 318 # =============== Run metfrag ============================================== | |
| 319 # Filter before process with a minimum number of MS/MS peaks | |
| 320 if linesread >= float(args.minMSMSpeaks): | |
| 321 | |
| 322 if int(args.cores_top_level) > 1: | |
| 323 cmds.append(cmd) | |
| 324 else: | |
| 325 print(cmd) | |
| 326 os.system(cmd) | |
| 327 | 341 |
| 328 meta_info = {} | 342 meta_info = {} |
| 329 | 343 pnumlines = 0 |
| 330 | 344 plinesread = 0 |
| 331 def work(cmds): | 345 |
| 332 return [os.system(cmd) for cmd in cmds] | 346 # end of file: check if there is still an MSP spectrum to run metfrag on |
| 347 if plinesread and plinesread == pnumlines: | |
| 348 | |
| 349 paramd, cmd = run_metfrag(meta_info, peaklist, args, wd, spectrac+1, adduct_types) | |
| 350 | |
| 351 paramds[paramd["SampleName"]] = paramd | |
| 352 cmds.append(cmd) | |
| 353 | |
| 354 | |
| 355 | |
| 333 | 356 |
| 334 | 357 |
| 335 # Perform multiprocessing on command line call level | 358 # Perform multiprocessing on command line call level |
| 336 if int(args.cores_top_level) > 1: | 359 if int(args.cores_top_level) > 1: |
| 337 cmds_chunks = [cmds[x:x + int(args.chunks)] for x in list(range(0, len(cmds), int(args.chunks)))] | 360 cmds_chunks = [cmds[x:x + int(args.chunks)] for x in list(range(0, len(cmds), int(args.chunks)))] |
| 369 additional_detail_headers = ['sample_name'] | 392 additional_detail_headers = ['sample_name'] |
| 370 for k, paramd in six.iteritems(paramds): | 393 for k, paramd in six.iteritems(paramds): |
| 371 additional_detail_headers = list(set(additional_detail_headers + list(paramd['additional_details'].keys()))) | 394 additional_detail_headers = list(set(additional_detail_headers + list(paramd['additional_details'].keys()))) |
| 372 | 395 |
| 373 headers = additional_detail_headers + sorted(list(set(headers))) | 396 headers = additional_detail_headers + sorted(list(set(headers))) |
| 397 | |
| 398 | |
| 399 # Sort files nicely | |
| 400 outfiles.sort(key=lambda s: int(re.match(r'^.*/(\d+)_metfrag_result\.csv', s).group(1))) | |
| 401 | |
| 402 print(outfiles) | |
| 403 | |
| 374 | 404 |
| 375 # merge outputs | 405 # merge outputs |
| 376 with open(args.result_pth, 'a') as merged_outfile: | 406 with open(args.result_pth, 'a') as merged_outfile: |
| 377 dwriter = csv.DictWriter(merged_outfile, fieldnames=headers, delimiter='\t', quotechar='"', | 407 dwriter = csv.DictWriter(merged_outfile, fieldnames=headers, delimiter='\t', quotechar='"', |
| 378 quoting=csv.QUOTE_NONNUMERIC,) | 408 quoting=csv.QUOTE_NONNUMERIC,) |
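
A few minimal sketches of the patterns used by the new revision follow; everything in them (file names, parameter values, regex patterns) is illustrative rather than copied from the repository. First, `parse_meta` (unchanged by this commit): each metadata key maps to a list of regular expressions, and a matching pattern stores the joined capture groups under that key. The full `regex_msp` dictionary sits outside the hunks shown above, so the pattern below for `precursor_mz` is a made-up stand-in, not the repository's pattern set.

```python
import re

# Hypothetical subset of the regex dictionary the script builds as regex_msp /
# meta_regex; the real patterns are defined outside the hunks shown above.
meta_regex = {'precursor_mz': [r'^PRECURSORMZ:\s*(\d+\.\d+)$']}

# Example MSP header line; parse_meta reads `line` from the enclosing scope,
# exactly as the script does inside its file-reading loop.
line = "PRECURSORMZ: 286.1445"

def parse_meta(meta_regex, meta_info={}):
    # The script iterates with six.iteritems for Python 2/3 support;
    # plain .items() is used here to keep the sketch dependency-free.
    for k, regexes in meta_regex.items():
        for reg in regexes:
            m = re.search(reg, line, re.IGNORECASE)
            if m:
                meta_info[k] = '-'.join(m.groups()).strip()
    return meta_info

print(parse_meta(meta_regex))  # {'precursor_mz': '286.1445'}
```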
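
`init_paramd` assembles `MetFragPreProcessingCandidateFilter` by comma-joining the selected filter names and then appending space-separated `key=value` pairs for any filters that take arguments. A minimal sketch of that string assembly, with an assumed element-constraint value:

```python
# Assume the user enabled the isotope and minimum-elements filters;
# 'C5H4' is an illustrative value for FilterMinimumElements.
filterh = ['IsotopeFilter', 'MinimumElementsFilter']
dct_filter = {'FilterMinimumElements': 'C5H4'}

fcmds = ','.join(filterh) + ' '
for k, v in dct_filter.items():
    fcmds += "{0}={1} ".format(k, v)

print(fcmds)
# IsotopeFilter,MinimumElementsFilter FilterMinimumElements=C5H4
```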
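
The rewritten parsing loop is a small state machine driven by `pnumlines` (peak lines expected, taken from the `Num Peaks` metadata) and `plinesread` (peak lines consumed so far): metadata is parsed while `pnumlines` is zero, peaks are collected until the counters match, and the spectrum is then dispatched to `run_metfrag`. A condensed, self-contained sketch of the same flow, with an in-memory MSP record instead of `args.input_pth`:

```python
# In-memory stand-in for an MSP record; a real run reads args.input_pth.
msp_lines = [
    "NAME: example_spectrum",
    "PRECURSORMZ: 286.1445",
    "Num Peaks: 2",
    "100.0 250",
    "150.1 900",
    "",
]

pnumlines = 0    # peak lines expected for the current spectrum
plinesread = 0   # peak lines read so far
peaklist = []

for line in msp_lines:
    line = line.strip()
    if pnumlines == 0:
        # metadata section: only 'Num Peaks' is recognised here, whereas the
        # script runs parse_meta() with its full regex dictionary
        if line.lower().startswith("num peaks"):
            pnumlines = int(line.split(":")[1])
            plinesread = 0
            peaklist = []
    elif plinesread < pnumlines:
        # peak section: keep m/z and intensity as strings, as the script does
        mz, intensity = line.split()[:2]
        peaklist.append((mz, intensity))
        plinesread += 1
    elif plinesread == pnumlines:
        # a complete spectrum: this is where run_metfrag() would be called
        print("dispatch spectrum with peaks:", peaklist)
        pnumlines = 0
        plinesread = 0
```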
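
`run_metfrag` turns the parameter dictionary into a single `metfrag key=value ...` call, skipping the three bookkeeping keys (`PrecursorIonModeDefault`, `nm_mass_diff_default`, `additional_details`) that exist only for the wrapper's own use. A sketch with a deliberately small, illustrative parameter set:

```python
# Illustrative subset of paramd; a real run carries many more MetFrag settings.
paramd = {
    "MetFragDatabaseType": "PubChem",
    "SampleName": "1_metfrag_result",
    "PeakListPath": "/tmp/metfrag_wd/1_tmpspec.txt",
    "PrecursorIonModeDefault": "1",            # wrapper bookkeeping, skipped
    "nm_mass_diff_default": 1.007276,          # wrapper bookkeeping, skipped
    "additional_details": {"spectra_idx": 1},  # wrapper bookkeeping, skipped
}

cmd = "metfrag"
for k, v in paramd.items():
    if k not in ("PrecursorIonModeDefault", "nm_mass_diff_default",
                 "additional_details"):
        cmd += " {}={}".format(k, v)

print(cmd)
# metfrag MetFragDatabaseType=PubChem SampleName=1_metfrag_result PeakListPath=/tmp/metfrag_wd/1_tmpspec.txt
```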
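
When `cores_top_level` is above one, commands are collected rather than executed inline, split into chunks of `args.chunks`, and handed to worker processes via the `work()` helper. The pool call itself falls outside the hunks shown above, so the dispatch below is an assumed sketch using `multiprocessing.Pool`, with throw-away `echo` commands in place of real MetFrag calls:

```python
import multiprocessing
import os

def work(cmds):
    # Same helper as in the script: run each command of a chunk sequentially.
    return [os.system(cmd) for cmd in cmds]

# Stand-in commands; the script collects one "metfrag ..." call per spectrum.
cmds = ["echo spectrum_1", "echo spectrum_2", "echo spectrum_3", "echo spectrum_4"]
chunk_size = 2       # args.chunks in the script
cores_top_level = 2  # args.cores_top_level in the script

cmds_chunks = [cmds[x:x + chunk_size] for x in range(0, len(cmds), chunk_size)]

if __name__ == "__main__":
    pool = multiprocessing.Pool(processes=cores_top_level)
    pool.map(work, cmds_chunks)
    pool.close()
    pool.join()
```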
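
Finally, the per-spectrum CSVs are sorted numerically on the index embedded in their file names before the merge, so `2_metfrag_result.csv` precedes `10_metfrag_result.csv`, and the merged file is written with `csv.DictWriter` using the union of headers gathered above. The excerpt stops partway through the merge loop, so the sketch below covers only the sorting and writer setup, with hypothetical file names and a toy header list:

```python
import csv
import re

# Hypothetical MetFrag result files; real paths live under the working dir (wd).
outfiles = ["wd/10_metfrag_result.csv", "wd/2_metfrag_result.csv"]

# Numeric sort on the spectra index, mirroring the script's sort key.
outfiles.sort(key=lambda s: int(re.match(r"^.*/(\d+)_metfrag_result\.csv", s).group(1)))
print(outfiles)  # ['wd/2_metfrag_result.csv', 'wd/10_metfrag_result.csv']

headers = ["sample_name", "Score"]  # toy header set; the script derives the union
with open("merged_result.tsv", "a") as merged_outfile:
    dwriter = csv.DictWriter(merged_outfile, fieldnames=headers, delimiter="\t",
                             quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
    dwriter.writeheader()
    # ...each result CSV would then be read and its rows written here,
    # which is the part of the script cut off in this excerpt.
```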
