Mercurial > repos > tomnl > metfrag
comparison metfrag.py @ 1:c1b168770b68 draft
planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
| author | tomnl |
|---|---|
| date | Fri, 31 May 2019 04:41:09 -0400 |
| parents | 75c805123b45 |
| children | d040e27b6225 |
comparison
equal
deleted
inserted
replaced
| 0:75c805123b45 | 1:c1b168770b68 |
|---|---|
| 1 from __future__ import absolute_import, print_function | |
| 1 import argparse | 2 import argparse |
| 2 import csv | 3 import csv |
| 3 import os | 4 import os |
| 4 import sys | 5 import sys |
| 6 import six | |
| 7 import re | |
| 8 import random | |
| 9 import string | |
| 10 import shutil | |
| 11 import glob | |
| 12 import tempfile | |
| 13 import multiprocessing | |
| 14 | |
| 15 from subprocess import call | |
| 16 from collections import defaultdict | |
| 17 | |
| 5 print(sys.version) | 18 print(sys.version) |
| 6 | 19 |
| 7 parser = argparse.ArgumentParser() | 20 parser = argparse.ArgumentParser() |
| 8 parser.add_argument('--input') | 21 parser.add_argument('--input_pth') |
| 9 parser.add_argument('--db_local') | 22 parser.add_argument('--result_pth', default='metfrag_result.csv') |
| 10 parser.add_argument('--db_online') | 23 |
| 11 parser.add_argument('--ppm') | 24 parser.add_argument('--temp_dir') |
| 12 parser.add_argument('--ppm_frag') | 25 parser.add_argument('--polarity', default='pos') |
| 13 parser.add_argument('--fragmasstol') | 26 parser.add_argument('--minMSMSpeaks', default=1) |
| 14 parser.add_argument('--polarity') | 27 |
| 15 parser.add_argument('--results') | 28 parser.add_argument('--MetFragDatabaseType', default='PubChem') |
| 16 parser.add_argument('--threads') | 29 parser.add_argument('--LocalDatabasePath', default='') |
| 30 parser.add_argument('--LocalMetChemDatabaseServerIp', default='') | |
| 31 | |
| 32 parser.add_argument('--DatabaseSearchRelativeMassDeviation', default=5) | |
| 33 parser.add_argument('--FragmentPeakMatchRelativeMassDeviation', default=10) | |
| 34 parser.add_argument('--FragmentPeakMatchAbsoluteMassDeviation', default=0.001) | |
| 35 parser.add_argument('--NumberThreads', default=1) | |
| 36 parser.add_argument('--UnconnectedCompoundFilter', action='store_true') | |
| 37 parser.add_argument('--IsotopeFilter', action='store_true') | |
| 38 | |
| 39 parser.add_argument('--FilterMinimumElements', default='') | |
| 40 parser.add_argument('--FilterMaximumElements', default='') | |
| 41 parser.add_argument('--FilterSmartsInclusionList', default='') | |
| 42 parser.add_argument('--FilterSmartsExclusionList', default='') | |
| 43 parser.add_argument('--FilterIncludedElements', default='') | |
| 44 parser.add_argument('--FilterExcludedElements', default='') | |
| 45 parser.add_argument('--FilterIncludedExclusiveElements', default='') | |
| 46 | |
| 47 parser.add_argument('--score_thrshld', default=0) | |
| 48 parser.add_argument('--pctexplpeak_thrshld', default=0) | |
| 49 parser.add_argument('--schema') | |
| 50 parser.add_argument('--cores_top_level', default=1) | |
| 51 parser.add_argument('--chunks', default=1) | |
| 52 parser.add_argument('--meta_select_col', default='name') | |
| 53 | |
| 54 parser.add_argument('--ScoreSuspectLists', default='') | |
| 55 parser.add_argument('--MetFragScoreTypes', default="FragmenterScore,OfflineMetFusionScore") | |
| 56 parser.add_argument('--MetFragScoreWeights', default="1.0,1.0") | |
| 17 | 57 |
| 18 args = parser.parse_args() | 58 args = parser.parse_args() |
| 19 print args | 59 print(args) |
| 20 | 60 |
| 21 os.makedirs("tmet") | 61 # Create temporary working directory |
| 22 | 62 if args.temp_dir: |
| 23 with open(args.input,"r") as infile: | 63 wd = args.temp_dir |
| 64 else: | |
| 65 wd = tempfile.mkdtemp() | |
| 66 | |
| 67 if os.path.exists(wd): | |
| 68 shutil.rmtree(wd) | |
| 69 os.makedirs(wd) | |
| 70 else: | |
| 71 os.makedirs(wd) | |
| 72 | |
| 73 ###################################################################### | |
| 74 # Setup parameter dictionary | |
| 75 ###################################################################### | |
| 76 paramd = defaultdict() | |
| 77 | |
| 78 paramd["MetFragDatabaseType"] = args.MetFragDatabaseType | |
| 79 | |
| 80 if args.MetFragDatabaseType == "LocalCSV": | |
| 81 paramd["LocalDatabasePath"] = args.LocalDatabasePath | |
| 82 elif args.MetFragDatabaseType == "MetChem": | |
| 83 paramd["LocalMetChemDatabase"] = "metchem" | |
| 84 paramd["LocalMetChemDatabasePortNumber"] = 5432 | |
| 85 paramd["LocalMetChemDatabaseServerIp"] = args.LocalMetChemDatabaseServerIp | |
| 86 paramd["LocalMetChemDatabaseUser"] = "metchemro" | |
| 87 paramd["LocalMetChemDatabasePassword"] = "metchemro" | |
| 88 | |
| 89 paramd["FragmentPeakMatchAbsoluteMassDeviation"] = args.FragmentPeakMatchAbsoluteMassDeviation | |
| 90 paramd["FragmentPeakMatchRelativeMassDeviation"] = args.FragmentPeakMatchRelativeMassDeviation | |
| 91 paramd["DatabaseSearchRelativeMassDeviation"] = args.DatabaseSearchRelativeMassDeviation | |
| 92 paramd["SampleName"] = '' | |
| 93 paramd["ResultsPath"] = os.path.join(wd) | |
| 94 | |
| 95 if args.polarity == "pos": | |
| 96 paramd["IsPositiveIonMode"] = True | |
| 97 paramd["PrecursorIonModeDefault"] = "1" | |
| 98 paramd["PrecursorIonMode"] = "1" | |
| 99 paramd["nm_mass_diff_default"] = 1.007276 | |
| 100 else: | |
| 101 paramd["IsPositiveIonMode"] = False | |
| 102 paramd["PrecursorIonModeDefault"] = "-1" | |
| 103 paramd["PrecursorIonMode"] = "-1" | |
| 104 paramd["nm_mass_diff_default"] = -1.007276 | |
| 105 | |
| 106 paramd["MetFragCandidateWriter"] = "CSV" | |
| 107 paramd["NumberThreads"] = args.NumberThreads | |
| 108 | |
| 109 if args.ScoreSuspectLists: | |
| 110 paramd["ScoreSuspectLists"] = args.ScoreSuspectLists | |
| 111 | |
| 112 paramd["MetFragScoreTypes"] = args.MetFragScoreTypes | |
| 113 paramd["MetFragScoreWeights"] = args.MetFragScoreWeights | |
| 114 | |
| 115 dct_filter = defaultdict() | |
| 116 filterh = [] | |
| 117 | |
| 118 if args.UnconnectedCompoundFilter: | |
| 119 filterh.append('UnconnectedCompoundFilter') | |
| 120 | |
| 121 if args.IsotopeFilter: | |
| 122 filterh.append('IsotopeFilter') | |
| 123 | |
| 124 if args.FilterMinimumElements: | |
| 125 filterh.append('MinimumElementsFilter') | |
| 126 dct_filter['FilterMinimumElements'] = args.FilterMinimumElements | |
| 127 | |
| 128 if args.FilterMaximumElements: | |
| 129 filterh.append('MaximumElementsFilter') | |
| 130 dct_filter['FilterMaximumElements'] = args.FilterMaximumElements | |
| 131 | |
| 132 if args.FilterSmartsInclusionList: | |
| 133 filterh.append('SmartsSubstructureInclusionFilter') | |
| 134 dct_filter['FilterSmartsInclusionList'] = args.FilterSmartsInclusionList | |
| 135 | |
| 136 if args.FilterSmartsExclusionList: | |
| 137 filterh.append('SmartsSubstructureExclusionFilter') | |
| 138 dct_filter['FilterSmartsExclusionList'] = args.FilterSmartsExclusionList | |
| 139 | |
| 140 # My understanding is that both 'ElementInclusionExclusiveFilter' and 'ElementInclusionFilter' use | |
| 141 # 'FilterIncludedElements' | |
| 142 if args.FilterIncludedExclusiveElements: | |
| 143 filterh.append('ElementInclusionExclusiveFilter') | |
| 144 dct_filter['FilterIncludedElements'] = args.FilterIncludedExclusiveElements | |
| 145 | |
| 146 if args.FilterIncludedElements: | |
| 147 filterh.append('ElementInclusionFilter') | |
| 148 dct_filter['FilterIncludedElements'] = args.FilterIncludedElements | |
| 149 | |
| 150 if args.FilterExcludedElements: | |
| 151 filterh.append('ElementExclusionFilter') | |
| 152 dct_filter['FilterExcludedElements'] = args.FilterExcludedElements | |
| 153 | |
| 154 if filterh: | |
| 155 fcmds = ','.join(filterh) + ' ' | |
| 156 for k, v in six.iteritems(dct_filter): | |
| 157 fcmds += "{0}={1} ".format(str(k), str(v)) | |
| 158 | |
| 159 paramd["MetFragPreProcessingCandidateFilter"] = fcmds | |
| 160 | |
| 161 print(paramd) | |
| 162 | |
| 163 ###################################################################### | |
| 164 # Setup regular expressions for MSP parsing dictionary | |
| 165 ###################################################################### | |
| 166 regex_msp = {} | |
| 167 regex_msp['name'] = ['^Name(?:=|:)(.*)$'] | |
| 168 regex_msp['polarity'] = ['^ion.*mode(?:=|:)(.*)$', '^ionization.*mode(?:=|:)(.*)$', '^polarity(?:=|:)(.*)$'] | |
| 169 regex_msp['precursor_mz'] = ['^precursor.*m/z(?:=|:)\s*(\d*[.,]?\d*)$', '^precursor.*mz(?:=|:)\s*(\d*[.,]?\d*)$'] | |
| 170 regex_msp['precursor_type'] = ['^precursor.*type(?:=|:)(.*)$', '^adduct(?:=|:)(.*)$', '^ADDUCTIONNAME(?:=|:)(.*)$'] | |
| 171 regex_msp['num_peaks'] = ['^Num.*Peaks(?:=|:)\s*(\d*)$'] | |
| 172 regex_msp['msp'] = ['^Name(?:=|:)(.*)$'] # Flag for standard MSP format | |
| 173 | |
| 174 regex_massbank = {} | |
| 175 regex_massbank['name'] = ['^RECORD_TITLE:(.*)$'] | |
| 176 regex_massbank['polarity'] = ['^AC\$MASS_SPECTROMETRY:\s+ION_MODE\s+(.*)$'] | |
| 177 regex_massbank['precursor_mz'] = ['^MS\$FOCUSED_ION:\s+PRECURSOR_M/Z\s+(\d*[.,]?\d*)$'] | |
| 178 regex_massbank['precursor_type'] = ['^MS\$FOCUSED_ION:\s+PRECURSOR_TYPE\s+(.*)$'] | |
| 179 regex_massbank['num_peaks'] = ['^PK\$NUM_PEAK:\s+(\d*)'] | |
| 180 regex_massbank['cols'] = ['^PK\$PEAK:\s+(.*)'] | |
| 181 regex_massbank['massbank'] = ['^RECORD_TITLE:(.*)$'] # Flag for massbank format | |
| 182 | |
| 183 if args.schema == 'msp': | |
| 184 meta_regex = regex_msp | |
| 185 elif args.schema == 'massbank': | |
| 186 meta_regex = regex_massbank | |
| 187 elif args.schema == 'auto': | |
| 188 # If auto we just check for all the available parameter names and then determine if Massbank or MSP based on | |
| 189 # the name parameter | |
| 190 meta_regex = {} | |
| 191 meta_regex.update(regex_massbank) | |
| 192 meta_regex['name'].extend(regex_msp['name']) | |
| 193 meta_regex['polarity'].extend(regex_msp['polarity']) | |
| 194 meta_regex['precursor_mz'].extend(regex_msp['precursor_mz']) | |
| 195 meta_regex['precursor_type'].extend(regex_msp['precursor_type']) | |
| 196 meta_regex['num_peaks'].extend(regex_msp['num_peaks']) | |
| 197 meta_regex['msp'] = regex_msp['msp'] | |
| 198 | |
| 199 print(meta_regex) | |
| 200 | |
| 201 | |
| 202 | |
| 203 # this dictionary will store the metadata results from the MSP file | |
| 204 meta_info = {} | |
| 205 | |
| 206 | |
| 207 # function to extract the meta data using the regular expressions | |
| 208 def parse_meta(meta_regex, meta_info={}): | |
| 209 for k, regexes in six.iteritems(meta_regex): | |
| 210 for reg in regexes: | |
| 211 m = re.search(reg, line, re.IGNORECASE) | |
| 212 if m: | |
| 213 meta_info[k] = '-'.join(m.groups()).strip() | |
| 214 return meta_info | |
| 215 | |
| 216 | |
| 217 adduct_types = { | |
| 218 '[M+H]+': 1.007276, | |
| 219 '[M+NH4]+': 18.034374, | |
| 220 '[M+Na]+': 22.989218, | |
| 221 '[M+K]+': 38.963158, | |
| 222 '[M+CH3OH+H]+': 33.033489, | |
| 223 '[M+ACN+H]+': 42.033823, | |
| 224 '[M+ACN+Na]+': 64.015765, | |
| 225 '[M+2ACN+H]+': 83.06037, | |
| 226 '[M-H]-': -1.007276, | |
| 227 '[M+Cl]-': 34.969402, | |
| 228 } | |
| 229 | |
| 230 ###################################################################### | |
| 231 # Parse MSP file and run metfrag CLI | |
| 232 ###################################################################### | |
| 233 # keep list of commands if performing in CLI in parallel | |
| 234 cmds = [] | |
| 235 # keep a dictionary of all params | |
| 236 paramds = {} | |
| 237 # keep count of spectra (for uid) | |
| 238 spectrac = 0 | |
| 239 | |
| 240 with open(args.input_pth, "r") as infile: | |
| 24 numlines = 0 | 241 numlines = 0 |
| 25 for line in infile: | 242 for line in infile: |
| 26 line = line.strip() | 243 line = line.strip() |
| 244 print(line) | |
| 27 if numlines == 0: | 245 if numlines == 0: |
| 28 if "NAME" in line: | 246 # =============== Extract metadata from MSP ======================== |
| 29 featid = line.split("NAME: ")[1] | 247 meta_info = parse_meta(meta_regex, meta_info) |
| 30 if "PRECURSORMZ" in line: | 248 print(meta_info) |
| 31 mz = float(line.split("PRECURSORMZ: ")[1]) | 249 if ('massbank' in meta_info and 'cols' in meta_info) or ('msp' in meta_info and 'num_peaks' in meta_info): |
| 32 if args.polarity=="pos": | 250 print('check') |
| 33 mz2 = mz-1.007276 | 251 numlines = int(meta_info['num_peaks']) |
| 34 else: | |
| 35 mz2 = mz+1.007276 | |
| 36 if "Num Peaks" in line: | |
| 37 numlines = int(line.split("Num Peaks: ")[1]) | |
| 38 linesread = 0 | 252 linesread = 0 |
| 39 peaklist = [] | 253 peaklist = [] |
| 40 else: | 254 |
| 41 if linesread == numlines: | 255 elif linesread < numlines: |
| 42 numlines = 0 | 256 # =============== Extract peaks from MSP ========================== |
| 43 #write spec file | 257 line = tuple(line.split()) # .split() will split on any empty space (i.e. tab and space) |
| 44 with open('./tmpspec.txt', 'w') as outfile: | 258 # Keep only m/z and intensity, not relative intensity |
| 45 for p in peaklist: | 259 save_line = tuple(line[0].split() + line[1].split()) |
| 46 outfile.write(p[0]+"\t"+p[1]+"\n") | 260 linesread += 1 |
| 47 #create commandline input | 261 peaklist.append(save_line) |
| 48 cmd_command = "PeakListPath=tmpspec.txt " | 262 |
| 49 if args.db_local != "None": | 263 elif linesread == numlines: |
| 50 cmd_command += "MetFragDatabaseType=LocalCSV " | 264 # =============== Get sample name and additional details for output ======= |
| 51 cmd_command += "LocalDatabasePath={0} ".format(args.db_local) | 265 # use a unique uuid4 to keep track of processing (important for multicore) |
| 266 #rd = str(uuid.uuid4()) | |
| 267 spectrac += 1 | |
| 268 | |
| 269 # Get sample details (if possible to extract, e.g. if created as part of the msPurity pipeline) | |
| 270 # choose between getting additional details to add as columns as either all meta data from msp, just | |
| 271 # details from the record name (i.e. when using msPurity and we have the columns coded into the name) or | |
| 272 # just the spectra index (spectrac) | |
| 273 if args.meta_select_col == 'name': | |
| 274 # have additional column of just the name | |
| 275 paramd['additional_details'] = {'name': meta_info['name']} | |
| 276 elif args.meta_select_col == 'name_split': | |
| 277 # have additional columns split by "|" and then on ":" e.g. MZ:100.2 | RT:20 | xcms_grp_id:1 | |
| 278 sampled = {sm.split(":")[0].strip(): sm.split(":")[1].strip() for sm in | |
| 279 meta_info['name'].split("|")} | |
| 280 elif args.meta_select_col == 'all': | |
| 281 # have additional columns based on all the meta information extracted from the MSP | |
| 282 paramd['additional_details'] = meta_info | |
| 283 else: | |
| 284 # Just have and index of the spectra in the MSP file | |
| 285 paramd['additional_details'] = {'spectra_idx': spectrac} | |
| 286 | |
| 287 paramd["SampleName"] = "{}_metfrag_result".format(spectrac) | |
| 288 | |
| 289 # =============== Output peaks to txt file ============================== | |
| 290 numlines = 0 | |
| 291 paramd["PeakListPath"] = os.path.join(wd, "{}_tmpspec.txt".format(spectrac)) | |
| 292 print(paramd["PeakListPath"]) | |
| 293 # write spec file | |
| 294 with open(paramd["PeakListPath"], 'w') as outfile: | |
| 295 for p in peaklist: | |
| 296 outfile.write(p[0] + "\t" + p[1] + "\n") | |
| 297 | |
| 298 # =============== Update param based on MSP metadata ====================== | |
| 299 # Replace param details with details from MSP if required | |
| 300 if 'precursor_type' in meta_info and meta_info['precursor_type'] in adduct_types: | |
| 301 | |
| 302 nm = float(meta_info['precursor_mz']) + adduct_types[meta_info['precursor_type']] | |
| 303 paramd["PrecursorIonMode"] = int(round(adduct_types[meta_info['precursor_type']], 0)) | |
| 304 else: | |
| 305 | |
| 306 paramd["PrecursorIonMode"] = paramd['PrecursorIonModeDefault'] | |
| 307 nm = float(meta_info['precursor_mz']) + paramd['nm_mass_diff_default'] | |
| 308 | |
| 309 paramd["NeutralPrecursorMass"] = nm | |
| 310 | |
| 311 # =============== Create CLI cmd for metfrag =============================== | |
| 312 cmd = "metfrag" | |
| 313 for k, v in six.iteritems(paramd): | |
| 314 if not k in ['PrecursorIonModeDefault', 'nm_mass_diff_default', 'additional_details']: | |
| 315 cmd += " {}={}".format(str(k), str(v)) | |
| 316 paramds[paramd["SampleName"]] = paramd | |
| 317 | |
| 318 # =============== Run metfrag ============================================== | |
| 319 # Filter before process with a minimum number of MS/MS peaks | |
| 320 if linesread >= float(args.minMSMSpeaks): | |
| 321 | |
| 322 if int(args.cores_top_level) > 1: | |
| 323 cmds.append(cmd) | |
| 52 else: | 324 else: |
| 53 cmd_command += "MetFragDatabaseType={0} ".format(args.db_online) | 325 print(cmd) |
| 54 cmd_command += "FragmentPeakMatchAbsoluteMassDeviation={0} ".format(args.fragmasstol) | 326 os.system(cmd) |
| 55 cmd_command += "FragmentPeakMatchRelativeMassDeviation={0} ".format(args.ppm_frag) | 327 |
| 56 cmd_command += "DatabaseSearchRelativeMassDeviation={0} ".format(args.ppm) | 328 meta_info = {} |
| 57 cmd_command += "NeutralPrecursorMass={0} ".format(mz2) | 329 |
| 58 cmd_command += "SampleName={0}_metfrag ".format(featid) | 330 |
| 59 cmd_command += "ResultsPath=./tmet/ " | 331 def work(cmds): |
| 60 if args.polarity == "pos": | 332 return [os.system(cmd) for cmd in cmds] |
| 61 cmd_command += "IsPositiveIonMode=True " | 333 |
| 62 else: | 334 |
| 63 cmd_command += "IsPositiveIonMode=False " | 335 # Perform multiprocessing on command line call level |
| 64 if args.polarity == "pos": ### Annotation information. Create a dict for the PrecursorIonModes?? | 336 if int(args.cores_top_level) > 1: |
| 65 cmd_command += "PrecursorIonMode=1 " | 337 cmds_chunks = [cmds[x:x + int(args.chunks)] for x in list(range(0, len(cmds), int(args.chunks)))] |
| 66 else: | 338 pool = multiprocessing.Pool(processes=int(args.cores_top_level)) |
| 67 cmd_command += "PrecursorIonMode=-1 " | 339 pool.map(work, cmds_chunks) |
| 68 cmd_command += "MetFragCandidateWriter=CSV " ## TSV not available | 340 pool.close() |
| 69 cmd_command += "NumberThreads={} ".format(args.threads) | 341 pool.join() |
| 70 # run Metfrag | 342 |
| 71 print "metfrag {0}".format(cmd_command) | 343 ###################################################################### |
| 72 os.system("metfrag {0}".format(cmd_command)) | 344 # Concatenate and filter the output |
| 73 else: | 345 ###################################################################### |
| 74 line = tuple(line.split("\t")) | 346 # outputs might have different headers. Need to get a list of all the headers before we start merging the files |
| 75 linesread += 1 | 347 # outfiles = [os.path.join(wd, f) for f in glob.glob(os.path.join(wd, "*_metfrag_result.csv"))] |
| 76 peaklist.append(line) | 348 outfiles = glob.glob(os.path.join(wd, "*_metfrag_result.csv")) |
| 77 | |
| 78 | |
| 79 #outputs might have different headers. Need to get a list of all the headers before we start merging the files | |
| 80 outfiles = sorted(os.listdir("./tmet")) | |
| 81 | 349 |
| 82 headers = [] | 350 headers = [] |
| 83 c = 0 | 351 c = 0 |
| 84 for fname in outfiles: | 352 for fn in outfiles: |
| 85 with open("./tmet/"+fname) as infile: | 353 with open(fn, 'r') as infile: |
| 86 reader = csv.reader(infile) | 354 reader = csv.reader(infile) |
| 87 headers.extend(reader.next()) | 355 if sys.version_info >= (3, 0): |
| 356 headers.extend(next(reader)) | |
| 357 else: | |
| 358 headers.extend(reader.next()) | |
| 88 # check if file has any data rows | 359 # check if file has any data rows |
| 89 for i, row in enumerate(reader): | 360 for i, row in enumerate(reader): |
| 90 c+=1 | 361 c += 1 |
| 91 if i==1: | 362 if i == 1: |
| 92 break | 363 break |
| 93 | 364 |
| 94 # if no data rows (e.g. matches) then do not save an output and leave the program | 365 # if no data rows (e.g. matches) then do not save an output and leave the program |
| 95 if c==0: | 366 if c == 0: |
| 96 sys.exit() | 367 sys.exit() |
| 97 | 368 |
| 98 | 369 additional_detail_headers = ['sample_name'] |
| 99 print headers | 370 for k, paramd in six.iteritems(paramds): |
| 100 headers = ['UID'] + sorted(list(set(headers))) | 371 additional_detail_headers = list(set(additional_detail_headers + list(paramd['additional_details'].keys()))) |
| 101 print headers | 372 |
| 102 | 373 headers = additional_detail_headers + sorted(list(set(headers))) |
| 103 #merge outputs | 374 |
| 104 with open(args.results, 'a') as merged_outfile: | 375 # merge outputs |
| 105 | 376 with open(args.result_pth, 'a') as merged_outfile: |
| 106 dwriter = csv.DictWriter(merged_outfile, fieldnames=headers, delimiter='\t') | 377 dwriter = csv.DictWriter(merged_outfile, fieldnames=headers, delimiter='\t', quotechar='"', |
| 378 quoting=csv.QUOTE_NONNUMERIC,) | |
| 107 dwriter.writeheader() | 379 dwriter.writeheader() |
| 108 | 380 |
| 109 for fname in outfiles: | 381 for fn in outfiles: |
| 110 fileid = os.path.basename(fname).split("_")[0] | 382 |
| 111 with open("./tmet/"+fname) as infile: | 383 with open(fn) as infile: |
| 112 reader = csv.DictReader(infile, delimiter=',', quotechar='"') | 384 reader = csv.DictReader(infile, delimiter=',', quotechar='"') |
| 113 for line in reader: | 385 for line in reader: |
| 114 line['UID'] = fileid | 386 bewrite = True |
| 115 dwriter.writerow(line) | 387 for key, value in line.items(): |
| 116 | 388 # Filter when no MS/MS peak matched |
| 117 | 389 if key == "ExplPeaks": |
| 390 if float(args.pctexplpeak_thrshld) > 0 and "NA" in value: | |
| 391 bewrite = False | |
| 392 # Filter with a score threshold | |
| 393 elif key == "Score": | |
| 394 if float(value) <= float(args.score_thrshld): | |
| 395 bewrite = False | |
| 396 elif key == "NoExplPeaks": | |
| 397 nbfindpeak = float(value) | |
| 398 elif key == "NumberPeaksUsed": | |
| 399 totpeaks = float(value) | |
| 400 # Filter with a relative number of peak matched | |
| 401 try: | |
| 402 pctexplpeak = nbfindpeak / totpeaks * 100 | |
| 403 except ZeroDivisionError: | |
| 404 bewrite = False | |
| 405 else: | |
| 406 if pctexplpeak < float(args.pctexplpeak_thrshld): | |
| 407 bewrite = False | |
| 408 | |
| 409 # Write the line if it pass all filters | |
| 410 if bewrite: | |
| 411 bfn = os.path.basename(fn) | |
| 412 bfn = bfn.replace(".csv", "") | |
| 413 ad = paramds[bfn]['additional_details'] | |
| 414 line.update(ad) | |
| 415 line['sample_name'] = paramds[bfn]['SampleName'] | |
| 416 | |
| 417 dwriter.writerow(line) |
