Mercurial > repos > tomnl > metfrag
diff metfrag.py @ 0:75c805123b45 draft
planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
| author | tomnl |
|---|---|
| date | Tue, 24 Jul 2018 07:59:44 -0400 |
| parents | |
| children | c1b168770b68 |
line wrap: on
line diff
"""Run MetFrag on every spectrum of an MSP-style file and merge the results.

For each record in ``--input`` (header lines ``NAME: ``, ``PRECURSORMZ: `` and
``Num Peaks: `` followed by tab-separated peak lines) the peak list is written
to ``tmpspec.txt`` and the ``metfrag`` executable is invoked with its results
directed to ``./tmet/``.  Afterwards every CSV found in ``./tmet`` is merged
into one tab-separated file at ``--results`` with an extra ``UID`` column.

The code is written to run unchanged on Python 2.7 and Python 3.
"""
import argparse
import csv
import os
import subprocess
import sys

# Mass added/removed to convert the precursor m/z into the neutral mass.
PROTON_MASS = 1.007276
# Directory MetFrag writes its per-spectrum CSV files into.
RESULTS_DIR = "tmet"
# Scratch file holding the peak list of the spectrum currently processed.
SPEC_FILE = "tmpspec.txt"


def parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when *argv* is None)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input')
    parser.add_argument('--db_local')
    parser.add_argument('--db_online')
    parser.add_argument('--ppm')
    parser.add_argument('--ppm_frag')
    parser.add_argument('--fragmasstol')
    parser.add_argument('--polarity')
    parser.add_argument('--results')
    parser.add_argument('--threads')
    return parser.parse_args(argv)


def header_value(line, key):
    """Return the text following ``"<key>: "`` in *line*, or None if absent.

    Lines that merely contain *key* without the ``": "`` separator are
    ignored instead of raising ``IndexError``.
    """
    marker = key + ": "
    if marker in line:
        return line.split(marker)[1]
    return None


def neutral_mass(mz, polarity):
    """Return the neutral mass for precursor *mz* given the ion *polarity*.

    ``"pos"`` subtracts the proton mass; any other value adds it.
    """
    if polarity == "pos":
        return mz - PROTON_MASS
    return mz + PROTON_MASS


def iter_spectra(lines):
    """Yield ``(featid, precursor_mz, peaklist)`` for each record in *lines*.

    *lines* is any iterable of text lines.  ``peaklist`` is a list of tuples
    obtained by splitting each peak line on tabs.  A spectrum is emitted as
    soon as its last announced peak has been read, so the final record is
    not lost when the input ends without a trailing line, and the line that
    follows a peak list is parsed as a header line rather than discarded.
    Records announcing zero peaks are skipped.

    Raises:
        ValueError: if a peak list completes before any ``NAME: `` or
            ``PRECURSORMZ: `` line has been seen.
    """
    featid = None
    precursor_mz = None
    remaining = 0
    peaklist = []
    for raw in lines:
        line = raw.strip()
        if remaining:
            peaklist.append(tuple(line.split("\t")))
            remaining -= 1
            if remaining == 0:
                if featid is None or precursor_mz is None:
                    raise ValueError(
                        "spectrum is missing a NAME or PRECURSORMZ header")
                yield featid, precursor_mz, peaklist
            continue

        name = header_value(line, "NAME")
        if name is not None:
            featid = name
        mz_text = header_value(line, "PRECURSORMZ")
        if mz_text is not None:
            precursor_mz = float(mz_text)
        count_text = header_value(line, "Num Peaks")
        if count_text is not None:
            remaining = int(count_text)
            peaklist = []


def build_metfrag_params(args, featid, mass):
    """Return the MetFrag ``Key=Value`` parameters as a list of arguments.

    Each parameter is one list element so it reaches MetFrag as a single
    argument even when *featid* or a path contains spaces or shell
    metacharacters.
    """
    positive = args.polarity == "pos"
    params = ["PeakListPath={0}".format(SPEC_FILE)]
    # An omitted --db_local (None) and the literal string "None" both mean
    # "use the online database".
    if args.db_local is not None and args.db_local != "None":
        params.append("MetFragDatabaseType=LocalCSV")
        params.append("LocalDatabasePath={0}".format(args.db_local))
    else:
        params.append("MetFragDatabaseType={0}".format(args.db_online))
    params.extend([
        "FragmentPeakMatchAbsoluteMassDeviation={0}".format(args.fragmasstol),
        "FragmentPeakMatchRelativeMassDeviation={0}".format(args.ppm_frag),
        "DatabaseSearchRelativeMassDeviation={0}".format(args.ppm),
        "NeutralPrecursorMass={0}".format(mass),
        "SampleName={0}_metfrag".format(featid),
        "ResultsPath=./tmet/",
        "IsPositiveIonMode=True" if positive else "IsPositiveIonMode=False",
        "PrecursorIonMode=1" if positive else "PrecursorIonMode=-1",
        "MetFragCandidateWriter=CSV",  # TSV not available
        "NumberThreads={}".format(args.threads),
    ])
    return params


def run_metfrag(args, featid, precursor_mz, peaklist):
    """Write *peaklist* to the scratch file and run MetFrag on it."""
    with open(SPEC_FILE, 'w') as outfile:
        for peak in peaklist:
            outfile.write(peak[0] + "\t" + peak[1] + "\n")

    mass = neutral_mass(precursor_mz, args.polarity)
    command = ["metfrag"] + build_metfrag_params(args, featid, mass)
    print(" ".join(command))
    # No shell is involved: the argument list is passed to the program as-is.
    status = subprocess.call(command)
    if status != 0:
        # Best effort, as before: report and carry on with the next spectrum.
        print("metfrag exited with status {0} for {1}".format(status, featid))


def collect_headers(results_dir):
    """Scan the CSV files in *results_dir*.

    Returns ``(outfiles, headers, has_rows)`` where *outfiles* is the sorted
    directory listing, *headers* the sorted union of all column names and
    *has_rows* tells whether any file has at least one row after its header.
    Empty files are skipped.
    """
    outfiles = sorted(os.listdir(results_dir))
    columns = set()
    has_rows = False
    for fname in outfiles:
        with open(os.path.join(results_dir, fname)) as infile:
            reader = csv.reader(infile)
            first = next(reader, None)
            if first is None:
                continue
            columns.update(first)
            if next(reader, None) is not None:
                has_rows = True
    return outfiles, sorted(columns), has_rows


def merge_results(results_dir, outfiles, headers, results_path):
    """Append all rows of *outfiles* to *results_path* as tab-separated text.

    Output files may have different columns, so *headers* must be the union
    of all of them; a leading ``UID`` column carries the part of the source
    file name before its first underscore.
    """
    fieldnames = ['UID'] + list(headers)
    print(fieldnames)
    with open(results_path, 'a') as merged_outfile:
        dwriter = csv.DictWriter(merged_outfile, fieldnames=fieldnames,
                                 delimiter='\t')
        dwriter.writeheader()
        for fname in outfiles:
            fileid = os.path.basename(fname).split("_")[0]
            with open(os.path.join(results_dir, fname)) as infile:
                reader = csv.DictReader(infile, delimiter=',', quotechar='"')
                for row in reader:
                    row['UID'] = fileid
                    dwriter.writerow(row)


def main(argv=None):
    """Entry point: run MetFrag per spectrum, then merge the outputs."""
    print(sys.version)
    args = parse_args(argv)
    print(args)

    # Fails if the directory already exists, so stale results from an
    # earlier run are never merged by accident.
    os.makedirs(RESULTS_DIR)

    with open(args.input, "r") as infile:
        for featid, precursor_mz, peaklist in iter_spectra(infile):
            run_metfrag(args, featid, precursor_mz, peaklist)

    outfiles, headers, has_rows = collect_headers(RESULTS_DIR)
    # If there are no data rows (e.g. no matches), do not write an output.
    if not has_rows:
        return

    merge_results(RESULTS_DIR, outfiles, headers, args.results)


if __name__ == "__main__":
    main()
