comparison metfrag.py @ 0:75c805123b45 draft

planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
author tomnl
date Tue, 24 Jul 2018 07:59:44 -0400
parents
children c1b168770b68
comparison
equal deleted inserted replaced
-1:000000000000 0:75c805123b45
import argparse
import csv
import os
import subprocess
import sys
# Log the interpreter version next to the rest of the script's output.
print(sys.version)

# Command-line interface. argparse keeps every value as the string it was
# given; most of them are forwarded verbatim to MetFrag further down.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--input',
    help="spectra file with 'NAME: ', 'PRECURSORMZ: ' and 'Num Peaks: ' "
         "records, each followed by tab-separated peak lines")
parser.add_argument(
    '--db_local',
    help="path of a local CSV database, or the literal string 'None' to "
         "use --db_online instead")
parser.add_argument(
    '--db_online',
    help="value passed as MetFragDatabaseType when no local database is used")
parser.add_argument(
    '--ppm',
    help="value passed as DatabaseSearchRelativeMassDeviation")
parser.add_argument(
    '--ppm_frag',
    help="value passed as FragmentPeakMatchRelativeMassDeviation")
parser.add_argument(
    '--fragmasstol',
    help="value passed as FragmentPeakMatchAbsoluteMassDeviation")
parser.add_argument(
    '--polarity',
    help="'pos' for positive ion mode; any other value is treated as "
         "negative ion mode")
parser.add_argument(
    '--results',
    help="path of the merged, tab-separated output file")
parser.add_argument(
    '--threads',
    help="value passed as NumberThreads")

args = parser.parse_args()
# Call form of print works on both Python 2 and Python 3.
print(args)

# Directory that receives the MetFrag result files.
# os.makedirs raises if "tmet" already exists.
os.makedirs("tmet")
22
# Mass of a proton in Da. It is subtracted from (positive mode) or added to
# (negative mode) the precursor m/z to obtain the neutral mass handed to
# MetFrag, matching the PrecursorIonMode=1 / -1 settings used below.
PROTON_MASS = 1.007276


def run_metfrag(featid, neutral_mass, peaklist):
    """Write one spectrum to ``tmpspec.txt`` and run MetFrag on it.

    Reads the module-level ``args`` for the database, tolerance, polarity
    and thread settings.

    featid       -- spectrum name; used to build the MetFrag SampleName
    neutral_mass -- value passed as NeutralPrecursorMass
    peaklist     -- sequence of peak tuples; the first two fields of each
                    are written tab-separated to the peak list file
    """
    # write spec file
    with open('./tmpspec.txt', 'w') as outfile:
        for p in peaklist:
            outfile.write(p[0] + "\t" + p[1] + "\n")

    # create commandline input: one "Key=Value" argument per parameter
    params = ["PeakListPath=tmpspec.txt"]
    if args.db_local != "None":
        params.append("MetFragDatabaseType=LocalCSV")
        params.append("LocalDatabasePath={0}".format(args.db_local))
    else:
        params.append("MetFragDatabaseType={0}".format(args.db_online))
    params.append("FragmentPeakMatchAbsoluteMassDeviation={0}".format(args.fragmasstol))
    params.append("FragmentPeakMatchRelativeMassDeviation={0}".format(args.ppm_frag))
    params.append("DatabaseSearchRelativeMassDeviation={0}".format(args.ppm))
    params.append("NeutralPrecursorMass={0}".format(neutral_mass))
    params.append("SampleName={0}_metfrag".format(featid))
    params.append("ResultsPath=./tmet/")
    if args.polarity == "pos":
        params.append("IsPositiveIonMode=True")
        params.append("PrecursorIonMode=1")
    else:
        params.append("IsPositiveIonMode=False")
        params.append("PrecursorIonMode=-1")
    params.append("MetFragCandidateWriter=CSV")  # TSV not available
    params.append("NumberThreads={0}".format(args.threads))

    # run Metfrag
    print("metfrag {0}".format(" ".join(params)))
    try:
        # An argument list (no shell) keeps names or paths that contain
        # spaces or shell metacharacters from being re-interpreted.
        status = subprocess.call(["metfrag"] + params)
    except OSError as err:
        # Executable missing or not runnable: report and carry on, as the
        # previous os.system call did.
        print("could not run metfrag: {0}".format(err))
        return
    if status != 0:
        print("metfrag exited with status {0} for {1}".format(status, featid))


# Walk through the spectra file record by record. While numlines is 0 the
# loop is looking at header fields; once "Num Peaks" has been seen, the
# next numlines lines are collected as peaks.
with open(args.input, "r") as infile:
    numlines = 0
    for line in infile:
        line = line.strip()
        if numlines == 0:
            if "NAME" in line:
                featid = line.split("NAME: ")[1]
            if "PRECURSORMZ" in line:
                mz = float(line.split("PRECURSORMZ: ")[1])
                if args.polarity == "pos":
                    mz2 = mz - PROTON_MASS
                else:
                    mz2 = mz + PROTON_MASS
            if "Num Peaks" in line:
                numlines = int(line.split("Num Peaks: ")[1])
                linesread = 0
                peaklist = []
        else:
            peaklist.append(tuple(line.split("\t")))
            linesread += 1
            # Run as soon as the last declared peak has been read. Waiting
            # for the following line would drop the final spectrum when the
            # file ends right after its peaks, and would swallow that line.
            if linesread == numlines:
                run_metfrag(featid, mz2, peaklist)
                numlines = 0
77
78
# Outputs might have different headers, so collect the column names of every
# result file before the files are merged.
outfiles = sorted(os.listdir("./tmet"))

headers = []
c = 0  # number of result files that contain at least one data row
for fname in outfiles:
    with open("./tmet/" + fname) as infile:
        reader = csv.reader(infile)
        # The next() builtin works on Python 2 and 3; the default copes
        # with a zero-byte file instead of raising StopIteration.
        headers.extend(next(reader, []))
        # check if file has any data rows
        if next(reader, None) is not None:
            c += 1

# if no data rows (e.g. matches) then do not save an output and leave the program
if c == 0:
    sys.exit()

print(headers)
# De-duplicate and sort the collected names; 'UID' is the extra leading
# column added in front of them.
headers = ['UID'] + sorted(set(headers))
print(headers)
102
# Merge outputs: append every row of every MetFrag result file to the
# results file as tab-separated text, using the combined header list and a
# leading UID column.
with open(args.results, 'a') as merged_outfile:

    dwriter = csv.DictWriter(merged_outfile, fieldnames=headers, delimiter='\t')
    dwriter.writeheader()

    for fname in outfiles:
        # Result files are named after the SampleName "<feature>_metfrag".
        # Strip that suffix to get the feature name back; splitting on the
        # first underscore would truncate names that contain underscores.
        stem = os.path.splitext(os.path.basename(fname))[0]
        if stem.endswith("_metfrag"):
            fileid = stem[:-len("_metfrag")]
        else:
            # Unexpected file name: keep the previous behaviour.
            fileid = os.path.basename(fname).split("_")[0]

        with open("./tmet/" + fname) as infile:
            reader = csv.DictReader(infile, delimiter=',', quotechar='"')
            for line in reader:
                line['UID'] = fileid
                # Columns missing from this file are left empty by DictWriter.
                dwriter.writerow(line)
116
117