annotate metfrag.py @ 1:c1b168770b68 draft

planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
author tomnl
date Fri, 31 May 2019 04:41:09 -0400
parents 75c805123b45
children d040e27b6225
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
1 from __future__ import absolute_import, print_function
0
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
2 import argparse
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
3 import csv
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
4 import os
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
5 import sys
1
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
6 import six
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
7 import re
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
8 import random
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
9 import string
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
10 import shutil
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
11 import glob
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
12 import tempfile
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
13 import multiprocessing
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
14
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
15 from subprocess import call
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
16 from collections import defaultdict
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
17
0
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
# Report the interpreter version in the tool output.
print(sys.version)

# ----------------------------------------------------------------------
# Command-line interface
# ----------------------------------------------------------------------
# Numeric options declare an explicit ``type`` so that a value supplied on
# the command line has the same Python type as the option's default;
# without it argparse hands back the raw string while the default stays a
# number, and the rest of the script sees inconsistent types.
parser = argparse.ArgumentParser()
parser.add_argument('--input_pth')
parser.add_argument('--result_pth', default='metfrag_result.csv')

# Working directory; a fresh tempfile.mkdtemp() directory is used if omitted.
parser.add_argument('--temp_dir')
# 'pos' selects positive ion mode; any other value selects negative mode.
parser.add_argument('--polarity', default='pos')
parser.add_argument('--minMSMSpeaks', default=1, type=int)

# Candidate database selection ('LocalCSV' and 'MetChem' need the extra
# path / server options below).
parser.add_argument('--MetFragDatabaseType', default='PubChem')
parser.add_argument('--LocalDatabasePath', default='')
parser.add_argument('--LocalMetChemDatabaseServerIp', default='')

# Mass tolerances and threading, forwarded into the parameter dictionary.
parser.add_argument('--DatabaseSearchRelativeMassDeviation',
                    default=5, type=float)
parser.add_argument('--FragmentPeakMatchRelativeMassDeviation',
                    default=10, type=float)
parser.add_argument('--FragmentPeakMatchAbsoluteMassDeviation',
                    default=0.001, type=float)
parser.add_argument('--NumberThreads', default=1, type=int)
parser.add_argument('--UnconnectedCompoundFilter', action='store_true')
parser.add_argument('--IsotopeFilter', action='store_true')

# Optional candidate filters; an empty string leaves the filter disabled.
parser.add_argument('--FilterMinimumElements', default='')
parser.add_argument('--FilterMaximumElements', default='')
parser.add_argument('--FilterSmartsInclusionList', default='')
parser.add_argument('--FilterSmartsExclusionList', default='')
parser.add_argument('--FilterIncludedElements', default='')
parser.add_argument('--FilterExcludedElements', default='')
parser.add_argument('--FilterIncludedExclusiveElements', default='')

parser.add_argument('--score_thrshld', default=0, type=float)
parser.add_argument('--pctexplpeak_thrshld', default=0, type=float)
parser.add_argument('--schema')
parser.add_argument('--cores_top_level', default=1, type=int)
parser.add_argument('--chunks', default=1, type=int)
parser.add_argument('--meta_select_col', default='name')

parser.add_argument('--ScoreSuspectLists', default='')
parser.add_argument('--MetFragScoreTypes',
                    default="FragmenterScore,OfflineMetFusionScore")
parser.add_argument('--MetFragScoreWeights', default="1.0,1.0")

args = parser.parse_args()
# Echo the effective settings so they appear in the tool output.
print(args)
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
60
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
# Create temporary working directory
if args.temp_dir:
    wd = args.temp_dir
    # A caller-supplied directory may hold results of an earlier run:
    # wipe it so this run starts from an empty directory.
    if os.path.exists(wd):
        shutil.rmtree(wd)
    os.makedirs(wd)
else:
    # mkdtemp() already returns a new, empty directory that only the
    # current user can access. Removing and re-creating it (as was done
    # before) discarded those restrictive permissions and left a window in
    # which another process could claim the path, so it is used as-is.
    wd = tempfile.mkdtemp()
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
72
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
######################################################################
# Setup parameter dictionary
######################################################################
# Parameter name -> value. Keys are added in a fixed order: database
# settings first, then mass tolerances, ion mode, writer/threads, scoring.
paramd = defaultdict()

paramd["MetFragDatabaseType"] = args.MetFragDatabaseType

# Only the 'LocalCSV' and 'MetChem' database types need extra settings.
if args.MetFragDatabaseType == "LocalCSV":
    paramd["LocalDatabasePath"] = args.LocalDatabasePath
elif args.MetFragDatabaseType == "MetChem":
    # NOTE(review): database name, port and credentials are hard-coded;
    # only the server address comes from the command line. The account
    # name suggests a read-only user -- confirm before reusing elsewhere.
    paramd.update([
        ("LocalMetChemDatabase", "metchem"),
        ("LocalMetChemDatabasePortNumber", 5432),
        ("LocalMetChemDatabaseServerIp", args.LocalMetChemDatabaseServerIp),
        ("LocalMetChemDatabaseUser", "metchemro"),
        ("LocalMetChemDatabasePassword", "metchemro"),
    ])

# Mass tolerances are forwarded unchanged from the command line.
paramd.update([
    ("FragmentPeakMatchAbsoluteMassDeviation",
     args.FragmentPeakMatchAbsoluteMassDeviation),
    ("FragmentPeakMatchRelativeMassDeviation",
     args.FragmentPeakMatchRelativeMassDeviation),
    ("DatabaseSearchRelativeMassDeviation",
     args.DatabaseSearchRelativeMassDeviation),
    ("SampleName", ''),
    ("ResultsPath", os.path.join(wd)),
])

# Ion mode: 'pos' means positive; every other value is handled as negative.
_is_positive = args.polarity == "pos"
_ion_mode = "1" if _is_positive else "-1"
paramd["IsPositiveIonMode"] = _is_positive
paramd["PrecursorIonModeDefault"] = _ion_mode
paramd["PrecursorIonMode"] = _ion_mode
# Default mass difference, signed according to the ion mode.
paramd["nm_mass_diff_default"] = 1.007276 if _is_positive else -1.007276

paramd["MetFragCandidateWriter"] = "CSV"
paramd["NumberThreads"] = args.NumberThreads

# The suspect list entry is added only when one was supplied.
if args.ScoreSuspectLists:
    paramd["ScoreSuspectLists"] = args.ScoreSuspectLists

paramd["MetFragScoreTypes"] = args.MetFragScoreTypes
paramd["MetFragScoreWeights"] = args.MetFragScoreWeights
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
114
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
115 dct_filter = defaultdict()
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
116 filterh = []
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
117
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
118 if args.UnconnectedCompoundFilter:
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
119 filterh.append('UnconnectedCompoundFilter')
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
120
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
121 if args.IsotopeFilter:
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
122 filterh.append('IsotopeFilter')
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
123
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
124 if args.FilterMinimumElements:
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
125 filterh.append('MinimumElementsFilter')
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
126 dct_filter['FilterMinimumElements'] = args.FilterMinimumElements
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
127
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
128 if args.FilterMaximumElements:
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
129 filterh.append('MaximumElementsFilter')
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
130 dct_filter['FilterMaximumElements'] = args.FilterMaximumElements
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
131
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
# Register the optional MetFrag candidate filters requested on the command line.
# Each row is (user-supplied value, filter name appended to `filterh`,
# parameter key written to `dct_filter`); rows are processed in order.
# Note that 'ElementInclusionExclusiveFilter' and 'ElementInclusionFilter' both
# write the 'FilterIncludedElements' key, so when both options are supplied the
# value from args.FilterIncludedElements (processed later) is the one kept.
_filter_table = (
    (args.FilterSmartsInclusionList,
     'SmartsSubstructureInclusionFilter', 'FilterSmartsInclusionList'),
    (args.FilterSmartsExclusionList,
     'SmartsSubstructureExclusionFilter', 'FilterSmartsExclusionList'),
    (args.FilterIncludedExclusiveElements,
     'ElementInclusionExclusiveFilter', 'FilterIncludedElements'),
    (args.FilterIncludedElements,
     'ElementInclusionFilter', 'FilterIncludedElements'),
    (args.FilterExcludedElements,
     'ElementExclusionFilter', 'FilterExcludedElements'),
)
for _filter_value, _filter_name, _filter_key in _filter_table:
    if not _filter_value:
        continue
    filterh.append(_filter_name)
    dct_filter[_filter_key] = _filter_value

if filterh:
    # Comma-separated filter names, a space, then space-terminated key=value
    # pairs for every collected filter parameter.
    _filter_pairs = ["{0}={1} ".format(str(k), str(v))
                     for k, v in six.iteritems(dct_filter)]
    fcmds = ','.join(filterh) + ' ' + ''.join(_filter_pairs)

    paramd["MetFragPreProcessingCandidateFilter"] = fcmds

print(paramd)
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
162
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
163 ######################################################################
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
164 # Setup regular expressions for MSP parsing dictionary
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
165 ######################################################################
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
# Patterns are raw strings so that regex escapes such as \s, \d and \$ are not
# treated as (invalid) Python string escapes. Every value is a list of
# alternative patterns for that metadata field.
regex_msp = {
    'name': [r'^Name(?:=|:)(.*)$'],
    'polarity': [r'^ion.*mode(?:=|:)(.*)$',
                 r'^ionization.*mode(?:=|:)(.*)$',
                 r'^polarity(?:=|:)(.*)$'],
    'precursor_mz': [r'^precursor.*m/z(?:=|:)\s*(\d*[.,]?\d*)$',
                     r'^precursor.*mz(?:=|:)\s*(\d*[.,]?\d*)$'],
    'precursor_type': [r'^precursor.*type(?:=|:)(.*)$',
                       r'^adduct(?:=|:)(.*)$',
                       r'^ADDUCTIONNAME(?:=|:)(.*)$'],
    'num_peaks': [r'^Num.*Peaks(?:=|:)\s*(\d*)$'],
    'msp': [r'^Name(?:=|:)(.*)$'],  # Flag for standard MSP format
}

regex_massbank = {
    'name': [r'^RECORD_TITLE:(.*)$'],
    'polarity': [r'^AC\$MASS_SPECTROMETRY:\s+ION_MODE\s+(.*)$'],
    'precursor_mz': [r'^MS\$FOCUSED_ION:\s+PRECURSOR_M/Z\s+(\d*[.,]?\d*)$'],
    'precursor_type': [r'^MS\$FOCUSED_ION:\s+PRECURSOR_TYPE\s+(.*)$'],
    'num_peaks': [r'^PK\$NUM_PEAK:\s+(\d*)'],
    'cols': [r'^PK\$PEAK:\s+(.*)'],
    'massbank': [r'^RECORD_TITLE:(.*)$'],  # Flag for massbank format
}
0
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
182
1
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
# Choose the set of metadata regular expressions for the requested schema.
if args.schema == 'msp':
    meta_regex = regex_msp
elif args.schema == 'massbank':
    meta_regex = regex_massbank
elif args.schema == 'auto':
    # If auto we just check for all the available parameter names and then
    # determine if Massbank or MSP based on the name parameter.
    # Each pattern list is copied first: extending the lists in place would
    # otherwise also append the MSP patterns to regex_massbank itself.
    meta_regex = {key: list(patterns) for key, patterns in regex_massbank.items()}
    for _field in ('name', 'polarity', 'precursor_mz', 'precursor_type', 'num_peaks'):
        meta_regex[_field].extend(regex_msp[_field])
    meta_regex['msp'] = regex_msp['msp']
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
198
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
199 print(meta_regex)
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
200
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
201
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
202
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
# this dictionary will store the metadata results from the MSP file
meta_info = {}
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
205
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
206
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
207 # function to extract the meta data using the regular expressions
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
def parse_meta(meta_regex, meta_info=None, text=None):
    """Extract metadata fields from one line of text using regular expressions.

    Args:
        meta_regex: dict mapping a field name to a list of regex patterns.
        meta_info: dict to update with the matches. A new dict is created
            when omitted (a shared mutable default is deliberately avoided).
        text: the line to search. When omitted, the module-level variable
            ``line`` is used, which keeps existing two-argument calls made
            from the file-parsing loop working unchanged.

    Returns:
        ``meta_info``, where every field with a matching pattern is set to the
        pattern's captured groups joined with '-' and stripped of surrounding
        whitespace. Matching is case-insensitive. If several patterns of one
        field match, the last matching pattern wins.
    """
    if meta_info is None:
        meta_info = {}
    if text is None:
        # Legacy behaviour: read the current line from module scope.
        text = line
    for k, regexes in meta_regex.items():
        for reg in regexes:
            m = re.search(reg, text, re.IGNORECASE)
            if m:
                meta_info[k] = '-'.join(m.groups()).strip()
    return meta_info
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
215
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
216
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
# Lookup table keyed by adduct label (e.g. '[M+H]+'); each value is a signed
# numeric offset for that adduct.
# NOTE(review): presumably the mass difference (Da) between the precursor ion
# m/z and the neutral mass -- confirm against the code that consumes this.
adduct_types = {
    '[M+H]+': 1.007276,
    '[M+NH4]+': 18.034374,
    '[M+Na]+': 22.989218,
    '[M+K]+': 38.963158,
    '[M+CH3OH+H]+': 33.033489,
    '[M+ACN+H]+': 42.033823,
    '[M+ACN+Na]+': 64.015765,
    '[M+2ACN+H]+': 83.06037,
    '[M-H]-': -1.007276,
    '[M+Cl]-': 34.969402,
}
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
229
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
230 ######################################################################
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
231 # Parse MSP file and run metfrag CLI
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
232 ######################################################################
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
# keep a list of commands, for when the CLI is run in parallel
cmds = []
# keep a dictionary of all params
paramds = {}
# keep a count of spectra (used for the uid)
spectrac = 0
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
239
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
240 with open(args.input_pth, "r") as infile:
0
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
241 numlines = 0
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
242 for line in infile:
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
243 line = line.strip()
1
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
244 print(line)
0
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
245 if numlines == 0:
1
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
246 # =============== Extract metadata from MSP ========================
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
247 meta_info = parse_meta(meta_regex, meta_info)
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
248 print(meta_info)
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
249 if ('massbank' in meta_info and 'cols' in meta_info) or ('msp' in meta_info and 'num_peaks' in meta_info):
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
250 print('check')
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
251 numlines = int(meta_info['num_peaks'])
0
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
252 linesread = 0
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
253 peaklist = []
1
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
254
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
255 elif linesread < numlines:
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
256 # =============== Extract peaks from MSP ==========================
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
257 line = tuple(line.split()) # .split() will split on any empty space (i.e. tab and space)
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
258 # Keep only m/z and intensity, not relative intensity
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
259 save_line = tuple(line[0].split() + line[1].split())
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
260 linesread += 1
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
261 peaklist.append(save_line)
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
262
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
263 elif linesread == numlines:
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
264 # =============== Get sample name and additional details for output =======
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
265 # use a unique uuid4 to keep track of processing (important for multicore)
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
266 #rd = str(uuid.uuid4())
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
267 spectrac += 1
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
268
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
269 # Get sample details (if possible to extract), e.g. if created as part of the msPurity pipeline;
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
270 # choose between getting additional details to add as columns as either all meta data from msp, just
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
271 # details from the record name (i.e. when using msPurity and we have the columns coded into the name) or
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
272 # just the spectra index (spectrac)
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
273 if args.meta_select_col == 'name':
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
274 # have additional column of just the name
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
275 paramd['additional_details'] = {'name': meta_info['name']}
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
276 elif args.meta_select_col == 'name_split':
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
277 # have additional columns split by "|" and then on ":" e.g. MZ:100.2 | RT:20 | xcms_grp_id:1
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
278 sampled = {sm.split(":")[0].strip(): sm.split(":")[1].strip() for sm in
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
279 meta_info['name'].split("|")}
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
280 elif args.meta_select_col == 'all':
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
281 # have additional columns based on all the meta information extracted from the MSP
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
282 paramd['additional_details'] = meta_info
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
283 else:
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
284 # Just have an index of the spectra in the MSP file
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
285 paramd['additional_details'] = {'spectra_idx': spectrac}
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
286
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
287 paramd["SampleName"] = "{}_metfrag_result".format(spectrac)
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
288
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
289 # =============== Output peaks to txt file ==============================
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
290 numlines = 0
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
291 paramd["PeakListPath"] = os.path.join(wd, "{}_tmpspec.txt".format(spectrac))
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
292 print(paramd["PeakListPath"])
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
293 # write spec file
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
294 with open(paramd["PeakListPath"], 'w') as outfile:
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
295 for p in peaklist:
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
296 outfile.write(p[0] + "\t" + p[1] + "\n")
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
297
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
298 # =============== Update param based on MSP metadata ======================
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
299 # Replace param details with details from MSP if required
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
300 if 'precursor_type' in meta_info and meta_info['precursor_type'] in adduct_types:
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
301
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
302 nm = float(meta_info['precursor_mz']) + adduct_types[meta_info['precursor_type']]
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
303 paramd["PrecursorIonMode"] = int(round(adduct_types[meta_info['precursor_type']], 0))
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
304 else:
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
305
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
306 paramd["PrecursorIonMode"] = paramd['PrecursorIonModeDefault']
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
307 nm = float(meta_info['precursor_mz']) + paramd['nm_mass_diff_default']
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
308
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
309 paramd["NeutralPrecursorMass"] = nm
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
310
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
311 # =============== Create CLI cmd for metfrag ===============================
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
312 cmd = "metfrag"
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
313 for k, v in six.iteritems(paramd):
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
314 if not k in ['PrecursorIonModeDefault', 'nm_mass_diff_default', 'additional_details']:
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
315 cmd += " {}={}".format(str(k), str(v))
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
316 paramds[paramd["SampleName"]] = paramd
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
317
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
318 # =============== Run metfrag ==============================================
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
319 # Filter before process with a minimum number of MS/MS peaks
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
320 if linesread >= float(args.minMSMSpeaks):
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
321
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
322 if int(args.cores_top_level) > 1:
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
323 cmds.append(cmd)
0
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
324 else:
1
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
325 print(cmd)
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
326 os.system(cmd)
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
327
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
328 meta_info = {}
0
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
329
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
330
1
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
def work(cmds):
    """Run a batch of shell commands one after another.

    Every command string is handed to ``os.system``.

    :param cmds: iterable of command-line strings
    :return: list holding the ``os.system`` return value of each command,
        in the order the commands were given
    """
    statuses = []
    for command in cmds:
        statuses.append(os.system(command))
    return statuses
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
333
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
334
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
# Fan the queued metfrag commands out over worker processes. Nothing happens
# here when a single core was requested: in that case every command was
# already executed directly instead of being appended to `cmds`.
n_workers = int(args.cores_top_level)
if n_workers > 1:
    chunk_len = int(args.chunks)
    # Cut the command list into consecutive batches of `chunk_len` commands;
    # each batch is processed by one call of work().
    cmds_chunks = []
    for start in range(0, len(cmds), chunk_len):
        cmds_chunks.append(cmds[start:start + chunk_len])
    pool = multiprocessing.Pool(processes=n_workers)
    pool.map(work, cmds_chunks)
    pool.close()
    pool.join()
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
342
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
######################################################################
# Concatenate and filter the output
######################################################################
# The per-spectrum result files might have different headers, so gather the
# column names of all of them before any merging starts.
outfiles = glob.glob(os.path.join(wd, "*_metfrag_result.csv"))

headers = []
c = 0  # number of result files that hold at least one data row
for fn in outfiles:
    with open(fn, 'r') as infile:
        reader = csv.reader(infile)
        # The builtin next() works on Python 2.6+ and on Python 3, so no
        # version switch is needed. The default value keeps a completely
        # empty result file from raising StopIteration.
        headers.extend(next(reader, []))
        # Only the existence of a data row matters, so read at most one.
        if next(reader, None) is not None:
            c += 1

# if no data rows (e.g. matches) then do not save an output and leave the program
if c == 0:
    sys.exit()
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
368
1
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
# Columns contributed by the MSP metadata. 'sample_name' always comes first
# and the remaining names are sorted, so the column order is identical on
# every run (a list built from a bare set has no defined order).
extra_keys = set()
for paramd in six.itervalues(paramds):
    # NOTE(review): .get() tolerates an entry without 'additional_details'.
    # The 'name_split' metadata mode does not appear to store that key --
    # confirm and fix where the entry is built.
    extra_keys.update(paramd.get('additional_details', {}))
extra_keys.discard('sample_name')
additional_detail_headers = ['sample_name'] + sorted(extra_keys)

# The MetFrag result columns follow. A name that the metadata already provides
# is not listed a second time, otherwise the merged table would contain the
# same column twice.
headers = additional_detail_headers + sorted(set(headers) - set(additional_detail_headers))
0
75c805123b45 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit b5f4005c77b275b9b1b4349b08476afc783a8e14
tomnl
parents:
diff changeset
374
1
c1b168770b68 planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 5b384bd55e5bb4b0dc1daebd2ef5d3ee0e379b2e-dirty
tomnl
parents: 0
diff changeset
# merge outputs
# Both thresholds are constant for the whole run, so parse them only once.
pct_thrshld = float(args.pctexplpeak_thrshld)
score_thrshld = float(args.score_thrshld)

with open(args.result_pth, 'a') as merged_outfile:
    dwriter = csv.DictWriter(merged_outfile, fieldnames=headers, delimiter='\t', quotechar='"',
                             quoting=csv.QUOTE_NONNUMERIC,)
    dwriter.writeheader()

    for fn in outfiles:
        # The file name without ".csv" is the SampleName used as key in paramds
        bfn = os.path.basename(fn).replace(".csv", "")

        with open(fn) as infile:
            reader = csv.DictReader(infile, delimiter=',', quotechar='"')
            for line in reader:
                bewrite = True

                # Filter when no MS/MS peak matched
                if pct_thrshld > 0 and "NA" in (line.get("ExplPeaks") or ""):
                    bewrite = False

                # Filter with a score threshold
                score = line.get("Score")
                if score is not None and float(score) <= score_thrshld:
                    bewrite = False

                # Filter with a relative number of peaks matched. The values
                # are read from the current row only; when a result file lacks
                # one of the two columns the check is skipped instead of
                # silently reusing the numbers of a previous row.
                nbfindpeak = line.get("NoExplPeaks")
                totpeaks = line.get("NumberPeaksUsed")
                if nbfindpeak is not None and totpeaks is not None:
                    try:
                        pctexplpeak = float(nbfindpeak) / float(totpeaks) * 100
                    except ZeroDivisionError:
                        # no peaks were used at all -> nothing can be explained
                        bewrite = False
                    else:
                        if pctexplpeak < pct_thrshld:
                            bewrite = False

                # Write the line if it passes all filters
                if bewrite:
                    # NOTE(review): .get() tolerates an entry without
                    # 'additional_details' (the 'name_split' metadata mode
                    # does not appear to store it) -- confirm.
                    line.update(paramds[bfn].get('additional_details', {}))
                    line['sample_name'] = paramds[bfn]['SampleName']

                    dwriter.writerow(line)