Mercurial > repos > prog > lcmsmatching
annotate list-chrom-cols.py @ 1:45e985cd8e9e draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
| author | prog |
|---|---|
| date | Tue, 31 Jan 2017 05:27:24 -0500 |
| parents | |
| children | b8f70d8216b3 |
| rev | line source |
|---|---|
|
1
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
1 #!/usr/bin/env python |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
2 |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
3 import argparse |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
4 import subprocess |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
5 import re |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
6 import urllib2 |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
7 import json |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
8 import csv |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
9 |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
def _peakforest_chrom_cols(dburl, dbtoken = None):
    """Fetch the chromatographic columns exposed by a PeakForest service.

    The JSON document served at ``<dburl>/metadata/lc/list-code-columns`` is
    expected to be an object mapping a column identifier to a description
    object that has a ``name`` key.

    Returns a list of ``(name, identifier, is_first)`` tuples.
    """
    # Function-scope import so the script runs on both Python 3
    # (urllib.request) and Python 2 (urllib2).
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib2 import urlopen

    # endswith() is also safe when dburl is an empty string.
    url = dburl + ('' if dburl.endswith('/') else '/') + 'metadata/lc/list-code-columns'
    if dbtoken is not None:
        url += '?token=' + dbtoken

    response = urlopen(url)
    try:
        payload = response.read()
    finally:
        # Python 2 responses are not context managers, so close explicitly.
        response.close()

    # read() returns bytes; decode before parsing so this works on every
    # Python 3 release as well as on Python 2.
    columns = json.loads(payload.decode('utf-8'))

    # enumerate() supplies the running index; the previous `++i` was a no-op
    # (two unary plus operators), which flagged every column as the first one.
    return [(desc['name'], colid, index == 0)
            for index, (colid, desc) in enumerate(columns.items())]


def _inhouse_chrom_cols(dburl, dbfields = None):
    """List the distinct chromatographic column names of an in-house TSV file.

    dburl is either a file path or an object offering ``get_file_name()``.
    dbfields is an optional ``key=value,key=value`` string; its ``chromcol``
    entry overrides the name of the header field to read.

    Returns a list of ``(name, name, is_first)`` tuples in order of first
    appearance, or an empty list when the file is empty or lacks the field.
    """
    import sys

    # Name of the header field holding the chromatographic column.
    col_field = 'chromcol'
    if dbfields:
        # Split on the first '=' only, so values may themselves contain '='.
        # Entries without '=' are ignored instead of breaking dict().
        fields = dict(item.split('=', 1)
                      for item in dbfields.split(',') if '=' in item)
        col_field = fields.get('chromcol', col_field)

    path = dburl.get_file_name() if hasattr(dburl, 'get_file_name') else dburl

    # The csv module wants a text file opened with newline='' on Python 3,
    # and a binary file on Python 2.
    if sys.version_info[0] >= 3:
        dbfile = open(path, 'r', newline='')
    else:
        dbfile = open(path, 'rb')

    names = []
    with dbfile:
        reader = csv.reader(dbfile, delimiter = "\t", quotechar='"')
        header = next(reader, None)  # None for an empty file
        if header is None or col_field not in header:
            return []
        index = header.index(col_field)
        seen = set()
        for row in reader:
            # Skip blank or truncated rows that have no value for the field.
            if len(row) <= index:
                continue
            name = row[index]
            if name not in seen:
                seen.add(name)
                names.append(name)

    return [(name, name, position == 0) for position, name in enumerate(names)]


def get_chrom_cols(dbtype, dburl, dbtoken = None, dbfields = None):
    """Return the chromatographic columns available in a database.

    Parameters:
        dbtype   -- 'peakforest' (web service) or 'inhouse' (tab-separated
                    file). Any other value yields an empty list.
        dburl    -- base URL of the PeakForest service, or the path of the
                    in-house file (or an object with ``get_file_name()``).
        dbtoken  -- optional PeakForest token, appended as ``?token=...``.
        dbfields -- optional ``key=value,...`` string; only used for
                    'inhouse', where ``chromcol`` names the header field.

    Returns a list of 3-tuples ``(label, value, is_first)`` where
    ``is_first`` is True for the first entry only.
    NOTE(review): this presumably matches the (name, value, selected) option
    format of Galaxy dynamic select parameters -- confirm against the tool XML.
    """
    if dbtype == 'peakforest':
        return _peakforest_chrom_cols(dburl, dbtoken)
    if dbtype == 'inhouse':
        return _inhouse_chrom_cols(dburl, dbfields)
    return []
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
48 |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
49 ######## |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
50 # MAIN # |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
51 ######## |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
52 |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
if __name__ == '__main__':

    # Command line options as (flag, help text, destination, mandatory?).
    option_table = (
        ('-d', 'Database type', 'dbtype', True),
        ('-u', 'Database URL', 'dburl', True),
        ('-t', 'Database token', 'dbtoken', False),
        ('-f', 'Database fields', 'dbfields', False),
    )

    # Build the parser from the table above.
    cli = argparse.ArgumentParser(description='Script for getting chromatographic columns of an RMSDB database for Galaxy tool lcmsmatching.')
    for flag, text, target, mandatory in option_table:
        cli.add_argument(flag, help = text, dest = target, required = mandatory)

    # The destinations match the keyword parameters of get_chrom_cols(), so
    # the parsed namespace is forwarded as keyword arguments.
    options = vars(cli.parse_args())

    print(get_chrom_cols(**options))
