annotate list-chrom-cols.py @ 6:b8f70d8216b3 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
author prog
date Mon, 27 Mar 2017 06:27:29 -0400
parents 45e985cd8e9e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
1 #!/usr/bin/env python
6
b8f70d8216b3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents: 1
diff changeset
2 # vi: fdm=marker
1
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
3
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
4 import argparse
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
5 import subprocess
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
6 import re
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
7 import urllib2
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
8 import json
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
9 import csv
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
10
6
b8f70d8216b3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents: 1
diff changeset
11 # Get chrom cols {{{1
b8f70d8216b3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents: 1
diff changeset
12 ################################################################
b8f70d8216b3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents: 1
diff changeset
13
b8f70d8216b3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents: 1
diff changeset
def get_chrom_cols(dbtype, dburl, dbtoken = None, col_field = 'chromcol'):
    """Return the chromatographic columns of a database as option tuples.

    Args:
        dbtype: 'peakforest' to query a web service, or 'inhouse' to read a
            tab-separated file. Any other value yields an empty list.
        dburl: for 'peakforest', the base URL of the service; for 'inhouse',
            a file path or an object exposing get_file_name().
        dbtoken: optional token appended to the PeakForest URL as
            '?token=<dbtoken>'.
        col_field: name of the header field that holds the chromatographic
            column in the in-house file. None is treated as 'chromcol'.

    Returns:
        A list of (label, value, selected) tuples in which only the first
        entry has selected set to True.
    """

    # Callers may pass None explicitly (e.g. an unset command-line option);
    # treat it as the default field name instead of matching nothing.
    if col_field is None:
        col_field = 'chromcol'

    cols = []

    if dbtype == 'peakforest':
        # Function-scope import so this works with both the Python 2 and the
        # Python 3 standard library layouts.
        try:
            from urllib.request import urlopen
        except ImportError:
            from urllib2 import urlopen

        # endswith() also copes with an empty URL, unlike dburl[-1].
        url = dburl + ('' if dburl.endswith('/') else '/') + 'metadata/lc/list-code-columns'
        if dbtoken is not None:
            url += '?token=' + dbtoken

        response = urlopen(url)
        try:
            result = response.read()
        finally:
            response.close()
        if isinstance(result, bytes):
            result = result.decode('utf-8')
        v = json.loads(result)

        # enumerate() replaces the former "++i", which is a double unary plus
        # in Python and never incremented the counter, so every column was
        # flagged as selected.
        for i, (colid, coldesc) in enumerate(v.items()):
            s = coldesc['name'] + ' - ' + coldesc['constructor'] + ' - L' + str(coldesc['length']) + ' - diam. ' + str(coldesc['diameter']) + ' - part. ' + str(coldesc['particule_size']) + ' - flow ' + str(coldesc['flow_rate'])
            cols.append((s, colid, i == 0))

    elif dbtype == 'inhouse':

        # Get all distinct column names from the file, in order of appearance
        path = dburl.get_file_name() if hasattr(dburl, 'get_file_name') else dburl
        with open(path, 'r') as dbfile:
            reader = csv.reader(dbfile, delimiter = "\t", quotechar='"')
            header = next(reader, [])  # an empty file simply has no header
            if col_field in header:
                field_index = header.index(col_field)
                seen = set()
                for row in reader:
                    # Blank or truncated lines carry no value for the field.
                    if len(row) <= field_index:
                        continue
                    col = row[field_index]
                    if col not in seen:
                        seen.add(col)
                        cols.append((col, col, len(cols) == 0))

    return cols
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
47
6
b8f70d8216b3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents: 1
diff changeset
48 # Main {{{1
b8f70d8216b3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents: 1
diff changeset
49 ################################################################
1
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
50
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
if __name__ == '__main__':

    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Script for getting chromatographic columns of an RMSDB database for Galaxy tool lcmsmatching.')
    parser.add_argument('-d', help = 'Database type', dest = 'dbtype', required = True)
    parser.add_argument('-u', help = 'Database URL', dest = 'dburl', required = True)
    parser.add_argument('-t', help = 'Database token', dest = 'dbtoken', required = False)
    parser.add_argument('-f', help = 'Chromatogrphic column field name', dest = 'col_field', required = False)
    args = parser.parse_args()

    # argparse stores None for every option left unset. Forwarding those
    # would override the defaults declared by get_chrom_cols() (notably
    # col_field = 'chromcol'), so only pass the options actually supplied.
    args_dict = dict((name, value) for name, value in vars(args).items() if value is not None)

    # Print the list of (label, value, selected) tuples on standard output.
    print(get_chrom_cols(**args_dict))