annotate histogram.py @ 2:4d2c30376614 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
author devteam
date Mon, 27 Jul 2020 07:25:28 +0000
parents f4e143b7eb06
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f4e143b7eb06 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
2
4d2c30376614 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents: 0
diff changeset
2 # Greg Von Kuster
0
f4e143b7eb06 Imported from capsule None
devteam
parents:
diff changeset
3
f4e143b7eb06 Imported from capsule None
devteam
parents:
diff changeset
4 import sys
f4e143b7eb06 Imported from capsule None
devteam
parents:
diff changeset
5
2
4d2c30376614 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents: 0
diff changeset
6 from rpy2.robjects import r, vectors
4d2c30376614 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents: 0
diff changeset
7 from rpy2.robjects.packages import importr
4d2c30376614 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents: 0
diff changeset
8
0
f4e143b7eb06 Imported from capsule None
devteam
parents:
diff changeset
9
f4e143b7eb06 Imported from capsule None
devteam
parents:
diff changeset
def _read_column(in_fname, column):
    """Collect the numeric values of one column of a tab-separated file.

    Blank lines, lines starting with '#', lines that lack the requested
    column and lines whose value ``float()`` rejects (this includes "NA")
    are all counted as skipped.

    Args:
        in_fname: path of the tab-separated input file.
        column: 0-based index of the column to read.

    Returns:
        A tuple ``(values, total_lines, skipped_lines, first_invalid_line,
        invalid_value)`` where ``values`` is the list of parsed floats,
        ``first_invalid_line`` is the 1-based number of the first skipped
        line (0 if none) and ``invalid_value`` is the offending text of that
        line when it failed numeric conversion ('' otherwise).
    """
    values = []
    total_lines = 0
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''
    # The context manager guarantees the input handle is closed.
    with open(in_fname) as handle:
        for line_number, line in enumerate(handle, start=1):
            total_lines = line_number
            line = line.rstrip('\r\n')
            number = None
            raw = ''
            # Skip comments and blank lines (they still count as skipped).
            if line and not line.startswith('#'):
                fields = line.split("\t")
                try:
                    raw = fields[column]
                    number = float(raw)
                except IndexError:
                    # The line does not have the requested column.
                    pass
                except ValueError:
                    # Non-numeric text; `raw` is kept for the report.
                    pass
            if number is not None:
                values.append(number)
                continue
            skipped_lines += 1
            if not first_invalid_line:
                first_invalid_line = line_number
                invalid_value = raw
    return values, total_lines, skipped_lines, first_invalid_line, invalid_value


def main():
    """Draw a histogram of one column of a tabular file into a PDF.

    Command-line arguments (``sys.argv``):
        1: input file, 2: output PDF, 3: 1-based column number, 4: plot
        title, 5: x-axis label, 6: number of breaks (0 selects "Sturges"),
        7: "true" to overlay a density curve, 8 (optional): "true" to plot
        frequencies instead of probabilities.

    Exits with an error message when the column argument is unusable, when
    the input is empty, when no line yields a numeric value, or when the
    plotting code raises.
    """
    # Handle input params
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        column = int(sys.argv[3]) - 1
    except (IndexError, ValueError):
        sys.exit("Column not specified, your query does not contain a column of numerical data.")
    title = sys.argv[4]
    xlab = sys.argv[5]
    breaks = int(sys.argv[6])
    if breaks == 0:
        breaks = "Sturges"
    density = sys.argv[7] == "true"
    frequency = len(sys.argv) >= 9 and sys.argv[8] == "true"

    values, total_lines, skipped_lines, first_invalid_line, invalid_value = _read_column(in_fname, column)

    # Decide on the presence of usable values rather than comparing the skip
    # count with the last 0-based line index, which misreported short files.
    if not values:
        if total_lines == 0:
            sys.exit("Input dataset is empty.")
        sys.exit("All values in column %s are non-numeric." % sys.argv[3])

    try:
        grdevices = importr('grDevices')
        graphics = importr('graphics')
        vector = vectors.FloatVector(values)
        grdevices.pdf(out_fname, 8, 8)
        histogram = graphics.hist(vector, probability=not frequency, main=title, xlab=xlab, breaks=breaks)
        if density:
            curve = r.density(vector)
            if frequency:
                # Rescale the density to counts: n * bin width. The width is
                # taken from the first two bin edges (uniform bins), which
                # exist even when the histogram has a single bin.
                bin_edges = histogram.rx2('breaks')
                scale_factor = len(values) * (bin_edges[1] - bin_edges[0])
                # Build a real R vector; a lazy `map` object cannot be
                # handed to R.
                scaled_y = vectors.FloatVector([y * scale_factor for y in curve.rx2('y')])
                graphics.lines(curve.rx2('x'), scaled_y)
            else:
                graphics.lines(curve)
        grdevices.dev_off()
    except Exception as exc:
        # Top-level boundary: report any R/rpy2 failure as the tool error.
        sys.exit("%s" % str(exc))

    print("Histogram of column %s. " % sys.argv[3])
    if skipped_lines > 0:
        print("Skipped %d invalid lines starting with line #%d, '%s'." % (skipped_lines, first_invalid_line, invalid_value))
0
f4e143b7eb06 Imported from capsule None
devteam
parents:
diff changeset
97
2
4d2c30376614 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents: 0
diff changeset
98
0
f4e143b7eb06 Imported from capsule None
devteam
parents:
diff changeset
99 if __name__ == "__main__":
f4e143b7eb06 Imported from capsule None
devteam
parents:
diff changeset
100 main()