Mercurial > repos > devteam > histogram
annotate histogram.py @ 2:4d2c30376614 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
author | devteam |
---|---|
date | Mon, 27 Jul 2020 07:25:28 +0000 |
parents | f4e143b7eb06 |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2
4d2c30376614
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents:
0
diff
changeset
|
2 # Greg Von Kuster |
0 | 3 |
4 import sys | |
5 | |
2
4d2c30376614
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents:
0
diff
changeset
|
6 from rpy2.robjects import r, vectors |
4d2c30376614
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents:
0
diff
changeset
|
7 from rpy2.robjects.packages import importr |
4d2c30376614
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents:
0
diff
changeset
|
8 |
0 | 9 |
def main():
    """Draw a histogram of one column of a tab-separated file into a PDF.

    Arguments are read from ``sys.argv``:
        1. input file name (tab-separated text)
        2. output PDF file name
        3. 1-based index of the column to plot
        4. plot title
        5. x-axis label
        6. number of breaks; ``0`` selects R's "Sturges" rule
        7. ``"true"`` to overlay a density curve
        8. (optional) ``"true"`` to plot frequencies instead of probabilities

    Blank lines, lines starting with ``#``, lines that lack the requested
    column and lines whose value cannot be parsed as a float are skipped and
    counted. A value of ``NA`` (any case) is kept as NaN.

    Exits through ``sys.exit`` with a message when the column argument is
    missing or not an integer, when the input has no lines at all, when no
    line yields a usable value, or when plotting raises an exception.
    """
    # Handle input params
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        column = int(sys.argv[3]) - 1
    except (IndexError, ValueError):
        sys.exit("Column not specified, your query does not contain a column of numerical data.")
    title = sys.argv[4]
    xlab = sys.argv[5]
    breaks = int(sys.argv[6])
    if breaks == 0:
        breaks = "Sturges"
    density = sys.argv[7] == "true"
    frequency = len(sys.argv) >= 9 and sys.argv[8] == "true"

    matrix = []
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''
    total_lines = 0
    # The context manager guarantees the input file is closed after reading.
    with open(in_fname) as in_fh:
        for total_lines, line in enumerate(in_fh, start=1):
            line = line.rstrip('\r\n')
            value = None
            bad_text = ''
            # Skip blank lines and comments
            if line and not line.startswith('#'):
                try:
                    text = line.split("\t")[column]
                except IndexError:
                    # The line has too few columns.
                    pass
                else:
                    if text.lower() == "na":
                        value = float("nan")
                    else:
                        try:
                            value = float(text)
                        except ValueError:
                            bad_text = text

            if value is None:
                skipped_lines += 1
                if not first_invalid_line:
                    # Remember where (and, for unparsable values, what) the
                    # first skipped line was, for the summary message.
                    first_invalid_line = total_lines
                    invalid_value = bad_text
                continue
            matrix.append(value)

    # Decide on the collected values themselves rather than comparing the
    # skipped count with the last line index, which was off by one and
    # rejected inputs that contained a single usable line.
    if not matrix:
        if total_lines == 0:
            sys.exit("Input dataset is empty.")
        sys.exit("All values in column %s are non-numeric." % sys.argv[3])

    try:
        grdevices = importr('grDevices')
        graphics = importr('graphics')
        vector = vectors.FloatVector(matrix)
        grdevices.pdf(out_fname, 8, 8)
        histogram = graphics.hist(vector, probability=not frequency, main=title, xlab=xlab, breaks=breaks)
        if density:
            density_curve = r.density(vector)
            if frequency:
                # Rescale the density to counts: number of values times the
                # bin width (uniform bandwidth taken from first 2 midpoints).
                # NOTE(review): rpy2 usually spells name-based list access as
                # .rx2('mids') / .rx2('y') - confirm this indexing form works
                # with the installed rpy2 version.
                scale_factor = len(matrix) * (histogram['mids'][1] - histogram['mids'][0])
                # Build a concrete R vector; a lazy map() object is not a
                # numeric sequence on Python 3.
                density_curve['y'] = vectors.FloatVector(
                    [y * scale_factor for y in density_curve['y']]
                )
            graphics.lines(density_curve)
        grdevices.dev_off()
    except Exception as exc:
        # Top-level boundary: report any R / rpy2 failure as the exit message.
        sys.exit("%s" % str(exc))

    print("Histogram of column %s. " % sys.argv[3])
    if skipped_lines > 0:
        print("Skipped %d invalid lines starting with line #%d, '%s'." % (skipped_lines, first_invalid_line, invalid_value))
0 | 97 |
2
4d2c30376614
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents:
0
diff
changeset
|
98 |
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()