Mercurial > repos > devteam > histogram
comparison histogram.py @ 2:4d2c30376614 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
author | devteam |
---|---|
date | Mon, 27 Jul 2020 07:25:28 +0000 |
parents | f4e143b7eb06 |
children |
comparison legend: equal, deleted, inserted, replaced
1:a629ef17ee46 | 2:4d2c30376614 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 #Greg Von Kuster | 2 # Greg Von Kuster |
3 | 3 |
4 import sys | 4 import sys |
5 from rpy import * | |
6 | 5 |
7 assert sys.version_info[:2] >= ( 2, 4 ) | 6 from rpy2.robjects import r, vectors |
7 from rpy2.robjects.packages import importr | |
8 | 8 |
9 def stop_err(msg): | |
10 sys.stderr.write(msg) | |
11 sys.exit() | |
12 | 9 |
13 def main(): | 10 def main(): |
14 | |
15 # Handle input params | 11 # Handle input params |
16 in_fname = sys.argv[1] | 12 in_fname = sys.argv[1] |
17 out_fname = sys.argv[2] | 13 out_fname = sys.argv[2] |
18 try: | 14 try: |
19 column = int( sys.argv[3] ) - 1 | 15 column = int(sys.argv[3]) - 1 |
20 except: | 16 except Exception: |
21 stop_err( "Column not specified, your query does not contain a column of numerical data." ) | 17 sys.exit("Column not specified, your query does not contain a column of numerical data.") |
22 title = sys.argv[4] | 18 title = sys.argv[4] |
23 xlab = sys.argv[5] | 19 xlab = sys.argv[5] |
24 breaks = int( sys.argv[6] ) | 20 breaks = int(sys.argv[6]) |
25 if breaks == 0: | 21 if breaks == 0: |
26 breaks = "Sturges" | 22 breaks = "Sturges" |
27 if sys.argv[7] == "true": | 23 if sys.argv[7] == "true": |
28 density = True | 24 density = True |
29 else: density = False | 25 else: |
30 if len( sys.argv ) >= 9 and sys.argv[8] == "true": | 26 density = False |
27 if len(sys.argv) >= 9 and sys.argv[8] == "true": | |
31 frequency = True | 28 frequency = True |
32 else: frequency = False | 29 else: |
30 frequency = False | |
33 | 31 |
34 matrix = [] | 32 matrix = [] |
35 skipped_lines = 0 | 33 skipped_lines = 0 |
36 first_invalid_line = 0 | 34 first_invalid_line = 0 |
37 invalid_value = '' | 35 invalid_value = '' |
38 i = 0 | 36 i = 0 |
39 for i, line in enumerate( file( in_fname ) ): | 37 for i, line in enumerate(open(in_fname)): |
40 valid = True | 38 valid = True |
41 line = line.rstrip('\r\n') | 39 line = line.rstrip('\r\n') |
42 # Skip comments | 40 # Skip comments |
43 if line and not line.startswith( '#' ): | 41 if line and not line.startswith('#'): |
44 # Extract values and convert to floats | 42 # Extract values and convert to floats |
45 row = [] | 43 row = [] |
46 try: | 44 try: |
47 fields = line.split( "\t" ) | 45 fields = line.split("\t") |
48 val = fields[column] | 46 val = fields[column] |
49 if val.lower() == "na": | 47 if val.lower() == "na": |
50 row.append( float( "nan" ) ) | 48 row.append(float("nan")) |
51 except: | 49 except Exception: |
52 valid = False | 50 valid = False |
53 skipped_lines += 1 | 51 skipped_lines += 1 |
54 if not first_invalid_line: | 52 if not first_invalid_line: |
55 first_invalid_line = i+1 | 53 first_invalid_line = i + 1 |
56 else: | 54 else: |
57 try: | 55 try: |
58 row.append( float( val ) ) | 56 row.append(float(val)) |
59 except ValueError: | 57 except ValueError: |
60 valid = False | 58 valid = False |
61 skipped_lines += 1 | 59 skipped_lines += 1 |
62 if not first_invalid_line: | 60 if not first_invalid_line: |
63 first_invalid_line = i+1 | 61 first_invalid_line = i + 1 |
64 invalid_value = fields[column] | 62 invalid_value = fields[column] |
65 else: | 63 else: |
66 valid = False | 64 valid = False |
67 skipped_lines += 1 | 65 skipped_lines += 1 |
68 if not first_invalid_line: | 66 if not first_invalid_line: |
69 first_invalid_line = i+1 | 67 first_invalid_line = i + 1 |
70 | 68 |
71 if valid: | 69 if valid: |
72 matrix += row | 70 matrix.extend(row) |
73 | 71 |
74 if skipped_lines < i: | 72 if skipped_lines < i: |
75 try: | 73 try: |
76 a = r.array( matrix ) | 74 grdevices = importr('grDevices') |
77 r.pdf( out_fname, 8, 8 ) | 75 graphics = importr('graphics') |
78 histogram = r.hist( a, probability=not frequency, main=title, xlab=xlab, breaks=breaks ) | 76 vector = vectors.FloatVector(matrix) |
77 grdevices.pdf(out_fname, 8, 8) | |
78 histogram = graphics.hist(vector, probability=not frequency, main=title, xlab=xlab, breaks=breaks) | |
79 if density: | 79 if density: |
80 density = r.density( a ) | 80 density = r.density(vector) |
81 if frequency: | 81 if frequency: |
82 scale_factor = len( matrix ) * ( histogram['mids'][1] - histogram['mids'][0] ) #uniform bandwidth taken from first 2 midpoints | 82 scale_factor = len(matrix) * (histogram['mids'][1] - histogram['mids'][0]) # uniform bandwidth taken from first 2 midpoints |
83 density[ 'y' ] = map( lambda x: x * scale_factor, density[ 'y' ] ) | 83 density['y'] = map(lambda x: x * scale_factor, density['y']) |
84 r.lines( density ) | 84 graphics.lines(density) |
85 r.dev_off() | 85 grdevices.dev_off() |
86 except Exception, exc: | 86 except Exception as exc: |
87 stop_err( "%s" %str( exc ) ) | 87 sys.exit("%s" % str(exc)) |
88 else: | 88 else: |
89 if i == 0: | 89 if i == 0: |
90 stop_err("Input dataset is empty.") | 90 sys.exit("Input dataset is empty.") |
91 else: | 91 else: |
92 stop_err( "All values in column %s are non-numeric." %sys.argv[3] ) | 92 sys.exit("All values in column %s are non-numeric." % sys.argv[3]) |
93 | 93 |
94 print "Histogram of column %s. " %sys.argv[3] | 94 print("Histogram of column %s. " % sys.argv[3]) |
95 if skipped_lines > 0: | 95 if skipped_lines > 0: |
96 print "Skipped %d invalid lines starting with line #%d, '%s'." % ( skipped_lines, first_invalid_line, invalid_value ) | 96 print("Skipped %d invalid lines starting with line #%d, '%s'." % (skipped_lines, first_invalid_line, invalid_value)) |
97 | 97 |
98 r.quit( save="no" ) | 98 |
99 | |
100 if __name__ == "__main__": | 99 if __name__ == "__main__": |
101 main() | 100 main() |