# HG changeset patch
# User devteam
# Date 1406561428 14400
# Node ID f4e143b7eb0672efe786351be01b41c6c392484f
Imported from capsule None
diff -r 000000000000 -r f4e143b7eb06 histogram.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/histogram.py Mon Jul 28 11:30:28 2014 -0400
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+#Greg Von Kuster
+
+import sys
+from rpy import *
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err(msg):
+ sys.stderr.write(msg)
+ sys.exit()
+
+def main():
+
+ # Handle input params
+ in_fname = sys.argv[1]
+ out_fname = sys.argv[2]
+ try:
+ column = int( sys.argv[3] ) - 1
+ except:
+ stop_err( "Column not specified, your query does not contain a column of numerical data." )
+ title = sys.argv[4]
+ xlab = sys.argv[5]
+ breaks = int( sys.argv[6] )
+ if breaks == 0:
+ breaks = "Sturges"
+ if sys.argv[7] == "true":
+ density = True
+ else: density = False
+ if len( sys.argv ) >= 9 and sys.argv[8] == "true":
+ frequency = True
+ else: frequency = False
+
+ matrix = []
+ skipped_lines = 0
+ first_invalid_line = 0
+ invalid_value = ''
+ i = 0
+ for i, line in enumerate( file( in_fname ) ):
+ valid = True
+ line = line.rstrip('\r\n')
+ # Skip comments
+ if line and not line.startswith( '#' ):
+ # Extract values and convert to floats
+ row = []
+ try:
+ fields = line.split( "\t" )
+ val = fields[column]
+ if val.lower() == "na":
+ row.append( float( "nan" ) )
+ except:
+ valid = False
+ skipped_lines += 1
+ if not first_invalid_line:
+ first_invalid_line = i+1
+ else:
+ try:
+ row.append( float( val ) )
+ except ValueError:
+ valid = False
+ skipped_lines += 1
+ if not first_invalid_line:
+ first_invalid_line = i+1
+ invalid_value = fields[column]
+ else:
+ valid = False
+ skipped_lines += 1
+ if not first_invalid_line:
+ first_invalid_line = i+1
+
+ if valid:
+ matrix += row
+
+ if skipped_lines < i:
+ try:
+ a = r.array( matrix )
+ r.pdf( out_fname, 8, 8 )
+ histogram = r.hist( a, probability=not frequency, main=title, xlab=xlab, breaks=breaks )
+ if density:
+ density = r.density( a )
+ if frequency:
+ scale_factor = len( matrix ) * ( histogram['mids'][1] - histogram['mids'][0] ) #uniform bandwidth taken from first 2 midpoints
+ density[ 'y' ] = map( lambda x: x * scale_factor, density[ 'y' ] )
+ r.lines( density )
+ r.dev_off()
+ except Exception, exc:
+ stop_err( "%s" %str( exc ) )
+ else:
+ if i == 0:
+ stop_err("Input dataset is empty.")
+ else:
+ stop_err( "All values in column %s are non-numeric." %sys.argv[3] )
+
+ print "Histogram of column %s. " %sys.argv[3]
+ if skipped_lines > 0:
+ print "Skipped %d invalid lines starting with line #%d, '%s'." % ( skipped_lines, first_invalid_line, invalid_value )
+
+ r.quit( save="no" )
+
+if __name__ == "__main__":
+ main()
diff -r 000000000000 -r f4e143b7eb06 histogram2.png
Binary file histogram2.png has changed
diff -r 000000000000 -r f4e143b7eb06 histogram2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/histogram2.xml Mon Jul 28 11:30:28 2014 -0400
@@ -0,0 +1,77 @@
+
+ of a numeric column
+
+ rpy
+ R
+
+ histogram.py $input $out_file1 $numerical_column "$title" "$xlab" $breaks $density $frequency
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**TIP:** To remove comment lines that do not begin with a *#* character, use *Text Manipulation->Remove beginning*
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*
+
+-----
+
+**Syntax**
+
+This tool computes a histogram of the numerical values in a column of a dataset.
+
+- All invalid, blank and comment lines in the dataset are skipped. The number of skipped lines is displayed in the resulting history item.
+- **Column for x axis** - only numerical columns are possible.
+- **Number of breaks(bars)** - breakpoints between histogram cells. A value of '0' lets R choose the breaks automatically (the "Sturges" rule).
+- **Plot title** - the histogram title.
+- **Label for x axis** - the label of the x axis for the histogram.
+- **Include smoothed density** - if checked, a smoothed density estimate of the column's values is drawn as a line on top of the histogram.
+
+-----
+
+**Example**
+
+- Input file::
+
+ 1 68 4.1
+ 2 71 4.6
+ 3 62 3.8
+ 4 75 4.4
+ 5 58 3.2
+ 6 60 3.1
+ 7 67 3.8
+ 8 68 4.1
+ 9 71 4.3
+ 10 69 3.7
+
+- Create a histogram on column 2 of the above dataset.
+
+.. image:: histogram2.png
+
+
+
diff -r 000000000000 -r f4e143b7eb06 plot_filter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plot_filter.py Mon Jul 28 11:30:28 2014 -0400
@@ -0,0 +1,19 @@
+
+def validate(incoming):
+ """Validator for the plotting program"""
+
+ bins = incoming.get("bins","")
+ col = incoming.get("col","")
+
+ if not bins or not col:
+ raise Exception, "You need to specify a number for bins and columns"
+
+ try:
+ bins = int(bins)
+ col = int(col)
+ except:
+ raise Exception, "Parameters are not valid numbers, columns:%s, bins:%s" % (col, bins)
+
+ if not 1= ( 2, 4 )
+
+def stop_err(msg):
+ sys.stderr.write(msg)
+ sys.exit()
+
+if __name__ == '__main__':
+ # parse the arguments
+
+ if len(sys.argv) != 6:
+ stop_err('Usage: python histogram.py input_file column bins output_file style')
+ sys.exit()
+
+ mode = sys.argv[5]
+ HIST = mode == 'hist'
+ try:
+ col = int(float(sys.argv[2]))
+ if HIST:
+ bin = int(float(sys.argv[3]))
+ else:
+ # hack, this parameter is the plotting style for scatter plots
+ if sys.argv[3] == 'P':
+ style = 'o'
+ elif sys.argv[3] == 'LP':
+ style = 'o-'
+ else:
+ style = '-'
+
+ except:
+ msg = 'Parameter were not numbers %s, %s' % (sys.argv[3], sys.argv[4])
+ stop_err(msg)
+
+ # validate arguments
+ inp_file = sys.argv[1]
+ out_file = sys.argv[4]
+
+ if HIST:
+ print "Histogram on column %s (%s bins)" % (col, bin)
+ else:
+ print "Scatterplot on column %s" % (col)
+
+ xcol= col -1
+ # read the file
+ values = []
+ try:
+ count = 0
+ for line in file(inp_file):
+ count += 1
+ line = line.strip()
+ if line and line[0] != '#':
+ values.append(float(line.split()[xcol]))
+ except Exception, e:
+ stop_err('%s' % e)
+ stop_err("Non numerical data at line %d, column %d" % (count, col) )
+
+ # plot the data
+
+ if HIST:
+ n, bins, patches = hist(values, bins=bin, normed=0)
+ else:
+ plot(values, style)
+
+ xlabel('values')
+ ylabel('counts')
+
+ if HIST:
+ title('Histogram of values over column %s (%s bins)' % (col, len(bins)) )
+ else:
+ title('Scatterplot over column %s' % col )
+ grid(True)
+
+ # the plotter detects types by file extension
+ png_out = out_file + '.png' # force it to png
+ savefig(png_out)
+
+ # shuffle it back and clean up
+ data = file(png_out, 'rb').read()
+ fp = open(out_file, 'wb')
+ fp.write(data)
+ fp.close()
+ os.remove(png_out)
diff -r 000000000000 -r f4e143b7eb06 test-data/histogram_in1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/histogram_in1.tabular Mon Jul 28 11:30:28 2014 -0400
@@ -0,0 +1,10 @@
+1 68 4.1
+2 71 4.6
+3 62 3.8
+4 75 4.4
+5 58 3.2
+6 60 3.1
+7 67 3.8
+8 68 4.1
+9 71 4.3
+10 69 3.7
diff -r 000000000000 -r f4e143b7eb06 test-data/histogram_out1.pdf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/histogram_out1.pdf Mon Jul 28 11:30:28 2014 -0400
@@ -0,0 +1,545 @@
+%PDF-1.4
+%ρ\r
+1 0 obj
+<<
+/CreationDate (D:20110303082028)
+/ModDate (D:20110303082028)
+/Title (R Graphics Output)
+/Producer (R 2.11.0)
+/Creator (R)
+>>
+endobj
+2 0 obj
+<<
+/Type /Catalog
+/Pages 3 0 R
+>>
+endobj
+5 0 obj
+<<
+/Type /Page
+/Parent 3 0 R
+/Contents 6 0 R
+/Resources 4 0 R
+>>
+endobj
+6 0 obj
+<<
+/Length 7 0 R
+>>
+stream
+1 J 1 j q
+Q q
+BT
+0.000 0.000 0.000 rg
+/F3 1 Tf 14.00 0.00 -0.00 14.00 267.78 541.45 Tm (Histogram) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 295.06 18.72 Tm (V1) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 275.20 Tm (Density) Tj
+ET
+Q q
+0.000 0.000 0.000 RG
+0.75 w
+[] 0 d
+1 J
+1 j
+10.00 M
+77.07 73.44 m 527.73 73.44 l S
+77.07 73.44 m 77.07 66.24 l S
+189.73 73.44 m 189.73 66.24 l S
+302.40 73.44 m 302.40 66.24 l S
+415.07 73.44 m 415.07 66.24 l S
+527.73 73.44 m 527.73 66.24 l S
+BT
+0.000 0.000 0.000 rg
+/F2 1 Tf 12.00 0.00 -0.00 12.00 70.39 47.52 Tm (55) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 183.06 47.52 Tm (60) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 295.73 47.52 Tm (65) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 408.39 47.52 Tm (70) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 521.06 47.52 Tm (75) Tj
+ET
+59.04 89.87 m 59.04 500.53 l S
+59.04 89.87 m 51.84 89.87 l S
+59.04 192.53 m 51.84 192.53 l S
+59.04 295.20 m 51.84 295.20 l S
+59.04 397.87 m 51.84 397.87 l S
+59.04 500.53 m 51.84 500.53 l S
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 78.19 Tm (0.00) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 180.86 Tm (0.02) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 283.52 Tm (0.04) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 386.19 Tm (0.06) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 488.86 Tm (0.08) Tj
+ET
+Q q 59.04 73.44 486.72 443.52 re W n
+0.000 0.000 0.000 RG
+0.75 w
+[] 0 d
+1 J
+1 j
+10.00 M
+77.07 89.87 112.67 205.33 re S
+189.73 89.87 112.67 102.67 re S
+302.40 89.87 112.67 410.67 re S
+415.07 89.87 112.67 308.00 re S
+0.00 98.74 m
+0.12 98.77 l
+1.67 99.23 l
+3.22 99.72 l
+4.77 100.22 l
+6.33 100.74 l
+7.88 101.29 l
+9.43 101.85 l
+10.98 102.44 l
+12.53 103.05 l
+14.09 103.69 l
+15.64 104.35 l
+17.19 105.03 l
+18.74 105.74 l
+20.30 106.47 l
+21.85 107.23 l
+23.40 108.02 l
+24.95 108.83 l
+26.51 109.67 l
+28.06 110.53 l
+29.61 111.43 l
+31.16 112.35 l
+32.72 113.31 l
+34.27 114.29 l
+35.82 115.30 l
+37.37 116.35 l
+38.92 117.42 l
+40.48 118.52 l
+42.03 119.66 l
+43.58 120.82 l
+45.13 122.02 l
+46.69 123.24 l
+48.24 124.51 l
+49.79 125.80 l
+51.34 127.11 l
+52.90 128.48 l
+54.45 129.85 l
+56.00 131.28 l
+57.55 132.73 l
+59.11 134.20 l
+60.66 135.72 l
+62.21 137.25 l
+63.76 138.83 l
+65.31 140.43 l
+66.87 142.07 l
+68.42 143.73 l
+69.97 145.41 l
+71.52 147.14 l
+73.08 148.89 l
+74.63 150.66 l
+76.18 152.47 l
+77.73 154.29 l
+79.29 156.15 l
+80.84 158.02 l
+82.39 159.93 l
+83.94 161.85 l
+85.50 163.79 l
+87.05 165.77 l
+88.60 167.75 l
+90.15 169.76 l
+91.70 171.78 l
+93.26 173.82 l
+94.81 175.88 l
+96.36 177.95 l
+97.91 180.03 l
+99.47 182.13 l
+101.02 184.23 l
+102.57 186.35 l
+104.12 188.47 l
+105.68 190.60 l
+107.23 192.73 l
+108.78 194.87 l
+110.33 197.01 l
+111.89 199.14 l
+113.44 201.28 l
+114.99 203.42 l
+116.54 205.55 l
+118.09 207.67 l
+119.65 209.79 l
+121.20 211.89 l
+122.75 213.99 l
+124.30 216.08 l
+125.86 218.15 l
+127.41 220.21 l
+128.96 222.24 l
+130.51 224.27 l
+132.07 226.26 l
+133.62 228.24 l
+135.17 230.20 l
+136.72 232.13 l
+138.28 234.04 l
+139.83 235.91 l
+141.38 237.77 l
+142.93 239.59 l
+144.48 241.37 l
+146.04 243.14 l
+147.59 244.85 l
+149.14 246.54 l
+150.69 248.19 l
+152.25 249.80 l
+153.80 251.39 l
+155.35 252.91 l
+156.90 254.41 l
+158.46 255.87 l
+160.01 257.28 l
+161.56 258.66 l
+163.11 259.98 l
+164.67 261.28 l
+166.22 262.51 l
+167.77 263.71 l
+169.32 264.88 l
+170.87 265.98 l
+172.43 267.06 l
+173.98 268.07 l
+175.53 269.05 l
+177.08 269.98 l
+178.64 270.87 l
+180.19 271.73 l
+181.74 272.52 l
+183.29 273.28 l
+184.85 274.00 l
+186.40 274.67 l
+187.95 275.32 l
+189.50 275.90 l
+191.06 276.47 l
+192.61 276.99 l
+194.16 277.47 l
+195.71 277.93 l
+197.26 278.33 l
+198.82 278.72 l
+200.37 279.07 l
+201.92 279.39 l
+203.47 279.69 l
+205.03 279.96 l
+206.58 280.21 l
+208.13 280.43 l
+209.68 280.63 l
+211.24 280.82 l
+212.79 280.99 l
+214.34 281.15 l
+215.89 281.29 l
+217.45 281.43 l
+219.00 281.56 l
+220.55 281.69 l
+222.10 281.81 l
+223.65 281.93 l
+225.21 282.06 l
+226.76 282.19 l
+228.31 282.34 l
+229.86 282.48 l
+231.42 282.65 l
+232.97 282.83 l
+234.52 283.03 l
+236.07 283.25 l
+237.63 283.49 l
+239.18 283.77 l
+240.73 284.06 l
+242.28 284.39 l
+243.84 284.75 l
+245.39 285.15 l
+246.94 285.59 l
+248.49 286.06 l
+250.04 286.59 l
+251.60 287.15 l
+253.15 287.76 l
+254.70 288.42 l
+256.25 289.12 l
+257.81 289.90 l
+259.36 290.71 l
+260.91 291.59 l
+262.46 292.53 l
+264.02 293.50 l
+265.57 294.58 l
+267.12 295.68 l
+268.67 296.87 l
+270.23 298.11 l
+271.78 299.40 l
+273.33 300.80 l
+274.88 302.22 l
+276.43 303.74 l
+277.99 305.31 l
+279.54 306.94 l
+281.09 308.66 l
+282.64 310.42 l
+284.20 312.27 l
+285.75 314.17 l
+287.30 316.14 l
+288.85 318.19 l
+290.41 320.27 l
+291.96 322.45 l
+293.51 324.67 l
+295.06 326.95 l
+296.62 329.30 l
+298.17 331.68 l
+299.72 334.16 l
+301.27 336.66 l
+302.82 339.22 l
+304.38 341.83 l
+305.93 344.47 l
+307.48 347.18 l
+309.03 349.91 l
+310.59 352.69 l
+312.14 355.51 l
+313.69 358.34 l
+315.24 361.22 l
+316.80 364.12 l
+318.35 367.04 l
+319.90 369.99 l
+321.45 372.94 l
+323.01 375.91 l
+324.56 378.89 l
+326.11 381.87 l
+327.66 384.86 l
+329.21 387.84 l
+330.77 390.81 l
+332.32 393.78 l
+333.87 396.72 l
+335.42 399.66 l
+336.98 402.57 l
+338.53 405.45 l
+340.08 408.31 l
+341.63 411.12 l
+343.19 413.90 l
+344.74 416.64 l
+346.29 419.33 l
+347.84 421.99 l
+349.39 424.56 l
+350.95 427.11 l
+352.50 429.58 l
+354.05 431.98 l
+355.60 434.34 l
+357.16 436.60 l
+358.71 438.81 l
+360.26 440.92 l
+361.81 442.96 l
+363.37 444.93 l
+364.92 446.79 l
+366.47 448.60 l
+368.02 450.27 l
+369.58 451.88 l
+371.13 453.39 l
+372.68 454.78 l
+374.23 456.12 l
+375.78 457.29 l
+377.34 458.41 l
+378.89 459.40 l
+380.44 460.28 l
+381.99 461.09 l
+383.55 461.74 l
+385.10 462.32 l
+386.65 462.76 l
+388.20 463.10 l
+389.76 463.35 l
+391.31 463.45 l
+392.86 463.48 l
+394.41 463.35 l
+395.97 463.14 l
+397.52 462.83 l
+399.07 462.36 l
+400.62 461.84 l
+402.17 461.16 l
+403.73 460.40 l
+405.28 459.53 l
+406.83 458.53 l
+408.38 457.47 l
+409.94 456.25 l
+411.49 454.96 l
+413.04 453.57 l
+414.59 452.06 l
+416.15 450.50 l
+417.70 448.78 l
+419.25 447.01 l
+420.80 445.13 l
+422.36 443.17 l
+423.91 441.14 l
+425.46 438.99 l
+427.01 436.79 l
+428.56 434.48 l
+430.12 432.11 l
+431.67 429.68 l
+433.22 427.15 l
+434.77 424.58 l
+436.33 421.92 l
+437.88 419.21 l
+439.43 416.44 l
+440.98 413.60 l
+442.54 410.73 l
+444.09 407.78 l
+445.64 404.80 l
+447.19 401.77 l
+448.75 398.69 l
+450.30 395.58 l
+451.85 392.41 l
+453.40 389.23 l
+454.95 386.00 l
+456.51 382.75 l
+458.06 379.48 l
+459.61 376.16 l
+461.16 372.84 l
+462.72 369.49 l
+464.27 366.13 l
+465.82 362.75 l
+467.37 359.36 l
+468.93 355.96 l
+470.48 352.55 l
+472.03 349.14 l
+473.58 345.72 l
+475.14 342.30 l
+476.69 338.88 l
+478.24 335.46 l
+479.79 332.05 l
+481.34 328.64 l
+482.90 325.24 l
+484.45 321.85 l
+486.00 318.47 l
+487.55 315.10 l
+489.11 311.75 l
+490.66 308.41 l
+492.21 305.08 l
+493.76 301.78 l
+495.32 298.49 l
+496.87 295.22 l
+498.42 291.97 l
+499.97 288.74 l
+501.53 285.54 l
+503.08 282.35 l
+504.63 279.19 l
+506.18 276.06 l
+507.73 272.94 l
+509.29 269.86 l
+510.84 266.79 l
+512.39 263.76 l
+513.94 260.76 l
+515.50 257.77 l
+517.05 254.82 l
+518.60 251.89 l
+520.15 249.00 l
+521.71 246.13 l
+523.26 243.28 l
+524.81 240.47 l
+526.36 237.68 l
+527.92 234.93 l
+529.47 232.20 l
+531.02 229.50 l
+532.57 226.83 l
+534.12 224.18 l
+535.68 221.58 l
+537.23 218.99 l
+538.78 216.43 l
+540.33 213.91 l
+541.89 211.40 l
+543.44 208.94 l
+544.99 206.49 l
+546.54 204.08 l
+548.10 201.69 l
+549.65 199.33 l
+551.20 197.01 l
+552.75 194.70 l
+554.31 192.43 l
+555.86 190.18 l
+557.41 187.96 l
+558.96 185.77 l
+560.51 183.60 l
+562.07 181.48 l
+563.62 179.36 l
+565.17 177.28 l
+566.72 175.23 l
+568.28 173.20 l
+569.83 171.21 l
+571.38 169.24 l
+572.93 167.30 l
+574.49 165.38 l
+576.00 163.54 l
+S
+Q
+endstream
+endobj
+7 0 obj
+7425
+endobj
+3 0 obj
+<<
+/Type /Pages
+/Kids [
+5 0 R
+]
+/Count 1
+/MediaBox [0 0 576 576]
+>>
+endobj
+4 0 obj
+<<
+/ProcSet [/PDF /Text]
+/Font <>
+/ExtGState << >>
+>>
+endobj
+8 0 obj
+<<
+/Type /Encoding
+/BaseEncoding /WinAnsiEncoding
+/Differences [ 45/minus 96/quoteleft
+144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
+/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space]
+>>
+endobj
+9 0 obj <<
+/Type /Font
+/Subtype /Type1
+/Name /F2
+/BaseFont /Helvetica
+/Encoding 8 0 R
+>> endobj
+10 0 obj <<
+/Type /Font
+/Subtype /Type1
+/Name /F3
+/BaseFont /Helvetica-Bold
+/Encoding 8 0 R
+>> endobj
+xref
+0 11
+0000000000 65535 f
+0000000021 00000 n
+0000000164 00000 n
+0000007791 00000 n
+0000007874 00000 n
+0000000213 00000 n
+0000000293 00000 n
+0000007771 00000 n
+0000007966 00000 n
+0000008223 00000 n
+0000008319 00000 n
+trailer
+<<
+/Size 11
+/Info 1 0 R
+/Root 2 0 R
+>>
+startxref
+8421
+%%EOF
diff -r 000000000000 -r f4e143b7eb06 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Jul 28 11:30:28 2014 -0400
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+