comparison snpEff_2_1a/scripts/hist.pl @ 0:f8eaa3f8194b default tip

Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author greg
date Fri, 20 Apr 2012 14:47:09 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f8eaa3f8194b
1 #!/usr/bin/perl
2 #-------------------------------------------------------------------------------
3 #
4 # Plot a histogram (using R)
5 # Data is feed as a 1 column of numbers
6 #
7 # Note: Any line that does not match a numeric regular expression, is filtered out).
8 #
9 # Pablo Cingolani
10 #-------------------------------------------------------------------------------
11
12 #-------------------------------------------------------------------------------
13 # Main
14 #-------------------------------------------------------------------------------
15
16 # Parse command line option (file base name)
17 $base = 'hist';
18 if( $ARGV[0] ne '' ) { $base = $ARGV[0]; }
19
20 $pngFile = "$base.png";
21 $txtFile = "$base.txt";
22
23 # Read STDIN and create an R vector
24 open TXT, "> $txtFile" or die "Cannot open output file '$txtFile'\n";
25 print TXT "x\n";
26 for( $ln = 0 ; $l = <STDIN> ; ) {
27 chomp $l;
28
29 # Does the string contain exactly one number? (can be float)
30 if( $l =~ /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/ ) { print TXT "$l\n"; }
31 }
32 close TXT;
33
34 #---
35 # Create an R program, save histogram plot as PNG image
36 #---
37
38 open R, "| R --vanilla --slave " or die "Cannot open R program\n";
39 print R <<EOF;
40
41 histDens <- function( x, title, q=1.0, breaks = 30 ) {
42 # Show only this part of the data
43 xmin <- quantile( x, 1-q )
44 xmax <- quantile( x, q )
45 data <- x[ (x >= xmin) & (x <= xmax) ];
46
47 dens <- density(data)
48
49 h <- hist(data, main=title, xlab = "data", ylab = "Frequency", freq = T, breaks=breaks);
50
51 # Adjust density height to 'frecuency'
52 dens\$y <- max(h\$counts) * dens\$y/max(dens\$y)
53 lines(dens, col='red')
54
55 # Mean & median calculated over the whola data
56 abline( v=mean(x), col='blue', lty=2, lwd=2);
57 abline( v=median(x), col='green', lty=2, lwd=2);
58
59 legend("topright",c("Mean","Median"),lty=c(1,1),col=c("blue","green"))
60
61 }
62
63 png('$pngFile', width = 1024, height = 1024);
64 par( mfrow=c(2,1) );
65
66 data <- read.csv("$txtFile", sep='\\t', header = TRUE);
67 x <- data\$x
68
69 histDens( x, "Histogram: All data", 1.0 );
70 histDens( x, "Histogram: Quantile [2% - 98%]", 0.98 );
71
72 print( summary( x ) )
73
74 dev.off();
75 quit( save='no' )
76 EOF
77
78 close R;
79
80 #---
81 # Show figure
82 #---
83
84 $os = `uname`;
85 $show = "eog";
86 if( $os =~ "Darwin" ) { $show = "open"; }
87 `$show $pngFile`;
88