Mercurial > repos > greg > snpeff_v2_from_pablo
comparison snpEff_2_1a/scripts/hist.pl @ 0:f8eaa3f8194b default tip
Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author | greg |
---|---|
date | Fri, 20 Apr 2012 14:47:09 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f8eaa3f8194b |
---|---|
1 #!/usr/bin/perl | |
2 #------------------------------------------------------------------------------- | |
3 # | |
4 # Plot a histogram (using R) | |
5 # Data is feed as a 1 column of numbers | |
6 # | |
7 # Note: Any line that does not match a numeric regular expression, is filtered out). | |
8 # | |
9 # Pablo Cingolani | |
10 #------------------------------------------------------------------------------- | |
11 | |
12 #------------------------------------------------------------------------------- | |
13 # Main | |
14 #------------------------------------------------------------------------------- | |
15 | |
16 # Parse command line option (file base name) | |
17 $base = 'hist'; | |
18 if( $ARGV[0] ne '' ) { $base = $ARGV[0]; } | |
19 | |
20 $pngFile = "$base.png"; | |
21 $txtFile = "$base.txt"; | |
22 | |
23 # Read STDIN and create an R vector | |
24 open TXT, "> $txtFile" or die "Cannot open output file '$txtFile'\n"; | |
25 print TXT "x\n"; | |
26 for( $ln = 0 ; $l = <STDIN> ; ) { | |
27 chomp $l; | |
28 | |
29 # Does the string contain exactly one number? (can be float) | |
30 if( $l =~ /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/ ) { print TXT "$l\n"; } | |
31 } | |
32 close TXT; | |
33 | |
34 #--- | |
35 # Create an R program, save histogram plot as PNG image | |
36 #--- | |
37 | |
38 open R, "| R --vanilla --slave " or die "Cannot open R program\n"; | |
39 print R <<EOF; | |
40 | |
41 histDens <- function( x, title, q=1.0, breaks = 30 ) { | |
42 # Show only this part of the data | |
43 xmin <- quantile( x, 1-q ) | |
44 xmax <- quantile( x, q ) | |
45 data <- x[ (x >= xmin) & (x <= xmax) ]; | |
46 | |
47 dens <- density(data) | |
48 | |
49 h <- hist(data, main=title, xlab = "data", ylab = "Frequency", freq = T, breaks=breaks); | |
50 | |
51 # Adjust density height to 'frecuency' | |
52 dens\$y <- max(h\$counts) * dens\$y/max(dens\$y) | |
53 lines(dens, col='red') | |
54 | |
55 # Mean & median calculated over the whola data | |
56 abline( v=mean(x), col='blue', lty=2, lwd=2); | |
57 abline( v=median(x), col='green', lty=2, lwd=2); | |
58 | |
59 legend("topright",c("Mean","Median"),lty=c(1,1),col=c("blue","green")) | |
60 | |
61 } | |
62 | |
63 png('$pngFile', width = 1024, height = 1024); | |
64 par( mfrow=c(2,1) ); | |
65 | |
66 data <- read.csv("$txtFile", sep='\\t', header = TRUE); | |
67 x <- data\$x | |
68 | |
69 histDens( x, "Histogram: All data", 1.0 ); | |
70 histDens( x, "Histogram: Quantile [2% - 98%]", 0.98 ); | |
71 | |
72 print( summary( x ) ) | |
73 | |
74 dev.off(); | |
75 quit( save='no' ) | |
76 EOF | |
77 | |
78 close R; | |
79 | |
80 #--- | |
81 # Show figure | |
82 #--- | |
83 | |
84 $os = `uname`; | |
85 $show = "eog"; | |
86 if( $os =~ "Darwin" ) { $show = "open"; } | |
87 `$show $pngFile`; | |
88 |