annotate plot_distribution.r @ 0:d2655e17c24d draft

Uploaded
author ynewton
date Fri, 05 Oct 2012 02:09:19 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d2655e17c24d Uploaded
ynewton
parents:
diff changeset
1 #!/usr/bin/Rscript
d2655e17c24d Uploaded
ynewton
parents:
diff changeset
2
d2655e17c24d Uploaded
ynewton
parents:
diff changeset
3 #usage, options and doc goes here
d2655e17c24d Uploaded
ynewton
parents:
diff changeset
# Usage text for this script. The previous text was copied from normalize.r
# and described arguments (norm_type, norm_by, normals_file) that this script
# does not accept; main() only reads an input path and an output path.
argspec <- c("plot_distribution.r - takes a tab-delimited flat file and plots the distribution of all of its values as a histogram and as a density curve. Requires a single header line and a single column of annotation.
Usage:
    plot_distribution.r input.tab output.pdf
Example:
    Rscript plot_distribution.r test_matrix.tab distribution.pdf
Options:
    input matrix (annotated by row and column names)
    output PDF file; the histogram and the density plot are written to it as separate pages")
d2655e17c24d Uploaded
ynewton
parents:
diff changeset
27
d2655e17c24d Uploaded
ynewton
parents:
diff changeset
# Read a tab-delimited file into a matrix.
#
# The first line of the file is treated as the header and the first column
# as the row labels; every remaining column becomes a column of the result.
#
# Args:
#   in_file: path of the tab-delimited input file.
# Returns:
#   A matrix holding all columns except the first, with rownames taken from
#   the first column. The literal string "NA" is read as a missing value.
read_matrix <- function(in_file) {
  first_line <- readLines(con = in_file, n = 1)
  if (length(first_line) == 0) {
    # Without this guard the header split below fails with an opaque
    # "subscript out of bounds" error.
    stop("input file is empty", call. = FALSE)
  }
  header <- strsplit(first_line, "\t")[[1]]

  # Logical mask over the header fields: FALSE for the annotation column,
  # TRUE for every data column. seq_along() is safe for a zero-length header.
  cl.cols <- seq_along(header) > 1

  data_matrix.df <- read.delim(
    in_file,
    header = TRUE,
    row.names = NULL,
    stringsAsFactors = FALSE,
    na.strings = "NA",
    check.names = FALSE
  )

  # drop = FALSE keeps a data frame (and therefore the column name) even when
  # only one data column is selected; the default would collapse to a vector.
  data_matrix <- as.matrix(data_matrix.df[, cl.cols, drop = FALSE])
  rownames(data_matrix) <- data_matrix.df[, 1]
  data_matrix
}
d2655e17c24d Uploaded
ynewton
parents:
diff changeset
36
d2655e17c24d Uploaded
ynewton
parents:
diff changeset
# Script entry point: plot the distribution of every value in a matrix file.
#
# Reads the matrix with read_matrix(), then writes a histogram and a density
# plot of all of its values to a PDF.
#
# Args:
#   argv: character vector of command-line arguments;
#         argv[1] is the tab-delimited input file,
#         argv[2] is the PDF file to create.
# Returns:
#   NULL, invisibly.
main <- function(argv) {
  if (length(argv) < 2) {
    stop("expected two arguments: input.tab output.pdf", call. = FALSE)
  }
  in_file <- argv[1]
  out_file <- argv[2]

  # Silence anything printed to stdout while plotting, and always undo the
  # diversion, including when an error is raised below.
  sink('/dev/null')
  on.exit(sink(), add = TRUE)

  input_data <- read_matrix(in_file)
  if (!is.numeric(input_data)) {
    stop("input matrix contains non-numeric values", call. = FALSE)
  }

  pdf(out_file, bg="white")
  # Close the device on every exit path so a failed plot does not leave the
  # PDF open.
  on.exit(dev.off(), add = TRUE)

  par(mfrow=c(1,1))
  hist(input_data, col="lightblue", labels=TRUE, main="Histogram", xlab="")
  # read_matrix() turns "NA" fields into missing values; density() raises an
  # error on them unless na.rm = TRUE (hist() already ignores them).
  plot(density(input_data, na.rm = TRUE), type="l", col="blue", main="Density")

  # Return invisibly: the sink is restored on exit, so a visible return value
  # would be auto-printed to stdout by the top-level call.
  invisible(NULL)
}
d2655e17c24d Uploaded
ynewton
parents:
diff changeset
50
d2655e17c24d Uploaded
ynewton
parents:
diff changeset
# Run the script with only the arguments that follow the script name.
main(commandArgs(trailingOnly = TRUE))