Mercurial > repos > peter-waltman > ucsc_cluster_tools2
comparison cluster.tools/rnaseq.feature.selection.R @ 1:dddfeedb85af draft
Uploaded
author | peter-waltman |
---|---|
date | Fri, 01 Mar 2013 10:16:53 -0500 |
parents | |
children | 563832f48c08 |
comparison
equal
deleted
inserted
replaced
0:0decf3fd54bc | 1:dddfeedb85af |
---|---|
1 #!/usr/bin/env Rscript | |
2 ## RNASeq selection script by Peter Waltman | |
3 ## August 21, 2011 | |
4 ## License under Creative Commons Attribution 3.0 Unported (CC BY 3.0) | |
5 ## | |
6 #usage, options and doc goes here - NEEDS TO BE UPDATED ---- TBD | |
7 argspec <- c("ipl.feature.selection.R takes a set of results from Paradigm, and filters for features that are | |
8 active, inactive or modulated above a given IPL threshold over a sufficient percentage of samples. | |
9 | |
10 Usage: | |
11 ipl.feature.selection.R -d <data.file> | |
12 Optional: | |
13 -o <output.name> | |
14 -g <genes-only> ## to set if only returning genes (default is all features) | |
15 -f <filter.type> ## filter.type must be either 'modulated', 'active'or 'inactive' (default is modulated) | |
16 -t <threshold> ## the threshold to use for the filter (default is 0.25) | |
17 -p <perc.pass> ## the percentage of samples that must pass the filter (default is 0.33) | |
18 -v <verbose> ## to set verbose on | |
19 | |
20 \n\n") | |
21 args <- commandArgs(TRUE) | |
22 if ( length( args ) == 1 && args =="--help") { | |
23 write(argspec, stderr()) | |
24 q(); | |
25 } | |
26 | |
27 ## some helper fn's | |
28 write.2.tab <- function( mat, | |
29 fname ) { | |
30 mat <- rbind( colnames( mat ), mat ) | |
31 mat <- cbind( c( "ID", rownames( mat )[-1] ), | |
32 mat ) | |
33 write.table( mat, fname, sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE ) | |
34 } | |
35 | |
36 lib.load.quiet <- function( package ) { | |
37 package <- as.character(substitute(package)) | |
38 suppressPackageStartupMessages( do.call( "library", list( package=package ) ) ) | |
39 } | |
40 ## MAXIMUM absolute deviation function | |
41 maxad <- function( x, na.rm=FALSE, center=median(x, na.rm=na.rm ) ) { | |
42 return( max( abs(x), na.rm=na.rm ) - center ) | |
43 } | |
44 | |
45 lib.load.quiet(getopt) | |
46 | |
47 spec <- matrix( c( "data.fname", "d", 1, "character", | |
48 "output.fname", "o", 2, "character", | |
49 "var.method", "m", 2, "character", ## must be either 'active', 'inactive' or 'modulated' | |
50 "z.transform", "z", 2, "character", | |
51 "perc.pass", "p", 2, "numeric" | |
52 ), | |
53 nc=4, | |
54 byrow=TRUE | |
55 ) | |
56 | |
57 opt <- getopt( spec=spec ) | |
58 data <- as.matrix( read.delim( opt$data.fname, header=T, row.names=1 , check.names=FALSE ) ) | |
59 #set some reasonable defaults for the options that are needed, | |
60 #but were not specified. | |
61 if ( is.null( opt$perc.pass ) ) { opt$perc.pass=1500 } | |
62 if ( opt$perc.pass < 0 ) { | |
63 stop( "please specify a positive number for the percentage of samples that pass the filter (if applicable)" ) | |
64 } else { | |
65 if ( opt$perc.pass < 1 ) { | |
66 opt$perc.pass <- floor( opt$perc.pass * nrow( data ) ) | |
67 } | |
68 } | |
69 | |
70 if ( is.null( opt$var.method ) ) { opt$var.method <- 'mad' } | |
71 if ( is.null( opt$z.transform ) ) { | |
72 opt$z.transform <- TRUE | |
73 } else { | |
74 opt$z.transform <- ( tolower( opt$z.transform ) %in% "yes" ) | |
75 } | |
76 if ( is.null( opt$output.fname ) ) opt$output.fname <- "high.var.rnaseq.tab" | |
77 | |
78 if ( opt$z.transform ) { | |
79 ##normalize it to have mean==0, std==1 | |
80 data <- t( data ) | |
81 data <- t( scale( data ) ) | |
82 } | |
83 | |
84 | |
85 my.var.fn <- get( opt$var.method ) | |
86 my.var.vals <- apply( data, 1, my.var.fn, na.rm=T ) | |
87 filter.vect <- sort( my.var.vals, dec=TRUE, index=TRUE )$ix[ 1:opt$perc.pass ] | |
88 | |
89 data <- data[ filter.vect, ] | |
90 write.2.tab( data, opt$output.fname ) | |
91 |