Mercurial > repos > peter-waltman > ucsc_cluster_tools2
view cluster.tools/rnaseq.feature.selection.R @ 9:a3c03541fe6f draft default tip
Uploaded
author | peter-waltman |
---|---|
date | Mon, 11 Mar 2013 17:30:48 -0400 |
parents | 563832f48c08 |
children |
line wrap: on
line source
#!/usr/bin/env Rscript ## RNASeq selection script by Peter Waltman ## August 21, 2011 ## License under Creative Commons Attribution 3.0 Unported (CC BY 3.0) ## #usage, options and doc goes here - NEEDS TO BE UPDATED ---- TBD argspec <- c("ipl.feature.selection.R takes a set of results from Paradigm, and filters for features that are active, inactive or modulated above a given IPL threshold over a sufficient percentage of samples. Usage: ipl.feature.selection.R -d <data.file> Optional: -o <output.name> -g <genes-only> ## to set if only returning genes (default is all features) -f <filter.type> ## filter.type must be either 'modulated', 'active'or 'inactive' (default is modulated) -t <threshold> ## the threshold to use for the filter (default is 0.25) -p <perc.pass> ## the percentage of samples that must pass the filter (default is 0.33) -v <verbose> ## to set verbose on \n\n") args <- commandArgs(TRUE) if ( length( args ) == 1 && args =="--help") { write(argspec, stderr()) q(); } ## some helper fn's write.2.tab <- function( mat, fname ) { mat <- rbind( colnames( mat ), mat ) mat <- cbind( c( "ID", rownames( mat )[-1] ), mat ) write.table( mat, fname, sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE ) } lib.load.quiet <- function( package ) { package <- as.character(substitute(package)) suppressPackageStartupMessages( do.call( "library", list( package=package ) ) ) } ## MAXIMUM absolute deviation function maxad <- function( x, na.rm=FALSE, center=median(x, na.rm=na.rm ) ) { return( max( abs(x), na.rm=na.rm ) - center ) } lib.load.quiet(getopt) spec <- matrix( c( "data.fname", "d", 1, "character", "output.fname", "o", 2, "character", "var.method", "m", 2, "character", "z.transform", "z", 2, "character", "perc.pass", "p", 2, "numeric" ), nc=4, byrow=TRUE ) opt <- getopt( spec=spec ) data <- as.matrix( read.delim( opt$data.fname, header=T, row.names=1 , check.names=FALSE ) ) #set some reasonable defaults for the options that are needed, #but were not specified. if ( is.null( opt$perc.pass ) ) { opt$perc.pass=1500 } if ( opt$perc.pass < 0 ) { stop( "please specify a positive number for the percentage of samples that pass the filter (if applicable)" ) } else { if ( opt$perc.pass < 1 ) { opt$perc.pass <- floor( opt$perc.pass * nrow( data ) ) } } if ( is.null( opt$var.method ) ) { opt$var.method <- 'mad' } if ( is.null( opt$z.transform ) ) { opt$z.transform <- TRUE } else { opt$z.transform <- ( tolower( opt$z.transform ) %in% "yes" ) } if ( is.null( opt$output.fname ) ) opt$output.fname <- "high.var.rnaseq.tab" if ( opt$z.transform ) { ##normalize it to have mean==0, std==1 data <- t( data ) data <- t( scale( data ) ) } my.var.fn <- get( opt$var.method ) my.var.vals <- apply( data, 1, my.var.fn, na.rm=T ) filter.vect <- sort( my.var.vals, dec=TRUE, index=TRUE )$ix[ 1:opt$perc.pass ] data <- data[ filter.vect, ] write.2.tab( data, opt$output.fname )