Mercurial > repos > peter-waltman > ucsc_cluster_tools2
comparison cluster.tools/order.by.cl.R @ 0:0decf3fd54bc draft
Uploaded
author | peter-waltman |
---|---|
date | Thu, 28 Feb 2013 01:45:39 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0decf3fd54bc |
---|---|
#!/usr/bin/env Rscript
## order.by.cl.R
##
## Reorders the columns (or rows) of a tab-delimited data matrix so that the
## members of each cluster are adjacent, and appends the cluster id to each
## reordered name.

## Usage text written to stderr when --help is requested.  The option list
## mirrors the getopt specification used further down in this script.
argspec <- c("order.by.cl.R reorders a data matrix by cluster assignment

Usage:
    order.by.cl.R -d <data.file> -c <cluster.assignment.file>
Optional:
    -g    drop features whose names match complex|abstract|family
    -w    hierarchically cluster the members within each cluster
    -o <output_file>
\n\n")
args <- commandArgs(TRUE)
## Show the usage and quit as soon as --help appears anywhere on the command
## line (previously it was honoured only when it was the sole argument).
if ("--help" %in% args) {
  write(argspec, stderr())
  q()
}
14 | |
## Attach a package without printing its startup messages.
##
## package: the package name, given unquoted (e.g. lib.load.quiet(getopt));
##          it is captured with substitute() and never evaluated.
lib.load.quiet <- function(package) {
  pkg.name <- as.character(substitute(package))
  suppressPackageStartupMessages(library(pkg.name, character.only = TRUE))
}
## Required packages
lib.load.quiet(getopt)
lib.load.quiet(gplots)

## Optionally attach one alternative hclust() provider: flashClust is
## preferred, fastcluster is the fallback.  If neither is installed the
## hclust() already on the search path is used.
installed <- installed.packages()
if ("flashClust" %in% installed) {
  lib.load.quiet(flashClust)
} else if ("fastcluster" %in% installed) {
  lib.load.quiet(fastcluster)
}
30 | |
31 | |
## getopt specification, one row per option:
##   long name, short flag, argument mask, value type
## (argument mask: 0 = takes no argument, 1 = argument required,
##  2 = argument optional)
spec <- matrix(c("data.fname",    "d", 1, "character",
                 "class.select",  "c", 1, "character",
                 "genes.only",    "g", 0, "logical",
                 "within.cl.srt", "w", 0, "logical",
                 "output.fname",  "o", 2, "character"),
               ncol = 4,
               byrow = TRUE)

opt <- getopt(spec = spec)

## Both input files are mandatory; fail here with a clear message rather than
## with an obscure read.delim() error later on.
if (is.null(opt$data.fname)) {
  stop("missing required argument: -d <data.file>", call. = FALSE)
}
if (is.null(opt$class.select)) {
  stop("missing required argument: -c <cluster.assignment.file>", call. = FALSE)
}

## Default output name: swap a trailing tab/csv extension for cdt.
if (is.null(opt$output.fname)) {
  opt$output.fname <- sub("tab$|csv$", "cdt", opt$data.fname)
  ## If the input has neither extension the substitution is a no-op and the
  ## final write would overwrite the input file; append the extension instead.
  if (identical(opt$output.fname, opt$data.fname)) {
    opt$output.fname <- paste(opt$data.fname, "cdt", sep = ".")
  }
}
## Flags that were not given come back as NULL; normalise them to FALSE.
if (is.null(opt$genes.only)) opt$genes.only <- FALSE
if (is.null(opt$within.cl.srt)) opt$within.cl.srt <- FALSE
47 | |
## Read the data matrix; the first column supplies the row names and column
## names are kept exactly as written in the file (check.names = FALSE).
data <- as.matrix(read.delim(opt$data.fname, row.names = 1, check.names = FALSE))

## With -g, discard every row whose name contains "complex", "abstract" or
## "family".
if (opt$genes.only) {
  is.gene <- !grepl("complex|abstract|family", rownames(data))
  ## drop = FALSE keeps a matrix even when a single row or column remains
  data <- data[is.gene, , drop = FALSE]
}
55 | |
56 | |
## Read the cluster assignments (first file column = member ids, used as row
## names; first remaining column = cluster id) and sort members by cluster id.
cls <- as.matrix(read.delim(opt$class.select, row.names = 1))
cls <- cls[order(cls[, 1]), , drop = FALSE]

## If any clustered id is a row name of the data, treat this as a clustering
## of rows: transpose so the clustered items are always the columns (the
## matrix is transposed back before it is written out).
row.cluster <- FALSE
if (any(rownames(cls) %in% rownames(data))) {
  row.cluster <- TRUE
  data <- t(data)
}

## Keep only the clustered ids that are actually present in the data.
in.data <- rownames(cls) %in% colnames(data)
if (!all(in.data)) {
  if (!any(in.data)) {
    stop("no samples in cluster are found in data file\n")
  }
  ## drop = FALSE: a one-column cls must stay a matrix, otherwise every later
  ## cls[, 1] / rownames(cls) fails
  cls <- cls[in.data, , drop = FALSE]
}
78 | |
## With -w, reorder the members inside each cluster by hierarchical clustering
## of their data columns; the clusters themselves stay in ascending id order.
if (opt$within.cl.srt) {

  member.cl <- cls[, 1]
  cl.ids <- sort(unique(as.numeric(member.cl)))

  ordered.members <- unlist(lapply(cl.ids, function(id) {
    ## take the ids from rownames(cls): cls[, 1] loses its names when cls has
    ## only one row
    members <- rownames(cls)[member.cl %in% id]
    ## hclust() needs at least two objects; a singleton is already in order
    if (length(members) < 2) {
      return(members)
    }
    ## drop = FALSE keeps a matrix even if the data has a single row
    sub.mat <- data[, members, drop = FALSE]
    members[hclust(dist(t(sub.mat)))$order]
  }))

  cls <- cls[ordered.members, , drop = FALSE]
}
97 | |
98 | |
## Put the data columns in cluster order and relabel each as
## <original name>-cl<two-digit cluster id>.
## drop = FALSE keeps a matrix when only one clustered member remains.
data <- data[, rownames(cls), drop = FALSE]
colnames(data) <- paste(rownames(cls),
                        paste0("cl", sprintf("%02d", cls[, 1])),
                        sep = "-")

## Undo the earlier transpose if the clustering was over rows
if (row.cluster) {
  data <- t(data)
}

## col.names = NA writes an empty header cell above the row-name column
write.table(data, opt$output.fname, sep = "\t", col.names = NA, quote = FALSE)
108 |