annotate cluster.tools/new.ccplus.R @ 2:b442996b66ae draft

Uploaded
author peter-waltman
date Wed, 27 Feb 2013 20:17:04 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
1 ##!/usr/bin/env Rscript
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
2 ## Consensus Clustering Script by Peter Waltman
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
3 ## May 31, 2011
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
4 ## License under Creative Commons Attribution 3.0 Unported (CC BY 3.0)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
5 ##
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
6 #usage, options and doc goes here
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
7 argspec <- c("consensus.clustering.R takes a clustering from ConsensusClusterPlus and clinical survival data
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
8 and generates a KM-plot, along with the log-rank p-values
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
9
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
10 Usage:
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
11 consensus.clustering.R -d <data.file>
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
12 Optional:
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
13 -o <output.name>
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
14 -a <cluster.alg> ## must be either 'hc' or 'km'
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
15 -m <distance.metric> ## must be one supported by ConsensusClusterPlus
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
16 -k <max.k>
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
17 -r <reps>
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
18 -f <filter> ## filter, o/w no filtering
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
19
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
20 \n\n")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
21 args <- commandArgs(TRUE)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
22 if ( length( args ) == 1 && args =="--help") {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
23 write(argspec, stderr())
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
24 q();
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
25 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
26
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
27 require(getopt)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
28 ##require(ConsensusClusterPlus)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
29 ## if any of the faster clustering methods are available on this system, load them
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
30 require( amap )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
31 require( cluster )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
32 if ( any( c( 'flashClust', 'fastcluster' ) %in% installed.packages() ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
33 if ( 'flashClust' %in% installed.packages() ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
34 require( flashClust )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
35 } else {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
36 if ( 'fastcluster' %in% installed.packages() ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
37 require( fastcluster )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
38 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
39 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
40 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
41
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
42 ###################
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
43 ## code borrowed/updated from ConsensusClusterPlus
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
44 ###################
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
45
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
46 ConsensusClusterPlus <- function( d=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
47 maxK = 3,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
48 reps=10,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
49 pItem=0.8,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
50 pFeature=1,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
51 clusterAlg="hc",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
52 title="untitled_consensus_cluster",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
53 innerLinkage="average",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
54 finalLinkage="average",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
55 distance=ifelse( inherits(d,"dist"), attr( d, "method" ), "euclidean" ),
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
56 ml=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
57 tmyPal=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
58 seed=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
59 plot=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
60 writeTable=FALSE,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
61 weightsItem=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
62 weightsFeature=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
63 verbose=F ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
64 ##description: runs consensus subsamples
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
65
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
66
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
67 if(is.null(seed)==TRUE){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
68 seed=timeSeed = as.numeric(Sys.time())
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
69 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
70 set.seed(seed)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
71
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
72 if(is.null(ml)==TRUE){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
73
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
74 if ( inherits( distance, "dist" ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
75 stop( "If you want to pass in a pre-calculated distance object, pass it in as the data, rather than the distance parameter\n" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
76 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
77
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
78 if ( ! class( d ) %in% c( "dist", "matrix", "ExpressionSet" ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
79 stop("d must be a matrix, distance object or ExpressionSet (eset object)")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
80 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
81
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
82 if ( inherits( d, "dist" ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
83 ## if d is a distance matrix, fix a few things so that they don't cause problems with the analysis
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
84 ## Note, assumption is that if d is a distance matrix, the user doesn't want to sample over the row features
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
85 if ( is.null( attr( d, "method" ) ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
86 attr( d, "method" ) <- distance <- "unknown - user-specified"
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
87 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
88 if ( is.null( distance ) || ( distance != attr( d, "method" ) ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
89 distance <- attr( d, "method" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
90 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
91
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
92 if ( ( ! is.null( pFeature ) ) && ( pFeature < 1 ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
93 if ( verbose ) warning( "Cannot use the pFeatures parameter when specifying a distance matrix as the data object\n" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
94 pFeature <- 1
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
95 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
96 if ( ! is.null( weightsFeature ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
97 if ( verbose ) warning( "Cannot use the weightsFeature parameter when specifying a distance matrix as the data object\n" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
98 weightsFeature <- NULL
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
99 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
100 if ( clusterAlg == "km" ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
101 if ( verbose ) warning( "You are asking CCPLUS to use K-means to cluster a distance matrix (rather than the data itself) - this may produce unintended results. We suggest using PAM if you want to use alternate distance metrics/objects\n" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
102 ##d <- as.matrix( d ) #this is now done w/in ccRun
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
103 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
104 } else {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
105 if ( is.null( distance ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
106 ## we should never get here, but just in case
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
107 distance <- "pearson"
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
108 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
109 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
110
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
111 if ( ( clusterAlg == "km" ) && inherits( distance, "character" ) && ( distance != "euclidean" ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
112 warning( "WARNING: kmeans can only use the euclidean distance metric. If you would like to use an alternate metric, we suggest using PAM or HC clustering instead. This parameter combinationwill use k-means, but will NOT use the specified distance metric\n" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
113 distance <- 'euclidean'
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
114 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
115
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
116
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
117 if ( inherits( d,"ExpressionSet" ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
118 d <- exprs(d)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
119 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
120
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
121 ml <- ccRun( d=d,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
122 maxK=maxK,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
123 repCount=reps,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
124 diss=inherits(d,"dist"),
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
125 pItem=pItem,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
126 pFeature=pFeature,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
127 innerLinkage=innerLinkage,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
128 clusterAlg=clusterAlg,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
129 weightsFeature=weightsFeature,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
130 weightsItem=weightsItem,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
131 distance=distance,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
132 verbose=verbose)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
133 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
134 res=list();
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
135
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
136 ##make results directory
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
137 if((is.null(plot)==FALSE | writeTable) & !file.exists(paste(title,sep=""))){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
138 dir.create(paste(title,sep=""))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
139 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
140
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
141 ##write log file
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
142 log <- matrix( ncol=2,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
143 byrow=T,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
144 c("title",title,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
145 "maxK",maxK,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
146 "input matrix rows",ifelse ( inherits( d, "matrix" ), nrow(d), "dist-mat" ),
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
147 "input matric columns",ifelse ( inherits( d, "matrix" ), ncol(d), ncol( as.matrix(d) ) ),
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
148 "number of bootstraps",reps,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
149 "item subsampling proportion",pItem,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
150 "feature subsampling proportion",ifelse( is.null(pFeature), 1, pFeature ),
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
151 "cluster algorithm",clusterAlg,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
152 "inner linkage type",innerLinkage,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
153 "final linkage type",finalLinkage,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
154 "correlation method",distance,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
155 "plot",if(is.null(plot)) NA else plot,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
156 "seed",if(is.null(seed)) NA else seed))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
157 colnames(log) = c("option","value")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
158 if(writeTable){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
159 write.csv(file=paste(title,"/",title,".log.csv",sep=""), log,row.names=F)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
160 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
161 if(is.null(plot)){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
162 ##nothing
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
163 }else if(plot=="png"){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
164 png(paste(title,"/","consensus%03d.png",sep=""))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
165 }else if (plot=="pdf"){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
166 pdf(onefile=TRUE, paste(title,"/","consensus.pdf",sep=""))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
167 }else if (plot=="ps"){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
168 postscript(onefile=TRUE, paste(title,"/","consensus.ps",sep=""))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
169 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
170
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
171 colorList=list()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
172 colorM = rbind() #matrix of colors.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
173
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
174 #18 colors for marking different clusters
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
175 thisPal <- c("#A6CEE3","#1F78B4","#B2DF8A","#33A02C","#FB9A99","#E31A1C","#FDBF6F","#FF7F00","#CAB2D6","#6A3D9A","#FFFF99","#B15928",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
176 "#bd18ea", #magenta
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
177 "#2ef4ca", #aqua
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
178 "#f4cced", #pink,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
179 "#f4cc03", #lightorange
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
180 "#05188a", #navy,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
181 "#e5a25a", #light brown
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
182 "#06f106", #bright green
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
183 "#85848f", #med gray
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
184 "#000000", #black
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
185 "#076f25", #dark green
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
186 "#93cd7f",#lime green
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
187 "#4d0776", #dark purple
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
188 "#ffffff" #white
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
189 )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
190
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
191 ##plot scale
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
192 colBreaks=NA
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
193 if(is.null(tmyPal)==TRUE){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
194 colBreaks=10
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
195 tmyPal = myPal(colBreaks)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
196 }else{
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
197 colBreaks=length(tmyPal)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
198 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
199 sc = cbind(seq(0,1,by=1/( colBreaks) )); rownames(sc) = sc[,1]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
200 sc = cbind(sc,sc)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
201 heatmap(sc, Colv=NA, Rowv=NA, symm=FALSE, scale='none', col=tmyPal, na.rm=TRUE,labRow=rownames(sc),labCol=F,main="consensus matrix legend")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
202
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
203 for (tk in 2:maxK){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
204 if(verbose){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
205 message(paste("consensus ",tk))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
206 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
207 fm = ml[[tk]]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
208 hc=hclust( as.dist( 1 - fm ), method=finalLinkage);
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
209 message("clustered")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
210 ct = cutree(hc,tk)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
211 names(ct) = colnames(d)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
212 c = fm
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
213 ##colnames(c) = colnames(d)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
214 ##rownames(c) = colnames(d)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
215
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
216 colorList = setClusterColors(res[[tk-1]][[3]],ct,thisPal,colorList)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
217
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
218 pc = c
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
219 pc=pc[hc$order,] #***pc is matrix for plotting, same as c but is row-ordered and has names and extra row of zeros.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
220 pc = rbind(pc,0)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
221
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
222 heatmap(pc, Colv=as.dendrogram(hc), Rowv=NA, symm=FALSE, scale='none', col=tmyPal, na.rm=TRUE,labRow=F,labCol=F,mar=c(5,5),main=paste("consensus matrix k=",tk,sep="") , ColSideCol=colorList[[1]])
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
223 legend("topright",legend=unique(ct),fill=unique(colorList[[1]]),horiz=FALSE )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
224
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
225 res[[tk]] = list(consensusMatrix=c,consensusTree=hc,consensusClass=ct,ml=ml[[tk]],clrs=colorList)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
226 colorM = rbind(colorM,colorList[[1]])
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
227 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
228 CDF(ml)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
229 clusterTrackingPlot(colorM[,res[[length(res)]]$consensusTree$order])
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
230 if(is.null(plot)==FALSE){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
231 dev.off();
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
232 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
233 res[[1]] = colorM
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
234 if(writeTable){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
235 for(i in 2:length(res)){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
236 write.csv(file=paste(title,"/",title,".k=",i,".consensusMatrix.csv",sep=""), res[[i]]$consensusMatrix)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
237 write.table(file=paste(title,"/",title,".k=",i,".consensusClass.csv",sep=""), res[[i]]$consensusClass,col.names = F,sep=",")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
238 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
239 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
240 return(res)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
241 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
242
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
243
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
244 calcICL = function(res,title="untitled_consensus_cluster",plot=NULL,writeTable=FALSE){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
245 #calculates and plots cluster consensus and item consensus
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
246 cc=rbind()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
247 cci = rbind()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
248 sumRes=list()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
249 colorsArr=c()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
250
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
251 #make results directory
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
252 if((is.null(plot)==FALSE | writeTable) & !file.exists(paste(title,sep=""))){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
253 dir.create(paste(title,sep=""))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
254 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
255 if(is.null(plot)){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
256 #to screen
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
257 }else if(plot=="pdf"){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
258 pdf(onefile=TRUE, paste(title,"/","icl.pdf",sep=""))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
259 }else if(plot=="ps"){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
260 postscript(onefile=TRUE, paste(title,"/","icl.ps",sep=""))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
261 }else if (plot=="png"){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
262 png(paste(title,"/","icl%03d.png",sep=""))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
263 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
264
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
265 par(mfrow=c(3,1),mar=c(4,3,2,0))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
266
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
267 for (k in 2:length(res)){ #each k
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
268 eiCols = c();
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
269 o = res[[k]]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
270 m = o$consensusMatrix
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
271 m = triangle(m,mode=2)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
272 for (ci in sort(unique(o$consensusClass))){ #each cluster in k
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
273 items = which(o$consensusClass==ci)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
274 nk = length(items)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
275 mk = sum( m[items,items], na.rm=T)/((nk*(nk-1))/2)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
276 cc=rbind(cc,c(k,ci,mk)) #cluster-consensus
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
277
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
278 for (ei in rev(res[[2]]$consensusTree$order) ){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
279 denom = if (ei %in% items) { nk - 1} else { nk }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
280 mei = sum( c(m[ei,items],m[items,ei]), na.rm=T)/denom # mean item consensus to a cluster.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
281 cci = rbind(cci,c(k,ci,ei,mei)) #cluster, cluster index, item index, item-consensus
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
282 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
283 eiCols = c(eiCols, rep(ci,length(o$consensusClass)) )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
284 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
285
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
286 cck = cci[which(cci[,1]==k),] #only plot the new k data.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
287
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
288 #group by item, order by cluster i
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
289 w=lapply(split(cck,cck[,3]), function(x) { y=matrix(unlist(x),ncol=4); y[order(y[,2]),4] })
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
290 q = matrix(as.numeric(unlist(w)),ncol=length(w),byrow=F)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
291 q = q[,res[[2]]$consensusTree$order] #order by leave order of k=2
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
292 #q is a matrix of k rows and sample columns, values are item consensus of sample to the cluster.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
293
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
294 thisColors = unique(cbind(res[[k]]$consensusClass,res[[k]]$clrs[[1]]))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
295 thisColors=thisColors[order(as.numeric(thisColors[,1])),2]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
296 colorsArr=c(colorsArr,thisColors)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
297 sumRes[[k]] = rankedBarPlot(q,thisColors,cc=res[[k]]$consensusClass[res[[2]]$consensusTree$order],paste("k=",k,sep="") )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
298 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
299
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
300 ys=cs=lab=c()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
301 lastk=cc[1,1]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
302 for(i in 1:length(colorsArr)){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
303 if(lastk != cc[i,1]){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
304 ys=c(ys,0,0)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
305 cs=c(cs,NA,NA)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
306 lastk=cc[i,1]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
307 lab=c(lab,NA,NA)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
308 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
309 ys=c(ys,cc[i,3])
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
310 cs=c(cs,colorsArr[i])
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
311 lab=c(lab,cc[i,1])
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
312 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
313 names(ys) = lab
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
314 par(mfrow=c(3,1),mar=c(4,3,2,0))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
315 barplot(ys,col=cs,border=cs,main="cluster-consensus",ylim=c(0,1),las=1)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
316 if(is.null(plot)==FALSE){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
317 dev.off()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
318 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
319 colnames(cc) = c("k","cluster","clusterConsensus")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
320 colnames(cci) = c("k","cluster","item","itemConsensus")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
321 cci[,"item"] = names(res[[2]]$consensusClass)[ cci[,"item"] ]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
322 #type cci
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
323 cci = data.frame( k=as.numeric(cci[,"k"]), cluster=as.numeric(cci[,"cluster"]), item=cci[,"item"], itemConsensus=as.numeric(cci[,"itemConsensus"]))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
324
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
325 #write to file.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
326 if(writeTable){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
327 write.csv(file=paste(title,"/",title,".summary.cluster.consensus.csv",sep=""),row.names=F, cc)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
328 write.csv(file=paste(title,"/",title,".summary.item.consensus.csv",sep=""), row.names=F, cc)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
329 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
330 return(list(clusterConsensus=cc,itemConsensus=cci))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
331 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
332
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
333
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
334 ccRun <- function( d=d,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
335 maxK=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
336 repCount=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
337 diss=inherits( d, "dist" ),
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
338 pItem=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
339 pFeature=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
340 innerLinkage=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
341 distance=ifelse( inherits(d,"dist"), attr( d, "method" ), "euclidean" ),
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
342 clusterAlg=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
343 weightsItem=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
344 weightsFeature=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
345 verbose=NULL) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
346 m = vector(mode='list', repCount)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
347 ml = vector(mode="list",maxK)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
348 n <- ifelse( diss, ncol( as.matrix(d) ), ncol(d) )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
349 mCount = mConsist = matrix(c(0),ncol=n,nrow=n)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
350 ml[[1]] = c(0);
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
351
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
352 if (is.null( distance ) ) distance <- 'euclidean' ## necessary if d is a dist object and attr( d, "method" ) == NULLa
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
353
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
354 require( amap )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
355 ## we're going to use the amap Dist function, but they misname their correlation
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
356 ## functions, so re-name them correctly
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
357 amap.distance <- c( "euclidean", "maximum", "manhattan", "canberra", "binary",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
358 "pearson", "abspearson", "correlation", "abscorrelation", "spearman", "kendall" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
359 names( amap.distance ) <- c( "euclidean", "maximum", "manhattan", "canberra", "binary",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
360 "cosine", "abscosine", "pearson", "abspearson", "spearman", "kendall" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
361 main.dist.obj <- NULL
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
362 ##browser()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
363 if ( diss ){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
364 main.dist.obj <- d
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
365
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
366 ## reset the pFeature & weightsFeature params if they've been set (irrelevant if d is a dist matrix)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
367 if ( ( !is.null(pFeature) ) &&
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
368 ( pFeature < 1 ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
369 if (verbose) warning( "user-supplied data is a distance matrix; ignoring user-specified pFeature parameter\n" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
370 pFeature <- 1 # set it to 1 to avoid problems with sampleCols
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
371 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
372 if ( ! is.null( weightsFeature ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
373 if (verbose) warning( "user-supplied data is a distance matrix; ignoring user-specified weightsFeature parameter\n" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
374 weightsFeature <- NULL # set it to NULL to avoid problems with sampleCols
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
375 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
376 } else { ## d is a data matrix
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
377 ## we're not sampling over the features
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
378 if ( ( clusterAlg != "km" ) &&
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
379 ( is.null( pFeature ) ||
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
380 ( ( pFeature == 1 ) && is.null( weightsFeature ) ) ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
381 ## only generate a main.dist.object IFF 1) d is a matrix, 2) we're not sampling the features, and 3) the algorithm isn't 'km'
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
382 if ( inherits( distance, "character" ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
383 if ( ! distance %in% names( amap.distance ) ) stop("unsupported distance.")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
384
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
385 main.dist.obj <- Dist( t(d), method=as.character( amap.distance[ distance ] ) )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
386 ## now fix dumb amap naming convention for distance metrics
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
387 attr( main.dist.obj, "method" ) <- as.character( amap.distance[ distance ] )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
388 } else stop("unsupported distance specified.")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
389 } else {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
390 ## pFeature < 1 or a weightsFeature != NULL
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
391 ## since d is a data matrix, the user wants to sample over the gene features, so main.dist.obj is left as NULL
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
392 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
393 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
394
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
395
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
396 for (i in 1:repCount){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
397 ##browser()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
398 if(verbose){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
399 message(paste("random subsample",i));
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
400 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
401 ## take expression matrix sample, samples and genes
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
402 sample_x = sampleCols( d, pItem, pFeature, weightsItem, weightsFeature )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
403
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
404 this_dist = NA
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
405 if ( ! is.null( main.dist.obj ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
406 boot.cols <- sample_x$subcols
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
407 this_dist <- as.matrix( main.dist.obj )[ boot.cols, boot.cols ]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
408 if ( clusterAlg != "km" ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
409 ## if this isn't kmeans, then convert to a distance object
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
410 this_dist <- as.dist( this_dist )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
411 attr( this_dist, "method" ) <- attr( main.dist.obj, "method" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
412 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
413 } else {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
414 ## if main.dist.obj is NULL, then d is a data matrix, and either:
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
415 ## 1) clusterAlg is 'km'
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
416 ## 2) pFeatures < 1 or weightsFeatures have been specified, or
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
417 ## 3) both
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
418 ## so we can't use a main distance object and for every iteration, we will have to re-calculate either
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
419 ## 1) the distance matrix (because we're also sampling the features as well), or
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
420 ## 2) the submat (if using km)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
421
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
422 if ( clusterAlg != "km" ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
423 if ( ! distance %in% names( amap.distance ) ) stop("unsupported distance.")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
424 ## good, we have a supported distance type
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
425 this_dist <- Dist( t( sample_x$submat ), method=as.character( amap.distance[ distance ] ) )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
426 ## now fix dumb amap naming convention for distance metrics
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
427 attr( this_dist, "method" ) <- as.character( amap.distance[ distance ] )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
428 } else {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
429 ##browser()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
430 ##clusterAlg == "km"
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
431 ## if we're not sampling the features, then grab the colslice
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
432 if ( is.null( pFeature ) ||
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
433 ( ( pFeature == 1 ) && is.null( weightsFeature ) ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
434 this_dist <- d[, sample_x$subcols ]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
435 } else {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
436 if ( is.na( sample_x$submat ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
437 save( "ccrun.submat.eq.na.dbg.rda" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
438 stop( "Houston, we have a problem. sample_x$submat is NA in ccRun when it should be specified - saving state\n" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
439 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
440
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
441 this_dist <- sample_x$submat
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
442 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
443 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
444 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
445
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
446 ## cluster samples for HC.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
447 this_cluster=NA
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
448 if(clusterAlg=="hc"){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
449 this_cluster = hclust( this_dist, method=innerLinkage)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
450 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
451 ##browser()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
452 ##mCount is possible number of times that two sample occur in same random sample, independent of k
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
453 ##mCount stores number of times a sample pair was sampled together.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
454 mCount <- connectivityMatrix( rep( 1,length(sample_x[[3]])),
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
455 mCount,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
456 sample_x[[3]] )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
457
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
458 ##use samples for each k
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
459 for (k in 2:maxK){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
460 if(verbose){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
461 message(paste(" k =",k))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
462 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
463 if (i==1){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
464 ml[[k]] = mConsist #initialize
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
465 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
466 this_assignment=NA
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
467 if(clusterAlg=="hc"){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
468 ##prune to k for hc
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
469 this_assignment = cutree(this_cluster,k)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
470 ##browser()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
471 }else if(clusterAlg=="km"){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
472 ##this_dist should now be a matrix corresponding to the result from sampleCols
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
473 this_assignment <- kmeans( t( this_dist ),
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
474 k,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
475 iter.max = 10,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
476 nstart = 1,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
477 algorithm = c("Hartigan-Wong") )$cluster
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
478 }else if ( clusterAlg == "pam" ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
479 require( cluster )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
480 this_assignment <- pam( x=this_dist,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
481 k,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
482 diss=TRUE,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
483 metric=distance,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
484 cluster.only=TRUE )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
485 } else{
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
486 ##optional cluterArg Hook.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
487 this_assignment <- get(clusterAlg)(this_dist, k)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
488 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
489 ##add to tally
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
490 ml[[k]] <- connectivityMatrix( this_assignment,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
491 ml[[k]],
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
492 sample_x[[3]] )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
493 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
494 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
495
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
496
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
497 ##consensus fraction
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
498 res = vector(mode="list",maxK)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
499 for (k in 2:maxK){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
500 ##fill in other half of matrix for tally and count.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
501 tmp = triangle(ml[[k]],mode=3)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
502 tmpCount = triangle(mCount,mode=3)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
503 res[[k]] = tmp / tmpCount
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
504 res[[k]][which(tmpCount==0)] = 0
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
505 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
506 message("end fraction")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
507 return(res)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
508 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
509
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
510
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
511 connectivityMatrix <- function( clusterAssignments, m, sampleKey){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
512 ##input: named vector of cluster assignments, matrix to add connectivities
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
513 ##output: connectivity matrix
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
514 names( clusterAssignments ) <- sampleKey
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
515 cls <- lapply( unique( clusterAssignments ), function(i) as.numeric( names( clusterAssignments[ clusterAssignments %in% i ] ) ) )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
516
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
517 for ( i in 1:length( cls ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
518 nelts <- 1:ncol( m )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
519 cl <- as.numeric( nelts %in% cls[[i]] ) ## produces a binary vector
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
520 updt <- outer( cl, cl )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
521 m <- m + updt
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
522 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
523 return(m)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
524 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
525
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
526 ## returns a list with the sample columns, as well as the sub-matrix & sample features (if necessary)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
527 ## if no sampling over the features is performed, the submatrix & sample features are returned as NAs
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
528 ## to reduce memory overhead
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
529 sampleCols <- function( d,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
530 pSamp=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
531 pRow=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
532 weightsItem=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
533 weightsFeature=NULL ){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
534 space <- ifelse( inherits( d, "dist" ), ncol( as.matrix(d) ), ncol(d) )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
535 sampleN <- floor(space*pSamp)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
536 sampCols <- sort( sample(space, sampleN, replace = FALSE, prob = weightsItem) )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
537
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
538 this_sample <- sampRows <- NA
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
539 if ( inherits( d, "matrix" ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
540 if ( (! is.null( pRow ) ) &&
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
541 ( (pRow < 1 ) || (! is.null( weightsFeature ) ) ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
542 ## only sample the rows and generate a sub-matrix if we're sampling over the row/gene/features
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
543 space = nrow(d)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
544 sampleN = floor(space*pRow)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
545 sampRows = sort( sample(space, sampleN, replace = FALSE, prob = weightsFeature) )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
546 this_sample <- d[sampRows,sampCols]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
547 dimnames(this_sample) <- NULL
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
548 } else {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
549 ## do nothing
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
550 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
551 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
552 return( list( submat=this_sample,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
553 subrows=sampRows,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
554 subcols=sampCols ) )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
555 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
556
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
557 CDF=function(ml,breaks=100){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
558 #plot CDF distribution
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
559 plot(c(0),xlim=c(0,1),ylim=c(0,1),col="white",bg="white",xlab="consensus index",ylab="CDF",main="consensus CDF", las=2)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
560 k=length(ml)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
561 this_colors = rainbow(k-1)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
562 areaK = c()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
563 for (i in 2:length(ml)){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
564 v=triangle(ml[[i]],mode=1)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
565
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
566 #empirical CDF distribution. default number of breaks is 100
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
567 h = hist(v, plot=FALSE, breaks=seq(0,1,by=1/breaks))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
568 h$counts = cumsum(h$counts)/sum(h$counts)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
569
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
570 #calculate area under CDF curve, by histogram method.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
571 thisArea=0
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
572 for (bi in 1:(length(h$breaks)-1)){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
573 thisArea = thisArea + h$counts[bi]*(h$breaks[bi+1]-h$breaks[bi]) #increment by height by width
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
574 bi = bi + 1
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
575 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
576 areaK = c(areaK,thisArea)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
577 lines(h$mids,h$counts,col=this_colors[i-1],lwd=2,type='l')
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
578 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
579 legend(0.8,0.5,legend=paste(rep("",k-1),seq(2,k,by=1),sep=""),fill=this_colors)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
580
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
581 #plot area under CDF change.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
582 deltaK=areaK[1] #initial auc at k=2
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
583 for(i in 2:(length(areaK))){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
584 #proportional increase relative to prior K.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
585 deltaK = c(deltaK,( areaK[i] - areaK[i-1])/areaK[i-1])
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
586 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
587 plot(1+(1:length(deltaK)),y=deltaK,xlab="k",ylab="relative change in area under CDF curve",main="Delta area",type="b")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
588 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
589
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
590
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
591 myPal = function(n=10){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
592 #returns n colors
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
593 seq = rev(seq(0,255,by=255/(n)))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
594 palRGB = cbind(seq,seq,255)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
595 rgb(palRGB,maxColorValue=255)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
596 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
597
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
598 setClusterColors = function(past_ct,ct,colorU,colorList){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
599 #description: sets common color of clusters between different K
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
600 newColors = c()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
601 if(length(colorList)==0){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
602 #k==2
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
603 newColors = colorU[ct]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
604 colori=2
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
605 }else{
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
606 newColors = rep(NULL,length(ct))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
607 colori = colorList[[2]]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
608 mo=table(past_ct,ct)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
609 m=mo/apply(mo,1,sum)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
610 for(tci in 1:ncol(m)){ # for each cluster
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
611 maxC = max(m[,tci])
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
612 pci = which(m[,tci] == maxC)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
613 if( sum(m[,tci]==maxC)==1 & max(m[pci,])==maxC & sum(m[pci,]==maxC)==1 ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
614 #if new column maximum is unique, same cell is row maximum and is also unique
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
615 ##Note: the greatest of the prior clusters' members are the greatest in a current cluster's members.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
616 newColors[which(ct==tci)] = unique(colorList[[1]][which(past_ct==pci)]) # one value
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
617 }else{ #add new color.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
618 colori=colori+1
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
619 newColors[which(ct==tci)] = colorU[colori]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
620 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
621 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
622 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
623 return(list(newColors,colori,unique(newColors) ))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
624 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
625
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
626 clusterTrackingPlot = function(m){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
627 #description: plots cluster tracking plot
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
628 #input: m - matrix where rows are k, columns are samples, and values are cluster assignments.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
629 plot(NULL,xlim=c(-0.1,1),ylim=c(0,1),axes=FALSE,xlab="samples",ylab="k",main="tracking plot")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
630 for(i in 1:nrow(m)){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
631 rect( xleft=seq(0,1-1/ncol(m),by=1/ncol(m)), ybottom=rep(1-i/nrow(m),ncol(m)) , xright=seq(1/ncol(m),1,by=1/ncol(m)), ytop=rep(1-(i-1)/nrow(m),ncol(m)), col=m[i,],border=NA)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
632 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
633 #hatch lines to indicate samples
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
634 xl = seq(0,1-1/ncol(m),by=1/ncol(m))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
635 segments( xl, rep(-0.1,ncol(m)) , xl, rep(0,ncol(m)), col="black") #** alt white and black color?
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
636 ypos = seq(1,0,by=-1/nrow(m))-1/(2*nrow(m))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
637 text(x=-0.1,y=ypos[-length(ypos)],labels=seq(2,nrow(m)+1,by=1))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
638 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
639
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
640 triangle = function(m,mode=1){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
641 #mode=1 for CDF, vector of lower triangle.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
642 #mode==3 for full matrix.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
643 #mode==2 for calcICL; nonredundant half matrix coun
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
644 #mode!=1 for summary
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
645 n=dim(m)[1]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
646 nm = matrix(0,ncol=n,nrow=n)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
647 fm = m
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
648
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
649
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
650 nm[upper.tri(nm)] = m[upper.tri(m)] #only upper half
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
651
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
652 fm = t(nm)+nm
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
653 diag(fm) = diag(m)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
654
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
655 nm=fm
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
656 nm[upper.tri(nm)] = NA
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
657 diag(nm) = NA
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
658 vm = m[lower.tri(nm)]
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
659
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
660 if(mode==1){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
661 return(vm) #vector
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
662 }else if(mode==3){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
663 return(fm) #return full matrix
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
664 }else if(mode == 2){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
665 return(nm) #returns lower triangle and no diagonal. no double counts.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
666 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
667
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
668 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
669
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
670
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
671 rankedBarPlot=function(d,myc,cc,title){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
672 colors = rbind() #each row is a barplot series
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
673 byRank = cbind()
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
674
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
675 spaceh = 0.1 #space between bars
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
676 for(i in 1:ncol(d)){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
677 byRank = cbind(byRank,sort(d[,i],na.last=F))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
678 colors = rbind(colors,order(d[,i],na.last=F))
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
679 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
680 maxH = max(c(1.5,apply(byRank,2,sum)),na.rm=T) #maximum height of graph
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
681
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
682 #barplot largest to smallest so that smallest is in front.
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
683 barp = barplot( apply(byRank,2,sum) , col=myc[colors[,1]] ,space=spaceh,ylim=c(0,maxH),main=paste("item-consensus", title),border=NA,las=1 )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
684 for(i in 2:nrow(byRank)){
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
685 barplot( apply(matrix(byRank[i:nrow(byRank),],ncol=ncol(byRank)) ,2,sum), space=spaceh,col=myc[colors[,i]],ylim=c(0,maxH), add=T,border=NA,las=1 )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
686 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
687 xr=seq(spaceh,ncol(d)+ncol(d)*spaceh,(ncol(d)+ncol(d)*spaceh)/ncol(d) )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
688 #class labels as asterisks
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
689 text("*",x=xr+0.5,y=maxH,col=myc[cc],cex=1.4) #rect(xr,1.4,xr+1,1.5,col=myc[cc] )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
690 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
691
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
692
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
693
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
694 ###################################################################3333
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
695 ## RESTART MY SCRIPTS HERE
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
696 ##save.image( '/home/waltman/work.local/tmp/new.ccplus.R.dbg' )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
697 stop( "phw forced stop\n")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
698 spec <- matrix( c( "data.fname", "d", 1, "character",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
699 "direction", "n", 2, "character",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
700 "output.name", "o", 2, "character",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
701 "cluster.alg", "a", 2, "character", ## must be either 'hc' or 'km'
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
702 "distance.metric", "m", 2, "character", ## must be one supported by ConsensusClusterPlus
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
703 "max.k", "k", 2, "integer",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
704 "reps", "r", 2, "integer",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
705 "innerLinkage", "i", 1, "character",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
706 "finalLinkage", "f", 1, "character",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
707 "out.report.dir", "p", 2, "character",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
708 "out.report.html", "h", 2, "character"
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
709 ),
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
710 nc=4,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
711 byrow=TRUE
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
712 )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
713
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
714 opt <- getopt( spec=spec )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
715
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
716 ## default params for non-required params
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
717 if ( is.null( opt$direction ) ) { opt$direction <- "cols" }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
718 if ( is.null( opt$cluster.alg ) ) { opt$cluster.alg <- "pam" }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
719 if ( is.null( opt$output.name ) ) { opt$output.name <- "consensus.cluster.result" }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
720 if ( is.null( opt$distance.metric ) ) { opt$distance.metric <- "cosine" }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
721 if ( is.null( opt$max.k ) ) { opt$max.k <- 10 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
722 if ( is.null( opt$reps ) ) { opt$reps <- 1000 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
723 if ( is.null( opt$innerLinkage ) ) { opt$innerLinkage <- "average" }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
724 if ( is.null( opt$finalLinkage ) ) { opt$finalLinkage <- "average" }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
725
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
726 if ( is.null( opt$out.report.dir ) ) { opt$out.report.dir <- "report" }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
727 if ( is.null( opt$out.report.html ) ) { opt$out.report.html <- file.path( "report", "index.html" ) }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
728
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
729 ## validate params here (make sure set to valid values)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
730 if ( !opt$cluster.alg %in% c( "hc", "km", "pam" ) ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
731 stop( "invalid clustering algorithm specified", cluster.alg )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
732 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
733
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
734
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
735 data <- as.matrix( read.delim( opt$data.fname, header=T, row.names=1 , check.names=FALSE ) )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
736 ## transpose the matrix if we want to cluster the rows (genes)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
737 if ( opt$direction == "rows" ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
738 data <- t( data )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
739 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
740
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
741
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
742 title <- paste( opt$cluster.alg, opt$output.name, sep="." )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
743 ##source( '~/bin/galaxy-dist/tools/ucsc.cancer.tools/cluster.tools/new.ccplus.R' )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
744 results <- ConsensusClusterPlus( data,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
745 maxK=opt$max.k,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
746 reps=opt$reps,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
747 pItem=0.8,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
748 ##pFeature=NULL,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
749 pFeature=0.5,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
750 title=opt$out.report.dir,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
751 clusterAlg=opt$cluster.alg,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
752 distance=opt$distance.metric,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
753 innerLinkage=opt$innerLinkage,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
754 finalLinkage=opt$finalLinkage,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
755 plot='pdf',
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
756 writeTable=FALSE,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
757 seed=100,
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
758 weightsFeature=abs( rnorm( nrow( orig.data ) ) ),
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
759 ##verbose=FALSE )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
760 verbose=TRUE )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
761
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
762 pngs = list.files(path=opt$out.report.dir, patt="png")
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
763 html.out <- paste( "<html>",
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
764 paste( paste( "<div><img src=\'", pngs, sep="" ), "\'/></div>", sep="" ),
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
765 "</html>" )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
766 cat( html.out, file=opt$out.report.html )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
767
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
768
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
769 ## re-transpose the matrix back if we've clustered the rows (genes)
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
770 if ( opt$direction == "rows" ) {
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
771 data <- t( data )
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
772 }
b442996b66ae Uploaded
peter-waltman
parents:
diff changeset
773 save( file=opt$output.name, data, results)