Mercurial > repos > insilico-bob > ngchm
comparison mda_heatmap_gen/CHM.R @ 0:73aa1f80c237 draft
Upload
| author | insilico-bob |
|---|---|
| date | Thu, 07 Apr 2016 10:54:23 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:73aa1f80c237 |
|---|---|
### Reads a tab-delimited data matrix from a file, computes a row ordering and a
### column ordering with the requested methods, and writes the results to files.
###
### dataFile - path to a tab-delimited file; the first line holds the column labels
###            and the first field of every following line holds the row label
### rowOrderMethod - Hierarchical, Original, Random
### rowDistanceMeasure - For clustering, distance measure. May be: euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation.
### rowAgglomerationMethod - For clustering, agglomeration method. May be: 'average' for Average Linkage, 'complete' for Complete Linkage,
###                          'single' for Single Linkage, 'ward', 'mcquitty', 'median', or 'centroid'.
### colOrderMethod - as rowOrderMethod, applied to columns
### colDistanceMeasure - as rowDistanceMeasure, applied to columns
### colAgglomerationMethod - as rowAgglomerationMethod, applied to columns
### rowOrderFile - output file of order of rows
### colOrderFile - output file of order of cols
### rowDendroFile - output file of row dendrogram (written only for Hierarchical)
### colDendroFile - output file of col dendrogram (written only for Hierarchical)
###
### Returns NULL invisibly; the function is called for its file output.

performDataOrdering <- function(dataFile, rowOrderMethod, rowDistanceMeasure, rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod, rowOrderFile, colOrderFile, rowDendroFile, colDendroFile)
{
  # check.names = FALSE keeps the column labels exactly as written in the input
  # file. The default would rewrite them into syntactic R names (for example
  # "TCGA-01" becomes "TCGA.01"), so the labels in the column order file would
  # no longer match the labels in the data file.
  dataMatrix <- read.table(dataFile, header = TRUE, sep = "\t", row.names = 1,
                           as.is = TRUE, check.names = FALSE)
  # Repeated column labels are still disambiguated so that every label maps to
  # exactly one position when the order file is written.
  colnames(dataMatrix) <- make.unique(colnames(dataMatrix))

  rowOrder <- createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod)
  if (rowOrderMethod == "Hierarchical") {
    # createOrdering returned a clustering result: write dendrogram and order
    writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile)
  } else {
    # createOrdering returned the row labels in their new order
    writeOrderTSV(rowOrder, rownames(dataMatrix), rowOrderFile)
  }

  colOrder <- createOrdering(dataMatrix, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod)
  if (colOrderMethod == "Hierarchical") {
    writeHCDataTSVs(colOrder, colDendroFile, colOrderFile)
  } else {
    writeOrderTSV(colOrder, colnames(dataMatrix), colOrderFile)
  }

  invisible(NULL)
}
| 33 | |
# Creates the two output files for an hclust ordering.
#
# uDend                 - clustering result; its merge, height, labels and order
#                         components are read
# outputHCDataFileName  - dendrogram file: tab-delimited, columns A, B, Height,
#                         one line per merge step
# outputHCOrderFileName - order file: tab-delimited, columns Id, Order, one line
#                         per label in label order; Order is the position of
#                         that item within uDend$order
writeHCDataTSVs <- function(uDend, outputHCDataFileName, outputHCOrderFileName)
{
  mergeTable <- cbind(uDend$merge, uDend$height, deparse.level = 0)
  colnames(mergeTable) <- c("A", "B", "Height")
  write.table(mergeTable, file = outputHCDataFileName, append = FALSE,
              quote = FALSE, sep = "\t", row.names = FALSE)

  # uDend$order lists item indices in dendrogram order; match() inverts that
  # permutation in a single pass (position of item i within uDend$order)
  # instead of scanning the whole vector once per item.
  itemLabels <- uDend$labels
  orderTable <- data.frame(Id = as.character(itemLabels),
                           Order = match(seq_along(itemLabels), uDend$order),
                           stringsAsFactors = FALSE)
  write.table(orderTable, file = outputHCOrderFileName, append = FALSE,
              quote = FALSE, sep = "\t", row.names = FALSE)
}
| 49 | |
# Creates the order file for non-clustering methods.
#
# newOrder              - labels in their new order
# originalOrder         - the same labels in their original order
# outputHCOrderFileName - order file: tab-delimited, columns Id, Order, one line
#                         per label in original order; Order is the position of
#                         that label within newOrder
writeOrderTSV <- function(newOrder, originalOrder, outputHCOrderFileName)
{
  # match() looks up every label's position in newOrder in one pass, replacing
  # a per-label scan; it also copes with an empty label vector, in which case
  # only the header line is written.
  orderTable <- data.frame(Id = as.character(originalOrder),
                           Order = match(originalOrder, newOrder),
                           stringsAsFactors = FALSE)
  write.table(orderTable, file = outputHCOrderFileName, append = FALSE,
              quote = FALSE, sep = "\t", row.names = FALSE)
}
| 61 | |
| 62 | |
| 63 | |
# Computes an ordering of the rows or the columns of a data matrix.
#
# matrixData          - numeric data with row and column labels
# orderMethod         - "Hierarchical", "Random" or "Original"
# direction           - "row" orders the rows; any other value orders the columns
# distanceMeasure     - used for Hierarchical only: "correlation", or a method
#                       name passed to dist()
# agglomerationMethod - used for Hierarchical only: "ward", or a method name
#                       passed to hclust()
#
# Returns the hclust() result for "Hierarchical"; otherwise a character vector
# of the row or column labels (shuffled for "Random", unchanged for "Original").
# Stops with an error when orderMethod is missing or not one of the three
# supported values.
createOrdering <- function(matrixData, orderMethod, direction, distanceMeasure, agglomerationMethod)
{
  # A missing command-line argument arrives here as NA; report it with the
  # same message as an unknown method instead of failing inside if().
  if (length(orderMethod) != 1 || is.na(orderMethod)) {
    stop("createOrdering -- failed to find ordering method")
  }

  if (orderMethod == "Hierarchical") {
    # Orient the data so the items being clustered are always the rows; the
    # distance computation below is then the same for both directions.
    if (direction == "row") {
      items <- as.matrix(matrixData)
    } else {
      items <- t(as.matrix(matrixData))
    }

    if (distanceMeasure == "correlation") {
      # Map correlation in [-1, 1] onto a distance in [0, 1]
      itemCor <- cor(t(items), use = "pairwise.complete.obs")
      distVals <- as.dist((1 - itemCor) / 2)
    } else {
      distVals <- dist(items, method = distanceMeasure)
    }

    if (agglomerationMethod == "ward") {
      # NOTE(review): the distances are squared here and "ward.D2" is
      # documented to square dissimilarities again before cluster updating.
      # Kept as is to preserve existing results -- confirm whether
      # hclust(distVals, "ward.D2") or hclust(distVals^2, "ward.D") was meant.
      ordering <- hclust(distVals * distVals, method = "ward.D2")
    } else {
      ordering <- hclust(distVals, method = agglomerationMethod)
    }
  } else if (orderMethod == "Random" || orderMethod == "Original") {
    if (direction == "row") {
      headerList <- rownames(matrixData)
    } else {
      headerList <- colnames(matrixData)
    }
    if (orderMethod == "Random") {
      ordering <- sample(headerList, length(headerList))
    } else {
      ordering <- headerList
    }
  } else {
    stop("createOrdering -- failed to find ordering method")
  }

  return(ordering)
}
### Initialize command line arguments and call performDataOrdering
###
### Expected positional arguments:
###   1 dataFile                5 colOrderMethod           9 colOrderFile
###   2 rowOrderMethod          6 colDistanceMeasure      10 rowDendroFile
###   3 rowDistanceMeasure      7 colAgglomerationMethod  11 colDendroFile
###   4 rowAgglomerationMethod  8 rowOrderFile

# Turn off all warnings for the rest of the script. The alternative of wrapping
# only the call below in suppressWarnings() is not used.
options(warn = -1)

args <- commandArgs(TRUE)

# Missing arguments would otherwise be passed on as NA and fail later with an
# unrelated-looking message; report the real problem up front.
if (length(args) < 11) {
  stop("CHM.R -- expected 11 arguments (dataFile, rowOrderMethod, rowDistanceMeasure, ",
       "rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod, ",
       "rowOrderFile, colOrderFile, rowDendroFile, colDendroFile) but received ",
       length(args), call. = FALSE)
}

performDataOrdering(dataFile = args[1],
                    rowOrderMethod = args[2],
                    rowDistanceMeasure = args[3],
                    rowAgglomerationMethod = args[4],
                    colOrderMethod = args[5],
                    colDistanceMeasure = args[6],
                    colAgglomerationMethod = args[7],
                    rowOrderFile = args[8],
                    colOrderFile = args[9],
                    rowDendroFile = args[10],
                    colDendroFile = args[11])
