comparison featureselect/feature_selection.R @ 4:5364cf43a8c1 draft

Uploaded
author deepakjadmin
date Sun, 02 Oct 2016 05:36:30 -0400
parents 91c141c5efa6
children
comparison
equal deleted inserted replaced
3:91c141c5efa6 4:5364cf43a8c1
20 isNum <- apply(rawData[,predictorNames, drop = FALSE], 2, is.numeric) 20 isNum <- apply(rawData[,predictorNames, drop = FALSE], 2, is.numeric)
21 if(any(!isNum)) stop("all predictors in rawData should be numeric") 21 if(any(!isNum)) stop("all predictors in rawData should be numeric")
22 22
23 colRate <- apply(rawData[, predictorNames, drop = FALSE], 23 colRate <- apply(rawData[, predictorNames, drop = FALSE],
24 2, function(x) mean(is.na(x))) 24 2, function(x) mean(is.na(x)))
25 colExclude <- colRate > 0.01 25 colExclude <- colRate > 0.1
26 if(any(colExclude)){ 26 if(any(colExclude)){
27 predictorNames <- predictorNames[-which(colExclude)] 27 predictorNames <- predictorNames[-which(colExclude)]
28 rawData <- RAWDATA[, c(predictorNames,"outcome")] 28 rawData <- RAWDATA[, c(predictorNames,"outcome")]
29 } else { 29 } else {
30 rawData <- RAWDATA 30 rawData <- RAWDATA
31 } 31 }
32 rowRate <- apply(rawData[, predictorNames, drop = FALSE], 32 rowRate <- apply(rawData[, predictorNames, drop = FALSE],
33 1, function(x) mean(is.na(x))) 33 1, function(x) mean(is.na(x)))
34 34
35 rowno <- dim(rawData)[1] 35
36 if (rowno <= 1000){ 36 rowExclude <- rowRate > 0
37 cutoff <- rowno / (rowno * 100)
38 } else if (rowno > 1000 & rowno <= 5000) {
39 cutoff <- rowno / (rowno * 100 * 0.5 )
40 } else {
41 cutoff <- rowno / (rowno * 100 * 0.5 * 0.5)
42 }
43 rowExclude <- rowRate > cutoff
44 if(any(rowExclude)){ 37 if(any(rowExclude)){
45 rawData <- rawData[!rowExclude, ] 38 rawData <- rawData[!rowExclude, ]
46 ##hasMissing <- apply(rawData[, predictorNames, drop = FALSE], 39 ##hasMissing <- apply(rawData[, predictorNames, drop = FALSE],
47 ##1, function(x) mean(is.na(x))) 40 ##1, function(x) mean(is.na(x)))
48 41
53 } else { 46 } else {
54 rawData <- rawData[complete.cases(rawData),] 47 rawData <- rawData[complete.cases(rawData),]
55 48
56 } 49 }
57 50
58 set.seed(1234) 51 set.seed(2)
59 52
60 #print(dim(dataX)) 53 #print(dim(dataX))
61 #print(dim(rawData)) 54 #print(dim(rawData))
62 #print(length(dataY)) 55 #print(length(dataY))
63 56