Mercurial > repos > deepakjadmin > feature_selection_test1
comparison featureselect/feature_selection.R @ 4:5364cf43a8c1 draft
Uploaded
author | deepakjadmin |
---|---|
date | Sun, 02 Oct 2016 05:36:30 -0400 |
parents | 91c141c5efa6 |
children |
Comparison legend:
- equal
- deleted
- inserted
- replaced
3:91c141c5efa6 | 4:5364cf43a8c1 |
---|---|
20 isNum <- apply(rawData[,predictorNames, drop = FALSE], 2, is.numeric) | 20 isNum <- apply(rawData[,predictorNames, drop = FALSE], 2, is.numeric) |
21 if(any(!isNum)) stop("all predictors in rawData should be numeric") | 21 if(any(!isNum)) stop("all predictors in rawData should be numeric") |
22 | 22 |
23 colRate <- apply(rawData[, predictorNames, drop = FALSE], | 23 colRate <- apply(rawData[, predictorNames, drop = FALSE], |
24 2, function(x) mean(is.na(x))) | 24 2, function(x) mean(is.na(x))) |
25 colExclude <- colRate > 0.01 | 25 colExclude <- colRate > 0.1 |
26 if(any(colExclude)){ | 26 if(any(colExclude)){ |
27 predictorNames <- predictorNames[-which(colExclude)] | 27 predictorNames <- predictorNames[-which(colExclude)] |
28 rawData <- RAWDATA[, c(predictorNames,"outcome")] | 28 rawData <- RAWDATA[, c(predictorNames,"outcome")] |
29 } else { | 29 } else { |
30 rawData <- RAWDATA | 30 rawData <- RAWDATA |
31 } | 31 } |
32 rowRate <- apply(rawData[, predictorNames, drop = FALSE], | 32 rowRate <- apply(rawData[, predictorNames, drop = FALSE], |
33 1, function(x) mean(is.na(x))) | 33 1, function(x) mean(is.na(x))) |
34 | 34 |
35 rowno <- dim(rawData)[1] | 35 |
36 if (rowno <= 1000){ | 36 rowExclude <- rowRate > 0 |
37 cutoff <- rowno / (rowno * 100) | |
38 } else if (rowno > 1000 & rowno <= 5000) { | |
39 cutoff <- rowno / (rowno * 100 * 0.5 ) | |
40 } else { | |
41 cutoff <- rowno / (rowno * 100 * 0.5 * 0.5) | |
42 } | |
43 rowExclude <- rowRate > cutoff | |
44 if(any(rowExclude)){ | 37 if(any(rowExclude)){ |
45 rawData <- rawData[!rowExclude, ] | 38 rawData <- rawData[!rowExclude, ] |
46 ##hasMissing <- apply(rawData[, predictorNames, drop = FALSE], | 39 ##hasMissing <- apply(rawData[, predictorNames, drop = FALSE], |
47 ##1, function(x) mean(is.na(x))) | 40 ##1, function(x) mean(is.na(x))) |
48 | 41 |
53 } else { | 46 } else { |
54 rawData <- rawData[complete.cases(rawData),] | 47 rawData <- rawData[complete.cases(rawData),] |
55 | 48 |
56 } | 49 } |
57 | 50 |
58 set.seed(1234) | 51 set.seed(2) |
59 | 52 |
60 #print(dim(dataX)) | 53 #print(dim(dataX)) |
61 #print(dim(rawData)) | 54 #print(dim(rawData)) |
62 #print(length(dataY)) | 55 #print(length(dataY)) |
63 | 56 |