Mercurial > repos > deepakjadmin > feature_selection_test1
changeset 4:5364cf43a8c1 draft
Uploaded
author | deepakjadmin |
---|---|
date | Sun, 02 Oct 2016 05:36:30 -0400 |
parents | 91c141c5efa6 |
children | 016c69bfb2a1 |
files | featureselect/feature_selection.R |
diffstat | 1 files changed, 4 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/featureselect/feature_selection.R Fri Mar 25 06:48:10 2016 -0400
+++ b/featureselect/feature_selection.R Sun Oct 02 05:36:30 2016 -0400
@@ -22,7 +22,7 @@
 colRate <- apply(rawData[, predictorNames, drop = FALSE],
 2, function(x) mean(is.na(x)))
-colExclude <- colRate > 0.01
+colExclude <- colRate > 0.1
 if(any(colExclude)){
 predictorNames <- predictorNames[-which(colExclude)]
 rawData <- RAWDATA[, c(predictorNames,"outcome")]
@@ -32,15 +32,8 @@
 rowRate <- apply(rawData[, predictorNames, drop = FALSE],
 1, function(x) mean(is.na(x)))
-rowno <- dim(rawData)[1]
-if (rowno <= 1000){
-cutoff <- rowno / (rowno * 100)
-} else if (rowno > 1000 & rowno <= 5000) {
-cutoff <- rowno / (rowno * 100 * 0.5 )
-} else {
-cutoff <- rowno / (rowno * 100 * 0.5 * 0.5)
-}
-rowExclude <- rowRate > cutoff
+
+rowExclude <- rowRate > 0
 if(any(rowExclude)){
 rawData <- rawData[!rowExclude, ]
 ##hasMissing <- apply(rawData[, predictorNames, drop = FALSE],
@@ -55,7 +48,7 @@
 }
-set.seed(1234)
+set.seed(2)
 #print(dim(dataX))
 #print(dim(rawData))
```