# HG changeset patch # User deepakjadmin # Date 1475400990 14400 # Node ID 5364cf43a8c15ba64554d28a22f1d4f4dd47f09b # Parent 91c141c5efa6eab21f584e4061a25637aeab0f19 Uploaded diff -r 91c141c5efa6 -r 5364cf43a8c1 featureselect/feature_selection.R --- a/featureselect/feature_selection.R Fri Mar 25 06:48:10 2016 -0400 +++ b/featureselect/feature_selection.R Sun Oct 02 05:36:30 2016 -0400 @@ -22,7 +22,7 @@ colRate <- apply(rawData[, predictorNames, drop = FALSE], 2, function(x) mean(is.na(x))) -colExclude <- colRate > 0.01 +colExclude <- colRate > 0.1 if(any(colExclude)){ predictorNames <- predictorNames[-which(colExclude)] rawData <- RAWDATA[, c(predictorNames,"outcome")] @@ -32,15 +32,8 @@ rowRate <- apply(rawData[, predictorNames, drop = FALSE], 1, function(x) mean(is.na(x))) -rowno <- dim(rawData)[1] -if (rowno <= 1000){ -cutoff <- rowno / (rowno * 100) -} else if (rowno > 1000 & rowno <= 5000) { -cutoff <- rowno / (rowno * 100 * 0.5 ) -} else { -cutoff <- rowno / (rowno * 100 * 0.5 * 0.5) -} -rowExclude <- rowRate > cutoff + +rowExclude <- rowRate > 0 if(any(rowExclude)){ rawData <- rawData[!rowExclude, ] ##hasMissing <- apply(rawData[, predictorNames, drop = FALSE], @@ -55,7 +48,7 @@ } -set.seed(1234) +set.seed(2) #print(dim(dataX)) #print(dim(rawData))