Mercurial > repos > deepakjadmin > feature_selection_test1
view toolrfe.xml @ 5:016c69bfb2a1 draft
Uploaded
author | deepakjadmin |
---|---|
date | Tue, 03 Jan 2017 02:26:17 -0500 |
parents | |
children | b84589b7c014 |
line wrap: on
line source
<!--
    Galaxy tool wrapper: Feature Selection.

    Runs feature_selection.R through Rscript with nine positional arguments
    (input file, the two output paths, then the selected options) and exposes
    two outputs: the feature selection profile and the selected feature set.
-->
<tool id="featureSelectR" name="Feature Selection">
    <description>This tool is used to extract the best feature subsets containing input data for model building.</description>

    <!-- Legacy bash invocation, disabled and kept for reference only. -->
    <!--command interpreter="bash">step3run.sh $file1 $model $output1 2>/dev/null </command-->

    <requirements>
        <requirement type="set_environment">FEATURE_SELECTION_R</requirement>
        <requirement type="set_environment">R_ROOT_DIR</requirement>
        <requirement type="package" version="3.2.0">R</requirement>
        <requirement type="package" version="1.0.0">caret-tools</requirement>
    </requirements>

    <!--
        The command is wrapped in CDATA because the shell redirection contains
        the characters > and &, which are markup characters in XML text.
        Both stdout and stderr are redirected to /dev/null, so nothing the
        script prints reaches the job log.
        Argument order is positional and must stay in sync with the script.
    -->
    <command interpreter="Rscript"><![CDATA[feature_selection.R $input $profile $finalset $function1 $resampling $repeat $number $corcutoff $SAMPLING> /dev/null 2>&1]]></command>

    <inputs>
        <param name="input" type="data" label="Select input data file" help="input .RData file" />

        <!--
            "garBage" is the value passed to the script when no sampling is
            requested. NOTE(review): presumably feature_selection.R treats any
            value other than downsampling/upsampling as "no sampling"; confirm
            against the script before renaming it.
        -->
        <param name="SAMPLING" type="select" label="3(i). Select Sampling Method for imbalanced data" help="Default is no sampling. You may choose downsample or upsample.">
            <option value="garBage" selected="true">No Sampling</option>
            <option value="downsampling">downsample</option>
            <option value="upsampling">upsample</option>
        </param>

        <param name="function1" type="select" display="radio" label="Select appropriate function for algorithm">
            <option value="rfFuncs" selected="true">random forest based function</option>
            <option value="lmFuncs">linear model based function</option>
            <option value="treebagFuncs">treebag(CART) based function</option>
            <option value="nbFuncs">naive Bayes based function</option>
        </param>

        <param name="corcutoff" type="float" value="0.8" min="0.0" max="1.0" label="Select correlation cutoff" help="Values between 0 and 1. Fields above the cutoff value are removed from the data." />

        <param name="resampling" type="select" label="Select appropriate resampling method">
            <option value="repeatedcv" selected="true">repeatedcv</option>
            <option value="boot">boot</option>
            <option value="cv">cv</option>
            <option value="boot632">boot632</option>
        </param>

        <param name="repeat" type="select" label="Set Number of times to repeat" help="default is 3">
            <option value="3" selected="true">3</option>
            <option value="1">1</option>
            <option value="5">5</option>
            <option value="10">10</option>
        </param>

        <param name="number" type="select" label="Set Number of times Resample" help="default is 10">
            <option value="10" selected="true">10</option>
            <option value="5">5</option>
            <option value="15">15</option>
            <option value="20">20</option>
            <option value="25">25</option>
        </param>
    </inputs>

    <outputs>
        <!-- The profile label is built from the selected function1 value. -->
        <data format="data" name="profile" label="$function1-profile" />
        <data format="data" name="finalset" label="Selected_feature.RData " />
    </outputs>

    <!-- Help is reStructuredText; it is kept at column 0 so that indentation is not read as a block quote. -->
    <help>
.. class:: infomark

**RFE based feature selection for classification and regression**

The input file must be an RData file obtained by converting a csv file into RData.

The output "Selected_feature.RData" file is used for model building, while the profile output represents the feature selection model.

The correlation cutoff value is used for choosing independent variables. For example, a cutoff value of 0.8 removes all descriptors sharing equal or higher correlation values.

The user may choose various resampling methods in combination with the number of repeats and the number of resamples.
    </help>

    <tests>
        <test>
            <param name="input" value="testinput.RData" />
            <param name="function1" value="rfFuncs" />
            <param name="corcutoff" value="0.6" />
            <param name="resampling" value="repeatedcv" />
            <param name="repeat" value="1" />
            <param name="number" value="5" />
            <!-- Must be one of the option values declared on the SAMPLING select. -->
            <param name="SAMPLING" value="garBage" />
            <!-- Outputs are compared by size only, within the given delta in bytes. -->
            <output name="profile" file="rfprofile.RData" compare="sim_size" delta="2000000" />
            <output name="finalset" file="selected_fet.RData" compare="sim_size" delta="2000000" />
        </test>
    </tests>
</tool>