Mercurial > repos > anmoljh > feature_selection
diff feature_selection.xml @ 0:f1780bac9fa6 draft
planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
| author | anmoljh |
|---|---|
| date | Mon, 12 Jun 2017 09:30:15 -0400 |
| parents | |
| children | 246ffbb958df |
line wrap: on
line diff
<!--
  Galaxy tool wrapper "featureSelectR" (feature_selection.xml).

  Recovered from a Mercurial diff view in which the whole file was a single
  added hunk (/dev/null to b/feature_selection.xml, Mon Jun 12 09:30:15 2017
  -0400, originally "@@ -0,0 +1,106 @@"). The leading "+" diff markers have
  been removed and the markup re-indented.

  The tool runs feature_selection.R through Rscript with ten positional
  arguments (see the command element) and declares two output datasets:
  "profile" and "finalset".
-->
<tool id="featureSelectR" name="Feature Selection" version="1.0">
    <description>
        This tool is used to extract the best feature subsets from the input data for model building.
    </description>

    <requirements>
        <requirement type="set_environment">FEATURE_SELECTION_R</requirement>
        <requirement type="package" version="3.2.1">R</requirement>
        <requirement type="package" version="1.0">carettools</requirement>
    </requirements>

    <!-- Exit codes of 1 or higher are matched by this rule. -->
    <stdio>
        <exit_code range="1:" />
    </stdio>

    <!--
      Positional arguments, in the order the script receives them:
        input, profile, finalset, function1, resampling, repeat, number,
        corcutoff, SAMPLING, cores.

      The space between $cores and ">" is required. Every "cores" option is a
      bare number, and a number written directly in front of ">" is read by
      the shell as a file descriptor (for example "1> /dev/null"), so the
      script would never receive the tenth argument.

      The command is wrapped in CDATA so that the "&" of the stderr
      redirection does not have to be entity-escaped.

      NOTE(review): all script output is discarded, so only the exit code
      reports a failure. The "interpreter" attribute is marked deprecated in
      newer Galaxy tool schemas; it is kept to preserve how the script is
      located.
    -->
    <command interpreter="Rscript"><![CDATA[feature_selection.R $input $profile $finalset $function1 $resampling $repeat $number $corcutoff $SAMPLING $cores > /dev/null 2>&1]]></command>

    <inputs>
        <param name="input" type="data" format="data" label="Select input data file" help="input .RData file" />

        <!--
          The option values are passed to the script verbatim.
          NOTE(review): "garBage" is the value sent for "No Sampling";
          presumably the script treats any value other than the two sampling
          names as no sampling. Confirm in feature_selection.R before
          renaming it.
        -->
        <param name="SAMPLING" type="select" label="Select Sampling Method for imbalanced data" help="Default is with No sampling. You may choose downsample or upsample">
            <option value="garBage" selected="true">No Sampling</option>
            <option value="downsampling">downsample</option>
            <option value="upsampling">upsample</option>
        </param>

        <param name="function1" type="select" display="radio" label="Select appropriate function for algorithm">
            <option value="rfFuncs" selected="true">random forest based function </option>
            <option value="lmFuncs">linear model based function</option>
            <option value="treebagFuncs">treebag(CART) based function</option>
            <option value="nbFuncs">naive bayes based function</option>
        </param>

        <param name="cores" type="select" label="Set Number of Cores " help="default is 1">
            <option value="1" selected="true">1</option>
            <option value="4">4</option>
            <option value="8">8</option>
            <option value="10">10</option>
            <option value="16">16</option>
        </param>

        <param name="corcutoff" type="float" value="0.8" min="0.0" max="1.0" label="Select correlation cutoff" help="Values between 0-1. Fields above the cutoff value are removed from the data" />

        <param name="resampling" type="select" label="Select appropriate resampling method">
            <option value="repeatedcv" selected="true">repeatedcv </option>
            <option value="boot">boot</option>
            <option value="cv">cv</option>
            <option value="boot632">boot632</option>
        </param>

        <param name="repeat" type="select" label="Set Number of times to repeat" help="default is 3 ">
            <option value="3" selected="true">3</option>
            <option value="5">5</option>
            <option value="7">7</option>
            <option value="10">10</option>
        </param>

        <param name="number" type="select" label="Set Number of times Resample" help="default is 10">
            <option value="10" selected="true">10</option>
            <option value="5">5</option>
            <option value="15">15</option>
            <option value="20">20</option>
            <option value="25">25</option>
        </param>
    </inputs>

    <outputs>
        <data format="data" name="profile" label="$function1-profile" />
        <data format="data" name="finalset" label="Selected_feature.RData "/>
    </outputs>

    <!--
      Every select parameter below uses a value that is declared as an option
      in the inputs section ("repeat" offers 3, 5, 7 and 10; "SAMPLING" offers
      garBage, downsampling and upsampling). Outputs are compared by size
      only, with a 2,000,000 byte tolerance.
    -->
    <tests>
        <test>
            <param name="input" value="testinput.RData"/>
            <param name="function1" value="rfFuncs" />
            <param name="corcutoff" value="0.6" />
            <param name="resampling" value="repeatedcv" />
            <param name="repeat" value="3" />
            <param name="number" value="5" />
            <param name="SAMPLING" value="garBage" />
            <param name="cores" value="1" />
            <output name="profile" file="rfprofile.RData" compare="sim_size" delta="2000000" />
            <output name="finalset" file="selected_fet.RData" compare="sim_size" delta="2000000"/>
        </test>
    </tests>

    <!-- The help body is reStructuredText, so it stays at column 0. -->
    <help>

.. class:: infomark

**RFE based feature selection for classification and regression**

The input file must be an RData file obtained by converting a csv file into RData.

The output "Selected_feature.RData" file is used for model building, while the profile output represents the feature selection model.

A correlation cutoff value is used for choosing independent variables. For example, a cutoff value of 0.8 removes all descriptors sharing equal or higher correlation values.

The user may choose various resampling methods in combination with the number of repeats and the number of resamples.

    </help>
</tool>
