5
|
<tool id="featureSelectR" name="Feature Selection">
    <!--
        Galaxy tool wrapper around feature_selection.R, executed through Rscript.
        It takes one input dataset plus the tuning parameters declared under
        <inputs> and writes two output datasets: "profile" and "finalset".
    -->
    <description>
        This tool extracts the best feature subsets from the input data for model building.
    </description>
    <!--command interpreter="bash">step3run.sh $file1 $model $output1 2>/dev/null </command-->
    <requirements>
        <requirement type="set_environment">FEATURE_SELECTION_R</requirement>
        <requirement type="set_environment">R_ROOT_DIR</requirement>
        <requirement type="package" version="3.2.0">R</requirement>
        <requirement type="package" version="1.0.0">caret-tools</requirement>
    </requirements>
    <!--
        Arguments are positional, in this order:
        input, profile, finalset, function1, resampling, repeat, number,
        corcutoff, SAMPLING, cores.
        Both stdout and stderr of the script are redirected to /dev/null.
        The command is wrapped in CDATA so the shell redirection can contain a
        literal ampersand, which is otherwise illegal in XML character data.
    -->
    <command interpreter="Rscript"><![CDATA[feature_selection.R $input $profile $finalset $function1 $resampling $repeat $number $corcutoff $SAMPLING $cores > /dev/null 2>&1]]></command>

    <inputs>
        <param name="input" type="data" label="Select input data file" help="input .RData file" />

        <!-- NOTE(review): "garBage" is presumably a placeholder that feature_selection.R
             treats as "no sampling"; confirm against the script before renaming it. -->
        <param name="SAMPLING" type="select" label="Select Sampling Method for imbalanced data" help="Default is no sampling. You may choose downsample or upsample.">
            <option value="garBage" selected="true">No Sampling</option>
            <option value="downsampling">downsample</option>
            <option value="upsampling">upsample</option>
        </param>

        <param name="function1" type="select" display="radio" label="Select appropriate function for algorithm">
            <option value="rfFuncs" selected="true">random forest based function</option>
            <option value="lmFuncs">linear model based function</option>
            <option value="treebagFuncs">treebag(CART) based function</option>
            <option value="nbFuncs">naive Bayes based function</option>
        </param>

        <param name="cores" type="select" label="Set Number of Cores" help="default is 1">
            <option value="1" selected="true">1</option>
            <option value="4">4</option>
            <option value="8">8</option>
            <option value="10">10</option>
            <option value="16">16</option>
        </param>

        <param name="corcutoff" type="float" value="0.8" min="0.0" max="1.0" label="Select correlation cutoff" help="Values between 0 and 1. Fields above the cutoff value are removed from the data." />

        <param name="resampling" type="select" label="Select appropriate resampling method">
            <option value="repeatedcv" selected="true">repeatedcv</option>
            <option value="boot">boot</option>
            <option value="cv">cv</option>
            <option value="boot632">boot632</option>
        </param>

        <param name="repeat" type="select" label="Set Number of times to repeat" help="default is 3">
            <option value="3" selected="true">3</option>
            <option value="5">5</option>
            <option value="7">7</option>
            <option value="10">10</option>
        </param>

        <param name="number" type="select" label="Set Number of times to Resample" help="default is 10">
            <option value="10" selected="true">10</option>
            <option value="5">5</option>
            <option value="15">15</option>
            <option value="20">20</option>
            <option value="25">25</option>
        </param>
    </inputs>

    <outputs>
        <data type="data" format="data" name="profile" label="$function1-profile" />
        <data type="data" format="data" name="finalset" label="Selected_feature.RData "/>
    </outputs>

    <help>
.. class:: infomark

**RFE based feature selection for classification and regression**

The input file must be an RData file obtained by converting a csv file into RData.

The output "Selected_feature.RData" file is used for model building, while the profile represents the feature selection model.

A correlation cutoff value is used for choosing independent variables. For example, a cutoff value of 0.8 removes all descriptors sharing equal or higher correlation values.

The user may choose various resampling methods in combination with the number of repeats and the number of resamples.
    </help>

    <tests>
        <!-- Every select parameter below uses one of the option values declared
             under <inputs>; values outside that list are rejected. -->
        <test>
            <param name="input" value="testinput.RData"/>
            <param name="function1" value="rfFuncs" />
            <param name="corcutoff" value="0.6" />
            <param name="resampling" value="repeatedcv" />
            <param name="repeat" value="3" />
            <param name="number" value="5" />
            <param name="SAMPLING" value="garBage" />
            <param name="cores" value="1" />
            <output name="profile" file="rfprofile.RData" compare="sim_size" delta="2000000" />
            <output name="finalset" file="selected_fet.RData" compare="sim_size" delta="2000000"/>
        </test>
    </tests>
</tool>
|