comparison toolrfe.xml @ 1:69b8598d9338 draft

Uploaded
author deepakjadmin
date Wed, 23 Mar 2016 04:53:29 -0400
parents
children 31cd51e67666
comparison
equal deleted inserted replaced
0:16c9aaf658e6 1:69b8598d9338
1 <tool id="featureSelectR" name="Feature Selection" >
2 <description>
3 This tool extracts the best feature subset from the input data for use in model building.
4 </description>
5 <!--command interpreter="bash">step3run.sh $file1 $model $output1 2>/dev/null </command-->
6 <requirements> <!-- Dependencies declared by the tool: two set_environment variables plus pinned R (3.2.0) and caret-tools (1.0.0) packages. -->
7 <requirement type="set_environment">FEATURE_SELECTION_R</requirement>
8 <requirement type="set_environment">R_ROOT_DIR</requirement>
9 <requirement type="package" version="3.2.0">R</requirement>
10 <requirement type="package" version="1.0.0">caret-tools</requirement>
11 </requirements>
12 <command interpreter="Rscript">feature_selection.R $input $profile $finalset $function1 $resampling $repeat $number $corcutoff > /dev/null 2>&amp;1 </command> <!-- Runs feature_selection.R through Rscript with positional arguments: input dataset, the two output paths (profile, finalset), then the selected function, resampling method, repeat count, resample count and correlation cutoff. Both stdout and stderr are redirected to /dev/null. -->
13 <!-- User-adjustable inputs; every parameter below is passed to feature_selection.R as a positional argument on the command line. -->
14 <inputs>
15 <param name="input" format="RData" type="data" label="Select input data file" help="input .RData file" />
16 <param name="function1" type="select" display="radio" label="Select appropriate function for algorithm" >
17 <option value="rfFuncs" selected="true">random forest based function </option>
18 <option value="lmFuncs">linear model based function</option>
19 <option value="treebagFuncs">treebag(CART) based function</option>
20 <option value="nbFuncs">naive Bayes based function</option>
21 </param>
22 <!-- Correlation cutoff: a float restricted to the range 0.0 to 1.0, defaulting to 0.8. -->
23 <param name="corcutoff" type="float" value="0.8" min="0.0" max="1.0" label="Select correlation cutoff" help="Value between 0 and 1. Fields with correlation above the cutoff value are removed from the data." />
24 <param name="resampling" type="select" label="Select appropriate resampling method" >
25 <option value="repeatedcv" selected="true">repeatedcv </option>
26 <option value="boot">boot</option>
27 <option value="cv">cv</option>
28 <option value="boot632">boot632</option>
29 </param>
30 <param name="repeat" type="select" label="Set Number of times to repeat" help="default is 3 ">
31 <option value="3" selected="true">3</option>
32 <option value="1">1</option>
33 <option value="5">5</option>
34 <option value="10">10</option>
35 </param>
36 <param name="number" type="select" label="Set Number of times Resample" help="default is 10">
37 <option value="10" selected="true">10</option>
38 <option value="5">5</option>
39 <option value="15">15</option>
40 <option value="20">20</option>
41 <option value="25">25</option>
42 </param>
43
44 </inputs>
45 <outputs> <!-- Two RData outputs; their paths are handed to feature_selection.R as $profile and $finalset on the command line. -->
46 <data format="RData" name="profile" label="$function1-profile" />
47 <data format="RData" name="finalset" label="Selected_feature.RData"/>
48 </outputs>
49 <help>
50 .. class:: infomark
51
52 **RFE based feature selection for classification and regression**
53
54 The input file must be an RData file obtained by converting a CSV file into RData.
55
56 The output "Selected_feature.RData" file is used for model building, while the profile output
57
58 represents the feature selection model.
59
60 A correlation cutoff value is used to choose independent variables. For example,
61
62 a cutoff value of 0.8 removes all descriptors sharing equal or higher correlation values.
63
64 The user may choose various resampling methods in combination with the number of repeats and the number of resamples.
65
66
67
68 </help>
69
70
71 <tests> <!-- One functional test: rfFuncs with repeatedcv resampling, 1 repeat, 5 resamples and a correlation cutoff of 0.6. -->
72 <test>
73 <param name="input" value="testinput.RData"/>
74 <param name="function1" value="rfFuncs" />
75 <param name="corcutoff" value="0.6" />
76 <param name="resampling" value="repeatedcv" />
77 <param name="repeat" value="1" />
78 <param name="number" value="5" />
79 <!-- Both outputs are checked against reference files using the sim_size comparison with a delta of 2000000. -->
80
81 <output name="profile" file="rfprofile.RData" compare="sim_size" delta="2000000" />
82 <output name="finalset" file="selected_fet.RData" compare="sim_size" delta="2000000"/>
83 </test>
84 </tests>
85
86
87 </tool>