annotate feature_selection.xml @ 5:24f2624e8a55 draft default tip

planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
author anmoljh
date Fri, 06 Apr 2018 08:46:01 -0400
parents e7561ef82bf1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
1 <tool id="featureSelectR" name="Feature Selection" version="1.0">
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
2 <description>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
3 This tool is used to extract the best feature subset of the input data for model building.
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
4 </description>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
5
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
6 <requirements>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
7 <requirement type="package" version="3.2.1">R</requirement>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
8 <requirement type="package" version="1.0">carettools</requirement>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
9 </requirements>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
10
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
11 <stdio>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
12 <exit_code range="1:" />
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
13 </stdio>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
14
5
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
15 <command interpreter="Rscript">feature_selection.R $input $profile $finalset $function1 $resampling $repeat $number $corcutoff $SAMPLING \${GALAXY_SLOTS:-1} >/dev/null 2>&amp;1 </command>
0
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
16
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
17 <inputs>
5
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
18 <param name="input" type="data" format="rdata" label="Select input data file" help="input .RData file" />
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
19 <param name="SAMPLING" type="select" label="Select Sampling Method for imbalanced data" help="Default is no sampling. You may choose downsample or upsample" >
0
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
20 <option value="garBage" selected="true">No Sampling</option>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
21 <option value="downsampling">downsample</option>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
22 <option value="upsampling">upsample</option>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
23 </param>
5
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
24 <param name="function1" type="select" display="radio" label="Select appropriate function for algorithm" >
0
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
25 <option value="rfFuncs" selected="true">random forest based function </option>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
26 <option value="lmFuncs">linear model based function</option>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
27 <option value="treebagFuncs">treebag(CART) based function</option>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
28 <option value="nbFuncs">naive Bayes based function</option>
5
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
29 </param>
0
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
30
5
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
31 <param name="corcutoff" type="float" value= "0.8" min="0.0" max = "1.0" label="Select correlation cutoff" help="Values between 0 and 1. Fields above the cutoff value are removed from the data." />
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
32 <param name="resampling" type="select" label="Select appropriate resampling method" >
0
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
33 <option value="repeatedcv" selected="true">repeatedcv </option>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
34 <option value="boot">boot</option>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
35 <option value="cv">cv</option>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
36 <option value="boot632">boot632</option>
5
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
37 </param>
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
38
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
39 <param name="repeat" type="select" label="Set Number of times to repeat" help="default is 3 ">
0
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
40 <option value="3" selected="true">3</option>
5
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
41 <option value="5">5</option>
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
42 <option value="7">7</option>
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
43 <option value="10">10</option>
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
44 </param>
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
45 <param name="number" type="select" label="Set Number of times Resample" help="default is 10">
0
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
46 <option value="10" selected="true">10</option>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
47 <option value="5">5</option>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
48 <option value="15">15</option>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
49 <option value="20">20</option>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
50 <option value="25">25</option>
5
24f2624e8a55 planemo upload commit 2797c26edc75e09465ee41fe44091a39d49b36ab-dirty
anmoljh
parents: 4
diff changeset
51 </param>
0
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
52 </inputs>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
53
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
54 <outputs>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
55 <data format="data" name="profile" label="$function1-profile" />
2
246ffbb958df planemo upload commit b860aaaa948dbc6edc43f776572110e0a8766ab3-dirty
anmoljh
parents: 0
diff changeset
56 <data format="rdata" name="finalset" label="Selected_feature.RData "/>
0
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
57 </outputs>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
58
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
59 <tests>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
60 <test>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
61 <param name="input" value="testinput.RData"/>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
62 <param name="function1" value="rfFuncs" />
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
63 <param name="corcutoff" value="0.6" />
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
64 <param name="resampling" value="repeatedcv" />
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
65 <param name="repeat" value="3" /> <!-- must be one of the values offered by the "repeat" select input (3, 5, 7, 10) -->
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
66 <param name="number" value="5" />
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
67 <param name="SAMPLING" value="garBage" /> <!-- "garBage" is the option value behind the "No Sampling" choice of the SAMPLING select input -->
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
68 <param name="cores" value="1" />
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
69 <output name="profile" file="rfprofile.RData" compare="sim_size" delta="2000000" />
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
70 <output name="finalset" file="selected_fet.RData" compare="sim_size" delta="2000000"/>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
71 </test>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
72 </tests>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
73
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
74 <help>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
75
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
76 .. class:: infomark
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
77
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
78 **RFE based feature selection for classification and regression**
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
79
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
80 The input file must be an RData file obtained by converting a CSV file into RData.
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
81
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
82 The output "Selected_feature.RData" file is used for model building, while the profile
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
83
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
84 represents the feature selection model.
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
85
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
86 A correlation cutoff value is required for choosing independent variables. For example,
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
87
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
88 Cutoff value = 0.8 removes all descriptors sharing equal or higher correlation values.
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
89
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
90 The user may choose various resampling methods in combination with the number of repeats and the number of resamples.
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
91
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
92 </help>
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
93
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
94
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
95
f1780bac9fa6 planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
anmoljh
parents:
diff changeset
96 </tool>