diff feature_selection.xml @ 0:f1780bac9fa6 draft

planemo upload commit e713bcfa1b1690f9a21ad0bd796c2d385f646e66-dirty
author anmoljh
date Mon, 12 Jun 2017 09:30:15 -0400
parents
children 246ffbb958df
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/feature_selection.xml	Mon Jun 12 09:30:15 2017 -0400
@@ -0,0 +1,106 @@
+<tool id="featureSelectR" name="Feature Selection" version="1.0">
+<description>
+ This tool extracts the best feature subset from the input data for model building.
+</description>
+
+<requirements> <!-- one environment setting plus the R and carettools packages the tool depends on -->
+    <requirement type="set_environment">FEATURE_SELECTION_R</requirement>
+    <requirement type="package" version="3.2.1">R</requirement>
+    <requirement type="package" version="1.0">carettools</requirement>
+</requirements>
+
+<stdio>
+    <exit_code range="1:" level="fatal" /> <!-- any exit status of 1 or higher fails the job; "fatal" is the default level, spelled out here -->
+</stdio>
+<!-- Runs feature_selection.R with ten positional arguments. The space before ">" matters: without it the shell reads the last argument (e.g. "1") as a file-descriptor number and never passes it to the script. Dataset paths are quoted so unusual characters cannot split them. -->
+<command interpreter="Rscript">feature_selection.R '$input' '$profile' '$finalset' $function1 $resampling $repeat $number $corcutoff $SAMPLING $cores > /dev/null 2>&amp;1 </command>
+
+<inputs>
+    <!-- Every parameter declared here is referenced by name on the tool's command line and handed to feature_selection.R as a positional argument. -->
+    <param name="input" type="data" format="data" label="Select input data file" help="input .RData file" />
+    <!-- "garBage" is the value sent when "No Sampling" is chosen; presumably the R script treats any unrecognised value as no sampling (script not shown here; confirm before renaming). -->
+    <param name="SAMPLING" type="select" label="Select Sampling Method for imbalanced data" help="Default is no sampling. You may choose downsample or upsample.">
+        <option value="garBage" selected="true">No Sampling</option>
+        <option value="downsampling">downsample</option>
+        <option value="upsampling">upsample</option>
+    </param>
+    <param name="function1" type="select" display="radio" label="Select appropriate function for algorithm">
+        <option value="rfFuncs" selected="true">random forest based function</option>
+        <option value="lmFuncs">linear model based function</option>
+        <option value="treebagFuncs">treebag (CART) based function</option>
+        <option value="nbFuncs">naive Bayes based function</option>
+    </param>
+    <param name="cores" type="select" label="Set Number of Cores" help="default is 1">
+        <option value="1" selected="true">1</option>
+        <option value="4">4</option>
+        <option value="8">8</option>
+        <option value="10">10</option>
+        <option value="16">16</option>
+    </param>
+
+    <param name="corcutoff" type="float" value="0.8" min="0.0" max="1.0" label="Select correlation cutoff" help="Values between 0 and 1. Fields above the cutoff value are removed from the data." />
+    <param name="resampling" type="select" label="Select appropriate resampling method">
+        <option value="repeatedcv" selected="true">repeatedcv</option>
+        <option value="boot">boot</option>
+        <option value="cv">cv</option>
+        <option value="boot632">boot632</option>
+    </param>
+    <param name="repeat" type="select" label="Set Number of times to repeat" help="default is 3">
+        <option value="3" selected="true">3</option>
+        <option value="5">5</option>
+        <option value="7">7</option>
+        <option value="10">10</option>
+    </param>
+    <param name="number" type="select" label="Set Number of times to Resample" help="default is 10">
+        <option value="10" selected="true">10</option>
+        <option value="5">5</option>
+        <option value="15">15</option>
+        <option value="20">20</option>
+        <option value="25">25</option>
+    </param>
+
+</inputs>
+
+<outputs> <!-- the paths of both datasets are handed to the script as $profile and $finalset on the command line -->
+    <data name="profile" format="data" label="$function1-profile" />
+    <data name="finalset" format="data" label="Selected_feature.RData " />
+</outputs>
+
+<tests>
+    <test> <!-- select parameters must use a declared option value: "repeat" offers 3/5/7/10 and "SAMPLING" uses "garBage" for no sampling -->
+        <param name="input" value="testinput.RData" />
+        <param name="function1" value="rfFuncs" />
+        <param name="corcutoff" value="0.6" />
+        <param name="resampling" value="repeatedcv" />
+        <param name="repeat" value="3" />
+        <param name="number" value="5" />
+        <param name="SAMPLING" value="garBage" />
+        <param name="cores" value="1" />
+        <output name="profile" file="rfprofile.RData" compare="sim_size" delta="2000000" />
+        <output name="finalset" file="selected_fet.RData" compare="sim_size" delta="2000000" />
+    </test>
+</tests>
+
+<help>
+
+.. class:: infomark
+
+**RFE based feature selection for classification and regression**
+
+The input file must be an RData file obtained by converting a CSV file into RData.
+
+The output file "Selected_feature.RData" is used for model building.
+
+The profile output represents the feature selection model.
+
+The correlation cutoff value is used for choosing independent variables.
+
+For example, a cutoff value of 0.8 removes all descriptors sharing equal or higher correlation values.
+
+Users may choose various resampling methods in combination with the number of repeats and the number of resamples.
+
+</help>
+
+
+
+</tool>