diff EDeN_train.xml @ 7:59b3b6ce10bb draft

Uploaded
author bgruening
date Tue, 29 Oct 2013 11:07:49 -0400
parents a3edc97e056c
children 9262f801d739
line wrap: on
line diff
--- a/EDeN_train.xml	Thu Sep 05 12:52:45 2013 -0400
+++ b/EDeN_train.xml	Tue Oct 29 11:07:49 2013 -0400
@@ -1,14 +1,20 @@
 <tool id="bg_eden_train" name="EDeN Train" version="0.1">
     <description></description>
-    <requirements>
-    </requirements>
+    <macros>
+        <import>eden_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
     <command>
+        tmp_dir=`mktemp -d -u`;
+
         EDeN --action TRAIN
 
         --input_data_file_name $infile
         --file_type "SPARSE_VECTOR"
         --binary_file_type
 
+        ##--output_directory_path \$tmp_dir
+
         ## TODO: we need a tool that creates such a file, maybe from the metadata of an SDF file
         ## target_file_name is a file with 1 or -1 one in each row, indicating the class
         --target_file_name $target_infile
@@ -40,15 +46,15 @@
 
         <!-- Semi-supervised-settings -->
         <param name="threshold" type="float" value="1.0" label="Top and low quantile" 
-            help="Only the top and low quantile will be used as positives and negative instances. A threshold of 1 means that all unsupervised instaces are used in the next phase. ">
+            help="Only the top and low quantile will be used as positive and negative instances. A threshold of 1 means that all unsupervised instances are used in the next phase.">
             <validator type="in_range" min="0.0" />
         </param>
-        <param name="num_iterations" type="integer" value="3" label="Number of iterations">
+        <param name="num_iterations" type="integer" value="3" label="Number of iterations" />
         <param name="only_negative" type="boolean" label="Induce only negative class instances." truevalue="--only_negative" falsevalue="" checked="false" />
         <param name="only_positive" type="boolean" label="Induce only positive class instances." truevalue="--only_positive" falsevalue="" checked="false" />
 
 
-        <param name="topological_regularization_decay_rate" type="float" value="0.01" label="Topological regularization decay rate" />
+        <param name="topological_regularization_decay_rate" type="float" value="0.01" label="Topological regularization decay rate">
             <validator type="in_range" min="0.0" />
         </param>
         <param name="topological_regularization_num_neighbors" type="integer" value="0" label="Topological regularization number of neighbors">
@@ -58,7 +64,7 @@
             <validator type="in_range" min="0" />
         </param>
 
-        <param name="random_seed" type="integer" value="1" label="Randam Seed" help="" />
+        <param name="random_seed" type="integer" value="1" label="Random Seed" help="" />
 
     </inputs>
     <outputs>
@@ -80,13 +86,10 @@
 When the target information is 0, a self-training algorithm is used to impute a positive or negative class to the unsupervised instances.
 If the target information is imbalanced a minority class resampling technique is used to rebalance the training set.
 
-This tool is part of the EDeN (Explicit Decomposition with Neighborhoods) suite, developed by Fabrizio Costa.
-
+@references@
 
-REFERENCES
-==========
-
-The code for Stochastic Gradient Descent SVM is adapted from http://leon.bottou.org/projects/sgd. Léon Bottou and Yann LeCun, ''Large Scale Online Learning'', Advances in Neural Information Processing Systems 16, Edited by Sebastian Thrun, Lawrence Saul and Bernhard Schölkopf, MIT Press, Cambridge, MA, 2004.
+The code for Stochastic Gradient Descent SVM is adapted from http://leon.bottou.org/projects/sgd. Léon Bottou and Yann LeCun, ''Large Scale Online Learning'', 
+Advances in Neural Information Processing Systems 16, Edited by Sebastian Thrun, Lawrence Saul and Bernhard Schölkopf, MIT Press, Cambridge, MA, 2004.