Mercurial > repos > bgruening > eden_toolbox
diff EDeN_train.xml @ 7:59b3b6ce10bb draft
Uploaded
| author | bgruening |
|---|---|
| date | Tue, 29 Oct 2013 11:07:49 -0400 |
| parents | a3edc97e056c |
| children | 9262f801d739 |
line wrap: on
line diff
--- a/EDeN_train.xml Thu Sep 05 12:52:45 2013 -0400 +++ b/EDeN_train.xml Tue Oct 29 11:07:49 2013 -0400 @@ -1,14 +1,20 @@ <tool id="bg_eden_train" name="EDeN Train" version="0.1"> <description></description> - <requirements> - </requirements> + <macros> + <import>eden_macros.xml</import> + </macros> + <expand macro="requirements" /> <command> + tmp_dir=`mktemp -d -u`; + EDeN --action TRAIN --input_data_file_name $infile --file_type "SPARSE_VECTOR" --binary_file_type + ##--output_directory_path \$tmp_dir + ## TODO: we need a tool that creates such a file, maybe from the metadata of an SDF file ## target_file_name is a file with 1 or -1 one in each row, indicating the class --target_file_name $target_infile @@ -40,15 +46,15 @@ <!-- Semi-supervised-settings --> <param name="threshold" type="float" value="1.0" label="Top and low quantile" - help="Only the top and low quantile will be used as positives and negative instances. A threshold of 1 means that all unsupervised instaces are used in the next phase. "> + help="Only the top and low quantile will be used as positives and negative instances. A threshold of 1 means that all unsupervised instaces are used in the next phase."> <validator type="in_range" min="0.0" /> </param> - <param name="num_iterations" type="integer" value="3" label="Number of iterations"> + <param name="num_iterations" type="integer" value="3" label="Number of iterations" /> <param name="only_negative" type="boolean" label="Induce only negative class instances." truevalue="--only_negative" falsevalue="" checked="false" /> <param name="only_positive" type="boolean" label="Induce only positive class instances." 
truevalue="--only_positive" falsevalue="" checked="false" /> - <param name="topological_regularization_decay_rate" type="float" value="0.01" label="Topological regularization decay rate" /> + <param name="topological_regularization_decay_rate" type="float" value="0.01" label="Topological regularization decay rate"> <validator type="in_range" min="0.0" /> </param> <param name="topological_regularization_num_neighbors" type="integer" value="0" label="Topological regularization number of neighbors"> @@ -58,7 +64,7 @@ <validator type="in_range" min="0" /> </param> - <param name="random_seed" type="integer" value="1" label="Randam Seed" help="" /> + <param name="random_seed" type="integer" value="1" label="Random Seed" help="" /> </inputs> <outputs> @@ -80,13 +86,10 @@ When the target information is 0, a self-training algorithm is used to impute a positive or negative class to the unsupervised instances. If the target information is imbalanced a minority class resampling technique is used to rebalance the training set. -This tool is part of the EDeN (Explicit Decomposition with Neighborhoods) suite, developed by Fabrizio Costa. - +@references@ -REFERENCES -========== - -The code for Stochastic Gradient Descent SVM is adapted from http://leon.bottou.org/projects/sgd. Léon Bottou and Yann LeCun, ''Large Scale Online Learning'', Advances in Neural Information Processing Systems 16, Edited by Sebastian Thrun, Lawrence Saul and Bernhard Schölkopf, MIT Press, Cambridge, MA, 2004. +The code for Stochastic Gradient Descent SVM is adapted from http://leon.bottou.org/projects/sgd. Léon Bottou and Yann LeCun, ''Large Scale Online Learning'', +Advances in Neural Information Processing Systems 16, Edited by Sebastian Thrun, Lawrence Saul and Bernhard Schölkopf, MIT Press, Cambridge, MA, 2004.