# HG changeset patch
# User bgruening
# Date 1400170287 14400
# Node ID 5be8af51780dc8d862143399e858b1324d01a487
# Parent 9262f801d7394479894ca3f6a79308bbe0cf39b7
Uploaded
diff -r 9262f801d739 -r 5be8af51780d EDeN_cross_validation.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/EDeN_cross_validation.xml Thu May 15 12:11:27 2014 -0400
@@ -0,0 +1,50 @@
+
+
+
+ eden_macros.xml
+
+
+
+ EDeN --action CROSS_VALIDATION
+
+ --input_data_file_name $sparse_vector_infile
+ --file_type "SPARSE_VECTOR"
+
+ ## target_file_name is a file with 1 or -1 in each row, indicating the class
+ --target_file_name $target_infile
+ --binary_file_type
+
+ --num_cross_validation_folds ${num_cross_validation_folds}
+ ;
+ cat cv_predictions | tr ' ' \\t > $outfile;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**What it does**
+
+The linear model is induced using the accelerated stochastic gradient descent technique by Léon Bottou and Yann LeCun.
+When the target information is 0, a self-training algorithm is used to impute a positive or negative class to the unsupervised instances.
+If the target information is imbalanced, a minority class resampling technique is used to rebalance the training set.
+
+@references@
+
+
+
diff -r 9262f801d739 -r 5be8af51780d EDeN_feature.xml
--- a/EDeN_feature.xml Mon Jan 13 09:28:44 2014 -0500
+++ b/EDeN_feature.xml Thu May 15 12:11:27 2014 -0400
@@ -5,12 +5,8 @@
- tmp_dir=`mktemp -d -u`;
-
EDeN --action FEATURE
- --output_directory_path \$tmp_dir
-
--input_data_file_name $infile
--model_file_name $outfile
@@ -34,10 +30,6 @@
@input_smooth_conditional@
- ;
- cp \$tmp_dir/feature $outfile;
- rm \$tmp_dir -rf;
-
Graph
+
@@ -65,8 +58,7 @@
-
-
+
@@ -114,7 +106,7 @@
-
+
diff -r 9262f801d739 -r 5be8af51780d EDeN_nearest_neighbor.xml
--- a/EDeN_nearest_neighbor.xml Mon Jan 13 09:28:44 2014 -0500
+++ b/EDeN_nearest_neighbor.xml Thu May 15 12:11:27 2014 -0400
@@ -5,7 +5,6 @@
eden_macros.xml
- tmp_dir=`mktemp -d -u`;
EDeN --action NEAREST_NEIGHBOR
--input_data_file_name $infile
@@ -14,34 +13,6 @@
--file_type "SPARSE_VECTOR"
--binary_file_type
- --output_directory_path \$tmp_dir
-
-
- ##
- ## shuffling files to create the correct outputs for Galaxy
- ##
-
- ;
- cp \$tmp_dir/knn $ofile_nnlist 2> /dev/null
-
- ## Nearest neighbor feature representation
- #if 'nnf' in str($additional_outputs).split(','):
- ;
- cp \$tmp_dir/knn_feature $ofile_nnf 2> /dev/null
- #end if
-
- ## Nearest neighbor target value list
- #if 'nnt' in str($additional_outputs).split(','):
- ;
- cp \$tmp_dir/knn_target_value $ofile_nnt 2> /dev/null
- #end if
-
- ## Nearest neighbor kernel value list
- #if 'nnk' in str($additional_outputs).split(','):
- ;
- cp \$tmp_dir/knn_kernel_value $ofile_nnk 2> /dev/null
- #end if
-
@@ -65,14 +36,14 @@
-
-
+
+
'nnf' in additional_outputs
-
+
'nnt' in additional_outputs
-
+
'nnk' in additional_outputs
diff -r 9262f801d739 -r 5be8af51780d EDeN_test.xml
--- a/EDeN_test.xml Mon Jan 13 09:28:44 2014 -0500
+++ b/EDeN_test.xml Thu May 15 12:11:27 2014 -0400
@@ -5,7 +5,6 @@
- tmp_dir=`mktemp -d -u`;
EDeN --action TEST
--input_data_file_name $sparse_vector_infile
@@ -14,13 +13,8 @@
--model_file_name $model_infile
- --output_directory_path \$tmp_dir
--minimal_output
- ;
- cp \$tmp_dir/prediction $output;
- rm \$tmp_dir -rf
-
@@ -35,7 +29,7 @@
-
+
diff -r 9262f801d739 -r 5be8af51780d EDeN_train.xml
--- a/EDeN_train.xml Mon Jan 13 09:28:44 2014 -0500
+++ b/EDeN_train.xml Thu May 15 12:11:27 2014 -0400
@@ -5,16 +5,12 @@
- tmp_dir=`mktemp -d -u`;
-
EDeN --action TRAIN
--input_data_file_name $infile
--file_type "SPARSE_VECTOR"
--binary_file_type
- ##--output_directory_path \$tmp_dir
-
## TODO: we need a tool that creates such a file, maybe from the metadata of an SDF file
## target_file_name is a file with 1 or -1 one in each row, indicating the class
--target_file_name $target_infile
diff -r 9262f801d739 -r 5be8af51780d eden.py
--- a/eden.py Mon Jan 13 09:28:44 2014 -0500
+++ b/eden.py Thu May 15 12:11:27 2014 -0400
@@ -3,6 +3,7 @@
"""
from galaxy.datatypes.tabular import Tabular
+from galaxy.datatypes import data
class Gspan( Tabular ):
@@ -20,7 +21,7 @@
try:
return dataset.peek
except:
- return "Binary gSpan file (%s)" % ( data.nice_size( dataset.get_size() ) )
+ return "Tabular gSpan file (%s)" % ( data.nice_size( dataset.get_size() ) )
class SparseVector( Tabular ):
"""Class describing an SparseVector file"""
@@ -37,4 +38,4 @@
try:
return dataset.peek
except:
- return "Binary SparseVector file (%s)" % ( data.nice_size( dataset.get_size() ) )
+ return "Tabular SparseVector file (%s)" % ( data.nice_size( dataset.get_size() ) )
diff -r 9262f801d739 -r 5be8af51780d eden_macros.xml
--- a/eden_macros.xml Mon Jan 13 09:28:44 2014 -0500
+++ b/eden_macros.xml Thu May 15 12:11:27 2014 -0400
@@ -88,7 +88,7 @@
- eden
+ eden
@@ -103,12 +103,14 @@
-
+
+
+
diff -r 9262f801d739 -r 5be8af51780d tool_dependencies.xml
--- a/tool_dependencies.xml Mon Jan 13 09:28:44 2014 -0500
+++ b/tool_dependencies.xml Thu May 15 12:11:27 2014 -0400
@@ -1,6 +1,6 @@
-
-
+
+