# HG changeset patch # User bgruening # Date 1400170287 14400 # Node ID 5be8af51780dc8d862143399e858b1324d01a487 # Parent 9262f801d7394479894ca3f6a79308bbe0cf39b7 Uploaded diff -r 9262f801d739 -r 5be8af51780d EDeN_cross_validation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/EDeN_cross_validation.xml Thu May 15 12:11:27 2014 -0400 @@ -0,0 +1,50 @@ + + + + eden_macros.xml + + + + EDeN --action CROSS_VALIDATION + + --input_data_file_name $sparse_vector_infile + --file_type "SPARSE_VECTOR" + + ## target_file_name is a file with 1 or -1 one in each row, indicating the class + --target_file_name $target_infile + --binary_file_type + + --num_cross_validation_folds ${num_cross_validation_folds} + ; + cat cv_predictions | tr ' ' \\t > $outfile; + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**What it does** + +The linear model is induced using the accelerated stochastic gradient descent technique by Léon Bottou and Yann LeCun. +When the target information is 0, a self-training algorithm is used to impute a positive or negative class to the unsupervised instances. +If the target information is imbalanced a minority class resampling technique is used to rebalance the training set. + +@references@ + + + diff -r 9262f801d739 -r 5be8af51780d EDeN_feature.xml --- a/EDeN_feature.xml Mon Jan 13 09:28:44 2014 -0500 +++ b/EDeN_feature.xml Thu May 15 12:11:27 2014 -0400 @@ -5,12 +5,8 @@ - tmp_dir=`mktemp -d -u`; - EDeN --action FEATURE - --output_directory_path \$tmp_dir - --input_data_file_name $infile --model_file_name $outfile @@ -34,10 +30,6 @@ @input_smooth_conditional@ - ; - cp \$tmp_dir/feature $outfile; - rm \$tmp_dir -rf; - Graph + @@ -65,8 +58,7 @@ - - + @@ -114,7 +106,7 @@ - + diff -r 9262f801d739 -r 5be8af51780d EDeN_nearest_neighbor.xml --- a/EDeN_nearest_neighbor.xml Mon Jan 13 09:28:44 2014 -0500 +++ b/EDeN_nearest_neighbor.xml Thu May 15 12:11:27 2014 -0400 @@ -5,7 +5,6 @@ eden_macros.xml - tmp_dir=`mktemp -d -u`; EDeN --action NEAREST_NEIGHBOR --input_data_file_name $infile @@ -14,34 +13,6 @@ --file_type "SPARSE_VECTOR" --binary_file_type - --output_directory_path \$tmp_dir - - - ## - ## shuffling files to create the correct outputs for Galaxy - ## - - ; - cp \$tmp_dir/knn $ofile_nnlist 2> /dev/null - - ## Nearest neighbor feature representation - #if 'nnf' in str($additional_outputs).split(','): - ; - cp \$tmp_dir/knn_feature $ofile_nnf 2> /dev/null - #end if - - ## Nearest neighbor target value list - #if 'nnt' in str($additional_outputs).split(','): - ; - cp \$tmp_dir/knn_target_value $ofile_nnt 2> /dev/null - #end if - - ## Nearest neighbor kernel value list - #if 'nnk' in str($additional_outputs).split(','): - ; - cp \$tmp_dir/knn_kernel_value $ofile_nnk 2> /dev/null - #end if - @@ -65,14 +36,14 @@ - - + + 'nnf' in additional_outputs - + 'nnt' in additional_outputs - + 'nnk' in additional_outputs diff -r 9262f801d739 -r 5be8af51780d EDeN_test.xml --- a/EDeN_test.xml Mon Jan 13 09:28:44 2014 -0500 +++ b/EDeN_test.xml Thu May 15 12:11:27 2014 -0400 @@ -5,7 +5,6 @@ - tmp_dir=`mktemp -d -u`; EDeN --action TEST --input_data_file_name $sparse_vector_infile @@ -14,13 +13,8 @@ --model_file_name $model_infile - --output_directory_path \$tmp_dir --minimal_output - ; - cp \$tmp_dir/prediction $output; - rm \$tmp_dir -rf - @@ -35,7 +29,7 @@ - + diff -r 9262f801d739 -r 5be8af51780d EDeN_train.xml --- a/EDeN_train.xml Mon Jan 13 09:28:44 2014 -0500 +++ b/EDeN_train.xml Thu May 15 12:11:27 2014 -0400 @@ -5,16 +5,12 @@ - tmp_dir=`mktemp -d -u`; - EDeN --action TRAIN --input_data_file_name $infile --file_type "SPARSE_VECTOR" --binary_file_type - ##--output_directory_path \$tmp_dir - ## TODO: we need a tool that creates such a file, maybe from the metadata of an SDF file ## target_file_name is a file with 1 or -1 one in each row, indicating the class --target_file_name $target_infile diff -r 9262f801d739 -r 5be8af51780d eden.py --- a/eden.py Mon Jan 13 09:28:44 2014 -0500 +++ b/eden.py Thu May 15 12:11:27 2014 -0400 @@ -3,6 +3,7 @@ """ from galaxy.datatypes.tabular import Tabular +from galaxy.datatypes import data class Gspan( Tabular ): @@ -20,7 +21,7 @@ try: return dataset.peek except: - return "Binary gSpan file (%s)" % ( data.nice_size( dataset.get_size() ) ) + return "Tabular gSpan file (%s)" % ( data.nice_size( dataset.get_size() ) ) class SparseVector( Tabular ): """Class describing an SparseVector file""" @@ -37,4 +38,4 @@ try: return dataset.peek except: - return "Binary SparseVector file (%s)" % ( data.nice_size( dataset.get_size() ) ) + return "Tabular SparseVector file (%s)" % ( data.nice_size( dataset.get_size() ) ) diff -r 9262f801d739 -r 5be8af51780d eden_macros.xml --- a/eden_macros.xml Mon Jan 13 09:28:44 2014 -0500 +++ b/eden_macros.xml Thu May 15 12:11:27 2014 -0400 @@ -88,7 +88,7 @@ - eden + eden @@ -103,12 +103,14 @@ - + + + diff -r 9262f801d739 -r 5be8af51780d tool_dependencies.xml --- a/tool_dependencies.xml Mon Jan 13 09:28:44 2014 -0500 +++ b/tool_dependencies.xml Thu May 15 12:11:27 2014 -0400 @@ -1,6 +1,6 @@ - - + +