Mercurial > repos > bgruening > eden_toolbox
changeset 9:5be8af51780d draft
Uploaded
author | bgruening |
---|---|
date | Thu, 15 May 2014 12:11:27 -0400 |
parents | 9262f801d739 |
children | d495c233148c |
files | EDeN_cross_validation.xml EDeN_feature.xml EDeN_nearest_neighbor.xml EDeN_test.xml EDeN_train.xml eden.py eden_macros.xml tool_dependencies.xml |
diffstat | 8 files changed, 67 insertions(+), 61 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/EDeN_cross_validation.xml Thu May 15 12:11:27 2014 -0400 @@ -0,0 +1,50 @@ +<tool id="bg_eden_cross_validation" name="EDeN Crossvalidation" version="0.1"> + <description></description> + <macros> + <import>eden_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> + EDeN --action CROSS_VALIDATION + + --input_data_file_name $sparse_vector_infile + --file_type "SPARSE_VECTOR" + + ## target_file_name is a file with 1 or -1 one in each row, indicating the class + --target_file_name $target_infile + --binary_file_type + + --num_cross_validation_folds ${num_cross_validation_folds} + ; + cat cv_predictions | tr ' ' \\t > $outfile; + + </command> + <inputs> + <param format="eden_sparse_vector" name="sparse_vector_infile" type="data" label="Input File" help="(--input_data_file_name/-f)"/> + <param format="txt" name="target_infile" type="data" label="Target file" help="indicates with -1 and 1 the class"/> + + <param name="num_cross_validation_folds" type="integer" value="10" label="Number of cross validations" help="--num_cross_validation_folds/-c"> + <validator type="in_range" min="1" /> + </param> + </inputs> + <outputs> + <data format="tabular" name="outfile" label="Crossvalidation of ${on_string}"/> + </outputs> + <tests> + <test> + </test> + </tests> + <help> + +.. class:: infomark + +**What it does** + +The linear model is induced using the accelerated stochastic gradient descent technique by Léon Bottou and Yann LeCun. +When the target information is 0, a self-training algorithm is used to impute a positive or negative class to the unsupervised instances. +If the target information is imbalanced a minority class resampling technique is used to rebalance the training set. + +@references@ + + </help> +</tool>
--- a/EDeN_feature.xml Mon Jan 13 09:28:44 2014 -0500 +++ b/EDeN_feature.xml Thu May 15 12:11:27 2014 -0400 @@ -5,12 +5,8 @@ </macros> <expand macro="requirements" /> <command> - tmp_dir=`mktemp -d -u`; - EDeN --action FEATURE - --output_directory_path \$tmp_dir - --input_data_file_name $infile --model_file_name $outfile @@ -34,10 +30,6 @@ @input_smooth_conditional@ - ; - cp \$tmp_dir/feature $outfile; - rm \$tmp_dir -rf; - </command> <stdio> <regex match="Error" @@ -54,6 +46,7 @@ <option value="GRAPH">Graph</option> <option value="SPARSE_VECTOR">sparse vector</option> <option value="SEQUENCE">Sequence</option> + <option value="STRINGSEQ">String (can be any word like character sequence)</option> </param> <when value="GRAPH" /> <when value="SPARSE_VECTOR" /> @@ -65,8 +58,7 @@ <param name="sequence_multi_line" type="boolean" label="Sequence is in multi-line notation" truevalue="--sequence_multi_line" falsevalue="" checked="false" /> <param name="sequence_pairwise_interaction" type="boolean" label="Sequence pairwise iterations" truevalue="--sequence_pairwise_interaction" falsevalue="" checked="false" /> </when> - <when value="sdf" /> - <when value="smi" /> + <when value="STRINGSEQ" /> </conditional> <expand macro="kernel_type_options" /> @@ -114,7 +106,7 @@ </configfile> </configfiles> <outputs> - <data format="sparsevector" name="outfile" label="Sparse Vector from ${on_string}"/> + <data format="sparsevector" name="outfile" from_work_dir="feature" label="Sparse Vector from ${on_string}"/> </outputs> <tests> <test>
--- a/EDeN_nearest_neighbor.xml Mon Jan 13 09:28:44 2014 -0500 +++ b/EDeN_nearest_neighbor.xml Thu May 15 12:11:27 2014 -0400 @@ -5,7 +5,6 @@ <import>eden_macros.xml</import> </macros> <command> - tmp_dir=`mktemp -d -u`; EDeN --action NEAREST_NEIGHBOR --input_data_file_name $infile @@ -14,34 +13,6 @@ --file_type "SPARSE_VECTOR" --binary_file_type - --output_directory_path \$tmp_dir - - - ## - ## shuffling files to create the correct outputs for Galaxy - ## - - ; - cp \$tmp_dir/knn $ofile_nnlist 2> /dev/null - - ## Nearest neighbor feature representation - #if 'nnf' in str($additional_outputs).split(','): - ; - cp \$tmp_dir/knn_feature $ofile_nnf 2> /dev/null - #end if - - ## Nearest neighbor target value list - #if 'nnt' in str($additional_outputs).split(','): - ; - cp \$tmp_dir/knn_target_value $ofile_nnt 2> /dev/null - #end if - - ## Nearest neighbor kernel value list - #if 'nnk' in str($additional_outputs).split(','): - ; - cp \$tmp_dir/knn_kernel_value $ofile_nnk 2> /dev/null - #end if - </command> <inputs> @@ -65,14 +36,14 @@ </inputs> <outputs> - <data format="tabular" name="ofile_nnlist" label="${tool.name} on ${on_string}"/> - <data format="tabular" name="ofile_nnf" label="${tool.name} on ${on_string} (Nearest neighbor feature representation)"> + <data format="tabular" name="ofile_nnlist" from_work_dir="knn" label="${tool.name} on ${on_string}"/> + <data format="tabular" name="ofile_nnf" from_work_dir="knn_feature" label="${tool.name} on ${on_string} (Nearest neighbor feature representation)"> <filter>'nnf' in additional_outputs</filter> </data> - <data format="tabular" name="ofile_nnt" label="${tool.name} on ${on_string} (Nearest neighbor target value list)"> + <data format="tabular" name="ofile_nnt" from_work_dir="knn_target_value" label="${tool.name} on ${on_string} (Nearest neighbor target value list)"> <filter>'nnt' in additional_outputs</filter> </data> - <data format="tabular" name="ofile_nnk" label="${tool.name} on ${on_string} (Nearest neighbor kernel value list)"> + <data format="tabular" name="ofile_nnk" from_work_dir="knn_kernel_value" label="${tool.name} on ${on_string} (Nearest neighbor kernel value list)"> <filter>'nnk' in additional_outputs</filter> </data> </outputs>
--- a/EDeN_test.xml Mon Jan 13 09:28:44 2014 -0500 +++ b/EDeN_test.xml Thu May 15 12:11:27 2014 -0400 @@ -5,7 +5,6 @@ </macros> <expand macro="requirements" /> <command> - tmp_dir=`mktemp -d -u`; EDeN --action TEST --input_data_file_name $sparse_vector_infile @@ -14,13 +13,8 @@ --model_file_name $model_infile - --output_directory_path \$tmp_dir --minimal_output - ; - cp \$tmp_dir/prediction $output; - rm \$tmp_dir -rf - </command> <inputs> <param format="eden_sparse_vector" name="sparse_vector_infile" type="data" label="Input File" help=""/> @@ -35,7 +29,7 @@ </inputs> <outputs> - <data format="tabular" name="output" label="Generated from ${on_string}"/> + <data format="tabular" name="output" from_work_dir="prediction" label="Generated from ${on_string}"/> </outputs> <tests> <test>
--- a/EDeN_train.xml Mon Jan 13 09:28:44 2014 -0500 +++ b/EDeN_train.xml Thu May 15 12:11:27 2014 -0400 @@ -5,16 +5,12 @@ </macros> <expand macro="requirements" /> <command> - tmp_dir=`mktemp -d -u`; - EDeN --action TRAIN --input_data_file_name $infile --file_type "SPARSE_VECTOR" --binary_file_type - ##--output_directory_path \$tmp_dir - ## TODO: we need a tool that creates such a file, maybe from the metadata of an SDF file ## target_file_name is a file with 1 or -1 one in each row, indicating the class --target_file_name $target_infile
--- a/eden.py Mon Jan 13 09:28:44 2014 -0500 +++ b/eden.py Thu May 15 12:11:27 2014 -0400 @@ -3,6 +3,7 @@ """ from galaxy.datatypes.tabular import Tabular +from galaxy.datatypes import data class Gspan( Tabular ): @@ -20,7 +21,7 @@ try: return dataset.peek except: - return "Binary gSpan file (%s)" % ( data.nice_size( dataset.get_size() ) ) + return "Tabular gSpan file (%s)" % ( data.nice_size( dataset.get_size() ) ) class SparseVector( Tabular ): """Class describing an SparseVector file""" @@ -37,4 +38,4 @@ try: return dataset.peek except: - return "Binary SparseVector file (%s)" % ( data.nice_size( dataset.get_size() ) ) + return "Tabular SparseVector file (%s)" % ( data.nice_size( dataset.get_size() ) )
--- a/eden_macros.xml Mon Jan 13 09:28:44 2014 -0500 +++ b/eden_macros.xml Thu May 15 12:11:27 2014 -0400 @@ -88,7 +88,7 @@ <xml name="requirements"> <requirements> - <requirement type="package" version="1.1">eden</requirement> + <requirement type="package" version="1.3.5">eden</requirement> <yield /> </requirements> <!--<version_command>EDeN -version</version_command>--> @@ -103,12 +103,14 @@ <option value="USPK">USPK</option> <option value="DDK">DDK</option> <option value="NSDDK">ANSDDK</option> - <option value="SK">SK [NSPDK]</option> + <option value="SK">SK</option> + <option value="STRING">STRING</option> </param> <when value="NSPDK" /> <when value="WDK" /> <when value="PBK" /> <when value="USPK" /> + <when value="STRING" /> <when value="SK"> <param name="radius_two" type="integer" value="2" label="Radius Two" help=""> <validator type="in_range" min="1" />
--- a/tool_dependencies.xml Mon Jan 13 09:28:44 2014 -0500 +++ b/tool_dependencies.xml Thu May 15 12:11:27 2014 -0400 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> - <package name="eden" version="1.1"> - <repository changeset_revision="25d3b2b5b677" name="package_eden_1_1" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <package name="eden" version="1.3.5"> + <repository changeset_revision="dc13e98b0e3d" name="package_eden_1_3" owner="rnateam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency>