changeset 9:5be8af51780d draft

Uploaded
author bgruening
date Thu, 15 May 2014 12:11:27 -0400
parents 9262f801d739
children d495c233148c
files EDeN_cross_validation.xml EDeN_feature.xml EDeN_nearest_neighbor.xml EDeN_test.xml EDeN_train.xml eden.py eden_macros.xml tool_dependencies.xml
diffstat 8 files changed, 67 insertions(+), 61 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/EDeN_cross_validation.xml	Thu May 15 12:11:27 2014 -0400
@@ -0,0 +1,50 @@
+<tool id="bg_eden_cross_validation" name="EDeN Crossvalidation" version="0.1">
+    <!-- Galaxy wrapper: runs the EDeN CROSS_VALIDATION action on a sparse vector dataset and returns the predictions as a table. -->
+    <description>on sparse vector data</description>
+    <macros>
+        <import>eden_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+        EDeN --action CROSS_VALIDATION
+
+        --input_data_file_name  $sparse_vector_infile
+        --file_type "SPARSE_VECTOR"
+
+        ## target_file_name is a file with 1 or -1 in each row, indicating the class
+        --target_file_name $target_infile
+        --binary_file_type
+
+        --num_cross_validation_folds ${num_cross_validation_folds}
+        ;
+        ## cv_predictions is read from the job working directory (presumably written there by EDeN); spaces become tabs for the tabular output
+        cat cv_predictions | tr ' ' \\t > $outfile;
+    </command>
+    <inputs>
+        <param format="eden_sparse_vector" name="sparse_vector_infile" type="data" label="Input File" help="(--input_data_file_name/-f)"/>
+        <param format="txt" name="target_infile" type="data" label="Target file" help="Text file with 1 or -1 in each row, indicating the class"/>
+        <param name="num_cross_validation_folds" type="integer" value="10" label="Number of cross-validation folds" help="(--num_cross_validation_folds/-c)">
+            <validator type="in_range" min="1" />
+        </param>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="outfile" label="Crossvalidation of ${on_string}"/>
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+.. class:: infomark
+
+**What it does**
+
+Runs EDeN in cross-validation mode on a sparse vector file, using the given target file and number of folds, and returns the predictions as a tab-separated table.
+
+The linear model is induced using the accelerated stochastic gradient descent technique by Léon Bottou and Yann LeCun.
+When the target information is 0, a self-training algorithm is used to impute a positive or negative class to the unsupervised instances.
+If the target information is imbalanced a minority class resampling technique is used to rebalance the training set.
+
+@references@
+    </help>
+</tool>
--- a/EDeN_feature.xml	Mon Jan 13 09:28:44 2014 -0500
+++ b/EDeN_feature.xml	Thu May 15 12:11:27 2014 -0400
@@ -5,12 +5,8 @@
     </macros>
     <expand macro="requirements" />
     <command>
-        tmp_dir=`mktemp -d -u`;
-
         EDeN --action FEATURE
 
-        --output_directory_path \$tmp_dir
-
         --input_data_file_name $infile
         --model_file_name $outfile
 
@@ -34,10 +30,6 @@
 
         @input_smooth_conditional@
 
-        ;
-        cp \$tmp_dir/feature $outfile;
-        rm \$tmp_dir -rf;
-
     </command>
     <stdio>
         <regex match="Error" 
@@ -54,6 +46,7 @@
                 <option value="GRAPH">Graph</option>
                 <option value="SPARSE_VECTOR">sparse vector</option>
                 <option value="SEQUENCE">Sequence</option>
+                <option value="STRINGSEQ">String (can be any word like character sequence)</option>
             </param>
             <when value="GRAPH" />
             <when value="SPARSE_VECTOR" />
@@ -65,8 +58,7 @@
                 <param name="sequence_multi_line" type="boolean" label="Sequence is in multi-line notation" truevalue="--sequence_multi_line" falsevalue="" checked="false" />
                 <param name="sequence_pairwise_interaction" type="boolean" label="Sequence pairwise iterations" truevalue="--sequence_pairwise_interaction" falsevalue="" checked="false" />
             </when>
-            <when value="sdf" />
-            <when value="smi" />
+            <when value="STRINGSEQ" />
         </conditional>
 
         <expand macro="kernel_type_options" />
@@ -114,7 +106,7 @@
 </configfile>
     </configfiles>
     <outputs>
-        <data format="sparsevector" name="outfile" label="Sparse Vector from ${on_string}"/>
+        <data format="sparsevector" name="outfile" from_work_dir="feature" label="Sparse Vector from ${on_string}"/>
     </outputs>
     <tests>
         <test>
--- a/EDeN_nearest_neighbor.xml	Mon Jan 13 09:28:44 2014 -0500
+++ b/EDeN_nearest_neighbor.xml	Thu May 15 12:11:27 2014 -0400
@@ -5,7 +5,6 @@
         <import>eden_macros.xml</import>
     </macros>
     <command>
-        tmp_dir=`mktemp -d -u`;
         EDeN --action NEAREST_NEIGHBOR
 
         --input_data_file_name $infile
@@ -14,34 +13,6 @@
         --file_type "SPARSE_VECTOR"
         --binary_file_type
 
-        --output_directory_path \$tmp_dir
-
-
-        ##
-        ## shuffling files to create the correct outputs for Galaxy
-        ##
-
-        ;
-        cp \$tmp_dir/knn $ofile_nnlist 2> /dev/null
-        
-        ## Nearest neighbor feature representation
-        #if 'nnf' in str($additional_outputs).split(','):
-            ;
-            cp \$tmp_dir/knn_feature $ofile_nnf 2> /dev/null
-        #end if
-
-        ## Nearest neighbor target value list
-        #if 'nnt' in str($additional_outputs).split(','):
-            ;
-            cp \$tmp_dir/knn_target_value $ofile_nnt 2> /dev/null
-        #end if
-
-        ## Nearest neighbor kernel value list
-        #if 'nnk' in str($additional_outputs).split(','):
-            ;
-            cp \$tmp_dir/knn_kernel_value $ofile_nnk 2> /dev/null
-        #end if
-
     </command>
     <inputs>
 
@@ -65,14 +36,14 @@
 
     </inputs>
     <outputs>
-        <data format="tabular" name="ofile_nnlist" label="${tool.name} on ${on_string}"/>
-        <data format="tabular" name="ofile_nnf" label="${tool.name} on ${on_string} (Nearest neighbor feature representation)">
+        <data format="tabular" name="ofile_nnlist" from_work_dir="knn" label="${tool.name} on ${on_string}"/>
+        <data format="tabular" name="ofile_nnf" from_work_dir="knn_feature" label="${tool.name} on ${on_string} (Nearest neighbor feature representation)">
           <filter>'nnf' in additional_outputs</filter>
         </data>
-        <data format="tabular" name="ofile_nnt" label="${tool.name} on ${on_string} (Nearest neighbor target value list)">
+        <data format="tabular" name="ofile_nnt" from_work_dir="knn_target_value" label="${tool.name} on ${on_string} (Nearest neighbor target value list)">
           <filter>'nnt' in additional_outputs</filter>
         </data>
-        <data format="tabular" name="ofile_nnk" label="${tool.name} on ${on_string} (Nearest neighbor kernel value list)">
+        <data format="tabular" name="ofile_nnk" from_work_dir="knn_kernel_value" label="${tool.name} on ${on_string} (Nearest neighbor kernel value list)">
           <filter>'nnk' in additional_outputs</filter>
         </data>
     </outputs>
--- a/EDeN_test.xml	Mon Jan 13 09:28:44 2014 -0500
+++ b/EDeN_test.xml	Thu May 15 12:11:27 2014 -0400
@@ -5,7 +5,6 @@
     </macros>
     <expand macro="requirements" />
     <command>
-        tmp_dir=`mktemp -d -u`;
         EDeN --action TEST
 
         --input_data_file_name  $sparse_vector_infile
@@ -14,13 +13,8 @@
 
         --model_file_name $model_infile
 
-        --output_directory_path \$tmp_dir
         --minimal_output 
 
-        ;
-        cp \$tmp_dir/prediction $output;
-        rm \$tmp_dir -rf
-
     </command>
     <inputs>
         <param format="eden_sparse_vector" name="sparse_vector_infile" type="data" label="Input File" help=""/>
@@ -35,7 +29,7 @@
 
     </inputs>
     <outputs>
-        <data format="tabular" name="output" label="Generated from ${on_string}"/>
+        <data format="tabular" name="output" from_work_dir="prediction" label="Generated from ${on_string}"/>
     </outputs>
     <tests>
         <test>
--- a/EDeN_train.xml	Mon Jan 13 09:28:44 2014 -0500
+++ b/EDeN_train.xml	Thu May 15 12:11:27 2014 -0400
@@ -5,16 +5,12 @@
     </macros>
     <expand macro="requirements" />
     <command>
-        tmp_dir=`mktemp -d -u`;
-
         EDeN --action TRAIN
 
         --input_data_file_name $infile
         --file_type "SPARSE_VECTOR"
         --binary_file_type
 
-        ##--output_directory_path \$tmp_dir
-
         ## TODO: we need a tool that creates such a file, maybe from the metadata of an SDF file
         ## target_file_name is a file with 1 or -1 one in each row, indicating the class
         --target_file_name $target_infile
--- a/eden.py	Mon Jan 13 09:28:44 2014 -0500
+++ b/eden.py	Thu May 15 12:11:27 2014 -0400
@@ -3,6 +3,7 @@
 """
 
 from galaxy.datatypes.tabular import Tabular
+from galaxy.datatypes import data
 
 
 class Gspan( Tabular ):
@@ -20,7 +21,7 @@
         try:
             return dataset.peek
         except:
-            return "Binary gSpan file (%s)" % ( data.nice_size( dataset.get_size() ) )
+            return "Tabular gSpan file (%s)" % ( data.nice_size( dataset.get_size() ) )
 
 class SparseVector( Tabular ):
     """Class describing an SparseVector file"""
@@ -37,4 +38,4 @@
         try:
             return dataset.peek
         except:
-            return "Binary SparseVector file (%s)" % ( data.nice_size( dataset.get_size() ) )
+            return "Tabular SparseVector file (%s)" % ( data.nice_size( dataset.get_size() ) )
--- a/eden_macros.xml	Mon Jan 13 09:28:44 2014 -0500
+++ b/eden_macros.xml	Thu May 15 12:11:27 2014 -0400
@@ -88,7 +88,7 @@
 
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="1.1">eden</requirement>
+            <requirement type="package" version="1.3.5">eden</requirement>
             <yield />
         </requirements>
         <!--<version_command>EDeN -version</version_command>-->
@@ -103,12 +103,14 @@
                 <option value="USPK">USPK</option>
                 <option value="DDK">DDK</option>
                 <option value="NSDDK">ANSDDK</option>
-                <option value="SK">SK [NSPDK]</option>
+                <option value="SK">SK</option>
+                <option value="STRING">STRING</option>
             </param>
             <when value="NSPDK" />
             <when value="WDK" />
             <when value="PBK" />
             <when value="USPK" />
+            <when value="STRING" />
             <when value="SK">
                 <param name="radius_two" type="integer" value="2" label="Radius Two" help="">
                     <validator type="in_range" min="1" />
--- a/tool_dependencies.xml	Mon Jan 13 09:28:44 2014 -0500
+++ b/tool_dependencies.xml	Thu May 15 12:11:27 2014 -0400
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="eden" version="1.1">
-        <repository changeset_revision="25d3b2b5b677" name="package_eden_1_1" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    <package name="eden" version="1.3.5">
+        <repository changeset_revision="dc13e98b0e3d" name="package_eden_1_3" owner="rnateam" toolshed="http://testtoolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>