Mercurial > repos > bgruening > eden_toolbox
view EDeN_feature.xml @ 7:59b3b6ce10bb draft
Uploaded
author | bgruening |
---|---|
date | Tue, 29 Oct 2013 11:07:49 -0400 |
parents | 7d49e315cb95 |
children | 5be8af51780d |
line wrap: on
line source
<tool id="bg_eden_feature" name="EDeN Converters" version="0.1">
    <!--
        Galaxy wrapper around the EDeN command line tool, FEATURE action.
        The command lets EDeN write into a temporary directory, copies the
        resulting "feature" file to the Galaxy output dataset and removes the
        temporary directory again.
        Tokens of the form @name@ and the expand elements are resolved from
        eden_macros.xml.
    -->
    <description>to produce sparse vectors</description>
    <macros>
        <import>eden_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        NOTE(review): "mktemp -d -u" only prints an unused name and does not
        create the directory; presumably EDeN creates the output directory
        itself. Confirm, and drop -u if EDeN accepts an existing directory.
        Parameters declared inside the file_type_opts conditional are
        referenced through the conditional's name.
    -->
    <command>
        tmp_dir=`mktemp -d -u`;
        EDeN
            --action FEATURE
            --output_directory_path \$tmp_dir
            --input_data_file_name '$infile'
            --model_file_name '$outfile'
            --file_type $file_type_opts.file_type_opts_selector
            ## create a binary sparse vector as output
            --binary_file_type
            @normalization_kernel_hash_radius_dist_vertex@
            --kernel_type $kernel_type_opts.kernel_type_opts_selector
            --graph_type $graph_type
            #if $file_type_opts.file_type_opts_selector == 'SEQUENCE':
                --sequence_degree $file_type_opts.sequence_degree
                $file_type_opts.sequence_token
                $file_type_opts.sequence_multi_line
                $file_type_opts.sequence_pairwise_interaction
            #end if
            @kernel_type_options@
            @input_smooth_conditional@
        ;
        cp \$tmp_dir/feature '$outfile';
        rm -rf \$tmp_dir;
    </command>
    <!-- Any occurrence of "Error" on stdout or stderr marks the job as failed. -->
    <stdio>
        <regex match="Error"
               source="both"
               level="fatal"
               description="An error occurred with your Job." />
    </stdio>
    <inputs>
        <param format="gspan,txt" name="infile" type="data" label="Input file"
               help="File can contain Graph datatypes (gSpan, sparse vector, sequence)."/>
        <conditional name="file_type_opts">
            <param name="file_type_opts_selector" type="select" label="Type of Input file">
                <option value="GRAPH">Graph</option>
                <option value="SPARSE_VECTOR">sparse vector</option>
                <option value="SEQUENCE">Sequence</option>
            </param>
            <when value="GRAPH" />
            <when value="SPARSE_VECTOR" />
            <when value="SEQUENCE">
                <param name="sequence_degree" type="integer" value="1" label="Sequence degree" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="sequence_token" type="boolean" label="Sequence token"
                       truevalue="--sequence_token" falsevalue="" checked="false" />
                <param name="sequence_multi_line" type="boolean" label="Sequence is in multi-line notation"
                       truevalue="--sequence_multi_line" falsevalue="" checked="false" />
                <param name="sequence_pairwise_interaction" type="boolean" label="Sequence pairwise interactions"
                       truevalue="--sequence_pairwise_interaction" falsevalue="" checked="false" />
            </when>
        </conditional>
        <expand macro="kernel_type_options" />
        <expand macro="graph_types" />
        <expand macro="input_smooth_conditional" />
        <expand macro="normalization_kernel_hash_radius_dist_vertex" />
    </inputs>
    <configfiles>
<!-- The strange indentation is necessary, otherwise we get line breaks or white space in our file -->
<!--
    Both config files expand a comma separated list of indices and "a-b"
    ranges (smooth_opts.row_index / smooth_opts.col_index) into one index per
    line. They are only filled when the smooth option is selected.
    NOTE(review): range() excludes its upper bound, so "2-5" yields 2, 3, 4.
    Confirm that an exclusive upper bound is intended.
-->
        <configfile name="row_index_file_name">#if $smooth_opts.smooth_opts_selector == 'smooth':
#for $element in str( $smooth_opts.row_index ).split(','):
#set $element = $element.strip().split('-')
#if len($element) == 2:
#for $index in range( int($element[0]), int($element[1]) ):
## the following writes the value at the beginning of each line
## #echo $index# inserts a line break automatically, but do not write it
## to the beginning of the line
#echo '%s\n' % $index
#end for
#else:
#echo '%s\n' % $element[0]
#end if
#end for
#end if
</configfile>
        <configfile name="col_index_file_name">#if $smooth_opts.smooth_opts_selector == 'smooth':
#for $element in str( $smooth_opts.col_index ).split(','):
#set $element = $element.strip().split('-')
#if len($element) == 2:
#for $index in range( int($element[0]), int($element[1]) ):
## the following writes the value at the beginning of each line
## #echo $index# inserts a line break automatically, but do not write it
## to the beginning of the line
#echo '%s\n' % $index
#end for
#else:
#echo '%s\n' % $element[0]
#end if
#end for
#end if
</configfile>
    </configfiles>
    <outputs>
        <data format="sparsevector" name="outfile" label="Sparse Vector from ${on_string}"/>
    </outputs>
    <tests>
        <!--
            NOTE(review): this test feeds an SDF file although "infile" accepts
            gspan,txt, and compares against a .gspan file although the output
            format is sparsevector. It looks copied from a converter tool;
            confirm the fixtures (and the input file name) in test-data.
        -->
        <test>
            <param name="infile" value="3_molecules.sdf" />
            <output name="outfile" file="3_molecules.gspan" />
        </test>
    </tests>
    <!--
        NOTE(review): the help text below describes inducing a linear model,
        while the command above runs the FEATURE action. It looks copied from
        another EDeN tool; confirm and replace with a description of the
        feature extraction.
    -->
    <help>

.. class:: infomark

**What it does**

The linear model is induced using the accelerated stochastic gradient descent technique by Léon Bottou and Yann LeCun.
When the target information is 0, a self-training algorithm is used to impute a positive or negative class to the unsupervised instances. If the target information is imbalanced a minority class resampling technique is used to rebalance the training set.

@references@

    </help>
</tool>