Mercurial > repos > bgruening > eden_toolbox

<tool id="bg_eden_feature" name="EDeN Converters" version="0.1">
    <!-- Short summary appended to the tool name in the Galaxy tool panel. -->
    <description>to produce sparse vectors</description>
    <macros>
        <import>eden_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Runs the EDeN FEATURE action on the selected input, writing into a scratch
         directory, then copies the generated "feature" file to the output dataset
         and removes the scratch directory. -->
    <command>
        ## mktemp -u only prints an unused name; it does not create the directory.
        ## NOTE(review): presumably EDeN creates the output directory itself; confirm.
        tmp_dir=`mktemp -d -u`;

        EDeN --action FEATURE

        --output_directory_path \$tmp_dir

        --input_data_file_name $infile
        --model_file_name $outfile

        --file_type $file_type_opts.file_type_opts_selector

        --binary_file_type ## create a binary sparse vector as output

        @normalization_kernel_hash_radius_dist_vertex@

        --kernel_type $kernel_type_opts.kernel_type_opts_selector
        --graph_type $graph_type

        #if $file_type_opts.file_type_opts_selector == 'SEQUENCE':
            ## Parameters declared inside the file_type_opts conditional are only
            ## reachable through that conditional's namespace.
            --sequence_degree $file_type_opts.sequence_degree
            $file_type_opts.sequence_token
            $file_type_opts.sequence_multi_line
            $file_type_opts.sequence_pairwise_interaction
        #end if

        @kernel_type_options@

        @input_smooth_conditional@

        ;
        cp \$tmp_dir/feature $outfile;
        ## Options are placed before the operand so the call does not depend on
        ## GNU-style argument permutation.
        rm -rf \$tmp_dir;

    </command>
    <!-- Marks the job as failed when the text "Error" matches in the tool's output
         streams (source="both"). -->
    <stdio>
        <regex match="Error"
           source="both"
           level="fatal"
           description="An error occurred with your Job." />
    </stdio>
    <!-- User-facing parameters: the input dataset, its file type (with extra
         options for sequences), plus the shared parameter groups from eden_macros.xml. -->
    <inputs>
        <param format="gspan,txt" name="infile" type="data" label="Input file"
            help="File can contain Graph datatypes (gSpan, sparse vector, sequence)."/>

        <!-- Every <when> below corresponds to exactly one <option> of the selector. -->
        <conditional name="file_type_opts">
            <param name="file_type_opts_selector" type="select" label="Type of Input file">
                <option value="GRAPH">Graph</option>
                <option value="SPARSE_VECTOR">sparse vector</option>
                <option value="SEQUENCE">Sequence</option>
            </param>
            <when value="GRAPH" />
            <when value="SPARSE_VECTOR" />
            <when value="SEQUENCE">
                <param name="sequence_degree" type="integer" value="1" label="Sequence degree">
                    <validator type="in_range" min="1" />
                </param>
                <!-- Each boolean expands to its command-line flag when checked and to nothing otherwise. -->
                <param name="sequence_token" type="boolean" label="Sequence token" truevalue="--sequence_token" falsevalue="" checked="false" />
                <param name="sequence_multi_line" type="boolean" label="Sequence is in multi-line notation" truevalue="--sequence_multi_line" falsevalue="" checked="false" />
                <param name="sequence_pairwise_interaction" type="boolean" label="Sequence pairwise interactions" truevalue="--sequence_pairwise_interaction" falsevalue="" checked="false" />
            </when>
        </conditional>

        <expand macro="kernel_type_options" />

        <expand macro="graph_types" />

        <expand macro="input_smooth_conditional" />

        <expand macro="normalization_kernel_hash_radius_dist_vertex" />

    </inputs>
    <configfiles>
<!-- The strange indentation is necessary, otherwise we get line breaks or white space in our file -->
<!-- row_index_file_name: when smooth_opts_selector is 'smooth', the row_index value is
     split on ',' and each piece is stripped and split on '-'. A two-part piece "a-b" is
     expanded with range(int(a), int(b)) and written one index per line; any other piece
     is written as a single line containing its first part.
     NOTE(review): range() excludes its upper bound, so "3-7" yields 3, 4, 5, 6. Confirm
     whether an inclusive range was intended. -->
<configfile name="row_index_file_name">#if $smooth_opts.smooth_opts_selector == 'smooth':
                #for $element in str( $smooth_opts.row_index ).split(','):
                    #set $element = $element.strip().split('-')
                    #if len($element) == 2:
                        #for $index in range( int($element[0]), int($element[1]) ):
                            ## the following writes the value at the beginning of each line
                            ## #echo $index# inserts a line break automatically, but do not write it
                            ## to the beginning of the line
                            #echo '%s\n' % $index
                        #end for
                    #else:
                        #echo '%s\n' % $element[0]
                    #end if
                #end for
            #end if
</configfile>
<!-- col_index_file_name: same expansion as the row index file, applied to the col_index
     value. The same upper-bound caveat applies: range() stops one short of the second
     number of an "a-b" piece. -->
<configfile name="col_index_file_name">#if $smooth_opts.smooth_opts_selector == 'smooth':
                #for $element in str( $smooth_opts.col_index ).split(','):
                    #set $element = $element.strip().split('-')
                    #if len($element) == 2:
                        #for $index in range( int($element[0]), int($element[1]) ):
                            ## the following writes the value at the beginning of each line
                            ## #echo $index# inserts a line break automatically, but do not write it
                            ## to the beginning of the line
                            #echo '%s\n' % $index
                        #end for
                    #else:
                        #echo '%s\n' % $element[0]
                    #end if
                #end for
            #end if
</configfile>
    </configfiles>
    <!-- Single result dataset; the command copies the generated feature file here. -->
    <outputs>
        <data name="outfile"
              format="sparsevector"
              label="Sparse Vector from ${on_string}" />
    </outputs>
    <tests>
        <!-- NOTE(review): the input name "3_molceuls.sdf" is spelled differently from the
             expected output "3_molecules.gspan", and neither an SDF input nor a gSpan
             result matches the formats this tool declares (gspan,txt in, sparsevector out).
             This test looks copied from another wrapper; confirm against the test-data
             directory before relying on it. -->
        <test>
            <param name="infile" value="3_molceuls.sdf" />
            <output name="outfile" file="3_molecules.gspan" />
        </test>
    </tests>
    <help>

.. class:: infomark

**What it does**

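This tool runs EDeN with the FEATURE action to convert the input file (graph, sparse vector or sequence) into a binary sparse vector.

The linear model is induced using the accelerated stochastic gradient descent technique by Léon Bottou and Yann LeCun.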
When the target information is 0, a self-training algorithm is used to impute a positive or negative class to the unsupervised instances.
If the target information is imbalanced a minority class resampling technique is used to rebalance the training set.

@references@

    </help>
</tool>
author	bgruening
date	Tue, 29 Oct 2013 11:07:49 -0400
parents	7d49e315cb95
children	5be8af51780d