Mercurial > repos > bgruening > eden_vectorizer
view eden_vectorizer.xml @ 5:4338e8aac25b draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a9f28163f0d2e808e49c43a6df5a040706e79991
author | bgruening |
---|---|
date | Thu, 23 Jun 2016 15:27:02 -0400 |
parents | 3df106e75b33 |
children |
line wrap: on
line source
<!--
    Galaxy tool wrapper around eden.graph.Vectorizer.

    Reads graphs from a node-link JSON dataset, vectorizes them with the
    options chosen in the "Advanced Options" section, and writes the
    resulting sparse matrix in Matrix Market format.
-->
<tool id="eden_vectorizer" name="Transform" version="@VERSION@">
    <description>real vector graphs to sparse vectors</description>
    <macros>
        <import>main_macros.xml</import>
    </macros>
    <expand macro="python_requirements"/>
    <expand macro="macro_stdio"/>
    <version_command>echo "@VERSION@"</version_command>
    <command>
<![CDATA[
python '$vectorizer_script' '$inputs'
]]>
    </command>
    <configfiles>
        <!-- JSON dump of the tool parameters; its path is passed to the script as argv[1]. -->
        <inputs name="inputs"/>
        <configfile name="vectorizer_script">
<![CDATA[
import sys
import json
import networkx
import numpy as np
from scipy.sparse import csr_matrix
from scipy.io import mmwrite
from eden.graph import Vectorizer
from eden.converter.graph.node_link_data import node_link_data_to_eden

input_json_path = sys.argv[1]
with open(input_json_path, "r") as param_handle:
    params = json.load(param_handle)

## Forward every option the user actually set. Only unset optional values
## (None or an empty string) are skipped, so that an explicit 0 or an
## un-checked boolean (False) still reaches the Vectorizer instead of being
## silently replaced by the library default.
options = {k: v for k, v in params["options"].items() if v is not None and v != ""}

graph_list = node_link_data_to_eden("$infile")
vectorizer = Vectorizer(**options)
sparse_representation = vectorizer.transform(graph_list)

## An open handle is passed on purpose: given a file name, mmwrite appends
## a ".mtx" extension, which would miss the dataset path Galaxy expects.
with open("$outfile", "wb") as out_handle:
    mmwrite(out_handle, sparse_representation)
]]>
        </configfile>
    </configfiles>
    <inputs>
        <param name="infile" type="data" format="json" label="Graph data" help="JSON representation of graphs."/>
        <section name="options" title="Advanced Options" expanded="False">
            <param argument="r" type="integer" optional="true" value="3"
                   label="Maximum radius size"
                   help="The maximal radius size in pairwise neighborhood. ( unit : node )"/>
            <param argument="d" type="integer" optional="true" value="3"
                   label="Maximum distance size"
                   help="The maximal distance size in pairwise neighborhood. ( unit : node )"/>
            <param argument="n" type="integer" optional="true" value="1"
                   label="Maximum number of clusters"
                   help="The maximal number of clusters used to discretize real label vectors."/>
            <param argument="min_r" type="integer" optional="true" value="0"
                   label="Minimal radius size"
                   help="The minimal radius size in pairwise neighborhood."/>
            <param argument="min_d" type="integer" optional="true" value="0"
                   label="Minimal distance size"
                   help="The minimal distance size in pairwise neighborhood."/>
            <param argument="min_n" type="integer" optional="true" value="2"
                   label="Minimal number of clusters"
                   help="The minimal number of clusters used to discretize real label vectors ( default : 2 )"/>
            <param argument="label_size" type="integer" optional="true" value="1"
                   label="Label size"
                   help="The number of discretization steps used in the conversion from real valued labels to discrete labels."/>
            <param argument="nbits" type="integer" optional="true" value="20"
                   label="Feature space size"
                   help="The number of bits that defines the feature space size: |feature space|=2^nbits"/>
            <param argument="normalization" type="boolean" optional="true"
                   truevalue="booltrue" falsevalue="boolfalse" checked="true"
                   label="Normalization"
                   help="Flag to set the resulting feature vector to have unit euclidean norm."/>
            <param argument="inner_normalization" type="boolean" optional="true"
                   truevalue="booltrue" falsevalue="boolfalse" checked="true"
                   label="Combine radius and distance size to normalize"
                   help="Flag to set the feature vector for a specific combination of the radius and distance size to have unit euclidean norm (default True). When used together with the 'normalization' flag it will be applied first and then the resulting feature vector will be normalized."/>
            <param argument="triangular_decomposition" type="boolean" optional="true"
                   truevalue="booltrue" falsevalue="boolfalse" checked="false"
                   label="Process dense graphs"
                   help=""/>
        </section>
    </inputs>
    <outputs>
        <data format="txt" name="outfile"/>
    </outputs>
    <tests>
        <test>
            <param name="infile" value="converter_result01.json" ftype="json"/>
            <output name="outfile" file="vectorizer_result01.mtx"/>
        </test>
        <test>
            <param name="infile" value="converter_result02.json" ftype="json"/>
            <output name="outfile" file="vectorizer_result02.mtx"/>
        </test>
        <!--
            NOTE(review): the two tests below set a parameter named "complexity",
            but no such parameter is declared in the inputs of this tool. They
            also use identical inputs while expecting different output files.
            Confirm the intended parameters and expected results.
        -->
        <test>
            <param name="infile" value="converter_result02.json" ftype="json"/>
            <param name="complexity" value="5"/>
            <param name="r" value="4"/>
            <output name="outfile" file="vectorizer_result03.mtx"/>
        </test>
        <test>
            <param name="infile" value="converter_result02.json" ftype="json"/>
            <param name="complexity" value="5"/>
            <param name="r" value="4"/>
            <output name="outfile" file="vectorizer_result04.mtx"/>
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

This tool transforms real vector, labeled, weighted, nested graphs to sparse vectors.

For more information check http://dx.doi.org/10.5281/zenodo.15094
]]>
    </help>
    <expand macro="eden_citation"/>
</tool>