Mercurial > repos > bgruening > graphclust_nspdk
diff NSPDK_candidateClusters.xml @ 0:28e440724de2 draft default tip
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/NSPDK commit f447414150c19865e904d3914a68e2479fadddce
author | bgruening |
---|---|
date | Thu, 15 Dec 2016 18:18:52 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/NSPDK_candidateClusters.xml Thu Dec 15 18:18:52 2016 -0500 @@ -0,0 +1,149 @@ +<tool id="NSPDK_candidateClust" name="NSPDK_candidateClusters" version="9.2"> + <requirements> + <requirement type="package" version="0.1">graphclust-wrappers</requirement> + <requirement type="package" version="0.5">perl-array-utils</requirement> + <requirement type="package" version="9.2">nspdk</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> + <![CDATA[ + + mkdir -p SVECTOR && + + cp $data_svector SVECTOR/data.svector && + + 'NSPDK_candidateClusters.pl' '$data_fasta' '$data_names' $noCache $ensf $oc $usn $knn $nhf $nspdk_nhf_max $nspdk_nhf_step $GLOBAL_num_clusters $max_rad $max_dist_relations + + #if $iteration_num.iteration_num_selector: + $iteration_num.CI + '$blacklist' + '$final_partition_soft' + '$fast_cluster_last_round' + $iteration_num.GLOBAL_hit_blacklist_overlap + + #else: + 1 + + #end if + +]]> + </command> + <inputs> + <param type="data" name="data_svector" format="zip" /> + <param type="data" name="data_fasta" format="fasta" /> + <param type="data" name="data_names" format="txt" /> + <conditional name="iteration_num"> + <param name="iteration_num_selector" type="boolean" checked="no" label="Multiple iterations" help="for single iteration- NO, for multiple-YES"/> + <when value="true"> + <param name="CI" type="integer" value="2" size="5" label="Number of current iteration "/> + <param type="data" name="blacklist" format="txt" /> + <param type="data" name="final_partition_soft" format="txt" /> + <param type="data" name="fast_cluster_last_round" format="txt" /> + <param name="GLOBAL_hit_blacklist_overlap" type="float" value="0.2" size="5" label="Blacklist hit overlap" /> + </when> + <when value="false" > + <param name="CI" type="hidden" value="1" size="5" label="Number of current iteration "></param> + </when> + </conditional> + <param name="max_rad" type="integer" value="3" size="5" label="maximum radius " help="-R"/> + <param name="max_dist_relations" type="integer" value="3" size="5" label="maximum distance relations" help="-D"/> + <param name="noCache" truevalue="-no-cache" falsevalue="" checked="True" type="boolean" + label="Deactivate caching of kernel value computation (-no-cache)" help="to minimize memory usage"/> + <param name="ensf" type="integer" value="5" size="5" label="eccess neighbour size factor" help="-ensf"/> + <param name="usn" truevalue="-usn" falsevalue="" checked="True" type="boolean" + label="Use shared neighbourhood to weight center density (-usn)" help="by default true"/> + <param name="oc" truevalue="-oc" falsevalue="" checked="True" type="boolean" + label=" flag to output clusters (-oc)" help="by default true"/> + <param name="knn" type="integer" value="20" size="5" label="Number of nearest neighbors" help="-knn num"/> + <param name="nhf" type="integer" value="500" size="5" label="Number of hash functions " help="-nhf num"/> + <param name="nspdk_nhf_max" type="integer" value="1000" size="5" label="Maximal number of hash functions " /> + <param name="nspdk_nhf_step" type="integer" value="25" size="5" label="Size of step for increasing hash functions " help="The number of hash functions is increased by this value after each iteration."/> + <param name="GLOBAL_num_clusters" type="integer" value="100" size="5" label="Maxinum number of clusters " /> + </inputs> + <outputs> + <data name="fast_cluster" format="txt" from_work_dir="SVECTOR/data.svector.1.fast_cluster" label="fast_cluster.1" > + <filter> iteration_num['iteration_num_selector'] is False</filter> + </data> + <data name="fast_cluster_sim" format="txt" from_work_dir="SVECTOR/data.svector.1.fast_cluster_sim" label="fast_cluster_sim.1" > + <filter> iteration_num['iteration_num_selector'] is False </filter> + </data> + <data name="black_list" format="txt" from_work_dir="SVECTOR/data.svector.blacklist.1" label="blacklist.1" > + <filter> iteration_num['iteration_num_selector'] is False </filter> + </data> + <data name="fast_cluster_m" format="txt" from_work_dir="SVECTOR/data.svector.*.fast_cluster" label="fast_cluster.$iteration_num.CI" > + <filter> iteration_num['iteration_num_selector'] is True</filter> + </data> + <data name="fast_cluster_sim_m" format="txt" from_work_dir="SVECTOR/data.svector.*.fast_cluster_sim" label="fast_cluster_sim.$iteration_num.CI" > + <filter> iteration_num['iteration_num_selector'] is True</filter> + </data> + <data name="black_list_m" format="txt" from_work_dir="SVECTOR/data.svector.blacklist.*" label="blacklist.$iteration_num.CI" > + <filter> iteration_num['iteration_num_selector'] is True</filter> + </data> + </outputs> + <tests> + <test> + <param name="data_fasta" value="data.fasta"/> + <param name="data_names" value="data.names"/> + <param name="data_svector" value="data.svector.1" ftype="zip" /> + <conditional name="iteration_num"> + <param name="iteration_num_selector" value="false"/> + </conditional> + <param name="noCache" value="-no-cache"/> + <param name="ensf" value="5"/> + <param name="oc" value="-oc"/> + <param name="max_rad" value="3"/> + <param name="max_dist_relations" value="3"/> + <param name="usn" value="-usn"/> + <param name="knn" value="20"/> + <param name="nhf" value="500"/> + <param name="nspdk_nhf_max" value="1000"/> + <param name="nspdk_nhf_step" value="25"/> + <param name="GLOBAL_num_clusters" value="100"/> + <output name="fast_cluster" file="SVECTOR/data.svector.1.fast_cluster" /> + <output name="fast_cluster_sim" file="SVECTOR/data.svector.1.fast_cluster_sim" /> + <output name="black_list" file="SVECTOR/data.svector.blacklist.1" /> + </test> + </tests> + <help> + <![CDATA[ + +**What it does** + +Copmutes global feature index and returns top dense sets. +The candidate clusters are chosen as the top ranking neighborhoods provided that the size of their overlap +is below a specified threshold. +For more information see *Fast neighborhood subgraph pairwise distance kernel* paper. + + +**Parameters** + ++ **-knn** : <num nearest neighbors> (default: 10) + ++ **-otknn** : flag to output true (i.e. implies full kernel matrix evaluation) k-nearest neighburs (default: 0) + ++ **-oc** : flag to output clusters (default: 0) + ++ **-nhf** : <num hash functions> for the Locality Sensitive Hashing function (default: 250) + ++ **-ensf** : <eccess neighbour size factor> (default: 10) (0 to avoid trimming) + ++ **-usn** : use shared neighbourhood to weight center density (default: 0) + + + ]]> + </help> + <citations> + <citation type="doi">10.1093/bioinformatics/bts224</citation> + <citation type="bibtex">@inproceedings{costa2010fast, + title={Fast neighborhood subgraph pairwise distance kernel}, + author={Costa, Fabrizio and De Grave, Kurt}, + booktitle={Proceedings of the 26th International Conference on Machine Learning}, + pages={255--262}, + year={2010}, + organization={Omnipress} + } + </citation> + </citations> +</tool>