view clustalomega/clustal-omega-0.2.0/src/kmpp/KMeans.h @ 0:ea6d0e588642 default tip

Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author clustalomega
date Tue, 07 Jun 2011 16:13:02 -0400
parents
children
line wrap: on
line source

#ifndef CLUSTALO
// BEWARE: BETA VERSION
// --------------------
//
// The main set of utilities for running k-means and k-means++ on arbitrary data sets.
//
// Author: David Arthur (darthur@gmail.com), 2009
#endif

#ifndef KMEANS_H__
#define KMEANS_H__

#ifndef CLUSTALO
// Includes
#include "KmUtils.h"
#include <iostream>

// Logging preferences: these two functions control how much logging is done, and where it is
// written, when k-means is run.

// Resets the logging preferences.
// NOTE(review): presumably this removes every stream previously registered with
// AddKMeansLogging -- confirm against the implementation.
void ClearKMeansLogging();

// Registers an output stream for k-means logging.
//   - out: The stream that log output is written to.
//   - verbose: Selects how much logging is done on this stream.
//              NOTE(review): presumably true means more detailed output -- confirm.
void AddKMeansLogging(std::ostream *out, bool verbose);

// Runs k-means on the given set of points.
//   - n: The number of points in the data set
//   - k: The number of clusters to look for
//   - d: The number of dimensions that the data set lives in
//   - points: An array of size n*d where points[d*i + j] gives coordinate j of point i
//   - attempts: The number of times to independently run k-means with different starting centers.
//               The best result is always returned (as measured by the cost function).
//   - centers: This can either be null or an array of size k*d. In the latter case, it will be
//              filled with the locations of all final cluster centers. Specifically
//              centers[d*i + j] will give coordinate j of center i. If the cluster is unused, it
//              will contain NaN instead.
//   - assignments: This can either be null or an array of size n. In the latter case, it will be
//                  filled with the cluster that each point is assigned to (an integer between 0
//                  and k-1 inclusive).
// Returns the final cost of the clustering.
Scalar RunKMeans(int n, int k, int d, Scalar *points, int attempts,
                 Scalar *centers, int *assignments);

// Runs k-means++ on the given set of points. See RunKMeans for info on the parameters.
Scalar RunKMeansPlusPlus(int n, int k, int d, Scalar *points, int attempts,
                         Scalar *centers, int *assignments);

#else

/* CLUSTALO PATCH:
 * Single entry point that replaces RunKMeans / RunKMeansPlusPlus when CLUSTALO
 * is defined. The parameters n, k, d, points, attempts, centers and assignments
 * have the same meaning as documented for RunKMeans in the non-CLUSTALO branch
 * of this header, with plain double used for the coordinate type.
 *
 * One addition: use_lloyds_method selects the algorithm. If it is false (zero),
 * k-means++ (kmpp) will be used; otherwise the 'classical' method, i.e. Lloyd's
 * method, will be used.
 *
 * NOTE(review): the return value is presumably the final clustering cost, as
 * for RunKMeans -- confirm against the implementation.
 */
extern double
KMeans(int n, int k, int d, double *points, int attempts, int use_lloyds_method,
                 double *centers, int *assignments);

#endif
#endif