SemiDiscreteOT 1.0
Semi-Discrete Optimal Transport Library
Loading...
Searching...
No Matches
Namespaces | Classes | Functions
dkm Namespace Reference

Namespaces

namespace  details
 

Classes

class  clustering_parameters
 

Functions

template<typename T , size_t N>
std::tuple< std::vector< std::array< T, N > >, std::vector< uint32_t > > kmeans_lloyd (const std::vector< std::array< T, N > > &data, const clustering_parameters< T > &parameters)
 
template<typename T , size_t N>
std::tuple< std::vector< std::array< T, N > >, std::vector< uint32_t > > kmeans_lloyd (const std::vector< std::array< T, N > > &data, uint32_t k, uint64_t max_iter=0, T min_delta=-1.0)
 
template<typename T , size_t N>
std::tuple< std::vector< std::array< T, N > >, std::vector< uint32_t > > kmeans_lloyd_parallel (const std::vector< std::array< T, N > > &data, const clustering_parameters< T > &parameters)
 
template<typename T , size_t N>
std::tuple< std::vector< std::array< T, N > >, std::vector< uint32_t > > kmeans_lloyd_parallel (const std::vector< std::array< T, N > > &data, uint32_t k, uint64_t max_iter=0, T min_delta=-1.0)
 
template<typename T , size_t N>
std::vector< T > dist_to_center (const std::vector< std::array< T, N > > &points, const std::array< T, N > &center)
 
template<typename T , size_t N>
T sum_dist (const std::vector< std::array< T, N > > &points, const std::array< T, N > &center)
 
template<typename T , size_t N>
std::vector< std::array< T, N > > get_cluster (const std::vector< std::array< T, N > > &points, const std::vector< uint32_t > &labels, const uint32_t label)
 
template<typename T , size_t N>
T means_inertia (const std::vector< std::array< T, N > > &points, const std::tuple< std::vector< std::array< T, N > >, std::vector< uint32_t > > &means, uint32_t k)
 
template<typename T , size_t N>
std::tuple< std::vector< std::array< T, N > >, std::vector< uint32_t > > get_best_means (const std::vector< std::array< T, N > > &points, uint32_t k, uint32_t n_init=10)
 
template<typename T , size_t N>
size_t predict (const std::vector< std::array< T, N > > &centroids, const std::array< T, N > &query)
 
template<typename T , size_t N>
std::vector< std::array< T, N > > load_csv (const std::string &path)
 

Function Documentation

◆ kmeans_lloyd() [1/2]

template<typename T , size_t N>
std::tuple< std::vector< std::array< T, N > >, std::vector< uint32_t > > dkm::kmeans_lloyd ( const std::vector< std::array< T, N > > &  data,
const clustering_parameters< T > &  parameters 
)

Definition at line 271 of file dkm.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ kmeans_lloyd() [2/2]

template<typename T , size_t N>
std::tuple< std::vector< std::array< T, N > >, std::vector< uint32_t > > dkm::kmeans_lloyd ( const std::vector< std::array< T, N > > &  data,
uint32_t  k,
uint64_t  max_iter = 0,
T  min_delta = -1.0 
)

Definition at line 305 of file dkm.hpp.

Here is the call graph for this function:

◆ kmeans_lloyd_parallel() [1/2]

template<typename T , size_t N>
std::tuple< std::vector< std::array< T, N > >, std::vector< uint32_t > > dkm::kmeans_lloyd_parallel ( const std::vector< std::array< T, N > > &  data,
const clustering_parameters< T > &  parameters 
)

Definition at line 121 of file dkm_parallel.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ kmeans_lloyd_parallel() [2/2]

template<typename T , size_t N>
std::tuple< std::vector< std::array< T, N > >, std::vector< uint32_t > > dkm::kmeans_lloyd_parallel ( const std::vector< std::array< T, N > > &  data,
uint32_t  k,
uint64_t  max_iter = 0,
T  min_delta = -1.0 
)

Definition at line 155 of file dkm_parallel.hpp.

Here is the call graph for this function:

◆ dist_to_center()

template<typename T , size_t N>
std::vector< T > dkm::dist_to_center ( const std::vector< std::array< T, N > > &  points,
const std::array< T, N > &  center 
)

Calculates the Euclidean distance from each point in the given sequence to the given center and returns the results as a vector.

Parameters
points: Point sequence.
center: Center point with which the distance of each point is calculated.
Returns
std::vector<T> containing distance of each point to the center.

Definition at line 39 of file dkm_utils.hpp.

Here is the caller graph for this function:

◆ sum_dist()

template<typename T , size_t N>
T dkm::sum_dist ( const std::vector< std::array< T, N > > &  points,
const std::array< T, N > &  center 
)

Calculates the sum of distances from each point in points to the given center point.

Parameters
points: Point sequence.
center: Center point with which the distance of each point is calculated.
Returns
Sum of distances of each point to the center.

Definition at line 57 of file dkm_utils.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_cluster()

template<typename T , size_t N>
std::vector< std::array< T, N > > dkm::get_cluster ( const std::vector< std::array< T, N > > &  points,
const std::vector< uint32_t > &  labels,
const uint32_t  label 
)

Return a point sequence whose elements all belong to the same cluster given by label.

Parameters
points: Sequence of points that was passed to dkm::kmeans_lloyd.
labels: Sequence of labels that was obtained from dkm::kmeans_lloyd.
label: Label of the cluster to be obtained.
Returns
Sequence of points that all belong to the cluster with the given label.

Definition at line 74 of file dkm_utils.hpp.

Here is the caller graph for this function:

◆ means_inertia()

template<typename T , size_t N>
T dkm::means_inertia ( const std::vector< std::array< T, N > > &  points,
const std::tuple< std::vector< std::array< T, N > >, std::vector< uint32_t > > &  means,
uint32_t  k 
)

Calculates the inertia of a given k-means clustering. Inertia is defined as the sum of Euclidean distances of each point to its closest cluster center.

Parameters
points: Sequence of points that was passed to dkm::kmeans_lloyd.
means: Result of dkm::kmeans_lloyd.
k: Number of clusters.
Returns
Total inertia of the given clustering.

Definition at line 99 of file dkm_utils.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_best_means()

template<typename T , size_t N>
std::tuple< std::vector< std::array< T, N > >, std::vector< uint32_t > > dkm::get_best_means ( const std::vector< std::array< T, N > > &  points,
uint32_t  k,
uint32_t  n_init = 10 
)

Return the best clustering obtained from a given number of k-means calculations.

Parameters
points: Sequence of points to be clustered.
k: Number of clusters.
n_init: Number of times a k-means clustering will be calculated.
Returns
Clustering with the lowest inertia.

Definition at line 126 of file dkm_utils.hpp.

Here is the call graph for this function:

◆ predict()

template<typename T , size_t N>
size_t dkm::predict ( const std::vector< std::array< T, N > > &  centroids,
const std::array< T, N > &  query 
)

Return the index of the cluster that has the closest centroid to the query.

Parameters
centroids: List of cluster centroids.
query: Query point for which the closest centroid is found.
Returns
Index of closest centroid (class)

Definition at line 150 of file dkm_utils.hpp.

Here is the call graph for this function:

◆ load_csv()

template<typename T , size_t N>
std::vector< std::array< T, N > > dkm::load_csv ( const std::string &  path)

Load a dataset from a CSV file where each row is a point with N values.

Parameters
path: Location of file on disk to load data from.
Returns
A k-means ready data set (a std::vector of std::array rows)

Definition at line 169 of file dkm_utils.hpp.

Here is the call graph for this function: