Class: Rumale::Clustering::GaussianMixture

Inherits:

Base::Estimator

Object
Base::Estimator
Rumale::Clustering::GaussianMixture

show all

Includes:: Base::ClusterAnalyzer

Defined in:: rumale-clustering/lib/rumale/clustering/gaussian_mixture.rb

Overview

GaussianMixture is a class that implements cluster analysis with gaussian mixture model.

Examples:

require 'rumale/clustering/gaussian_mixture'

analyzer = Rumale::Clustering::GaussianMixture.new(n_clusters: 10, max_iter: 50)
cluster_labels = analyzer.fit_predict(samples)

# If Numo::Linalg is installed, you can specify 'full' for the type of covariance option.
require 'numo/linalg/autoloader'
require 'rumale/clustering/gaussian_mixture'

analyzer = Rumale::Clustering::GaussianMixture.new(n_clusters: 10, max_iter: 50, covariance_type: 'full')
cluster_labels = analyzer.fit_predict(samples)

Instance Attribute Summary collapse

#covariances ⇒ Numo::DFloat readonly

Return the covariance of each cluster: the diagonal elements of the covariance matrix if ‘diag’, the full covariance matrix if ‘full’.
#means ⇒ Numo::DFloat readonly

Return the mean of each cluster.
#n_iter ⇒ Integer readonly

Return the number of iterations to convergence.
#weights ⇒ Numo::DFloat readonly

Return the weight of each cluster.

Attributes inherited from Base::Estimator

#params

Instance Method Summary collapse

#fit(x) ⇒ GaussianMixture

Analyze clusters with given training data.
#fit_predict(x) ⇒ Numo::Int32

Analyze clusters and assign samples to clusters.
#initialize(n_clusters: 8, init: 'k-means++', covariance_type: 'diag', max_iter: 50, tol: 1.0e-4, reg_covar: 1.0e-6, random_seed: nil) ⇒ GaussianMixture constructor

Create a new cluster analyzer with gaussian mixture model.
#predict(x) ⇒ Numo::Int32

Predict cluster labels for samples.

Methods included from Base::ClusterAnalyzer

#score

Constructor Details

#initialize(n_clusters: 8, init: 'k-means++', covariance_type: 'diag', max_iter: 50, tol: 1.0e-4, reg_covar: 1.0e-6, random_seed: nil) ⇒ `GaussianMixture`

Create a new cluster analyzer with gaussian mixture model.

Parameters:

n_clusters (Integer) (defaults to: 8) —

The number of clusters.
init (String) (defaults to: 'k-means++') —

The initialization method for centroids (‘random’ or ‘k-means++’).
covariance_type (String) (defaults to: 'diag') —

The type of covariance parameter to be used (‘diag’ or ‘full’).
max_iter (Integer) (defaults to: 50) —

The maximum number of iterations.
tol (Float) (defaults to: 1.0e-4) —

The tolerance of termination criterion.
reg_covar (Float) (defaults to: 1.0e-6) —

The non-negative regularization to the diagonal of covariance.
random_seed (Integer) (defaults to: nil) —

The seed value used to initialize the random generator.

# File 'rumale-clustering/lib/rumale/clustering/gaussian_mixture.rb', line 54

# Create a new cluster analyzer with gaussian mixture model.
#
# @param n_clusters [Integer] The number of clusters.
# @param init [String] The initialization method for centroids ('random' or 'k-means++').
#   Any value other than 'random' is stored as 'k-means++'.
# @param covariance_type [String] The type of covariance parameter to be used ('diag' or 'full').
#   Any value other than 'full' is stored as 'diag'.
# @param max_iter [Integer] The maximum number of iterations.
# @param tol [Float] The tolerance of termination criterion.
# @param reg_covar [Float] The non-negative regularization to the diagonal of covariance.
# @param random_seed [Integer] The seed value used to initialize the random generator.
def initialize(n_clusters: 8, init: 'k-means++', covariance_type: 'diag',
               max_iter: 50, tol: 1.0e-4, reg_covar: 1.0e-6, random_seed: nil)
  super()

  # Unrecognized option strings fall back to the defaults instead of raising.
  init_method = init == 'random' ? 'random' : 'k-means++'
  cov_type = covariance_type == 'full' ? 'full' : 'diag'
  # Kernel#srand with no argument reseeds the global generator and returns the previous seed.
  seed = random_seed || srand

  @params = {
    n_clusters: n_clusters,
    init: init_method,
    covariance_type: cov_type,
    max_iter: max_iter,
    tol: tol,
    reg_covar: reg_covar,
    random_seed: seed
  }
end

Instance Attribute Details

#covariances ⇒ `Numo::DFloat` (readonly)

Return the covariance of each cluster: the diagonal elements of the covariance matrix if ‘diag’, the full covariance matrix if ‘full’.

Returns:

(Numo::DFloat) —

(shape: [n_clusters, n_features] if ‘diag’, [n_clusters, n_features, n_features] if ‘full’)



43
44
45

# File 'rumale-clustering/lib/rumale/clustering/gaussian_mixture.rb', line 43

# Return the covariance of each cluster as assigned by #fit: the diagonal
# elements of the covariance matrix if 'diag', the full matrix if 'full'.
# @return [Numo::DFloat] (shape: [n_clusters, n_features] if 'diag', [n_clusters, n_features, n_features] if 'full')
def covariances
  @covariances
end

#means ⇒ `Numo::DFloat` (readonly)

Return the mean of each cluster.

Returns:

(Numo::DFloat) —

(shape: [n_clusters, n_features])



39
40
41

# File 'rumale-clustering/lib/rumale/clustering/gaussian_mixture.rb', line 39

# Return the mean of each cluster as assigned by #fit.
# @return [Numo::DFloat] (shape: [n_clusters, n_features])
def means
  @means
end

#n_iter ⇒ `Integer` (readonly)

Return the number of iterations to convergence.

Returns:

(Integer)



31
32
33

# File 'rumale-clustering/lib/rumale/clustering/gaussian_mixture.rb', line 31

# Return the iteration counter recorded by #fit.
# @return [Integer]
def n_iter
  @n_iter
end

#weights ⇒ `Numo::DFloat` (readonly)

Return the weight of each cluster.

Returns:

(Numo::DFloat) —

(shape: [n_clusters])



35
36
37

# File 'rumale-clustering/lib/rumale/clustering/gaussian_mixture.rb', line 35

# Return the weight of each cluster as assigned by #fit.
# @return [Numo::DFloat] (shape: [n_clusters])
def weights
  @weights
end

Instance Method Details

#fit(x) ⇒ `GaussianMixture`

Analyze clusters with given training data.

Returns The learned cluster analyzer itself.

Parameters:

x (Numo::DFloat) —

(shape: [n_samples, n_features]) The training data to be used for cluster analysis.

Returns:

(GaussianMixture) —

The learned cluster analyzer itself.

# File 'rumale-clustering/lib/rumale/clustering/gaussian_mixture.rb', line 73

# Analyze clusters with given training data.
#
# Starting from the initial memberships, each pass re-estimates the cluster
# weights, means and covariances and then recomputes the memberships. The loop
# stops after max_iter passes, or earlier once the largest absolute change in
# any membership value is no greater than tol.
#
# @overload fit(x) -> GaussianMixture
#   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
# @return [GaussianMixture] The learned cluster analyzer itself.
def fit(x, _y = nil)
  check_enable_linalg('fit')
  x = ::Rumale::Validation.check_convert_sample_array(x)

  n_samples = x.shape[0]
  memberships = init_memberships(x)
  # Reset so the counter is an Integer (0) even when max_iter is 0 and the
  # loop body never runs, and so a previous fit's value cannot leak through.
  @n_iter = 0
  @params[:max_iter].times do |t|
    # Integer#times yields a zero-based index; the number of passes performed is t + 1.
    @n_iter = t + 1
    @weights = calc_weights(n_samples, memberships)
    @means = calc_means(x, memberships)
    @covariances = calc_covariances(x, @means, memberships, @params[:reg_covar], @params[:covariance_type])
    new_memberships = calc_memberships(x, @weights, @means, @covariances, @params[:covariance_type])
    # Converged when no membership value moved by more than the tolerance.
    error = (memberships - new_memberships).abs.max
    break if error <= @params[:tol]

    memberships = new_memberships.dup
  end
  self
end

#fit_predict(x) ⇒ `Numo::Int32`

Analyze clusters and assign samples to clusters.

Parameters:

x (Numo::DFloat) —

(shape: [n_samples, n_features]) The training data to be used for cluster analysis.

Returns:

(Numo::Int32) —

(shape: [n_samples]) Predicted cluster label per sample.

# File 'rumale-clustering/lib/rumale/clustering/gaussian_mixture.rb', line 109

# Analyze clusters and assign samples to clusters.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
# @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
def fit_predict(x)
  check_enable_linalg('fit_predict')
  samples = ::Rumale::Validation.check_convert_sample_array(x)

  # Learn the model, then label the very same samples with it.
  fitted = fit(samples)
  fitted.predict(samples)
end

#predict(x) ⇒ `Numo::Int32`

Predict cluster labels for samples.

Parameters:

x (Numo::DFloat) —

(shape: [n_samples, n_features]) The samples to predict the cluster label.

Returns:

(Numo::Int32) —

(shape: [n_samples]) Predicted cluster label per sample.

# File 'rumale-clustering/lib/rumale/clustering/gaussian_mixture.rb', line 97

# Predict cluster labels for samples.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
# @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
def predict(x)
  check_enable_linalg('predict')
  samples = ::Rumale::Validation.check_convert_sample_array(x)
  cov_type = @params[:covariance_type]

  # Compute memberships from the fitted parameters, then turn them into labels.
  assign_cluster(calc_memberships(samples, @weights, @means, @covariances, cov_type))
end

Class: Rumale::Clustering::GaussianMixture

Overview

Examples:

Instance Attribute Summary collapse

Attributes inherited from Base::Estimator

Instance Method Summary collapse

Methods included from Base::ClusterAnalyzer

Constructor Details

#initialize(n_clusters: 8, init: 'k-means++', covariance_type: 'diag', max_iter: 50, tol: 1.0e-4, reg_covar: 1.0e-6, random_seed: nil) ⇒ GaussianMixture

Instance Attribute Details

#covariances ⇒ Numo::DFloat (readonly)

#means ⇒ Numo::DFloat (readonly)

#n_iter ⇒ Integer (readonly)

#weights ⇒ Numo::DFloat (readonly)

Instance Method Details

#fit(x) ⇒ GaussianMixture

#fit_predict(x) ⇒ Numo::Int32

#predict(x) ⇒ Numo::Int32

#initialize(n_clusters: 8, init: 'k-means++', covariance_type: 'diag', max_iter: 50, tol: 1.0e-4, reg_covar: 1.0e-6, random_seed: nil) ⇒ `GaussianMixture`

#covariances ⇒ `Numo::DFloat` (readonly)

#means ⇒ `Numo::DFloat` (readonly)

#n_iter ⇒ `Integer` (readonly)

#weights ⇒ `Numo::DFloat` (readonly)

#fit(x) ⇒ `GaussianMixture`

#fit_predict(x) ⇒ `Numo::Int32`

#predict(x) ⇒ `Numo::Int32`