Class: Rumale::Decomposition::FactorAnalysis

Inherits:
Base::Estimator show all
Includes:
Base::Transformer
Defined in:
rumale-decomposition/lib/rumale/decomposition/factor_analysis.rb

Overview

FactorAnalysis is a class that implements factor analysis with the EM algorithm.

Reference

  • Barber, D., “Bayesian Reasoning and Machine Learning,” Cambridge University Press, 2012.

Examples:

require 'numo/linalg/autoloader'
require 'rumale/decomposition/factor_analysis'

decomposer = Rumale::Decomposition::FactorAnalysis.new(n_components: 2)
representation = decomposer.fit_transform(samples)

Instance Attribute Summary collapse

Attributes inherited from Base::Estimator

#params

Instance Method Summary collapse

Constructor Details

#initialize(n_components: 2, max_iter: 100, tol: 1e-8) ⇒ FactorAnalysis

Create a new transformer with factor analysis.

Parameters:

  • n_components (Integer) (defaults to: 2)

    The number of components (dimensionality of latent space).

  • max_iter (Integer) (defaults to: 100)

    The maximum number of iterations.

  • tol (Float/Nil) (defaults to: 1e-8)

    The tolerance of termination criterion for EM algorithm. If nil is given, iterate EM steps up to the maximum number of iterations.



49
50
51
52
53
54
55
56
# File 'rumale-decomposition/lib/rumale/decomposition/factor_analysis.rb', line 49

# Create a new transformer with factor analysis.
#
# The keyword arguments are only recorded in @params here; they are read later by #fit and #transform.
#
# @param n_components [Integer] The number of components (dimensionality of latent space).
# @param max_iter [Integer] The maximum number of iterations.
# @param tol [Float/Nil] The tolerance of termination criterion for EM algorithm.
#   If nil is given, iterate EM steps up to the maximum number of iterations.
def initialize(n_components: 2, max_iter: 100, tol: 1e-8)
  super()
  @params = { n_components: n_components, max_iter: max_iter, tol: tol }
end

Instance Attribute Details

#components ⇒ Numo::DFloat (readonly)

Returns the components with maximum variance.

Returns:

  • (Numo::DFloat)

    (shape: [n_components, n_features])



33
34
35
# File 'rumale-decomposition/lib/rumale/decomposition/factor_analysis.rb', line 33

# Returns the component matrix stored in @components by #fit.
# #fit leaves it with shape [n_components, n_features], or flattens it to a
# single row of shape [n_features] when n_components is 1.
# The instance variable is unset (nil) until #fit has been called.
# @return [Numo::DFloat]
def components
  @components
end

#loglike ⇒ Numo::DFloat (readonly)

Returns the log likelihood at each iteration.

Returns:

  • (Numo::DFloat)

    (shape: [n_iter])



37
38
39
# File 'rumale-decomposition/lib/rumale/decomposition/factor_analysis.rb', line 37

# Returns the log likelihood values that #fit recorded, one per executed iteration.
# #fit only assigns @loglike when the tol parameter is not nil; with tol: nil this
# reader returns whatever @loglike held before (nil on a fresh instance).
# @return [Numo::DFloat] (shape: [n_iter])
def loglike
  @loglike
end

#mean ⇒ Numo::DFloat (readonly)

Returns the mean vector.

Returns:

  • (Numo::DFloat)

    (shape: [n_features])



25
26
27
# File 'rumale-decomposition/lib/rumale/decomposition/factor_analysis.rb', line 25

# Returns the mean vector that #fit computed as x.mean(0) over the training data.
# #transform subtracts this vector from its input. Unset (nil) until #fit has been called.
# @return [Numo::DFloat] (shape: [n_features])
def mean
  @mean
end

#n_iter ⇒ Integer (readonly)

Returns the number of iterations run for optimization.

Returns:

  • (Integer)


41
42
43
# File 'rumale-decomposition/lib/rumale/decomposition/factor_analysis.rb', line 41

# Returns the number of EM iterations that the last call to #fit executed.
# #fit resets it to 0 and then sets it to the 1-based index of each iteration it runs,
# so it stays 0 only if the loop body never executes.
# @return [Integer]
def n_iter
  @n_iter
end

#noise_variance ⇒ Numo::DFloat (readonly)

Returns the estimated noise variance for each feature.

Returns:

  • (Numo::DFloat)

    (shape: [n_features])



29
30
31
# File 'rumale-decomposition/lib/rumale/decomposition/factor_analysis.rb', line 29

# Returns the estimated noise variance for each feature.
# #fit initializes it to ones and, on every iteration, replaces it with the
# per-feature sample variance minus the diagonal of components^T * components,
# floored at 1e-12.
# @return [Numo::DFloat] (shape: [n_features])
def noise_variance
  @noise_variance
end

Instance Method Details

#fit(x) ⇒ FactorAnalysis

Fit the model with given training data.

Returns the learned transformer itself.

Parameters:

  • x (Numo::DFloat)

    (shape: [n_samples, n_features]) The training data to be used for fitting the model.

Returns:

  • (FactorAnalysis)

    The learned transformer itself.



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'rumale-decomposition/lib/rumale/decomposition/factor_analysis.rb', line 63

# Fit the model with given training data.
#
# Each iteration rescales the centered data by the current noise standard deviation,
# takes a truncated SVD of it, rebuilds @components from the result, and then
# re-estimates @noise_variance. When the tol parameter is not nil, the log likelihood
# is recorded after every iteration and the loop stops once it changes by at most tol.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
# @param _y [Object] Not used by this method.
# @return [FactorAnalysis] The learned transformer itself.
# @raise [RuntimeError] If enable_linalg? reports that Numo::Linalg is not available.
def fit(x, _y = nil)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  raise 'FactorAnalysis#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)

  tol = @params[:tol]
  n_components = @params[:n_components]
  track_loglike = !tol.nil?

  # Quantities that stay fixed across iterations.
  n_samples, n_features = x.shape
  @mean = x.mean(0)
  deviations = x - @mean
  covariance = deviations.transpose.dot(deviations) / n_samples
  feature_vars = x.var(0)
  root_n_samples = Math.sqrt(n_samples)
  @noise_variance = Numo::DFloat.ones(n_features)

  # EM iterations.
  prev_loglike = 0.0
  @n_iter = 0
  @loglike = [] if track_loglike
  @params[:max_iter].times do |step|
    @n_iter = step + 1
    noise_std = Numo::NMath.sqrt(@noise_variance)
    # The 1e-12 term presumably guards against division by zero.
    rescaled = deviations / (noise_std * root_n_samples + 1e-12)
    # truncate_svd is defined elsewhere in the class; it is used here as returning
    # the leading singular values and the matching vectors in feature space.
    singular_values, basis = truncate_svd(rescaled, n_components)
    shrinkage = Numo::NMath.sqrt(Numo::DFloat.maximum(singular_values**2 - 1.0, 0.0))
    @components = (noise_std.diag.dot(basis) * shrinkage).transpose.dup
    explained = @components.transpose.dot(@components).diagonal
    # Floor at 1e-12 so the variance estimate never reaches zero or goes negative.
    @noise_variance = Numo::DFloat.maximum(feature_vars - explained, 1e-12)
    next unless track_loglike

    current_loglike = log_likelihood(covariance, @components, @noise_variance)
    @loglike << current_loglike
    break if (prev_loglike - current_loglike).abs <= tol

    prev_loglike = current_loglike
  end

  @loglike = Numo::DFloat.cast(@loglike) if track_loglike
  # With a single component, expose the components as a 1-D vector.
  @components = @components[0, true].dup if n_components == 1
  self
end

#fit_transform(x) ⇒ Numo::DFloat

Fit the model with training data, and then transform them with the learned model.

Returns (shape: [n_samples, n_components]) The transformed data.

Parameters:

  • x (Numo::DFloat)

    (shape: [n_samples, n_features]) The training data to be used for fitting the model.

Returns:

  • (Numo::DFloat)

    (shape: [n_samples, n_components]) The transformed data



107
108
109
110
111
112
# File 'rumale-decomposition/lib/rumale/decomposition/factor_analysis.rb', line 107

# Fit the model with training data, and then transform them with the learned model.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
# @param _y [Object] Not used by this method.
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
# @raise [RuntimeError] If enable_linalg? reports that Numo::Linalg is not available.
def fit_transform(x, _y = nil)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  unless enable_linalg?(warning: false)
    raise 'FactorAnalysis#fit_transform requires Numo::Linalg but that is not loaded'
  end

  # fit returns the fitted transformer, which is then applied to the same data.
  fitted = fit(x)
  fitted.transform(x)
end

#transform(x) ⇒ Numo::DFloat

Transform the given data with the learned model.

Parameters:

  • x (Numo::DFloat)

    (shape: [n_samples, n_features]) The data to be transformed with the learned model.

Returns:

  • (Numo::DFloat)

    (shape: [n_samples, n_components]) The transformed data.



118
119
120
121
122
123
124
125
126
127
# File 'rumale-decomposition/lib/rumale/decomposition/factor_analysis.rb', line 118

# Transform the given data with the learned model.
#
# Writing W for the components and psi for the noise variance, the code computes
#   z = (x - mean) * ((I + (W / psi) W^T)^-1 (W / psi))^T
# with the division by psi applied per feature.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
#   A 1-D vector is returned instead when n_components is 1.
# @raise [RuntimeError] If enable_linalg? reports that Numo::Linalg is not available.
def transform(x)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  raise 'FactorAnalysis#transform requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)

  single_component = @params[:n_components] == 1
  # #fit stores a 1-D vector when there is one component; restore the row axis.
  loadings = single_component ? @components.expand_dims(0) : @components
  deviations = x - @mean
  identity = Numo::DFloat.eye(loadings.shape[0])
  inverse = Numo::Linalg.inv(identity + (loadings / @noise_variance).dot(loadings.transpose))
  projection = (inverse.dot(loadings) / @noise_variance).transpose
  latent = deviations.dot(projection)
  single_component ? latent[true, 0].dup : latent
end