Class: Rumale::Preprocessing::OrdinalEncoder

Inherits:
Base::Estimator show all
Includes:
Base::Transformer
Defined in:
rumale-preprocessing/lib/rumale/preprocessing/ordinal_encoder.rb

Overview

Transform categorical features to integer values.

Examples:

require 'rumale/preprocessing/ordinal_encoder'

# Fit the encoder on training samples (one row per sample, one column per feature).
encoder = Rumale::Preprocessing::OrdinalEncoder.new
training_samples = [['left', 10], ['right', 15], ['right', 20]]
training_samples = Numo::NArray.asarray(training_samples)
encoder.fit(training_samples)
# After fitting, each feature has its own sorted list of distinct values.
p encoder.categories
# [["left", "right"], [10, 15, 20]]
# Encode new samples: each value is replaced by its index in that feature's category list.
testing_samples = [['left', 20], ['right', 10]]
testing_samples = Numo::NArray.asarray(testing_samples)
encoded = encoder.transform(testing_samples)
p encoded
# Numo::DFloat#shape=[2,2]
# [[0, 2],
#  [1, 0]]
# Decoding maps the indices back to the original category values.
p encoder.inverse_transform(encoded)
# Numo::RObject#shape=[2,2]
# [["left", 20],
#  ["right", 10]]

Instance Attribute Summary collapse

Attributes inherited from Base::Estimator

#params

Instance Method Summary collapse

Constructor Details

#initialize(categories: nil) ⇒ OrdinalEncoder

Create a new encoder that transforms categorical features to integer values.

Parameters:

  • categories (Nil/Array) (defaults to: nil)

    The category list for each feature. If nil is given, the categories extracted from the training data by calling the fit method are used.



41
42
43
44
# File 'rumale-preprocessing/lib/rumale/preprocessing/ordinal_encoder.rb', line 41

# Create a new encoder that transforms categorical features to integer values.
#
# @param categories [nil, Array] The category list for each feature, stored as-is.
#   When nil, the list is expected to be filled in later by #fit.
#   NOTE(review): #fit assigns a freshly extracted list unconditionally, so a list
#   given here is replaced once #fit is called — confirm this is intended.
def initialize(categories: nil)
  # Run the superclass initializer with no arguments before setting local state.
  super()
  @categories = categories
end

Instance Attribute Details

#categories ⇒ Array (readonly)

Returns the array of categorical values for each feature.

Returns:

  • (Array)

    (size: n_features)



35
36
37
# File 'rumale-preprocessing/lib/rumale/preprocessing/ordinal_encoder.rb', line 35

# Return the category list for each feature.
#
# @return [Array, nil] One entry per feature (size: n_features); nil when no list
#   was given to the constructor and the encoder has not been fitted yet.
def categories
  @categories
end

Instance Method Details

#fit(x) ⇒ OrdinalEncoder

Fit encoder by extracting the category for each feature.

Parameters:

  • x (Numo::NArray)

    (shape: [n_samples, n_features]) The samples consisting of categorical features.

Returns:

  • (OrdinalEncoder)

Raises:

  • (ArgumentError)


52
53
54
55
56
57
58
# File 'rumale-preprocessing/lib/rumale/preprocessing/ordinal_encoder.rb', line 52

# Fit the encoder by extracting the categories of each feature.
#
# @param x [Numo::NArray] (shape: [n_samples, n_features])
#   The samples consisting of categorical features.
# @param _y [Object] Ignored; accepted so the signature matches other estimators.
# @return [OrdinalEncoder] self, with one sorted list of distinct values per column.
# @raise [ArgumentError] If x is not a 2-D array.
def fit(x, _y = nil)
  raise ArgumentError, 'Expect sample matrix to be 2-D array' if x.shape.size != 2

  # One category list per column: distinct values in ascending order.
  @categories = (0...x.shape[1]).map do |col|
    x[true, col].to_a.uniq.sort
  end
  self
end

#fit_transform(x) ⇒ Numo::DFloat

Fit the encoder, then return the categorical features encoded as integer values.

Parameters:

  • x (Numo::NArray)

    (shape: [n_samples, n_features]) The samples consisting of categorical features.

Returns:

  • (Numo::DFloat)

    The categorical features encoded as integer values.

Raises:

  • (ArgumentError)


66
67
68
69
70
# File 'rumale-preprocessing/lib/rumale/preprocessing/ordinal_encoder.rb', line 66

# Fit the encoder on x, then encode x with the fitted categories.
#
# @param x [Numo::NArray] (shape: [n_samples, n_features])
#   The samples consisting of categorical features.
# @param _y [Object] Ignored; accepted so the signature matches other estimators.
# @return [Numo::DFloat] Whatever #transform returns for x after fitting.
# @raise [ArgumentError] If x is not a 2-D array.
def fit_transform(x, _y = nil)
  if x.shape.size != 2
    raise ArgumentError, 'Expect sample matrix to be 2-D array'
  end

  fitted = fit(x)
  fitted.transform(x)
end

#inverse_transform(x) ⇒ Numo::NArray

Decode values to categorical features.

Parameters:

  • x (Numo::DFloat)

    (shape: [n_samples, n_features]) The samples consisting of values transformed from categorical features.

Returns:

  • (Numo::NArray)

    The decoded features.



96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'rumale-preprocessing/lib/rumale/preprocessing/ordinal_encoder.rb', line 96

# Decode integer codes back to categorical features.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features])
#   The samples consisting of values transformed from categorical features.
# @return [Numo::NArray] The decoded features.
# @raise [ArgumentError] If x is not a 2-D array, if its number of columns differs
#   from the number of category lists, or if a code does not index a known category.
def inverse_transform(x)
  # Same shape guard as the other methods; without it a 1-D input falls through
  # to the feature-count check and reports a misleading error.
  raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2

  n_features = x.shape[1]
  if n_features != @categories.size
    raise ArgumentError,
          'Expect the number of features and the number of categories to be equal'
  end

  inv_transformed = Array.new(n_features) do |n|
    known = @categories[n]
    x[true, n].to_a.map do |i|
      code = i.to_i
      # A raw Array lookup would wrap negative codes around to the end of the list
      # and return nil past the end, silently producing wrong decodings.
      unless code >= 0 && code < known.size
        raise ArgumentError,
              "Expect encoded values of feature #{n} to be within 0...#{known.size}, but got #{i.inspect}"
      end

      known[code]
    end
  end

  Numo::NArray.asarray(inv_transformed.transpose)
end

#transform(x) ⇒ Numo::DFloat

Encode categorical features.

Parameters:

  • x (Numo::NArray)

    (shape: [n_samples, n_features]) The samples consisting of categorical features.

Returns:

  • (Numo::DFloat)

    The categorical features encoded as integer values.

Raises:

  • (ArgumentError)


76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'rumale-preprocessing/lib/rumale/preprocessing/ordinal_encoder.rb', line 76

# Encode categorical features as the index of each value in its feature's category list.
#
# @param x [Numo::NArray] (shape: [n_samples, n_features])
#   The samples consisting of categorical features.
# @return [Numo::DFloat] The categorical features encoded as integer values.
# @raise [ArgumentError] If x is not a 2-D array, if its number of columns differs
#   from the number of category lists, or if a value is not a known category.
def transform(x)
  raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2

  n_features = x.shape[1]
  if n_features != @categories.size
    raise ArgumentError,
          'Expect the number of features and the number of categories to be equal'
  end

  transformed = Array.new(n_features) do |n|
    known = @categories[n]
    x[true, n].to_a.map do |v|
      # Array#index compares with ==, so e.g. 10.0 still matches a stored 10.
      # A missing value yields nil; report it here instead of handing nil to the
      # numeric array conversion below.
      known.index(v) ||
        raise(ArgumentError, "Expect values of feature #{n} to be known categories, but got #{v.inspect}")
    end
  end

  Numo::DFloat.asarray(transformed.transpose)
end