Class: Rumale::Ensemble::RandomForestClassifier
- Inherits:
- Base::Estimator
- Inheritance hierarchy: Object → Base::Estimator → Rumale::Ensemble::RandomForestClassifier
- Includes:
- Base::Classifier
- Defined in:
- rumale-ensemble/lib/rumale/ensemble/random_forest_classifier.rb
Overview
RandomForestClassifier is a class that implements random forest for classification.
Direct Known Subclasses
Instance Attribute Summary collapse
-
#classes ⇒ Numo::Int32
readonly
Return the class labels.
-
#estimators ⇒ Array<DecisionTreeClassifier>
readonly
Return the set of estimators.
-
#feature_importances ⇒ Numo::DFloat
readonly
Return the importance for each feature.
-
#rng ⇒ Random
readonly
Return the random generator for random selection of feature index.
Attributes inherited from Base::Estimator
Instance Method Summary collapse
-
#apply(x) ⇒ Numo::Int32
Return the index of the leaf that each sample reached.
-
#fit(x, y) ⇒ RandomForestClassifier
Fit the model with given training data.
-
#initialize(n_estimators: 10, criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil, n_jobs: nil, random_seed: nil) ⇒ RandomForestClassifier
constructor
Create a new classifier with random forest.
-
#predict(x) ⇒ Numo::Int32
Predict class labels for samples.
-
#predict_proba(x) ⇒ Numo::DFloat
Predict probability for samples.
Methods included from Base::Classifier
Constructor Details
#initialize(n_estimators: 10, criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil, n_jobs: nil, random_seed: nil) ⇒ RandomForestClassifier
Create a new classifier with random forest.
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'rumale-ensemble/lib/rumale/ensemble/random_forest_classifier.rb', line 59
#
# Create a new classifier with random forest.
#
# Every keyword argument is stored in @params, and the instance random generator (@rng)
# is built from the resolved seed.
#
# @param n_estimators [Integer] Number of trees that #fit builds.
# @param criterion [String] Stored in @params; presumably the split criterion handed to each tree
#   by plant_tree (defined elsewhere) - confirm there.
# @param max_depth [Integer, nil] Stored in @params; presumably the depth limit of each tree.
# @param max_leaf_nodes [Integer, nil] Stored in @params; presumably the leaf limit of each tree.
# @param min_samples_leaf [Integer] Stored in @params; presumably the minimum leaf size of each tree.
# @param max_features [Integer, nil] When nil, #fit replaces it with Math.sqrt(n_features).to_i;
#   #fit then restricts the value to the range 1..n_features.
# @param n_jobs [Integer, nil] Stored in @params; presumably read by enable_parallel? (defined elsewhere).
# @param random_seed [Integer, nil] Seed for @rng. When nil, the value returned by Kernel#srand is used.
def initialize(n_estimators: 10, criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
               max_features: nil, n_jobs: nil, random_seed: nil)
  super()
  @params = {
    n_estimators: n_estimators,
    criterion: criterion,
    max_depth: max_depth,
    max_leaf_nodes: max_leaf_nodes,
    min_samples_leaf: min_samples_leaf,
    max_features: max_features,
    n_jobs: n_jobs,
    # Kernel#srand without an argument reseeds the process-wide generator and returns the
    # previous seed, which becomes this estimator's seed when none was given.
    random_seed: random_seed || srand
  }
  @rng = Random.new(@params[:random_seed])
end
Instance Attribute Details
#classes ⇒ Numo::Int32 (readonly)
Return the class labels.
32 33 34 |
# File 'rumale-ensemble/lib/rumale/ensemble/random_forest_classifier.rb', line 32
#
# Return the class labels.
#
# The value is assigned by #fit from the sorted unique training labels; it is nil until
# #fit has been called.
#
# @return [Numo::Int32]
def classes
  @classes
end
#estimators ⇒ Array<DecisionTreeClassifier> (readonly)
Return the set of estimators.
28 29 30 |
# File 'rumale-ensemble/lib/rumale/ensemble/random_forest_classifier.rb', line 28
#
# Return the set of estimators.
#
# The value is assigned by #fit, one fitted tree per requested estimator; it is nil until
# #fit has been called.
#
# @return [Array<DecisionTreeClassifier>]
def estimators
  @estimators
end
#feature_importances ⇒ Numo::DFloat (readonly)
Return the importance for each feature.
36 37 38 |
# File 'rumale-ensemble/lib/rumale/ensemble/random_forest_classifier.rb', line 36
#
# Return the importance for each feature.
#
# The value is assigned by #fit from the sum of the per-tree importances divided by their
# total; it is nil until #fit has been called.
#
# @return [Numo::DFloat]
def feature_importances
  @feature_importances
end
#rng ⇒ Random (readonly)
Return the random generator for random selection of feature index.
40 41 42 |
# File 'rumale-ensemble/lib/rumale/ensemble/random_forest_classifier.rb', line 40
#
# Return the random generator for random selection of feature index.
#
# The generator is created by #initialize from the random_seed parameter. #fit works on a
# copy of it when deriving the per-tree seeds.
#
# @return [Random]
def rng
  @rng
end
Instance Method Details
#apply(x) ⇒ Numo::Int32
Return the index of the leaf that each sample reached.
154 155 156 157 158 |
# File 'rumale-ensemble/lib/rumale/ensemble/random_forest_classifier.rb', line 154
#
# Return the index of the leaf that each sample reached.
#
# Each fitted tree is asked for its leaf indices; the per-tree results are stacked and
# transposed so that each row of the result belongs to one sample and each column to one tree.
#
# @param x [Numo::DFloat] Samples to route through the forest (validated and converted by
#   ::Rumale::Validation.check_convert_sample_array).
# @return [Numo::Int32] Leaf index for each sample in each tree.
def apply(x)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  # Walk the fitted trees themselves (as #predict and #predict_proba do) rather than counting
  # up to @params[:n_estimators], so a mismatch between the two cannot index past the forest.
  leaf_ids_per_tree = @estimators.map { |tree| tree.apply(x) }
  Numo::Int32[*leaf_ids_per_tree].transpose.dup
end
#fit(x, y) ⇒ RandomForestClassifier
Fit the model with given training data.
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'rumale-ensemble/lib/rumale/ensemble/random_forest_classifier.rb', line 81
#
# Fit the model with given training data.
#
# Builds @params[:n_estimators] trees, each on a bootstrap sample (drawn with replacement,
# same size as the training set) taken with its own random generator, then aggregates the
# per-tree feature importances.
#
# Side effects: overwrites @params[:max_features] with the resolved value and assigns
# @classes, @estimators and @feature_importances. @rng itself is not advanced; a copy is used.
#
# @param x [Numo::DFloat] Training samples; x.shape is read as [n_samples, n_features].
# @param y [Numo::Int32] Training labels, one per sample.
# @return [RandomForestClassifier] The fitted classifier itself.
def fit(x, y)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  y = ::Rumale::Validation.check_convert_label_array(y)
  ::Rumale::Validation.check_sample_size(x, y)
  # Initialize some variables.
  n_samples, n_features = x.shape
  @params[:max_features] = Math.sqrt(n_features).to_i if @params[:max_features].nil?
  @params[:max_features] = [[1, @params[:max_features]].max, n_features].min # rubocop:disable Style/ComparableClamp
  @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
  # Derive one generator per tree from a copy of @rng so that fitting leaves @rng untouched
  # and every tree draws from its own reproducible stream.
  sub_rng = @rng.dup
  rngs = Array.new(@params[:n_estimators]) { Random.new(sub_rng.rand(::Rumale::Ensemble::Value::SEED_BASE)) }
  # Construct forest. The same tree-growing step serves the parallel and the serial path.
  grow_tree = lambda do |n|
    bootstrap_ids = Array.new(n_samples) { rngs[n].rand(0...n_samples) }
    plant_tree(rngs[n].seed).fit(x[bootstrap_ids, true], y[bootstrap_ids])
  end
  @estimators = if enable_parallel?
                  parallel_map(@params[:n_estimators]) { |n| grow_tree.call(n) }
                else
                  Array.new(@params[:n_estimators]) { |n| grow_tree.call(n) }
                end
  @feature_importances = if enable_parallel?
                           parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.sum
                         else
                           @estimators.sum(&:feature_importances)
                         end
  # Normalize only when there is something to normalize: if no tree reports any importance
  # the total is zero and dividing would turn every entry into NaN.
  total_importance = @feature_importances.sum
  @feature_importances /= total_importance if total_importance > 0.0
  self
end
#predict(x) ⇒ Numo::Int32
Predict class labels for samples.
120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# File 'rumale-ensemble/lib/rumale/ensemble/random_forest_classifier.rb', line 120
#
# Predict class labels for samples.
#
# Every tree predicts a label for every sample; the label predicted by the most trees wins.
# When several labels share the highest count, the one that appears first among the trees'
# votes is returned.
#
# @param x [Numo::DFloat] Samples to classify; x.shape[0] is read as the number of samples.
# @return [Numo::Int32] Predicted class label per sample.
def predict(x)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  n_samples = x.shape[0]
  n_estimators = @estimators.size
  # Majority vote over one sample's per-tree labels; shared by both execution paths.
  majority = ->(votes) { votes.group_by(&:itself).max_by { |_label, group| group.size }.first }
  predicted = if enable_parallel?
                votes_per_sample = parallel_map(n_estimators) { |n| @estimators[n].predict(x).to_a }.transpose
                parallel_map(n_samples) { |n| majority.call(votes_per_sample[n]) }
              else
                votes_per_sample = @estimators.map { |tree| tree.predict(x).to_a }.transpose
                Array.new(n_samples) { |n| majority.call(votes_per_sample[n]) }
              end
  Numo::Int32.asarray(predicted)
end
#predict_proba(x) ⇒ Numo::DFloat
Predict probability for samples.
139 140 141 142 143 144 145 146 147 148 |
# File 'rumale-ensemble/lib/rumale/ensemble/random_forest_classifier.rb', line 139
#
# Predict probability for samples.
#
# The result is the mean over all fitted trees of predict_proba_tree(tree, x)
# (predict_proba_tree is defined elsewhere in the class).
#
# @param x [Numo::DFloat] Samples to predict the probabilities for.
# @return [Numo::DFloat] Predicted probability per sample, averaged over the trees.
def predict_proba(x)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  n_estimators = @estimators.size
  summed_proba = if enable_parallel?
                   parallel_map(n_estimators) { |n| predict_proba_tree(@estimators[n], x) }.sum
                 else
                   @estimators.sum { |tree| predict_proba_tree(tree, x) }
                 end
  summed_proba / n_estimators
end