Note
Click here to download the full example code
Train and predict with an ensemble classifier
This example shows the basic usage of an ``imbens.ensemble`` classifier.
This example uses ``imbens.ensemble.SelfPacedEnsembleClassifier``.
# Authors: Zhining Liu <zhining.liu@outlook.com>
# License: MIT
print(__doc__)
# Import imbalanced-ensemble
import imbens
# Import utilities
from collections import Counter
import sklearn
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from imbens.ensemble.base import sort_dict_by_key
# Import plot utilities
import matplotlib.pyplot as plt
from imbens.utils._plot import plot_2Dprojection_and_cardinality
RANDOM_STATE = 42
Prepare & visualize the data
Make a toy 3-class imbalanced classification task.
# Generate a synthetic 3-class task; ``weights=[0.1, 0.3, 0.6]`` makes the
# classes imbalanced (roughly 1:3:6).
X, y = make_classification(
    n_classes=3,
    n_samples=2000,
    class_sep=2,
    weights=[0.1, 0.3, 0.6],
    n_informative=3,
    n_redundant=1,
    flip_y=0,
    n_features=20,
    n_clusters_per_class=2,
    random_state=RANDOM_STATE,
)
# Stratified 50/50 split keeps the same class ratio in both halves.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.5, stratify=y, random_state=RANDOM_STATE
)

# Visualize the training dataset (2-D projection + per-class cardinality).
fig = plot_2Dprojection_and_cardinality(X_train, y_train, figsize=(8, 4))
plt.show()

# Print the class distribution, sorted by class label for readability.
print('Training dataset distribution %s' % sort_dict_by_key(Counter(y_train)))
print('Validation dataset distribution %s' % sort_dict_by_key(Counter(y_valid)))
Training dataset distribution {0: 100, 1: 300, 2: 600}
Validation dataset distribution {0: 100, 1: 300, 2: 600}
Using ensemble classifiers in imbens
Take ``SelfPacedEnsembleClassifier`` as an example.
# Initialize a SelfPacedEnsembleClassifier
# (the base estimator defaults to a DecisionTreeClassifier — see the
# printed summary below)
clf = imbens.ensemble.SelfPacedEnsembleClassifier(random_state=RANDOM_STATE)

# Train the SelfPacedEnsembleClassifier on the imbalanced training set
clf.fit(X_train, y_train)

# Make predictions: per-class probabilities and hard labels
y_pred_proba = clf.predict_proba(X_valid)
y_pred = clf.predict(X_valid)

# Evaluate with balanced accuracy, which is robust to class imbalance
balanced_acc_score = sklearn.metrics.balanced_accuracy_score(y_valid, y_pred)
print(f'SPE: ensemble of {clf.n_estimators} {clf.estimator_}')
print('Validation Balanced Accuracy: {:.3f}'.format(balanced_acc_score))
SPE: ensemble of 50 DecisionTreeClassifier()
Validation Balanced Accuracy: 0.980
Set the ensemble size (parameter ``n_estimators``: int).
from imbens.ensemble import SelfPacedEnsembleClassifier as SPE
from sklearn.metrics import balanced_accuracy_score

# Build a smaller ensemble: 5 base estimators instead of the default.
clf = SPE(
    n_estimators=5,  # Set ensemble size to 5
    random_state=RANDOM_STATE,
)
clf.fit(X_train, y_train)

# Evaluate on the held-out validation split.
y_pred = clf.predict(X_valid)
balanced_acc_score = balanced_accuracy_score(y_valid, y_pred)
print(f'SPE: ensemble of {clf.n_estimators} {clf.estimator_}')
print('Validation Balanced Accuracy: {:.3f}'.format(balanced_acc_score))
SPE: ensemble of 5 DecisionTreeClassifier()
Validation Balanced Accuracy: 0.978
Use a different base estimator (parameter ``estimator``: estimator object).
from sklearn.svm import SVC

# Swap the default base learner for an SVM.
# probability=True is needed so the base estimator supports predict_proba.
svm_base = SVC(probability=True)
clf = SPE(
    n_estimators=5,
    estimator=svm_base,  # Use SVM as the base estimator
    random_state=RANDOM_STATE,
)
clf.fit(X_train, y_train)

# Evaluate on the validation split.
balanced_acc_score = balanced_accuracy_score(y_valid, clf.predict(X_valid))
print(f'SPE: ensemble of {clf.n_estimators} {clf.estimator_}')
print('Validation Balanced Accuracy: {:.3f}'.format(balanced_acc_score))
SPE: ensemble of 5 SVC(probability=True)
Validation Balanced Accuracy: 0.972
Enable the training log (``fit()`` parameter ``train_verbose``: bool, int, or dict).
# Fit again, this time printing a per-iteration training log.
# (Per the docs above, train_verbose accepts a bool, an int, or a dict.)
clf = SPE(random_state=RANDOM_STATE)
clf.fit(
    X_train,
    y_train,
    train_verbose=True,  # Enable training log
)
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ ┃ Data: train ┃
┃ #Estimators ┃ Class Distribution ┃ Metric ┃
┃ ┃ ┃ acc balanced_acc weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ 1 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.958 0.968 0.959 ┃
┃ 5 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 10 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 15 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.999 0.997 0.999 ┃
┃ 20 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 25 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 30 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 35 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 40 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 45 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 50 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ final ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
Total running time of the script: ( 0 minutes 0.687 seconds)