Customize ensemble training log

This example illustrates how to enable and customize the training log when training an imbens.ensemble classifier.

This example uses:

imbens.ensemble.SelfPacedEnsembleClassifier

# Authors: Zhining Liu <zhining.liu@outlook.com>
# License: MIT

print(__doc__)

# Import imbalanced-ensemble
import imbens

# Import utilities
import sklearn
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Seed shared by the train/validation split and the classifier below.
RANDOM_STATE = 42

# sphinx_gallery_thumbnail_path = '../../docs/source/_static/training_log_thumbnail.png'

Prepare data

Make a toy 3-class imbalanced classification task.

# Synthesize a 3-class dataset whose class weights follow a 1:3:6 ratio.
X, y = make_classification(
    n_samples=2000,
    n_features=20,
    n_informative=3,
    n_redundant=1,
    n_classes=3,
    n_clusters_per_class=2,
    weights=[0.1, 0.3, 0.6],
    class_sep=2,
    flip_y=0,
    random_state=0,
)

# Hold out half of the samples for validation, stratified on the labels.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.5,
    stratify=y,
    random_state=RANDOM_STATE,
)

Customize training log

Taking SelfPacedEnsembleClassifier as an example, the training log is controlled by three parameters of the fit() method:

eval_datasets: Dataset(s) used for evaluation during the ensemble training.

eval_metrics: Metric(s) used for evaluation during the ensemble training.

train_verbose: Controls the granularity and content of the training log.

# Create the classifier with a fixed seed; the training log is configured later via fit().
clf = imbens.ensemble.SelfPacedEnsembleClassifier(random_state=RANDOM_STATE)

Set training log format

(fit() parameter: train_verbose: bool, int or dict)

Enable auto training log

# train_verbose=True enables the automatically formatted training log.
clf.fit(X_train, y_train, train_verbose=True)

┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃             ┃                                                        ┃            Data: train             ┃
┃ #Estimators ┃                   Class Distribution                   ┃               Metric               ┃
┃             ┃                                                        ┃  acc    balanced_acc   weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃      1      ┃ {np.int64(0): 100, np.int64(1): 100, np.int64(2): 100} ┃ 0.944      0.955          0.945    ┃
┃      5      ┃ {np.int64(0): 100, np.int64(1): 100, np.int64(2): 100} ┃ 0.991      0.991          0.991    ┃
┃     10      ┃ {np.int64(0): 100, np.int64(1): 100, np.int64(2): 100} ┃ 0.999      0.997          0.999    ┃
┃     15      ┃ {np.int64(0): 100, np.int64(1): 100, np.int64(2): 100} ┃ 1.000      1.000          1.000    ┃
┃     20      ┃ {np.int64(0): 100, np.int64(1): 100, np.int64(2): 100} ┃ 0.999      0.997          0.999    ┃
┃     25      ┃ {np.int64(0): 100, np.int64(1): 100, np.int64(2): 100} ┃ 1.000      1.000          1.000    ┃
┃     30      ┃ {np.int64(0): 100, np.int64(1): 100, np.int64(2): 100} ┃ 1.000      1.000          1.000    ┃
┃     35      ┃ {np.int64(0): 100, np.int64(1): 100, np.int64(2): 100} ┃ 1.000      1.000          1.000    ┃
┃     40      ┃ {np.int64(0): 100, np.int64(1): 100, np.int64(2): 100} ┃ 1.000      1.000          1.000    ┃
┃     45      ┃ {np.int64(0): 100, np.int64(1): 100, np.int64(2): 100} ┃ 1.000      1.000          1.000    ┃
┃     50      ┃ {np.int64(0): 100, np.int64(1): 100, np.int64(2): 100} ┃ 1.000      1.000          1.000    ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃    final    ┃ {np.int64(0): 100, np.int64(1): 100, np.int64(2): 100} ┃ 1.000      1.000          1.000    ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛

SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x7C5E51B77040)