Customize ensemble training log

This example illustrates how to enable and customize the training log when training an imbalanced_ensemble.ensemble classifier.

This example uses:

  • imbalanced_ensemble.ensemble.SelfPacedEnsembleClassifier

# Authors: Zhining Liu <zhining.liu@outlook.com>
# License: MIT
print(__doc__)

# Import imbalanced_ensemble
import imbalanced_ensemble as imbens

# Import utilities
import sklearn
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

RANDOM_STATE = 42

# sphinx_gallery_thumbnail_path = '../../docs/source/_static/training_log_thumbnail.png'

Prepare data

Make a toy 3-class imbalanced classification task.

# Generate the toy dataset: 3 classes with weights 0.1 / 0.3 / 0.6
X, y = make_classification(
    n_samples=2000,
    n_features=20,
    n_informative=3,
    n_redundant=1,
    n_classes=3,
    n_clusters_per_class=2,
    weights=[0.1, 0.3, 0.6],
    class_sep=2,
    flip_y=0,
    random_state=0,
)

# Split half/half into training and validation sets, stratified on the labels
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.5,
    stratify=y,
    random_state=RANDOM_STATE,
)

Customize training log

Taking SelfPacedEnsembleClassifier as an example, the training log is controlled by three parameters of the fit() method:

  • eval_datasets: Dataset(s) used for evaluation during the ensemble training.

  • eval_metrics: Metric(s) used for evaluation during the ensemble training.

  • train_verbose: Controls the granularity and content of the training log.

# Create the classifier that every fit() call in this example reuses
clf = imbens.ensemble.SelfPacedEnsembleClassifier(random_state=RANDOM_STATE)

Set training log format

(fit() parameter: train_verbose: bool, int or dict)

Enable auto training log

# train_verbose=True switches on the automatically formatted training log
clf.fit(X_train, y_train, train_verbose=True)

Out:

┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃             ┃                          ┃            Data: train             ┃
┃ #Estimators ┃    Class Distribution    ┃               Metric               ┃
┃             ┃                          ┃  acc    balanced_acc   weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃      1      ┃ {0: 100, 1: 100, 2: 100} ┃ 0.932      0.948          0.933    ┃
┃      5      ┃ {0: 100, 1: 100, 2: 100} ┃ 0.996      0.997          0.996    ┃
┃     10      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     15      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     20      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     25      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     30      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     35      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     40      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     45      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     50      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃    final    ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛

SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x1E1995DC540)

Customize training log granularity

# A dict gives finer control: report one log row every 10 estimators
clf.fit(
    X_train,
    y_train,
    train_verbose={'granularity': 10},
)

Out:

┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃             ┃                          ┃            Data: train             ┃
┃ #Estimators ┃    Class Distribution    ┃               Metric               ┃
┃             ┃                          ┃  acc    balanced_acc   weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃      1      ┃ {0: 100, 1: 100, 2: 100} ┃ 0.952      0.960          0.952    ┃
┃     10      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     20      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     30      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     40      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     50      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃    final    ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛

SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x1E1995DC540)

Customize training log content column

# Keep the metric columns but hide the class-distribution column
clf.fit(
    X_train,
    y_train,
    train_verbose={
        'granularity': 10,
        'print_distribution': False,
        'print_metrics': True,
    },
)

Out:

┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃             ┃            Data: train             ┃
┃ #Estimators ┃               Metric               ┃
┃             ┃  acc    balanced_acc   weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃      1      ┃ 0.931      0.946          0.931    ┃
┃     10      ┃ 1.000      1.000          1.000    ┃
┃     20      ┃ 1.000      1.000          1.000    ┃
┃     30      ┃ 1.000      1.000          1.000    ┃
┃     40      ┃ 1.000      1.000          1.000    ┃
┃     50      ┃ 1.000      1.000          1.000    ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃    final    ┃ 1.000      1.000          1.000    ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛

SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x1E1995DC540)

Add additional evaluation dataset(s)

(fit() parameter: eval_datasets: dict)

# Also evaluate on the validation split; the dict key ('valid') names
# the extra "Data: ..." column group in the log
clf.fit(
    X_train,
    y_train,
    train_verbose={'granularity': 10},
    eval_datasets={'valid': (X_valid, y_valid)},
)

Out:

┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃             ┃                          ┃            Data: train             ┃            Data: valid             ┃
┃ #Estimators ┃    Class Distribution    ┃               Metric               ┃               Metric               ┃
┃             ┃                          ┃  acc    balanced_acc   weighted_f1 ┃  acc    balanced_acc   weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃      1      ┃ {0: 100, 1: 100, 2: 100} ┃ 0.957      0.959          0.957    ┃ 0.922      0.908          0.921    ┃
┃     10      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃ 0.967      0.958          0.967    ┃
┃     20      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃ 0.968      0.958          0.968    ┃
┃     30      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃ 0.971      0.963          0.971    ┃
┃     40      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃ 0.972      0.963          0.972    ┃
┃     50      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃ 0.973      0.964          0.973    ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃    final    ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃ 0.973      0.964          0.973    ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛

SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x1E1995DC540)

Specify evaluation metric(s)

(fit() parameter: eval_metrics: dict)

# Import the metric explicitly: a bare `import sklearn` does not guarantee
# that the `sklearn.metrics` submodule has been loaded.
from sklearn.metrics import f1_score

# Each eval_metrics entry maps a display name to a
# (metric function, keyword-arguments dict) pair.
clf.fit(X_train, y_train,
        eval_datasets={
            'valid': (X_valid, y_valid),
        },
        eval_metrics={
            'weighted_f1': (f1_score, {'average': 'weighted'}),  # use weighted_f1
        },
        train_verbose={
            'granularity': 10,
        })

Out:

┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃             ┃                          ┃ Data: train ┃ Data: valid ┃
┃ #Estimators ┃    Class Distribution    ┃   Metric    ┃   Metric    ┃
┃             ┃                          ┃ weighted_f1 ┃ weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━━━━┫
┃      1      ┃ {0: 100, 1: 100, 2: 100} ┃    0.970    ┃    0.953    ┃
┃     10      ┃ {0: 100, 1: 100, 2: 100} ┃    1.000    ┃    0.975    ┃
┃     20      ┃ {0: 100, 1: 100, 2: 100} ┃    1.000    ┃    0.974    ┃
┃     30      ┃ {0: 100, 1: 100, 2: 100} ┃    1.000    ┃    0.974    ┃
┃     40      ┃ {0: 100, 1: 100, 2: 100} ┃    1.000    ┃    0.980    ┃
┃     50      ┃ {0: 100, 1: 100, 2: 100} ┃    1.000    ┃    0.980    ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━━━━┫
┃    final    ┃ {0: 100, 1: 100, 2: 100} ┃    1.000    ┃    0.980    ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━━━━┛

SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x1E1995DC540)

Total running time of the script: (1 minute 7.301 seconds)

Estimated memory usage: 11 MB

Gallery generated by Sphinx-Gallery