Customize ensemble training log

This example illustrates how to enable and customize the training log when training an imbens.ensemble classifier.

This example uses: imbens.ensemble.SelfPacedEnsembleClassifier

# Authors: Zhining Liu <zhining.liu@outlook.com>
# License: MIT
# Show the example's description (the module docstring) when the script runs.
print(__doc__)

# Import imbalanced-ensemble
import imbens

# Import utilities
import sklearn
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Seed shared by the train/validation split and the classifier in this example.
RANDOM_STATE = 42

# sphinx_gallery_thumbnail_path = '../../docs/source/_static/training_log_thumbnail.png'

Prepare data

Make a toy 3-class imbalanced classification task.

# make dataset: 2000 samples, 20 features, 3 classes.
# `weights` sets unequal class proportions (10% / 30% / 60%), which is what
# makes the task imbalanced; `flip_y=0` turns off random label noise.
X, y = make_classification(
    n_classes=3,
    class_sep=2,
    weights=[0.1, 0.3, 0.6],
    n_informative=3,
    n_redundant=1,
    flip_y=0,
    n_features=20,
    n_clusters_per_class=2,
    n_samples=2000,
    random_state=0,
)

# train/validation split: hold out half of the data for validation.
# `stratify=y` keeps the class proportions the same in both halves.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.5, stratify=y, random_state=RANDOM_STATE
)

Customize training log

Taking SelfPacedEnsembleClassifier as an example, the training log is controlled by three parameters of the fit() method:

  • eval_datasets: Dataset(s) used for evaluation during the ensemble training.

  • eval_metrics: Metric(s) used for evaluation during the ensemble training.

  • train_verbose: Controls the granularity and content of the training log.

# The same classifier instance is used by every fit() call in this example.
clf = imbens.ensemble.SelfPacedEnsembleClassifier(random_state=RANDOM_STATE)

Set training log format

(fit() parameter: train_verbose: bool, int or dict)

Enable auto training log

# Passing True turns the training log on with its default layout.
clf.fit(
    X_train,
    y_train,
    train_verbose=True,
)
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃             ┃                          ┃            Data: train             ┃
┃ #Estimators ┃    Class Distribution    ┃               Metric               ┃
┃             ┃                          ┃  acc    balanced_acc   weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃      1      ┃ {0: 100, 1: 100, 2: 100} ┃ 0.944      0.955          0.945    ┃
┃      5      ┃ {0: 100, 1: 100, 2: 100} ┃ 0.991      0.991          0.991    ┃
┃     10      ┃ {0: 100, 1: 100, 2: 100} ┃ 0.999      0.997          0.999    ┃
┃     15      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     20      ┃ {0: 100, 1: 100, 2: 100} ┃ 0.999      0.997          0.999    ┃
┃     25      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     30      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     35      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     40      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     45      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     50      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃    final    ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x7F1EFECE5140)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.


Customize training log granularity

# A dict gives finer control over the log than a plain True.
clf.fit(
    X_train,
    y_train,
    train_verbose={
        'granularity': 10,  # print a log row every 10 estimators
    },
)
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃             ┃                          ┃            Data: train             ┃
┃ #Estimators ┃    Class Distribution    ┃               Metric               ┃
┃             ┃                          ┃  acc    balanced_acc   weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃      1      ┃ {0: 100, 1: 100, 2: 100} ┃ 0.958      0.965          0.958    ┃
┃     10      ┃ {0: 100, 1: 100, 2: 100} ┃ 0.999      0.999          0.999    ┃
┃     20      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     30      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     40      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┃     50      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃    final    ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x7F1EFECE5140)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.


Customize training log content column

# Choose which columns appear in the log in addition to the row interval.
clf.fit(
    X_train,
    y_train,
    train_verbose={
        'granularity': 10,
        'print_distribution': False,  # hide the "Class Distribution" column
        'print_metrics': True,  # keep the metric columns
    },
)
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃             ┃            Data: train             ┃
┃ #Estimators ┃               Metric               ┃
┃             ┃  acc    balanced_acc   weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃      1      ┃ 0.936      0.947          0.937    ┃
┃     10      ┃ 0.999      0.997          0.999    ┃
┃     20      ┃ 0.999      0.997          0.999    ┃
┃     30      ┃ 1.000      1.000          1.000    ┃
┃     40      ┃ 1.000      1.000          1.000    ┃
┃     50      ┃ 1.000      1.000          1.000    ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃    final    ┃ 1.000      1.000          1.000    ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x7F1EFECE5140)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.


Add additional evaluation dataset(s)

(fit() parameter: eval_datasets: dict)

# Each eval_datasets entry maps a display name to an (X, y) pair; the name
# is used as the "Data: <name>" column header in the log.
clf.fit(
    X_train,
    y_train,
    eval_datasets={
        'valid': (X_valid, y_valid),  # add validation data
    },
    train_verbose={
        'granularity': 10,
    },
)
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃             ┃                          ┃            Data: train             ┃            Data: valid             ┃
┃ #Estimators ┃    Class Distribution    ┃               Metric               ┃               Metric               ┃
┃             ┃                          ┃  acc    balanced_acc   weighted_f1 ┃  acc    balanced_acc   weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃      1      ┃ {0: 100, 1: 100, 2: 100} ┃ 0.937      0.945          0.938    ┃ 0.909      0.899          0.911    ┃
┃     10      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃ 0.969      0.962          0.969    ┃
┃     20      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃ 0.966      0.954          0.966    ┃
┃     30      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃ 0.964      0.947          0.964    ┃
┃     40      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃ 0.966      0.946          0.966    ┃
┃     50      ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃ 0.966      0.949          0.966    ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃    final    ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000      1.000          1.000    ┃ 0.966      0.949          0.966    ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x7F1EFECE5140)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.


Specify evaluation metric(s)

(fit() parameter: eval_metrics: dict)

# Each eval_metrics entry maps a display name to a
# (metric function, dict of keyword arguments for that function) pair;
# only the metrics listed here are shown in the log.
# NOTE(review): `sklearn.metrics` is reached through the bare `import sklearn`;
# presumably the submodule is already loaded as a side effect of the other
# sklearn imports in this example -- confirm.
clf.fit(
    X_train,
    y_train,
    eval_datasets={
        'valid': (X_valid, y_valid),
    },
    eval_metrics={
        'weighted_f1': (
            sklearn.metrics.f1_score,
            {'average': 'weighted'},
        ),  # use weighted_f1
    },
    train_verbose={
        'granularity': 10,
    },
)
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃             ┃                          ┃ Data: train ┃ Data: valid ┃
┃ #Estimators ┃    Class Distribution    ┃   Metric    ┃   Metric    ┃
┃             ┃                          ┃ weighted_f1 ┃ weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━━━━┫
┃      1      ┃ {0: 100, 1: 100, 2: 100} ┃    0.945    ┃    0.906    ┃
┃     10      ┃ {0: 100, 1: 100, 2: 100} ┃    1.000    ┃    0.969    ┃
┃     20      ┃ {0: 100, 1: 100, 2: 100} ┃    1.000    ┃    0.975    ┃
┃     30      ┃ {0: 100, 1: 100, 2: 100} ┃    1.000    ┃    0.974    ┃
┃     40      ┃ {0: 100, 1: 100, 2: 100} ┃    1.000    ┃    0.974    ┃
┃     50      ┃ {0: 100, 1: 100, 2: 100} ┃    1.000    ┃    0.973    ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━━━━┫
┃    final    ┃ {0: 100, 1: 100, 2: 100} ┃    1.000    ┃    0.973    ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━━━━┛
SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x7F1EFECE5140)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.


Total running time of the script: ( 0 minutes 2.572 seconds)

Gallery generated by Sphinx-Gallery