Note
Click here to download the full example code
Customize ensemble training log
This example illustrates how to enable and customize the training
log when training an imbalanced_ensemble.ensemble
classifier.
This example uses: imbalanced_ensemble.ensemble.SelfPacedEnsembleClassifier
# Authors: Zhining Liu <zhining.liu@outlook.com>
# License: MIT
# Echo this example's module docstring when the script is run
print(__doc__)
# Import imbalanced_ensemble
import imbalanced_ensemble as imbens
# Import utilities
import sklearn
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
# Seed reused for the train/valid split and for the classifier
RANDOM_STATE = 42
# sphinx_gallery_thumbnail_path = '../../docs/source/_static/training_log_thumbnail.png'
Prepare data
Make a toy 3-class imbalanced classification task.
# Build a synthetic 3-class dataset with class weights 0.1 / 0.3 / 0.6
X, y = make_classification(
    n_samples=2000,
    n_features=20,
    n_informative=3,
    n_redundant=1,
    n_classes=3,
    n_clusters_per_class=2,
    weights=[0.1, 0.3, 0.6],
    class_sep=2,
    flip_y=0,
    random_state=0,
)
# Split half/half into training and validation sets, stratified on y
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y,
    test_size=0.5,
    stratify=y,
    random_state=RANDOM_STATE,
)
Customize training log
Taking SelfPacedEnsembleClassifier
as an example, the training log is controlled by three parameters of the fit()
method:
eval_datasets
: Dataset(s) used for evaluation during the ensemble training.
eval_metrics
: Metric(s) used for evaluation during the ensemble training.
train_verbose
: Controls the granularity and content of the training log.
# Classifier instance that every fit() call in this example reuses
clf = imbens.ensemble.SelfPacedEnsembleClassifier(
    random_state=RANDOM_STATE,
)
Set training log format
(fit()
parameter: train_verbose
: bool, int or dict)
Enable auto training log
# train_verbose=True switches on the training log with its default layout
clf.fit(
    X_train,
    y_train,
    train_verbose=True,
)
Out:
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ ┃ Data: train ┃
┃ #Estimators ┃ Class Distribution ┃ Metric ┃
┃ ┃ ┃ acc balanced_acc weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ 1 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.932 0.948 0.933 ┃
┃ 5 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.996 0.997 0.996 ┃
┃ 10 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 15 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 20 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 25 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 30 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 35 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 40 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 45 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 50 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ final ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x1E1995DC540)
Customize training log granularity
# A dict-valued train_verbose customizes the log; with 'granularity': 10
# a row is printed every 10 estimators
clf.fit(
    X_train,
    y_train,
    train_verbose={
        'granularity': 10,
    },
)
Out:
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ ┃ Data: train ┃
┃ #Estimators ┃ Class Distribution ┃ Metric ┃
┃ ┃ ┃ acc balanced_acc weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ 1 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.952 0.960 0.952 ┃
┃ 10 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 20 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 30 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 40 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 50 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ final ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x1E1995DC540)
Customize training log content columns
# Choose which columns the log shows: drop the class-distribution column
# and keep the metric columns
clf.fit(
    X_train,
    y_train,
    train_verbose={
        'granularity': 10,
        'print_distribution': False,
        'print_metrics': True,
    },
)
Out:
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ Data: train ┃
┃ #Estimators ┃ Metric ┃
┃ ┃ acc balanced_acc weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ 1 ┃ 0.931 0.946 0.931 ┃
┃ 10 ┃ 1.000 1.000 1.000 ┃
┃ 20 ┃ 1.000 1.000 1.000 ┃
┃ 30 ┃ 1.000 1.000 1.000 ┃
┃ 40 ┃ 1.000 1.000 1.000 ┃
┃ 50 ┃ 1.000 1.000 1.000 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ final ┃ 1.000 1.000 1.000 ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x1E1995DC540)
Add additional evaluation dataset(s)
(fit()
parameter: eval_datasets
: dict)
# Each eval_datasets entry maps a dataset name to an (X, y) pair; the name
# is used as the "Data: <name>" column header in the log
clf.fit(
    X_train,
    y_train,
    eval_datasets={
        'valid': (X_valid, y_valid),  # add validation data
    },
    train_verbose={
        'granularity': 10,
    },
)
Out:
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ ┃ Data: train ┃ Data: valid ┃
┃ #Estimators ┃ Class Distribution ┃ Metric ┃ Metric ┃
┃ ┃ ┃ acc balanced_acc weighted_f1 ┃ acc balanced_acc weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ 1 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.957 0.959 0.957 ┃ 0.922 0.908 0.921 ┃
┃ 10 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃ 0.967 0.958 0.967 ┃
┃ 20 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃ 0.968 0.958 0.968 ┃
┃ 30 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃ 0.971 0.963 0.971 ┃
┃ 40 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃ 0.972 0.963 0.972 ┃
┃ 50 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃ 0.973 0.964 0.973 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ final ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃ 0.973 0.964 0.973 ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x1E1995DC540)
Specify evaluation metric(s)
(fit()
parameter: eval_metrics
: dict)
# `import sklearn` alone does not guarantee that the `sklearn.metrics`
# submodule is loaded, so import it explicitly before referencing it
import sklearn.metrics

# Each eval_metrics entry maps a display name to a
# (metric function, keyword-argument dict) pair
clf.fit(
    X_train,
    y_train,
    eval_datasets={
        'valid': (X_valid, y_valid),
    },
    eval_metrics={
        'weighted_f1': (sklearn.metrics.f1_score, {'average': 'weighted'}),  # use weighted_f1
    },
    train_verbose={
        'granularity': 10,
    },
)
Out:
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ ┃ ┃ Data: train ┃ Data: valid ┃
┃ #Estimators ┃ Class Distribution ┃ Metric ┃ Metric ┃
┃ ┃ ┃ weighted_f1 ┃ weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━━━━┫
┃ 1 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.970 ┃ 0.953 ┃
┃ 10 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 ┃ 0.975 ┃
┃ 20 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 ┃ 0.974 ┃
┃ 30 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 ┃ 0.974 ┃
┃ 40 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 ┃ 0.980 ┃
┃ 50 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 ┃ 0.980 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━━━━┫
┃ final ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 ┃ 0.980 ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━━━━┛
SelfPacedEnsembleClassifier(random_state=RandomState(MT19937) at 0x1E1995DC540)
Total running time of the script: ( 1 minutes 7.301 seconds)
Estimated memory usage: 11 MB