Note
Go to the end to download the full example code
Customize ensemble training log
This example illustrates how to enable and customize the training
log when training an imbens.ensemble
classifier.
This example uses: imbens.ensemble.SelfPacedEnsembleClassifier.
# Authors: Zhining Liu <zhining.liu@outlook.com>
# License: MIT
# Print the module docstring (sphinx-gallery shows it at the top of the rendered example).
print(__doc__)
# Import imbalanced-ensemble
import imbens
# Import utilities
import sklearn
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
# Fixed seed so the data split and training are reproducible across runs.
RANDOM_STATE = 42
# sphinx_gallery_thumbnail_path = '../../docs/source/_static/training_log_thumbnail.png'
Prepare data
Make a toy 3-class imbalanced classification task.
# Build a toy 3-class imbalanced dataset (class proportions 10% / 30% / 60%).
X, y = make_classification(
    n_samples=2000,
    n_features=20,
    n_informative=3,
    n_redundant=1,
    n_classes=3,
    n_clusters_per_class=2,
    weights=[0.1, 0.3, 0.6],
    class_sep=2,
    flip_y=0,
    random_state=0,
)
# Hold out half of the data (stratified on the labels) as a validation set.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.5, stratify=y, random_state=RANDOM_STATE
)
Customize training log
Taking SelfPacedEnsembleClassifier
as an example, the training log is controlled by 3 parameters of the fit()
method:
eval_datasets
: Dataset(s) used for evaluation during the ensemble training.
eval_metrics
: Metric(s) used for evaluation during the ensemble training.
train_verbose
: Controls the granularity and content of the training log.
clf = imbens.ensemble.SelfPacedEnsembleClassifier(random_state=RANDOM_STATE)
Set training log format
(fit()
parameter: train_verbose
: bool, int or dict)
Enable auto training log
# Passing ``train_verbose=True`` turns on the training log with all of its
# default settings (granularity, distribution column, and metric columns).
clf.fit(X_train, y_train, train_verbose=True)
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ ┃ Data: train ┃
┃ #Estimators ┃ Class Distribution ┃ Metric ┃
┃ ┃ ┃ acc balanced_acc weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ 1 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.944 0.955 0.945 ┃
┃ 5 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.991 0.991 0.991 ┃
┃ 10 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.999 0.997 0.999 ┃
┃ 15 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 20 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.999 0.997 0.999 ┃
┃ 25 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 30 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 35 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 40 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 45 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 50 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ final ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
Customize training log granularity
# A dict value for ``train_verbose`` customizes the log; here a granularity of
# 10 prints one row per 10 estimators (the first and final rows always appear).
verbose_cfg = {'granularity': 10}
clf.fit(X_train, y_train, train_verbose=verbose_cfg)
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ ┃ Data: train ┃
┃ #Estimators ┃ Class Distribution ┃ Metric ┃
┃ ┃ ┃ acc balanced_acc weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ 1 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.958 0.965 0.958 ┃
┃ 10 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.999 0.999 0.999 ┃
┃ 20 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 30 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 40 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┃ 50 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ final ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
Customize training log content column
# Keep the metric columns but suppress the class-distribution column.
verbose_cfg = {
    'granularity': 10,
    'print_distribution': False,
    'print_metrics': True,
}
clf.fit(X_train, y_train, train_verbose=verbose_cfg)
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ Data: train ┃
┃ #Estimators ┃ Metric ┃
┃ ┃ acc balanced_acc weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ 1 ┃ 0.936 0.947 0.937 ┃
┃ 10 ┃ 0.999 0.997 0.999 ┃
┃ 20 ┃ 0.999 0.997 0.999 ┃
┃ 30 ┃ 1.000 1.000 1.000 ┃
┃ 40 ┃ 1.000 1.000 1.000 ┃
┃ 50 ┃ 1.000 1.000 1.000 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ final ┃ 1.000 1.000 1.000 ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
Add additional evaluation dataset(s)
(fit()
parameter: eval_datasets
: dict)
# ``eval_datasets`` maps a display name to an (X, y) pair; each entry adds a
# column group to the training log (see the "Data: valid" columns below).
clf.fit(
    X_train,
    y_train,
    eval_datasets={'valid': (X_valid, y_valid)},  # evaluate on held-out data too
    train_verbose={'granularity': 10},
)
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ ┃ Data: train ┃ Data: valid ┃
┃ #Estimators ┃ Class Distribution ┃ Metric ┃ Metric ┃
┃ ┃ ┃ acc balanced_acc weighted_f1 ┃ acc balanced_acc weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ 1 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.937 0.945 0.938 ┃ 0.909 0.899 0.911 ┃
┃ 10 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃ 0.969 0.962 0.969 ┃
┃ 20 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃ 0.966 0.954 0.966 ┃
┃ 30 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃ 0.964 0.947 0.964 ┃
┃ 40 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃ 0.966 0.946 0.966 ┃
┃ 50 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃ 0.966 0.949 0.966 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ final ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 1.000 1.000 ┃ 0.966 0.949 0.966 ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
Specify evaluation metric(s)
(fit()
parameter: eval_metrics
: dict)
# ``eval_metrics`` maps a metric name to a (callable, kwargs) pair; here only
# the weighted F1-score is reported for each evaluation dataset.
clf.fit(
    X_train,
    y_train,
    eval_datasets={'valid': (X_valid, y_valid)},
    eval_metrics={
        'weighted_f1': (sklearn.metrics.f1_score, {'average': 'weighted'}),
    },
    train_verbose={'granularity': 10},
)
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ ┃ ┃ Data: train ┃ Data: valid ┃
┃ #Estimators ┃ Class Distribution ┃ Metric ┃ Metric ┃
┃ ┃ ┃ weighted_f1 ┃ weighted_f1 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━━━━┫
┃ 1 ┃ {0: 100, 1: 100, 2: 100} ┃ 0.945 ┃ 0.906 ┃
┃ 10 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 ┃ 0.969 ┃
┃ 20 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 ┃ 0.975 ┃
┃ 30 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 ┃ 0.974 ┃
┃ 40 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 ┃ 0.974 ┃
┃ 50 ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 ┃ 0.973 ┃
┣━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━━━━┫
┃ final ┃ {0: 100, 1: 100, 2: 100} ┃ 1.000 ┃ 0.973 ┃
┗━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━━━━┛
Total running time of the script: ( 0 minutes 2.572 seconds)