Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 128 additions & 34 deletions README.md

Large diffs are not rendered by default.

22 changes: 13 additions & 9 deletions examples/sklearn/continuous_data.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,30 @@
import logging

import numpy as np
from ml3_drift.sklearn.univariate.ks import KSDriftDetector
from ml3_drift.monitoring.algorithms.batch.ks import KSAlgorithm
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from ml3_drift.callbacks.base import logger_callback
from functools import partial

from ml3_drift.sklearn.base import SklearnDriftDetector

logger = logging.getLogger(__name__)


if __name__ == "__main__":
# Define your pipeline as usual, but also add a drift detector
drift_detector = KSDriftDetector(
callbacks=[
partial(
logger_callback,
logger=logger,
level=logging.CRITICAL,
)
]
drift_detector = SklearnDriftDetector(
KSAlgorithm(
callbacks=[
partial(
logger_callback,
logger=logger,
level=logging.CRITICAL,
)
]
)
)

pipeline = Pipeline(
Expand Down
41 changes: 23 additions & 18 deletions examples/sklearn/mixed_data_monitoring.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from functools import partial
import logging
from ml3_drift.sklearn.univariate.ks import KSDriftDetector
from ml3_drift.sklearn.univariate.chi_square import ChiSquareDriftDetector
from ml3_drift.monitoring.algorithms.batch.ks import KSAlgorithm
from ml3_drift.monitoring.algorithms.batch.chi_square import ChiSquareAlgorithm

from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
import numpy as np
from ml3_drift.callbacks.base import logger_callback
from ml3_drift.sklearn.base import SklearnDriftDetector


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -51,27 +52,31 @@
transformers=[
(
"cont",
KSDriftDetector(
callbacks=[
partial(
logger_callback,
logger=logger,
level=logging.CRITICAL,
),
]
SklearnDriftDetector(
KSAlgorithm(
callbacks=[
partial(
logger_callback,
logger=logger,
level=logging.CRITICAL,
),
]
)
),
[0, 1],
),
(
"cat",
ChiSquareDriftDetector(
callbacks=[
partial(
logger_callback,
logger=logger,
level=logging.CRITICAL,
),
]
SklearnDriftDetector(
ChiSquareAlgorithm(
callbacks=[
partial(
logger_callback,
logger=logger,
level=logging.CRITICAL,
),
]
)
),
[2, 3],
),
Expand Down
3 changes: 3 additions & 0 deletions src/ml3_drift/analysis/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@ def __init__(
):
self.concepts = concepts
self.same_distributions = same_distributions

# Debug representation of the report: renders `concepts` as-is and
# `same_distributions` after converting it to a plain dict with dict(...).
# NOTE(review): the dict(...) conversion presumably keeps a mapping-subclass
# repr (e.g. defaultdict) out of the output — confirm against how
# `same_distributions` is built by the callers of __init__.
def __repr__(self):
return f"Report(concepts={self.concepts}, same_distributions={dict(self.same_distributions)})"
30 changes: 21 additions & 9 deletions src/ml3_drift/callbacks/base.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import dataclasses
from logging import Logger
from ml3_drift.callbacks.models import DriftInfo

from ml3_drift.models.monitoring import DriftInfo
Comment thread
GiovanniGiacometti marked this conversation as resolved.


def logger_callback(
drift_info: DriftInfo,
drift_info: DriftInfo | None,
logger: Logger,
level: int,
) -> None:
Expand All @@ -20,14 +22,24 @@ def logger_callback(
"""

if drift_info is None:
logger.log(level, "Drift Detected!")
logger.log(level, "Drift Detected, no drift info provided!")
return
msg = f"Drift detected on feature at index {drift_info.feature_index} by drift detector {drift_info.drift_detector}."

if drift_info.p_value is not None:
msg += f"\n p-value = {drift_info.p_value}"
logger.log(level, f"Drift Detected, drift info: {dataclasses.asdict(drift_info)}")


def print_callback(drift_info: DriftInfo | None) -> None:
"""
Print callback prints a message to the console when drift is detected.
It should be used only for testing purposes.

Example
-------

if drift_info.threshold is not None:
msg += f"\n Threshold = {drift_info.threshold}"
callback = print_callback
Comment thread
GiovanniGiacometti marked this conversation as resolved.
"""
if drift_info is None:
print("Drift Detected, no drift info provided!")
return

logger.log(level, msg)
print(f"Drift Detected, drift info: {dataclasses.asdict(drift_info)}")
13 changes: 0 additions & 13 deletions src/ml3_drift/callbacks/models.py

This file was deleted.

5 changes: 2 additions & 3 deletions src/ml3_drift/monitoring/algorithms/online/kswin.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,8 @@ def __init__(
self.seed = seed
self._args = args
self._kwargs = kwargs
super().__init__(
comparison_size=1, callbacks=callbacks
) # since we add only one sample per step and river handles building the window internally we set comparison_size to 1
# We add only one sample per step and river builds the window internally, so comparison_size is set to 1.
super().__init__(comparison_size=1, callbacks=callbacks)

def _reset_internal_parameters(self):
self.drift_agent = RiverKSWIN(
Expand Down
1 change: 1 addition & 0 deletions tests/test_monitoring/test_online/test_univariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def test_kswin_univariate_one_drift(self, abrupt_univariate_online_drift_info):
)

def test_kswin_univariate_two_drift(self, abrupt_univariate_online_bidrift_info):
np.random.seed(42)
data_stream, drift_point_1, drift_point_2 = (
abrupt_univariate_online_bidrift_info
)
Expand Down
Loading