From 4cbfa15f1839bdc4f4b8426b4ca423e8c3955dda Mon Sep 17 00:00:00 2001
From: Niek Tax
Date: Tue, 31 Mar 2026 11:47:21 -0700
Subject: [PATCH] Fix bugs in multicalibration library (#257)

Summary:
**Bug: Wrong variable validated in `calibration_free_normalized_entropy` (metrics.py:1401)**

The shape check validated `labels` instead of `predicted_scores`. This meant 2D prediction arrays (e.g., multi-class probabilities) were silently accepted, producing incorrect results, while 2D label arrays were incorrectly rejected.

Differential Revision: D98852527
---
 src/mcgrad/metrics.py            |  2 +-
 src/mcgrad/tests/test_metrics.py | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/mcgrad/metrics.py b/src/mcgrad/metrics.py
index 33c46ffb..ae9d7914 100644
--- a/src/mcgrad/metrics.py
+++ b/src/mcgrad/metrics.py
@@ -1398,7 +1398,7 @@ def calibration_free_normalized_entropy(
     :param max_iter: Maximum number of iterations for the calibration adjustment. Defaults to 10000.
     :return: the calibration-free NE.
     """
-    if len(labels.shape) != 1:
+    if len(predicted_scores.shape) != 1:
         raise ValueError("y_pred must be the predicted probability for class 1 only.")
 
     current_calibration = calibration_ratio(labels, predicted_scores, sample_weight)
diff --git a/src/mcgrad/tests/test_metrics.py b/src/mcgrad/tests/test_metrics.py
index c74547ec..c3b3839f 100644
--- a/src/mcgrad/tests/test_metrics.py
+++ b/src/mcgrad/tests/test_metrics.py
@@ -1774,6 +1774,26 @@ def test_calibration_free_normalized_entropy_higher_for_reversed_predictions():
     assert result_bad > result_good
 
 
+def test_calibration_free_normalized_entropy_rejects_2d_predictions():
+    labels = np.array([0, 1, 0, 1])
+    predictions_2d = np.array([[0.2, 0.8], [0.7, 0.3], [0.1, 0.9], [0.6, 0.4]])
+
+    with pytest.raises(ValueError, match="y_pred must be the predicted probability"):
+        metrics.calibration_free_normalized_entropy(
+            labels=labels, predicted_scores=predictions_2d
+        )
+
+
+def test_calibration_free_normalized_entropy_accepts_1d_labels():
+    labels = np.array([0, 1, 0, 1])
+    predictions = np.array([0.2, 0.8, 0.3, 0.7])
+
+    result = metrics.calibration_free_normalized_entropy(
+        labels=labels, predicted_scores=predictions
+    )
+    assert isinstance(result, (float, np.floating))
+
+
 def test_rank_calibration_error_zero_for_perfect_ranking():
     labels = np.array([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
     perfect_predictions = labels * 2.0