From abeca020d2ded14f45fb89a29575896392600691 Mon Sep 17 00:00:00 2001 From: Sirajus Salekin Date: Tue, 31 Mar 2026 15:18:45 -0400 Subject: [PATCH 1/3] fix: update packet evaluation metric --- src/stickler/doc_split/packet_evaluation_metrics.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/stickler/doc_split/packet_evaluation_metrics.py b/src/stickler/doc_split/packet_evaluation_metrics.py index b466397..0760de7 100644 --- a/src/stickler/doc_split/packet_evaluation_metrics.py +++ b/src/stickler/doc_split/packet_evaluation_metrics.py @@ -134,7 +134,7 @@ def calculate_ordering_score_per_group( """ Calculate Kendall's Tau for each document group. - Single-page groups are excluded (ordering undefined). + Single-page groups are assigned a perfect score of 1.0 (trivially in correct order). Args: data: DataFrame with group_id, page_number, page_number_predicted. @@ -151,6 +151,7 @@ def calculate_ordering_score_per_group( for group_id, group_data in data.groupby("group_id"): if len(group_data) <= 1: + group_scores[group_id] = 1.0 continue tau, _p_value = kendalltau( @@ -163,9 +164,9 @@ def calculate_ordering_score_per_group( def calculate_average_ordering_score(group_scores: Dict[Any, float]) -> float: """ - Mean Kendall's Tau across all multi-page groups. + Mean Kendall's Tau across all groups (single-page groups score 1.0). - Returns 0 if no multi-page groups exist. + Returns 0 if no groups exist. """ if not group_scores: return 0 From 74b257c8e3c653805eae30f4a69f1e86b69945e2 Mon Sep 17 00:00:00 2001 From: Sirajus Salekin Date: Tue, 31 Mar 2026 18:03:49 -0400 Subject: [PATCH 2/3] test: update single-page group ordering test to match new behavior --- tests/doc_split/test_packet_evaluation_metrics.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/doc_split/test_packet_evaluation_metrics.py b/tests/doc_split/test_packet_evaluation_metrics.py index 97fd7ae..343235f 100644 --- a/tests/doc_split/test_packet_evaluation_metrics.py +++ b/tests/doc_split/test_packet_evaluation_metrics.py @@ -308,17 +308,6 @@ def test_reverse_ordering(self): avg = calculate_average_ordering_score(scores) assert avg == pytest.approx(-1.0, abs=1e-4) - def test_single_page_groups_excluded(self): - """Groups with only 1 page should not appear in ordering scores.""" - data = [ - _page("invoice", "inv-01", 1, "invoice", "inv-01", 1), - _page("form", "form-01", 2, "form", "form-01", 2), - ] - df = pd.DataFrame(data) - scores = calculate_ordering_score_per_group(df) - assert len(scores) == 0 - assert calculate_average_ordering_score(scores) == 0 - def test_missing_columns_raises(self): df = pd.DataFrame([{"foo": 1}]) with pytest.raises(KeyError): From 44b218db7c1ff83be139c3be5821d7da9a767755 Mon Sep 17 00:00:00 2001 From: Sirajus Salekin Date: Thu, 2 Apr 2026 10:00:54 -0400 Subject: [PATCH 3/3] test: add test_single_page_groups_score_perfect per reviewer feedback --- tests/doc_split/test_packet_evaluation_metrics.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/doc_split/test_packet_evaluation_metrics.py b/tests/doc_split/test_packet_evaluation_metrics.py index 343235f..4c4684d 100644 --- a/tests/doc_split/test_packet_evaluation_metrics.py +++ b/tests/doc_split/test_packet_evaluation_metrics.py @@ -308,6 +308,18 @@ def test_reverse_ordering(self): avg = calculate_average_ordering_score(scores) assert avg == pytest.approx(-1.0, abs=1e-4) + def test_single_page_groups_score_perfect(self): + """Single-page groups should receive a perfect ordering score of 1.0.""" + data = [ + _page("invoice", "inv-01", 1, "invoice", "inv-01", 1), + _page("form", "form-01", 2, "form", "form-01", 2), + ] + df = pd.DataFrame(data) + scores = calculate_ordering_score_per_group(df) + assert len(scores) == 2 + assert all(v == 1.0 for v in scores.values()) + assert calculate_average_ordering_score(scores) == 1.0 + def test_missing_columns_raises(self): df = pd.DataFrame([{"foo": 1}]) with pytest.raises(KeyError):