From 3e7f3ac6742a63ab729966c0ff8e205f92ac42f7 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 13 Jan 2026 08:54:15 +0200 Subject: [PATCH 1/7] gh-143658: importlib.metadata: Use `str.translate` to improve performance of `importlib.metadata.Prepared.normalized` (#143660) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Schreiner Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Bartosz Sławecki --- ...-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst | 3 ++ importlib_metadata/__init__.py | 16 ++++++++- tests/test_api.py | 34 +++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst diff --git a/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst b/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst new file mode 100644 index 00000000..1d227095 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst @@ -0,0 +1,3 @@ +:mod:`importlib.metadata`: Use :meth:`str.translate` to improve performance of +:meth:`!importlib.metadata.Prepared.normalize`. Patch by Hugo van Kemenade and +Henry Schreiner. diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index cdfc1f62..04575234 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -894,6 +894,14 @@ def search(self, prepared: Prepared): return itertools.chain(infos, eggs) +# Translation table for Prepared.normalize: lowercase and +# replace "-" (hyphen) and "." (dot) with "_" (underscore). +_normalize_table = str.maketrans( + "ABCDEFGHIJKLMNOPQRSTUVWXYZ-.", + "abcdefghijklmnopqrstuvwxyz__", +) + + class Prepared: """ A prepared search query for metadata on a possibly-named package. @@ -929,7 +937,13 @@ def normalize(name): """ PEP 503 normalization plus dashes as underscores. """ - return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_') + # Emulates ``re.sub(r"[-_.]+", "-", name).lower()`` from PEP 503 + # About 3x faster, safe since packages only support alphanumeric characters + value = name.translate(_normalize_table) + # Condense repeats (faster than regex) + while "__" in value: + value = value.replace("__", "_") + return value @staticmethod def legacy_normalize(name): diff --git a/tests/test_api.py b/tests/test_api.py index c36f93e0..553fe740 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -6,6 +6,7 @@ from importlib_metadata import ( Distribution, PackageNotFoundError, + Prepared, distribution, entry_points, files, @@ -317,3 +318,36 @@ class InvalidateCache(unittest.TestCase): def test_invalidate_cache(self): # No externally observable behavior, but ensures test coverage... importlib.invalidate_caches() + + +class PreparedTests(unittest.TestCase): + def test_normalize(self): + tests = [ + # Simple + ("sample", "sample"), + # Mixed case + ("Sample", "sample"), + ("SAMPLE", "sample"), + ("SaMpLe", "sample"), + # Separator conversions + ("sample-pkg", "sample_pkg"), + ("sample.pkg", "sample_pkg"), + ("sample_pkg", "sample_pkg"), + # Multiple separators + ("sample---pkg", "sample_pkg"), + ("sample___pkg", "sample_pkg"), + ("sample...pkg", "sample_pkg"), + # Mixed separators + ("sample-._pkg", "sample_pkg"), + ("sample_.-pkg", "sample_pkg"), + # Complex + ("Sample__Pkg-name.foo", "sample_pkg_name_foo"), + ("Sample__Pkg.name__foo", "sample_pkg_name_foo"), + # Uppercase with separators + ("SAMPLE-PKG", "sample_pkg"), + ("Sample.Pkg", "sample_pkg"), + ("SAMPLE_PKG", "sample_pkg"), + ] + for name, expected in tests: + with self.subTest(name=name): + self.assertEqual(Prepared.normalize(name), expected) From 001db0db09ddc4fb9906cfae5e5c0d737f619313 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Fri, 6 Feb 2026 19:38:58 +0200 Subject: [PATCH 2/7] gh-143658: Use `str.lower` and `replace` to further improve performance of `importlib.metadata.Prepared.normalized` (#144083) Co-authored-by: Henry Schreiner --- .../2026-01-20-20-54-46.gh-issue-143658.v8i1jE.rst | 4 ++++ importlib_metadata/__init__.py | 13 ++----------- 2 files changed, 6 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-01-20-20-54-46.gh-issue-143658.v8i1jE.rst diff --git a/Misc/NEWS.d/next/Library/2026-01-20-20-54-46.gh-issue-143658.v8i1jE.rst b/Misc/NEWS.d/next/Library/2026-01-20-20-54-46.gh-issue-143658.v8i1jE.rst new file mode 100644 index 00000000..8935b4c6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-01-20-20-54-46.gh-issue-143658.v8i1jE.rst @@ -0,0 +1,4 @@ +:mod:`importlib.metadata`: Use :meth:`str.lower` and :meth:`str.replace` to +further improve performance of +:meth:`!importlib.metadata.Prepared.normalize`. Patch by Hugo van Kemenade +and Henry Schreiner. diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index 04575234..09b37255 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -894,14 +894,6 @@ def search(self, prepared: Prepared): return itertools.chain(infos, eggs) -# Translation table for Prepared.normalize: lowercase and -# replace "-" (hyphen) and "." (dot) with "_" (underscore). -_normalize_table = str.maketrans( - "ABCDEFGHIJKLMNOPQRSTUVWXYZ-.", - "abcdefghijklmnopqrstuvwxyz__", -) - - class Prepared: """ A prepared search query for metadata on a possibly-named package. @@ -937,9 +929,8 @@ def normalize(name): """ PEP 503 normalization plus dashes as underscores. """ - # Emulates ``re.sub(r"[-_.]+", "-", name).lower()`` from PEP 503 - # About 3x faster, safe since packages only support alphanumeric characters - value = name.translate(_normalize_table) + # Much faster than re.sub, and even faster than str.translate + value = name.lower().replace("-", "_").replace(".", "_") # Condense repeats (faster than regex) while "__" in value: value = value.replace("__", "_") From 852e44f218d75fcffaca50a56169fcc4763d863f Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 20 Mar 2026 03:10:24 -0400 Subject: [PATCH 3/7] Remove CPython news fragments. --- .../Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst | 3 --- .../Library/2026-01-20-20-54-46.gh-issue-143658.v8i1jE.rst | 4 ---- 2 files changed, 7 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst delete mode 100644 Misc/NEWS.d/next/Library/2026-01-20-20-54-46.gh-issue-143658.v8i1jE.rst diff --git a/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst b/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst deleted file mode 100644 index 1d227095..00000000 --- a/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst +++ /dev/null @@ -1,3 +0,0 @@ -:mod:`importlib.metadata`: Use :meth:`str.translate` to improve performance of -:meth:`!importlib.metadata.Prepared.normalize`. Patch by Hugo van Kemenade and -Henry Schreiner. diff --git a/Misc/NEWS.d/next/Library/2026-01-20-20-54-46.gh-issue-143658.v8i1jE.rst b/Misc/NEWS.d/next/Library/2026-01-20-20-54-46.gh-issue-143658.v8i1jE.rst deleted file mode 100644 index 8935b4c6..00000000 --- a/Misc/NEWS.d/next/Library/2026-01-20-20-54-46.gh-issue-143658.v8i1jE.rst +++ /dev/null @@ -1,4 +0,0 @@ -:mod:`importlib.metadata`: Use :meth:`str.lower` and :meth:`str.replace` to -further improve performance of -:meth:`!importlib.metadata.Prepared.normalize`. Patch by Hugo van Kemenade -and Henry Schreiner. From 1b0be12fc662f0ba4ee6c86d544585485ff40dac Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 20 Mar 2026 03:35:17 -0400 Subject: [PATCH 4/7] Use parameterize fixture for parameterized tests. --- tests/test_api.py | 58 +++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 553fe740..3dbed628 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -321,33 +321,31 @@ def test_invalidate_cache(self): class PreparedTests(unittest.TestCase): - def test_normalize(self): - tests = [ - # Simple - ("sample", "sample"), - # Mixed case - ("Sample", "sample"), - ("SAMPLE", "sample"), - ("SaMpLe", "sample"), - # Separator conversions - ("sample-pkg", "sample_pkg"), - ("sample.pkg", "sample_pkg"), - ("sample_pkg", "sample_pkg"), - # Multiple separators - ("sample---pkg", "sample_pkg"), - ("sample___pkg", "sample_pkg"), - ("sample...pkg", "sample_pkg"), - # Mixed separators - ("sample-._pkg", "sample_pkg"), - ("sample_.-pkg", "sample_pkg"), - # Complex - ("Sample__Pkg-name.foo", "sample_pkg_name_foo"), - ("Sample__Pkg.name__foo", "sample_pkg_name_foo"), - # Uppercase with separators - ("SAMPLE-PKG", "sample_pkg"), - ("Sample.Pkg", "sample_pkg"), - ("SAMPLE_PKG", "sample_pkg"), - ] - for name, expected in tests: - with self.subTest(name=name): - self.assertEqual(Prepared.normalize(name), expected) + @fixtures.parameterize( + # Simple + dict(input='sample', expected='sample'), + # Mixed case + dict(input='Sample', expected='sample'), + dict(input='SAMPLE', expected='sample'), + dict(input='SaMpLe', expected='sample'), + # Separator conversions + dict(input='sample-pkg', expected='sample_pkg'), + dict(input='sample.pkg', expected='sample_pkg'), + dict(input='sample_pkg', expected='sample_pkg'), + # Multiple separators + dict(input='sample---pkg', expected='sample_pkg'), + dict(input='sample___pkg', expected='sample_pkg'), + dict(input='sample...pkg', expected='sample_pkg'), + # Mixed separators + dict(input='sample-._pkg', expected='sample_pkg'), + dict(input='sample_.-pkg', expected='sample_pkg'), + # Complex + dict(input='Sample__Pkg-name.foo', expected='sample_pkg_name_foo'), + dict(input='Sample__Pkg.name__foo', expected='sample_pkg_name_foo'), + # Uppercase with separators + dict(input='SAMPLE-PKG', expected='sample_pkg'), + dict(input='Sample.Pkg', expected='sample_pkg'), + dict(input='SAMPLE_PKG', expected='sample_pkg'), + ) + def test_normalize(self, input, expected): + self.assertEqual(Prepared.normalize(input), expected) From a77d0d1b2f79d7fd21728284d2955ffa6d5caceb Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 20 Mar 2026 03:40:52 -0400 Subject: [PATCH 5/7] Add performance test for Prepared.normalize. --- exercises.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/exercises.py b/exercises.py index adccf03c..b346cc05 100644 --- a/exercises.py +++ b/exercises.py @@ -45,3 +45,10 @@ def entrypoint_regexp_perf(): input = '0' + ' ' * 2**10 + '0' # end warmup re.match(importlib_metadata.EntryPoint.pattern, input) + + +def normalize_perf(): + # python/cpython#143658 + import importlib_metadata # end warmup + + importlib_metadata.Prepared.normalize('sample') From cbadafcad64cee12d292ed8ac1dc96bb0295966a Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 20 Mar 2026 04:31:01 -0400 Subject: [PATCH 6/7] Repeat the operation to get performance visibility. --- exercises.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/exercises.py b/exercises.py index b346cc05..cf6d9d18 100644 --- a/exercises.py +++ b/exercises.py @@ -51,4 +51,7 @@ def normalize_perf(): # python/cpython#143658 import importlib_metadata # end warmup - importlib_metadata.Prepared.normalize('sample') + # operation completes in < 1ms, so repeat it to get visibility + # https://github.com/jaraco/pytest-perf/issues/12 + for _ in range(1000): + importlib_metadata.Prepared.normalize('sample') From 27169dcd343e65727805c12bc95bd52c9153cd04 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 20 Mar 2026 11:47:45 -0400 Subject: [PATCH 7/7] Move behavior description into the docstring. Remove references to intermediate implementations. Reference the rationale. --- importlib_metadata/__init__.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index 09b37255..88d65d5d 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -636,7 +636,8 @@ def _read_files_egginfo_installed(self): return paths = ( - py311.relative_fix((subdir / name).resolve()) + py311 + .relative_fix((subdir / name).resolve()) .relative_to(self.locate_file('').resolve(), walk_up=True) .as_posix() for name in text.splitlines() @@ -928,10 +929,12 @@ def __init__(self, name: str | None): def normalize(name): """ PEP 503 normalization plus dashes as underscores. + + Specifically avoids ``re.sub`` as prescribed for performance + benefits (see python/cpython#143658). """ - # Much faster than re.sub, and even faster than str.translate value = name.lower().replace("-", "_").replace(".", "_") - # Condense repeats (faster than regex) + # Condense repeats while "__" in value: value = value.replace("__", "_") return value