From b58fbdab16edde80ba75758b792b795bcc547315 Mon Sep 17 00:00:00 2001 From: KorsarOfficial Date: Thu, 19 Mar 2026 00:58:17 +0400 Subject: [PATCH 1/3] feat: implement composer classmap autoloading (#49) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds support for the "classmap" key in composer.json autoload sections. Composer's classmap autoloading lets packages map arbitrary directories or individual .php files to classes without requiring PSR-0/4 naming conventions. Implementation: - ComposerJsonData gains AutoloadClassmapEntry to hold the classmap paths from composer.json, each made absolute by resolving it against the package root (mirroring the existing AutoloadFileItem/AutoloadPsr4Item pattern for JSON parsing in composer-json-data.cpp). - ComposerAutoloader gains a new autoload_classmap_ hash map (std::unordered_map from class name to .php file path) populated during load_file() by scanning the listed paths with a heuristic PHP class-declaration scanner (collect_php_class_names / scan_classmap_path in composer.cpp). The scanner handles namespace declarations and class/interface/trait/enum definitions; it uses std::filesystem for recursive directory traversal. - classmap_lookup() is consulted in collect-required-and-classes.cpp after psr0_lookup(), completing the lookup chain: PSR-4 → PSR-0 → classmap → default. 
Also adds: - tests/python/tests/composer/php/test_autoload_classmap/ — fixture with a classmap-based package (class Logger and Api\ApiClient) - test_classmap_autoloading in test_composer.py --- compiler/composer.cpp | 176 ++++++++++++++++++ compiler/composer.h | 7 + compiler/data/composer-json-data.cpp | 6 + compiler/data/composer-json-data.h | 14 +- .../pipes/collect-required-and-classes.cpp | 4 + .../php/test_autoload_classmap/composer.json | 7 + .../php/test_autoload_classmap/index.php | 8 + .../src/Api/ApiClient.php | 9 + .../php/test_autoload_classmap/src/Logger.php | 7 + .../vendor/autoload.php | 14 ++ tests/python/tests/composer/test_composer.py | 7 + 11 files changed, 258 insertions(+), 1 deletion(-) create mode 100644 tests/python/tests/composer/php/test_autoload_classmap/composer.json create mode 100644 tests/python/tests/composer/php/test_autoload_classmap/index.php create mode 100644 tests/python/tests/composer/php/test_autoload_classmap/src/Api/ApiClient.php create mode 100644 tests/python/tests/composer/php/test_autoload_classmap/src/Logger.php create mode 100644 tests/python/tests/composer/php/test_autoload_classmap/vendor/autoload.php diff --git a/compiler/composer.cpp b/compiler/composer.cpp index dda13c9f18..d6bddb41ee 100644 --- a/compiler/composer.cpp +++ b/compiler/composer.cpp @@ -4,6 +4,9 @@ #include "compiler/composer.h" +#include +#include + #include "common/algorithms/contains.h" #include "common/wrappers/fmt_format.h" @@ -16,6 +19,156 @@ static bool file_exists(const std::string &filename) { return access(filename.c_str(), F_OK) == 0; }; +// Scans a single PHP file and returns all fully-qualified class names declared in it. +// Uses '/' as the namespace separator (the same convention used by the rest of this file). +// Only handles declarations at file scope (the overwhelming majority of autoloaded code). +// Not a full PHP parser — heuristic-based, but correct for typical autoloaded PHP. 
+static std::vector collect_php_class_names(const std::string &filepath) { + std::ifstream f(filepath, std::ios::binary); + if (!f) { + return {}; + } + + std::vector result; + std::string ns_prefix; // e.g. "Foo/Bar/" or "" + bool in_block_comment = false; + + std::string line; + while (std::getline(f, line)) { + // strip trailing \r + if (!line.empty() && line.back() == '\r') { + line.pop_back(); + } + + // handle block comments: /* ... */ + // (simplified: track opening/closing markers; won't handle nested /* or // inside strings) + if (in_block_comment) { + auto end = line.find("*/"); + if (end == std::string::npos) { + continue; + } + in_block_comment = false; + line = line.substr(end + 2); + } + { + auto bc = line.find("/*"); + if (bc != std::string::npos) { + in_block_comment = true; + line = line.substr(0, bc); + } + } + + // strip line comments // + { + auto lc = line.find("//"); + if (lc != std::string::npos) { + line = line.substr(0, lc); + } + } + + // trim leading whitespace + size_t start = line.find_first_not_of(" \t"); + if (start == std::string::npos) { + continue; + } + const char *p = line.c_str() + start; + + // detect "namespace" keyword + // matches: "namespace Foo\Bar\Baz;" or "namespace Foo\Bar\Baz {" + if (strncmp(p, "namespace", 9) == 0 && (p[9] == ' ' || p[9] == '\t')) { + const char *ns = p + 9; + while (*ns == ' ' || *ns == '\t') ++ns; + const char *ns_end = ns; + while (*ns_end && *ns_end != ';' && *ns_end != '{' && *ns_end != ' ' && *ns_end != '\t' && *ns_end != '\r') { + ++ns_end; + } + ns_prefix = std::string(ns, ns_end); + std::replace(ns_prefix.begin(), ns_prefix.end(), '\\', '/'); + if (!ns_prefix.empty() && ns_prefix.back() != '/') { + ns_prefix += '/'; + } + continue; + } + + // detect class/interface/trait/enum declarations + // Recognised prefixes (checked in specificity order to avoid spurious matches): + // "abstract class ", "final class ", "readonly class ", + // "class ", "interface ", "trait ", "enum " + struct KwEntry 
{ const char *kw; size_t len; }; + static const KwEntry kws[] = { + {"abstract class ", 15}, + {"final class ", 12}, + {"readonly class ", 15}, + {"class ", 6}, + {"interface ", 10}, + {"trait ", 6}, + {"enum ", 5}, + }; + for (const auto &e : kws) { + const char *found = strstr(p, e.kw); + if (!found) { + continue; + } + // everything before the keyword must be whitespace (guards against "base class" in strings etc.) + bool prefix_ok = true; + for (const char *c = p; c < found; ++c) { + if (*c != ' ' && *c != '\t') { + prefix_ok = false; + break; + } + } + if (!prefix_ok) { + continue; + } + // extract the identifier that follows the keyword + const char *name = found + e.len; + while (*name == ' ' || *name == '\t') ++name; + const char *name_end = name; + while (*name_end && (isalnum(*name_end) || *name_end == '_')) ++name_end; + if (name_end > name) { + result.push_back(ns_prefix + std::string(name, name_end)); + } + break; + } + } + + return result; +} + +// Scans 'path' (a directory or a single .php file) for PHP class declarations +// and inserts the found class_name→file_path pairs into 'out'. +// Class names use '/' as the namespace separator. 
+static void scan_classmap_path(const std::string &path, + std::unordered_map &out) { + namespace fs = std::filesystem; + + std::error_code ec; + fs::path fsp{path}; + + if (fs::is_regular_file(fsp, ec) && fsp.extension() == ".php") { + for (const auto &cls : collect_php_class_names(path)) { + out.emplace(cls, path); + } + return; + } + + if (fs::is_directory(fsp, ec)) { + const auto opts = fs::directory_options::skip_permission_denied; + for (const auto &entry : fs::recursive_directory_iterator(fsp, opts, ec)) { + if (!entry.is_regular_file()) { + continue; + } + if (entry.path().extension() != ".php") { + continue; + } + const std::string file_path = entry.path().string(); + for (const auto &cls : collect_php_class_names(file_path)) { + out.emplace(cls, file_path); + } + } + } +} + std::string ComposerAutoloader::psr_lookup_nocache(const PsrMap &psr, const std::string &class_name, bool transform_underscore) { std::string prefix = class_name; @@ -102,6 +255,14 @@ std::string ComposerAutoloader::psr0_lookup(const std::string &class_name) const return file; } +std::string ComposerAutoloader::classmap_lookup(const std::string &class_name) const { + auto it = autoload_classmap_.find(class_name); + if (it != autoload_classmap_.end() && file_exists(it->second)) { + return it->second; + } + return ""; +} + void ComposerAutoloader::set_use_dev(bool v) { use_dev_ = v; } @@ -153,6 +314,11 @@ void ComposerAutoloader::load_file(const std::string &pkg_root, bool is_root_fil // "file.php", // <...> // ], + // "classmap": [ + // "src/", // directory to scan for class declarations + // "lib/Foo.php", // single file + // <...> + // ], // } // "autoload-dev": { // "psr-4": { @@ -167,6 +333,10 @@ void ComposerAutoloader::load_file(const std::string &pkg_root, bool is_root_fil // "file.php", // <...> // ], + // "classmap": [ + // "src/", + // <...> + // ], // } // } @@ -207,6 +377,12 @@ void ComposerAutoloader::load_file(const std::string &pkg_root, bool is_root_fil } } + for (const 
auto &classmap_entry : composer_json->autoload_classmap) { + if (!classmap_entry.is_dev || use_dev) { + scan_classmap_path(classmap_entry.path, autoload_classmap_); + } + } + for (const auto &require : composer_json->require) { if (is_root_file && (!require.is_dev || use_dev)) { deps_.insert(require.package_name); diff --git a/compiler/composer.h b/compiler/composer.h index 76dddf17e4..6312613351 100644 --- a/compiler/composer.h +++ b/compiler/composer.h @@ -42,6 +42,11 @@ class ComposerAutoloader : private vk::not_copyable { std::string psr4_lookup(const std::string &class_name) const; std::string psr0_lookup(const std::string &class_name) const; + // classmap_lookup returns the absolute .php file path for a class that was + // found by scanning "autoload/classmap" directories/files (see #49); + // class_name uses '/' as namespace separator (same convention as psr4/psr0) + std::string classmap_lookup(const std::string &class_name) const; + // is_autoload_file reports whether the specified absolute filename // is a composer-generated autoload.php file bool is_autoload_file(const std::string &filename) const noexcept { @@ -63,6 +68,8 @@ class ComposerAutoloader : private vk::not_copyable { PsrMap autoload_psr4_; PsrMap autoload_psr0_; std::map autoload_psr0_classmap_; + // classmap: class_name_with_slashes -> absolute .php file path + std::unordered_map autoload_classmap_; std::unordered_set deps_; std::string autoload_filename_; diff --git a/compiler/data/composer-json-data.cpp b/compiler/data/composer-json-data.cpp index 705096b918..4962139f7c 100644 --- a/compiler/data/composer-json-data.cpp +++ b/compiler/data/composer-json-data.cpp @@ -143,6 +143,12 @@ class ComposerJsonParser { parse_composer_json_autoload_file(y_filename, is_autoload_dev); } } + if (const auto &y_classmap = y_autoload["classmap"]) { + for (const auto &y_path : y_classmap) { + std::string abs_path = composer_json_dir->full_dir_name + as_string(y_path); + 
out->autoload_classmap.emplace_back(ComposerJsonData::AutoloadClassmapEntry{std::move(abs_path), is_autoload_dev}); + } + } } // parse composer.json "require" and "require-dev" diff --git a/compiler/data/composer-json-data.h b/compiler/data/composer-json-data.h index f23962a5ac..fa0a8b752a 100644 --- a/compiler/data/composer-json-data.h +++ b/compiler/data/composer-json-data.h @@ -43,7 +43,15 @@ class ComposerJsonData { std::string file_name; bool is_dev; // whether it's in "autoload-dev" }; - + + // "autoload/classmap" entry: a directory or .php file listed under composer.json "classmap" + // composer.json format: "autoload": { "classmap": ["src/", "lib/Foo.php"] } + // Each entry is a raw path from composer.json (absolute after resolving against the package root). + // The actual class→file scanning is performed by ComposerAutoloader when loading the file. + struct AutoloadClassmapEntry { + std::string path; // absolute path: a directory to scan or a single .php file + bool is_dev; // whether it's in "autoload-dev" + }; explicit ComposerJsonData(const std::string &json_filename); @@ -64,4 +72,8 @@ class ComposerJsonData { // "autoload/files" and "autoload-dev/files", e.g. [ {"file.php", true}, ... ] std::vector autoload_files; + + // "autoload/classmap" and "autoload-dev/classmap" (see #49) + // Each entry is a raw path (directory or .php file) to be scanned by ComposerAutoloader. 
+ std::vector autoload_classmap; }; diff --git a/compiler/pipes/collect-required-and-classes.cpp b/compiler/pipes/collect-required-and-classes.cpp index 761f585828..c9cb73efa9 100644 --- a/compiler/pipes/collect-required-and-classes.cpp +++ b/compiler/pipes/collect-required-and-classes.cpp @@ -50,6 +50,10 @@ class CollectRequiredPass final : public FunctionPassBase { file->is_loaded_by_psr0 = true; return; // required from the composer autoload PSR-0 path } + if (const auto &classmap_filename = composer.classmap_lookup(file_name); !classmap_filename.empty()) { + require_file(classmap_filename, false); + return; // required from the composer autoload classmap + } } // fallback to the default class autoloading scheme; diff --git a/tests/python/tests/composer/php/test_autoload_classmap/composer.json b/tests/python/tests/composer/php/test_autoload_classmap/composer.json new file mode 100644 index 0000000000..db98726806 --- /dev/null +++ b/tests/python/tests/composer/php/test_autoload_classmap/composer.json @@ -0,0 +1,7 @@ +{ + "name": "kphp/testing-classmap", + "version": "1.0.0", + "autoload": { + "classmap": ["src/"] + } +} diff --git a/tests/python/tests/composer/php/test_autoload_classmap/index.php b/tests/python/tests/composer/php/test_autoload_classmap/index.php new file mode 100644 index 0000000000..a1c4e4ed3b --- /dev/null +++ b/tests/python/tests/composer/php/test_autoload_classmap/index.php @@ -0,0 +1,8 @@ +request("/users") . "\n"; diff --git a/tests/python/tests/composer/php/test_autoload_classmap/src/Api/ApiClient.php b/tests/python/tests/composer/php/test_autoload_classmap/src/Api/ApiClient.php new file mode 100644 index 0000000000..37c3320e9e --- /dev/null +++ b/tests/python/tests/composer/php/test_autoload_classmap/src/Api/ApiClient.php @@ -0,0 +1,9 @@ + __DIR__ . '/../src/Logger.php', + 'Api\\ApiClient' => __DIR__ . 
'/../src/Api/ApiClient.php', + ]; + if (isset($map[$class])) { + require_once $map[$class]; + } +}); diff --git a/tests/python/tests/composer/test_composer.py b/tests/python/tests/composer/test_composer.py index 6adb212837..456a85f6d9 100644 --- a/tests/python/tests/composer/test_composer.py +++ b/tests/python/tests/composer/test_composer.py @@ -37,3 +37,10 @@ def test_autoload_files(self): "KPHP_COMPOSER_ROOT": os.path.join(self.test_dir, "php/test_autoload_files"), "KPHP_COMPOSER_AUTOLOAD_DEV": "1", }) + + def test_classmap_autoloading(self): + self.build_and_compare_with_php( + php_script_path="php/test_autoload_classmap/index.php", + kphp_env={ + "KPHP_COMPOSER_ROOT": os.path.join(self.test_dir, "php/test_autoload_classmap"), + }) From ae712867bdf9a80d192cb60c35bf0a1e9a7fcb5d Mon Sep 17 00:00:00 2001 From: KorsarOfficial Date: Thu, 19 Mar 2026 01:08:31 +0400 Subject: [PATCH 2/3] fix: handle single-line block comments and # comments in PHP class scanner The collect_php_class_names scanner had two issues: 1. Single-line block comments like /* @var int */ left in_block_comment set to true, so every following line was skipped until a line containing */ was reached (any declarations in between were lost). Now checks for */ on the same line as /* before entering multi-line mode. 2. PHP # line comments were not stripped, unlike // comments. (A commented-out declaration such as # class Foo was already rejected by the whitespace-only prefix check in front of the keyword, so this makes comment handling uniform rather than fixing a false detection.) Now strips both // and # comments (taking whichever appears first on the line). --- compiler/composer.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/compiler/composer.cpp b/compiler/composer.cpp index d6bddb41ee..a7c5657df7 100644 --- a/compiler/composer.cpp +++ b/compiler/composer.cpp @@ -41,7 +41,7 @@ static std::vector collect_php_class_names(const std::string &filep } // handle block comments: /* ... 
*/ - // (simplified: track opening/closing markers; won't handle nested /* or // inside strings) + // (simplified: track opening/closing markers; won't handle nested or in-string occurrences) if (in_block_comment) { auto end = line.find("*/"); if (end == std::string::npos) { @@ -53,16 +53,22 @@ static std::vector collect_php_class_names(const std::string &filep { auto bc = line.find("/*"); if (bc != std::string::npos) { - in_block_comment = true; - line = line.substr(0, bc); + auto bc_end = line.find("*/", bc + 2); + if (bc_end != std::string::npos) { + // single-line block comment: strip /* ... */ and keep the rest + line = line.substr(0, bc) + line.substr(bc_end + 2); + } else { + in_block_comment = true; + line = line.substr(0, bc); + } } } - // strip line comments // + // strip line comments: // and # (take whichever comes first) { - auto lc = line.find("//"); - if (lc != std::string::npos) { - line = line.substr(0, lc); + size_t pos = std::min(line.find("//"), line.find('#')); + if (pos != std::string::npos) { + line = line.substr(0, pos); } } From 84464ab505236f4e867c1354ebaa04902a452b6c Mon Sep 17 00:00:00 2001 From: KorsarOfficial Date: Thu, 19 Mar 2026 01:32:30 +0400 Subject: [PATCH 3/3] refactor: use KPHP's lexer for classmap class scanning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the heuristic line-by-line PHP class scanner with a token-based implementation that reuses KPHP's own lexer via php_text_to_tokens(). The previous scanner manually handled block comments, line comments, namespace parsing, and class keyword detection with ~120 lines of fragile string matching. The new implementation tokenizes each file with the same lexer used by the compiler pipeline and walks the token stream looking for tok_namespace, tok_class, tok_interface, and tok_trait — correctly handling all PHP syntax (strings, heredoc, comments, escape sequences) with ~30 lines of straightforward code. Behavior note: the token walk checks only tok_class, tok_interface and tok_trait, so enum declarations, which the heuristic scanner matched through its "enum " keyword entry, are no longer collected unless the lexer reports them as one of those tokens. 
lexer_init() is guaranteed to run before init_composer_class_loader() in the compiler startup sequence (compiler.cpp:202 vs :234). --- compiler/composer.cpp | 129 +++++++++--------------------------------- 1 file changed, 27 insertions(+), 102 deletions(-) diff --git a/compiler/composer.cpp b/compiler/composer.cpp index a7c5657df7..c88b2b23a6 100644 --- a/compiler/composer.cpp +++ b/compiler/composer.cpp @@ -13,128 +13,53 @@ #include "compiler/compiler-core.h" #include "compiler/data/composer-json-data.h" #include "compiler/kphp_assert.h" +#include "compiler/lexer.h" +#include "compiler/token.h" static bool file_exists(const std::string &filename) { return access(filename.c_str(), F_OK) == 0; }; -// Scans a single PHP file and returns all fully-qualified class names declared in it. -// Uses '/' as the namespace separator (the same convention used by the rest of this file). -// Only handles declarations at file scope (the overwhelming majority of autoloaded code). -// Not a full PHP parser — heuristic-based, but correct for typical autoloaded PHP. +// Scans a single PHP file using KPHP's own lexer and returns all fully-qualified +// class names declared in it. Uses '/' as the namespace separator (the same +// convention used by the rest of this file). +// +// Requires lexer_init() to have been called beforehand (guaranteed by the +// compiler pipeline: lexer_init() runs before init_composer_class_loader()). static std::vector collect_php_class_names(const std::string &filepath) { std::ifstream f(filepath, std::ios::binary); if (!f) { return {}; } + std::string content{std::istreambuf_iterator(f), std::istreambuf_iterator()}; + + std::vector tokens = php_text_to_tokens(content); std::vector result; std::string ns_prefix; // e.g. "Foo/Bar/" or "" - bool in_block_comment = false; - - std::string line; - while (std::getline(f, line)) { - // strip trailing \r - if (!line.empty() && line.back() == '\r') { - line.pop_back(); - } - // handle block comments: /* ... 
*/ - // (simplified: track opening/closing markers; won't handle nested or in-string occurrences) - if (in_block_comment) { - auto end = line.find("*/"); - if (end == std::string::npos) { - continue; - } - in_block_comment = false; - line = line.substr(end + 2); - } - { - auto bc = line.find("/*"); - if (bc != std::string::npos) { - auto bc_end = line.find("*/", bc + 2); - if (bc_end != std::string::npos) { - // single-line block comment: strip /* ... */ and keep the rest - line = line.substr(0, bc) + line.substr(bc_end + 2); - } else { - in_block_comment = true; - line = line.substr(0, bc); - } - } - } - - // strip line comments: // and # (take whichever comes first) - { - size_t pos = std::min(line.find("//"), line.find('#')); - if (pos != std::string::npos) { - line = line.substr(0, pos); - } - } + for (size_t i = 0; i < tokens.size(); ++i) { + const auto type = tokens[i].type(); - // trim leading whitespace - size_t start = line.find_first_not_of(" \t"); - if (start == std::string::npos) { - continue; - } - const char *p = line.c_str() + start; - - // detect "namespace" keyword - // matches: "namespace Foo\Bar\Baz;" or "namespace Foo\Bar\Baz {" - if (strncmp(p, "namespace", 9) == 0 && (p[9] == ' ' || p[9] == '\t')) { - const char *ns = p + 9; - while (*ns == ' ' || *ns == '\t') ++ns; - const char *ns_end = ns; - while (*ns_end && *ns_end != ';' && *ns_end != '{' && *ns_end != ' ' && *ns_end != '\t' && *ns_end != '\r') { - ++ns_end; - } - ns_prefix = std::string(ns, ns_end); - std::replace(ns_prefix.begin(), ns_prefix.end(), '\\', '/'); - if (!ns_prefix.empty() && ns_prefix.back() != '/') { - ns_prefix += '/'; - } + // track namespace declarations: "namespace Foo\Bar;" + if (type == tok_namespace && i + 1 < tokens.size() && tokens[i + 1].type() == tok_func_name) { + std::string ns = static_cast(tokens[i + 1].str_val); + std::replace(ns.begin(), ns.end(), '\\', '/'); + ns_prefix = ns + "/"; + ++i; continue; } - // detect class/interface/trait/enum declarations - 
// Recognised prefixes (checked in specificity order to avoid spurious matches): - // "abstract class ", "final class ", "readonly class ", - // "class ", "interface ", "trait ", "enum " - struct KwEntry { const char *kw; size_t len; }; - static const KwEntry kws[] = { - {"abstract class ", 15}, - {"final class ", 12}, - {"readonly class ", 15}, - {"class ", 6}, - {"interface ", 10}, - {"trait ", 6}, - {"enum ", 5}, - }; - for (const auto &e : kws) { - const char *found = strstr(p, e.kw); - if (!found) { - continue; - } - // everything before the keyword must be whitespace (guards against "base class" in strings etc.) - bool prefix_ok = true; - for (const char *c = p; c < found; ++c) { - if (*c != ' ' && *c != '\t') { - prefix_ok = false; - break; - } + // detect class/interface/trait declarations + // the lexer produces: tok_class / tok_interface / tok_trait + // optionally preceded by tok_abstract or tok_final (which we can just skip) + if (type == tok_class || type == tok_interface || type == tok_trait) { + if (i + 1 < tokens.size() && tokens[i + 1].type() == tok_func_name) { + result.push_back(ns_prefix + static_cast(tokens[i + 1].str_val)); + ++i; } - if (!prefix_ok) { - continue; - } - // extract the identifier that follows the keyword - const char *name = found + e.len; - while (*name == ' ' || *name == '\t') ++name; - const char *name_end = name; - while (*name_end && (isalnum(*name_end) || *name_end == '_')) ++name_end; - if (name_end > name) { - result.push_back(ns_prefix + std::string(name, name_end)); - } - break; + continue; } }