diff --git a/.gitattributes b/.gitattributes index 2116ae2..7740ae7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,13 @@ +# Distribution archive exclusions +/tests export-ignore +/.github export-ignore +/bench.php export-ignore +phpstan*.neon export-ignore +phpunit.xml export-ignore +pint.json export-ignore +.env.example export-ignore +CHANGELOG.md export-ignore + # laravel default *.css linguist-vendored *.less linguist-vendored diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..38a184c --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,53 @@ +name: CI +on: [pull_request] +jobs: + tests: + name: Searchable (PHP ${{ matrix.php-versions }} / ORC ${{ matrix.orchestra-versions }} ) + runs-on: ubuntu-latest + strategy: + matrix: + php-versions: [ '8.1', '8.2', '8.3', '8.4' ] + orchestra-versions: [ '8.0', '9.0', '10.0' ] + exclude: + - php-versions: '8.1' + orchestra-versions: '9.0' + - php-versions: '8.1' + orchestra-versions: '10.0' + - php-versions: '8.4' + orchestra-versions: '8.0' + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup PHP + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + extensions: mbstring, dom, fileinfo, gmp, bcmath + + - name: Get composer cache directory + id: composer-cache + run: echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT + + - name: Cache composer dependencies + uses: actions/cache@v4 + with: + path: ${{ steps.composer-cache.outputs.dir }} + key: ${{ runner.os }}-composer-${{ hashFiles('**/composer.json') }} + restore-keys: ${{ runner.os }}-composer- + + - name: Remove composer.lock + run: rm -f composer.lock + + - name: Remove QA dependencies + run: composer remove laravel/pint phpstan/phpstan --dev --no-update + + - name: Require matrix Orchestra Testbench version + # Pin Testbench so each job tests the Laravel line its name advertises + run: composer require "orchestra/testbench:^${{ matrix.orchestra-versions }}" --dev --no-update + + - name: Install Composer dependencies + run: composer install --no-progress --prefer-dist --optimize-autoloader + + - name: Test with phpunit + run: composer test diff --git
a/.github/workflows/qa.yml b/.github/workflows/qa.yml new file mode 100644 index 0000000..ade5fcc --- /dev/null +++ b/.github/workflows/qa.yml @@ -0,0 +1,56 @@ +name: QA +on: + push: + branches: + - main + pull_request: + types: [ opened, synchronize ] +jobs: + tests: + name: Searchable QA + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup PHP + uses: shivammathur/setup-php@v2 + with: + php-version: 8.4 + extensions: mbstring, dom, fileinfo, gmp, bcmath + coverage: xdebug + + - name: Get composer cache directory + id: composer-cache + run: echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT + + - name: Cache composer dependencies + uses: actions/cache@v4 + with: + path: ${{ steps.composer-cache.outputs.dir }} + key: composer-${{ hashFiles('**/composer.json') }} + restore-keys: composer- + + - name: Remove composer.lock + run: rm -f composer.lock + + - name: Install Composer dependencies + run: composer install --no-progress --prefer-dist --optimize-autoloader + + - name: Check code style + run: vendor/bin/pint --ansi --config pint.json --test + + - name: PHPStan src + run: composer stan + + - name: PHPStan tests + run: composer stan-tests + + - name: Compute Coverage + run: vendor/bin/phpunit --colors --coverage-clover ./coverage.xml + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.gitignore b/.gitignore index be57bfd..8de3fa0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ /vendor .*.cache +.phpstan composer.lock .env +/cov diff --git a/.php-cs-fixer.dist.php b/.php-cs-fixer.dist.php deleted file mode 100644 index cad14c1..0000000 --- a/.php-cs-fixer.dist.php +++ /dev/null @@ -1,150 +0,0 @@ -in(__DIR__) - ->exclude('vendor') - ->name('*.php') - ->ignoreDotFiles(true) - ->ignoreVCS(true); -$config = new PhpCsFixer\Config(); - -$header = <<<'EOF' -This file is part of Searchable - (c) Fabrice de Stefanis / 
https://github.com/fab2s/Searchable -This source file is licensed under the MIT license which you will -find in the LICENSE file or at https://opensource.org/licenses/MIT -EOF; - -return $config - ->setUsingCache(true) - ->setRules([ - 'header_comment' => ['header' => $header], - 'array_syntax' => ['syntax' => 'short'], - 'binary_operator_spaces' => [ - 'default' => 'align_single_space', - ], - 'blank_line_after_namespace' => true, - 'blank_line_after_opening_tag' => true, - 'blank_line_before_statement' => [ - 'statements' => ['return'], - ], - 'braces' => true, - 'cast_spaces' => true, - 'combine_consecutive_unsets' => true, - 'class_attributes_separation' => [ - 'elements' => ['const' => 'only_if_meta', 'trait_import' => 'one', 'property' => 'only_if_meta'], - ], - 'class_definition' => true, - 'concat_space' => [ - 'spacing' => 'one', - ], - 'declare_equal_normalize' => true, - 'elseif' => true, - 'encoding' => true, - 'full_opening_tag' => true, - 'fully_qualified_strict_types' => true, - 'function_declaration' => true, - 'function_typehint_space' => true, - 'heredoc_to_nowdoc' => true, - 'include' => true, - 'increment_style' => ['style' => 'pre'], - 'indentation_type' => true, - 'linebreak_after_opening_tag' => true, - 'line_ending' => true, - 'lowercase_cast' => true, - 'constant_case' => ['case' => 'lower'], - 'lowercase_keywords' => true, - 'lowercase_static_reference' => true, - 'magic_method_casing' => true, - 'magic_constant_casing' => true, - 'method_argument_space' => true, - 'multiline_whitespace_before_semicolons' => [ - 'strategy' => 'no_multi_line', - ], - 'native_function_casing' => true, - 'no_extra_blank_lines' => [ - 'tokens' => [ - 'extra', - 'throw', - 'use', - ], - ], - 'no_blank_lines_after_class_opening' => true, - 'no_blank_lines_after_phpdoc' => true, - 'no_closing_tag' => true, - 'no_empty_phpdoc' => true, - 'no_empty_statement' => true, - 'no_leading_import_slash' => true, - 'no_leading_namespace_whitespace' => true, - 
'no_mixed_echo_print' => [ - 'use' => 'echo', - ], - 'no_multiline_whitespace_around_double_arrow' => true, - 'multiline_whitespace_before_semicolons' => true, - 'no_short_bool_cast' => true, - 'no_singleline_whitespace_before_semicolons' => true, - 'no_spaces_after_function_name' => true, - 'no_spaces_around_offset' => true, - 'no_spaces_inside_parenthesis' => true, - 'no_trailing_comma_in_singleline' => true, - 'no_trailing_whitespace' => true, - 'no_trailing_whitespace_in_comment' => true, - 'no_unneeded_control_parentheses' => true, - 'no_unneeded_curly_braces' => true, - 'no_useless_else' => true, - 'no_useless_return' => true, - 'no_whitespace_before_comma_in_array' => true, - 'no_whitespace_in_blank_line' => true, - 'normalize_index_brace' => true, - 'object_operator_without_whitespace' => true, - 'ordered_class_elements' => true, - 'ordered_imports' => ['sort_algorithm' => 'alpha'], - 'php_unit_fqcn_annotation' => true, - 'phpdoc_add_missing_param_annotation' => true, - 'phpdoc_align' => true, - 'phpdoc_indent' => true, - 'phpdoc_annotation_without_dot' => true, - 'phpdoc_inline_tag_normalizer' => true, - 'phpdoc_no_alias_tag' => true, - 'general_phpdoc_tag_rename' => true, - 'phpdoc_no_empty_return' => true, - 'phpdoc_tag_type' => true, - 'phpdoc_no_access' => true, - 'phpdoc_no_package' => true, - 'phpdoc_no_useless_inheritdoc' => true, - 'phpdoc_order' => true, - 'phpdoc_scalar' => true, - 'phpdoc_separation' => true, - 'phpdoc_single_line_var_spacing' => true, - 'phpdoc_to_comment' => true, - 'phpdoc_summary' => false, - 'phpdoc_trim' => true, - 'phpdoc_types' => true, - 'phpdoc_var_without_name' => true, - 'semicolon_after_instruction' => true, - 'single_blank_line_at_eof' => true, - 'single_blank_line_before_namespace' => true, - 'single_class_element_per_statement' => true, - 'single_import_per_statement' => true, - 'no_unused_imports' => true, - 'single_line_after_imports' => true, - 'single_line_comment_style' => [ - 'comment_types' => ['hash'], - 
], - 'single_quote' => true, - 'space_after_semicolon' => true, - 'standardize_not_equals' => true, - 'switch_case_semicolon_to_colon' => true, - 'switch_case_space' => true, - 'ternary_operator_spaces' => true, - 'trailing_comma_in_multiline' => [ - 'elements' => ['arrays'] - ], - 'trim_array_spaces' => true, - 'unary_operator_spaces' => true, - 'visibility_required' => [ - 'elements' => ['method', 'property'], - ], - 'whitespace_after_comma_in_array' => true, - ]) - ->setFinder($finder); diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..4d24e74 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,24 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [1.0.0] - 2026-02-21 + +### Added + +- Fulltext search for Eloquent models using native database capabilities (`MATCH...AGAINST` on MySQL/MariaDB, `tsvector/tsquery` on PostgreSQL) +- `Searchable` trait and `SearchableInterface` for easy model integration +- `SearchQuery` class for advanced usage (joins, table aliases, custom field names) +- `TermParser` for driver-aware term parsing and content preparation +- Phonetic matching support with pluggable algorithms via `PhoneticInterface` — also provides typo tolerance by matching phonetically similar inputs +- Built-in French phonetic encoders: `Phonetic` (Phonetic Francais) and `Soundex2` — optimized PHP ports from [Talisman](https://github.com/Yomguithereal/talisman) +- `searchable:enable` artisan command to add columns, fulltext indexes, and (re)index data with optimized batch processing +- Automatic setup after migrations via `MigrationsEnded` event listener +- Support for PHP 8.1 - 8.4 +- Support for Laravel 10.x, 11.x, and 12.x +- Support for MySQL, MariaDB, and PostgreSQL diff --git a/README.md b/README.md index 
98d0253..2fb1a9c 100644 --- a/README.md +++ b/README.md @@ -1,143 +1,376 @@ # Searchable -Searchable models for [**Laravel**](https://laravel.com/) (The Awesome) based on Mysql FullText indexes. +[![CI](https://github.com/fab2s/Searchable/actions/workflows/ci.yml/badge.svg)](https://github.com/fab2s/Searchable/actions/workflows/ci.yml) +[![QA](https://github.com/fab2s/Searchable/actions/workflows/qa.yml/badge.svg)](https://github.com/fab2s/Searchable/actions/workflows/qa.yml) +[![codecov](https://codecov.io/gh/fab2s/Searchable/graph/badge.svg?token=DKFT4Z9AML)](https://codecov.io/gh/fab2s/Searchable) +[![PHPStan](https://img.shields.io/badge/PHPStan-level%209-brightgreen.svg?style=flat)](https://phpstan.org/) +[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat)](http://makeapullrequest.com) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) -This package does not try to be smart, just KISS. It will allow you to make any filed of your model searchable by concatenating them into a new column indexed with a mysql FullText index. +**Add fulltext search to your Eloquent models in minutes — no external services, no Scout driver, just your existing database.** -> It requires mysql / mariadb and Laravel 9.x +This package keeps things simple: it concatenates model fields into a single indexed column and uses native fulltext capabilities (`MATCH...AGAINST` on MySQL, `tsvector/tsquery` on PostgreSQL) for fast prefix-based search, ideal for autocomplete. -## Installation +## Why Searchable? + +If you need fast autocomplete or simple search and already run MySQL/MariaDB or PostgreSQL, you don't need a separate search engine. + +| | Searchable | Laravel Scout + Driver | +|---|---|---| +| Infrastructure | Your existing database | External service (Algolia, Meilisearch, Typesense, ...) 
| +| Setup | Add a trait, run one command | Install driver, configure credentials, manage process/service | +| Sync | Automatic on Eloquent `save` | Queue workers, manual imports | +| Query integration | Standard Eloquent scopes & builder — composes with `where`, `join`, `orderBy`, etc. | Separate `::search()` API with limited query builder support | +| Phonetic matching | Built-in, pluggable algorithms (also provides typo tolerance) | Depends on the external service | +| Scalability | Performs well even with millions of rows thanks to single-column native fulltext indexes | Designed for very large-scale, multi-field search | +| Best for | Autocomplete, name/title/email search, up to millions of rows | Multi-field search, weighted ranking, facets, advanced typo tolerance | + +Searchable is not a replacement for a dedicated search engine — it's a lightweight alternative for the many cases where one isn't needed. The single-column approach is what makes it fast: native fulltext indexes on one column scale well, whereas indexing many columns separately (especially on MySQL) is where dedicated engines pull ahead. + +## Requirements -`Searchable` can be installed using composer: +- PHP 8.1+ +- Laravel 10.x / 11.x / 12.x +- MySQL / MariaDB or PostgreSQL +- `ext-intl` PHP extension + +## Installation ```shell -composer require "fab2s/searchable" +composer require fab2s/searchable ``` -## Usage +The service provider is auto-discovered. -To start using `Searchable` on a specific `model`, just `use` the `Searchable` trait and setup `$searchables`: +## Quick start + +Implement `SearchableInterface` on your model, use the `Searchable` trait, and list the fields to index: ```php -class MyModel extends Model +use fab2s\Searchable\SearchableInterface; +use fab2s\Searchable\Traits\Searchable; + +class Contact extends Model implements SearchableInterface { use Searchable; - - /** - * @var string[] - */ + protected $searchables = [ - 'field1', - 'field2', - // ... 
- 'fieldN', + 'first_name', + 'last_name', + 'email', ]; } ``` -By default, `field1` to `fieldN` will be concatenated and stored into the default `SearchQuery::SEARCHABLE_FIELD` field added to the model by the `Enable` command. +Then run the artisan command to add the column and fulltext index: -By default, this searchable field will be of type `VARCHAR(255)`, but you can customize it at will with any type and length supporting a FullText index by just overriding the `Searchable` trait method in your model: +```shell +php artisan searchable:enable +``` -````php - /** - * @return string - */ - public function getSearchableField(): string - { - return SearchQuery::SEARCHABLE_FIELD; // searchable - } +That's it. The `searchable` column is automatically populated on every save. - /** - * @return string any migration method such as string, text etc ... - */ - public function getSearchableFieldDbType(): string - { - return 'string'; - } +> **Choosing fields wisely:** The quality of matching depends directly on which fields you index. This package is designed for fast, simple autocomplete — not complex full-text search. Keep `$searchables` focused on the few fields users actually type into a search box (names, titles, emails). Adding large or numerous fields dilutes relevance and increases storage. If you need weighted fields, facets, or advanced ranking, consider a dedicated search engine instead. - /** - * @return int - */ - public function getSearchableFieldDbSize(): int - { - return 255; - } +## Searching + +The trait provides a `search` scope that handles everything automatically: + +```php +$results = Contact::search($request->input('q'))->get(); +``` + +It composes with other query builder methods: + +```php +$results = Contact::search('john') + ->where('active', true) + ->limit(10) + ->get(); +``` + +Results are ordered by relevance (DESC) by default. 
Pass `null` to disable: + +```php +$results = Contact::search('john', null)->latest()->get(); +``` + +The driver is detected automatically from the query's connection. The scope picks up the model's `tsConfig` and `phonetic` settings. + +> For IDE autocompletion, add a `@method` annotation to your model: +> ```php +> /** +> * @method static Builder search(string|array $search, ?string $order = 'DESC') +> */ +> class Contact extends Model implements SearchableInterface +> ``` + +### Empty search terms + +When the search input is empty or contains only operators/whitespace, the `search` scope is a no-op — no `WHERE` or `ORDER BY` clause is added. This means you can safely pass user input without checking for empty strings: + +```php +// Safe — returns all contacts (unfiltered) when $q is empty +$results = Contact::search($request->input('q', '')) + ->where('active', true) + ->get(); +``` + +### Advanced usage with SearchQuery + +For more control (table aliases in joins, custom field name), use `SearchQuery` directly: + +```php +use fab2s\Searchable\SearchQuery; + +$search = new SearchQuery('DESC', 'searchable', 'english', phonetic: true); +$query = Contact::query(); + +$search->addMatch($query, $request->input('q'), 'contacts'); + +$results = $query->get(); +``` + +This is particularly useful when searching across joined tables. The third argument to `addMatch` is a table alias that prefixes the searchable column, preventing ambiguity: + +```php +$search = new SearchQuery; +$query = Contact::query() + ->join('companies', 'contacts.company_id', '=', 'companies.id') + ->select('contacts.*'); + +// search in contacts +$search->addMatch($query, $request->input('q'), 'contacts'); + +// you could also search in companies with a second SearchQuery instance +// (new SearchQuery)->addMatch($query, $request->input('q'), 'companies'); + +$results = $query->get(); +``` + +## Configuration + +Every option can be set by declaring a property on your model. 
The trait picks them up automatically and falls back to sensible defaults when omitted: -```` +| Property | Type | Default | Description | +|-------------------------|----------------|-----------------|------------------------------------------| +| `$searchableField` | `string` | `'searchable'` | Column name for the searchable content | +| `$searchableFieldDbType`| `string` | `'string'` | Migration column type (`string`, `text`) | +| `$searchableFieldDbSize`| `int` | `500` | Column size (applies to `string` type) | +| `$searchables` | `array`| `[]` | Model fields to index | +| `$searchableTsConfig` | `string` | `'english'` | PostgreSQL text search configuration | +| `$searchablePhonetic` | `bool` | `false` | Enable phonetic matching | +| `$searchablePhoneticAlgorithm` | `class-string` | — (metaphone) | Custom phonetic encoder class | -You can customise concatenation as well overriding: +```php +class Contact extends Model implements SearchableInterface +{ + use Searchable; + + protected array $searchables = ['first_name', 'last_name', 'email']; + protected string $searchableTsConfig = 'french'; + protected bool $searchablePhonetic = true; + protected int $searchableFieldDbSize = 1000; +} +``` + +Each property has a corresponding getter method (`getSearchableField()`, `getSearchableFieldDbType()`, etc.) defined in `SearchableInterface`. You can override those methods instead if you need computed values. + +### Custom content + +Override `getSearchableContent()` to control what gets indexed. 
The `$additional` parameter lets you inject extra data (decrypted fields, computed values, etc.). Because the override lives in the same class that uses the trait, `parent::` would point at Eloquent's `Model` rather than the trait, so alias the trait method (`use Searchable { getSearchableContent as getSearchableContentTrait; }`) to reach the original implementation: -````php - /** - * @param string $additional for case where this method is overridden in users - * - * @return string - */ - public function getSearchableContent(string $additional = ''): string +```php +public function getSearchableContent(string $additional = ''): string +{ + $extra = implode(' ', [ + $this->decrypt('phone'), + $this->some_computed_value, + ]); + + return $this->getSearchableContentTrait($extra); +} +``` + +### PostgreSQL text search configuration + +By default, PostgreSQL uses the `english` text search configuration. Set `$searchableTsConfig` to change it: + +```php +protected string $searchableTsConfig = 'french'; +``` + +The `search` scope picks this up automatically. When using `SearchQuery` directly, pass the same value: + +```php +$search = new SearchQuery('DESC', 'searchable', 'french'); +``` + +### Phonetic matching + +Enable phonetic matching to find results despite spelling variations (e.g. "jon" matches "john", "smyth" matches "smith"). This uses PHP's `metaphone()` to append phonetic codes to the same searchable field — no extra column or extension needed. + +```php +protected bool $searchablePhonetic = true; +``` + +That's all — both storage and the `search` scope handle it automatically. Stored content becomes `john smith jn sm0`, and a search for `jon` produces the term `jn` which matches. + +When using `SearchQuery` directly, pass the phonetic flag: + +```php +$search = new SearchQuery('DESC', 'searchable', 'english', phonetic: true); +``` + +### Custom phonetic algorithm + +The default `metaphone()` works well for English.
For other languages, set `$searchablePhoneticAlgorithm` to any class implementing `PhoneticInterface`: + +```php +use fab2s\Searchable\Phonetic\PhoneticInterface; + +class MyEncoder implements PhoneticInterface +{ + public static function encode(string $word): string { - return TermParser::prepareSearchable(array_map(function ($field) { - return $this->$field; - }, $this->getSearchables()), $additional); + // your encoding logic } -```` +} +``` -The `$additional` parameter can be used to preprocess model data if needed, can be handy to encrypt/decrypt or anonymize for example: +Then reference it on your model: -````php - /** - * @return string - */ - public function getSearchableContent(): string - { - $additional = [ - $this->decrypt('additional_field1'), - 0 . substr((string) $this->decrypt('phone'), 3, 6), // will allow for partial matches - ]; +```php +use fab2s\Searchable\Phonetic\Phonetic; - return $this->getSearchableContentTrait(implode(' ', $additional)); - } -```` +class Contact extends Model implements SearchableInterface +{ + use Searchable; -Once you have configured your model(s), you can use the `Enable` command to add the `searchable` field to your models and / or index them: + protected array $searchables = ['first_name', 'last_name']; + protected bool $searchablePhonetic = true; + protected string $searchablePhoneticAlgorithm = Phonetic::class; +} +``` -````shell -$ php artisan searchable:enable --help -Description: - Enable searchable for your models +The trait resolves the class to a closure internally — no method override needed. -Usage: - searchable:enable [options] +When using `SearchQuery` directly, pass the encoder as a closure: -Options: - --root[=ROOT] The place where to start looking for models, defaults to Laravel's app/Models - --index To also index / re index - -h, --help Display help for the given command. 
When no command is given display help for the list command - -q, --quiet Do not output any message - -V, --version Display this application version - --ansi|--no-ansi Force (or disable --no-ansi) ANSI output - -n, --no-interaction Do not ask any interactive question - --env[=ENV] The environment the command should run under - -v|vv|vvv, --verbose Increase the verbosity of messages: 1 for normal output, 2 for more verbose output and 3 for debug -```` +```php +$search = new SearchQuery('DESC', 'searchable', 'french', phonetic: true, phoneticAlgorithm: Phonetic::encode(...)); +``` -## Stopwords +### Built-in French encoders -`Searchable` comes with an english and french stop words files which you can use to reduce FullText indexing by ignoring words listed in [these files](./src/stopwords). +Two French phonetic algorithms are included, optimized PHP ports from [Talisman](https://github.com/Yomguithereal/talisman) (MIT): -The `StopWords` command can be used to populate a `stopwords` table with these words: +| Class | Algorithm | Description | +|-------|-----------|-------------| +| `Phonetic` | [Phonetic Français](http://www.roudoudou.com/phonetic.php) | Comprehensive French phonetic algorithm by Edouard Berge. Handles ligatures, silent letters, nasal vowels, and many French-specific spelling rules. | +| `Soundex2` | [Soundex2](http://sqlpro.developpez.com/cours/soundex/) | French adaptation of Soundex. Simpler and faster than `Phonetic`, produces 4-character codes. | -````shell -php artisan searchable:stopwords -```` +Both implement `PhoneticInterface` and handle Unicode normalization (accents, ligatures like œ and æ) internally. 
+ +```php +use fab2s\Searchable\Phonetic\Phonetic; +use fab2s\Searchable\Phonetic\Soundex2; + +Phonetic::encode('jean'); // 'JAN' +Soundex2::encode('dupont'); // 'DIPN' +``` -The db server configuration must be configured as demonstrated in [innodb_full_text.cnf](./src/innodb_full_text.cnf) for these words to effectively be excluded from indexing. +### Phonetic encoder benchmarks +Measured on a set of 520 French words, 1000 iterations each (PHP 8.4): + +| Encoder | Per word | Throughput | +|------------|----------|------------| +| metaphone | ~2 µs | ~500k/s | +| Soundex2 | ~35 µs | ~28k/s | +| Phonetic | ~51 µs | ~20k/s | + +PHP's native `metaphone()` is implemented in C and unsurprisingly the fastest. Both French encoders are pure PHP with extensive regex-based rule sets, yet fast enough for typical use — encoding 1000 words takes about 50ms even with the slowest encoder. + +## Automatic setup after migrations + +The package listens to Laravel's `MigrationsEnded` event and automatically runs `searchable:enable` after every successful `up` migration. This means: + +- After `php artisan migrate`, the searchable column and fulltext index are added to any new Searchable model. +- After `php artisan migrate:fresh`, they are recreated along with the rest of your schema. +- Rollbacks (`down`) and pretended migrations (`--pretend`) are ignored. + +This is fully automatic — no configuration needed. If you need to re-index existing records, run the command manually with `--index`.
+ +## The Enable command + +```shell +# Add searchable column + index to all models using the Searchable trait +php artisan searchable:enable + +# Target a specific model +php artisan searchable:enable --model=App/Models/Contact + +# Also (re)index existing records +php artisan searchable:enable --model=App/Models/Contact --index + +# Scan a custom directory for models +php artisan searchable:enable --root=app/Domain/Models +``` + +The command detects the database driver and creates the appropriate index: +- **MySQL**: `ALTER TABLE ... ADD FULLTEXT` +- **PostgreSQL**: `CREATE INDEX ... USING GIN(to_tsvector(...))` + +### Adding Searchable to an existing model + +You can add the Searchable feature to a model with pre-existing data at any time. After implementing `SearchableInterface` and using the `Searchable` trait, run the enable command with `--index` to set up the column, create the fulltext index, and populate it for all existing records: + +```shell +php artisan searchable:enable --model=App/Models/Contact --index +``` + +You can also run it without `--model` to process all Searchable models at once. Indexing is optimized with batch processing to handle large tables efficiently. + +### When to re-index + +The searchable column is automatically kept in sync on every Eloquent `save`. Manual re-indexing is only needed when: + +- **Adding Searchable to a model with existing data** — existing rows have no searchable content yet. +- **Changing `$searchables`** — after adding or removing fields from the index, existing rows still contain the old content. +- **Mass imports that bypass Eloquent** — raw SQL inserts, `DB::insert()`, or bulk imports that skip model events won't populate the searchable column. 
+ +In all these cases, run: + +```shell +# re-index a specific model +php artisan searchable:enable --model=App/Models/Contact --index + +# or re-index all Searchable models +php artisan searchable:enable --index +``` ## Contributing -Contributions are welcome, do not hesitate to open issues and submit pull requests. +Contributions are welcome. Feel free to open issues and submit pull requests. + +```shell +# fix code style +composer fix + +# run tests +composer test + +# run tests with coverage +composer cov + +# static analysis (src, level 9) +composer stan + +# static analysis (tests, level 5) +composer stan-tests +``` ## License -`Searchable` is open-sourced software licensed under the [MIT license](http://opensource.org/licenses/MIT). +`Searchable` is open-sourced software licensed under the [MIT license](https://opensource.org/licenses/MIT). diff --git a/bench.php b/bench.php new file mode 100644 index 0000000..e7a043d --- /dev/null +++ b/bench.php @@ -0,0 +1,67 @@ + fn (string $w) => metaphone($w), + 'Soundex2' => Soundex2::encode(...), + 'Phonetic' => Phonetic::encode(...), +]; + +$results = []; + +foreach ($encoders as $name => $encoder) { + // Warmup + foreach ($words as $w) { + $encoder($w); + } + + // Benchmark + $shuffled = $words; + $t0 = hrtime(true); + for ($i = 0; $i < $iterations; $i++) { + shuffle($shuffled); + foreach ($shuffled as $w) { + $encoder($w); + } + } + $elapsed = (hrtime(true) - $t0) / 1e9; + + $results[$name] = [ + 'time' => $elapsed, + 'per' => $elapsed / $total * 1e6, + 'throughput' => (int) ($total / $elapsed), + ]; +} + +// Output markdown table +printf("Benchmark: %d words x %d iterations = %s encodes\n\n", $count, $iterations, number_format($total)); +printf("| %-12s | %10s | %10s | %14s |\n", 'Encoder', 'Time (s)', 'Per word', 'Words/sec'); +printf("|%s|%s|%s|%s|\n", str_repeat('-', 14), str_repeat('-', 12), str_repeat('-', 12), str_repeat('-', 16)); + +foreach ($results as $name => $r) { + printf( + "| %-12s | %9.3f s | %7.1f 
µs | %14s |\n", + $name, + $r['time'], + $r['per'], + number_format($r['throughput']), + ); +} diff --git a/composer.json b/composer.json index 8c4387e..4e0ece4 100644 --- a/composer.json +++ b/composer.json @@ -1,6 +1,6 @@ { "name": "fab2s/searchable", - "description": "Laravel searchable models based on FullText indexes", + "description": "Laravel searchable models based on FullText indexes with phonetic matching", "type": "library", "authors": [{ "name": "Fabrice de Stefanis" @@ -12,22 +12,30 @@ "keywords": [ "Laravel", "Searchable", + "eloquent", + "search", + "fulltext-search", + "fulltext-index", "mysql", - "FullText", - "model" + "pgsql", + "mariaDb", + "phonetic", + "autocomplete" ], "license": [ "MIT" ], "require": { - "php": "^8.0", + "ext-intl": "*", + "php": "^8.1", "fab2s/strings": "^1.0" }, "require-dev": { "ext-pdo": "*", - "phpunit/phpunit": "^9.0", - "orchestra/testbench": "^7.0", - "friendsofphp/php-cs-fixer": "^3.0" + "phpunit/phpunit": "^10.0|^11.0", + "orchestra/testbench": "^8.0|^9.0|^10.0", + "laravel/pint": "^1.27", + "phpstan/phpstan": "^2.1" }, "autoload": { "psr-4": { @@ -47,15 +55,28 @@ } }, "scripts": { - "post-update-cmd": [ - "rm -f .*.cache" - ], - "post-install-cmd": [ - "rm -f .*.cache" - ], - "fix": "@php php-cs-fixer fix --config=./.php-cs-fixer.dist.php -vvv", "post-autoload-dump": [ "@php vendor/bin/testbench package:discover --ansi" + ], + "fix": [ + "Composer\\Config::disableProcessTimeout", + "@php vendor/bin/pint --ansi --config pint.json --" + ], + "test": [ + "Composer\\Config::disableProcessTimeout", + "@php vendor/bin/phpunit --colors --" + ], + "cov": [ + "Composer\\Config::disableProcessTimeout", + "@php vendor/bin/phpunit --colors --coverage-html ./cov --" + ], + "stan": [ + "Composer\\Config::disableProcessTimeout", + "@php vendor/bin/phpstan analyse -c ./phpstan.neon --ansi --" + ], + "stan-tests": [ + "Composer\\Config::disableProcessTimeout", + "@php vendor/bin/phpstan analyse -c ./phpstan-tests.neon --ansi --" ] 
} } diff --git a/phpstan-tests.neon b/phpstan-tests.neon new file mode 100644 index 0000000..a7fb60b --- /dev/null +++ b/phpstan-tests.neon @@ -0,0 +1,12 @@ + +parameters: + tmpDir: .phpstan + level: 5 + + paths: + - tests + + parallel: + maximumNumberOfProcesses: 4 + + ignoreErrors: diff --git a/phpstan.neon b/phpstan.neon new file mode 100644 index 0000000..d043799 --- /dev/null +++ b/phpstan.neon @@ -0,0 +1,13 @@ + +parameters: + tmpDir: .phpstan + level: 9 + + paths: + - src + + parallel: + maximumNumberOfProcesses: 4 + + ignoreErrors: + - identifier: trait.unused diff --git a/phpunit.xml b/phpunit.xml index a623c1d..3bffdd5 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -1,29 +1,16 @@ - - - - src/ - - - - - ./tests - - - - - + + + + ./tests + + + + + + + + src/ + + diff --git a/pint.json b/pint.json new file mode 100644 index 0000000..03a259c --- /dev/null +++ b/pint.json @@ -0,0 +1,78 @@ +{ + "preset": "laravel", + "cache-file": ".pint.cache", + "rules": { + "header_comment": { + "header": "This file is part of fab2s/searchable.\n(c) Fabrice de Stefanis / https://github.com/fab2s/Searchable\nThis source file is licensed under the MIT license which you will\nfind in the LICENSE file or at https://opensource.org/licenses/MIT" + }, + "assign_null_coalescing_to_coalesce_equal": true, + "binary_operator_spaces": { + "default": "align_single_space_minimal" + }, + "class_definition": true, + "blank_lines_before_namespace": true, + "align_multiline_comment": true, + "class_attributes_separation": { + "elements": { + "method": "one", + "const": "only_if_meta", + "property": "only_if_meta", + "trait_import": "only_if_meta" + } + }, + "concat_space": { + "spacing": "one" + }, + "global_namespace_import": { + "import_classes": true, + "import_functions": false, + "import_constants": false + }, + "no_superfluous_phpdoc_tags": true, + "method_argument_space": { + "on_multiline": "ensure_fully_multiline" + }, + "method_chaining_indentation": true, + 
"multiline_whitespace_before_semicolons": { + "strategy": "new_line_for_chained_calls" + }, + "operator_linebreak": { + "position": "beginning" + }, + "ordered_class_elements": { + "order": [ + "use_trait", + "constant", + "case", + "property", + "method" + ] + }, + "phpdoc_align": true, + "phpdoc_separation": { + "groups": [ + [ + "param" + ], + [ + "return" + ] + ] + }, + "php_unit_method_casing": { + "case": "snake_case" + }, + "return_type_declaration": true, + "simplified_null_return": true, + "single_trait_insert_per_statement": true, + "trailing_comma_in_multiline": { + "elements": [ + "parameters", + "arguments" + ] + } + }, + "exclude": [ + "vendor" + ] +} diff --git a/src/Command/Enable.php b/src/Command/Enable.php index 477a1ac..6a425e9 100644 --- a/src/Command/Enable.php +++ b/src/Command/Enable.php @@ -1,20 +1,24 @@ index = (bool) $this->option('index'); - $this->modelRootDir = $this->option('root') ?? app_path('Models'); - $this->model = $this->option('model') ?? ''; + $root = $this->option('root'); + $this->modelRootDir = is_string($root) ? $root : app_path('Models'); + $model = $this->option('model'); + $this->model = is_string($model) ? $model : ''; + $this->hasProgress = (bool) $this->option('progress'); if ($this->model) { $this->model = str_replace('/', '\\', $this->model); - if (!class_exists($this->model)) { + if (! str_contains($this->model, '\\')) { + $this->model = '\\App\\Models\\' . $this->model; + } + + if (! class_exists($this->model)) { $this->error('Provided Model FQN not found: ' . $this->model); - return 1; + return self::FAILURE; } $this->handleModel($this->model); - $this->output->success('Done'); + $this->comment('Done'); - return 0; + return self::SUCCESS; } - if (!is_dir($this->modelRootDir)) { + if (! 
is_dir($this->modelRootDir)) { $this->warn('You must specify an existing directory to look for models'); - return 1; + return self::FAILURE; } + $this->comment('Searchable start'); $this->getModelFiles(); $foundSome = false; foreach (get_declared_classes() as $fqn) { - $this->handleModel($fqn); + if ($this->handleModel($fqn)) { + $foundSome = true; + } } - if (!$foundSome) { + if (! $foundSome) { $this->warn('Could not find any model using Searchable trait in ' . $this->modelRootDir); } else { - $this->output->success('Done'); + $this->comment('Done'); } - return 0; + return self::SUCCESS; } - public function handleModel(string $fqn): static + public function handleModel(string $fqn): bool { - if (in_array(Searchable::class, class_uses_recursive($fqn), true)) { - $this->output->info("Processing $fqn"); - $foundSome = true; - /** @var Model&Searchable $instance */ - $instance = new $fqn; - $this->configureModel($instance); - - if ($this->index) { - $this->index($instance); - } + if (! is_subclass_of($fqn, Model::class) || ! is_subclass_of($fqn, SearchableInterface::class)) { + return false; } - return $this; + $this->info("Processing $fqn"); + $instance = new $fqn; + $this->configureModel($instance); + + if ($this->index) { + $this->index($instance); + } + + return true; } - protected function configureModel(Model $model): static + protected function configureModel(Model&SearchableInterface $model): static { - /** @var Model&Searchable $model */ $searchableField = $model->getSearchableField(); $table = $model->getTable(); $connection = $model->getConnectionName(); $dbType = $model->getSearchableFieldDbType(); $dbSize = $model->getSearchableFieldDbSize(); + $driver = DB::connection($connection)->getDriverName(); - if (!Schema::connection($connection)->hasColumn($table, $searchableField)) { - $this->output->info("Adding $searchableField to $table"); + if (! 
Schema::connection($connection)->hasColumn($table, $searchableField)) { + $this->line("Adding $searchableField to $table"); $after = null; - if ($model->usesTimestamps()) { + if ($driver !== 'pgsql' && $model->usesTimestamps()) { $columns = Schema::connection($connection)->getColumnListing($table); $after = $this->getFirstBeforeAtField($columns); } Schema::connection($connection)->table($table, function (Blueprint $table) use ($searchableField, $after, $dbType, $dbSize) { - $this->output->info("Using spec $dbType $dbSize"); + $this->line("Using spec $dbType $dbSize"); if ($after) { - $this->output->info("After $after"); + $this->line("After $after"); $table->$dbType($searchableField, $dbSize)->default('')->after($after); } else { $table->$dbType($searchableField, $dbSize)->default(''); } }); - $this->output->info('Create full text index'); - DB::connection($connection)->statement("ALTER TABLE $table ADD FULLTEXT searchable($searchableField)"); + $this->line('Create full text index'); + if ($driver === 'pgsql') { // @codeCoverageIgnoreStart + $tsConfig = $model->getSearchableTsConfig(); + DB::connection($connection)->statement("CREATE INDEX {$table}_{$searchableField}_fulltext ON {$table} USING GIN(to_tsvector('{$tsConfig}', {$searchableField}))"); + } else { // @codeCoverageIgnoreEnd + DB::connection($connection)->statement("ALTER TABLE $table ADD FULLTEXT searchable($searchableField)"); + } } else { - $this->output->info("Found $searchableField in $table"); + $this->line("Found $searchableField in $table"); } return $this; } - /** - * @param Model $instance - */ - protected function index(Model $instance) + protected function index(Model&SearchableInterface $instance): void { - /** @var Searchable $instance */ $searchableField = $instance->getSearchableField(); - $this->output->info('Indexing: ' . 
$instance::class); - $this->output->progressStart(); - foreach ($instance->lazy() as $record) { - /* @var Model&Searchable $record */ - $record->{$searchableField} = $record->getSearchableContent(); - $record->save(); - $this->output->progressAdvance(); - } + $table = $instance->getTable(); + $keyName = $instance->getKeyName(); + $connection = $instance->getConnectionName(); + + $this->info('Indexing: ' . $instance::class); + $bar = $this->hasProgress ? $this->getOutput()->createProgressBar($instance->count()) : null; // @phpstan-ignore method.notFound + $bar?->setFormat(ProgressBar::FORMAT_VERY_VERBOSE); + // @phpstan-ignore method.notFound + $instance->chunkById( + 1000, + function (Collection $chunk) use ($bar, $searchableField, $table, $keyName, $connection) { + /** @var Collection $chunk */ + $cases = []; + $bindings = []; + $ids = []; + + foreach ($chunk as $model) { + /** @var Model&SearchableInterface $model */ + $id = $model->getKey(); + $ids[] = $id; + $cases[] = 'WHEN ? THEN ?'; + $bindings[] = $id; + $bindings[] = $model->getSearchableContent(); + } + + $idPlaceholders = implode(',', array_fill(0, count($ids), '?')); + $bindings = array_merge($bindings, $ids); + + DB::connection($connection)->update( + "UPDATE {$table} SET {$searchableField} = CASE {$keyName} " + . implode(' ', $cases) + . " END WHERE {$keyName} IN ({$idPlaceholders})", + $bindings, + ); + + $bar?->advance($chunk->count()); + }, + ); - $this->output->progressFinish(); + $bar?->finish(); + $this->newLine(); } - protected function getModelFiles() + protected function getModelFiles(): void { $finder = (new Finder) ->files() ->in($this->modelRootDir) ->name('*.php') - ->getIterator(); + ->getIterator() + ; foreach ($finder as $file) { require_once $file->getRealPath(); } } - /** - * @param array $columns - * - * @return string|null - */ - protected function getFirstBeforeAtField(array $columns): ? 
string + /** @param array $columns */ + protected function getFirstBeforeAtField(array $columns): ?string { $prev = null; foreach ($columns as $column) { diff --git a/src/Command/StopWords.php b/src/Command/StopWords.php deleted file mode 100644 index a424433..0000000 --- a/src/Command/StopWords.php +++ /dev/null @@ -1,102 +0,0 @@ -output->info('Create ' . self::STOP_WORDS_TABLE); - Schema::create(self::STOP_WORDS_TABLE, function (Blueprint $table) { - $table->string('value', 32); - }); - } - - $this->output->info('Empty ' . self::STOP_WORDS_TABLE); - DB::table(self::STOP_WORDS_TABLE)->truncate(); - $this->output->info('Populate ' . self::STOP_WORDS_TABLE); - - DB::table(self::STOP_WORDS_TABLE)->insert($this->getInsertStopWords()); - - $this->output->success('Done'); - - return 0; - } - - /** - * @throws FileNotFoundException - * - * @return array - */ - protected function getInsertStopWords(): array - { - $result = []; - foreach ($this->getStopWords() as $insertStopWord) { - $result[] = ['value' => $insertStopWord]; - } - - return $result; - } - - /** - * @throws FileNotFoundException - * - * @return \Generator - */ - protected function getStopWords(): \Generator - { - $fs = new Filesystem; - foreach ($fs->files(__DIR__ . '/../' . $this->stopWordDir) as $file) { - if ($file->getExtension() !== 'txt') { - continue; - } - - foreach ($fs->lines($file->getRealPath()) as $word) { - yield trim($word); - } - } - } -} diff --git a/src/Listener/SearchableEnableAfterMigrate.php b/src/Listener/SearchableEnableAfterMigrate.php new file mode 100644 index 0000000..7d4e78c --- /dev/null +++ b/src/Listener/SearchableEnableAfterMigrate.php @@ -0,0 +1,29 @@ +method === 'up' + && ! 
(isset($event->options['pretend']) && $event->options['pretend']) + ) { + Artisan::call('searchable:enable', [], new ConsoleOutput); + } + } +} diff --git a/src/Phonetic/Phonetic.php b/src/Phonetic/Phonetic.php new file mode 100644 index 0000000..4680243 --- /dev/null +++ b/src/Phonetic/Phonetic.php @@ -0,0 +1,143 @@ + */ + protected const FIRST_PATTERNS = ['~O[O]+~', '~SAOU~', '~OES~', '~CCH~', '~CC([IYE])~', '~(.)\1~']; + + /** @var list */ + protected const FIRST_REPLACEMENTS = ['OU', 'SOU', 'OS', 'K', 'KS$1', '$1']; + + /** @var list */ + protected const MAIN_PATTERNS = ['~OIN[GT]$~', '~E[RS]$~', '~(C|CH)OEU~', '~MOEU~', '~OE([UI]+)([BCDFGHJKLMNPQRSTVWXZ])~', '~^GEN[TS]$~', '~CUEI~', '~([^AEIOUYC])AE([BCDFGHJKLMNPQRSTVWXZ])~', '~AE([QS])~', '~AIE([BCDFGJKLMNPQRSTVWXZ])~', '~ANIEM~', '~(DRA|TRO|IRO)P$~', '~(LOM)B$~', '~(RON|POR)C$~', '~PECT$~', '~ECUL$~', '~(CHA|CA|E)M(P|PS)$~', '~(TAN|RAN)G$~', '~([^VO])ILAG~', '~([^TRH])UIL(AR|E)(.+)~', '~([G])UIL([AEO])~', '~([NSPM])AIL([AEO])~', '~DIL(AI|ON|ER|EM)~', '~RILON~', '~TAILE~', '~GAILET~', '~AIL(A[IR])~', '~OUILA~', '~EIL(AI|AR|ER|EM)~', '~REILET~', '~EILET~', '~AILOL~', '~([^AEIOUY])(SC|S)IEM([EA])~', '~^(SC|S)IEM([EA])~', '~([OAI])MB~', '~([OA])MP~', '~GEMB~', '~EM([BP])~', '~UMBL~', '~CIEN~', '~^ECEUR~', '~^CH(OG+|OL+|OR+|EU+|ARIS|M+|IRO|ONDR)~', '~(YN|RI)CH(OG+|OL+|OC+|OP+|OM+|ARIS|M+|IRO|ONDR)~', '~CHS~', '~CH(AIQ)~', '~^ECHO([^UIPY])~', '~ISCH(I|E)~', '~^ICHT~', '~ORCHID~', '~ONCHIO~', '~ACHIA~', '~([^C])ANICH~', '~OMANIK~', '~ACHY([^D])~', '~([AEIOU])C([BDFGJKLMNPQRTVWXZ])~', '~EUCHA~', '~YCH(IA|A|O|ED)~', '~([AR])CHEO~', '~RCHES~', '~ECHN~', '~OCHTO~', '~CHO(RA|NDR|RE)~', '~MACHM~', '~BRONCHO~', '~LICHO([SC])~', '~WA~', '~WO~', '~(?:WI|WHI|WHY)~', '~WHA~', '~WHO~', '~GNE([STR])~', '~GNE~', '~GI~', '~GNI~', '~GN(A|OU|UR)~', '~GY~', '~OUGAIN~', '~AGEO([LT])~', '~GEORG~', '~GEO(LO|M|P|G|S|R)~', '~([NU])GEOT~', '~GEO([TDC])~', '~GE([OA])~', '~GE~', '~QU?~', '~C[YI]~', '~CN~', '~ICM~', '~CEAT~', '~CE~', 
'~C([RO])~', '~CUEI~', '~CU~', '~VENCA~', '~C([AS])~', '~CLEN~', '~C([LZ])~', '~CTIQ~', '~CTI[CS]~', '~CTI([FL])~', '~CTIO~', '~CT([IUEOR])?~', '~PH~', '~TH~', '~OW~', '~LH~', '~RDL~', '~CH(LO|R)~', '~PTIA~', '~GU([^RLMBSTPZN])~', '~GNO(?=[MLTNRKG])~', '~BUTI([EA])~', '~BATIA~', '~ANTIEL~', '~RETION~', '~ENTI([EA])L~', '~ENTIO~', '~ENTIAI~', '~UJETION~', '~ATIEM~', '~PETIEN~', '~CETIE~', '~OFETIE~', '~IPETI~', '~LBUTION~', '~BLUTION~', '~L([EA])TION~', '~SATIET~', '~(.+)ANTI(AL|O)~', '~(.+)INUTI([^V])~', '~([^O])UTIEN~', '~([^DE])RATI([E])$~', '~([^SNEU]|KU|KO|RU|LU|BU|TU|AU)T(IEN|ION)~', '~([^CS])H~', '~([EN])SH~', '~SH~', '~OMT~', '~IM([BP])~', '~UMD~', '~([TRD])IENT~', '~IEN~', '~YM([UOAEIN])~', '~YM~', '~AHO~', '~([FDS])AIM~', '~EIN~', '~AINS~', '~AIN$~', '~AIN([BTDK])~', '~([^O])UND~', '~([JTVLFMRPSBD])UN([^IAE])~', '~([JTVLFMRPSBD])UN$~', '~RFUM$~', '~LUMB~', '~([^BCDFGHJKLMNPQRSTVWXZ])EN~', '~([VTLJMRPDSBFKNG])EN(?=[BRCTDKZSVN])~', '~^EN([BCDFGHJKLNPQRSTVXZ]|CH|IV|ORG|OB|UI|UA|UY)~', '~(^[JRVTH])EN([DRTFGSVJMP])~', '~SEN([ST])~', '~^DESENIV~', '~([^M])EN(U[IY])~', '~(.+[JTVLFMRPSBD])EN([JLFDSTG])~', '~([VSBSTNRLPM])E[IY]([ACDFRJLGZ])~', '~EAU~', '~EU~', '~Y~', '~EOI~', '~JEA~', '~OIEM~', '~OUANJ~', '~OUA~', '~OUENJ~', '~AU([^E])~', '~^BENJ~', '~RTIEL~', '~PINK~', '~KIND~', '~KUM(N|P)~', '~LKOU~', '~EDBE~', '~ARCM~', '~SCH~', '~^OINI~', '~([^NDCGRHKO])APT~', '~([L]|KON)PT~', '~OTB~', '~IXA~', '~TG~', '~^TZ~', '~PTIE~', '~GT~', '~ANKIEM~', '~(LO|RE)KEMAN~', '~NT(B|M)~', '~GSU~', '~ESD~', '~LESKEL~', '~CK~', '~USIL$~', '~X$|[TD]S$|[DS]$~', '~([^KL]+)T$~', '~^[H]~']; + + /** @var list */ + protected const MAIN_REPLACEMENTS = ['OIN', 'E', 'KE', 'ME', 'E$1$2', 'JAN', 'KEI', '$1E$2', 'E$1', 'AI$1', 'ANIM', '$1', '$1', '$1', 'PET', 'CU', '$1N', '$1', '$1IAJ', '$1UI$2$3', '$1UI$2', '$1AI$2', 'DI$1', 'RION', 'TAIE', 'GAIET', 'AI$1', 'OUIA', 'AI$1', 'RAIET', 'EIET', 'AIOL', '$1$2IAM$3', '$1IAM$2', '$1NB', '$1NP', 'JANB', 'AN$1', 'INBL', 'SIAN', 'EKEUR', 'K$1', '$1K$2', 
'CH', 'K$1', 'EKO$1', 'ISK$1', 'IKT', 'ORKID', 'ONKIO', 'AKIA', '$1ANIK', 'OMANICH', 'AKI$1', '$1K$2', 'EKA', 'IK$1', '$1KEO', 'RKES', 'EKN', 'OKTO', 'KO$1', 'MAKM', 'BRONKO', 'LIKO$1', 'OI', 'O', 'OUI', 'OUA', 'OU', 'NIE$1', 'NE', 'JI', 'NI', 'NI$1', 'JI', 'OUGIN', 'AJO$1', 'JORJ', 'JEO$1', '$1JOT', 'JEO$1', 'J$1', 'JE', 'K', 'SI', 'KN', 'IKM', 'SAT', 'SE', 'K$1', 'KEI', 'KU', 'VANSA', 'K$1', 'KLAN', 'K$1', 'KTIK', 'KTIS', 'KTI$1', 'KSIO', 'KT$1', 'F', 'T', 'OU', 'L', 'RL', 'K$1', 'PSIA', 'G$1', 'NIO', 'BUSI$1', 'BASIA', 'ANSIEL', 'RESION', 'ENSI$1L', 'ENSIO', 'ENSIAI', 'UJESION', 'ASIAM', 'PESIEN', 'CESIE', 'OFESIE', 'IPESI', 'LBUSION', 'BLUSION', 'L$1SION', 'SASIET', '$1ANSI$2', '$1INUSI$2', '$1USIEN', '$1RASI$2', '$1S$2', '$1', '$1S', 'CH', 'ONT', 'IN$1', 'OND', '$1IANT', 'IN', 'IM$1', 'IN', 'AO', '$1IN', 'AIN', 'INS', 'IN', 'IN$1', '$1IND', '$1IN$2', '$1IN', 'RFIN', 'LINB', '$1AN', '$1AN', 'AN$1', '$1AN$2', 'SAN$1', 'DESANIV', '$1AN$2', '$1AN$2', '$1AI$2', 'O', 'E', 'I', 'OI', 'JA', 'OIM', 'OUENJ', 'OI', 'OUANJ', 'O$1', 'BINJ', 'RSIEL', 'PONK', 'KOND', 'KON$1', 'LKO', 'EBE', 'ARKM', 'CH', 'ONI', '$1AT', '$1T', 'OB', 'ISA', 'G', 'TS', 'TIE', 'T', 'ANKILEM', '$1KAMAN', 'N$1', 'SU', 'ED', 'LEKEL', 'K', 'USI', '', '$1', '']; + + /** @var list */ + protected const END_PATTERNS = ['~TIL$~', '~LC$~', '~L[E]?[S]?$~', '~(.+)N[E]?[S]?$~', '~EZ$~', '~OIG$~', '~OUP$~', '~([^R])OM$~', '~LOP$~', '~NTANP$~', '~TUN$~', '~AU$~', '~EI$~', '~R[DG]$~', '~ANC$~', '~KROC$~', '~HOUC$~', '~OMAC$~', '~([J])O([NU])[CG]$~', '~([^GTR])([AO])NG$~', '~UC$~', '~AING$~', '~([EISOARN])C$~', '~([ABD-MO-Z]+)[EH]+$~', '~EN$~', '~(NJ)EN$~', '~^PAIEM~', '~([^NTB])EF$~', '~(.)\1~']; + + /** @var list */ + protected const END_REPLACEMENTS = ['TI', 'LK', 'L', '$1N', 'E', 'OI', 'OU', '$1ON', 'LO', 'NTAN', 'TIN', 'O', 'AI', 'R', 'AN', 'KRO', 'HOU', 'OMA', '$1O$2', '$1$2N', 'UK', 'IN', '$1K', '$1', 'AN', '$1AN', 'PAIM', '$1', '$1']; + protected const EXCEPTIONS = [ + 'CD' => 'CD', + 'BD' => 'BD', + 'BV' 
=> 'BV', + 'TABAC' => 'TABA', + 'FEU' => 'FE', + 'FE' => 'FE', + 'FER' => 'FER', + 'FIEF' => 'FIEF', + 'FJORD' => 'FJORD', + 'GOAL' => 'GOL', + 'FLEAU' => 'FLEO', + 'HIER' => 'IER', + 'HEU' => 'E', + 'HE' => 'E', + 'OS' => 'OS', + 'RIZ' => 'RI', + 'RAZ' => 'RA', + 'ECHO' => 'EKO', + ]; + + public static function encode(string $word): string + { + // Preparing the word + $word = mb_strtoupper($word); + + // Handle ligatures and cedilla before deburring + $word = str_replace( + ['Œ', 'Æ', 'Ç'], + ['OEU', 'E', 'S'], + $word, + ); + + // Strip remaining accents + $normalized = Normalizer::normalize($word, Normalizer::FORM_D); + if ($normalized !== false) { + $word = (string) preg_replace('~\p{Mn}~u', '', $normalized); + } + + // Strip non-alpha + $word = (string) preg_replace('~[^A-Z]+~', '', $word); + + if ($word === '') { + return ''; + } + + $code = $word; + + // First preprocessing + $code = (string) preg_replace(static::FIRST_PATTERNS, static::FIRST_REPLACEMENTS, $code); + + // Check exceptions + if (isset(static::EXCEPTIONS[$code])) { + return static::EXCEPTIONS[$code]; + } + + // Second preprocessing + main rules + first endings + $code = (string) preg_replace(static::MAIN_PATTERNS, static::MAIN_REPLACEMENTS, $code); + + // Save backup for short word recovery + $backupCode = $code; + + // Second endings + $code = (string) preg_replace(static::END_PATTERNS, static::END_REPLACEMENTS, $code); + + // Special case + if ($code === 'FUEL') { + $code = 'FIOUL'; + } + + // "O" is the only acceptable single-letter code + if ($code === 'O') { + return $code; + } + + // Attempt to save short codes + if (strlen($code) < 2) { + // Abbreviations (3+ consecutive consonants) + if (preg_match('~[BCDFGHJKLMNPQRSTVWXYZ]{3,}~', $word)) { + return $word; + } + + // Simple words (consonant + vowel, 3-4 chars) + if (preg_match('~[RFMLVSPJDF][AEIOU]~', $word)) { + $len = strlen($word); + if ($len === 3 || $len === 4) { + return substr($word, 0, -1); + } + } + + if (strlen($backupCode) > 
1) { + return $backupCode; + } + } + + if (strlen($code) > 1) { + return $code; + } + + return ''; + } +} diff --git a/src/Phonetic/PhoneticInterface.php b/src/Phonetic/PhoneticInterface.php new file mode 100644 index 0000000..0ac7c23 --- /dev/null +++ b/src/Phonetic/PhoneticInterface.php @@ -0,0 +1,17 @@ + + */ + protected const FIRST_PATTERNS = ['~GU([IE])~', '~G([AO])~', '~GU~', '~C([AOU])~', '~(?:Q|CC|CK)~', '~(?!^)[AEIOU]~']; + + /** @var list */ + protected const FIRST_REPLACEMENTS = ['K$1', 'K$1', 'K', 'K$1', 'K', 'A']; + + /** + * Remove H (not after C/S), Y (not after A), trailing A/D/T/S, + * non-initial A placeholders, and squeeze consecutive duplicates. + * + * @var list + */ + protected const END_PATTERNS = ['~([^CS])H~', '~([^A])Y~', '~[ADTS]$~', '~(?!^)A~', '~(.)\1+~']; + + /** @var list */ + protected const END_REPLACEMENTS = ['$1', '$1', '', '', '$1']; + protected const PREFIXES = [ + 'MAC' => 'MCC', + 'SCH' => 'SSS', + 'ASA' => 'AZA', + 'KN' => 'NN', + 'PH' => 'FF', + 'PF' => 'FF', + ]; + + public static function encode(string $name): string + { + $code = mb_strtoupper(static::deburr(trim($name))); + $code = (string) preg_replace('~[^A-Z]+~', '', $code); + + if ($code === '') { + return ''; + } + + // Letter groups + non-initial vowels → A + $code = (string) preg_replace(static::FIRST_PATTERNS, static::FIRST_REPLACEMENTS, $code); + + // Replacing prefixes + foreach (static::PREFIXES as $prefix => $replacement) { + if (str_starts_with($code, $prefix)) { + $code = $replacement . substr($code, strlen($prefix)); + break; + } + } + + // Cleanup + remove A placeholders + squeeze, then truncate to 4 + $code = (string) preg_replace(static::END_PATTERNS, static::END_REPLACEMENTS, $code); + + if ($code === '') { + return ''; + } + + return substr($code, 0, 4); + } + + /** + * Strip diacritical marks and decompose ligatures. 
+ */ + protected static function deburr(string $string): string + { + $string = str_replace( + ['Œ', 'œ', 'Æ', 'æ'], + ['OE', 'oe', 'AE', 'ae'], + $string, + ); + + $normalized = Normalizer::normalize($string, Normalizer::FORM_D); + + return (string) preg_replace('~\p{Mn}~u', '', (string) $normalized); + } +} diff --git a/src/SearchQuery.php b/src/SearchQuery.php index ab7d66a..3a0ff69 100644 --- a/src/SearchQuery.php +++ b/src/SearchQuery.php @@ -1,69 +1,73 @@ order = $this->getOrder($order); - $this->searchableField = $searchableField; + $this->order = $this->getOrder($order); + $this->searchableField = $searchableField; + $this->tsConfig = $tsConfig; + $this->phonetic = $phonetic; + $this->phoneticAlgorithm = $phoneticAlgorithm ?? metaphone(...); } /** - * @param Builder $query + * @param Builder $query * @param string|array $search - * @param string $tableAlias - * @param string|null $order */ public function addMatch(Builder $query, string|array $search, string $tableAlias = '', ?string $order = null): void { - $terms = TermParser::parse($search); + $driver = $query->getConnection()->getDriverName(); // @phpstan-ignore method.notFound + $terms = TermParser::parse($search, $driver, $this->phonetic, $this->phoneticAlgorithm); if (empty($terms)) { return; } - $searchField = ($tableAlias ? "$tableAlias." : '') . $this->searchableField; + $searchField = ($tableAlias ? "$tableAlias." : '') . $this->searchableField; + $order = $order ? $this->getOrder($order) : $this->order; + + if ($driver === 'pgsql') { + $query->whereRaw("to_tsvector('{$this->tsConfig}', {$searchField}) @@ to_tsquery('{$this->tsConfig}', ?)", [$terms]); + + if ($order) { + $query->orderByRaw("ts_rank(to_tsvector('{$this->tsConfig}', {$searchField}), to_tsquery('{$this->tsConfig}', ?)) {$order}", [$terms]); + } + + return; + } + $query->whereRaw('MATCH (' . $searchField . ') AGAINST (? IN BOOLEAN MODE)', [$terms]); - $order = $order ? 
$this->getOrder($order) : $this->order; if ($order) { $query->orderByRaw('(MATCH (' . $searchField . ') AGAINST (? IN BOOLEAN MODE)) ' . $order, [$terms]); } } - /** - * @param string|null $order - * - * @return string - */ public function getOrder(?string $order = null): string { return $order && in_array(strtoupper($order), ['DESC', 'ASC'], true) ? $order : ''; diff --git a/src/SearchableInterface.php b/src/SearchableInterface.php new file mode 100644 index 0000000..c8d1c01 --- /dev/null +++ b/src/SearchableInterface.php @@ -0,0 +1,38 @@ + */ + public function getSearchables(): array; + + public function getSearchableTsConfig(): string; + + public function getSearchablePhonetic(): bool; + + /** @return Closure(string): string */ + public function getSearchablePhoneticClosure(): Closure; +} diff --git a/src/SearchableServiceProvider.php b/src/SearchableServiceProvider.php index 551d002..09f1d36 100644 --- a/src/SearchableServiceProvider.php +++ b/src/SearchableServiceProvider.php @@ -1,8 +1,10 @@ > */ protected array $commands = [ - StopWords::class, Enable::class, ]; @@ -29,4 +34,11 @@ public function register() $this->commands($this->commands); } } + + public function boot(): void + { + if ($this->app->runningInConsole()) { + Event::listen(MigrationsEnded::class, SearchableEnableAfterMigrate::class); + } + } } diff --git a/src/TermParser.php b/src/TermParser.php index 2b2cb0c..b44e790 100644 --- a/src/TermParser.php +++ b/src/TermParser.php @@ -1,14 +1,17 @@ $search - * - * @return string + * @param ?Closure(string): string $phoneticAlgorithm */ - public static function parse(string|array $search): string + public static function parse(string|array $search, string $driver = 'mysql', bool $phonetic = false, ?Closure $phoneticAlgorithm = null): string { + $filtered = static::filter($search); + + if ($phonetic) { + $filtered = static::phoneticize($filtered, $phoneticAlgorithm ?? 
metaphone(...)); + } + + if ($driver === 'pgsql') { + return implode(' & ', array_filter(array_map(function ($value) { + return $value !== '' ? $value . ':*' : ''; + }, explode(' ', $filtered)))); + } + return implode(' ', array_map(function ($value) { - return $value ? $value . '*' : ''; - }, explode(' ', static::filter($search)))); + return $value !== '' ? $value . '*' : ''; + }, explode(' ', $filtered))); } /** * @param string|array $search - * - * @return string */ public static function filter(string|array $search): string { @@ -37,24 +49,36 @@ public static function filter(string|array $search): string $search = str_replace('Array', '', implode(' ', array_filter($search))); } - $search = trim(preg_replace([ + $search = trim((string) preg_replace([ // drop operator (+, -, > <, ( ), ~, *, ", @distance) // and some punctuation - '`[+\-><\(\)~*\"@,.:;?!]+`', - //'`[+\-><\(\)~*\",.:;?!]+`', + '`[+\-><\(\)~*\"@,.:;?!&|]+`', + // '`[+\-><\(\)~*\",.:;?!]+`', ], ' ', Strings::singleLineIze(Strings::normalizeText($search)))); - return preg_replace('`\s{2,}`', ' ', Utf8::strtolower(Strings::singleWsIze($search, true))); + return (string) preg_replace('`\s{2,}`', ' ', Utf8::strtolower(Strings::singleWsIze($search, true))); + } + + /** @param Closure(string): string $encoder */ + public static function phoneticize(string $filtered, Closure $encoder): string + { + $words = array_filter(explode(' ', $filtered), fn (string $word) => $word !== ''); + $codes = []; + foreach ($words as $word) { + $code = (string) $encoder($word); + if ($code !== '') { + $codes[] = strtolower($code); + } + } + + return $codes ? $filtered . ' ' . implode(' ', $codes) : $filtered; } /** - * @param string|array ...$input - * - * @return string + * @param string|array ...$input */ public static function prepareSearchable(string|array ...$input): string { - $input = is_string($input) ? 
func_get_args() : $input; $result = []; foreach ($input as $value) { $result = array_merge($result, (array) $value); diff --git a/src/Traits/Searchable.php b/src/Traits/Searchable.php index 1b8b07d..effa320 100644 --- a/src/Traits/Searchable.php +++ b/src/Traits/Searchable.php @@ -1,26 +1,39 @@ $searchables + * @property string $searchableTsConfig + * @property bool $searchablePhonetic + * @property class-string $searchablePhoneticAlgorithm + */ trait Searchable { - /** - * @return string - */ public function getSearchableField(): string { - return SearchQuery::SEARCHABLE_FIELD; + return $this->searchableField ?? SearchQuery::SEARCHABLE_FIELD; } /** @@ -28,41 +41,37 @@ public function getSearchableField(): string */ public function getSearchableFieldDbType(): string { - return 'string'; + return $this->searchableFieldDbType ?? 'string'; } - /** - * @return int - */ public function getSearchableFieldDbSize(): int { - return 255; + return $this->searchableFieldDbSize ?? 500; } /** * @param string $additional for case where this method is overridden in users - * - * @return string */ public function getSearchableContent(string $additional = ''): string { - return TermParser::prepareSearchable(array_map(function ($field) { + $content = TermParser::prepareSearchable(array_map(function ($field) { return $this->$field; }, $this->getSearchables()), $additional); + + if ($this->getSearchablePhonetic()) { + $content = TermParser::phoneticize($content, $this->getSearchablePhoneticClosure()); + } + + return $content; } - public static function bootSearchable():void + public function initializeSearchable(): void { - static::creating(function (Model $model) { - /* @var Searchable $model */ - $model->makeHidden($model->getSearchableField()); - }); - - static::retrieved(function (Model $model) { - /* @var Searchable $model */ - $model->makeHidden($model->getSearchableField()); - }); + $this->makeHidden($this->getSearchableField()); + } + public static function bootSearchable(): 
void + { static::saving(function (Model $model) { /* @var Searchable $model */ $model->{$model->getSearchableField()} = $model->getSearchableContent(); @@ -73,4 +82,30 @@ public function getSearchables(): array { return $this->searchables ?? []; } + + public function getSearchableTsConfig(): string + { + return $this->searchableTsConfig ?? 'english'; + } + + public function getSearchablePhonetic(): bool + { + return $this->searchablePhonetic ?? false; + } + + /** @return Closure(string): string */ + public function getSearchablePhoneticClosure(): Closure + { + return isset($this->searchablePhoneticAlgorithm) + ? $this->searchablePhoneticAlgorithm::encode(...) + : metaphone(...); + } + + /** @param Builder $query */ + public function scopeSearch(Builder $query, string|array $search, ?string $order = 'DESC'): void + { + (new SearchQuery($order, $this->getSearchableField(), $this->getSearchableTsConfig(), $this->getSearchablePhonetic(), $this->getSearchablePhoneticClosure())) + ->addMatch($query, $search) + ; + } } diff --git a/src/innodb_full_text.cnf b/src/innodb_full_text.cnf deleted file mode 100644 index 397dff2..0000000 --- a/src/innodb_full_text.cnf +++ /dev/null @@ -1,6 +0,0 @@ -# Full text indexes -innodb_ft_min_token_size = 2 -innodb_ft_max_token_size = 32 -innodb_ft_enable_stopword = ON -# need to set the db here -innodb_ft_server_stopword_table = database/stop_words \ No newline at end of file diff --git a/src/stopwords/stop_words_english.txt b/src/stopwords/stop_words_english.txt deleted file mode 100644 index 972f618..0000000 --- a/src/stopwords/stop_words_english.txt +++ /dev/null @@ -1,851 +0,0 @@ -able -about -above -abroad -according -accordingly -across -actually -adj -after -afterwards -again -against -ago -ahead -ain't -all -allow -allows -almost -alone -along -alongside -already -also -although -always -am -amid -amidst -among -amongst -an -and -another -any -anybody -anyhow -anyone -anything -anyway -anyways -anywhere -apart -appear -appreciate 
-appropriate -are -aren't -around -as -a's -aside -ask -asking -associated -at -available -away -awfully -back -backward -backwards -be -became -because -become -becomes -becoming -been -before -beforehand -begin -behind -being -believe -below -beside -besides -best -better -between -beyond -both -brief -but -by -came -can -cannot -cant -can't -caption -cause -causes -certain -certainly -changes -clearly -c'mon -co -co. -com -come -comes -concerning -consequently -consider -considering -contain -containing -contains -corresponding -could -couldn't -course -c's -currently -dare -daren't -definitely -described -despite -did -didn't -different -directly -do -does -doesn't -doing -done -don't -down -downwards -during -each -edu -eg -eight -eighty -either -else -elsewhere -end -ending -enough -entirely -especially -et -etc -even -ever -evermore -every -everybody -everyone -everything -everywhere -ex -exactly -example -except -fairly -far -farther -few -fewer -fifth -first -five -followed -following -follows -for -forever -former -formerly -forth -forward -found -four -from -further -furthermore -get -gets -getting -given -gives -go -goes -going -gone -got -gotten -greetings -had -hadn't -half -happens -hardly -has -hasn't -have -haven't -having -he -he'd -he'll -hello -help -hence -her -here -hereafter -hereby -herein -here's -hereupon -hers -herself -he's -hi -him -himself -his -hither -hopefully -how -howbeit -however -hundred -i'd -ie -if -ignored -i'll -i'm -immediate -in -inasmuch -inc -inc. 
-indeed -indicate -indicated -indicates -inner -inside -insofar -instead -into -inward -is -isn't -it -it'd -it'll -its -it's -itself -i've -just -k -keep -keeps -kept -know -known -knows -last -lately -later -latter -latterly -least -less -lest -let -let's -like -liked -likely -likewise -little -look -looking -looks -low -lower -ltd -made -mainly -make -makes -many -may -maybe -mayn't -me -mean -meantime -meanwhile -merely -might -mightn't -mine -minus -miss -more -moreover -most -mostly -mr -mrs -much -must -mustn't -my -myself -name -namely -nd -near -nearly -necessary -need -needn't -needs -neither -never -neverf -neverless -nevertheless -new -next -nine -ninety -no -nobody -non -none -nonetheless -noone -no-one -nor -normally -not -nothing -notwithstanding -novel -now -nowhere -obviously -of -off -often -oh -ok -okay -old -on -once -one -ones -one's -only -onto -opposite -or -other -others -otherwise -ought -oughtn't -our -ours -ourselves -out -outside -over -overall -own -particular -particularly -past -per -perhaps -placed -please -plus -possible -presumably -probably -provided -provides -que -quite -qv -rather -rd -re -really -reasonably -recent -recently -regarding -regardless -regards -relatively -respectively -right -round -said -same -saw -say -saying -says -second -secondly -see -seeing -seem -seemed -seeming -seems -seen -self -selves -sensible -sent -serious -seriously -seven -several -shall -shan't -she -she'd -she'll -she's -should -shouldn't -since -six -so -some -somebody -someday -somehow -someone -something -sometime -sometimes -somewhat -somewhere -soon -sorry -specified -specify -specifying -still -sub -such -sup -sure -take -taken -taking -tell -tends -th -than -thank -thanks -thanx -that -that'll -thats -that's -that've -the -their -theirs -them -themselves -then -thence -there -thereafter -thereby -there'd -therefore -therein -there'll -there're -theres -there's -thereupon -there've -these -they -they'd -they'll -they're -they've -thing 
-things -think -third -thirty -this -thorough -thoroughly -those -though -three -through -throughout -thru -thus -till -to -together -too -took -toward -towards -tried -tries -truly -try -trying -t's -twice -two -un -under -underneath -undoing -unfortunately -unless -unlike -unlikely -until -unto -up -upon -upwards -us -use -used -useful -uses -using -usually -v -value -various -versus -very -via -viz -vs -want -wants -was -wasn't -way -we -we'd -welcome -well -we'll -went -were -we're -weren't -we've -what -whatever -what'll -what's -what've -when -whence -whenever -where -whereafter -whereas -whereby -wherein -where's -whereupon -wherever -whether -which -whichever -while -whilst -whither -who -who'd -whoever -whole -who'll -whom -whomever -who's -whose -why -will -willing -wish -with -within -without -wonder -won't -would -wouldn't -yes -yet -you -you'd -you'll -your -you're -yours -yourself -yourselves -you've -zero -a -how's -i -when's -why's -b -c -d -e -f -g -h -j -l -m -n -o -p -q -r -s -t -u -uucp -w -x -y -z -I -www -amount -bill -bottom -call -computer -con -couldnt -cry -de -describe -detail -due -eleven -empty -fifteen -fifty -fill -find -fire -forty -front -full -give -hasnt -herse -himse -interest -itse” -mill -move -myse” -part -put -show -side -sincere -sixty -system -ten -thick -thin -top -twelve -twenty -abst -accordance -act -added -adopted -affected -affecting -affects -ah -announce -anymore -apparently -approximately -aren -arent -arise -auth -beginning -beginnings -begins -biol -briefly -ca -date -ed -effect -et-al -ff -fix -gave -giving -heres -hes -hid -home -id -im -immediately -importance -important -index -information -invention -itd -keys -kg -km -largely -lets -line -'ll -means -mg -million -ml -mug -na -nay -necessarily -nos -noted -obtain -obtained -omitted -ord -owing -page -pages -poorly -possibly -potentially -pp -predominantly -present -previously -primarily -promptly -proud -quickly -ran -readily -ref -refs -related -research 
-resulted -resulting -results -run -sec -section -shed -shes -showed -shown -showns -shows -significant -significantly -similar -similarly -slightly -somethan -specifically -state -states -stop -strongly -substantially -successfully -sufficiently -suggest -thered -thereof -therere -thereto -theyd -theyre -thou -thoughh -thousand -throug -til -tip -ts -ups -usefully -usefulness -'ve -vol -vols -wed -whats -wheres -whim -whod -whos -widely -words -world -youd -youre \ No newline at end of file diff --git a/src/stopwords/stop_words_french.txt b/src/stopwords/stop_words_french.txt deleted file mode 100644 index a6c9efb..0000000 --- a/src/stopwords/stop_words_french.txt +++ /dev/null @@ -1,496 +0,0 @@ -a -à -â -abord -afin -ah -ai -aie -ainsi -allaient -allo -allô -allons -après -assez -attendu -au -aucun -aucune -aujourd -aujourd'hui -auquel -aura -auront -aussi -autre -autres -aux -auxquelles -auxquels -avaient -avais -avait -avant -avec -avoir -ayant -b -bah -beaucoup -bien -bigre -boum -bravo -brrr -c -ça -car -ce -ceci -cela -celle -celle-ci -celle-là -celles -celles-ci -celles-là -celui -celui-ci -celui-là -cent -cependant -certain -certaine -certaines -certains -certes -ces -cet -cette -ceux -ceux-ci -ceux-là -chacun -chaque -cher -chère -chères -chers -chez -chiche -chut -ci -cinq -cinquantaine -cinquante -cinquantième -cinquième -clac -clic -combien -comme -comment -compris -concernant -contre -couic -crac -d -da -dans -de -debout -dedans -dehors -delà -depuis -derrière -des -dès -désormais -desquelles -desquels -dessous -dessus -deux -deuxième -deuxièmement -devant -devers -devra -différent -différente -différentes -différents -dire -divers -diverse -diverses -dix -dix-huit -dixième -dix-neuf -dix-sept -doit -doivent -donc -dont -douze -douzième -dring -du -duquel -durant -e -effet -eh -elle -elle-même -elles -elles-mêmes -en -encore -entre -envers -environ -es -ès -est -et -etant -étaient -étais -était -étant -etc -été -etre -être -eu -euh -eux -eux-mêmes 
-excepté -f -façon -fais -faisaient -faisant -fait -feront -fi -flac -floc -font -g -gens -h -ha -hé -hein -hélas -hem -hep -hi -ho -holà -hop -hormis -hors -hou -houp -hue -hui -huit -huitième -hum -hurrah -i -il -ils -importe -j -je -jusqu -jusque -k -l -la -là -laquelle -las -le -lequel -les -lès -lesquelles -lesquels -leur -leurs -longtemps -lorsque -lui -lui-même -m -ma -maint -mais -malgré -me -même -mêmes -merci -mes -mien -mienne -miennes -miens -mille -mince -moi -moi-même -moins -mon -moyennant -n -na -ne -néanmoins -neuf -neuvième -ni -nombreuses -nombreux -non -nos -notre -nôtre -nôtres -nous -nous-mêmes -nul -o -o| -ô -oh -ohé -olé -ollé -on -ont -onze -onzième -ore -ou -où -ouf -ouias -oust -ouste -outre -p -paf -pan -par -parmi -partant -particulier -particulière -particulièrement -pas -passé -pendant -personne -peu -peut -peuvent -peux -pff -pfft -pfut -pif -plein -plouf -plus -plusieurs -plutôt -pouah -pour -pourquoi -premier -première -premièrement -près -proche -psitt -puisque -q -qu -quand -quant -quanta -quant-à-soi -quarante -quatorze -quatre -quatre-vingt -quatrième -quatrièmement -que -quel -quelconque -quelle -quelles -quelque -quelques -quelqu'un -quels -qui -quiconque -quinze -quoi -quoique -r -revoici -revoilà -rien -s -sa -sacrebleu -sans -sapristi -sauf -se -seize -selon -sept -septième -sera -seront -ses -si -sien -sienne -siennes -siens -sinon -six -sixième -soi -soi-même -soit -soixante -son -sont -sous -stop -suis -suivant -sur -surtout -t -ta -tac -tant -te -té -tel -telle -tellement -telles -tels -tenant -tes -tic -tien -tienne -tiennes -tiens -toc -toi -toi-même -ton -touchant -toujours -tous -tout -toute -toutes -treize -trente -très -trois -troisième -troisièmement -trop -tsoin -tsouin -tu -u -un -une -unes -uns -v -va -vais -vas -vé -vers -via -vif -vifs -vingt -vivat -vive -vives -vlan -voici -voilà -vont -vos -votre -vôtre -vôtres -vous -vous-mêmes -vu -w -x -y -z -zut -alors -aucuns -bon -devrait -dos -droite -début -essai 
-faites -fois -force -haut -ici -juste -maintenant -mine -mot -nommés -nouveaux -parce -parole -personnes -pièce -plupart -seulement -soyez -sujet -tandis -valeur -voie -voient -état -étions \ No newline at end of file diff --git a/tests/DefaultSizeModel.php b/tests/DefaultSizeModel.php new file mode 100644 index 0000000..dbefd9e --- /dev/null +++ b/tests/DefaultSizeModel.php @@ -0,0 +1,26 @@ +set('database.default', 'testing'); + $app['config']->set('database.connections.testing', [ + 'driver' => 'sqlite', + 'database' => ':memory:', + ]); + } + + protected function setUp(): void + { + parent::setUp(); + + Schema::create('models', function (Blueprint $table) { + $table->id(); + $table->string('field1')->default(''); + $table->string('field2')->default(''); + $table->string('searchable', 500)->default(''); + $table->timestamps(); + }); + + Schema::create('no_timestamp_models', function (Blueprint $table) { + $table->id(); + $table->string('field1')->default(''); + $table->string('searchable', 500)->default(''); + }); + } + + /** + * @throws ReflectionException + */ + public function test_get_first_before_at_field(): void + { + $command = new Enable; + $method = new ReflectionMethod($command, 'getFirstBeforeAtField'); + + $this->assertSame('email', $method->invoke($command, ['id', 'name', 'email', 'created_at', 'updated_at'])); + $this->assertSame('id', $method->invoke($command, ['id', 'created_at'])); + $this->assertSame('created_at', $method->invoke($command, ['created_at'])); + $this->assertSame('name', $method->invoke($command, ['id', 'name'])); + $this->assertNull($method->invoke($command, [])); + } + + public function test_command_with_nonexistent_model(): void + { + $this->artisan('searchable:enable', ['--model' => 'NonExistent\\ClassName']) + ->assertExitCode(1) + ; + } + + public function test_command_with_short_model_name_prepends_app_models(): void + { + $this->artisan('searchable:enable', ['--model' => 'SomeModel']) + ->expectsOutput('Provided Model FQN 
not found: \\App\\Models\\SomeModel') + ->assertExitCode(1) + ; + } + + public function test_command_with_nonexistent_root(): void + { + $this->artisan('searchable:enable', ['--root' => '/nonexistent/path']) + ->assertExitCode(1) + ; + } + + public function test_command_scans_directory_no_models_found(): void + { + $emptyDir = sys_get_temp_dir() . '/searchable_empty_' . uniqid(); + mkdir($emptyDir); + + try { + $this->artisan('searchable:enable', ['--root' => $emptyDir]) + ->assertExitCode(0) + ; + } finally { + rmdir($emptyDir); + } + } + + public function test_configure_model_adds_column(): void + { + Schema::drop('models'); + Schema::create('models', function (Blueprint $table) { + $table->id(); + $table->string('field1')->default(''); + $table->string('field2')->default(''); + $table->timestamps(); + }); + + $this->assertFalse(Schema::hasColumn('models', 'searchable')); + + try { + Artisan::call('searchable:enable', ['--model' => Model::class]); + } catch (Throwable) { + // FULLTEXT index creation is not supported on SQLite + } + + $this->assertTrue(Schema::hasColumn('models', 'searchable')); + } + + public function test_saving_hook_populates_searchable_field(): void + { + $model = Model::create(['field1' => 'John', 'field2' => 'Doe']); // @phpstan-ignore staticMethod.notFound + + $this->assertSame('john doe', $model->searchable); + $this->assertSame('john doe', DB::table('models')->where('id', $model->id)->value('searchable')); + } + + public function test_index(): void + { + DB::table('models')->insert([ + ['field1' => 'John', 'field2' => 'Doe', 'searchable' => '', 'created_at' => now(), 'updated_at' => now()], + ['field1' => 'Jane', 'field2' => 'Smith', 'searchable' => '', 'created_at' => now(), 'updated_at' => now()], + ]); + + $this->artisan('searchable:enable', ['--model' => Model::class, '--index' => true]) + ->assertExitCode(0) + ; + + $this->assertSame('john doe', DB::table('models')->where('id', 1)->value('searchable')); + $this->assertSame('jane 
smith', DB::table('models')->where('id', 2)->value('searchable')); + } + + public function test_index_phonetic(): void + { + DB::table('models')->insert([ + ['field1' => 'John', 'field2' => 'Smith', 'searchable' => '', 'created_at' => now(), 'updated_at' => now()], + ]); + + $this->artisan('searchable:enable', ['--model' => PhoneticModel::class, '--index' => true]) + ->assertExitCode(0) + ; + + $this->assertSame('john smith jn sm0', DB::table('models')->where('id', 1)->value('searchable')); + } + + public function test_command_scans_model_directory(): void + { + $this->artisan('searchable:enable', ['--root' => __DIR__]) + ->assertExitCode(0) + ; + } + + public function test_configure_model_without_timestamps(): void + { + Schema::drop('no_timestamp_models'); + Schema::create('no_timestamp_models', function (Blueprint $table) { + $table->id(); + $table->string('field1')->default(''); + }); + + $this->assertFalse(Schema::hasColumn('no_timestamp_models', 'searchable')); + + try { + Artisan::call('searchable:enable', ['--model' => NoTimestampModel::class]); + } catch (Throwable) { + // FULLTEXT index creation is not supported on SQLite + } + + $this->assertTrue(Schema::hasColumn('no_timestamp_models', 'searchable')); + } +} diff --git a/tests/ListenerTest.php b/tests/ListenerTest.php new file mode 100644 index 0000000..70a47af --- /dev/null +++ b/tests/ListenerTest.php @@ -0,0 +1,99 @@ +getConstructor() + ->getNumberOfParameters() > 1 + ; + } + + public function test_runs_enable_after_up_migration(): void + { + $mock = Mockery::mock(); + $mock->shouldReceive('call') // @phpstan-ignore method.notFound + ->once() + ->with('searchable:enable', [], Mockery::type(ConsoleOutput::class)) + ; + Artisan::swap($mock); + + (new SearchableEnableAfterMigrate)->handle(new MigrationsEnded('up')); + } + + public function test_skips_on_down_migration(): void + { + $mock = Mockery::mock(); + $mock->shouldReceive('call')->never(); // @phpstan-ignore method.notFound + 
Artisan::swap($mock); + + (new SearchableEnableAfterMigrate)->handle(new MigrationsEnded('down')); + } + + public function test_skips_on_pretend(): void + { + if (! self::supportsOptions()) { + $this->markTestSkipped('MigrationsEnded does not support options before Laravel 11'); + } + + $mock = Mockery::mock(); + $mock->shouldReceive('call')->never(); // @phpstan-ignore method.notFound + Artisan::swap($mock); + + (new SearchableEnableAfterMigrate)->handle(new MigrationsEnded('up', ['pretend' => true])); + } + + public function test_runs_when_pretend_is_false(): void + { + if (! self::supportsOptions()) { + $this->markTestSkipped('MigrationsEnded does not support options before Laravel 11'); + } + + $mock = Mockery::mock(); + $mock->shouldReceive('call') // @phpstan-ignore method.notFound + ->once() + ->with('searchable:enable', [], Mockery::type(ConsoleOutput::class)) + ; + Artisan::swap($mock); + + (new SearchableEnableAfterMigrate)->handle(new MigrationsEnded('up', ['pretend' => false])); + } + + public function test_runs_when_options_missing_pretend(): void + { + if (! 
self::supportsOptions()) { + $this->markTestSkipped('MigrationsEnded does not support options before Laravel 11'); + } + + $mock = Mockery::mock(); + $mock->shouldReceive('call') // @phpstan-ignore method.notFound + ->once() + ->with('searchable:enable', [], Mockery::type(ConsoleOutput::class)) + ; + Artisan::swap($mock); + + (new SearchableEnableAfterMigrate)->handle(new MigrationsEnded('up', [])); + } +} diff --git a/tests/Model.php b/tests/Model.php index 689bf93..b319b6d 100644 --- a/tests/Model.php +++ b/tests/Model.php @@ -1,17 +1,24 @@ search(string|array $search, ?string $order = 'DESC') + */ +class Model extends \Illuminate\Database\Eloquent\Model implements SearchableInterface { use Searchable; protected $guarded = []; @@ -20,9 +27,6 @@ class Model extends \Illuminate\Database\Eloquent\Model 'field2', ]; - /** - * @return int - */ public function getSearchableFieldDbSize(): int { return 500; diff --git a/tests/ModelTest.php b/tests/ModelTest.php index f7081fc..d2f9e3e 100644 --- a/tests/ModelTest.php +++ b/tests/ModelTest.php @@ -1,8 +1,10 @@ assertSame(['field1', 'field2'], (new Model)->getSearchables()); } - public function testGetSearchableContent() + public function test_get_searchable_content(): void { $this->assertSame('value1 value2', (new Model)->fill([ 'field1' => 'value1', @@ -25,24 +27,82 @@ public function testGetSearchableContent() ])->getSearchableContent()); } - public function testGetSearchableFieldDbSize() + public function test_get_searchable_content_with_additional(): void + { + $this->assertSame('value1 value2 extra', (new Model)->fill([ + 'field1' => 'value1', + 'field2' => 'value2', + ])->getSearchableContent('Extra')); + } + + public function test_get_searchable_field_db_size(): void { $this->assertSame(500, (new Model)->getSearchableFieldDbSize()); } - public function testGetSearchableFieldDbType() + public function test_get_searchable_field_db_type(): void { $this->assertSame('string', (new Model)->getSearchableFieldDbType()); } - 
public function testGetSearchableField() + public function test_get_searchable_field(): void { $this->assertSame('searchable', (new Model)->getSearchableField()); } - public function testBootSearchable() + public function test_get_searchable_ts_config(): void + { + $this->assertSame('english', (new Model)->getSearchableTsConfig()); + } + + public function test_get_searchable_phonetic_default(): void + { + $this->assertFalse((new Model)->getSearchablePhonetic()); + } + + public function test_get_searchable_phonetic_enabled(): void + { + $this->assertTrue((new PhoneticModel)->getSearchablePhonetic()); + } + + public function test_get_searchable_content_phonetic(): void { + $content = (new PhoneticModel)->fill([ + 'field1' => 'John', + 'field2' => 'Smith', + ])->getSearchableContent(); + + $this->assertSame('john smith jn sm0', $content); + } + + public function test_get_searchable_content_phonetic_with_additional(): void + { + $content = (new PhoneticModel)->fill([ + 'field1' => 'John', + 'field2' => 'Smith', + ])->getSearchableContent('Extra'); + + $this->assertSame('john smith extra jn sm0 ekstr', $content); + } + + public function test_get_searchable_content_custom_phonetic(): void + { + $content = (new PhoneticFrModel)->fill([ + 'field1' => 'Jean', + 'field2' => 'Dupont', + ])->getSearchableContent(); + + $this->assertSame('jean dupont jan dupon', $content); + } + + public function test_boot_searchable(): void + { + $this->expectNotToPerformAssertions(); Model::bootSearchable(); - $this->assertTrue(true); + } + + public function test_get_searchable_field_db_size_default(): void + { + $this->assertSame(500, (new DefaultSizeModel)->getSearchableFieldDbSize()); } } diff --git a/tests/NoTimestampModel.php b/tests/NoTimestampModel.php new file mode 100644 index 0000000..de3a734 --- /dev/null +++ b/tests/NoTimestampModel.php @@ -0,0 +1,31 @@ +assertSame($expected, Phonetic::encode($input), "$input => $expected"); + } + + public static function encodeProvider(): array + 
{ + return [ + ['e', ''], + ['Gendarme', 'JANDARM'], + ['Athmosphérique', 'ATMOSFERIK'], + ['Morceaux', 'MORSO'], + ['Sciemment', 'SIAMAN'], + ['Comportement', 'KONPORTEMAN'], + ['Sceau', 'SO'], + ['Seau', 'SO'], + ['Sot', 'SO'], + ['Saut', 'SO'], + ['Soûl', 'SOUL'], + ['Description', 'DESKRIPSION'], + ['Verre', 'VER'], + ['Vert', 'VER'], + ['Vers', 'VER'], + ['Saule', 'SOL'], + ['Sol', 'SOL'], + ['Gnognotte', 'NIONIOT'], + ['Pendentif', 'PANDANTIF'], + ['fable', 'FABL'], + ['limer', 'LIM'], + ['inconvenante', 'INKONVENANT'], + ['époussette', 'EPOUSET'], + ['brinqueballai', 'BRINKEBALAI'], + ["entr'égorgerions", 'ANTREGORJERION'], + ['rengraciais', 'RANGRASIAI'], + ['chaufour', 'CHOFOUR'], + ['invoquai', 'INVOKAI'], + ['arborescente', 'ARBORESANT'], + ['gérai', 'JERAI'], + ['émoi', 'EMOI'], + ['humification', 'UMIFIKASION'], + ['anachorétique', 'ANAKORETIK'], + ['réputer', 'REPUT'], + ['poli', 'POLI'], + ['dégazonnais', 'DEGAZONAI'], + ['épicycloïdale', 'EPISIKLOIDAL'], + ['girofle', 'JIROFL'], + ['flush', 'FLUCH'], + ['mea', 'MEA'], + ['tronquer', 'TRONK'], + ['chérir', 'CHERIR'], + ['alléchée', 'ALECH'], + ['rabattais', 'RABATAI'], + ['arrimeur', 'ARIMER'], + ['prêche', 'PRECH'], + ['pinailleuse', 'PINAIES'], + ['armorier', 'ARMORI'], + ['insulte', 'INSULT'], + ['fondre', 'FONDR'], + ['du', 'DU'], + ['solmiser', 'SOLMIS'], + ['typhlite', 'TIFLIT'], + ['vous', 'VOU'], + ['végétale', 'VEJETAL'], + ['vulcanales', 'VULKANAL'], + ['bistrais', 'BISTRAI'], + ['circonvallation', 'SIRKONVALASION'], + ['calligraphier', 'KALIGRAFI'], + ['poétiserai', 'POETISERAI'], + ['cérémonielle', 'SEREMONIEL'], + ['zinzinulais', 'ZINZINULAI'], + ['douais', 'DOI'], + ['surine', 'SURIN'], + ['hiérarchiquement', 'IERARCHIKEMAN'], + ['tapiner', 'TAPIN'], + ['manouche', 'MANOUCH'], + ['épeuler', 'EPEL'], + ['démaillais', 'DEMAIAI'], + ['hocher', 'OCH'], + ['dol', 'DOL'], + ['bombement', 'BONBEMAN'], + ['contrapontiste', 'KONTRAPONTIST'], + ['duègne', 'DUAN'], + ['staphylococcie', 
'STAFILOKOKSI'], + ['empestée', 'ANPEST'], + ['jaseran', 'JASERAN'], + ['chanfreinerai', 'CHANFRAINERAI'], + ['jobardai', 'JOBARDAI'], + ['éclairerai', 'EKLAIRERAI'], + ['wallonisme', 'OILONISM'], + ['ouvrageais', 'OUVRAJAI'], + ['hostilement', 'OSTILEMAN'], + ['alarmais', 'ALARMAI'], + ['comparaison', 'KONPARAISON'], + ['déjetai', 'DEJETAI'], + ['empapillote', 'ANPAPILOT'], + ['caillouterai', 'KAILOUTERAI'], + ['mystagogie', 'MISTAGOJI'], + ['ceinturai', 'SINTURAI'], + ['métaphoriquement', 'METAFORIKEMAN'], + ['dinothérium', 'DINOTERIUM'], + ['subrécargue', 'SUBREKARG'], + ['étoilais', 'ETOILAI'], + ['ridicule', 'RIDIKUL'], + ['organisatrice', 'ORGANISATRIS'], + ['réinscrivis', 'RINSKRIVI'], + ['morcelle', 'MORSEL'], + ['dotation', 'DOTASION'], + ['triolet', 'TRIOL'], + ['live', 'LIV'], + ['reformerai', 'REFORMERAI'], + ['vulcanologue', 'VULKANOLOG'], + ['levrettai', 'LEVRETAI'], + ['dépravai', 'DEPRAVAI'], + ['jugerai', 'JUJERAI'], + ['amadouerai', 'AMADOUERAI'], + ['mélancoliquement', 'MELANKOLIKEMAN'], + ['nécrologique', 'NEKROLOJIK'], + ['contremarque', 'KONTREMARK'], + ['maussaderie', 'MOSADERI'], + ['poile', 'POIL'], + ['laitier', 'LAITI'], + ['motiver', 'MOTIV'], + ['onglette', 'ONGLET'], + ['apostasié', 'APOSTASI'], + ['prorata', 'PRORATA'], + ['transept', 'TRANSEP'], + ['échaulerai', 'ECHOLERAI'], + ['barmaid', 'BARMAI'], + ['totalisation', 'TOTALISASION'], + ['warrante', 'OIRANT'], + ['dégoiserai', 'DEGOISERAI'], + ['absenterions', 'ABSANTERION'], + ['vocalisme', 'VOKALISM'], + ['brumera', 'BRUMERA'], + ['précepte', 'PRESEPT'], + ['bimillénaire', 'BIMILENAIR'], + ['conditionnais', 'KONDISIONAI'], + ['obéir', 'OBAIR'], + ['rentrer', 'RANTR'], + ['extérioriser', 'EXTERIORIS'], + ['moulineuse', 'MOULINES'], + ['interjetais', 'INTERJETAI'], + ['trépanais', 'TREPANAI'], + ['pestiférer', 'PESTIFER'], + ['jeunette', 'JENET'], + ['agenouillassions', 'AJENOUIASION'], + ['divinisais', 'DIVINISAI'], + ['étalon', 'ETALON'], + ['mouliner', 'MOULIN'], + 
['incontinente', 'INKONTINANT'], + ['approcherai', 'APROCHERAI'], + ['engendrerai', 'ANJANDRERAI'], + ['monologuerai', 'MONOLOGERAI'], + ['viticulture', 'VITIKULTUR'], + ['palettais', 'PALETAI'], + ['préhensile', 'PREANSIL'], + ['phosphatais', 'FOSFATAI'], + ['clownerie', 'KLOUNERI'], + ['féminin', 'FEMININ'], + ['gras', 'GRA'], + ['embuscade', 'ANBUSKAD'], + ['vomirai', 'VOMIRAI'], + ['débarrai', 'DEBARAI'], + ['haltérophile', 'ALTEROFIL'], + ['juxtapose', 'JUXTAPOS'], + ['clorai', 'KLORAI'], + ['ascèse', 'ASES'], + ['Évreux', 'EVR'], + ['cidre', 'SIDR'], + ['lithiasique', 'LITIASIK'], + ['ministérielle', 'MINISTERIEL'], + ['cantilever', 'KANTILEV'], + ['rai', 'RAI'], + ['engouffrai', 'ANGOUFRAI'], + ['orpheline', 'ORFELIN'], + ['luciférienne', 'LUSIFERIN'], + ['assainis', 'ASAINI'], + ['changerai', 'CHANJERAI'], + ['liliacée', 'LILIAS'], + ['minimisation', 'MINIMISASION'], + ['panorama', 'PANORAMA'], + ['pommadai', 'POMADAI'], + ['disloquer', 'DISLOK'], + ['dramatiser', 'DRAMATIS'], + ['profiteuse', 'PROFITES'], + ['garnirai', 'GARNIRAI'], + ['anévrisme', 'ANEVRISM'], + ['palanquerai', 'PALANKERAI'], + ['déflation', 'DEFLASION'], + ['frôlement', 'FROLEMAN'], + ['confluent', 'KONFLUAN'], + ['déstructurer', 'DESTRUKTUR'], + ['Hecke', 'EK'], + ['assemblée', 'ASANBL'], + ['comploteuse', 'KONPLOTES'], + ['conseille', 'KONSAIL'], + ['pallier', 'PALI'], + ['partance', 'PARTANS'], + ['grange', 'GRANJ'], + ['insensible', 'INSANSIBL'], + ['salutaire', 'SALUTAIR'], + ['transsubstantier', 'TRANSUBSTANTI'], + ['pantellerai', 'PANTELERAI'], + ['affectionne', 'AFEKSION'], + ['locherai', 'LOCHERAI'], + ['enfermai', 'ANFERMAI'], + ['redémolirai', 'REDEMOLIRAI'], + ['raclement', 'RAKLEMAN'], + ['barbouilleuse', 'BARBOUIES'], + ['déréglais', 'DEREGLAI'], + ['trinque', 'TRINK'], + ['séléniate', 'SELENIAT'], + ['liqueur', 'LIKER'], + ['quelque', 'KELK'], + ['distinguable', 'DISTINGABL'], + ['herbais', 'ERBAI'], + ['figement', 'FIJEMAN'], + ['sépulture', 'SEPULTUR'], + ['fustige', 
'FUSTIJ'], + ['manicle', 'MANIKL'], + ['huppe', 'UP'], + ['quanta', 'KANTA'], + ['isochronisme', 'ISOKRONISM'], + ['anastomosassions', 'ANASTOMOSASION'], + ['sensibilisation', 'SANSIBILISASION'], + ['vidéodisque', 'VIDEODISK'], + ['décongestionne', 'DEKONJESTION'], + ['antéposai', 'ANTEPOSAI'], + ['antérieurement', 'ANTERIEREMAN'], + ['épincèle', 'EPINSEL'], + ['aplanirai', 'APLANIRAI'], + ['pistil', 'PISTI'], + ['consternerai', 'KONSTERNERAI'], + ['verduniserai', 'VERDUNISERAI'], + ['don', 'DON'], + ['anticolonialisme', 'ANTIKOLONIALISM'], + ['histologique', 'ISTOLOJIK'], + ['ombrette', 'ONBRET'], + ['grossier', 'GROSI'], + ['sexuellement', 'SEXUELEMAN'], + ['daguerai', 'DAGERAI'], + ['démêlais', 'DEMELAI'], + ['fouinai', 'FOUINAI'], + ['bourrèlement', 'BOURELEMAN'], + ['suçais', 'SUSAI'], + ['constitutionnalise', 'KONSTITUSIONALIS'], + ['indécision', 'INDESISION'], + ['relayer', 'RELAI'], + ['Raymonde', 'RAIMOND'], + ['entraînement', 'ANTRAINEMAN'], + ['ouïssent', 'OUISAN'], + ['Ghislain', 'GISLIN'], + ['hébraïsme', 'EBRAISM'], + ['romarin', 'ROMARIN'], + ['bonimenter', 'BONIMANT'], + ['mégis', 'MEJI'], + ['lessiverai', 'LESIVERAI'], + ['envieillir', 'ANVIEILIR'], + ['airé', 'AIR'], + ['prédicable', 'PREDIKABL'], + ['vérifiable', 'VERIFIABL'], + ['cric', 'KRIK'], + ['désenvenimai', 'DESANVENIMAI'], + ['orseille', 'ORSAIL'], + ['ramoner', 'RAMON'], + ['agglutinais', 'AGLUTINAI'], + ['fuyarde', 'FUIARD'], + ['hourdirai', 'OURDIRAI'], + ['hovercraft', 'OVERKRAF'], + ['ravilirai', 'RAVILIRAI'], + ['aplatisseur', 'APLATISER'], + ['stertoreuse', 'STERTORES'], + ['ébourrais', 'EBOURAI'], + ['bédane', 'BEDAN'], + ['contrindique', 'KONTRINDIK'], + ['encorder', 'ANKORD'], + ['extravaserai', 'EXTRAVASERAI'], + ['aride', 'ARID'], + ['entente', 'ANTANT'], + ['urobilinogène', 'UROBILINOJAN'], + ['bouteroue', 'BOUTEROU'], + ['Andrée', 'ANDR'], + ['engraisserai', 'ANGRAISERAI'], + ['acolytat', 'AKOLITA'], + ['allocation', 'ALOKASION'], + ['contrindiquai', 'KONTRINDIKAI'], + 
['caboche', 'KABOCH'], + ['affiloir', 'AFILOIR'], + ['abstrait', 'ABSTRAI'], + ['terreautage', 'TEROTAJ'], + ['concupiscente', 'KONKUPISANT'], + ['lave', 'LAV'], + ['hippiatre', 'IPIATR'], + ['goulache', 'GOULACH'], + ['exonder', 'EXOND'], + ['néocolonialiste', 'NEOKOLONIALIST'], + ['tailleuse', 'TAIES'], + ['uracile', 'URASIL'], + ['conductibilité', 'KONDUKTIBILIT'], + ['paix', 'PAI'], + ['typique', 'TIPIK'], + ['grigri', 'GRIGRI'], + ['débourrais', 'DEBOURAI'], + ['approfondissais', 'APROFONDISAI'], + ['Radeon', 'RADEON'], + ['couscous', 'KOUSKOU'], + ['déchausse', 'DECHOS'], + ['dorais', 'DORAI'], + ['indirecte', 'INDIREKT'], + ['enchaîne', 'ANCHAIN'], + ['dépeuplais', 'DEPEPLAI'], + ['libellai', 'LIBELAI'], + ['pleurale', 'PLERAL'], + ['crayonner', 'KRAION'], + ['désemmanche', 'DESEMANCH'], + ['lydienne', 'LIDIN'], + ['démêloir', 'DEMELOIR'], + ['armoise', 'ARMOIS'], + ['épouvantement', 'EPOUVANTEMAN'], + ['helléniserai', 'ELENISERAI'], + ['directive', 'DIREKTIV'], + ['justifiais', 'JUSTIFIAI'], + ['casserole', 'KASEROL'], + ['écloper', 'EKLOP'], + ['bêta', 'BETA'], + ['astigmatisme', 'ASTIGMATISM'], + ['rythmicité', 'RITMISIT'], + ['alimentaire', 'ALIMANTAIR'], + ['saurisseur', 'SORISER'], + ['retournement', 'RETOURNEMAN'], + ['interruptive', 'INTERUPTIV'], + ['crame', 'KRAM'], + ['pistolet', 'PISTOL'], + ['faïencée', 'FINS'], + ['Creuse', 'KRES'], + ['arraisonnais', 'ARAISONAI'], + ['examinai', 'EXAMINAI'], + ['déclenche', 'DEKLANCH'], + ['capotais', 'KAPOTAI'], + ['sensationnalisme', 'SANSASIONALISM'], + ['retrait', 'RETRAI'], + ['recourber', 'REKOURB'], + ['forais', 'FORAI'], + ['vomissure', 'VOMISUR'], + ['magnétocassette', 'MANIETOKASET'], + ['bubonique', 'BUBONIK'], + ['inconditionnelle', 'INKONDISIONEL'], + ['pâtre', 'PATR'], + ['hachurai', 'ACHURAI'], + ['extrairai', 'EXTRAIRAI'], + ['trifouiller', 'TRIFOUIL'], + ['impulsion', 'INPULSION'], + ['besant', 'BESAN'], + ['piscine', 'PISIN'], + ['fructose', 'FRUKTOS'], + ['graveleuse', 'GRAVELES'], + 
['illuminerai', 'ILUMINERAI'], + ['sapin', 'SAPIN'], + ['louerai', 'LOUERAI'], + ['affirmativement', 'AFIRMATIVEMAN'], + ['mastologie', 'MASTOLOJI'], + ['égrugeoir', 'EGRUJOIR'], + ['précautionner', 'PREKOSION'], + ['rectangulaire', 'REKTANGULAIR'], + ['glace', 'GLAS'], + ['empennais', 'ANPENAI'], + ['fronteau', 'FRONTO'], + ['gringalet', 'GRINGAL'], + ['celui', 'SELUI'], + ['effrontément', 'EFRONTEMAN'], + ['saloper', 'SALOP'], + ['fibrinogène', 'FIBRINOJAN'], + ['détonnelle', 'DETONEL'], + ['chatouilleuse', 'CHATOUIES'], + ['gambader', 'GANBAD'], + ['soulage', 'SOULAJ'], + ['graciai', 'GRASIAI'], + ['digitalisais', 'DIJITALISAI'], + ['aberrai', 'ABERAI'], + ['bisexualité', 'BISEXUALIT'], + ['urinais', 'URINAI'], + ['afflictive', 'AFLIKTIV'], + ['taponne', 'TAPON'], + ['fredaine', 'FREDAIN'], + ['désapprobation', 'DESAPROBASION'], + ['cuveler', 'KUVEL'], + ['remplage', 'RANPLAJ'], + ["D'Alix", 'DALI'], + ['picturale', 'PIKTURAL'], + ['igloo', 'IGLOU'], + ['décreusage', 'DEKRESAJ'], + ['emparâtes', 'ANPARAT'], + ['subsistais', 'SUBSISTAI'], + ['réquisitionner', 'REKISISION'], + ['engraver', 'ANGRAV'], + ['perquisitionnais', 'PERKISISIONAI'], + ['gazonne', 'GAZON'], + ['trucidais', 'TRUSIDAI'], + ['piédouche', 'PIEDOUCH'], + ['insolite', 'INSOLIT'], + ['cholédoque', 'KOLEDOK'], + ['fourvoie', 'FOURVOI'], + ['rouscaillerai', 'ROUSKAILERAI'], + ['remouchai', 'REMOUCHAI'], + ['lido', 'LIDO'], + ['francophile', 'FRANKOFIL'], + ['présélecteur', 'PRESELEKTER'], + ['rhumb', 'RUMB'], + ['purification', 'PURIFIKASION'], + ['tornade', 'TORNAD'], + ['javelle', 'JAVEL'], + ['imposais', 'INPOSAI'], + ['passim', 'PASIM'], + ['exécution', 'EXEKUSION'], + ['emmottée', 'EMOT'], + ['ironique', 'IRONIK'], + ['bizarroïde', 'BIZAROID'], + ['genièvre', 'JENIEVR'], + ['acanthe', 'AKANT'], + ['resocialisais', 'RESOSIALISAI'], + ['Rokhlin', 'ROKLIN'], + ['quartile', 'KARTIL'], + ['sursis', 'SURSI'], + ['archéologie', 'ARKEOLOJI'], + ['réessaye', 'RESAI'], + ['germon', 'JERMON'], + 
['baladin', 'BALADIN'], + ['louvette', 'LOUVET'], + ['régentais', 'REJANTAI'], + ['exèdre', 'EXEDR'], + ['dénigrai', 'DENIGRAI'], + ['irresponsable', 'IRESPONSABL'], + ['adjectivai', 'ADJEKTIVAI'], + ['senau', 'SENO'], + ['ululerai', 'ULULERAI'], + ['prorogeais', 'PROROJAI'], + ['entichassions', 'ANTICHASION'], + ['tudesque', 'TUDESK'], + ['inférioriser', 'INFERIORIS'], + ['carolus', 'KAROLU'], + ['vicennale', 'VISENAL'], + ['greffe', 'GR'], + ['privatisais', 'PRIVATISAI'], + ['monogamie', 'MONOGAMI'], + ['anaphore', 'ANAFOR'], + ['concassage', 'KONKASAJ'], + ['escortai', 'ESKORTAI'], + ['patent', 'PATAN'], + ['verdoiement', 'VERDOIMAN'], + ['néanmoins', 'NEANMOIN'], + ['encartouchai', 'ANKARTOUCHAI'], + ['isobare', 'ISOBAR'], + ['moucheron', 'MOUCHERON'], + ['douce', 'DOUS'], + ['bretessée', 'BRETES'], + ['agonie', 'AGONI'], + ['atomise', 'ATOMIS'], + ['godronne', 'GODRON'], + ['boursicotais', 'BOURSIKOTAI'], + ['vibrisse', 'VIBRIS'], + ['soucieuse', 'SOUSIES'], + ['incertaine', 'INSERTAIN'], + ['câpre', 'KAPR'], + ['torpide', 'TORPID'], + ['mécomptais', 'MEKONTAI'], + ['poinçonne', 'POINSON'], + ['carier', 'KARI'], + ['antidéflagrante', 'ANTIDEFLAGRANT'], + ['arraisonnée', 'ARAISON'], + ['pâtissier', 'PATISI'], + ['rossolis', 'ROSOLI'], + ['pronostiquerai', 'PRONOSTIKERAI'], + ['ossifiée', 'OSIFI'], + ['rentrais', 'RANTRAI'], + ['pifomètre', 'PIFOMETR'], + ['empiècement', 'ANPIESEMAN'], + ['intimité', 'INTIMIT'], + ['toilettais', 'TOILETAI'], + ['cagné', 'KAN'], + ['pesamment', 'PESAMAN'], + ['marivaudage', 'MARIVODAJ'], + ['dissimulai', 'DISIMULAI'], + ['empourprerai', 'ANPOURPRERAI'], + ['influencée', 'INFLUANS'], + ['bistournerai', 'BISTOURNERAI'], + ['brimer', 'BRIM'], + ['bouleverser', 'BOULEVERS'], + ['enchatonnée', 'ANCHATON'], + ['restreignais', 'RESTRAINIAI'], + ['marquis', 'MARKI'], + ['Roch', 'ROCH'], + ['écornée', 'EKORN'], + ['proposais', 'PROPOSAI'], + ['cafetière', 'KAFETIER'], + ['vomir', 'VOMIR'], + ['ballottage', 'BALOTAJ'], + ['châle', 'CHAL'], 
+ ['festivalière', 'FESTIVALIER'], + ['poétise', 'POETIS'], + ['discutable', 'DISKUTABL'], + ['bobiner', 'BOBIN'], + ['autosuggestionnâmes', 'OTOSUJESTIONAM'], + ['audiomètre', 'ODIOMETR'], + ['déviationniste', 'DEVIASIONIST'], + ['sarcler', 'SARKL'], + ['gérante', 'JERANT'], + ['conga', 'KONGA'], + ['gnognotte', 'NIONIOT'], + ['philatélique', 'FILATELIK'], + ['égaliseur', 'EGALISER'], + ['concélébrais', 'KONSELEBRAI'], + ['cachucha', 'KACHUCHA'], + ['sprintai', 'SPRINTAI'], + ['décachetai', 'DEKACHETAI'], + ['factice', 'FAKTIS'], + ['dépensais', 'DEPANSAI'], + ['agrichais', 'AGRICHAI'], + ['intervenante', 'INTERVENANT'], + ['engrumellerai', 'ANGRUMELERAI'], + ['ébarber', 'EBARB'], + ['incomprise', 'INKONPRIS'], + ['anciennement', 'ANSIANEMAN'], + ['remarcher', 'REMARCH'], + ['spéculatrice', 'SPEKULATRIS'], + ['détirais', 'DETIRAI'], + ['couillon', 'KOUILON'], + ['crotte', 'KROT'], + ['saumure', 'SOMUR'], + // Edge cases for coverage + ['tabac', 'TABA'], // exception match (line 93) + ['fuel', 'FIOUL'], // FUEL special case (line 107) + ['eau', 'O'], // single letter O return (line 112) + ['bds', 'BDS'], // abbreviation: 3+ consecutive consonants (line 119) + ['pet', 'PE'], // simple word: consonant+vowel, 3 chars (lines 124-126) + ['le', 'LE'], // backup code fallback (line 131) + ]; + } + + public function test_empty_string(): void + { + $this->assertSame('', Phonetic::encode('')); + } + + public function test_homophones(): void + { + // Sceau, Seau, Sot, Saut should all produce 'SO' + $codes = array_map(fn ($w) => Phonetic::encode($w), ['Sceau', 'Seau', 'Sot', 'Saut']); + $this->assertCount(1, array_unique($codes)); + $this->assertSame('SO', current($codes)); + + // Verre, Vert, Vers should all produce 'VER' + $codes = array_map(fn ($w) => Phonetic::encode($w), ['Verre', 'Vert', 'Vers']); + $this->assertCount(1, array_unique($codes)); + $this->assertSame('VER', current($codes)); + + // Saule, Sol should produce 'SOL' + $codes = array_map(fn ($w) => 
Phonetic::encode($w), ['Saule', 'Sol']); + $this->assertCount(1, array_unique($codes)); + $this->assertSame('SOL', current($codes)); + } +} diff --git a/tests/SearchQueryTest.php b/tests/SearchQueryTest.php new file mode 100644 index 0000000..7979a15 --- /dev/null +++ b/tests/SearchQueryTest.php @@ -0,0 +1,228 @@ +set('database.default', 'mysql_fake'); + $app['config']->set('database.connections.mysql_fake', [ + 'driver' => 'mysql', + 'host' => '', + 'database' => '', + 'username' => '', + 'password' => '', + ]); + $app['config']->set('database.connections.pgsql_fake', [ + 'driver' => 'pgsql', + 'host' => '', + 'database' => '', + 'username' => '', + 'password' => '', + ]); + } + + private function queryForDriver(string $connection = 'mysql_fake'): Builder + { + $model = new Model; + $model->setConnection($connection); + + return $model->newQuery(); + } + + public function test_get_order(): void + { + $sq = new SearchQuery; + + $this->assertSame('DESC', $sq->getOrder('DESC')); + $this->assertSame('ASC', $sq->getOrder('ASC')); + $this->assertSame('', $sq->getOrder('INVALID')); + $this->assertSame('', $sq->getOrder(null)); + $this->assertSame('', $sq->getOrder('')); + } + + public function test_add_match_mysql(): void + { + $sq = new SearchQuery('DESC'); + $query = $this->queryForDriver(); + $sq->addMatch($query, 'hello world'); + + $sql = $query->toSql(); + $bindings = $query->getBindings(); + + $this->assertStringContainsString('MATCH', $sql); + $this->assertStringContainsString('AGAINST', $sql); + $this->assertStringContainsString('BOOLEAN MODE', $sql); + $this->assertStringContainsString('DESC', $sql); + $this->assertSame(['hello* world*', 'hello* world*'], $bindings); + } + + public function test_add_match_pgsql(): void + { + $sq = new SearchQuery('DESC'); + $query = $this->queryForDriver('pgsql_fake'); + $sq->addMatch($query, 'hello world'); + + $sql = $query->toSql(); + $bindings = $query->getBindings(); + + $this->assertStringContainsString('to_tsvector', 
$sql); + $this->assertStringContainsString('to_tsquery', $sql); + $this->assertStringContainsString('ts_rank', $sql); + $this->assertStringContainsString('DESC', $sql); + $this->assertSame(['hello:* & world:*', 'hello:* & world:*'], $bindings); + } + + public function test_add_match_empty_terms(): void + { + $sq = new SearchQuery; + $query = $this->queryForDriver(); + $sq->addMatch($query, ''); + + $this->assertEmpty($query->getQuery()->wheres); + $this->assertEmpty($query->getQuery()->orders); + $this->assertEmpty($query->getBindings()); + } + + public function test_add_match_with_table_alias(): void + { + $sq = new SearchQuery('DESC'); + $query = $this->queryForDriver(); + $sq->addMatch($query, 'hello', 'alias'); + + $this->assertStringContainsString('alias.searchable', $query->toSql()); + } + + public function test_add_match_without_order(): void + { + $sq = new SearchQuery(null); + $query = $this->queryForDriver(); + $sq->addMatch($query, 'hello'); + + $sql = $query->toSql(); + + $this->assertStringContainsString('MATCH', $sql); + $this->assertSame(1, substr_count($sql, 'MATCH')); + $this->assertEmpty($query->getQuery()->orders); + $this->assertSame(['hello*'], $query->getBindings()); + } + + public function test_add_match_with_explicit_order(): void + { + $sq = new SearchQuery(null); + $query = $this->queryForDriver(); + $sq->addMatch($query, 'hello', '', 'ASC'); + + $sql = $query->toSql(); + + $this->assertSame(2, substr_count($sql, 'MATCH')); + $this->assertStringContainsString('ASC', $sql); + } + + public function test_add_match_phonetic_mysql(): void + { + $sq = new SearchQuery('DESC', SearchQuery::SEARCHABLE_FIELD, 'english', true); + $query = $this->queryForDriver(); + $sq->addMatch($query, 'john'); + + $this->assertSame(['john* jn*', 'john* jn*'], $query->getBindings()); + } + + public function test_add_match_phonetic_pgsql(): void + { + $sq = new SearchQuery('DESC', SearchQuery::SEARCHABLE_FIELD, 'english', true); + $query = 
$this->queryForDriver('pgsql_fake'); + $sq->addMatch($query, 'john'); + + $this->assertSame(['john:* & jn:*', 'john:* & jn:*'], $query->getBindings()); + } + + public function test_add_match_pgsql_custom_ts_config(): void + { + $sq = new SearchQuery('DESC', SearchQuery::SEARCHABLE_FIELD, 'french'); + $query = $this->queryForDriver('pgsql_fake'); + $sq->addMatch($query, 'bonjour'); + + $sql = $query->toSql(); + + $this->assertStringContainsString("'french'", $sql); + $this->assertStringNotContainsString("'english'", $sql); + } + + public function test_scope_search_mysql(): void + { + $query = $this->queryForDriver(); + $query->search('hello world'); // @phpstan-ignore method.notFound + + $sql = $query->toSql(); + + $this->assertStringContainsString('MATCH', $sql); + $this->assertStringContainsString('AGAINST', $sql); + $this->assertStringContainsString('DESC', $sql); + $this->assertSame(['hello* world*', 'hello* world*'], $query->getBindings()); + } + + public function test_scope_search_pgsql(): void + { + $query = $this->queryForDriver('pgsql_fake'); + $query->search('hello world'); // @phpstan-ignore method.notFound + + $sql = $query->toSql(); + + $this->assertStringContainsString('to_tsvector', $sql); + $this->assertStringContainsString('ts_rank', $sql); + $this->assertSame(['hello:* & world:*', 'hello:* & world:*'], $query->getBindings()); + } + + public function test_scope_search_without_order(): void + { + $query = $this->queryForDriver(); + $query->search('hello', null); // @phpstan-ignore method.notFound + + $this->assertSame(1, substr_count($query->toSql(), 'MATCH')); + $this->assertSame(['hello*'], $query->getBindings()); + } + + public function test_scope_search_phonetic(): void + { + $model = new PhoneticModel; + $model->setConnection('mysql_fake'); + $query = $model->newQuery(); + $query->search('john'); // @phpstan-ignore method.notFound + + $this->assertSame(['john* jn*', 'john* jn*'], $query->getBindings()); + } + + public function 
test_add_match_custom_phonetic_mysql(): void + { + $sq = new SearchQuery('DESC', SearchQuery::SEARCHABLE_FIELD, 'english', true, Phonetic::encode(...)); + $query = $this->queryForDriver(); + $sq->addMatch($query, 'jean'); + + $this->assertSame(['jean* jan*', 'jean* jan*'], $query->getBindings()); + } + + public function test_scope_search_custom_phonetic(): void + { + $model = new PhoneticFrModel; + $model->setConnection('mysql_fake'); + $query = $model->newQuery(); + $query->search('jean'); // @phpstan-ignore method.notFound + + $this->assertSame(['jean* jan*', 'jean* jan*'], $query->getBindings()); + } +} diff --git a/tests/Soundex2Test.php b/tests/Soundex2Test.php new file mode 100644 index 0000000..e1910c7 --- /dev/null +++ b/tests/Soundex2Test.php @@ -0,0 +1,76 @@ +assertSame($expected, Soundex2::encode($input)); + } + + public static function encodeProvider(): array + { + return [ + ['Asamian', 'AZMN'], + ['Knight', 'NG'], + ['MacKenzie', 'MKNZ'], + ['Pfeifer', 'FR'], + ['Philippe', 'FLP'], + ['Schindler', 'SNDL'], + ['Chateau', 'CHT'], + ['Habitat', 'HBT'], + ['Téhéran', 'TRN'], + ['Essayer', 'ESYR'], + ['Crayon', 'CRYN'], + ['Plyne', 'PLN'], + ['Barad', 'BR'], + ['Martin', 'MRTN'], + ['Bernard', 'BRNR'], + ['Faure', 'FR'], + ['Perez', 'PRZ'], + ['Gros', 'GR'], + ['Chapuis', 'CHP'], + ['Boyer', 'BYR'], + ['Gauthier', 'KTR'], + ['Rey', 'RY'], + ['Barthélémy', 'BRTL'], + ['Henry', 'HNR'], + ['Moulin', 'MLN'], + ['Rousseau', 'RS'], + ]; + } + + public function test_phonetic_equivalence(): void + { + $this->assertSame(Soundex2::encode('Faure'), Soundex2::encode('Phaure')); + } + + public function test_empty_string(): void + { + $this->assertSame('', Soundex2::encode('')); + } + + public function test_whitespace_only(): void + { + $this->assertSame('', Soundex2::encode(' ')); + } + + public function test_empty_after_cleanup(): void + { + $this->assertSame('', Soundex2::encode('a')); + } +} diff --git a/tests/TermParserTest.php b/tests/TermParserTest.php index 
7ed0e72..de56e72 100644 --- a/tests/TermParserTest.php +++ b/tests/TermParserTest.php @@ -1,30 +1,29 @@ assertSame($expected, TermParser::parse($input)); } - public function parseProvider(): array + public static function parseProvider(): array { return [ 'single_single' => [ @@ -37,4 +36,199 @@ public function parseProvider(): array ], ]; } + + #[DataProvider('parsePgsqlProvider')] + public function test_parse_pgsql(string $input, string $expected): void + { + $this->assertSame($expected, TermParser::parse($input, 'pgsql')); + } + + public static function parsePgsqlProvider(): array + { + return [ + 'single_term' => [ + 'term', + 'term:*', + ], + 'multiple_terms' => [ + 'term1 term2', + 'term1:* & term2:*', + ], + 'trims_and_filters' => [ + ' hello world ', + 'hello:* & world:*', + ], + ]; + } + + #[DataProvider('parseArrayProvider')] + public function test_parse_array(array $input, string $driver, string $expected): void + { + $this->assertSame($expected, TermParser::parse($input, $driver)); + } + + /** @return array, string, string}> */ + public static function parseArrayProvider(): array + { + return [ + 'mysql_array' => [ + ['hello', 'world'], + 'mysql', + 'hello* world*', + ], + 'pgsql_array' => [ + ['hello', 'world'], + 'pgsql', + 'hello:* & world:*', + ], + ]; + } + + #[DataProvider('parseEmptyProvider')] + public function test_parse_empty(string|array $input, string $driver): void + { + $this->assertSame('', TermParser::parse($input, $driver)); + } + + /** @return array, string}> */ + public static function parseEmptyProvider(): array + { + return [ + 'mysql_empty_string' => ['', 'mysql'], + 'pgsql_empty_string' => ['', 'pgsql'], + 'mysql_whitespace' => [' ', 'mysql'], + 'pgsql_whitespace' => [' ', 'pgsql'], + 'mysql_only_operators' => ['+- ~*"@', 'mysql'], + 'pgsql_only_operators' => ['+- ~*"@', 'pgsql'], + 'mysql_empty_array' => [[], 'mysql'], + 'pgsql_empty_array' => [[], 'pgsql'], + ]; + } + + #[DataProvider('filterOperatorsProvider')] + public 
function test_filter_strips_operators(string $input, string $expected): void + { + $this->assertSame($expected, TermParser::filter($input)); + } + + /** @return array */ + public static function filterOperatorsProvider(): array + { + return [ + 'ampersand_pipe' => ['foo & bar | baz&qux', 'foo bar baz qux'], + 'plus_minus' => ['+foo -bar', 'foo bar'], + 'tilde' => ['~foo', 'foo'], + 'double_quotes' => ['"foo bar"', 'foo bar'], + 'at_sign' => ['foo @3 bar', 'foo 3 bar'], + 'parentheses' => ['(foo) (bar)', 'foo bar'], + 'angle_brackets' => ['>foo ['foo* bar*', 'foo bar'], + 'punctuation_mix' => ['hello, world! how? yes.', 'hello world how yes'], + ]; + } + + #[DataProvider('phoneticizeProvider')] + public function test_phoneticize(string $input, string $expected): void + { + $this->assertSame($expected, TermParser::phoneticize($input, metaphone(...))); + } + + public static function phoneticizeProvider(): array + { + return [ + 'single_word' => [ + 'john', + 'john jn', + ], + 'multiple_words' => [ + 'john smith', + 'john smith jn sm0', + ], + 'phonetic_match' => [ + 'jon', + 'jon jn', + ], + 'numbers_only' => [ + '123', + '123', + ], + 'empty_string' => [ + '', + '', + ], + ]; + } + + public function test_parse_mysql_phonetic(): void + { + $this->assertSame('john* jn*', TermParser::parse('john', 'mysql', true)); + } + + public function test_parse_mysql_phonetic_multiple(): void + { + $this->assertSame('john* smith* jn* sm0*', TermParser::parse('john smith', 'mysql', true)); + } + + public function test_parse_pgsql_phonetic(): void + { + $this->assertSame('jon:* & jn:*', TermParser::parse('jon', 'pgsql', true)); + } + + public function test_parse_pgsql_phonetic_multiple(): void + { + $this->assertSame('jon:* & smith:* & jn:* & sm0:*', TermParser::parse('jon smith', 'pgsql', true)); + } + + public function test_phoneticize_custom_encoder(): void + { + $this->assertSame('jean jan', TermParser::phoneticize('jean', Phonetic::encode(...))); + } + + public function 
test_phoneticize_custom_encoder_multiple(): void + { + $this->assertSame('jean dupont jan dupon', TermParser::phoneticize('jean dupont', Phonetic::encode(...))); + } + + public function test_parse_mysql_custom_phonetic(): void + { + $this->assertSame('jean* jan*', TermParser::parse('jean', 'mysql', true, Phonetic::encode(...))); + } + + public function test_parse_pgsql_custom_phonetic(): void + { + $this->assertSame('jean:* & jan:*', TermParser::parse('jean', 'pgsql', true, Phonetic::encode(...))); + } + + #[DataProvider('prepareSearchableProvider')] + public function test_prepare_searchable(array $input, string $expected): void + { + $this->assertSame($expected, TermParser::prepareSearchable(...$input)); + } + + /** @return array>, string}> */ + public static function prepareSearchableProvider(): array + { + return [ + 'single_string' => [ + ['Hello World'], + 'hello world', + ], + 'multiple_strings' => [ + ['Hello', 'World'], + 'hello world', + ], + 'array_input' => [ + [['Hello', 'World']], + 'hello world', + ], + 'mixed_inputs' => [ + [['Hello', 'World'], 'Extra'], + 'hello world extra', + ], + 'strips_operators' => [ + ['Hello +World "Foo"'], + 'hello world foo', + ], + ]; + } } diff --git a/tests/TestCase.php b/tests/TestCase.php index 474ce74..49045d3 100644 --- a/tests/TestCase.php +++ b/tests/TestCase.php @@ -1,8 +1,10 @@ > */ - protected function getPackageProviders($app) + protected function getPackageProviders($app): array { return [ SearchableServiceProvider::class, ]; } - protected function getEnvironmentSetUp($app) + protected function getEnvironmentSetUp($app): void { // make sure, our .env file is loaded $app->useEnvironmentPath(dirname(__DIR__));