From 2d916a8b93274a3b304a947255e83002db64fa2d Mon Sep 17 00:00:00 2001 From: Rodrigo Lopez Date: Tue, 18 Nov 2025 15:11:50 +0100 Subject: [PATCH 1/6] Add spfresh support --- requirements-dev.txt | 2 +- setup.cfg | 2 +- weaviate_cli/commands/create.py | 1 + weaviate_cli/managers/collection_manager.py | 3 +++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 3646e43..8c1ba6d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -weaviate-client>=4.16.7 +weaviate-client@git+https://github.com/weaviate/weaviate-python-client.git@rob/spfresh click==8.1.7 twine pytest diff --git a/setup.cfg b/setup.cfg index c9e6125..612aadb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,7 +37,7 @@ classifiers = include_package_data = True python_requires = >=3.9 install_requires = - weaviate-client>=4.16.7 + weaviate-client@git+https://github.com/weaviate/weaviate-python-client.git@rob/spfresh click==8.1.7 semver>=3.0.2 numpy>=1.24.0 diff --git a/weaviate_cli/commands/create.py b/weaviate_cli/commands/create.py index d92c7a4..37f703a 100644 --- a/weaviate_cli/commands/create.py +++ b/weaviate_cli/commands/create.py @@ -77,6 +77,7 @@ def create() -> None: "hnsw_acorn", "hnsw_multivector", "flat_bq", + "spfresh", ] ), help="Vector index type (default: 'hnsw').", diff --git a/weaviate_cli/managers/collection_manager.py b/weaviate_cli/managers/collection_manager.py index b0aaddb..9ec4913 100644 --- a/weaviate_cli/managers/collection_manager.py +++ b/weaviate_cli/managers/collection_manager.py @@ -239,6 +239,9 @@ def create_collection( "flat_bq_cache": wvc.Configure.VectorIndex.flat( quantizer=wvc.Configure.VectorIndex.Quantizer.bq(cache=True) ), + "spfresh": wvc.Configure.VectorIndex.spfresh( + quantizer=wvc.Configure.VectorIndex.Quantizer.rq(), + ), } # Vectorizer configurations From 05bc7a3caaffda0222587e17bacd8f0bc358a715 Mon Sep 17 00:00:00 2001 From: Rodrigo Lopez Date: Tue, 18 Nov 2025 15:40:53 +0100 Subject: [PATCH 2/6] Add support for spfresh parameters --- weaviate_cli/commands/create.py | 56 +++++++++++++++ weaviate_cli/defaults.py | 7 ++ weaviate_cli/managers/collection_manager.py | 75 ++++++++++++++++++++- 3 files changed, 135 insertions(+), 3 deletions(-) diff --git a/weaviate_cli/commands/create.py b/weaviate_cli/commands/create.py index 37f703a..4de15ce 100644 --- a/weaviate_cli/commands/create.py +++ b/weaviate_cli/commands/create.py @@ -159,6 +159,48 @@ def create() -> None: ), help="Replication deletion strategy (default: 'delete_on_conflict').", ) +@click.option( + "--spfresh_max_posting_size", + default=CreateCollectionDefaults.spfresh_max_posting_size, + type=int, + help="SPFresh max posting size (default: None).", +) +@click.option( + "--spfresh_min_posting_size", + default=CreateCollectionDefaults.spfresh_min_posting_size, + type=int, + help="SPFresh min posting size (default: None).", +) +@click.option( + "--spfresh_replicas", + default=CreateCollectionDefaults.spfresh_replicas, + type=int, + help="SPFresh replicas (default: None).", +) +@click.option( + "--spfresh_rng_factor", + default=CreateCollectionDefaults.spfresh_rng_factor, + type=int, + help="SPFresh RNG factor (default: None).", +) +@click.option( + "--spfresh_search_probe", + default=CreateCollectionDefaults.spfresh_search_probe, + type=int, + help="SPFresh search probe (default: None).", +) +@click.option( + "--spfresh_centroids_index_type", + default=CreateCollectionDefaults.spfresh_centroids_index_type, + type=click.Choice(["flat", "hnsw"]), + help="SPFresh centroids index type (default: None).", +) +@click.option( + "--spfresh_quantizer", + default=CreateCollectionDefaults.spfresh_quantizer, + type=click.Choice(["rq8", "rq1"]), + help="SPFresh quantizer type (default: None).", +) @click.pass_context def create_collection_cli( ctx: click.Context, @@ -178,6 +220,13 @@ def create_collection_cli( replication_deletion_strategy: str, named_vector: bool, named_vector_name: Optional[str], + spfresh_max_posting_size: Optional[int], + spfresh_min_posting_size: Optional[int], + spfresh_replicas: Optional[int], + spfresh_rng_factor: Optional[int], + spfresh_search_probe: Optional[int], + spfresh_centroids_index_type: Optional[str], + spfresh_quantizer: Optional[str], ) -> None: """Create a collection in Weaviate.""" @@ -203,6 +252,13 @@ def create_collection_cli( replication_deletion_strategy=replication_deletion_strategy, named_vector=named_vector, named_vector_name=named_vector_name, + spfresh_max_posting_size=spfresh_max_posting_size, + spfresh_min_posting_size=spfresh_min_posting_size, + spfresh_replicas=spfresh_replicas, + spfresh_rng_factor=spfresh_rng_factor, + spfresh_search_probe=spfresh_search_probe, + spfresh_centroids_index_type=spfresh_centroids_index_type, + spfresh_quantizer=spfresh_quantizer, ) except Exception as e: click.echo(f"Error: {e}") diff --git a/weaviate_cli/defaults.py b/weaviate_cli/defaults.py index 40098a5..7479052 100644 --- a/weaviate_cli/defaults.py +++ b/weaviate_cli/defaults.py @@ -78,6 +78,13 @@ class CreateCollectionDefaults: replication_deletion_strategy: str = "no_automated_resolution" named_vector: bool = False named_vector_name: Optional[str] = "default" + spfresh_max_posting_size: Optional[int] = None + spfresh_min_posting_size: Optional[int] = None + spfresh_replicas: Optional[int] = None + spfresh_rng_factor: Optional[int] = None + spfresh_search_probe: Optional[int] = None + spfresh_centroids_index_type: Optional[str] = None + spfresh_quantizer: Optional[str] = None @dataclass diff --git a/weaviate_cli/managers/collection_manager.py b/weaviate_cli/managers/collection_manager.py index 9ec4913..92c93fc 100644 --- a/weaviate_cli/managers/collection_manager.py +++ b/weaviate_cli/managers/collection_manager.py @@ -5,7 +5,8 @@ from weaviate.collections import Collection from weaviate.collections.classes.config import _CollectionConfigSimple from weaviate.collections.classes.tenants import TenantActivityStatus -from weaviate.classes.config import VectorFilterStrategy +from weaviate.collections.classes.config_vector_index import VectorCentroidsIndexType +from weaviate.collections.classes.config_vector_index import VectorFilterStrategy from weaviate_cli.defaults import ( CreateCollectionDefaults, UpdateCollectionDefaults, @@ -117,6 +118,53 @@ def get_collection( def get_all_collections(self) -> dict[str, _CollectionConfigSimple]: return self.client.collections.list_all() + def _build_spfresh_config( + self, + max_posting_size: Optional[int] = None, + min_posting_size: Optional[int] = None, + replicas: Optional[int] = None, + rng_factor: Optional[int] = None, + search_probe: Optional[int] = None, + centroids_index_type: Optional[str] = None, + quantizer: Optional[str] = None, + ): + """Build SPFresh configuration with provided parameters.""" + kwargs = {} + + if max_posting_size is not None: + kwargs["max_posting_size"] = max_posting_size + if min_posting_size is not None: + kwargs["min_posting_size"] = min_posting_size + if replicas is not None: + kwargs["replicas"] = replicas + if rng_factor is not None: + kwargs["rng_factor"] = rng_factor + if search_probe is not None: + kwargs["search_probe"] = search_probe + + # Handle centroids index type + if centroids_index_type is not None: + if centroids_index_type == "flat": + kwargs["centroids_index_type"] = VectorCentroidsIndexType.FLAT + elif centroids_index_type == "hnsw": + kwargs["centroids_index_type"] = VectorCentroidsIndexType.HNSW + + # Handle quantizer + quantizer_config = None + if quantizer is not None: + if quantizer == "rq8": + quantizer_config = wvc.Configure.VectorIndex.Quantizer.rq(bits=8) + elif quantizer == "rq1": + quantizer_config = wvc.Configure.VectorIndex.Quantizer.rq(bits=1) + else: + # Default quantizer if none specified + quantizer_config = wvc.Configure.VectorIndex.Quantizer.rq(bits=8) + + if quantizer_config is not None: + kwargs["quantizer"] = quantizer_config + + return wvc.Configure.VectorIndex.spfresh(**kwargs) + def create_collection( self, collection: str = CreateCollectionDefaults.collection, @@ -139,6 +187,21 @@ def create_collection( ] = CreateCollectionDefaults.replication_deletion_strategy, named_vector: bool = CreateCollectionDefaults.named_vector, named_vector_name: Optional[str] = CreateCollectionDefaults.named_vector_name, + spfresh_max_posting_size: Optional[ + int + ] = CreateCollectionDefaults.spfresh_max_posting_size, + spfresh_min_posting_size: Optional[ + int + ] = CreateCollectionDefaults.spfresh_min_posting_size, + spfresh_replicas: Optional[int] = CreateCollectionDefaults.spfresh_replicas, + spfresh_rng_factor: Optional[int] = CreateCollectionDefaults.spfresh_rng_factor, + spfresh_search_probe: Optional[ + int + ] = CreateCollectionDefaults.spfresh_search_probe, + spfresh_centroids_index_type: Optional[ + str + ] = CreateCollectionDefaults.spfresh_centroids_index_type, + spfresh_quantizer: Optional[str] = CreateCollectionDefaults.spfresh_quantizer, ) -> None: if self.client.collections.exists(collection): @@ -239,8 +302,14 @@ def create_collection( "flat_bq_cache": wvc.Configure.VectorIndex.flat( quantizer=wvc.Configure.VectorIndex.Quantizer.bq(cache=True) ), - "spfresh": wvc.Configure.VectorIndex.spfresh( - quantizer=wvc.Configure.VectorIndex.Quantizer.rq(), + "spfresh": self._build_spfresh_config( + max_posting_size=spfresh_max_posting_size, + min_posting_size=spfresh_min_posting_size, + replicas=spfresh_replicas, + rng_factor=spfresh_rng_factor, + search_probe=spfresh_search_probe, + centroids_index_type=spfresh_centroids_index_type, + quantizer=spfresh_quantizer, ), } From 899d0daa3e75b65671434dff704f0ee939646635 Mon Sep 17 00:00:00 2001 From: Rodrigo Lopez Date: Wed, 28 Jan 2026 12:35:54 +0100 Subject: [PATCH 3/6] Rename to Hfresh --- weaviate_cli/commands/create.py | 72 ++++++++++----------- weaviate_cli/defaults.py | 14 ++-- weaviate_cli/managers/collection_manager.py | 52 +++++++-------- 3 files changed, 65 insertions(+), 73 deletions(-) diff --git a/weaviate_cli/commands/create.py b/weaviate_cli/commands/create.py index 4de15ce..9a570b2 100644 --- a/weaviate_cli/commands/create.py +++ b/weaviate_cli/commands/create.py @@ -77,7 +77,7 @@ def create() -> None: "hnsw_acorn", "hnsw_multivector", "flat_bq", - "spfresh", + "hfresh", ] ), help="Vector index type (default: 'hnsw').", @@ -160,46 +160,46 @@ def create() -> None: help="Replication deletion strategy (default: 'delete_on_conflict').", ) @click.option( - "--spfresh_max_posting_size", - default=CreateCollectionDefaults.spfresh_max_posting_size, + "--hfresh_max_posting_size", + default=CreateCollectionDefaults.hfresh_max_posting_size, type=int, - help="SPFresh max posting size (default: None).", + help="hfresh max posting size (default: None).", ) @click.option( - "--spfresh_min_posting_size", - default=CreateCollectionDefaults.spfresh_min_posting_size, + "--hfresh_min_posting_size", + default=CreateCollectionDefaults.hfresh_min_posting_size, type=int, - help="SPFresh min posting size (default: None).", + help="hfresh min posting size (default: None).", ) @click.option( - "--spfresh_replicas", - default=CreateCollectionDefaults.spfresh_replicas, + "--hfresh_replicas", + default=CreateCollectionDefaults.hfresh_replicas, type=int, - help="SPFresh replicas (default: None).", + help="hfresh replicas (default: None).", ) @click.option( - "--spfresh_rng_factor", - default=CreateCollectionDefaults.spfresh_rng_factor, + "--hfresh_rng_factor", + default=CreateCollectionDefaults.hfresh_rng_factor, type=int, - help="SPFresh RNG factor (default: None).", + help="hfresh RNG factor (default: None).", ) @click.option( - "--spfresh_search_probe", - default=CreateCollectionDefaults.spfresh_search_probe, + "--hfresh_search_probe", + default=CreateCollectionDefaults.hfresh_search_probe, type=int, - help="SPFresh search probe (default: None).", + help="hfresh search probe (default: None).", ) @click.option( - "--spfresh_centroids_index_type", - default=CreateCollectionDefaults.spfresh_centroids_index_type, + "--hfresh_centroids_index_type", + default=CreateCollectionDefaults.hfresh_centroids_index_type, type=click.Choice(["flat", "hnsw"]), - help="SPFresh centroids index type (default: None).", + help="hfresh centroids index type (default: None).", ) @click.option( - "--spfresh_quantizer", - default=CreateCollectionDefaults.spfresh_quantizer, + "--hfresh_quantizer", + default=CreateCollectionDefaults.hfresh_quantizer, type=click.Choice(["rq8", "rq1"]), - help="SPFresh quantizer type (default: None).", + help="hfresh quantizer type (default: None).", ) @click.pass_context def create_collection_cli( @@ -220,13 +220,13 @@ def create_collection_cli( replication_deletion_strategy: str, named_vector: bool, named_vector_name: Optional[str], - spfresh_max_posting_size: Optional[int], - spfresh_min_posting_size: Optional[int], - spfresh_replicas: Optional[int], - spfresh_rng_factor: Optional[int], - spfresh_search_probe: Optional[int], - spfresh_centroids_index_type: Optional[str], - spfresh_quantizer: Optional[str], + hfresh_max_posting_size: Optional[int], + hfresh_min_posting_size: Optional[int], + hfresh_replicas: Optional[int], + hfresh_rng_factor: Optional[int], + hfresh_search_probe: Optional[int], + hfresh_centroids_index_type: Optional[str], + hfresh_quantizer: Optional[str], ) -> None: """Create a collection in Weaviate.""" @@ -252,13 +252,13 @@ def create_collection_cli( replication_deletion_strategy=replication_deletion_strategy, named_vector=named_vector, named_vector_name=named_vector_name, - spfresh_max_posting_size=spfresh_max_posting_size, - spfresh_min_posting_size=spfresh_min_posting_size, - spfresh_replicas=spfresh_replicas, - spfresh_rng_factor=spfresh_rng_factor, - spfresh_search_probe=spfresh_search_probe, - spfresh_centroids_index_type=spfresh_centroids_index_type, - spfresh_quantizer=spfresh_quantizer, + hfresh_max_posting_size=hfresh_max_posting_size, + hfresh_min_posting_size=hfresh_min_posting_size, + hfresh_replicas=hfresh_replicas, + hfresh_rng_factor=hfresh_rng_factor, + hfresh_search_probe=hfresh_search_probe, + hfresh_centroids_index_type=hfresh_centroids_index_type, + hfresh_quantizer=hfresh_quantizer, ) except Exception as e: click.echo(f"Error: {e}") diff --git a/weaviate_cli/defaults.py b/weaviate_cli/defaults.py index 7479052..49b6dd6 100644 --- a/weaviate_cli/defaults.py +++ b/weaviate_cli/defaults.py @@ -78,13 +78,13 @@ class CreateCollectionDefaults: replication_deletion_strategy: str = "no_automated_resolution" named_vector: bool = False named_vector_name: Optional[str] = "default" - spfresh_max_posting_size: Optional[int] = None - spfresh_min_posting_size: Optional[int] = None - spfresh_replicas: Optional[int] = None - spfresh_rng_factor: Optional[int] = None - spfresh_search_probe: Optional[int] = None - spfresh_centroids_index_type: Optional[str] = None - spfresh_quantizer: Optional[str] = None + hfresh_max_posting_size: Optional[int] = None + hfresh_min_posting_size: Optional[int] = None + hfresh_replicas: Optional[int] = None + hfresh_rng_factor: Optional[int] = None + hfresh_search_probe: Optional[int] = None + hfresh_centroids_index_type: Optional[str] = None + hfresh_quantizer: Optional[str] = None @dataclass diff --git a/weaviate_cli/managers/collection_manager.py b/weaviate_cli/managers/collection_manager.py index 92c93fc..b198227 100644 --- a/weaviate_cli/managers/collection_manager.py +++ b/weaviate_cli/managers/collection_manager.py @@ -5,7 +5,6 @@ from weaviate.collections import Collection from weaviate.collections.classes.config import _CollectionConfigSimple from weaviate.collections.classes.tenants import TenantActivityStatus -from weaviate.collections.classes.config_vector_index import VectorCentroidsIndexType from weaviate.collections.classes.config_vector_index import VectorFilterStrategy from weaviate_cli.defaults import ( CreateCollectionDefaults, @@ -118,7 +117,7 @@ def get_collection( def get_all_collections(self) -> dict[str, _CollectionConfigSimple]: return self.client.collections.list_all() - def _build_spfresh_config( + def _build_hfresh_config( self, max_posting_size: Optional[int] = None, min_posting_size: Optional[int] = None, @@ -128,7 +127,7 @@ def _build_spfresh_config( centroids_index_type: Optional[str] = None, quantizer: Optional[str] = None, ): - """Build SPFresh configuration with provided parameters.""" + """Build hfresh configuration with provided parameters.""" kwargs = {} if max_posting_size is not None: @@ -142,13 +141,6 @@ def _build_spfresh_config( if search_probe is not None: kwargs["search_probe"] = search_probe - # Handle centroids index type - if centroids_index_type is not None: - if centroids_index_type == "flat": - kwargs["centroids_index_type"] = VectorCentroidsIndexType.FLAT - elif centroids_index_type == "hnsw": - kwargs["centroids_index_type"] = VectorCentroidsIndexType.HNSW - # Handle quantizer quantizer_config = None if quantizer is not None: @@ -163,7 +155,7 @@ def _build_spfresh_config( if quantizer_config is not None: kwargs["quantizer"] = quantizer_config - return wvc.Configure.VectorIndex.spfresh(**kwargs) + return wvc.Configure.VectorIndex.hfresh(**kwargs) def create_collection( self, @@ -187,21 +179,21 @@ def create_collection( ] = CreateCollectionDefaults.replication_deletion_strategy, named_vector: bool = CreateCollectionDefaults.named_vector, named_vector_name: Optional[str] = CreateCollectionDefaults.named_vector_name, - spfresh_max_posting_size: Optional[ + hfresh_max_posting_size: Optional[ int - ] = CreateCollectionDefaults.spfresh_max_posting_size, - spfresh_min_posting_size: Optional[ + ] = CreateCollectionDefaults.hfresh_max_posting_size, + hfresh_min_posting_size: Optional[ int - ] = CreateCollectionDefaults.spfresh_min_posting_size, - spfresh_replicas: Optional[int] = CreateCollectionDefaults.spfresh_replicas, - spfresh_rng_factor: Optional[int] = CreateCollectionDefaults.spfresh_rng_factor, - spfresh_search_probe: Optional[ + ] = CreateCollectionDefaults.hfresh_min_posting_size, + hfresh_replicas: Optional[int] = CreateCollectionDefaults.hfresh_replicas, + hfresh_rng_factor: Optional[int] = CreateCollectionDefaults.hfresh_rng_factor, + hfresh_search_probe: Optional[ int - ] = CreateCollectionDefaults.spfresh_search_probe, - spfresh_centroids_index_type: Optional[ + ] = CreateCollectionDefaults.hfresh_search_probe, + hfresh_centroids_index_type: Optional[ str - ] = CreateCollectionDefaults.spfresh_centroids_index_type, - spfresh_quantizer: Optional[str] = CreateCollectionDefaults.spfresh_quantizer, + ] = CreateCollectionDefaults.hfresh_centroids_index_type, + hfresh_quantizer: Optional[str] = CreateCollectionDefaults.hfresh_quantizer, ) -> None: if self.client.collections.exists(collection): @@ -302,14 +294,14 @@ def create_collection( "flat_bq_cache": wvc.Configure.VectorIndex.flat( quantizer=wvc.Configure.VectorIndex.Quantizer.bq(cache=True) ), - "spfresh": self._build_spfresh_config( - max_posting_size=spfresh_max_posting_size, - min_posting_size=spfresh_min_posting_size, - replicas=spfresh_replicas, - rng_factor=spfresh_rng_factor, - search_probe=spfresh_search_probe, - centroids_index_type=spfresh_centroids_index_type, - quantizer=spfresh_quantizer, + "hfresh": self._build_hfresh_config( + max_posting_size=hfresh_max_posting_size, + min_posting_size=hfresh_min_posting_size, + replicas=hfresh_replicas, + rng_factor=hfresh_rng_factor, + search_probe=hfresh_search_probe, + centroids_index_type=hfresh_centroids_index_type, + quantizer=hfresh_quantizer, ), } From f10dff6bbdcc44d331ed84f23f22d99050837116 Mon Sep 17 00:00:00 2001 From: Rodrigo Lopez Date: Wed, 28 Jan 2026 14:35:09 +0100 Subject: [PATCH 4/6] Update with current params --- weaviate_cli/commands/create.py | 48 +++++--------- weaviate_cli/defaults.py | 8 +-- weaviate_cli/managers/collection_manager.py | 72 +++++++++------------ 3 files changed, 51 insertions(+), 77 deletions(-) diff --git a/weaviate_cli/commands/create.py b/weaviate_cli/commands/create.py index 9a570b2..85632ab 100644 --- a/weaviate_cli/commands/create.py +++ b/weaviate_cli/commands/create.py @@ -160,29 +160,17 @@ def create() -> None: help="Replication deletion strategy (default: 'delete_on_conflict').", ) @click.option( - "--hfresh_max_posting_size", - default=CreateCollectionDefaults.hfresh_max_posting_size, + "--hfresh_max_posting_size_kb", + default=CreateCollectionDefaults.hfresh_max_posting_size_kb, type=int, help="hfresh max posting size (default: None).", ) -@click.option( - "--hfresh_min_posting_size", - default=CreateCollectionDefaults.hfresh_min_posting_size, - type=int, - help="hfresh min posting size (default: None).", -) @click.option( "--hfresh_replicas", default=CreateCollectionDefaults.hfresh_replicas, type=int, help="hfresh replicas (default: None).", ) -@click.option( - "--hfresh_rng_factor", - default=CreateCollectionDefaults.hfresh_rng_factor, - type=int, - help="hfresh RNG factor (default: None).", -) @click.option( "--hfresh_search_probe", default=CreateCollectionDefaults.hfresh_search_probe, @@ -190,16 +178,16 @@ def create() -> None: help="hfresh search probe (default: None).", ) @click.option( - "--hfresh_centroids_index_type", - default=CreateCollectionDefaults.hfresh_centroids_index_type, - type=click.Choice(["flat", "hnsw"]), - help="hfresh centroids index type (default: None).", + "--distance_metric", + default=CreateCollectionDefaults.distance_metric, + type=click.Choice(["cosine", "dot", "l2-squared", "hamming", "manhattan"]), + help="Distance metric for hfresh (default: 'cosine').", ) @click.option( - "--hfresh_quantizer", - default=CreateCollectionDefaults.hfresh_quantizer, - type=click.Choice(["rq8", "rq1"]), - help="hfresh quantizer type (default: None).", + "--rescore_limit", + default=CreateCollectionDefaults.rescore_limit, + type=int, + help="Rescore limit for hfresh (default: None).", ) @click.pass_context def create_collection_cli( @@ -220,13 +208,11 @@ def create_collection_cli( replication_deletion_strategy: str, named_vector: bool, named_vector_name: Optional[str], - hfresh_max_posting_size: Optional[int], - hfresh_min_posting_size: Optional[int], + hfresh_max_posting_size_kb: Optional[int], hfresh_replicas: Optional[int], - hfresh_rng_factor: Optional[int], hfresh_search_probe: Optional[int], - hfresh_centroids_index_type: Optional[str], - hfresh_quantizer: Optional[str], + distance_metric: Optional[str], + rescore_limit: Optional[int], ) -> None: """Create a collection in Weaviate.""" @@ -252,13 +238,11 @@ def create_collection_cli( replication_deletion_strategy=replication_deletion_strategy, named_vector=named_vector, named_vector_name=named_vector_name, - hfresh_max_posting_size=hfresh_max_posting_size, - hfresh_min_posting_size=hfresh_min_posting_size, + hfresh_max_posting_size_kb=hfresh_max_posting_size_kb, hfresh_replicas=hfresh_replicas, - hfresh_rng_factor=hfresh_rng_factor, hfresh_search_probe=hfresh_search_probe, - hfresh_centroids_index_type=hfresh_centroids_index_type, - hfresh_quantizer=hfresh_quantizer, + distance_metric=distance_metric, + rescore_limit=rescore_limit, ) except Exception as e: click.echo(f"Error: {e}") diff --git a/weaviate_cli/defaults.py b/weaviate_cli/defaults.py index 49b6dd6..144e7a6 100644 --- a/weaviate_cli/defaults.py +++ b/weaviate_cli/defaults.py @@ -78,13 +78,11 @@ class CreateCollectionDefaults: replication_deletion_strategy: str = "no_automated_resolution" named_vector: bool = False named_vector_name: Optional[str] = "default" - hfresh_max_posting_size: Optional[int] = None - hfresh_min_posting_size: Optional[int] = None + hfresh_max_posting_size_kb: Optional[int] = None hfresh_replicas: Optional[int] = None - hfresh_rng_factor: Optional[int] = None hfresh_search_probe: Optional[int] = None - hfresh_centroids_index_type: Optional[str] = None - hfresh_quantizer: Optional[str] = None + distance_metric: Optional[str] = "cosine" + rescore_limit: Optional[int] = None @dataclass diff --git a/weaviate_cli/managers/collection_manager.py b/weaviate_cli/managers/collection_manager.py index b198227..0171276 100644 --- a/weaviate_cli/managers/collection_manager.py +++ b/weaviate_cli/managers/collection_manager.py @@ -119,41 +119,41 @@ def get_all_collections(self) -> dict[str, _CollectionConfigSimple]: def _build_hfresh_config( self, - max_posting_size: Optional[int] = None, - min_posting_size: Optional[int] = None, + max_posting_size_kb: Optional[int] = None, + distance_metric: Optional[str] = "cosine", + rescore_limit: Optional[int] = None, replicas: Optional[int] = None, - rng_factor: Optional[int] = None, search_probe: Optional[int] = None, - centroids_index_type: Optional[str] = None, - quantizer: Optional[str] = None, ): """Build hfresh configuration with provided parameters.""" + # Explicit mapping of distance metric strings to enum values + distance_metric_map = { + "cosine": wvc.VectorDistances.COSINE, + "dot": wvc.VectorDistances.DOT, + "l2-squared": wvc.VectorDistances.L2_SQUARED, + "hamming": wvc.VectorDistances.HAMMING, + "manhattan": wvc.VectorDistances.MANHATTAN, + } + kwargs = {} - if max_posting_size is not None: - kwargs["max_posting_size"] = max_posting_size - if min_posting_size is not None: - kwargs["min_posting_size"] = min_posting_size + if max_posting_size_kb is not None: + kwargs["max_posting_size_kb"] = max_posting_size_kb + if distance_metric is not None: + if distance_metric not in distance_metric_map: + raise ValueError( + f"Invalid distance_metric: '{distance_metric}'. " + f"Must be one of: {list(distance_metric_map.keys())}" + ) + kwargs["distance_metric"] = distance_metric_map[distance_metric] if replicas is not None: kwargs["replicas"] = replicas - if rng_factor is not None: - kwargs["rng_factor"] = rng_factor if search_probe is not None: kwargs["search_probe"] = search_probe - - # Handle quantizer - quantizer_config = None - if quantizer is not None: - if quantizer == "rq8": - quantizer_config = wvc.Configure.VectorIndex.Quantizer.rq(bits=8) - elif quantizer == "rq1": - quantizer_config = wvc.Configure.VectorIndex.Quantizer.rq(bits=1) - else: - # Default quantizer if none specified - quantizer_config = wvc.Configure.VectorIndex.Quantizer.rq(bits=8) - - if quantizer_config is not None: - kwargs["quantizer"] = quantizer_config + if rescore_limit is not None: + kwargs["quantizer"] = wvc.Configure.VectorIndex.Quantizer.rq( + bits=8, rescore_limit=rescore_limit + ) return wvc.Configure.VectorIndex.hfresh(**kwargs) @@ -179,21 +179,15 @@ def create_collection( ] = CreateCollectionDefaults.replication_deletion_strategy, named_vector: bool = CreateCollectionDefaults.named_vector, named_vector_name: Optional[str] = CreateCollectionDefaults.named_vector_name, - hfresh_max_posting_size: Optional[ + hfresh_max_posting_size_kb: Optional[ int - ] = CreateCollectionDefaults.hfresh_max_posting_size, - hfresh_min_posting_size: Optional[ - int - ] = CreateCollectionDefaults.hfresh_min_posting_size, + ] = CreateCollectionDefaults.hfresh_max_posting_size_kb, hfresh_replicas: Optional[int] = CreateCollectionDefaults.hfresh_replicas, - hfresh_rng_factor: Optional[int] = CreateCollectionDefaults.hfresh_rng_factor, hfresh_search_probe: Optional[ int ] = CreateCollectionDefaults.hfresh_search_probe, - hfresh_centroids_index_type: Optional[ - str - ] = CreateCollectionDefaults.hfresh_centroids_index_type, - hfresh_quantizer: Optional[str] = CreateCollectionDefaults.hfresh_quantizer, + distance_metric: Optional[str] = CreateCollectionDefaults.distance_metric, + rescore_limit: Optional[int] = CreateCollectionDefaults.rescore_limit, ) -> None: if self.client.collections.exists(collection): @@ -295,13 +289,11 @@ def create_collection( quantizer=wvc.Configure.VectorIndex.Quantizer.bq(cache=True) ), "hfresh": self._build_hfresh_config( - max_posting_size=hfresh_max_posting_size, - min_posting_size=hfresh_min_posting_size, + max_posting_size_kb=hfresh_max_posting_size_kb, + distance_metric=distance_metric, + rescore_limit=rescore_limit, replicas=hfresh_replicas, - rng_factor=hfresh_rng_factor, search_probe=hfresh_search_probe, - centroids_index_type=hfresh_centroids_index_type, - quantizer=hfresh_quantizer, ), } From 5b82a934e0136709cc39975fd2d4354f57b2bf98 Mon Sep 17 00:00:00 2001 From: Rodrigo Lopez Date: Thu, 19 Mar 2026 11:35:30 +0100 Subject: [PATCH 5/6] Install latest python client version --- requirements-dev.txt | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 8c1ba6d..64cd126 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -weaviate-client@git+https://github.com/weaviate/weaviate-python-client.git@rob/spfresh +weaviate-client>=4.20.4 click==8.1.7 twine pytest diff --git a/setup.cfg b/setup.cfg index bae19a6..c7e11bf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,7 +37,7 @@ classifiers = include_package_data = True python_requires = >=3.9 install_requires = - weaviate-client>=4.19.0 + weaviate-client>=4.20.4 click==8.1.7 semver>=3.0.2 numpy>=1.24.0 From 721d7bf1b16ea98d5142b8aeaa212bdbedd1552f Mon Sep 17 00:00:00 2001 From: Rodrigo Lopez Date: Fri, 20 Mar 2026 13:25:06 +0100 Subject: [PATCH 6/6] Configure rescore limit and distance metric in all vector indexes --- weaviate_cli/commands/create.py | 10 +-- weaviate_cli/defaults.py | 2 +- weaviate_cli/managers/collection_manager.py | 82 +++++++++++++++------ 3 files changed, 66 insertions(+), 28 deletions(-) diff --git a/weaviate_cli/commands/create.py b/weaviate_cli/commands/create.py index 59eed27..0276cf8 100644 --- a/weaviate_cli/commands/create.py +++ b/weaviate_cli/commands/create.py @@ -189,31 +189,31 @@ def create() -> None: "--hfresh_max_posting_size_kb", default=CreateCollectionDefaults.hfresh_max_posting_size_kb, type=int, - help="hfresh max posting size (default: None).", + help="hfresh - max posting size in KB (default: None).", ) @click.option( "--hfresh_replicas", default=CreateCollectionDefaults.hfresh_replicas, type=int, - help="hfresh replicas (default: None).", + help="hfresh - number of replicas for each element in different posting lists (default: None).", ) @click.option( "--hfresh_search_probe", default=CreateCollectionDefaults.hfresh_search_probe, type=int, - help="hfresh search probe (default: None).", + help="hfresh - search probe (default: None).", ) @click.option( "--distance_metric", default=CreateCollectionDefaults.distance_metric, type=click.Choice(["cosine", "dot", "l2-squared", "hamming", "manhattan"]), - help="Distance metric for hfresh (default: 'cosine').", + help="Distance metric (default: None, set by Weaviate server).", ) @click.option( "--rescore_limit", default=CreateCollectionDefaults.rescore_limit, type=int, - help="Rescore limit for hfresh (default: None).", + help="Rescore limit (default: None, set by Weaviate server).", ) @click.pass_context def create_collection_cli( diff --git a/weaviate_cli/defaults.py b/weaviate_cli/defaults.py index dc4475c..9d0fb4e 100644 --- a/weaviate_cli/defaults.py +++ b/weaviate_cli/defaults.py @@ -81,7 +81,7 @@ class CreateCollectionDefaults: hfresh_max_posting_size_kb: Optional[int] = None hfresh_replicas: Optional[int] = None hfresh_search_probe: Optional[int] = None - distance_metric: Optional[str] = "cosine" + distance_metric: Optional[str] = None rescore_limit: Optional[int] = None object_ttl_type: str = "create" object_ttl_time: Optional[int] = None diff --git a/weaviate_cli/managers/collection_manager.py b/weaviate_cli/managers/collection_manager.py index 0eacc97..d783252 100644 --- a/weaviate_cli/managers/collection_manager.py +++ b/weaviate_cli/managers/collection_manager.py @@ -249,40 +249,55 @@ def create_collection( ) vector_index_map: Dict[str, wvc.VectorIndexConfig] = { - "hnsw": wvc.Configure.VectorIndex.hnsw(), - "flat": wvc.Configure.VectorIndex.flat(), + "hnsw": wvc.Configure.VectorIndex.hnsw(distance_metric=distance_metric), + "flat": wvc.Configure.VectorIndex.flat(distance_metric=distance_metric), "dynamic": wvc.Configure.VectorIndex.dynamic(), "dynamic_flat_bq": wvc.Configure.VectorIndex.dynamic( flat=wvc.Configure.VectorIndex.flat( - quantizer=wvc.Configure.VectorIndex.Quantizer.bq() + quantizer=wvc.Configure.VectorIndex.Quantizer.bq(), + distance_metric=distance_metric, ) ), "dynamic_flat_bq_hnsw_pq": wvc.Configure.VectorIndex.dynamic( flat=wvc.Configure.VectorIndex.flat( - quantizer=wvc.Configure.VectorIndex.Quantizer.bq() + quantizer=wvc.Configure.VectorIndex.Quantizer.bq( + rescore_limit=rescore_limit + ), + distance_metric=distance_metric, ), hnsw=wvc.Configure.VectorIndex.hnsw( quantizer=wvc.Configure.VectorIndex.Quantizer.pq( training_limit=training_limit - ) + ), + distance_metric=distance_metric, ), ), "dynamic_flat_bq_hnsw_sq": wvc.Configure.VectorIndex.dynamic( flat=wvc.Configure.VectorIndex.flat( - quantizer=wvc.Configure.VectorIndex.Quantizer.bq() + quantizer=wvc.Configure.VectorIndex.Quantizer.bq( + rescore_limit=rescore_limit + ), + distance_metric=distance_metric, ), hnsw=wvc.Configure.VectorIndex.hnsw( quantizer=wvc.Configure.VectorIndex.Quantizer.sq( - training_limit=training_limit - ) + rescore_limit=rescore_limit, training_limit=training_limit + ), + distance_metric=distance_metric, ), ), "dynamic_flat_bq_hnsw_bq": wvc.Configure.VectorIndex.dynamic( flat=wvc.Configure.VectorIndex.flat( - quantizer=wvc.Configure.VectorIndex.Quantizer.bq() + quantizer=wvc.Configure.VectorIndex.Quantizer.bq( + rescore_limit=rescore_limit + ), + distance_metric=distance_metric, ), hnsw=wvc.Configure.VectorIndex.hnsw( - quantizer=wvc.Configure.VectorIndex.Quantizer.bq() + quantizer=wvc.Configure.VectorIndex.Quantizer.bq( + rescore_limit=rescore_limit + ), + distance_metric=distance_metric, ), ), "dynamic_hnsw_pq": wvc.Configure.VectorIndex.dynamic( @@ -295,45 +310,68 @@ def create_collection( "dynamic_hnsw_sq": wvc.Configure.VectorIndex.dynamic( hnsw=wvc.Configure.VectorIndex.hnsw( quantizer=wvc.Configure.VectorIndex.Quantizer.sq( - training_limit=training_limit - ) + rescore_limit=rescore_limit, training_limit=training_limit + ), + distance_metric=distance_metric, ) ), "dynamic_hnsw_bq": wvc.Configure.VectorIndex.dynamic( hnsw=wvc.Configure.VectorIndex.hnsw( - quantizer=wvc.Configure.VectorIndex.Quantizer.bq() + quantizer=wvc.Configure.VectorIndex.Quantizer.bq( + rescore_limit=rescore_limit + ), + distance_metric=distance_metric, ) ), "hnsw_pq": wvc.Configure.VectorIndex.hnsw( quantizer=wvc.Configure.VectorIndex.Quantizer.pq( training_limit=training_limit - ) + ), + distance_metric=distance_metric, ), "hnsw_bq": wvc.Configure.VectorIndex.hnsw( - quantizer=wvc.Configure.VectorIndex.Quantizer.bq() + quantizer=wvc.Configure.VectorIndex.Quantizer.bq( + rescore_limit=rescore_limit + ), + distance_metric=distance_metric, ), "hnsw_bq_cache": wvc.Configure.VectorIndex.hnsw( - quantizer=wvc.Configure.VectorIndex.Quantizer.bq(cache=True) + quantizer=wvc.Configure.VectorIndex.Quantizer.bq( + cache=True, rescore_limit=rescore_limit + ), + distance_metric=distance_metric, ), "hnsw_sq": wvc.Configure.VectorIndex.hnsw( quantizer=wvc.Configure.VectorIndex.Quantizer.sq( - training_limit=training_limit - ) + rescore_limit=rescore_limit, training_limit=training_limit + ), + distance_metric=distance_metric, ), "hnsw_rq": wvc.Configure.VectorIndex.hnsw( - quantizer=wvc.Configure.VectorIndex.Quantizer.rq() + quantizer=wvc.Configure.VectorIndex.Quantizer.rq( + rescore_limit=rescore_limit + ), + distance_metric=distance_metric, ), "hnsw_acorn": wvc.Configure.VectorIndex.hnsw( - filter_strategy=VectorFilterStrategy.ACORN + filter_strategy=VectorFilterStrategy.ACORN, + distance_metric=distance_metric, ), "hnsw_multivector": wvc.Configure.VectorIndex.hnsw( multi_vector=wvc.Configure.VectorIndex.MultiVector.multi_vector(), + distance_metric=distance_metric, ), "flat_bq": wvc.Configure.VectorIndex.flat( - quantizer=wvc.Configure.VectorIndex.Quantizer.bq() + quantizer=wvc.Configure.VectorIndex.Quantizer.bq( + rescore_limit=rescore_limit + ), + distance_metric=distance_metric, ), "flat_bq_cache": wvc.Configure.VectorIndex.flat( - quantizer=wvc.Configure.VectorIndex.Quantizer.bq(cache=True) + quantizer=wvc.Configure.VectorIndex.Quantizer.bq( + cache=True, rescore_limit=rescore_limit + ), + distance_metric=distance_metric, ), "hfresh": self._build_hfresh_config( max_posting_size_kb=hfresh_max_posting_size_kb,