From 6e76ad3a44778bd0b05220d5d0994b2feec69dc8 Mon Sep 17 00:00:00 2001 From: Susan Douglas Date: Tue, 24 Feb 2026 11:23:46 -0500 Subject: [PATCH 1/7] Updates to configuration.md --- docs/configuration.md | 203 +++++++++++++++++++++++++++--------------- 1 file changed, 132 insertions(+), 71 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 20bd763..d1b3257 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1,12 +1,24 @@ # Configuration -pg_semantic_cache provides flexible configuration options for vector dimensions, index types, and cache behavior. +pg_semantic_cache provides flexible configuration options for vector +dimensions, index types, and cache behavior. -## Vector Dimensions +!!! tip "Start Simple" + + When configuring semantic caching, begin with simple defaults (1536 + dimensions, IVFFlat, 0.95 threshold) and adjust your system based on + monitoring. -The extension supports configurable embedding dimensions to match your chosen embedding model. +!!! warning "Test Before Production" + + Always test configuration changes in development before applying to + production! -### Supported Dimensions +## Vector Dimensions + +The extension supports configurable embedding dimensions to match your +chosen embedding model. pg_semantic_cache supports the following dimensions +and associated models: | Dimension | Common Models | |-----------|---------------| @@ -19,7 +31,9 @@ The extension supports configurable embedding dimensions to match your chosen em ### Setting Dimensions !!! warning "Rebuild Required" - Changing dimensions requires rebuilding the index, which **clears all cached data**. + + Changing dimensions requires rebuilding the index, which clears all + cached data. 
```sql -- Set vector dimension (default: 1536) @@ -32,7 +46,7 @@ SELECT semantic_cache.rebuild_index(); SELECT semantic_cache.get_vector_dimension(); ``` -### Initial Setup for Custom Dimensions +### Initial Setup For Custom Dimensions If you know your embedding model before installation: @@ -49,18 +63,19 @@ SELECT semantic_cache.rebuild_index(); ## Vector Index Types -Choose between IVFFlat (fast, approximate) or HNSW (accurate, slower build). +Choose between IVFFlat (fast, approximate) or HNSW (accurate, slower +build). ### IVFFlat Index (Default) Best for most use cases - fast lookups with good recall. -**Characteristics:** -- **Lookup Speed**: Very fast (< 5ms typical) -- **Build Time**: Fast -- **Recall**: Good (95%+) -- **Memory**: Moderate -- **Best For**: Production caches with frequent updates +Characteristics: +- Lookup Speed: Very fast (< 5ms typical) +- Build Time: Fast +- Recall: Good (95%+) +- Memory: Moderate +- Best For: Production caches with frequent updates ```sql -- Set index type @@ -68,7 +83,7 @@ SELECT semantic_cache.set_index_type('ivfflat'); SELECT semantic_cache.rebuild_index(); ``` -**IVFFlat Parameters** (set during `init_schema()`): +IVFFlat Parameters (set during `init_schema()`): ```sql -- Default configuration @@ -76,6 +91,7 @@ lists = 100 -- For < 100K entries -- For larger caches, increase lists -- Adjust in the init_schema() function or manually: + DROP INDEX IF EXISTS semantic_cache.idx_cache_entries_embedding; CREATE INDEX idx_cache_entries_embedding ON semantic_cache.cache_entries @@ -87,12 +103,12 @@ WITH (lists = 1000); -- For 100K-1M entries More accurate but slower to build - requires pgvector 0.5.0+. 
-**Characteristics:** -- **Lookup Speed**: Fast (1-3ms typical) -- **Build Time**: Slower -- **Recall**: Excellent (98%+) -- **Memory**: Higher -- **Best For**: Read-heavy caches with infrequent updates +Characteristics: +- Lookup Speed: Fast (1-3ms typical) +- Build Time: Slower +- Recall: Excellent (98%+) +- Memory: Higher +- Best For: Read-heavy caches with infrequent updates ```sql -- Set index type (requires pgvector 0.5.0+) @@ -100,10 +116,11 @@ SELECT semantic_cache.set_index_type('hnsw'); SELECT semantic_cache.rebuild_index(); ``` -**HNSW Parameters:** +HNSW Parameters: ```sql -- Adjust manually for optimal performance + DROP INDEX IF EXISTS semantic_cache.idx_cache_entries_embedding; CREATE INDEX idx_cache_entries_embedding ON semantic_cache.cache_entries @@ -123,19 +140,25 @@ WITH (m = 16, ef_construction = 64); ## Cache Configuration -The extension stores configuration in the `semantic_cache.cache_config` table. +The extension stores configuration in the +`semantic_cache.cache_config` table. ### View Current Configuration +Use the following command to view the current configuration: + ```sql SELECT * FROM semantic_cache.cache_config ORDER BY key; ``` ### Key Configuration Parameters +Use the following configuration parameters to control cache settings: + #### max_cache_size_mb -Maximum cache size in megabytes before auto-eviction triggers. +Use max_cache_size_mb to specify the maximum cache size in megabytes +before auto-eviction triggers. ```sql -- Set to 2GB @@ -148,7 +171,8 @@ WHERE key = 'max_cache_size_mb'; #### default_ttl_seconds -Default time-to-live for cached entries (can be overridden per query). +Use default_ttl_seconds to specify the default time-to-live for cached +entries (can be overridden per query). ```sql -- Set default to 2 hours @@ -161,7 +185,8 @@ WHERE key = 'default_ttl_seconds'; #### eviction_policy -Automatic eviction strategy when cache size limit is reached. 
+Use eviction_policy to specify the automatic eviction strategy when +the cache size limit is reached. ```sql -- Options: 'lru', 'lfu', 'ttl' @@ -170,15 +195,16 @@ SET value = 'lru' WHERE key = 'eviction_policy'; ``` -**Eviction Policies:** +Eviction Policies: -- **lru**: Least Recently Used - evicts oldest accessed entries -- **lfu**: Least Frequently Used - evicts least accessed entries -- **ttl**: Time To Live - evicts entries closest to expiration +- lru: Least Recently Used - evicts oldest accessed entries +- lfu: Least Frequently Used - evicts least accessed entries +- ttl: Time To Live - evicts entries closest to expiration #### similarity_threshold -Default similarity threshold for cache hits (0.0 - 1.0). +Use similarity_threshold to specify the default similarity threshold for +cache hits (0.0 - 1.0). ```sql -- More strict matching (fewer hits, more accurate) @@ -196,9 +222,13 @@ WHERE key = 'similarity_threshold'; ## Production Configurations +The following sections detail configuration settings useful in a +production environment. 
+ ### High-Throughput Configuration -For applications with thousands of queries per second: +Use the following configuration for applications with thousands of queries +per second: ```sql -- Use IVFFlat with optimized lists @@ -206,13 +236,16 @@ SELECT semantic_cache.set_index_type('ivfflat'); SELECT semantic_cache.rebuild_index(); -- Increase cache size -UPDATE semantic_cache.cache_config SET value = '5000' WHERE key = 'max_cache_size_mb'; +UPDATE semantic_cache.cache_config SET value = '5000' +WHERE key = 'max_cache_size_mb'; -- Use LRU for fast eviction -UPDATE semantic_cache.cache_config SET value = 'lru' WHERE key = 'eviction_policy'; +UPDATE semantic_cache.cache_config SET value = 'lru' +WHERE key = 'eviction_policy'; -- Shorter TTL to keep cache fresh -UPDATE semantic_cache.cache_config SET value = '1800' WHERE key = 'default_ttl_seconds'; +UPDATE semantic_cache.cache_config SET value = '1800' +WHERE key = 'default_ttl_seconds'; ``` PostgreSQL settings: @@ -226,7 +259,7 @@ maintenance_work_mem = 2GB ### High-Accuracy Configuration -For applications requiring maximum precision: +Use the following configuration for applications requiring maximum precision: ```sql -- Use HNSW for best recall @@ -234,15 +267,18 @@ SELECT semantic_cache.set_index_type('hnsw'); SELECT semantic_cache.rebuild_index(); -- Strict similarity threshold -UPDATE semantic_cache.cache_config SET value = '0.98' WHERE key = 'similarity_threshold'; +UPDATE semantic_cache.cache_config SET value = '0.98' +WHERE key = 'similarity_threshold'; -- Longer TTL for stable results -UPDATE semantic_cache.cache_config SET value = '14400' WHERE key = 'default_ttl_seconds'; +UPDATE semantic_cache.cache_config SET value = '14400' +WHERE key = 'default_ttl_seconds'; ``` ### LLM/AI Application Configuration -Optimized for caching expensive AI API calls: +Use the following configuration settings to optimize caching for expensive AI +API calls: ```sql -- OpenAI ada-002 dimensions @@ -250,18 +286,22 @@ SELECT 
semantic_cache.set_vector_dimension(1536); SELECT semantic_cache.rebuild_index(); -- Balance between accuracy and coverage -UPDATE semantic_cache.cache_config SET value = '0.93' WHERE key = 'similarity_threshold'; +UPDATE semantic_cache.cache_config SET value = '0.93' +WHERE key = 'similarity_threshold'; -- Cache longer (AI responses stable) -UPDATE semantic_cache.cache_config SET value = '7200' WHERE key = 'default_ttl_seconds'; +UPDATE semantic_cache.cache_config SET value = '7200' +WHERE key = 'default_ttl_seconds'; -- Large cache for many queries -UPDATE semantic_cache.cache_config SET value = '10000' WHERE key = 'max_cache_size_mb'; +UPDATE semantic_cache.cache_config SET value = '10000' +WHERE key = 'max_cache_size_mb'; ``` ### Analytics Query Configuration -For caching expensive analytical queries: +The following configuration is well-suited for caching expensive analytical +queries: ```sql -- Use standard dimensions @@ -269,19 +309,26 @@ SELECT semantic_cache.set_vector_dimension(768); SELECT semantic_cache.rebuild_index(); -- Moderate similarity (query variations common) -UPDATE semantic_cache.cache_config SET value = '0.90' WHERE key = 'similarity_threshold'; +UPDATE semantic_cache.cache_config SET value = '0.90' +WHERE key = 'similarity_threshold'; -- Short TTL (data changes frequently) -UPDATE semantic_cache.cache_config SET value = '900' WHERE key = 'default_ttl_seconds'; +UPDATE semantic_cache.cache_config SET value = '900' +WHERE key = 'default_ttl_seconds'; -- LFU policy (popular queries cached longer) -UPDATE semantic_cache.cache_config SET value = 'lfu' WHERE key = 'eviction_policy'; +UPDATE semantic_cache.cache_config SET value = 'lfu' +WHERE key = 'eviction_policy'; ``` ## Monitoring Configuration Impact +Use the following commands to monitor your semantic cache. 
+ ### Check Index Performance +Use the following query to view index usage statistics: + ```sql -- View index usage SELECT @@ -297,6 +344,8 @@ WHERE schemaname = 'semantic_cache'; ### Measure Lookup Times +Use the following commands to measure lookup performance: + ```sql -- Enable timing \timing on @@ -312,6 +361,8 @@ Target: < 5ms for most queries ### Cache Hit Rate +Use the following query to monitor cache hit rate: + ```sql -- Monitor hit rate with current config SELECT * FROM semantic_cache.cache_stats(); @@ -319,59 +370,69 @@ SELECT * FROM semantic_cache.cache_stats(); Target: > 70% for effective caching -## Configuration Best Practices +### Tuning Checklist -!!! tip "Start Simple" - Begin with defaults (1536 dimensions, IVFFlat, 0.95 threshold) and adjust based on monitoring. +Follow this checklist when tuning your cache configuration: -!!! warning "Test Before Production" - Always test configuration changes in development before applying to production. +- Choose a dimension matching your embedding model. +- Select an index type based on workload (IVFFlat for most cases). +- Set a similarity threshold based on accuracy requirements. +- Configure cache size based on available memory. +- Choose an eviction policy matching access patterns. +- Set TTL based on data freshness requirements. +- Monitor hit rate and adjust as needed. 
-### Tuning Checklist +### Common Mistakes -- [ ] Choose dimension matching your embedding model -- [ ] Select index type based on workload (IVFFlat for most cases) -- [ ] Set similarity threshold based on accuracy requirements -- [ ] Configure cache size based on available memory -- [ ] Choose eviction policy matching access patterns -- [ ] Set TTL based on data freshness requirements -- [ ] Monitor hit rate and adjust as needed +The following common mistakes have simple remediations: -### Common Mistakes +#### Using Wrong Dimensions -❌ **Using wrong dimensions** ```sql --- Extension configured for 1536, but sending 768-dim vectors +-- Extension configured for 1536, but sending 768-dim +-- vectors -- Result: Error or poor performance ``` -✓ **Match model dimensions** +You should use matching model dimensions: + ```sql -SELECT semantic_cache.set_vector_dimension(768); -- Match your model +-- Match your model +SELECT semantic_cache.set_vector_dimension(768); SELECT semantic_cache.rebuild_index(); ``` -❌ **Too strict threshold** +#### Too Strict Threshold + ```sql -UPDATE semantic_cache.cache_config SET value = '0.99' WHERE key = 'similarity_threshold'; +UPDATE semantic_cache.cache_config SET value = '0.99' +WHERE key = 'similarity_threshold'; -- Result: Very low hit rate ``` -✓ **Balanced threshold** +Use a more balanced threshold: + ```sql -UPDATE semantic_cache.cache_config SET value = '0.93' WHERE key = 'similarity_threshold'; +UPDATE semantic_cache.cache_config SET value = '0.93' +WHERE key = 'similarity_threshold'; -- Allows reasonable variation ``` -❌ **Forgetting to rebuild** +#### Forgetting To Rebuild + ```sql SELECT semantic_cache.set_vector_dimension(768); -- Forgot: SELECT semantic_cache.rebuild_index(); -- Result: Old index still in use! ``` +Rebuild your cache to use the new index! 
+ ## Next Steps -- [Functions Reference](functions/index.md) - Learn about all configuration functions -- [Monitoring](monitoring.md) - Track performance and tune configuration -- [Use Cases](use_cases.md) - See configuration examples in practice +- [Functions Reference](functions/index.md) - Learn about all + configuration functions. +- [Monitoring](monitoring.md) - Track performance and tune + configuration. +- [Use Cases](use_cases.md) - See configuration examples in + practice. From fab196bdf95de97cb9b53ccd8e0d33884cb8b205 Mon Sep 17 00:00:00 2001 From: Susan Douglas Date: Tue, 24 Feb 2026 11:35:11 -0500 Subject: [PATCH 2/7] Updates to FAQ.md and configuration.md --- docs/FAQ.md | 229 +++++++++++++++++++++++++----------------- docs/configuration.md | 4 +- 2 files changed, 140 insertions(+), 93 deletions(-) diff --git a/docs/FAQ.md b/docs/FAQ.md index 5779a97..214c2af 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -4,40 +4,53 @@ ### What is semantic caching? -Semantic caching uses vector embeddings to understand the *meaning* of queries, not just exact text matching. When you search for "What was Q4 revenue?", the cache can return results for semantically similar queries like "Show Q4 revenue" or "Q4 revenue please" even though the exact text is different. +Semantic caching uses vector embeddings to understand the meaning of +queries, not just exact text matching. When you search for "What was Q4 +revenue?", the cache can return results for semantically similar queries +like "Show Q4 revenue" or "Q4 revenue please" even though the exact text +is different. -Traditional caching requires exact string matches, while semantic caching matches based on similarity scores (typically 90-98%). +Traditional caching requires exact string matches, while semantic caching +matches based on similarity scores (typically 90-98%). ### Why use pg_semantic_cache instead of a traditional cache like Redis? 
-**Use pg_semantic_cache when:** -- Queries are phrased differently but mean the same thing (LLM applications, natural language queries) -- You need semantic understanding of query similarity -- You're already using PostgreSQL and want tight integration -- You need persistent caching with complex querying capabilities +Use pg_semantic_cache when: -**Use traditional caching (Redis, Memcached) when:** -- You need exact key-value matching -- Sub-millisecond latency is critical -- Queries are deterministic and rarely vary -- You need distributed caching across multiple services +- Queries are phrased differently but mean the same thing (LLM + applications, natural language queries). +- You need semantic understanding of query similarity. +- You're already using PostgreSQL and want tight integration. +- You need persistent caching with complex querying capabilities. -**Use both:** pg_semantic_cache for semantic matching + Redis for hot-path exact matches! +Use traditional caching (Redis, Memcached) when: + +- You need exact key-value matching. +- Sub-millisecond latency is critical. +- Queries are deterministic and rarely vary. +- You need distributed caching across multiple services. + +Use both: pg_semantic_cache for semantic matching + Redis for hot-path +exact matches! ### How does it compare to application-level caching? 
+The following table compares pg_semantic_cache to application-level +caching: + | Feature | pg_semantic_cache | Application Cache | |---------|-------------------|-------------------| -| Semantic Matching | ✅ Yes | ❌ No | -| Database Integration | ✅ Native | ⚠️ Requires sync | -| Multi-language | ✅ Yes | ⚠️ Per-instance | -| Persistence | ✅ Automatic | ⚠️ Manual | -| Vector Operations | ✅ Optimized | ❌ Not available | -| Shared Across Apps | ✅ Yes | ❌ No | +| Semantic Matching | Yes | No | +| Database Integration | Native | Requires sync | +| Multi-language | Yes | Per-instance | +| Persistence | Automatic | Manual | +| Vector Operations | Optimized | Not available | +| Shared Across Apps | Yes | No | ### Is it production-ready? -Yes! pg_semantic_cache is: +Yes! pg_semantic_cache is production-ready and has the following +characteristics: - Written in C using stable PostgreSQL APIs - Tested with PostgreSQL 14-18 - Used in production environments @@ -48,7 +61,8 @@ Yes! pg_semantic_cache is: ### Do I need to install pgvector separately? -Yes, pgvector is a required dependency. Install it before pg_semantic_cache: +Yes, pgvector is a required dependency. Install it before +pg_semantic_cache: ```bash # Install pgvector @@ -65,12 +79,12 @@ make && sudo make install It depends on the service: -- **Self-hosted PostgreSQL**: ✅ Yes -- **AWS RDS**: ✅ Yes (if you can install extensions) -- **Azure Database for PostgreSQL**: ✅ Yes (flexible server) -- **Google Cloud SQL**: ⚠️ Check extension support -- **Supabase**: ✅ Yes (pgvector supported) -- **Neon**: ✅ Yes (pgvector supported) +- Self-hosted PostgreSQL: Yes +- AWS RDS: Yes (if you can install extensions) +- Azure Database for PostgreSQL: Yes (flexible server) +- Google Cloud SQL: Check extension support +- Supabase: Yes (pgvector supported) +- Neon: Yes (pgvector supported) Check if your provider supports custom C extensions and pgvector. @@ -80,6 +94,8 @@ PostgreSQL 14, 15, 16, 17, and 18 are fully supported and tested. 
### How do I upgrade the extension? +Use one of the following methods to upgrade the extension: + ```sql -- Drop and recreate (WARNING: clears cache) DROP EXTENSION pg_semantic_cache CASCADE; @@ -93,14 +109,19 @@ ALTER EXTENSION pg_semantic_cache UPDATE TO '0.4.0'; ### How fast are cache lookups? -**Target**: < 5ms for most queries +Cache lookups are very fast, with the following performance +characteristics: + +Target: < 5ms for most queries + +Typical Performance: -**Typical Performance:** - IVFFlat index: 2-5ms - HNSW index: 1-3ms - Without index: 50-500ms (don't do this!) -**Factors affecting speed:** +Factors affecting speed: + - Cache size (more entries = slightly slower) - Vector dimension (1536 vs 3072) - Index type and parameters @@ -114,25 +135,31 @@ SELECT * FROM semantic_cache.get_cached_result('[...]'::text, 0.95); ### How much storage does it use? -**Storage per entry:** +Storage requirements vary based on vector dimensions and result sizes: + +Storage per entry: + - Vector embedding: ~6KB (1536 dimensions) - Result data: Varies (your cached JSONB) - Metadata: ~200 bytes -- **Total**: 6KB + your data size +- Total: 6KB + your data size + +Example: -**Example:** - 10K entries with 10KB results each = ~160MB - 100K entries with 5KB results each = ~1.1GB ### What's the maximum cache size? 
-There's no hard limit, but practical considerations: +There's no hard limit, but consider the following practical +considerations: + +- < 100K entries: Excellent performance with default settings +- 100K - 1M entries: Increase IVFFlat lists parameter +- > 1M entries: Consider partitioning or HNSW index -- **< 100K entries**: Excellent performance with default settings -- **100K - 1M entries**: Increase IVFFlat lists parameter -- **> 1M entries**: Consider partitioning or HNSW index +Use the following command to configure max size: -Configure max size: ```sql UPDATE semantic_cache.cache_config SET value = '5000' -- 5GB @@ -141,7 +168,8 @@ WHERE key = 'max_cache_size_mb'; ### Does it work with large result sets? -Yes, but consider: +Yes, but consider the following factors: + - Large results (> 1MB) consume more storage - Serializing/deserializing large JSONB has overhead - Consider caching aggregated results instead of full datasets @@ -161,7 +189,8 @@ FROM huge_table; -- 1KB result Any embedding model that produces fixed-dimension vectors: -**Popular Models:** +Popular Models: + - OpenAI text-embedding-ada-002 (1536 dim) - OpenAI text-embedding-3-small (1536 dim) - OpenAI text-embedding-3-large (3072 dim) @@ -169,7 +198,8 @@ Any embedding model that produces fixed-dimension vectors: - Sentence Transformers all-MiniLM-L6-v2 (384 dim) - Sentence Transformers all-mpnet-base-v2 (768 dim) -Configure dimension: +Use the following commands to configure dimension: + ```sql SELECT semantic_cache.set_vector_dimension(768); SELECT semantic_cache.rebuild_index(); @@ -177,9 +207,11 @@ SELECT semantic_cache.rebuild_index(); ### Do I need to generate embeddings myself? -Yes. pg_semantic_cache stores and searches embeddings, but doesn't generate them. +Yes. pg_semantic_cache stores and searches embeddings, but doesn't +generate them. + +Typical workflow: -**Typical workflow:** 1. Generate embedding using your chosen model/API 2. 
Pass embedding to `cache_query()` or `get_cached_result()` 3. Extension handles similarity search @@ -202,15 +234,17 @@ SELECT semantic_cache.rebuild_index(); ### What similarity threshold should I use? -**Recommendations:** +Use the following recommendations to select an appropriate similarity +threshold: + +- 0.98-0.99: Nearly identical queries (financial data, strict matching) +- 0.95-0.97: Very similar queries (recommended starting point) +- 0.90-0.94: Similar queries (good for exploratory queries) +- 0.85-0.89: Somewhat related (use with caution) +- < 0.85: Too lenient (likely irrelevant results) -- **0.98-0.99**: Nearly identical queries (financial data, strict matching) -- **0.95-0.97**: Very similar queries (recommended starting point) -- **0.90-0.94**: Similar queries (good for exploratory queries) -- **0.85-0.89**: Somewhat related (use with caution) -- **< 0.85**: Too lenient (likely irrelevant results) +Start with 0.95 and adjust based on your hit rate: -**Start with 0.95** and adjust based on your hit rate: - Hit rate too low? Lower threshold (0.92) - Getting irrelevant results? Raise threshold (0.97) @@ -218,13 +252,17 @@ SELECT semantic_cache.rebuild_index(); ### How do I choose between IVFFlat and HNSW? -**Use IVFFlat (default) when:** +Choose the index type based on your workload characteristics: + +Use IVFFlat (default) when: + - Cache updates frequently - Build time matters - < 100K entries - Good enough recall (95%+) -**Use HNSW when:** +Use HNSW when: + - Maximum accuracy needed - Cache mostly read-only - Have pgvector 0.5.0+ @@ -238,7 +276,7 @@ SELECT semantic_cache.rebuild_index(); ### What TTL should I set? -Depends on data freshness requirements: +The TTL depends on your data freshness requirements: ```sql -- Real-time data (stock prices, weather) @@ -256,7 +294,7 @@ ttl_seconds := NULL -- Never expires ### How often should I run maintenance? 
-**Recommended Schedule:** +Follow this recommended maintenance schedule: ```sql -- Every 15 minutes: Evict expired entries @@ -279,26 +317,27 @@ SELECT cron.schedule('cache-evict', '*/15 * * * *', ### Why is my hit rate so low? -**Common causes:** +Low hit rates typically have one of the following common causes: -1. **Threshold too high** +1. Threshold too high ```sql -- Lower from 0.95 to 0.90 SELECT * FROM semantic_cache.get_cached_result('[...]'::text, 0.90); ``` -2. **TTL too short** +2. TTL too short ```sql -- Check average entry lifetime - SELECT AVG(EXTRACT(EPOCH FROM (NOW() - created_at))) / 3600 as avg_age_hours + SELECT AVG(EXTRACT(EPOCH FROM (NOW() - created_at))) / 3600 + as avg_age_hours FROM semantic_cache.cache_entries; ``` -3. **Poor embedding quality** +3. Poor embedding quality - Use better embedding model - Ensure consistent embedding generation -4. **Cache too small** +4. Cache too small ```sql -- Check if entries being evicted too quickly SELECT * FROM semantic_cache.cache_stats(); @@ -306,7 +345,7 @@ SELECT cron.schedule('cache-evict', '*/15 * * * *', ### Cache lookups are returning no results -**Debugging steps:** +Use the following debugging steps to troubleshoot this issue: ```sql -- 1. 
Check cache has entries @@ -333,11 +372,13 @@ LIMIT 5; ### Extension won't load +If you encounter the following error: + ```sql ERROR: could not open extension control file ``` -**Solution:** +Use this solution: ```bash # Check installation ls -l $(pg_config --sharedir)/extension/pg_semantic_cache* @@ -352,11 +393,13 @@ ls -l $(pg_config --pkglibdir)/vector.so ### Build errors +If you encounter the following build error: + ```bash fatal error: postgres.h: No such file or directory ``` -**Solution:** +Use this solution: ```bash # Debian/Ubuntu sudo apt-get install postgresql-server-dev-17 @@ -371,23 +414,25 @@ export PATH="/opt/homebrew/opt/postgresql@17/bin:$PATH" ### Out of memory errors +If you encounter the following error: + ```sql ERROR: out of memory ``` -**Solutions:** +Try one of these solutions: -1. **Increase work_mem** +1. Increase work_mem ```sql SET work_mem = '512MB'; ``` -2. **Reduce cache size** +2. Reduce cache size ```sql SELECT semantic_cache.evict_lru(5000); -- Keep only 5K entries ``` -3. **Lower vector dimension** +3. Lower vector dimension ```sql SELECT semantic_cache.set_vector_dimension(768); -- Use smaller model SELECT semantic_cache.rebuild_index(); @@ -398,19 +443,24 @@ ERROR: out of memory ### Should I cache everything? No! Cache queries that are: -- ✅ Expensive (slow execution) -- ✅ Frequently repeated (similar queries) -- ✅ Tolerant of slight staleness -- ✅ Semantically searchable + +- Expensive (slow execution) +- Frequently repeated (similar queries) +- Tolerant of slight staleness +- Semantically searchable Don't cache: -- ❌ Simple key-value lookups (use Redis) -- ❌ Real-time critical data -- ❌ Unique, one-off queries -- ❌ Queries that must be current + +- Simple key-value lookups (use Redis) +- Real-time critical data +- Unique, one-off queries +- Queries that must be current ### How do I test if caching helps? 
+Use the following approach to measure the performance improvement from +caching: + ```sql -- Measure query time without cache \timing on @@ -423,18 +473,19 @@ SELECT * FROM semantic_cache.get_cached_result('[...]'::text, 0.95); -- With cache (subsequent calls - hit) SELECT * FROM semantic_cache.get_cached_result('[...]'::text, 0.95); --- Time: 3.456 ms (cache hit!) +-- Time: 3.456 ms (cache hit) --- Speedup: 450 / 3.5 = 128x faster! +-- Speedup: 450 / 3.5 = 128x faster ``` ### Should I use tags? Yes! Tags are useful for: -- **Organization**: Group by feature (`ARRAY['dashboard', 'sales']`) -- **Bulk invalidation**: `invalidate_cache(tag := 'user_123')` -- **Analytics**: `SELECT * FROM semantic_cache.cache_by_tag` -- **Debugging**: Find entries by category + +- Organization: Group by feature (`ARRAY['dashboard', 'sales']`) +- Bulk invalidation: `invalidate_cache(tag := 'user_123')` +- Analytics: `SELECT * FROM semantic_cache.cache_by_tag` +- Debugging: Find entries by category ```sql -- Tag everything @@ -447,16 +498,12 @@ SELECT semantic_cache.cache_query( ); ``` -## See Also - -- [Getting Started](index.md) -- [Installation Guide](installation.md) -- [Configuration](configuration.md) -- [Use Cases](use_cases.md) -- [Functions Reference](functions/index.md) -- [Monitoring](monitoring.md) ## Still Have Questions? 
-- **GitHub Issues**: [Report bugs or ask questions](https://github.com/pgedge/pg_semantic_cache/issues) -- **Discussions**: [Community discussions](https://github.com/pgedge/pg_semantic_cache/discussions) +Contact us through the following channels: + +- GitHub Issues: [Report bugs or ask + questions](https://github.com/pgedge/pg_semantic_cache/issues) +- Discussions: [Community + discussions](https://github.com/pgedge/pg_semantic_cache/discussions) diff --git a/docs/configuration.md b/docs/configuration.md index d1b3257..c357dad 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1,7 +1,7 @@ # Configuration -pg_semantic_cache provides flexible configuration options for vector -dimensions, index types, and cache behavior. +This guide describes how to configure pg_semantic_cache for your use case, +including vector dimensions, index types, and cache behavior. !!! tip "Start Simple" From 7f23c145b519cbde31369215e447c8b23ecc32ef Mon Sep 17 00:00:00 2001 From: Susan Douglas Date: Thu, 26 Feb 2026 11:05:31 -0500 Subject: [PATCH 3/7] Updated file contents to break into sections; add edits, etc --- docs/architecture.md | 54 ++++++++++++++ docs/index.md | 161 +++++++++------------------------------- docs/installation.md | 123 ++++++++++-------------------- docs/quick_start.md | 60 +++++++++++++++ docs/troubleshooting.md | 84 +++++++++++++++++++++ mkdocs.yml | 7 +- 6 files changed, 278 insertions(+), 211 deletions(-) create mode 100644 docs/architecture.md create mode 100644 docs/quick_start.md create mode 100644 docs/troubleshooting.md diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..764e9ee --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,54 @@ +# Architecture + +pg_semantic_cache is implemented in pure C using the PostgreSQL extension API +(PGXS), providing: + +- Small binary size of ~100KB vs 2-5MB for Rust-based extensions. +- Fast build times of 10-30 seconds vs 2-5 minutes. 
+- Immediate compatibility works with new PostgreSQL versions immediately. +- Standard packaging is compatible with all PostgreSQL packaging tools. + +## How It Works + +```mermaid +graph LR + A[Query] --> B[Generate Embedding] + B --> C{Cache Lookup} + C -->|Hit| D[Return Cached Result] + C -->|Miss| E[Execute Query] + E --> F[Store Result + Embedding] + F --> G[Return Result] +``` + +1. Generate an embedding by converting your query text into a vector embedding + using your preferred model (OpenAI, Cohere, etc.). +2. Check the cache by searching for semantically similar cached queries using + cosine similarity. +3. On a cache hit, if a similar query exists above the similarity threshold, + the cached result is returned. +4. On a cache miss, the actual query is executed and the result is cached with + its embedding for future use. +5. Automatic maintenance evicts expired entries based on TTL and configured + policies. + +## Performance + +- Lookup time is < 5ms for most queries with IVFFlat index. +- Scalability handles 100K+ cached entries efficiently. +- Throughput reaches thousands of cache lookups per second. +- Storage provides configurable cache size limits with automatic eviction. + +!!! tip "Pro Tip" + + Start with the default IVFFlat index and 1536 dimensions (OpenAI + ada-002). You can always reconfigure your cache later with the + `set_vector_dimension()` and `rebuild_index()` functions. + +## Getting Help + +- Browse the sections in the navigation menu for documentation. +- Report issues at + [GitHub Issues](https://github.com/pgedge/pg_semantic_cache/issues). +- See [Use Cases](use_cases.md) for practical implementation examples. +- Check the [FAQ](FAQ.md) for answers to common questions. + diff --git a/docs/index.md b/docs/index.md index 478ecc4..e776e85 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,103 +1,32 @@ # pg_semantic_cache -!!! 
info "Welcome to pg_semantic_cache" - Semantic query result caching for PostgreSQL using vector embeddings - making expensive queries fast through intelligent reuse. - -## Overview - -pg_semantic_cache is a PostgreSQL extension that implements semantic query result caching using vector embeddings. Unlike traditional query caching that relies on exact string matching, pg_semantic_cache understands the *meaning* of queries through vector similarity, enabling cache hits even when queries are phrased differently. +pg_semantic_cache is a PostgreSQL extension that implements semantic query +result caching using vector embeddings. Unlike traditional query caching that +relies on exact string matching, pg_semantic_cache understands the *meaning* +of queries through vector similarity, enabling cache hits even when queries +are phrased differently. This extension is particularly valuable for: -- **AI/LLM Applications**: Cache expensive LLM API calls and RAG (Retrieval Augmented Generation) results -- **Analytics Workloads**: Reuse results from complex analytical queries with similar parameters -- **External API Queries**: Cache results from expensive external data sources -- **Database Query Optimization**: Reduce load on expensive database operations - -## Key Features - -- **Semantic Matching**: Uses pgvector for similarity-based cache lookups -- **Flexible TTL**: Per-entry time-to-live configuration -- **Tag-Based Management**: Organize and invalidate cache entries by tags -- **Multiple Eviction Policies**: LRU, LFU, and TTL-based automatic eviction -- **Cost Tracking**: Monitor and report on query cost savings -- **Configurable Dimensions**: Support for various embedding models (768, 1536, 3072+ dimensions) -- **Multiple Index Types**: IVFFlat (fast) or HNSW (accurate) vector indexes -- **Comprehensive Monitoring**: Built-in statistics, views, and health metrics - -## How It Works - -```mermaid -graph LR - A[Query] --> B[Generate Embedding] - B --> C{Cache Lookup} - C 
-->|Hit| D[Return Cached Result] - C -->|Miss| E[Execute Query] - E --> F[Store Result + Embedding] - F --> G[Return Result] -``` - -1. **Generate Embedding**: Convert your query text into a vector embedding using your preferred model (OpenAI, Cohere, etc.) -2. **Check Cache**: Search for semantically similar cached queries using cosine similarity -3. **Cache Hit**: If a similar query exists above the similarity threshold, return the cached result -4. **Cache Miss**: Execute the actual query, cache the result with its embedding for future use -5. **Automatic Maintenance**: Expired entries are evicted based on TTL and configured policies - -## Quick Start - -### Prerequisites - -- PostgreSQL 14, 15, 16, 17, or 18 -- pgvector extension installed -- C compiler (gcc or clang) -- PostgreSQL development headers - -### Installation - -```bash -# Clone the repository -git clone https://github.com/pgedge/pg_semantic_cache.git -cd pg_semantic_cache +- AI/LLM applications can cache expensive LLM API calls and RAG (Retrieval + Augmented Generation) results. +- Analytics workloads can reuse results from complex analytical queries with + similar parameters. +- External API queries can cache results from expensive external data + sources. +- Database query optimization can reduce load on expensive database + operations. -# Build and install -make clean -make -sudo make install -``` +### Why Use Semantic Caching -### Setup +Semantic caching transforms how applications handle query results by +using vector matching rather than matching exact queries. Traditional caching +systems can miss cached result sets when queries are phrased differently, +while semantic caching recognizes that "What was Q4 revenue?" and "Show Q4 revenue" as the same question. This approach dramatically increases cache hit rates +and reduces costs for AI applications, analytics workloads, and external API +calls. 
-```sql --- Install required extensions -CREATE EXTENSION IF NOT EXISTS vector; -CREATE EXTENSION IF NOT EXISTS pg_semantic_cache; - --- Verify installation -SELECT * FROM semantic_cache.cache_health; -``` - -## Simple Example - -```sql --- Cache a query result with its embedding -SELECT semantic_cache.cache_query( - query_text := 'What was our Q4 2024 revenue?', - query_embedding := '[0.123, 0.456, ...]'::text, -- From your embedding model - result_data := '{"answer": "Q4 2024 revenue was $2.4M"}'::jsonb, - ttl_seconds := 1800, -- 30 minutes - tags := ARRAY['llm', 'revenue'] -); - --- Retrieve with a semantically similar query -SELECT * FROM semantic_cache.get_cached_result( - query_embedding := '[0.124, 0.455, ...]'::text, -- Slightly different query - similarity_threshold := 0.95 -- 95% similarity required -); -``` - -## Why Use pg_semantic_cache? - -### Traditional Caching vs Semantic Caching +Queries that would overlook cached result sets work with a semantic cache: | Traditional Cache | Semantic Cache | |-------------------|----------------| @@ -108,40 +37,24 @@ SELECT * FROM semantic_cache.get_cached_result( ### Cost Savings Example For an LLM application making 10,000 queries per day: -- Without caching: $200/day (at $0.02 per query) -- With 80% cache hit rate: $40/day -- **Savings: $160/day or $58,400/year** - -## Architecture - -pg_semantic_cache is implemented in pure C using the PostgreSQL extension API (PGXS), providing: - -- **Small Binary Size**: ~100KB vs 2-5MB for Rust-based extensions -- **Fast Build Times**: 10-30 seconds vs 2-5 minutes -- **Immediate Compatibility**: Works with new PostgreSQL versions immediately -- **Standard Packaging**: Compatible with all PostgreSQL packaging tools - -## Performance - -- **Lookup Time**: < 5ms for most queries with IVFFlat index -- **Scalability**: Handles 100K+ cached entries efficiently -- **Throughput**: Thousands of cache lookups per second -- **Storage**: Configurable cache size limits with automatic 
eviction -## Getting Help +- Without caching costs $200/day (at $0.02 per query). +- With 80% cache hit rate costs $40/day. +- Savings are $160/day or $58,400/year. -- **Documentation**: Browse the sections in the navigation menu -- **Issues**: Report bugs at [GitHub Issues](https://github.com/pgedge/pg_semantic_cache/issues) -- **Examples**: See [Use Cases](use_cases.md) for practical implementations -- **FAQ**: Check the [FAQ](FAQ.md) for common questions +### Key Features -## Next Steps +- Semantic matching uses pgvector for similarity-based cache lookups. +- Flexible TTL provides per-entry time-to-live configuration. +- Tag-based management organizes and invalidates cache entries by tags. +- Multiple eviction policies include LRU, LFU, and TTL-based automatic + eviction. +- Cost tracking monitors and reports on query cost savings. +- Configurable dimensions support various embedding models (768, 1536, + 3072+ dimensions). +- Multiple index types include IVFFlat (fast) or HNSW (accurate) vector + indexes. +- Comprehensive monitoring provides built-in statistics, views, and health + metrics. -- [Installation Guide](installation.md) - Detailed installation instructions -- [Configuration](configuration.md) - Configure dimensions, indexes, and policies -- [Functions Reference](functions/index.md) - Complete function documentation -- [Use Cases](use_cases.md) - Practical examples and integration patterns -- [Monitoring](monitoring.md) - Track performance and optimize cache usage -!!! tip "Pro Tip" - Start with the default IVFFlat index and 1536 dimensions (OpenAI ada-002). You can always reconfigure later with `set_vector_dimension()` and `rebuild_index()`. diff --git a/docs/installation.md b/docs/installation.md index 1b248bb..0173931 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -4,16 +4,19 @@ This guide covers installing pg_semantic_cache from source on various platforms. 
## Prerequisites -### Required +Before installing pg_semantic_cache, you must install: -- **PostgreSQL**: Version 14, 15, 16, 17, or 18 -- **pgvector**: Must be installed before pg_semantic_cache -- **C Compiler**: gcc or clang -- **make**: GNU Make or compatible -- **PostgreSQL Development Headers**: Required for building extensions +- PostgreSQL: Version 14, 15, 16, 17, or 18 +- pgvector: Must be installed before pg_semantic_cache +- C Compiler: gcc or clang +- make: GNU Make or compatible +- PostgreSQL Development Headers: Required for building extensions ### Platform-Specific Packages +Use the following platform-specific commands to ensure that your host is +prepared for pg_semantic_cache: + === "Debian/Ubuntu" ```bash sudo apt-get install -y \ @@ -63,7 +66,8 @@ This guide covers installing pg_semantic_cache from source on various platforms. ## Building from Source -### Standard Installation +After installing the prerequisites, build pg_semantic_cache using the standard +PostgreSQL extension build commands. ```bash # Clone the repository @@ -80,6 +84,9 @@ sudo make install ### Multi-Version PostgreSQL +Use PG_CONFIG to target specific PostgreSQL versions when multiple versions +are installed. + If you have multiple PostgreSQL versions installed: ```bash @@ -94,6 +101,8 @@ done ### Development Build +Development builds include verbose output and debugging information. + For development with verbose output: ```bash @@ -102,6 +111,8 @@ make dev-install ### View Build Configuration +Check your build environment and configuration settings before compiling. + ```bash make info ``` @@ -114,7 +125,9 @@ Output includes: ## Verifying Installation -### Check Extension Files +After installation completes, verify that all extension files are in place. 
+ +Check for the extension files: ```bash # Verify shared library is installed @@ -127,7 +140,7 @@ ls -lh $(pg_config --sharedir)/extension/pg_semantic_cache.control ls -lh $(pg_config --sharedir)/extension/pg_semantic_cache--*.sql ``` -### Check pgvector Installation +Use the following command to confirm that pgvector is installed: ```bash # pgvector must be installed first @@ -136,9 +149,12 @@ ls -lh $(pg_config --pkglibdir)/vector.so ## PostgreSQL Configuration +Optimize PostgreSQL settings for better performance with semantic caching. + ### Update postgresql.conf -pg_semantic_cache works out of the box without special configuration, but for optimal performance with large caches: +pg_semantic_cache works out of the box without special configuration, but for +optimal performance with large caches: ```ini # Recommended for production with large caches @@ -151,7 +167,7 @@ maintenance_work_mem = 1GB # For index creation track_io_timing = on ``` -Restart PostgreSQL after configuration changes: +Restart PostgreSQL after making configuration changes: ```bash # Systemd @@ -163,7 +179,8 @@ pg_ctl restart -D /var/lib/postgresql/data ## Creating the Extension -### In psql +Create the extension in your PostgreSQL database to begin using semantic +caching. Open the psql command line, and run the following commands: ```sql -- Connect to your database @@ -189,6 +206,8 @@ Expected output: ### Verify Schema Creation +Check that the semantic_cache schema and tables were created successfully. 
+ ```sql -- Check that schema and tables were created \dt semantic_cache.* @@ -197,75 +216,11 @@ Expected output: SELECT * FROM semantic_cache.cache_health; ``` -## Troubleshooting Installation - -### pg_config not found - -```bash -# Find PostgreSQL installation -sudo find / -name pg_config 2>/dev/null - -# Add to PATH -export PATH="/usr/pgsql-17/bin:$PATH" - -# Or specify directly -PG_CONFIG=/path/to/pg_config make install -``` - -### Permission Denied During Installation - -```bash -# Use sudo for system directories -sudo make install - -# Or install to custom directory (no sudo required) -make install DESTDIR=/path/to/custom/location -``` - -### pgvector Not Found - -```sql --- Error: could not open extension control file --- Solution: Install pgvector first -``` - -```bash -cd /tmp -git clone https://github.com/pgvector/pgvector.git -cd pgvector -make -sudo make install -``` - -### Extension Already Exists - -```sql --- If you're upgrading, drop the old version first -DROP EXTENSION IF EXISTS pg_semantic_cache CASCADE; - --- Then reinstall -CREATE EXTENSION pg_semantic_cache; -``` - -!!! warning "Data Loss Warning" - Dropping the extension will delete all cached data. Use `ALTER EXTENSION UPDATE` for upgrades when available. - -### Compilation Errors - -```bash -# Ensure development headers are installed -# Debian/Ubuntu -sudo apt-get install postgresql-server-dev-17 - -# RHEL/Rocky -sudo yum install postgresql17-devel - -# Verify pg_config works -pg_config --includedir-server -``` ## Testing Installation +Validate your installation by running the test suite or manual tests. + Run the included test suite: ```bash @@ -285,13 +240,16 @@ Or run manual tests: ## Uninstalling -### Remove Extension from Database +You can remove pg_semantic_cache from your database and system when it is no +longer needed. 
+ +Use the following command to remove the extension from your database: ```sql DROP EXTENSION IF EXISTS pg_semantic_cache CASCADE; ``` -### Remove Files from System +Then, clean up extension files from PostgreSQL directories: ```bash cd pg_semantic_cache @@ -303,8 +261,3 @@ This removes: - Control file - SQL installation files -## Next Steps - -- [Configuration](configuration.md) - Configure vector dimensions and index types -- [Functions Reference](functions/index.md) - Learn about available functions -- [Use Cases](use_cases.md) - See practical examples diff --git a/docs/quick_start.md b/docs/quick_start.md new file mode 100644 index 0000000..5df92c2 --- /dev/null +++ b/docs/quick_start.md @@ -0,0 +1,60 @@ +# Quick Start + +The steps that follow are designed to get you started with semantic caching +quickly and easily. Before using pg_semantic_cache, you must install: + +- PostgreSQL 14, 15, 16, 17, or 18 +- the pgvector extension +- a C compiler (gcc or clang) +- PostgreSQL development headers + +## Installation + +Use the following commands to build the extension from the Github +repository: + +```bash +# Clone the repository +git clone https://github.com/pgedge/pg_semantic_cache.git +cd pg_semantic_cache + +# Build and install +make clean +make +sudo make install +``` + +After building the extension, you need to install and create the extensions +you'll be using: + +```sql +-- Install required extensions +CREATE EXTENSION IF NOT EXISTS vector; +CREATE EXTENSION IF NOT EXISTS pg_semantic_cache; + +-- Verify installation +SELECT * FROM semantic_cache.cache_health; +``` + +### Using pg_semantic_cache + +Use the following commands to add a result set to a cache, and then query the +cache with a similar query: + +```sql +-- Cache a query result with its embedding +SELECT semantic_cache.cache_query( + query_text := 'What was our Q4 2024 revenue?', + query_embedding := '[0.123, 0.456, ...]'::text, -- From embedding model + result_data := '{"answer": "Q4 2024 revenue 
was $2.4M"}'::jsonb, + ttl_seconds := 1800, -- 30 minutes + tags := ARRAY['llm', 'revenue'] +); + +-- Retrieve with a semantically similar query +SELECT * FROM semantic_cache.get_cached_result( + query_embedding := '[0.124, 0.455, ...]'::text, -- Slightly different + similarity_threshold := 0.95 -- 95% similarity required +); +``` + diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md new file mode 100644 index 0000000..c5eeec5 --- /dev/null +++ b/docs/troubleshooting.md @@ -0,0 +1,84 @@ +# Troubleshooting Installation + +The following lists some common issues encountered during installation, and +how to resolve the problems. + +## pg_config not found + +The build system needs pg_config to locate PostgreSQL installation paths. If +pg_config is not in your PATH, the build will fail. + +```bash +# Find PostgreSQL installation +sudo find / -name pg_config 2>/dev/null + +# Add to PATH +export PATH="/usr/pgsql-17/bin:$PATH" + +# Or specify directly +PG_CONFIG=/path/to/pg_config make install +``` + +## Permission Denied During Installation + +Installing extensions requires write access to PostgreSQL's system directories. +Use sudo for standard installations or specify a custom directory. + +```bash +# Use sudo for system directories +sudo make install + +# Or install to custom directory (no sudo required) +make install DESTDIR=/path/to/custom/location +``` + +## pgvector Not Found + +pg_semantic_cache depends on pgvector and will fail to create if pgvector is +not installed. Install pgvector before installing pg_semantic_cache. + +```sql +-- Error: could not open extension control file +-- Solution: Install pgvector first +``` + +```bash +cd /tmp +git clone https://github.com/pgvector/pgvector.git +cd pgvector +make +sudo make install +``` + +## Extension Already Exists + +When reinstalling or upgrading, PostgreSQL may report that the extension +already exists. Drop the existing extension before creating a new one. 
+ +```sql +-- If you're upgrading, drop the old version first +DROP EXTENSION IF EXISTS pg_semantic_cache CASCADE; + +-- Then reinstall +CREATE EXTENSION pg_semantic_cache; +``` + +!!! warning "Data Loss Warning" + Dropping the extension will delete all cached data. Use `ALTER EXTENSION UPDATE` for upgrades when available. + +## Compilation Errors + +Compilation failures typically occur when PostgreSQL development headers are +missing. Install the appropriate development package for your platform. + +```bash +# Ensure development headers are installed +# Debian/Ubuntu +sudo apt-get install postgresql-server-dev-17 + +# RHEL/Rocky +sudo yum install postgresql17-devel + +# Verify pg_config works +pg_config --includedir-server +``` diff --git a/mkdocs.yml b/mkdocs.yml index 6852334..78fbff7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -69,9 +69,11 @@ markdown_extensions: nav: - Home: index.md + - Using Semantic Caching: architecture.md - Getting Started: - - Installation: installation.md - - Configuration: configuration.md + - Quick Start Guide: quick_start.md + - Building from Source: installation.md + - Configuring pg_semantic_cache: configuration.md - Usage: - Use Cases: use_cases.md - Monitoring: monitoring.md @@ -102,4 +104,5 @@ nav: - get_cost_savings: functions/get_cost_savings.md - Utility: - init_schema: functions/init_schema.md + - Troubleshooting: troubleshooting.md - FAQ: FAQ.md From 1c0401805ced4c1ff24f5c2f101daca078bc6405 Mon Sep 17 00:00:00 2001 From: Susan Douglas Date: Fri, 6 Mar 2026 10:01:28 -0500 Subject: [PATCH 4/7] Updates to use_cases.md - editing --- docs/use_cases.md | 216 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 168 insertions(+), 48 deletions(-) diff --git a/docs/use_cases.md b/docs/use_cases.md index 6bc6358..2a9ecbf 100644 --- a/docs/use_cases.md +++ b/docs/use_cases.md @@ -1,16 +1,24 @@ # Use Cases -Practical examples and integration patterns for pg_semantic_cache in real-world applications. 
+This document provides practical examples and integration patterns for the +pg_semantic_cache extension in real-world applications. ## LLM and AI Applications +This section demonstrates how to use the pg_semantic_cache extension to +optimize costs and performance in LLM and AI-powered applications. + ### RAG (Retrieval Augmented Generation) Caching -Cache expensive LLM API calls based on semantic similarity of user questions. +The RAG caching pattern addresses the challenge of expensive LLM API calls by +caching responses based on semantic similarity of user questions. -**Problem**: LLM API calls cost $0.02-$0.05 per request. Users ask similar questions differently. +LLM API calls typically cost between $0.02 and $0.05 per request, and users +often ask similar questions using different wording. The pg_semantic_cache +extension solves this problem by caching LLM responses with semantic matching. -**Solution**: Cache LLM responses with semantic matching. +In the following example, the `SemanticLLMCache` class uses the OpenAI API to +generate embeddings and cache LLM responses based on semantic similarity. ```python import openai @@ -45,7 +53,8 @@ class SemanticLLMCache: result = cur.fetchone() if result: # Cache hit - print(f"✓ Cache HIT (similarity: {result[2]:.4f}, age: {result[3]}s)") + print(f"✓ Cache HIT (similarity: {result[2]:.4f}, " + f"age: {result[3]}s)") return json.loads(result[1]) # Cache miss - call actual LLM @@ -82,14 +91,23 @@ cache = SemanticLLMCache("dbname=mydb user=postgres") # These similar questions will hit the cache cache.ask_llm_cached("What was our Q4 revenue?") -cache.ask_llm_cached("Show me Q4 revenue") # Cache hit! -cache.ask_llm_cached("Q4 revenue please") # Cache hit! +cache.ask_llm_cached("Show me Q4 revenue") # Cache hit! +cache.ask_llm_cached("Q4 revenue please") # Cache hit! 
``` -**Savings**: With 80% hit rate on 10K daily queries: **$140/day** or **$51,100/year** +An organization processing 10,000 daily queries with an 80% cache hit rate +can save approximately $140 per day or $51,100 per year using this approach. ### Chatbot Response Caching +The chatbot response caching pattern optimizes conversational AI +applications by storing and reusing responses for semantically similar +user messages. + +In the following example, the `ChatbotCache` class uses TypeScript to +implement a caching layer for chatbot responses with configurable +similarity thresholds. + ```typescript import { OpenAI } from 'openai'; import { Pool } from 'pg'; @@ -123,7 +141,7 @@ class ChatbotCache { // Check cache const cacheResult = await this.pool.query( - 'SELECT * FROM semantic_cache.get_cached_result($1, 0.92)', + `SELECT * FROM semantic_cache.get_cached_result($1, 0.92)`, [embeddingStr] ); @@ -148,7 +166,8 @@ class ChatbotCache { // Cache response await this.pool.query( - `SELECT semantic_cache.cache_query($1, $2, $3::jsonb, 3600, ARRAY['chatbot'])`, + `SELECT semantic_cache.cache_query( + $1, $2, $3::jsonb, 3600, ARRAY['chatbot'])`, [userMessage, embeddingStr, JSON.stringify({ answer })] ); @@ -159,9 +178,18 @@ class ChatbotCache { ## Analytics and Reporting +This section demonstrates how to use the pg_semantic_cache extension to +improve performance of analytical queries and reporting workloads. + ### Dashboard Query Caching -Cache expensive analytical queries that power dashboards. +The dashboard query caching pattern reduces latency for expensive +analytical queries that power business intelligence dashboards and +reporting tools. + +In the following example, the `app.get_sales_analytics` function uses +a deterministic embedding to cache analytics results for a configurable +TTL period. 
```sql -- Application caching wrapper for analytics @@ -180,7 +208,8 @@ BEGIN -- (In production, use actual embedding service) query_embedding := ( SELECT array_agg( - (hashtext((query_text || params::text)::text) + i)::float / 2147483647 + (hashtext((query_text || params::text)::text) + i)::float + / 2147483647 )::text FROM generate_series(1, 1536) i ); @@ -234,16 +263,24 @@ $$ LANGUAGE plpgsql; -- Usage SELECT app.get_sales_analytics( 'Total sales and order metrics', - '{"period": "Q4", "start_date": "2024-10-01", "end_date": "2024-12-31"}'::jsonb + '{"period": "Q4", "start_date": "2024-10-01", + "end_date": "2024-12-31"}'::jsonb ); ``` ### Time-Series Report Caching +The time-series report caching pattern optimizes recurring reports by +adjusting cache TTL based on the temporal granularity of the data being +reported. + +In the following example, the `app.cached_time_series_report` function +uses different TTL values for daily, weekly, and monthly reports. + ```sql -- Cache daily/weekly/monthly reports CREATE OR REPLACE FUNCTION app.cached_time_series_report( - report_type TEXT, -- 'daily', 'weekly', 'monthly' + report_type TEXT, -- 'daily', 'weekly', 'monthly' metric_name TEXT ) RETURNS TABLE(period DATE, value NUMERIC) AS $$ DECLARE @@ -265,12 +302,15 @@ BEGIN END; -- Try cache - SELECT * INTO cached FROM semantic_cache.get_cached_result(query_emb, 0.95); + SELECT * INTO cached + FROM semantic_cache.get_cached_result(query_emb, 0.95); IF cached.found IS NOT NULL THEN -- Return cached data as table RETURN QUERY - SELECT (item->>'period')::DATE, (item->>'value')::NUMERIC + SELECT + (item->>'period')::DATE, + (item->>'value')::NUMERIC FROM jsonb_array_elements(cached.result_data->'data') item; RETURN; END IF; @@ -279,7 +319,7 @@ BEGIN PERFORM semantic_cache.cache_query( format('Report: %s - %s', report_type, metric_name), query_emb, - '{"data": []}'::jsonb, -- Your actual query results + '{"data": []}'::jsonb, -- Your actual query results ttl_seconds, 
ARRAY['reports', report_type] ); @@ -291,9 +331,18 @@ $$ LANGUAGE plpgsql; ## External API Results +This section demonstrates how to use the pg_semantic_cache extension to +reduce costs and latency when integrating with third-party external APIs. + ### Third-Party API Response Caching -Cache responses from expensive external APIs (weather, geocoding, stock prices, etc.). +The external API caching pattern stores responses from expensive +third-party APIs such as weather services, geocoding providers, and stock +price feeds. + +In the following example, the `APICache` class uses the +sentence-transformers library to generate embeddings and cache API +responses with semantic matching. ```python import requests @@ -310,7 +359,8 @@ class APICache: Fetch from API with semantic caching Args: - query: Natural language query (e.g., "weather in San Francisco") + query: Natural language query + (e.g., "weather in San Francisco") api_call_fn: Function to call API ttl: Cache TTL in seconds """ @@ -338,22 +388,27 @@ class APICache: import json cur.execute(""" SELECT semantic_cache.cache_query( - %s, %s, %s::jsonb, %s, ARRAY['api', 'external'] + %s, %s, %s::jsonb, %s, + ARRAY['api', 'external'] ) """, (query, embedding_str, json.dumps(api_response), ttl)) self.conn.commit() return api_response +``` -# Usage examples +The following examples demonstrate how to use the `APICache` class with +different external APIs using appropriate TTL values for each use case. 
+```python # Weather API def get_weather(city): cache = APICache("dbname=mydb") return cache.fetch_with_cache( f"weather in {city}", - lambda: requests.get(f"https://api.weather.com/{city}").json(), - ttl=1800 # 30 minutes + lambda: requests.get( + f"https://api.weather.com/{city}").json(), + ttl=1800 # 30 minutes ) # Geocoding API @@ -361,7 +416,8 @@ def geocode(address): cache = APICache("dbname=mydb") return cache.fetch_with_cache( f"geocode {address}", - lambda: requests.get(f"https://api.geocode.com?q={address}").json(), + lambda: requests.get( + f"https://api.geocode.com?q={address}").json(), ttl=86400 # 24 hours (addresses don't change) ) @@ -370,21 +426,31 @@ def get_stock_price(symbol): cache = APICache("dbname=mydb") return cache.fetch_with_cache( f"stock price {symbol}", - lambda: requests.get(f"https://api.stocks.com/{symbol}").json(), - ttl=60 # 1 minute (real-time data) + lambda: requests.get( + f"https://api.stocks.com/{symbol}").json(), + ttl=60 # 1 minute (real-time data) ) ``` ## Database Query Optimization +This section demonstrates how to use the pg_semantic_cache extension to +optimize expensive database queries and reduce computational overhead. + ### Expensive Join Caching -Cache results from expensive multi-table joins. +The expensive join caching pattern stores results from complex multi-table +joins to avoid repeated execution of resource-intensive database +operations. + +In the following example, the `app.get_customer_summary` function caches +the results of a complex customer data aggregation query with multiple +joins. 
```sql -- Wrap expensive queries with semantic caching CREATE OR REPLACE FUNCTION app.get_customer_summary( - customer_identifier TEXT -- email, name, or ID + customer_identifier TEXT -- email, name, or ID ) RETURNS JSONB AS $$ DECLARE query_emb TEXT; @@ -393,13 +459,18 @@ DECLARE BEGIN -- Simple embedding generation (replace with actual service) query_emb := ( - SELECT array_agg((hashtext(customer_identifier || i::text)::float / 2147483647)::float4)::text + SELECT array_agg( + (hashtext(customer_identifier || i::text)::float + / 2147483647)::float4 + )::text FROM generate_series(1, 1536) i ); -- Check cache SELECT * INTO cached - FROM semantic_cache.get_cached_result(query_emb, 0.98, 300); + FROM semantic_cache.get_cached_result( + query_emb, 0.98, 300 + ); IF cached.found IS NOT NULL THEN RETURN cached.result_data; @@ -444,19 +515,37 @@ $$ LANGUAGE plpgsql; -- Usage - these similar queries hit cache: SELECT app.get_customer_summary('[email protected]'); -SELECT app.get_customer_summary('john@example.com'); -- Exact match -SELECT app.get_customer_summary('John Doe'); -- By name -SELECT app.get_customer_summary('john'); -- Partial match +SELECT app.get_customer_summary('john@example.com'); + -- Exact match +SELECT app.get_customer_summary('John Doe'); + -- By name +SELECT app.get_customer_summary('john'); + -- Partial match ``` ## Scheduled Maintenance +This section demonstrates how to implement automated maintenance routines +for the pg_semantic_cache extension to ensure optimal performance and +storage use. + ### Automatic Cache Cleanup +The automatic cache cleanup pattern uses scheduled maintenance functions +to evict expired entries and optimize cache storage on a regular basis. + +In the following example, the `semantic_cache.scheduled_maintenance` +function performs multiple maintenance operations and returns timing +information. 
+ ```sql -- Create maintenance function CREATE OR REPLACE FUNCTION semantic_cache.scheduled_maintenance() -RETURNS TABLE(operation TEXT, affected_rows BIGINT, duration INTERVAL) AS $$ +RETURNS TABLE( + operation TEXT, + affected_rows BIGINT, + duration INTERVAL +) AS $$ DECLARE start_time TIMESTAMPTZ; evicted BIGINT; @@ -502,7 +591,12 @@ SELECT * FROM semantic_cache.scheduled_maintenance(); ### Cache Warming -Pre-populate cache with common queries. +The cache warming pattern pre-populates the cache with common queries to +improve application performance during startup or after cache +invalidation. + +In the following example, the `app.warm_cache` function pre-caches +frequently accessed dashboard queries to reduce initial page load times. ```sql -- Warm cache with popular queries @@ -514,8 +608,10 @@ BEGIN -- Example: Pre-cache common dashboard queries PERFORM semantic_cache.cache_query( 'Total sales this month', - (SELECT array_agg(random()::float4)::text FROM generate_series(1, 1536)), - (SELECT jsonb_build_object('total', SUM(amount)) FROM orders + (SELECT array_agg(random()::float4)::text + FROM generate_series(1, 1536)), + (SELECT jsonb_build_object('total', SUM(amount)) + FROM orders WHERE created_at >= DATE_TRUNC('month', NOW())), 3600, ARRAY['dashboard', 'warmed'] @@ -534,9 +630,19 @@ SELECT app.warm_cache(); ## Multi-Language Support +This section demonstrates how to use the pg_semantic_cache extension to +support caching across multiple languages using multilingual embedding +models. + ### Caching Across Languages -Cache queries regardless of language using embeddings. +The multilingual caching pattern enables cache hits across different +languages by using multilingual embedding models that map semantically +similar queries. + +In the following example, the `MultilingualCache` class uses the +multilingual mpnet model to cache queries across English, Spanish, French, +and Portuguese. 
```python from sentence_transformers import SentenceTransformer @@ -546,7 +652,9 @@ class MultilingualCache: def __init__(self, db_conn_string): self.conn = psycopg2.connect(db_conn_string) # Use multilingual model - self.encoder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2') + self.encoder = SentenceTransformer( + 'paraphrase-multilingual-mpnet-base-v2' + ) def cached_query(self, query_text, language): """Cache works across languages!""" @@ -556,7 +664,8 @@ class MultilingualCache: # Check cache (works for all languages) cur = self.conn.cursor() cur.execute(""" - SELECT * FROM semantic_cache.get_cached_result(%s, 0.90) + SELECT * + FROM semantic_cache.get_cached_result(%s, 0.90) """, (embedding_str,)) result = cur.fetchone() @@ -566,18 +675,29 @@ class MultilingualCache: # Execute query and cache # ... your query logic ... -# These queries in different languages can hit the same cache entry! +# These queries in different languages can hit the same cache +# entry! cache = MultilingualCache("dbname=mydb") cache.cached_query("What is the total revenue?", "en") -cache.cached_query("¿Cuál es el ingreso total?", "es") # Cache hit! -cache.cached_query("Quel est le revenu total?", "fr") # Cache hit! -cache.cached_query("Qual é a receita total?", "pt") # Cache hit! +cache.cached_query("¿Cuál es el ingreso total?", "es") + # Cache hit! +cache.cached_query("Quel est le revenu total?", "fr") + # Cache hit! +cache.cached_query("Qual é a receita total?", "pt") + # Cache hit! ``` ## Next Steps -- [Functions Reference](functions/index.md) - Learn all available functions -- [Monitoring](monitoring.md) - Track cache performance -- [Configuration](configuration.md) - Optimize for your use case -- [FAQ](FAQ.md) - Common questions and solutions +The following resources provide additional information about the +pg_semantic_cache extension: + +- The [Functions Reference](functions/index.md) document describes all + available functions. 
+- The [Monitoring](monitoring.md) document explains how to track cache + performance. +- The [Configuration](configuration.md) document provides optimization + guidance for your use case. +- The [FAQ](FAQ.md) document answers common questions and provides + solutions. From 4e779ffb0456ee39b4db17b7a7ac6645ef482cb0 Mon Sep 17 00:00:00 2001 From: Susan Douglas Date: Mon, 9 Mar 2026 10:43:24 -0400 Subject: [PATCH 5/7] Added functions.md to control access to functions --- README.md | 164 +++++++++++++++++++++------------------------- docs/functions.md | 26 ++++++++ 2 files changed, 101 insertions(+), 89 deletions(-) create mode 100644 docs/functions.md diff --git a/README.md b/README.md index 359969b..267f0ba 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,4 @@ -
- -# 🗄️ pg_semantic_cache +# pg_semantic_cache ### Intelligent Query Result Caching for PostgreSQL @@ -10,27 +8,27 @@ [![License](https://img.shields.io/badge/License-PostgreSQL-blue.svg)](LICENSE) [![pgvector](https://img.shields.io/badge/Requires-pgvector-orange.svg)](https://github.com/pgvector/pgvector) -[Quick Start](#-quick-start) • -[Features](#-key-features) • -[API Reference](#-api-reference) • -[Examples](#-integration-examples) • -[Performance](#-performance) +[Quick Start](#quick-start) • +[Features](#key-features) • +[API Reference](#api-reference) • +[Examples](#integration-examples) • +[Performance](#performance)
--- -## 🎯 Overview +## Overview `pg_semantic_cache` enables **semantic query result caching** in PostgreSQL. Unlike traditional caching that requires exact query matches, this extension uses vector embeddings to find and retrieve cached results for semantically similar queries. ### Perfect For -- 🤖 **AI/LLM Applications** - Cache expensive LLM responses for similar questions -- 🔍 **RAG Pipelines** - Speed up retrieval-augmented generation workflows -- 📊 **Analytics Dashboards** - Reuse results for similar analytical queries -- 💬 **Chatbots** - Reduce latency by caching semantically similar conversations -- 🔎 **Search Systems** - Handle query variations without re-execution +- **AI/LLM Applications** - Cache expensive LLM responses for similar questions +- **RAG Pipelines** - Speed up retrieval-augmented generation workflows +- **Analytics Dashboards** - Reuse results for similar analytical queries +- **Chatbots** - Reduce latency by caching semantically similar conversations +- **Search Systems** - Handle query variations without re-execution ### How It Works @@ -43,40 +41,42 @@ │ Similar cached query found: │ │ "Show me revenue for last quarter" (similarity: 97%) │ │ ↓ Return cached result (2ms instead of 500ms) │ -│ ✅ Cache HIT - 250x faster! │ +│ Cache HIT - 250x faster! │ └─────────────────────────────────────────────────────────────┘ ``` --- -## ✨ Key Features +## Key Features + +`pg_semantic_cache` provides a comprehensive set of capabilities designed for production use. -### 🧠 Semantic Intelligence +### Semantic Intelligence - **Vector-based matching** using pgvector for similarity search - **Configurable similarity thresholds** (default: 95%) - **Cosine distance** calculations for accurate semantic matching - Support for any embedding model (OpenAI, Cohere, custom, etc.) 
-### ⚡ High Performance +### High Performance - **Sub-5ms cache lookups** with optimized vector indexing - **Efficient storage** with minimal overhead per entry - **Fast eviction** mechanisms to maintain cache health - **Index optimization** support for large-scale deployments (100k+ entries) -### 🎛️ Flexible Cache Management +### Flexible Cache Management - **Multiple eviction policies**: LRU, LFU, and TTL-based - **Per-query TTL** or global defaults - **Tag-based organization** for grouped invalidation - **Pattern-based invalidation** using SQL LIKE patterns - **Auto-eviction** with configurable policies -### 📊 Observability & Monitoring +### Observability & Monitoring - **Real-time statistics**: hit rate, total entries, cache size - **Health metrics**: expired entries, memory usage, eviction counts - **Performance tracking**: lookup times, similarity scores - **Built-in views** for monitoring and analysis -### 🔧 Production Ready +### Production Ready - **Comprehensive logging** with configurable levels - **Crash-safe** error handling - **ACID compliance** for cache operations @@ -85,7 +85,7 @@ --- -## 🚀 Quick Start +## Quick Start ### Installation @@ -130,13 +130,13 @@ SELECT semantic_cache.init_schema(); SELECT * FROM semantic_cache.cache_stats(); ``` -✅ **You're ready to go!** +**You're ready to go!** --- -## 📘 Basic Usage +## Basic Usage -### 1️⃣ Cache a Query Result +### 1. Cache a Query Result ```sql SELECT semantic_cache.cache_query( @@ -149,7 +149,7 @@ SELECT semantic_cache.cache_query( -- Returns: cache_id (bigint) ``` -### 2️⃣ Retrieve Cached Result +### 2. Retrieve Cached Result ```sql SELECT * FROM semantic_cache.get_cached_result( @@ -167,7 +167,7 @@ SELECT * FROM semantic_cache.get_cached_result( true | {"total": 150, "orders"... | 0.973 | 245 ``` -### 3️⃣ Monitor Performance +### 3. 
Monitor Performance ```sql -- Comprehensive statistics @@ -182,7 +182,9 @@ SELECT * FROM semantic_cache.recent_cache_activity LIMIT 10; --- -## 📚 API Reference +## API Reference + +The extension provides a complete set of SQL functions for caching, eviction, monitoring, and configuration. ### Core Functions @@ -220,6 +222,8 @@ Retrieve a cached result by semantic similarity. ### Cache Eviction +Multiple eviction strategies are available to manage cache size and freshness. + #### `evict_expired()` Remove all expired cache entries. @@ -249,7 +253,7 @@ SELECT semantic_cache.auto_evict(); ``` #### `clear_cache()` -⚠️ Remove **all** cache entries (use with caution). +Remove **all** cache entries (use with caution). ```sql SELECT semantic_cache.clear_cache(); @@ -259,6 +263,8 @@ SELECT semantic_cache.clear_cache(); ### Statistics & Monitoring +Built-in functions and views provide real-time visibility into cache performance. + #### `cache_stats()` Get comprehensive cache statistics. @@ -280,6 +286,8 @@ hit_rate_percent | Hit rate as a percentage ### Configuration +All runtime settings can be configured through the cache configuration table. + Configuration settings are stored in the `semantic_cache.cache_config` table. You can view and modify them directly: ```sql @@ -305,7 +313,9 @@ SELECT value FROM semantic_cache.cache_config WHERE key = 'eviction_policy'; --- -## 🔨 Build & Development +## Build & Development + +The extension uses the standard PostgreSQL PGXS build system for compilation and installation. ### Build Commands @@ -341,35 +351,37 @@ Fully compatible with all PostgreSQL-supported platforms: | Platform | Status | Notes | |----------|--------|-------| -| 🐧 Linux | ✅ | Ubuntu, Debian, RHEL, Rocky, Fedora, etc. | -| 🍎 macOS | ✅ | Intel & Apple Silicon | -| 🪟 Windows | ✅ | Via MinGW or MSVC | -| 🔧 BSD | ✅ | FreeBSD, OpenBSD | +| Linux | Supported | Ubuntu, Debian, RHEL, Rocky, Fedora, etc. 
| +| macOS | Supported | Intel & Apple Silicon | +| Windows | Supported | Via MinGW or MSVC | +| BSD | Supported | FreeBSD, OpenBSD | ### Tested PostgreSQL Versions | Version | Status | Notes | |---------|--------|-------| -| PG 14 | ✅ Tested | Full support | -| PG 15 | ✅ Tested | Full support | -| PG 16 | ✅ Tested | Full support | -| PG 17 | ✅ Tested | Full support | -| PG 18 | ✅ Tested | Full support | -| Future versions | ✅ Expected | Standard PGXS compatibility | +| PG 14 | Tested | Full support | +| PG 15 | Tested | Full support | +| PG 16 | Tested | Full support | +| PG 17 | Tested | Full support | +| PG 18 | Tested | Full support | +| Future versions | Expected | Standard PGXS compatibility | --- -## ⚡ Performance +## Performance + +The extension is optimized for sub-millisecond cache lookups with minimal overhead. ### Runtime Metrics | Operation | Performance | Notes | |-----------|-------------|-------| -| 🔍 Cache lookup | **< 5ms** | With optimized vector index | -| 💾 Cache insert | **< 10ms** | Including embedding storage | -| 🗑️ Eviction (1000 entries) | **< 50ms** | Efficient batch operations | -| 📊 Statistics query | **< 1ms** | Materialized views | -| 🎯 Similarity search | **2-3ms avg** | IVFFlat/HNSW indexed | +| Cache lookup | **< 5ms** | With optimized vector index | +| Cache insert | **< 10ms** | Including embedding storage | +| Eviction (1000 entries) | **< 50ms** | Efficient batch operations | +| Statistics query | **< 1ms** | Materialized views | +| Similarity search | **2-3ms avg** | IVFFlat/HNSW indexed | ### Expected Hit Rates @@ -407,7 +419,9 @@ Evict LRU | 500 | ~25ms | 0.05ms --- -## 🏭 Production Deployment +## Production Deployment + +For production environments, optimize PostgreSQL settings and set up automated maintenance. ### PostgreSQL Configuration @@ -451,6 +465,8 @@ SELECT * FROM cron.job WHERE jobname LIKE 'semantic-cache%'; ### Index Optimization +Choose the appropriate vector index strategy based on your cache size. 
+ #### Small to Medium Caches (< 100k entries) Default IVFFlat index works well out of the box. @@ -483,6 +499,8 @@ CREATE INDEX idx_cache_embedding_hnsw ### Monitoring Setup +Set up custom views to monitor cache health and performance metrics. + Create a monitoring dashboard view: ```sql @@ -501,6 +519,8 @@ SELECT * FROM semantic_cache.production_dashboard; ### High Availability Considerations +The cache integrates seamlessly with PostgreSQL's replication and backup mechanisms. + ```sql -- Regular backups of cache metadata (optional) pg_dump -U postgres -d your_db -t semantic_cache.cache_entries -t semantic_cache.cache_metadata -F c -f cache_backup.dump @@ -511,7 +531,7 @@ pg_dump -U postgres -d your_db -t semantic_cache.cache_entries -t semantic_cache --- -## 🔗 Integration Examples +## Integration Examples ### Python with OpenAI @@ -569,10 +589,10 @@ class SemanticCache: result = cur.fetchone() if result and result[0]: # Cache hit - print(f"✅ Cache HIT (similarity: {result[2]:.3f}, age: {result[3]}s)") + print(f"Cache HIT (similarity: {result[2]:.3f}, age: {result[3]}s)") return json.loads(result[1]) else: - print("❌ Cache MISS") + print("Cache MISS") return None def stats(self) -> Dict[str, Any]: @@ -651,10 +671,10 @@ class SemanticCache { const { found, result_data, similarity_score, age_seconds } = res.rows[0]; if (found) { - console.log(`✅ Cache HIT (similarity: ${similarity_score.toFixed(3)}, age: ${age_seconds}s)`); + console.log(`Cache HIT (similarity: ${similarity_score.toFixed(3)}, age: ${age_seconds}s)`); return JSON.parse(result_data); } else { - console.log('❌ Cache MISS'); + console.log('Cache MISS'); return null; } } @@ -689,7 +709,7 @@ For additional integration patterns and use cases, see: --- -## 🤝 Contributing +## Contributing Contributions are welcome! This extension is built with standard PostgreSQL C APIs. @@ -708,48 +728,14 @@ Contributions are welcome! 
This extension is built with standard PostgreSQL C AP --- -## 📄 License +## License This project is licensed under the **PostgreSQL License**. --- -## 📞 Support & Resources - -### Documentation -- **Getting Started**: [GETTING_STARTED.md](GETTING_STARTED.md) -- **API Examples**: `examples/usage_examples.sql` -- **Logging Guide**: [LOGGING_FEATURE_GUIDE.md](LOGGING_FEATURE_GUIDE.md) -- **PostgreSQL Documentation**: [postgresql.org/docs](https://www.postgresql.org/docs/) +## Support & Resources -### Getting Help - **GitHub Issues**: Report bugs and request features - **Example Code**: Check `examples/` directory for usage patterns - **Test Suite**: See `test/` directory for comprehensive examples - -### Related Projects -- [pgvector](https://github.com/pgvector/pgvector) - Vector similarity search for PostgreSQL -- [pg_cron](https://github.com/citusdata/pg_cron) - Job scheduler for PostgreSQL - ---- - -## 🏆 Credits - -**Created by**: Muhammad Aqeel - PostgreSQL Infrastructure Engineer - -**Built with**: -- Standard PostgreSQL C API -- [pgvector](https://github.com/pgvector/pgvector) for vector operations -- PGXS build infrastructure - ---- - -
- -### ⭐ Star this repository if you find it useful! - -**pg_semantic_cache** - Intelligent semantic caching for PostgreSQL - -[Quick Start](#-quick-start) • [Documentation](#-api-reference) • [Examples](#-integration-examples) - -
diff --git a/docs/functions.md b/docs/functions.md new file mode 100644 index 0000000..2dbfee5 --- /dev/null +++ b/docs/functions.md @@ -0,0 +1,26 @@ +# Using pg_semantic_cache Functions + +This page provides a comprehensive reference for all available functions in the pg_semantic_cache extension. + +## Function Reference + +| Function | Description | +|----------|-------------| +| [auto_evict](functions/auto_evict.md) | Automatically evicts entries based on configured policy (LRU, LFU, or TTL). | +| [cache_hit_rate](functions/cache_hit_rate.md) | Gets current cache hit rate as a percentage. | +| [cache_query](functions/cache_query.md) | Stores a query result with its vector embedding in the cache. | +| [cache_stats](functions/cache_stats.md) | Gets comprehensive cache statistics including hits, misses, and hit rate. | +| [clear_cache](functions/clear_cache.md) | Removes all cache entries (use with caution). | +| [evict_expired](functions/evict_expired.md) | Removes all expired cache entries based on TTL. | +| [evict_lfu](functions/evict_lfu.md) | Evicts least frequently used entries, keeping only specified count. | +| [evict_lru](functions/evict_lru.md) | Evicts least recently used entries, keeping only specified count. | +| [get_cached_result](functions/get_cached_result.md) | Retrieves a cached result by semantic similarity search. | +| [get_cost_savings](functions/get_cost_savings.md) | Calculates estimated cost savings from cache usage. | +| [get_index_type](functions/get_index_type.md) | Gets the current vector index type (IVFFlat or HNSW). | +| [get_vector_dimension](functions/get_vector_dimension.md) | Gets the current vector embedding dimension. | +| [init_schema](functions/init_schema.md) | Initializes cache schema and creates required tables, indexes, and views. | +| [invalidate_cache](functions/invalidate_cache.md) | Invalidates cache entries by pattern matching or tags. 
| +| [log_cache_access](functions/log_cache_access.md) | Logs cache access events for debugging and analysis. | +| [rebuild_index](functions/rebuild_index.md) | Rebuilds the vector similarity index for optimal performance. | +| [set_index_type](functions/set_index_type.md) | Sets the vector index type for similarity search. | +| [set_vector_dimension](functions/set_vector_dimension.md) | Sets the vector embedding dimension. | From 270f38db458b70def7c4f9381feb66a072e0a92b Mon Sep 17 00:00:00 2001 From: Susan Douglas Date: Mon, 9 Mar 2026 10:48:57 -0400 Subject: [PATCH 6/7] Added integration.md for integration examples --- integration.md | 178 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 integration.md diff --git a/integration.md b/integration.md new file mode 100644 index 0000000..d6cc83e --- /dev/null +++ b/integration.md @@ -0,0 +1,178 @@ +# Integration Examples + +Refer to the following integration examples when configuring pg_semantic_cache. 
+ +### Python with OpenAI + +Complete example integrating semantic cache with OpenAI embeddings: + +```python +import psycopg2 +import openai +import json +from typing import Optional, Dict, Any + +class SemanticCache: + """Semantic cache wrapper for PostgreSQL""" + + def __init__(self, conn_string: str, openai_api_key: str): + self.conn = psycopg2.connect(conn_string) + self.client = openai.OpenAI(api_key=openai_api_key) + + def _get_embedding(self, text: str) -> str: + """Generate embedding using OpenAI""" + response = self.client.embeddings.create( + model="text-embedding-ada-002", + input=text + ) + embedding = response.data[0].embedding + return f"[{','.join(map(str, embedding))}]" + + def cache(self, query: str, result: Dict[Any, Any], + ttl: int = 3600, tags: Optional[list] = None) -> int: + """Cache a query result""" + embedding = self._get_embedding(query) + + with self.conn.cursor() as cur: + cur.execute(""" + SELECT semantic_cache.cache_query( + %s::text, %s::text, %s::jsonb, %s::int, %s::text[] + ) + """, (query, embedding, json.dumps(result), ttl, tags)) + cache_id = cur.fetchone()[0] + self.conn.commit() + return cache_id + + def get(self, query: str, similarity: float = 0.95, + max_age: Optional[int] = None) -> Optional[Dict[Any, Any]]: + """Retrieve from cache""" + embedding = self._get_embedding(query) + + with self.conn.cursor() as cur: + cur.execute(""" + SELECT found, result_data, similarity_score, age_seconds + FROM semantic_cache.get_cached_result( + %s::text, %s::float4, %s::int + ) + """, (embedding, similarity, max_age)) + + result = cur.fetchone() + if result and result[0]: # Cache hit + print(f"Cache HIT (similarity: {result[2]:.3f}, age: {result[3]}s)") + return json.loads(result[1]) + else: + print("Cache MISS") + return None + + def stats(self) -> Dict[str, Any]: + """Get cache statistics""" + with self.conn.cursor() as cur: + cur.execute("SELECT * FROM semantic_cache.cache_stats()") + columns = [desc[0] for desc in cur.description] + 
values = cur.fetchone() + return dict(zip(columns, values)) + +# Usage example +cache = SemanticCache( + conn_string="dbname=mydb user=postgres", + openai_api_key="sk-..." +) + +# Try to get from cache, compute if miss +def get_revenue_data(query: str) -> Dict: + result = cache.get(query, similarity=0.95) + + if result: + return result # Cache hit! + + # Cache miss - compute the result + result = expensive_database_query() # Your expensive query here + cache.cache(query, result, ttl=3600, tags=['revenue', 'analytics']) + return result + +# Example queries +data1 = get_revenue_data("What was Q4 2024 revenue?") +data2 = get_revenue_data("Show me revenue for last quarter") # Will hit cache! +data3 = get_revenue_data("Q4 sales figures?") # Will also hit cache! + +# View statistics +print(cache.stats()) +``` + +### Node.js with OpenAI + +```javascript +const { Client } = require('pg'); +const OpenAI = require('openai'); + +class SemanticCache { + constructor(pgConfig, openaiApiKey) { + this.client = new Client(pgConfig); + this.openai = new OpenAI({ apiKey: openaiApiKey }); + this.client.connect(); + } + + async getEmbedding(text) { + const response = await this.openai.embeddings.create({ + model: 'text-embedding-ada-002', + input: text + }); + const embedding = response.data[0].embedding; + return `[${embedding.join(',')}]`; + } + + async cache(query, result, ttl = 3600, tags = null) { + const embedding = await this.getEmbedding(query); + const res = await this.client.query( + `SELECT semantic_cache.cache_query($1::text, $2::text, $3::jsonb, $4::int, $5::text[])`, + [query, embedding, JSON.stringify(result), ttl, tags] + ); + return res.rows[0].cache_query; + } + + async get(query, similarity = 0.95, maxAge = null) { + const embedding = await this.getEmbedding(query); + const res = await this.client.query( + `SELECT * FROM semantic_cache.get_cached_result($1::text, $2::float4, $3::int)`, + [embedding, similarity, maxAge] + ); + + const { found, result_data, 
similarity_score, age_seconds } = res.rows[0]; + + if (found) { + console.log(`Cache HIT (similarity: ${similarity_score.toFixed(3)}, age: ${age_seconds}s)`); + return JSON.parse(result_data); + } else { + console.log('Cache MISS'); + return null; + } + } + + async stats() { + const res = await this.client.query('SELECT * FROM semantic_cache.cache_stats()'); + return res.rows[0]; + } +} + +// Usage +const cache = new SemanticCache( + { host: 'localhost', database: 'mydb', user: 'postgres' }, + 'sk-...' +); + +async function getRevenueData(query) { + const cached = await cache.get(query); + if (cached) return cached; + + const result = await expensiveDatabaseQuery(); + await cache.cache(query, result, 3600, ['revenue', 'analytics']); + return result; +} +``` + +### More Examples + +For additional integration patterns and use cases, see: +- `examples/usage_examples.sql` - Comprehensive SQL examples +- `test/benchmark.sql` - Performance testing examples + From ecda1f4048018b43a3fb400207a23f72c67252af Mon Sep 17 00:00:00 2001 From: Susan Douglas Date: Mon, 9 Mar 2026 11:57:16 -0400 Subject: [PATCH 7/7] Updates to pg_semantic_cache documentation --- README.md | 769 ++++---------------------- docs/LICENSE.md | 19 + docs/architecture.md | 14 +- docs/development.md | 53 ++ docs/functions.md | 99 +++- docs/index.md | 12 + integration.md => docs/integration.md | 39 +- docs/performance.md | 70 +++ docs/production.md | 118 ++++ 9 files changed, 513 insertions(+), 680 deletions(-) create mode 100644 docs/LICENSE.md create mode 100644 docs/development.md rename integration.md => docs/integration.md (78%) create mode 100644 docs/performance.md create mode 100644 docs/production.md diff --git a/README.md b/README.md index 267f0ba..a8590a1 100644 --- a/README.md +++ b/README.md @@ -1,286 +1,71 @@ # pg_semantic_cache -### Intelligent Query Result Caching for PostgreSQL - -**Leverage vector embeddings to cache and retrieve query results based on semantic similarity** - 
-[![PostgreSQL](https://img.shields.io/badge/PostgreSQL-14%20|%2015%20|%2016%20|%2017%20|%2018-336791?style=flat&logo=postgresql&logoColor=white)](https://www.postgresql.org/) -[![License](https://img.shields.io/badge/License-PostgreSQL-blue.svg)](LICENSE) -[![pgvector](https://img.shields.io/badge/Requires-pgvector-orange.svg)](https://github.com/pgvector/pgvector) - -[Quick Start](#quick-start) • -[Features](#key-features) • -[API Reference](#api-reference) • -[Examples](#integration-examples) • -[Performance](#performance) - - - ---- - -## Overview - -`pg_semantic_cache` enables **semantic query result caching** in PostgreSQL. Unlike traditional caching that requires exact query matches, this extension uses vector embeddings to find and retrieve cached results for semantically similar queries. - -### Perfect For - -- **AI/LLM Applications** - Cache expensive LLM responses for similar questions -- **RAG Pipelines** - Speed up retrieval-augmented generation workflows -- **Analytics Dashboards** - Reuse results for similar analytical queries -- **Chatbots** - Reduce latency by caching semantically similar conversations -- **Search Systems** - Handle query variations without re-execution - -### How It Works - -``` -┌─────────────────────────────────────────────────────────────┐ -│ Query: "What was Q4 2024 revenue?" │ -│ ↓ Generate embedding via OpenAI/etc │ -│ ↓ Check semantic cache (similarity > 95%) │ -│ │ -│ Similar cached query found: │ -│ "Show me revenue for last quarter" (similarity: 97%) │ -│ ↓ Return cached result (2ms instead of 500ms) │ -│ Cache HIT - 250x faster! │ -└─────────────────────────────────────────────────────────────┘ -``` +pg_semantic_cache allows you to leverage vector embeddings to cache and retrieve query results based on semantic similarity. 
+ +[pg_semantic_cache Introduction](docs/index.md) +[pg_semantic_cache Architecture](docs/architecture.md) +[pg_semantic_cache Use Cases](docs/use_cases.md) +[Quick Start](docs/quick_start.md) +[Installation](docs/installation.md) +[Configuration](docs/configuration.md) +[Deploying in a Production Environment](docs/deployment.md) +[Using pg_semantic_cache Functions](docs/functions.md) +[Sample Integrations](docs/integration.md) +[Monitoring](docs/monitoring.md) +[Performance and Benchmarking](docs/performance.md) +[Logging](docs/logging.md) +[Troubleshooting](docs/troubleshooting.md) +[FAQ](docs/FAQ.md) +[Developers](docs/development.md) --- -## Key Features - -`pg_semantic_cache` provides a comprehensive set of capabilities designed for production use. - -### Semantic Intelligence -- **Vector-based matching** using pgvector for similarity search -- **Configurable similarity thresholds** (default: 95%) -- **Cosine distance** calculations for accurate semantic matching -- Support for any embedding model (OpenAI, Cohere, custom, etc.) 
- -### High Performance -- **Sub-5ms cache lookups** with optimized vector indexing -- **Efficient storage** with minimal overhead per entry -- **Fast eviction** mechanisms to maintain cache health -- **Index optimization** support for large-scale deployments (100k+ entries) - -### Flexible Cache Management -- **Multiple eviction policies**: LRU, LFU, and TTL-based -- **Per-query TTL** or global defaults -- **Tag-based organization** for grouped invalidation -- **Pattern-based invalidation** using SQL LIKE patterns -- **Auto-eviction** with configurable policies - -### Observability & Monitoring -- **Real-time statistics**: hit rate, total entries, cache size -- **Health metrics**: expired entries, memory usage, eviction counts -- **Performance tracking**: lookup times, similarity scores -- **Built-in views** for monitoring and analysis - -### Production Ready -- **Comprehensive logging** with configurable levels -- **Crash-safe** error handling -- **ACID compliance** for cache operations -- **Multi-version support**: PostgreSQL 14 through 18+ -- **Standard PGXS** build system for easy packaging - ---- +`pg_semantic_cache` enables **semantic query result caching** for PostgreSQL. Unlike traditional caching that requires exact query matches, this extension uses vector embeddings to find and retrieve cached results for semantically similar queries. 
## Quick Start -### Installation - -**Step 1: Install Dependencies** - -```bash -# Ubuntu/Debian -sudo apt-get install postgresql-16 postgresql-server-dev-16 postgresql-16-pgvector - -# Rocky Linux/RHEL -sudo dnf install postgresql16 postgresql16-devel postgresql16-contrib - -# macOS (with Homebrew) -brew install postgresql@16 -# Install pgvector separately -``` - -**Step 2: Build & Install Extension** - -```bash -git clone https://github.com/pgedge/pg_semantic_cache.git -cd pg_semantic_cache - -make clean && make -sudo make install -``` - -**Step 3: Enable in PostgreSQL** - -```sql --- Connect to your database -psql -U postgres -d your_database - --- Install required extensions -CREATE EXTENSION IF NOT EXISTS vector; -CREATE EXTENSION IF NOT EXISTS pg_semantic_cache; - --- Initialize the cache schema (run once per database) -SELECT semantic_cache.init_schema(); - --- Verify installation -SELECT * FROM semantic_cache.cache_stats(); -``` - -**You're ready to go!** - ---- - -## Basic Usage - -### 1. Cache a Query Result - -```sql -SELECT semantic_cache.cache_query( - query_text := 'SELECT * FROM orders WHERE status = ''completed''', - embedding := '[0.1, 0.2, 0.3, ...]'::text, -- From OpenAI, Cohere, etc. - result_data := '{"total": 150, "orders": [...]}'::jsonb, - ttl_seconds := 3600, -- 1 hour - tags := ARRAY['orders', 'analytics'] -- Optional tags -); --- Returns: cache_id (bigint) -``` - -### 2. Retrieve Cached Result - -```sql -SELECT * FROM semantic_cache.get_cached_result( - embedding := '[0.11, 0.19, 0.31, ...]'::text, -- Similar query embedding - similarity_threshold := 0.95, -- 95% similarity required - max_age_seconds := NULL -- Any age (optional) -); --- Returns: (found boolean, result_data jsonb, similarity_score float4, age_seconds int) -``` - -**Example Result:** -``` - found | result_data | similarity_score | age_seconds --------+----------------------------+------------------+------------- - true | {"total": 150, "orders"... 
| 0.973 | 245 -``` - -### 3. Monitor Performance - -```sql --- Comprehensive statistics -SELECT * FROM semantic_cache.cache_stats(); - --- Health overview (includes hit rate and more details) -SELECT * FROM semantic_cache.cache_health; - --- Recent cache activity -SELECT * FROM semantic_cache.recent_cache_activity LIMIT 10; -``` - ---- - -## API Reference - -The extension provides a complete set of SQL functions for caching, eviction, monitoring, and configuration. - -### Core Functions - -#### `init_schema()` -Initialize the cache schema, creating all required tables, indexes, and views. - -```sql -SELECT semantic_cache.init_schema(); -``` - -#### `cache_query(query_text, embedding, result_data, ttl_seconds, tags)` -Store a query result with its embedding for future retrieval. - -**Parameters:** -- `query_text` (text) - The original query text -- `embedding` (text) - Vector embedding as text: `'[0.1, 0.2, ...]'` -- `result_data` (jsonb) - The query result to cache -- `ttl_seconds` (int) - Time-to-live in seconds -- `tags` (text[]) - Optional tags for organization - -**Returns:** `bigint` - Cache entry ID - -#### `get_cached_result(embedding, similarity_threshold, max_age_seconds)` -Retrieve a cached result by semantic similarity. - -**Parameters:** -- `embedding` (text) - Query embedding to search for -- `similarity_threshold` (float4) - Minimum similarity (0.0 to 1.0) -- `max_age_seconds` (int) - Maximum age in seconds (NULL = any age) - -**Returns:** `record` - `(found boolean, result_data jsonb, similarity_score float4, age_seconds int)` - - ---- - -### Cache Eviction - -Multiple eviction strategies are available to manage cache size and freshness. - -#### `evict_expired()` -Remove all expired cache entries. - -```sql -SELECT semantic_cache.evict_expired(); -- Returns count of evicted entries -``` - -#### `evict_lru(keep_count)` -Evict least recently used entries, keeping only the specified number of most recent entries. 
- -```sql -SELECT semantic_cache.evict_lru(1000); -- Keep only 1000 most recently used entries -``` - -#### `evict_lfu(keep_count)` -Evict least frequently used entries, keeping only the specified number of most frequently used entries. +The following steps walk you through installing and configuring the extension. -```sql -SELECT semantic_cache.evict_lfu(1000); -- Keep only 1000 most frequently used entries -``` +1. Install the required dependencies for your operating system. -#### `auto_evict()` -Automatically evict entries based on configured policy (LRU, LFU, or TTL). + ```bash + # Ubuntu/Debian + sudo apt-get install postgresql-16 postgresql-server-dev-16 postgresql-16-pgvector -```sql -SELECT semantic_cache.auto_evict(); -``` + # Rocky Linux/RHEL + sudo dnf install postgresql16 postgresql16-devel postgresql16-contrib -#### `clear_cache()` -Remove **all** cache entries (use with caution). + # macOS (with Homebrew) + brew install postgresql@16 + # Install pgvector separately + ``` -```sql -SELECT semantic_cache.clear_cache(); -``` +2. Build and install the extension from source. ---- + ```bash + git clone https://github.com/pgedge/pg_semantic_cache.git + cd pg_semantic_cache -### Statistics & Monitoring + make clean && make + sudo make install + ``` -Built-in functions and views provide real-time visibility into cache performance. +3. Enable the extension in your PostgreSQL database. -#### `cache_stats()` -Get comprehensive cache statistics. 
+ ```sql + -- Connect to your database + psql -U postgres -d your_database -```sql -SELECT * FROM semantic_cache.cache_stats(); -``` + -- Install required extensions + CREATE EXTENSION IF NOT EXISTS vector; + CREATE EXTENSION IF NOT EXISTS pg_semantic_cache; -**Returns:** -``` -total_entries | Total number of cached queries -total_hits | Total number of cache hits -total_misses | Total number of cache misses -hit_rate_percent | Hit rate as a percentage -``` + -- Initialize the cache schema (run once per database) + SELECT semantic_cache.init_schema(); -**Note:** For more detailed statistics including cache size, expired entries, and access patterns, use the `semantic_cache.cache_health` view. + -- Verify installation + SELECT * FROM semantic_cache.cache_stats(); + ``` --- @@ -313,429 +98,101 @@ SELECT value FROM semantic_cache.cache_config WHERE key = 'eviction_policy'; --- -## Build & Development - -The extension uses the standard PostgreSQL PGXS build system for compilation and installation. - -### Build Commands - -```bash -# Standard build -make clean && make -sudo make install - -# Run tests -make installcheck - -# Development build with debug symbols -make CFLAGS="-g -O0" clean all - -# View build configuration -make info -``` - -### Multi-Version PostgreSQL Build - -Build for multiple PostgreSQL versions simultaneously: - -```bash -for PG in 14 15 16 17 18; do - echo "Building for PostgreSQL $PG..." - PG_CONFIG=/usr/pgsql-${PG}/bin/pg_config make clean install -done -``` - -### Cross-Platform Support - -Fully compatible with all PostgreSQL-supported platforms: - -| Platform | Status | Notes | -|----------|--------|-------| -| Linux | Supported | Ubuntu, Debian, RHEL, Rocky, Fedora, etc. 
| -| macOS | Supported | Intel & Apple Silicon | -| Windows | Supported | Via MinGW or MSVC | -| BSD | Supported | FreeBSD, OpenBSD | - -### Tested PostgreSQL Versions - -| Version | Status | Notes | -|---------|--------|-------| -| PG 14 | Tested | Full support | -| PG 15 | Tested | Full support | -| PG 16 | Tested | Full support | -| PG 17 | Tested | Full support | -| PG 18 | Tested | Full support | -| Future versions | Expected | Standard PGXS compatibility | +## Basic Usage ---- +The following examples demonstrate the core workflow for storing, retrieving, +and monitoring cached query results. -## Performance +1. Store a query result with its vector embedding in the cache. -The extension is optimized for sub-millisecond cache lookups with minimal overhead. + In the following example, the `cache_query` function stores a completed + orders query with a one-hour TTL and analytics tags. -### Runtime Metrics + ```sql + SELECT semantic_cache.cache_query( + query_text := 'SELECT * FROM orders WHERE status = ''completed''', + embedding := '[0.1, 0.2, 0.3, ...]'::text, -- From OpenAI, Cohere, etc. + result_data := '{"total": 150, "orders": [...]}'::jsonb, + ttl_seconds := 3600, -- 1 hour + tags := ARRAY['orders', 'analytics'] -- Optional tags + ); + -- Returns: cache_id (bigint) + ``` -| Operation | Performance | Notes | -|-----------|-------------|-------| -| Cache lookup | **< 5ms** | With optimized vector index | -| Cache insert | **< 10ms** | Including embedding storage | -| Eviction (1000 entries) | **< 50ms** | Efficient batch operations | -| Statistics query | **< 1ms** | Materialized views | -| Similarity search | **2-3ms avg** | IVFFlat/HNSW indexed | +2. Retrieve a cached result using semantic similarity search. -### Expected Hit Rates + In the following example, the `get_cached_result` function searches for + cached results with at least 95% similarity to the query embedding. 
-| Workload Type | Typical Hit Rate | -|---------------|------------------| -| AI/LLM queries | 40-60% | -| Analytics dashboards | 60-80% | -| Search systems | 50-70% | -| Chatbot conversations | 45-65% | + ```sql + SELECT * FROM semantic_cache.get_cached_result( + embedding := '[0.11, 0.19, 0.31, ...]'::text, -- Similar query embedding + similarity_threshold := 0.95, -- 95% similarity required + max_age_seconds := NULL -- Any age (optional) + ); + -- Returns: (found boolean, result_data jsonb, similarity_score float4, age_seconds int) + ``` -### Memory Overhead + The function returns a table with the following columns: -- **Per cache entry**: ~1-2KB (metadata + indexes) -- **Vector storage**: Depends on embedding dimension (1536D = ~6KB) -- **Total overhead**: Minimal for typical workloads + ``` + found | result_data | similarity_score | age_seconds + -------+----------------------------+------------------+------------- + true | {"total": 150, "orders"... | 0.973 | 245 + ``` -### Benchmarks +3. Monitor cache performance using built-in statistics and health views. -Run the included benchmark suite: + In the following example, the queries retrieve comprehensive statistics, + health metrics, and recent activity for the semantic cache. 
-```bash -psql -U postgres -d your_database -f test/benchmark.sql -``` + ```sql + -- Comprehensive statistics + SELECT * FROM semantic_cache.cache_stats(); -**Expected Results:** + -- Health overview (includes hit rate and more details) + SELECT * FROM semantic_cache.cache_health; -``` -Operation | Count | Total Time | Avg Time ------------------------+--------+------------+---------- -Insert entries | 1,000 | ~500ms | 0.5ms -Lookup (hits) | 100 | ~200ms | 2.0ms -Lookup (misses) | 100 | ~150ms | 1.5ms -Evict LRU | 500 | ~25ms | 0.05ms -``` + -- Recent cache activity + SELECT * FROM semantic_cache.recent_cache_activity LIMIT 10; + ``` --- -## Production Deployment - -For production environments, optimize PostgreSQL settings and set up automated maintenance. - -### PostgreSQL Configuration - -Optimize PostgreSQL settings for semantic caching workloads: - -```sql --- Memory settings -ALTER SYSTEM SET shared_buffers = '4GB'; -- Adjust based on available RAM -ALTER SYSTEM SET effective_cache_size = '12GB'; -- Typically 50-75% of RAM -ALTER SYSTEM SET work_mem = '256MB'; -- For vector operations - --- Reload configuration -SELECT pg_reload_conf(); -``` - -### Automated Maintenance - -Set up automatic cache maintenance using `pg_cron`: - -```sql --- Install pg_cron -CREATE EXTENSION IF NOT EXISTS pg_cron; - --- Schedule auto-eviction every 15 minutes -SELECT cron.schedule( - 'semantic-cache-eviction', - '*/15 * * * *', - $$SELECT semantic_cache.auto_evict()$$ -); - --- Schedule expired entry cleanup every hour -SELECT cron.schedule( - 'semantic-cache-cleanup', - '0 * * * *', - $$SELECT semantic_cache.evict_expired()$$ -); - --- Verify scheduled jobs -SELECT * FROM cron.job WHERE jobname LIKE 'semantic-cache%'; -``` - -### Index Optimization - -Choose the appropriate vector index strategy based on your cache size. - -#### Small to Medium Caches (< 100k entries) -Default IVFFlat index works well out of the box. 
- -#### Large Caches (100k - 1M entries) -Increase IVFFlat lists for better performance: - -```sql -DROP INDEX IF EXISTS semantic_cache.idx_cache_embedding; -CREATE INDEX idx_cache_embedding - ON semantic_cache.cache_entries - USING ivfflat (query_embedding vector_cosine_ops) - WITH (lists = 1000); -- Increase lists for larger caches -``` - -#### Very Large Caches (> 1M entries) -Use HNSW index for optimal performance (requires pgvector 0.5.0+): - -```sql -DROP INDEX IF EXISTS semantic_cache.idx_cache_embedding; -CREATE INDEX idx_cache_embedding_hnsw - ON semantic_cache.cache_entries - USING hnsw (query_embedding vector_cosine_ops) - WITH (m = 16, ef_construction = 64); -``` - -**HNSW Benefits:** -- Faster queries (1-2ms vs 3-5ms) -- Better recall at high similarity thresholds -- Scales linearly with cache size - -### Monitoring Setup +## Building the Documentation -Set up custom views to monitor cache health and performance metrics. +Before building the documentation, install Python 3.8+ and pip. -Create a monitoring dashboard view: +1. Install dependencies: + ```bash + pip install -r docs-requirements.txt + ``` -```sql -CREATE OR REPLACE VIEW semantic_cache.production_dashboard AS -SELECT - (SELECT hit_rate_percent FROM semantic_cache.cache_stats())::numeric(5,2) || '%' as hit_rate, - (SELECT total_entries FROM semantic_cache.cache_stats()) as total_entries, - (SELECT pg_size_pretty(SUM(result_size_bytes)::BIGINT) FROM semantic_cache.cache_entries) as cache_size, - (SELECT COUNT(*) FROM semantic_cache.cache_entries WHERE expires_at <= NOW()) as expired_entries, - (SELECT value FROM semantic_cache.cache_config WHERE key = 'eviction_policy') as eviction_policy, - NOW() as snapshot_time; - --- Query the dashboard -SELECT * FROM semantic_cache.production_dashboard; -``` - -### High Availability Considerations - -The cache integrates seamlessly with PostgreSQL's replication and backup mechanisms. 
- -```sql --- Regular backups of cache metadata (optional) -pg_dump -U postgres -d your_db -t semantic_cache.cache_entries -t semantic_cache.cache_metadata -F c -f cache_backup.dump - --- Replication: Cache data is automatically replicated with PostgreSQL streaming replication --- No special configuration needed -``` - ---- +2. Use the following command to review the documentation locally: + ```bash + mkdocs serve + ``` -## Integration Examples - -### Python with OpenAI - -Complete example integrating semantic cache with OpenAI embeddings: - -```python -import psycopg2 -import openai -import json -from typing import Optional, Dict, Any - -class SemanticCache: - """Semantic cache wrapper for PostgreSQL""" - - def __init__(self, conn_string: str, openai_api_key: str): - self.conn = psycopg2.connect(conn_string) - self.client = openai.OpenAI(api_key=openai_api_key) - - def _get_embedding(self, text: str) -> str: - """Generate embedding using OpenAI""" - response = self.client.embeddings.create( - model="text-embedding-ada-002", - input=text - ) - embedding = response.data[0].embedding - return f"[{','.join(map(str, embedding))}]" - - def cache(self, query: str, result: Dict[Any, Any], - ttl: int = 3600, tags: Optional[list] = None) -> int: - """Cache a query result""" - embedding = self._get_embedding(query) - - with self.conn.cursor() as cur: - cur.execute(""" - SELECT semantic_cache.cache_query( - %s::text, %s::text, %s::jsonb, %s::int, %s::text[] - ) - """, (query, embedding, json.dumps(result), ttl, tags)) - cache_id = cur.fetchone()[0] - self.conn.commit() - return cache_id - - def get(self, query: str, similarity: float = 0.95, - max_age: Optional[int] = None) -> Optional[Dict[Any, Any]]: - """Retrieve from cache""" - embedding = self._get_embedding(query) - - with self.conn.cursor() as cur: - cur.execute(""" - SELECT found, result_data, similarity_score, age_seconds - FROM semantic_cache.get_cached_result( - %s::text, %s::float4, %s::int - ) - """, (embedding, 
similarity, max_age)) - - result = cur.fetchone() - if result and result[0]: # Cache hit - print(f"Cache HIT (similarity: {result[2]:.3f}, age: {result[3]}s)") - return json.loads(result[1]) - else: - print("Cache MISS") - return None - - def stats(self) -> Dict[str, Any]: - """Get cache statistics""" - with self.conn.cursor() as cur: - cur.execute("SELECT * FROM semantic_cache.cache_stats()") - columns = [desc[0] for desc in cur.description] - values = cur.fetchone() - return dict(zip(columns, values)) - -# Usage example -cache = SemanticCache( - conn_string="dbname=mydb user=postgres", - openai_api_key="sk-..." -) - -# Try to get from cache, compute if miss -def get_revenue_data(query: str) -> Dict: - result = cache.get(query, similarity=0.95) - - if result: - return result # Cache hit! - - # Cache miss - compute the result - result = expensive_database_query() # Your expensive query here - cache.cache(query, result, ttl=3600, tags=['revenue', 'analytics']) - return result - -# Example queries -data1 = get_revenue_data("What was Q4 2024 revenue?") -data2 = get_revenue_data("Show me revenue for last quarter") # Will hit cache! -data3 = get_revenue_data("Q4 sales figures?") # Will also hit cache! 
- -# View statistics -print(cache.stats()) -``` - -### Node.js with OpenAI - -```javascript -const { Client } = require('pg'); -const OpenAI = require('openai'); - -class SemanticCache { - constructor(pgConfig, openaiApiKey) { - this.client = new Client(pgConfig); - this.openai = new OpenAI({ apiKey: openaiApiKey }); - this.client.connect(); - } - - async getEmbedding(text) { - const response = await this.openai.embeddings.create({ - model: 'text-embedding-ada-002', - input: text - }); - const embedding = response.data[0].embedding; - return `[${embedding.join(',')}]`; - } - - async cache(query, result, ttl = 3600, tags = null) { - const embedding = await this.getEmbedding(query); - const res = await this.client.query( - `SELECT semantic_cache.cache_query($1::text, $2::text, $3::jsonb, $4::int, $5::text[])`, - [query, embedding, JSON.stringify(result), ttl, tags] - ); - return res.rows[0].cache_query; - } - - async get(query, similarity = 0.95, maxAge = null) { - const embedding = await this.getEmbedding(query); - const res = await this.client.query( - `SELECT * FROM semantic_cache.get_cached_result($1::text, $2::float4, $3::int)`, - [embedding, similarity, maxAge] - ); - - const { found, result_data, similarity_score, age_seconds } = res.rows[0]; - - if (found) { - console.log(`Cache HIT (similarity: ${similarity_score.toFixed(3)}, age: ${age_seconds}s)`); - return JSON.parse(result_data); - } else { - console.log('Cache MISS'); - return null; - } - } - - async stats() { - const res = await this.client.query('SELECT * FROM semantic_cache.cache_stats()'); - return res.rows[0]; - } -} - -// Usage -const cache = new SemanticCache( - { host: 'localhost', database: 'mydb', user: 'postgres' }, - 'sk-...' 
-); - -async function getRevenueData(query) { - const cached = await cache.get(query); - if (cached) return cached; - - const result = await expensiveDatabaseQuery(); - await cache.cache(query, result, 3600, ['revenue', 'analytics']); - return result; -} -``` + Then open http://127.0.0.1:8000 in your browser. -### More Examples +3. To build a static site: + ```bash + mkdocs build + ``` -For additional integration patterns and use cases, see: -- `examples/usage_examples.sql` - Comprehensive SQL examples -- `test/benchmark.sql` - Performance testing examples + Documentation will added to the `site/` directory. --- -## Contributing - -Contributions are welcome! This extension is built with standard PostgreSQL C APIs. - -**Development setup:** -1. Fork the repository -2. Create a feature branch -3. Make your changes -4. Run tests: `make installcheck` -5. Submit a pull request +## Support & Resources -**Code guidelines:** -- Follow existing code style -- Add tests for new features -- Update documentation -- Ensure compatibility with PostgreSQL 14-18 +- Report bugs and request features through the GitHub Issues page. +- Check the `examples/` directory for usage patterns and code samples. +- See the `test/` directory for comprehensive testing examples. --- ## License -This project is licensed under the **PostgreSQL License**. - ---- - -## Support & Resources +This project is licensed under the [PostgreSQL License](docs/LICENSE.md). 
-- **GitHub Issues**: Report bugs and request features -- **Example Code**: Check `examples/` directory for usage patterns -- **Test Suite**: See `test/` directory for comprehensive examples diff --git a/docs/LICENSE.md b/docs/LICENSE.md new file mode 100644 index 0000000..075d616 --- /dev/null +++ b/docs/LICENSE.md @@ -0,0 +1,19 @@ +PostgreSQL License + +Copyright (c) 2024, Aqeel + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose, without fee, and without a written agreement +is hereby granted, provided that the above copyright notice and this +paragraph and the following two paragraphs appear in all copies. + +IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, +SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, +ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE +AUTHOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE +AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, +ENHANCEMENTS, OR MODIFICATIONS. diff --git a/docs/architecture.md b/docs/architecture.md index 764e9ee..7e0e3fa 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -31,22 +31,10 @@ graph LR 5. Automatic maintenance evicts expired entries based on TTL and configured policies. -## Performance - -- Lookup time is < 5ms for most queries with IVFFlat index. -- Scalability handles 100K+ cached entries efficiently. -- Throughput reaches thousands of cache lookups per second. -- Storage provides configurable cache size limits with automatic eviction. - -!!! tip "Pro Tip" - - Start with the default IVFFlat index and 1536 dimensions (OpenAI - ada-002). 
You can always reconfigure your cache later with the - `set_vector_dimension()` and `rebuild_index()` functions. ## Getting Help -- Browse the sections in the navigation menu for documentation. +- Browse the documentation. - Report issues at [GitHub Issues](https://github.com/pgedge/pg_semantic_cache/issues). - See [Use Cases](use_cases.md) for practical implementation examples. diff --git a/docs/development.md b/docs/development.md new file mode 100644 index 0000000..0552bdd --- /dev/null +++ b/docs/development.md @@ -0,0 +1,53 @@ +# Development Resources + +Developer contributions are welcome! This extension is built with standard PostgreSQL C APIs. + +To create a development installation: + +1. Fork the repository. +2. Create a feature branch for your changes. +3. Make your changes to the codebase. +4. Run the test suite with `make installcheck`. +5. Submit a pull request with your changes. + +Code guidelines: + +- Follow the existing code style throughout the project. +- Add tests for any new features you implement. +- Update the documentation to reflect your changes. +- Ensure your changes are compatible with PostgreSQL versions 14 through 18. + +--- + +## Building From Source + +The extension uses the standard PostgreSQL PGXS build system for compilation and installation. + + +```bash +# Standard build +make clean && make +sudo make install + +# Run tests +make installcheck + +# Development build with debug symbols +make CFLAGS="-g -O0" clean all + +# View build configuration +make info +``` + +## Performing a Multi-Version PostgreSQL Build + +The extension supports building for multiple PostgreSQL versions in sequence. + +Build for multiple PostgreSQL versions simultaneously: + +```bash +for PG in 14 15 16 17 18; do + echo "Building for PostgreSQL $PG..." 
+ PG_CONFIG=/usr/pgsql-${PG}/bin/pg_config make clean install +done +``` diff --git a/docs/functions.md b/docs/functions.md index 2dbfee5..d39adbe 100644 --- a/docs/functions.md +++ b/docs/functions.md @@ -1,6 +1,6 @@ # Using pg_semantic_cache Functions -This page provides a comprehensive reference for all available functions in the pg_semantic_cache extension. +The extension provides a complete set of SQL functions for caching, eviction, monitoring, and configuration. This page provides a comprehensive reference for all available functions in the pg_semantic_cache extension. ## Function Reference @@ -24,3 +24,100 @@ This page provides a comprehensive reference for all available functions in the | [rebuild_index](functions/rebuild_index.md) | Rebuilds the vector similarity index for optimal performance. | | [set_index_type](functions/set_index_type.md) | Sets the vector index type for similarity search. | | [set_vector_dimension](functions/set_vector_dimension.md) | Sets the vector embedding dimension. | + + +### Core Functions + +#### `init_schema()` +Initialize the cache schema, creating all required tables, indexes, and views. + +```sql +SELECT semantic_cache.init_schema(); +``` + +#### `cache_query(query_text, embedding, result_data, ttl_seconds, tags)` +Store a query result with its embedding for future retrieval. + +**Parameters:** +- `query_text` (text) - The original query text +- `embedding` (text) - Vector embedding as text: `'[0.1, 0.2, ...]'` +- `result_data` (jsonb) - The query result to cache +- `ttl_seconds` (int) - Time-to-live in seconds +- `tags` (text[]) - Optional tags for organization + +**Returns:** `bigint` - Cache entry ID + +#### `get_cached_result(embedding, similarity_threshold, max_age_seconds)` +Retrieve a cached result by semantic similarity. 
+ +**Parameters:** +- `embedding` (text) - Query embedding to search for +- `similarity_threshold` (float4) - Minimum similarity (0.0 to 1.0) +- `max_age_seconds` (int) - Maximum age in seconds (NULL = any age) + +**Returns:** `record` - `(found boolean, result_data jsonb, similarity_score float4, age_seconds int)` + + +--- + +### Cache Eviction + +Multiple eviction strategies are available to manage cache size and freshness. + +#### `evict_expired()` +Remove all expired cache entries. + +```sql +SELECT semantic_cache.evict_expired(); -- Returns count of evicted entries +``` + +#### `evict_lru(keep_count)` +Evict least recently used entries, keeping only the specified number of most recent entries. + +```sql +SELECT semantic_cache.evict_lru(1000); -- Keep only 1000 most recently used entries +``` + +#### `evict_lfu(keep_count)` +Evict least frequently used entries, keeping only the specified number of most frequently used entries. + +```sql +SELECT semantic_cache.evict_lfu(1000); -- Keep only 1000 most frequently used entries +``` + +#### `auto_evict()` +Automatically evict entries based on configured policy (LRU, LFU, or TTL). + +```sql +SELECT semantic_cache.auto_evict(); +``` + +#### `clear_cache()` +Remove **all** cache entries (use with caution). + +```sql +SELECT semantic_cache.clear_cache(); +``` + +--- + +### Statistics & Monitoring + +Built-in functions and views provide real-time visibility into cache performance. + +#### `cache_stats()` +Get comprehensive cache statistics. + +```sql +SELECT * FROM semantic_cache.cache_stats(); +``` + +**Returns:** +``` +total_entries | Total number of cached queries +total_hits | Total number of cache hits +total_misses | Total number of cache misses +hit_rate_percent | Hit rate as a percentage +``` + +**Note:** For more detailed statistics including cache size, expired entries, and access patterns, use the `semantic_cache.cache_health` view. 
diff --git a/docs/index.md b/docs/index.md index e776e85..b33ac1b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -57,4 +57,16 @@ For an LLM application making 10,000 queries per day: - Comprehensive monitoring provides built-in statistics, views, and health metrics. +### Cross-Platform Support + +The extension is fully compatible with all PostgreSQL-supported platforms. + +Fully compatible with all PostgreSQL-supported platforms: + +| Platform | Status | Notes | +|----------|--------|-------| +| Linux | Supported | Ubuntu, Debian, RHEL, Rocky, Fedora, etc. | +| macOS | Supported | Intel & Apple Silicon | +| Windows | Supported | Via MinGW or MSVC | +| BSD | Supported | FreeBSD, OpenBSD | diff --git a/integration.md b/docs/integration.md similarity index 78% rename from integration.md rename to docs/integration.md index d6cc83e..c0c4169 100644 --- a/integration.md +++ b/docs/integration.md @@ -1,10 +1,15 @@ # Integration Examples -Refer to the following integration examples when configuring pg_semantic_cache. +This page provides integration examples for using pg_semantic_cache with +popular programming languages and embedding providers. -### Python with OpenAI +## Python with OpenAI -Complete example integrating semantic cache with OpenAI embeddings: +The following example demonstrates how to integrate the semantic cache with +OpenAI embeddings using Python and the psycopg2 library. + +In the following example, the `SemanticCache` class wraps the cache functions +and handles embedding generation through the OpenAI API. ```python import psycopg2 @@ -92,14 +97,26 @@ def get_revenue_data(query: str) -> Dict: # Example queries data1 = get_revenue_data("What was Q4 2024 revenue?") -data2 = get_revenue_data("Show me revenue for last quarter") # Will hit cache! -data3 = get_revenue_data("Q4 sales figures?") # Will also hit cache! 
+data2 = get_revenue_data("Show me revenue for last quarter") +data3 = get_revenue_data("Q4 sales figures?") # View statistics print(cache.stats()) ``` -### Node.js with OpenAI +The preceding example demonstrates three key operations: + +- The cache initialization with database connection and API credentials. +- The automatic fallback from cache lookup to computation when needed. +- The statistical monitoring to track cache performance over time. + +## Node.js with OpenAI + +The following example shows how to use the semantic cache with Node.js and +the OpenAI API through an asynchronous interface. + +In the following example, the `SemanticCache` class uses async/await patterns +to handle database operations and embedding generation. ```javascript const { Client } = require('pg'); @@ -170,9 +187,11 @@ async function getRevenueData(query) { } ``` -### More Examples +## Additional Resources + +The repository includes additional integration examples and test files. -For additional integration patterns and use cases, see: -- `examples/usage_examples.sql` - Comprehensive SQL examples -- `test/benchmark.sql` - Performance testing examples +For more comprehensive examples, refer to the following files: +- The `examples/usage_examples.sql` file contains comprehensive SQL examples. +- The `test/benchmark.sql` file provides performance testing examples. diff --git a/docs/performance.md b/docs/performance.md new file mode 100644 index 0000000..edb0639 --- /dev/null +++ b/docs/performance.md @@ -0,0 +1,70 @@ +# Performance and Benchmarking + +The extension is optimized for sub-millisecond cache lookups with minimal overhead. + +- Lookup time is < 5ms for most queries with IVFFlat index. +- Scalability handles 100K+ cached entries efficiently. +- Throughput reaches thousands of cache lookups per second. +- Storage provides configurable cache size limits with automatic eviction. + +!!! tip "Pro Tip" + + Start with the default IVFFlat index and 1536 dimensions (OpenAI + ada-002). 
You can always reconfigure your cache later with the + `set_vector_dimension()` and `rebuild_index()` functions. + +## Runtime Metrics + +The following table shows typical performance metrics for common cache operations. + +| Operation | Performance | Notes | +|-----------|-------------|-------| +| Cache lookup | **< 5ms** | With optimized vector index | +| Cache insert | **< 10ms** | Including embedding storage | +| Eviction (1000 entries) | **< 50ms** | Efficient batch operations | +| Statistics query | **< 1ms** | Materialized views | +| Similarity search | **2-3ms avg** | IVFFlat/HNSW indexed | + +### Expected Hit Rates + +Cache hit rates vary by workload type and query similarity patterns. + +| Workload Type | Typical Hit Rate | +|---------------|------------------| +| AI/LLM queries | 40-60% | +| Analytics dashboards | 60-80% | +| Search systems | 50-70% | +| Chatbot conversations | 45-65% | + +### Memory Overhead + +The cache maintains a minimal memory footprint for typical workloads. + +- Each cache entry requires approximately 1-2KB for metadata and indexes. +- Vector storage size depends on the embedding dimension (1536D requires approximately 6KB). +- The total overhead remains minimal for typical workloads. + +## Benchmarking + +The extension includes a comprehensive benchmark suite for performance testing. 
+ +Use the following command to run the included benchmark suite: + +```bash +psql -U postgres -d your_database -f test/benchmark.sql +``` + +**Expected Results:** + +``` +Operation | Count | Total Time | Avg Time +-----------------------+--------+------------+---------- +Insert entries | 1,000 | ~500ms | 0.5ms +Lookup (hits) | 100 | ~200ms | 2.0ms +Lookup (misses) | 100 | ~150ms | 1.5ms +Evict LRU | 500 | ~25ms | 0.05ms +``` + + + + diff --git a/docs/production.md b/docs/production.md new file mode 100644 index 0000000..3240ca6 --- /dev/null +++ b/docs/production.md @@ -0,0 +1,118 @@ +# Deploying in a Production Environment + +For production environments, optimize PostgreSQL settings and set up automated maintenance. + +### PostgreSQL Configuration + +Optimize PostgreSQL memory and performance settings for semantic caching workloads. + +Optimize PostgreSQL settings for semantic caching workloads: + +```sql +-- Memory settings +ALTER SYSTEM SET shared_buffers = '4GB'; -- Adjust based on available RAM +ALTER SYSTEM SET effective_cache_size = '12GB'; -- Typically 50-75% of RAM +ALTER SYSTEM SET work_mem = '256MB'; -- For vector operations + +-- Reload configuration +SELECT pg_reload_conf(); +``` + +### Automated Maintenance + +Schedule automatic cache maintenance tasks using the pg_cron extension. + +Set up automatic cache maintenance using `pg_cron`: + +```sql +-- Install pg_cron +CREATE EXTENSION IF NOT EXISTS pg_cron; + +-- Schedule auto-eviction every 15 minutes +SELECT cron.schedule( + 'semantic-cache-eviction', + '*/15 * * * *', + $$SELECT semantic_cache.auto_evict()$$ +); + +-- Schedule expired entry cleanup every hour +SELECT cron.schedule( + 'semantic-cache-cleanup', + '0 * * * *', + $$SELECT semantic_cache.evict_expired()$$ +); + +-- Verify scheduled jobs +SELECT * FROM cron.job WHERE jobname LIKE 'semantic-cache%'; +``` + +### Index Optimization + +Choose the appropriate vector index strategy based on your cache size. 
+ +#### Small to Medium Caches (< 100k entries) +Default IVFFlat index works well out of the box. + +#### Large Caches (100k - 1M entries) +Increase IVFFlat lists for better performance: + +```sql +DROP INDEX IF EXISTS semantic_cache.idx_cache_embedding; +CREATE INDEX idx_cache_embedding + ON semantic_cache.cache_entries + USING ivfflat (query_embedding vector_cosine_ops) + WITH (lists = 1000); -- Increase lists for larger caches +``` + +#### Very Large Caches (> 1M entries) +Use HNSW index for optimal performance (requires pgvector 0.5.0+): + +```sql +DROP INDEX IF EXISTS semantic_cache.idx_cache_embedding; +CREATE INDEX idx_cache_embedding_hnsw + ON semantic_cache.cache_entries + USING hnsw (query_embedding vector_cosine_ops) + WITH (m = 16, ef_construction = 64); +``` + +HNSW provides the following benefits: + +- The HNSW index delivers faster queries with 1-2ms response times compared to 3-5ms for IVFFlat. +- HNSW provides better recall accuracy at high similarity thresholds. +- HNSW scales linearly with cache size for consistent performance. + +### Monitoring Setup + +Set up custom views to monitor cache health and performance metrics. + +Create a monitoring dashboard view: + +```sql +CREATE OR REPLACE VIEW semantic_cache.production_dashboard AS +SELECT + (SELECT hit_rate_percent FROM semantic_cache.cache_stats())::numeric(5,2) || '%' as hit_rate, + (SELECT total_entries FROM semantic_cache.cache_stats()) as total_entries, + (SELECT pg_size_pretty(SUM(result_size_bytes)::BIGINT) FROM semantic_cache.cache_entries) as cache_size, + (SELECT COUNT(*) FROM semantic_cache.cache_entries WHERE expires_at <= NOW()) as expired_entries, + (SELECT value FROM semantic_cache.cache_config WHERE key = 'eviction_policy') as eviction_policy, + NOW() as snapshot_time; + +-- Query the dashboard +SELECT * FROM semantic_cache.production_dashboard; +``` + +### High Availability Considerations + +The cache integrates seamlessly with PostgreSQL's replication and backup mechanisms. 
+
+```bash
+# Regular backups of cache metadata (optional)
+pg_dump -U postgres -d your_db -t semantic_cache.cache_entries -t semantic_cache.cache_metadata -F c -f cache_backup.dump
+
+# Replication: Cache data is automatically replicated with PostgreSQL streaming replication
+# No special configuration needed
+```
+
+---
+
+