Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions orchestrator/core/samplestore/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@
import pandas as pd
from rich.console import RenderableType

# Process-level cache of tablename prefixes for which the four DDL tables have
# already been verified to exist. Skips the four `CREATE TABLE IF NOT EXISTS`
# round-trips on every subsequent SQLSampleStore construction for the same store.
#
# Entries are the `sqlsource_<identifier>` table names added by
# SQLSampleStore.__init__ after it has probed for (and, if missing, created)
# the tables. The set lives for the lifetime of the process and is never pruned.
# NOTE(review): the key is the table name only, not the storage location —
# confirm that two different databases holding a store with the same identifier
# are never opened in one process, otherwise the second would skip the probe.
_source_tables_verified: set[str] = set()


class SQLSampleStoreConfiguration(pydantic.BaseModel):
identifier: Annotated[
Expand Down Expand Up @@ -376,8 +381,33 @@ def __init__(
self._tablename = f"sqlsource_{self._identifier}"
self._engine = engine_for_sql_store(storageLocation)

# Create a table for this sample store
self._create_source_table()
# Create the four backing tables only when they do not yet exist.
# Use a single raw SQL probe (1 round-trip) as a fast path to avoid
# the ~4 SQL queries that create_all(checkfirst=True) issues when
# the tables are already present (4 table-existence checks).
# The module-level _source_tables_verified set enables skipping
# even the probe for subsequent constructions within the same process.
#
# We use a direct information_schema / sqlite_master query rather than
# sqlalchemy.inspect() to avoid the Inspector's internal connection
# overhead (it opens its own connection on top of the borrowed one).
if self._tablename not in _source_tables_verified:
if self.engine.dialect.name == "sqlite":
existence_query = sqlalchemy.text(
"SELECT 1 FROM sqlite_master WHERE type='table' AND name=:name"
).bindparams(name=self._tablename)
else:
existence_query = sqlalchemy.text(
"SELECT 1 FROM information_schema.tables"
" WHERE table_schema = DATABASE() AND table_name = :name LIMIT 1"
).bindparams(name=self._tablename)
Comment on lines +395 to +403
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the same as what we added for the resources table (which is why I was asking you to update from main: at first glance I thought the code was the same).

At this point it's worth having a parametrized statement in orchestrator/metastore/sql/statements.py for this.


with self.engine.connect() as conn:
table_exists = conn.execute(existence_query).fetchone() is not None

if not table_exists:
self._create_source_table()
_source_tables_verified.add(self._tablename)

# Initialize entities cache as empty dict for lazy loading
# Empty dict is falsy, so lazy loading check `if not self._entities:` still works
Expand Down
Loading