-
Notifications
You must be signed in to change notification settings - Fork 43
test: dbt helpers — loadRawManifest, getUniqueId, extractColumns, listModelNames #690
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
anandgupta42
wants to merge
1
commit into
main
Choose a base branch
from
test/hourly-20260410-2018
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+278
−0
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,278 @@ | ||
| /** | ||
| * Unit tests for dbt helper functions: loadRawManifest, getUniqueId, | ||
| * extractColumns, and listModelNames. | ||
| * | ||
| * These pure/cached functions are shared foundations used by dbtLineage(), | ||
| * generateDbtUnitTests(), and parseManifest(). Zero tests existed. | ||
| * | ||
| * Risk: a bug in loadRawManifest's mtime cache silently serves stale | ||
| * manifest data to every dbt tool (lineage, unit-test gen, manifest). | ||
| * A bug in extractColumns produces wrong column types in generated YAML. | ||
| * A bug in getUniqueId means model lookup fails silently. | ||
| */ | ||
|
|
||
| import { describe, test, expect, afterEach } from "bun:test" | ||
| import { writeFileSync, mkdtempSync, rmSync, symlinkSync, utimesSync } from "fs" | ||
| import { tmpdir } from "os" | ||
| import { join } from "path" | ||
| import { | ||
| loadRawManifest, | ||
| getUniqueId, | ||
| extractColumns, | ||
| listModelNames, | ||
| } from "../../src/altimate/native/dbt/helpers" | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // Fixtures | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
| let tmpDirs: string[] = [] | ||
|
|
||
| function makeTmpDir(): string { | ||
| const dir = mkdtempSync(join(tmpdir(), "dbt-helpers-test-")) | ||
| tmpDirs.push(dir) | ||
| return dir | ||
| } | ||
|
|
||
| afterEach(() => { | ||
| for (const dir of tmpDirs) { | ||
| rmSync(dir, { recursive: true, force: true }) | ||
| } | ||
| tmpDirs = [] | ||
| }) | ||
|
|
||
| const NODES: Record<string, any> = { | ||
| "model.proj.orders": { | ||
| resource_type: "model", | ||
| name: "orders", | ||
| }, | ||
| "model.proj.users": { | ||
| resource_type: "model", | ||
| name: "users", | ||
| }, | ||
| "test.proj.not_null": { | ||
| resource_type: "test", | ||
| name: "not_null", | ||
| }, | ||
| "seed.proj.country_codes": { | ||
| resource_type: "seed", | ||
| name: "country_codes", | ||
| }, | ||
| } | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // loadRawManifest | ||
| // | ||
| // NOTE: loadRawManifest uses a module-level cache keyed by (resolved path, | ||
| // mtime). Every test uses a unique file name within its own tmpdir to avoid | ||
| // cross-test cache contamination. Assertions use content equality (not object | ||
| // identity) unless the test is explicitly verifying cache behaviour. | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
| describe("loadRawManifest", () => { | ||
| test("returns null for non-existent file", () => { | ||
| const result = loadRawManifest("/tmp/does-not-exist-manifest-" + Date.now() + ".json") | ||
| expect(result).toBeNull() | ||
| }) | ||
|
|
||
| test("parses valid JSON manifest and returns object", () => { | ||
| const dir = makeTmpDir() | ||
| const manifestPath = join(dir, "valid-manifest.json") | ||
| const data = { metadata: { adapter_type: "snowflake" }, nodes: {} } | ||
| writeFileSync(manifestPath, JSON.stringify(data)) | ||
|
|
||
| const result = loadRawManifest(manifestPath) | ||
| expect(result).toEqual(data) | ||
| }) | ||
|
|
||
| test("throws on invalid JSON", () => { | ||
| const dir = makeTmpDir() | ||
| const manifestPath = join(dir, "bad-json.json") | ||
| writeFileSync(manifestPath, "not json {{{") | ||
|
|
||
| expect(() => loadRawManifest(manifestPath)).toThrow() | ||
| }) | ||
|
|
||
| test("does not throw for JSON array (typeof [] is 'object')", () => { | ||
| // Note: the guard in loadRawManifest checks `typeof parsed !== "object"`. | ||
| // In JavaScript, `typeof [] === "object"`, so arrays pass through. | ||
| // This is a known edge case — a manifest that is a bare array is invalid | ||
| // but the guard won't catch it. The callers handle this gracefully since | ||
| // they access .nodes/.sources which will be undefined on an array. | ||
| const dir = makeTmpDir() | ||
| const manifestPath = join(dir, "array-manifest.json") | ||
| writeFileSync(manifestPath, "[1, 2, 3]") | ||
|
|
||
| const result = loadRawManifest(manifestPath) | ||
| expect(Array.isArray(result)).toBe(true) | ||
| }) | ||
|
|
||
| test("throws when manifest is a JSON primitive (string)", () => { | ||
| const dir = makeTmpDir() | ||
| const manifestPath = join(dir, "string-manifest.json") | ||
| writeFileSync(manifestPath, '"just a string"') | ||
|
|
||
| expect(() => loadRawManifest(manifestPath)).toThrow("Manifest is not a JSON object") | ||
| }) | ||
|
|
||
| test("returns cached result when file unchanged (same ref)", () => { | ||
| const dir = makeTmpDir() | ||
| const manifestPath = join(dir, "cache-hit.json") | ||
| const data = { metadata: {}, nodes: { a: 1 } } | ||
| writeFileSync(manifestPath, JSON.stringify(data)) | ||
|
|
||
| const first = loadRawManifest(manifestPath) | ||
| const second = loadRawManifest(manifestPath) | ||
| // Same object reference means cache was used (same path + same mtime) | ||
| expect(first).toBe(second) | ||
| }) | ||
|
|
||
| test("re-reads file when content and mtime change", () => { | ||
| const dir = makeTmpDir() | ||
| const manifestPath = join(dir, "cache-miss.json") | ||
| const data1 = { metadata: {}, nodes: { version: 1 } } | ||
| writeFileSync(manifestPath, JSON.stringify(data1)) | ||
|
|
||
| const first = loadRawManifest(manifestPath) | ||
| expect(first.nodes.version).toBe(1) | ||
|
|
||
| // Write new content, then bump mtime to guarantee it differs from the | ||
| // cached mtime (some filesystems have 1-second granularity). | ||
| const data2 = { metadata: {}, nodes: { version: 2 } } | ||
| writeFileSync(manifestPath, JSON.stringify(data2)) | ||
| const futureMs = Date.now() / 1000 + 5 | ||
| utimesSync(manifestPath, futureMs, futureMs) | ||
|
|
||
| const second = loadRawManifest(manifestPath) | ||
| expect(second.nodes.version).toBe(2) | ||
| }) | ||
|
|
||
| test("resolves symlinks before caching", () => { | ||
| const dir = makeTmpDir() | ||
| const realPath = join(dir, "real-for-symlink.json") | ||
| const symPath = join(dir, "link-for-symlink.json") | ||
| const data = { metadata: {}, nodes: { sym: true } } | ||
| writeFileSync(realPath, JSON.stringify(data)) | ||
| symlinkSync(realPath, symPath) | ||
|
|
||
| const viaReal = loadRawManifest(realPath) | ||
| const viaSym = loadRawManifest(symPath) | ||
| // Both resolve to the same real path + same mtime → content must match | ||
| expect(viaSym).toEqual(viaReal) | ||
| expect(viaSym.nodes.sym).toBe(true) | ||
| }) | ||
| }) | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // getUniqueId | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
| describe("getUniqueId", () => { | ||
| test("returns unique_id when passed a unique_id key directly", () => { | ||
| expect(getUniqueId(NODES, "model.proj.orders")).toBe("model.proj.orders") | ||
| }) | ||
|
|
||
| test("returns unique_id when passed a model name", () => { | ||
| expect(getUniqueId(NODES, "orders")).toBe("model.proj.orders") | ||
| }) | ||
|
|
||
| test("returns undefined for non-existent model", () => { | ||
| expect(getUniqueId(NODES, "nonexistent")).toBeUndefined() | ||
| }) | ||
|
|
||
| test("does not match test nodes by name", () => { | ||
| expect(getUniqueId(NODES, "not_null")).toBeUndefined() | ||
| }) | ||
|
|
||
| test("does not match seed nodes by name", () => { | ||
| expect(getUniqueId(NODES, "country_codes")).toBeUndefined() | ||
| }) | ||
|
|
||
| test("does not match by unique_id if resource_type is not model", () => { | ||
| expect(getUniqueId(NODES, "test.proj.not_null")).toBeUndefined() | ||
| expect(getUniqueId(NODES, "seed.proj.country_codes")).toBeUndefined() | ||
| }) | ||
| }) | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // extractColumns | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
| describe("extractColumns", () => { | ||
| test("extracts columns with name and data_type", () => { | ||
| const dict = { | ||
| id: { name: "id", data_type: "INTEGER" }, | ||
| email: { name: "email", data_type: "VARCHAR" }, | ||
| } | ||
| const result = extractColumns(dict) | ||
| expect(result).toHaveLength(2) | ||
| expect(result).toContainEqual({ name: "id", data_type: "INTEGER", description: undefined }) | ||
| expect(result).toContainEqual({ name: "email", data_type: "VARCHAR", description: undefined }) | ||
| }) | ||
|
|
||
| test("falls back to dict key when col.name is missing", () => { | ||
| const dict = { | ||
| user_id: { data_type: "BIGINT" }, | ||
| } | ||
| const result = extractColumns(dict) | ||
| expect(result[0].name).toBe("user_id") | ||
| expect(result[0].data_type).toBe("BIGINT") | ||
| }) | ||
|
|
||
| test("falls back to col.type when data_type is missing", () => { | ||
| const dict = { | ||
| amount: { name: "amount", type: "DECIMAL(10,2)" }, | ||
| } | ||
| const result = extractColumns(dict) | ||
| expect(result[0].data_type).toBe("DECIMAL(10,2)") | ||
| }) | ||
|
|
||
| test("includes description when present", () => { | ||
| const dict = { | ||
| status: { name: "status", data_type: "VARCHAR", description: "Order status" }, | ||
| } | ||
| const result = extractColumns(dict) | ||
| expect(result[0].description).toBe("Order status") | ||
| }) | ||
|
|
||
| test("returns empty array for empty dict", () => { | ||
| expect(extractColumns({})).toEqual([]) | ||
| }) | ||
|
|
||
| test("handles both name and type fallbacks simultaneously", () => { | ||
| const dict = { | ||
| my_col: { type: "TEXT" }, | ||
| } | ||
| const result = extractColumns(dict) | ||
| expect(result[0].name).toBe("my_col") | ||
| expect(result[0].data_type).toBe("TEXT") | ||
| expect(result[0].description).toBeUndefined() | ||
| }) | ||
| }) | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // listModelNames | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
| describe("listModelNames", () => { | ||
| test("returns only model names, not tests or seeds", () => { | ||
| const result = listModelNames(NODES) | ||
| expect(result).toContain("orders") | ||
| expect(result).toContain("users") | ||
| expect(result).not.toContain("not_null") | ||
| expect(result).not.toContain("country_codes") | ||
| expect(result).toHaveLength(2) | ||
| }) | ||
|
|
||
| test("returns empty for nodes with no models", () => { | ||
| const result = listModelNames({ | ||
| "test.proj.x": { resource_type: "test", name: "x" }, | ||
| "seed.proj.y": { resource_type: "seed", name: "y" }, | ||
| }) | ||
| expect(result).toEqual([]) | ||
| }) | ||
|
|
||
| test("returns empty for empty nodes", () => { | ||
| expect(listModelNames({})).toEqual([]) | ||
| }) | ||
| }) | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use the repository tmpdir fixture pattern instead of manual mkdtemp/rm cleanup.
This test file currently bypasses the required test tempdir lifecycle (
mkdtempSync+ shared array +afterEach). Please switch totmpdir()fromfixture/fixture.tswithawait usingandtmp.pathin each test.Suggested refactor pattern
As per coding guidelines:
`packages/opencode/test/**/*.test.ts` must use `tmpdir` from `fixture/fixture.ts`, `await using` for cleanup, and temporary path access via `tmp.path`. 🤖 Prompt for AI Agents