From 737c5961dcba4fbb35a646ab930a140828e213d8 Mon Sep 17 00:00:00 2001 From: David de Boer Date: Sat, 7 Mar 2026 20:31:07 +0100 Subject: [PATCH 1/2] feat(pipeline): include triple count in import result reporting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add optional tripleCount to ImportSuccessful (sparql-importer) - Parse num-triples from QLever container logs instead of reading metadata file from host filesystem (volume-agnostic) - Thread tripleCount through ResolvedDistribution and ProgressReporter - Display formatted triple count in console reporter, e.g. '✔ Imported …/file.nt (4.8M triples, to http://…) in 22.8s' --- .../src/consoleReporter.ts | 7 ++- .../test/consoleReporter.test.ts | 53 ++++++++++++++++++- .../pipeline-console-reporter/vite.config.ts | 8 +-- .../src/distribution/importResolver.ts | 1 + .../pipeline/src/distribution/resolver.ts | 1 + packages/pipeline/src/pipeline.ts | 1 + packages/pipeline/src/progressReporter.ts | 1 + .../test/distribution/importResolver.test.ts | 2 + packages/pipeline/test/pipeline.test.ts | 5 +- packages/pipeline/vite.config.ts | 4 +- packages/sparql-importer/src/index.ts | 7 +-- packages/sparql-qlever/src/importer.ts | 25 +++++++-- packages/sparql-qlever/test/importer.test.ts | 1 + 13 files changed, 99 insertions(+), 17 deletions(-) diff --git a/packages/pipeline-console-reporter/src/consoleReporter.ts b/packages/pipeline-console-reporter/src/consoleReporter.ts index 02f64531..00605429 100644 --- a/packages/pipeline-console-reporter/src/consoleReporter.ts +++ b/packages/pipeline-console-reporter/src/consoleReporter.ts @@ -85,15 +85,20 @@ export class ConsoleReporter implements ProgressReporter { distribution: Distribution, importedFrom?: Distribution, importDuration?: number, + tripleCount?: number, ): void { const s = ora({ discardStdin: false }); if (importedFrom) { + const count = + tripleCount !== undefined + ? 
`${compactNumber.format(tripleCount)} triples, ` + : ''; const duration = importDuration !== undefined ? ` in ${chalk.bold(prettyMilliseconds(importDuration))}` : ''; s.start( - `Imported ${importedFrom.accessUrl.toString()} (to ${distribution.accessUrl.toString()})${duration}`, + `Imported ${importedFrom.accessUrl.toString()} (${count}to ${distribution.accessUrl.toString()})${duration}`, ); } else { s.start( diff --git a/packages/pipeline-console-reporter/test/consoleReporter.test.ts b/packages/pipeline-console-reporter/test/consoleReporter.test.ts index 69f57b69..d58e06be 100644 --- a/packages/pipeline-console-reporter/test/consoleReporter.test.ts +++ b/packages/pipeline-console-reporter/test/consoleReporter.test.ts @@ -1,9 +1,60 @@ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, vi } from 'vitest'; import { ConsoleReporter } from '../src/consoleReporter.js'; +import { Dataset, Distribution } from '@lde/dataset'; + +function makeDataset(): Dataset { + return new Dataset({ + iri: new URL('http://example.org/dataset'), + distributions: [], + }); +} describe('ConsoleReporter', () => { it('can be instantiated', () => { const reporter = new ConsoleReporter(); expect(reporter).toBeInstanceOf(ConsoleReporter); }); + + describe('distributionSelected', () => { + it('includes triple count when present', () => { + const reporter = new ConsoleReporter(); + const spy = vi.spyOn(process.stderr, 'write').mockReturnValue(true); + + reporter.distributionSelected( + makeDataset(), + Distribution.sparql(new URL('http://localhost:7001/sparql')), + new Distribution( + new URL('http://example.org/data.nt'), + 'application/n-triples', + ), + 5000, + 4800000, + ); + + const output = spy.mock.calls.map((c) => String(c[0])).join(''); + expect(output).toContain('4.8M triples'); + expect(output).toContain('to http://localhost:7001/sparql'); + spy.mockRestore(); + }); + + it('omits triple count when absent', () => { + const reporter = new ConsoleReporter(); + 
const spy = vi.spyOn(process.stderr, 'write').mockReturnValue(true); + + reporter.distributionSelected( + makeDataset(), + Distribution.sparql(new URL('http://localhost:7001/sparql')), + new Distribution( + new URL('http://example.org/data.nt'), + 'application/n-triples', + ), + 5000, + ); + + const output = spy.mock.calls.map((c) => String(c[0])).join(''); + expect(output).not.toContain('triples'); + expect(output).toContain('to http://localhost:7001/sparql'); + spy.mockRestore(); + }); + }); }); diff --git a/packages/pipeline-console-reporter/vite.config.ts b/packages/pipeline-console-reporter/vite.config.ts index 4c8cc1b1..76e089bd 100644 --- a/packages/pipeline-console-reporter/vite.config.ts +++ b/packages/pipeline-console-reporter/vite.config.ts @@ -10,10 +10,10 @@ export default mergeConfig( coverage: { thresholds: { autoUpdate: true, - functions: 0, - lines: 8.62, - branches: 0, - statements: 8.62, + functions: 7.14, + lines: 18.64, + branches: 12.5, + statements: 18.64, }, }, }, diff --git a/packages/pipeline/src/distribution/importResolver.ts b/packages/pipeline/src/distribution/importResolver.ts index 361c436d..fc47821e 100644 --- a/packages/pipeline/src/distribution/importResolver.ts +++ b/packages/pipeline/src/distribution/importResolver.ts @@ -82,6 +82,7 @@ export class ImportResolver implements DistributionResolver { probeResults, importResult.distribution, Date.now() - importStart, + importResult.tripleCount, ); } diff --git a/packages/pipeline/src/distribution/resolver.ts b/packages/pipeline/src/distribution/resolver.ts index 468a15ec..6f965d6a 100644 --- a/packages/pipeline/src/distribution/resolver.ts +++ b/packages/pipeline/src/distribution/resolver.ts @@ -8,6 +8,7 @@ export class ResolvedDistribution { readonly probeResults: ProbeResultType[], readonly importedFrom?: Distribution, readonly importDuration?: number, + readonly tripleCount?: number, ) {} } diff --git a/packages/pipeline/src/pipeline.ts b/packages/pipeline/src/pipeline.ts index 
d46cc528..10c8b079 100644 --- a/packages/pipeline/src/pipeline.ts +++ b/packages/pipeline/src/pipeline.ts @@ -161,6 +161,7 @@ export class Pipeline { resolved.distribution, resolved.importedFrom, resolved.importDuration, + resolved.tripleCount, ); try { diff --git a/packages/pipeline/src/progressReporter.ts b/packages/pipeline/src/progressReporter.ts index 4dc5c799..fc29475e 100644 --- a/packages/pipeline/src/progressReporter.ts +++ b/packages/pipeline/src/progressReporter.ts @@ -21,6 +21,7 @@ export interface ProgressReporter { distribution: Distribution, importedFrom?: Distribution, importDuration?: number, + tripleCount?: number, ): void; stageStart?(stage: string): void; stageProgress?(update: { diff --git a/packages/pipeline/test/distribution/importResolver.test.ts b/packages/pipeline/test/distribution/importResolver.test.ts index c35f0a1a..4d8503c9 100644 --- a/packages/pipeline/test/distribution/importResolver.test.ts +++ b/packages/pipeline/test/distribution/importResolver.test.ts @@ -84,6 +84,7 @@ describe('ImportResolver', () => { new ImportSuccessful( Distribution.sparql(new URL('http://localhost:7878/sparql')), 'test-graph', + 42000, ), ), }; @@ -104,6 +105,7 @@ describe('ImportResolver', () => { ); expect(resolved.probeResults).toHaveLength(1); expect(resolved.probeResults[0]).toBeInstanceOf(DataDumpProbeResult); + expect(resolved.tripleCount).toBe(42000); }); it('sets importedFrom on ResolvedDistribution when import succeeds', async () => { diff --git a/packages/pipeline/test/pipeline.test.ts b/packages/pipeline/test/pipeline.test.ts index f8f76dc4..c82095e9 100644 --- a/packages/pipeline/test/pipeline.test.ts +++ b/packages/pipeline/test/pipeline.test.ts @@ -636,6 +636,8 @@ describe('Pipeline', () => { sparqlDistribution, [], importedFromDistribution, + 1000, + 42000, ); const pipeline = new Pipeline({ @@ -652,7 +654,8 @@ describe('Pipeline', () => { dataset, sparqlDistribution, importedFromDistribution, - undefined, + 1000, + 42000, ); }); diff 
--git a/packages/pipeline/vite.config.ts b/packages/pipeline/vite.config.ts index 867d4027..1ae5a5e2 100644 --- a/packages/pipeline/vite.config.ts +++ b/packages/pipeline/vite.config.ts @@ -12,9 +12,9 @@ export default mergeConfig( thresholds: { autoUpdate: true, functions: 90.99, - lines: 93.97, + lines: 93.98, branches: 89.15, - statements: 93.27, + statements: 93.28, }, }, }, diff --git a/packages/sparql-importer/src/index.ts b/packages/sparql-importer/src/index.ts index 607919a5..d1acfdf0 100644 --- a/packages/sparql-importer/src/index.ts +++ b/packages/sparql-importer/src/index.ts @@ -13,7 +13,7 @@ export interface Importer { * Import a {@link Dataset} to a SPARQL server. */ import( - dataset: Dataset + dataset: Dataset, ): Promise; } @@ -25,14 +25,15 @@ export interface Importer { export class ImportSuccessful { constructor( public readonly distribution: Distribution, - public readonly identifier?: string + public readonly identifier?: string, + public readonly tripleCount?: number, ) {} } export class ImportFailed { constructor( public readonly distribution: Distribution, - public readonly error: string + public readonly error: string, ) {} } diff --git a/packages/sparql-qlever/src/importer.ts b/packages/sparql-qlever/src/importer.ts index 697e6f75..5e1af6d7 100644 --- a/packages/sparql-qlever/src/importer.ts +++ b/packages/sparql-qlever/src/importer.ts @@ -92,12 +92,13 @@ export class Importer implements ImporterInterface { distribution: Distribution & { mimeType: string }, ): Promise { const localFile = await this.downloader.download(distribution); - await this.index( + const logs = await this.index( localFile, this.fileFormatFromMimeType(distribution.mimeType), ); + const tripleCount = this.parseTripleCount(logs); - return new ImportSuccessful(distribution); + return new ImportSuccessful(distribution, undefined, tripleCount); } private fileFormatFromMimeType(mimeType: string): fileFormat { @@ -108,7 +109,20 @@ export class Importer implements 
ImporterInterface { return format; } - private async index(file: string, format: fileFormat): Promise<void> { + private parseTripleCount(logs: string): number | undefined { + // The index command appends the metadata JSON to its logs. + // Extract num-triples.normal from it. + const metadataStart = logs.lastIndexOf('{'); + if (metadataStart === -1) return undefined; + try { + const metadata = JSON.parse(logs.slice(metadataStart)); + return metadata['num-triples']?.normal; + } catch { + return undefined; + } + } + + private async index(file: string, format: fileFormat): Promise<string> { const workingDir = dirname(file); const settingsFile = 'index.settings.json'; // Turtle is not line-delimited, so QLever's parallel parser can't split @@ -121,13 +135,14 @@ export class Importer implements ImporterInterface { // TODO: write index to named volume instead of bind mount for better performance. + const metadataFile = `${this.indexName}.meta-data.json`; const indexTask = await this.taskRunner.run( `(zcat '${basename(file)}' 2>/dev/null || cat '${basename( file, )}') | qlever-index -i ${ this.indexName - } -s ${settingsFile} -F ${format} -f -`, + } -s ${settingsFile} -F ${format} -f - && cat ${metadataFile}`, ); - await this.taskRunner.wait(indexTask); + return await this.taskRunner.wait(indexTask); } } diff --git a/packages/sparql-qlever/test/importer.test.ts b/packages/sparql-qlever/test/importer.test.ts index a44770f6..444d68e0 100644 --- a/packages/sparql-qlever/test/importer.test.ts +++ b/packages/sparql-qlever/test/importer.test.ts @@ -36,6 +36,7 @@ describe('Importer', () => { const result = await importer.import(dataset); expect(result).toBeInstanceOf(ImportSuccessful); + expect((result as ImportSuccessful).tripleCount).toBe(1); }, 30_000); }); }); From 40dcb71fdf567210020607ca3e55f6413c62afba Mon Sep 17 00:00:00 2001 From: David de Boer Date: Sat, 7 Mar 2026 20:37:47 +0100 Subject: [PATCH 2/2] fix(sparql-qlever): parse triple count from container logs via regex - Use regex
instead of JSON.parse to extract num-triples from the metadata JSON that the index command cats to stdout - Docker log multiplexing prepends binary frame headers that corrupt JSON parsing; regex is resilient to these --- packages/sparql-qlever/src/importer.ts | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/packages/sparql-qlever/src/importer.ts b/packages/sparql-qlever/src/importer.ts index 5e1af6d7..127fe6dc 100644 --- a/packages/sparql-qlever/src/importer.ts +++ b/packages/sparql-qlever/src/importer.ts @@ -110,16 +110,11 @@ export class Importer implements ImporterInterface { } private parseTripleCount(logs: string): number | undefined { - // The index command appends the metadata JSON to its logs. - // Extract num-triples.normal from it. - const metadataStart = logs.lastIndexOf('{'); - if (metadataStart === -1) return undefined; - try { - const metadata = JSON.parse(logs.slice(metadataStart)); - return metadata['num-triples']?.normal; - } catch { - return undefined; - } + // Extract num-triples.normal from the metadata JSON that the index + // command cats to stdout. Use a regex rather than JSON.parse because + // Docker log multiplexing prepends binary frame headers to each chunk. + const match = logs.match(/"num-triples":\{[^}]*"normal":(\d+)/); + return match ? Number(match[1]) : undefined; } private async index(file: string, format: fileFormat): Promise<string> {