From 639142935a818614f0cbe88da61b39292ab1b3ce Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 01:46:20 +0100 Subject: [PATCH 01/27] Add Nim CLI port with curl-based resumable model downloads Port Chrome extension to standalone Nim binary using local LLM runtime. Includes Ollama registry integration (qwen3.5-0.8b/2b/4b, gemma4-e2b), 3-phase AI bookmark organizer, SQLite storage, and cligen CLI with 10 subcommands. Downloads use curl --progress-bar with -C - for resumable partial downloads. --- assets/models.json | 32 +++ lazybookmarks.nimble | 24 ++ nim.cfg | 2 + src/lazybookmarks/bootstrap.nim | 24 ++ src/lazybookmarks/client.nim | 63 +++++ src/lazybookmarks/config.nim | 99 +++++++ src/lazybookmarks/main.nim | 210 +++++++++++++++ src/lazybookmarks/model.nim | 157 +++++++++++ src/lazybookmarks/organizer.nim | 445 ++++++++++++++++++++++++++++++++ src/lazybookmarks/prompts.nim | 118 +++++++++ src/lazybookmarks/runtime.nim | 131 ++++++++++ src/lazybookmarks/storage.nim | 291 +++++++++++++++++++++ src/lazybookmarks/ui.nim | 61 +++++ 13 files changed, 1657 insertions(+) create mode 100644 assets/models.json create mode 100644 lazybookmarks.nimble create mode 100644 nim.cfg create mode 100644 src/lazybookmarks/bootstrap.nim create mode 100644 src/lazybookmarks/client.nim create mode 100644 src/lazybookmarks/config.nim create mode 100644 src/lazybookmarks/main.nim create mode 100644 src/lazybookmarks/model.nim create mode 100644 src/lazybookmarks/organizer.nim create mode 100644 src/lazybookmarks/prompts.nim create mode 100644 src/lazybookmarks/runtime.nim create mode 100644 src/lazybookmarks/storage.nim create mode 100644 src/lazybookmarks/ui.nim diff --git a/assets/models.json b/assets/models.json new file mode 100644 index 0000000..49f3a89 --- /dev/null +++ b/assets/models.json @@ -0,0 +1,32 @@ +{ + "entries": [ + { + "name": "qwen3.5-0.8b", + "ollamaModel": "qwen3.5", + "ollamaTag": "0.8b", + "digest": 
"sha256:afb707b6b8fac6e475acc42bc8380fc0b8d2e0e4190be5a969fbf62fcc897db5", + "sizeBytes": 1036034688 + }, + { + "name": "qwen3.5-2b", + "ollamaModel": "qwen3.5", + "ollamaTag": "2b", + "digest": "sha256:b709d81508a078a686961de6ca07a953b895d9b286c46e17f00fb267f4f2d297", + "sizeBytes": 2741180928 + }, + { + "name": "qwen3.5-4b", + "ollamaModel": "qwen3.5", + "ollamaTag": "4b", + "digest": "sha256:81fb60c7daa80fc1123380b98970b320ae233409f0f71a72ed7b9b0d62f40490", + "sizeBytes": 3389971840 + }, + { + "name": "gemma4-e2b", + "ollamaModel": "gemma4", + "ollamaTag": "e2b", + "digest": "sha256:4e30e2665218745ef463f722c0bf86be0cab6ee676320f1cfadf91e989107448", + "sizeBytes": 7162394016 + } + ] +} diff --git a/lazybookmarks.nimble b/lazybookmarks.nimble new file mode 100644 index 0000000..02c956a --- /dev/null +++ b/lazybookmarks.nimble @@ -0,0 +1,24 @@ +# Package + +version = "0.1.0" +author = "corv89" +description = "CLI bookmark organizer powered by local LLM" +license = "MIT" +srcDir = "src" +bin = @["lazybookmarks/main"] +installDirs = @["lazybookmarks"] + +# Dependencies + +requires "nim >= 2.0.0" +requires "cligen >= 1.6" +requires "db_connector >= 0.1" +requires "jsony >= 1.1" + +# Tasks + +task build, "Build release binary": + self.exec "nim c -d:release -o:build/lazybookmarks src/lazybookmarks/main.nim" + +task buildDebug, "Build debug binary": + self.exec "nim c -o:build/lazybookmarks src/lazybookmarks/main.nim" diff --git a/nim.cfg b/nim.cfg new file mode 100644 index 0000000..a57e87d --- /dev/null +++ b/nim.cfg @@ -0,0 +1,2 @@ +--opt:size +--mm:orc diff --git a/src/lazybookmarks/bootstrap.nim b/src/lazybookmarks/bootstrap.nim new file mode 100644 index 0000000..f9f3b92 --- /dev/null +++ b/src/lazybookmarks/bootstrap.nim @@ -0,0 +1,24 @@ +import ./config +import ./model +import ./runtime +import ./ui + +proc ensureReady*(cfg: Config, registry: ModelRegistry) = + if not cfg.runtimeManaged: + return + + if not isRuntimeRunning(cfg): + discard downloadRuntime(cfg) + 
ensureModel(cfg, registry) + let modelPath = getModelPath(cfg, registry) + discard spawnRuntime(cfg, modelPath) + + infoMsg "Waiting for runtime to start..." + if not pollHealth(cfg): + errorMsg "Runtime failed to start within timeout. Check logs:" + dimMsg cfg.logFilePath() + quit(1) + + infoMsg "Runtime ready" + else: + ensureModel(cfg, registry) diff --git a/src/lazybookmarks/client.nim b/src/lazybookmarks/client.nim new file mode 100644 index 0000000..84f82b0 --- /dev/null +++ b/src/lazybookmarks/client.nim @@ -0,0 +1,63 @@ +import std/[httpclient, json, os] +import ./config + +type + Message* = object + role*: string + content*: string + +proc chatCompletion*(cfg: Config, messages: seq[Message], + jsonSchema: string = "", + maxRetries: int = 3): JsonNode = + let body = %*{ + "model": "local", + "messages": messages, + "temperature": 0.1, + "max_tokens": 1024, + } + + if jsonSchema.len > 0: + body["response_format"] = %*{ + "type": "json_schema", + "json_schema": { + "strict": true, + "schema": parseJson(jsonSchema), + } + } + + let client = newHttpClient(timeout = 120000) + client.headers = newHttpHeaders([("Content-Type", "application/json")]) + defer: client.close() + + var lastError = "" + for attempt in 1..maxRetries: + try: + let url = cfg.llmUrl / "chat" / "completions" + let response = client.postContent(url, body = $body) + + if cfg.verbose: + stderr.writeLine("[attempt " & $attempt & "] POST " & url & " -> " & $response.len & " bytes") + + let parsed = parseJson(response) + if parsed.hasKey("choices") and parsed["choices"].len > 0: + let content = parsed["choices"][0]["message"]["content"].getStr() + return parseJson(content) + else: + lastError = "No choices in response: " & response[0..min(200, response.high)] + except CatchableError as e: + lastError = e.msg + if cfg.verbose: + stderr.writeLine("[attempt " & $attempt & "] Error: " & e.msg) + if attempt < maxRetries: + let delay = 1000 * (1 shl (attempt - 1)) + os.sleep(delay) + + raise 
newException(CatchableError, "chatCompletion failed after " & $maxRetries & " attempts: " & lastError) + +proc chatCompletionSimple*(cfg: Config, systemPrompt: string, userMessage: string, + jsonSchema: string = ""): JsonNode = + let messages = @[ + Message(role: "system", content: systemPrompt), + Message(role: "user", content: userMessage), + ] + return chatCompletion(cfg, messages, jsonSchema) diff --git a/src/lazybookmarks/config.nim b/src/lazybookmarks/config.nim new file mode 100644 index 0000000..9e541ef --- /dev/null +++ b/src/lazybookmarks/config.nim @@ -0,0 +1,99 @@ +import std/os + +type Config* = object + llmUrl*: string + modelVariant*: string + dataDir*: string + runtimeManaged*: bool + autoAcceptHigh*: bool + batchSize*: int + verbose*: bool + +const DefaultLlmUrl* = "http://127.0.0.1:18080/v1" +const DefaultModelVariant* = "qwen3.5-0.8b" +const DefaultBatchSize* = 1 + +proc xdgDataHome*: string = + result = getEnv("XDG_DATA_HOME") + if result.len == 0: + result = getHomeDir() / ".local" / "share" + +proc xdgConfigHome*: string = + result = getEnv("XDG_CONFIG_HOME") + if result.len == 0: + result = getHomeDir() / ".config" + +proc defaultDataDir*: string = + xdgDataHome() / "lazybookmarks" + +proc defaultConfigDir*: string = + xdgConfigHome() / "lazybookmarks" + +proc ensureDir*(dir: string) = + createDir(dir) + +proc loadConfig*(overrides: Config = Config()): Config = + result = Config( + llmUrl: DefaultLlmUrl, + modelVariant: DefaultModelVariant, + dataDir: defaultDataDir(), + runtimeManaged: true, + autoAcceptHigh: false, + batchSize: DefaultBatchSize, + verbose: false, + ) + + let envLlmUrl = getEnv("LLM_URL") + if envLlmUrl.len > 0: + result.llmUrl = envLlmUrl + result.runtimeManaged = false + + let envModel = getEnv("LB_MODEL") + if envModel.len > 0: + result.modelVariant = envModel + + let envDataDir = getEnv("LB_DATA_DIR") + if envDataDir.len > 0: + result.dataDir = envDataDir + + let envAutoAccept = getEnv("LB_AUTO_ACCEPT") + if 
envAutoAccept.len > 0: + result.autoAcceptHigh = true + + if overrides.llmUrl.len > 0 and overrides.llmUrl != DefaultLlmUrl: + result.llmUrl = overrides.llmUrl + result.runtimeManaged = false + if overrides.modelVariant.len > 0: + result.modelVariant = overrides.modelVariant + if overrides.dataDir.len > 0: + result.dataDir = overrides.dataDir + if overrides.batchSize > 0: + result.batchSize = overrides.batchSize + if overrides.verbose: + result.verbose = true + if overrides.autoAcceptHigh: + result.autoAcceptHigh = true + +proc dbPath*(cfg: Config): string = + cfg.dataDir / "bookmarks.db" + +proc binDir*(cfg: Config): string = + cfg.dataDir / "bin" + +proc modelsDir*(cfg: Config): string = + cfg.dataDir / "models" + +proc logsDir*(cfg: Config): string = + cfg.dataDir / "logs" + +proc runtimeBinPath*(cfg: Config): string = + cfg.binDir() / "llama-server" + +proc pidFilePath*(cfg: Config): string = + cfg.dataDir / "runtime.pid" + +proc logFilePath*(cfg: Config): string = + cfg.logsDir() / "llama-server.log" + +proc configFilePath*: string = + defaultConfigDir() / "config.toml" diff --git a/src/lazybookmarks/main.nim b/src/lazybookmarks/main.nim new file mode 100644 index 0000000..fd9b81f --- /dev/null +++ b/src/lazybookmarks/main.nim @@ -0,0 +1,210 @@ +import std/[os, strutils, httpclient, terminal, strformat] +import cligen +import db_connector/db_sqlite +import ./config +import ./storage +import ./model +import ./runtime +import ./bootstrap +import ./organizer +import ./ui + +proc cmdImport(file: string, format = "auto", dryRun = false) = + if not fileExists(file): + errorMsg &"File not found: {file}" + quit(1) + + let cfg = loadConfig() + let content = readFile(file) + + let detectedFormat = if format == "auto": detectFormat(content, file) else: format + + if dryRun: + let parsed = parseImport(content, detectedFormat) + infoMsg &"Would import {parsed.len} bookmarks (format: {detectedFormat})" + for (url, title, folder) in parsed[0 .. 
min(9, parsed.high)]: + echo &" [{folder}] {title} - {url[0..min(79, url.high)]}" + if parsed.len > 10: + dimMsg &"... and {parsed.len - 10} more" + return + + let count = importBookmarks(cfg, content, detectedFormat, extractFilename(file)) + infoMsg &"Imported {count} bookmarks from {file} (format: {detectedFormat})" + +proc cmdOrganise(model = "", autoAcceptHigh = false, autoAcceptAll = false, + limit = 0, verbose = false) = + let overrides = Config(modelVariant: model, verbose: verbose) + let cfg = loadConfig(overrides) + let registry = loadModelRegistry() + + ensureReady(cfg, registry) + discard cfg.organizeBookmarks(autoAcceptAll = autoAcceptAll) + +proc cmdList(category = "", unorganised = false, format = "table") = + let cfg = loadConfig() + let db = cfg.initDb() + defer: db.close() + + var bookmarks: seq[BookmarkEntry] + + if unorganised: + bookmarks = getUnorganisedBookmarks(cfg) + else: + var query = "SELECT id, url, title, raw_folder, category, confidence FROM bookmarks" + if category.len > 0: + let escaped = category.replace("'", "''") + query.add " WHERE raw_folder = '" & escaped & "'" + query.add " ORDER BY added_at DESC LIMIT 50" + for row in db.fastRows(sql(query)): + bookmarks.add(BookmarkEntry( + id: parseBiggestInt(row[0]), + url: row[1], + title: row[2], + rawFolder: row[3], + category: row[4], + confidence: row[5], + )) + + if bookmarks.len == 0: + dimMsg "No bookmarks found." 
+ return + + for b in bookmarks: + let title = if b.title.len > 0: b.title else: "(untitled)" + var category = "-" + if b.category.len > 0: category = b.category + elif b.rawFolder.len > 0: category = b.rawFolder + echo &" {title:<50} {category}" + echo &"\n {bookmarks.len} bookmarks" + +proc cmdSearch(query: string) = + let cfg = loadConfig() + let db = cfg.initDb() + defer: db.close() + + let searchPattern = query.replace("'", "''") + let sqlQuery = &"SELECT url, title, raw_folder, category FROM bookmarks WHERE title LIKE '%{searchPattern}%' OR url LIKE '%{searchPattern}%' OR category LIKE '%{searchPattern}%' LIMIT 20" + + var found = 0 + for row in db.fastRows(sql(sqlQuery)): + let title = if row[1].len > 0: row[1] else: "(untitled)" + echo &" {title:<50} {row[0][0..min(79, row[0].high)]}" + found.inc + + echo &"\n {found} results for \"{query}\"" + +proc cmdUndo = + let cfg = loadConfig() + let count = cfg.undoLastBatch() + if count > 0: + infoMsg &"Undid {count} bookmark classifications" + else: + dimMsg "Nothing to undo" + +proc cmdModelList = + let cfg = loadConfig() + let registry = loadModelRegistry() + cfg.listModels(registry) + +proc cmdModelSet(variant: string) = + let cfgDir = defaultConfigDir() + createDir(cfgDir) + let configPath = cfgDir / "config.toml" + var content = "" + if fileExists(configPath): + content = readFile(configPath) + content.add &"\nmodelVariant = \"{variant}\"\n" + writeFile(configPath, content) + infoMsg &"Default model set to {variant}" + +proc cmdModelDownload = + let cfg = loadConfig() + let registry = loadModelRegistry() + cfg.ensureModel(registry) + +proc cmdStatus = + let cfg = loadConfig() + let registry = loadModelRegistry() + + echo "" + styledWriteLine(stdout, styleBright, " Endpoint: ", resetStyle, cfg.llmUrl) + styledWriteLine(stdout, styleBright, " Managed: ", resetStyle, $cfg.runtimeManaged) + styledWriteLine(stdout, styleBright, " Model: ", resetStyle, cfg.modelVariant) + + let modelReady = isModelReady(cfg, 
registry) + styledWriteLine(stdout, styleBright, " Model: ", resetStyle, if modelReady: "[ready]" else: "[not downloaded]") + + if cfg.runtimeManaged: + let running = isRuntimeRunning(cfg) + styledWriteLine(stdout, styleBright, " Runtime: ", resetStyle, if running: "[running]" else: "[stopped]") + + styledWriteLine(stdout, styleBright, " Data dir: ", resetStyle, cfg.dataDir) + echo "" + +proc cmdDoctor = + echo "" + let cfg = loadConfig() + var issues = 0 + + let dbPath = cfg.dbPath() + if fileExists(dbPath): + infoMsg &"Database: {dbPath}" + else: + warnMsg "Database not found (will be created on first import)" + issues.inc + + let binPath = cfg.runtimeBinPath() + if fileExists(binPath): + infoMsg &"Runtime: {binPath}" + elif not cfg.runtimeManaged: + dimMsg "Runtime: using external endpoint" + else: + warnMsg "Runtime not downloaded (will download on first organise)" + issues.inc + + let registry = loadModelRegistry() + if isModelReady(cfg, registry): + infoMsg &"Model: {cfg.modelVariant} ready" + elif not cfg.runtimeManaged: + dimMsg "Model: using external endpoint" + else: + warnMsg &"Model not downloaded: {cfg.modelVariant}" + issues.inc + + try: + let client = newHttpClient(timeout = 3000) + defer: client.close() + discard client.getContent(&"{cfg.llmUrl}/models") + infoMsg &"Endpoint reachable: {cfg.llmUrl}" + except: + if cfg.runtimeManaged: + warnMsg &"Endpoint not reachable: {cfg.llmUrl} (normal if not running)" + else: + errorMsg &"Endpoint not reachable: {cfg.llmUrl}" + issues.inc + + echo "" + if issues == 0: + infoMsg "All checks passed" + else: + warnMsg &"{issues} issue(s) found" + +when isMainModule: + dispatchMulti( + [cmdImport, cmdName = "import", doc = "Import bookmarks from a file", + help = {"file": "Path to bookmark file", "format": "Format: auto|html|json|urllist", "dry-run": "Parse only, no database write"}], + [cmdOrganise, cmdName = "organise", doc = "AI-organize unorganized bookmarks", + help = {"model": "Override model variant", 
"auto-accept-high": "Skip review for high confidence", + "auto-accept-all": "Accept all suggestions", "limit": "Max bookmarks to process", "verbose": "Show debug output"}], + [cmdList, cmdName = "list", doc = "List bookmarks", + help = {"category": "Filter by folder path", "unorganised": "Show only unorganized", "format": "table|json|csv"}], + [cmdSearch, cmdName = "search", doc = "Search bookmarks", + help = {"query": "Search term"}], + [cmdUndo, cmdName = "undo", doc = "Undo last batch of classifications"], + [cmdModelList, cmdName = "model-list", doc = "List available models"], + [cmdModelSet, cmdName = "model-set", doc = "Set default model variant", + help = {"variant": "Model variant name"}], + [cmdModelDownload, cmdName = "model-download", doc = "Download model without running organise"], + [cmdStatus, cmdName = "status", doc = "Show runtime and model status"], + [cmdDoctor, cmdName = "doctor", doc = "Run self-diagnostic checks"], + ) diff --git a/src/lazybookmarks/model.nim b/src/lazybookmarks/model.nim new file mode 100644 index 0000000..58a30a4 --- /dev/null +++ b/src/lazybookmarks/model.nim @@ -0,0 +1,157 @@ +import std/[os, strutils, strformat, streams, terminal, osproc] +import nimcrypto/sha2 +import nimcrypto/utils +import jsony +import ./config + +type + ModelEntry* = object + name*: string + ollamaModel*: string + ollamaTag*: string + digest*: string + sizeBytes*: int64 + + ModelRegistry* = object + entries*: seq[ModelEntry] + +const modelRegistryJson = staticRead("../../assets/models.json") + +const OllamaRegistry* = "https://registry.ollama.ai/v2/library" + +proc loadModelRegistry*: ModelRegistry = + return modelRegistryJson.fromJson(ModelRegistry) + +proc findModel*(registry: ModelRegistry, variant: string): ModelEntry = + for e in registry.entries: + if e.name == variant: + return e + raise newException(ValueError, &"Unknown model variant: {variant}") + +proc modelFilename*(entry: ModelEntry): string = + entry.ollamaModel & "-" & entry.ollamaTag 
+ +proc resolveDownloadUrl*(entry: ModelEntry): string = + let digest = entry.digest.replace("sha256:", "") + return OllamaRegistry & "/" & entry.ollamaModel & "/blobs/sha256:" & digest + +proc sha256File*(path: string): string = + var ctx: sha256 + ctx.init() + let file = newFileStream(path, fmRead) + if file == nil: + raise newException(IOError, &"Cannot open file: {path}") + defer: file.close() + var buf = newString(8192) + while true: + let n = file.readData(buf[0].addr, buf.len) + if n == 0: break + ctx.update(cast[ptr byte](buf[0].addr), uint(n)) + var digest: array[32, byte] + ctx.finish(digest) + return digest.toHex() + +proc formatBytes*(bytes: int64): string = + if bytes < 1024: return $bytes & " B" + if bytes < 1024 * 1024: return $(bytes div 1024) & " KB" + if bytes < 1024 * 1024 * 1024: return &"{float(bytes) / (1024*1024):.1f} MB" + return &"{float(bytes) / (1024*1024*1024):.2f} GB" + +proc ensureModel*(cfg: Config, registry: ModelRegistry) = + let entry = findModel(registry, cfg.modelVariant) + let modelsDir = cfg.modelsDir() + let filename = modelFilename(entry) + let modelPath = modelsDir / filename + let hashPath = modelPath & ".sha256" + + ensureDir(modelsDir) + + if fileExists(modelPath): + if fileExists(hashPath): + let storedHash = readFile(hashPath).strip() + if storedHash == entry.digest: + return + let currentHash = sha256File(modelPath) + if currentHash == entry.digest: + writeFile(hashPath, entry.digest) + return + stdout.styledWriteLine(styleBright, fgYellow, " ! 
", fgDefault, resetStyle, "Model file corrupted, re-downloading...") + removeFile(modelPath) + if fileExists(hashPath): + removeFile(hashPath) + + let partPath = modelPath & ".part" + let resume = fileExists(partPath) + + if resume: + stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, + "Resuming " & entry.name & " (" & formatBytes(entry.sizeBytes) & ")...") + else: + stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, + "Downloading " & entry.name & " (" & formatBytes(entry.sizeBytes) & ")...") + + let downloadUrl = resolveDownloadUrl(entry) + let exitCode = execCmd("curl -fL -C - -o " & quoteShell(partPath) & " " & + quoteShell(downloadUrl) & " --progress-bar 2>&1") + if exitCode != 0: + echo "" + if fileExists(partPath): + let partSize = getFileSize(partPath) + if partSize > 0 and partSize < entry.sizeBytes: + stdout.styledWriteLine(styleBright, fgYellow, " ! ", fgDefault, resetStyle, + "Partial download saved (" & formatBytes(partSize) & "/" & + formatBytes(entry.sizeBytes) & "). 
Re-run to resume.") + else: + removeFile(partPath) + quit(1) + + echo "" + + let actualHash = sha256File(partPath) + if actualHash != entry.digest.replace("sha256:", ""): + stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, "Checksum verification failed") + removeFile(partPath) + quit(1) + + moveFile(partPath, modelPath) + writeFile(hashPath, entry.digest) + stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, + "Model ready: " & entry.name) + +proc getModelPath*(cfg: Config, registry: ModelRegistry): string = + let entry = findModel(registry, cfg.modelVariant) + return cfg.modelsDir() / modelFilename(entry) + +proc isEntryReady*(entry: ModelEntry, cfg: Config): bool = + try: + let modelPath = cfg.modelsDir() / modelFilename(entry) + let hashPath = modelPath & ".sha256" + if not fileExists(modelPath): return false + if fileExists(hashPath): + return readFile(hashPath).strip() == entry.digest + return sha256File(modelPath) == entry.digest.replace("sha256:", "") + except CatchableError: + return false + +proc isModelReady*(cfg: Config, registry: ModelRegistry): bool = + try: + let entry = findModel(registry, cfg.modelVariant) + return isEntryReady(entry, cfg) + except CatchableError: + return false + +proc listModels*(cfg: Config, registry: ModelRegistry) = + echo "" + for entry in registry.entries: + let isCurrent = entry.name == cfg.modelVariant + let isReady = isEntryReady(entry, cfg) + let marker = if isCurrent: " *" else: "" + let status = if isReady: "[installed]" else: "[not installed]" + let name = if isCurrent: entry.name & marker else: entry.name + if isCurrent: + stdout.styledWrite(styleBright) + stdout.write " " & name + stdout.styledWrite(resetStyle) + stdout.styledWriteLine(" " & status & " " & formatBytes(entry.sizeBytes)) + echo "" + stdout.styledWriteLine(styleDim, " * = current selection", resetStyle) diff --git a/src/lazybookmarks/organizer.nim b/src/lazybookmarks/organizer.nim new file mode 100644 index 
0000000..3067897 --- /dev/null +++ b/src/lazybookmarks/organizer.nim @@ -0,0 +1,445 @@ +import std/[os, strutils, strformat, json, re, math, tables, algorithm, sequtils, sets] +import db_connector/db_sqlite +import ./config +import ./storage +import ./client +import ./prompts +import ./ui + +type + TaxonomyCategory* = object + folderId*: string + folderPath*: string + description*: string + keywords*: seq[string] + + Taxonomy* = object + categories*: seq[TaxonomyCategory] + + ClusterSuggestion* = object + name*: string + description*: string + keywords*: seq[string] + parentFolderId*: string + + Classification* = object + bookmarkId*: string + targetFolderId*: string + confidence*: string + reason*: string + + Suggestion* = object + bookmarkId*: int64 + bookmarkTitle*: string + bookmarkUrl*: string + targetFolderId*: string + targetFolderPath*: string + confidence*: string + reason*: string + isNewFolder*: bool + +const StopWords = ["the","a","an","and","or","of","to","in","for","is","on","with","at","by","from","this","that","it","as","are","was","be","has","have"] + +proc tokenizeText*(text: string): seq[string] = + result = @[] + for word in text.toLowerAscii().replace(re"[^a-z0-9\säöüß]", " ").splitWhitespace(): + if word.len > 2 and word notin StopWords: + result.add(word) + +proc extractDomainPatterns*(bookmarks: seq[BookmarkEntry], threshold = 0.2): seq[string] = + var counts: Table[string, int] + for b in bookmarks: + try: + var url = b.url + let idx = url.find("://") + if idx >= 0: + url = url[idx + 3 .. ^1] + let slashIdx = url.find('/') + if slashIdx >= 0: + url = url[0 .. 
slashIdx - 1] + url = url.replace(re"^www\.", "") + if url.len > 0: + counts[url] = counts.getOrDefault(url, 0) + 1 + except: + discard + let total = max(1, bookmarks.len) + result = @[] + for domain, count in counts: + if count.float / total.float >= threshold: + result.add(domain) + sort(result, proc(a, b: string): int = cmp(counts[b], counts[a])) + if result.len > 5: + result.setLen(5) + +proc computeTFIDF*(folderBookmarks: Table[string, seq[BookmarkEntry]], allBookmarks: seq[BookmarkEntry]): Table[string, seq[string]] = + var df: Table[string, int] + let N = max(1, allBookmarks.len) + + for b in allBookmarks: + for tok in tokenizeText(b.title): + df[tok] = df.getOrDefault(tok, 0) + 1 + + result = initTable[string, seq[string]]() + for folderId, bookmarks in folderBookmarks: + if bookmarks.len == 0: + result[folderId] = @[] + continue + var tf: Table[string, int] + for b in bookmarks: + for tok in tokenizeText(b.title): + tf[tok] = tf.getOrDefault(tok, 0) + 1 + + proc idf(term: string): float = + let d = df.getOrDefault(term, 0) + return ln(N.float / (1.0 + d.float)) + + var scored: seq[tuple[word: string, score: float]] = @[] + for word, freq in tf: + let score = (freq.float / bookmarks.len.float) * idf(word) + scored.add((word, score)) + + scored.sort(proc(a, b: (string, float)): int = cmp(b[1], a[1])) + var keywords: seq[string] = @[] + for s in scored[0 .. min(5, scored.high)]: + keywords.add(s[0]) + result[folderId] = keywords + +proc sampleExemplars*(bookmarks: seq[BookmarkEntry], count = 2): string = + var sorted = bookmarks + sorted.sort(proc(a, b: BookmarkEntry): int = cmp(b.addedAt, a.addedAt)) + var parts: seq[string] = @[] + for i in 0 .. min(count - 1, sorted.high): + var host = sorted[i].url + try: + let idx = host.find("://") + if idx >= 0: host = host[idx + 3 .. ^1] + let slashIdx = host.find('/') + if slashIdx >= 0: host = host[0 .. slashIdx - 1] + except: + discard + let title = if sorted[i].title.len > 40: sorted[i].title[0 .. 
39] else: sorted[i].title + parts.add("\"" & title & "\" " & host) + return parts.join(" | ") + +proc buildFingerprint*(folders: seq[FolderEntry]): string = + var parts: seq[string] = @[] + for f in folders: + parts.add(&"{f.uuid}:{f.bookmarkCount}") + parts.sort() + return parts.join(",") + +proc loadCachedTaxonomy*(db: DbConn, fingerprint: string): (bool, Taxonomy) = + try: + let row = db.getRow(sql("SELECT taxonomy FROM taxonomy_cache WHERE fingerprint = ?"), fingerprint) + if row[0].len == 0: + return (false, Taxonomy()) + let json = parseJson(row[0]) + var cats: seq[TaxonomyCategory] = @[] + for elem in json["categories"].getElems(): + var kws: seq[string] = @[] + for kw in elem["keywords"].getElems(): + kws.add(kw.getStr()) + cats.add(TaxonomyCategory( + folderId: elem["folderId"].getStr(), + folderPath: elem["folderPath"].getStr(), + description: elem["description"].getStr(), + keywords: kws, + )) + let tax = Taxonomy(categories: cats) + return (true, tax) + except: + return (false, Taxonomy()) + +proc saveTaxonomy*(db: DbConn, fingerprint: string, taxonomy: Taxonomy) = + try: + db.exec(sql("INSERT OR REPLACE INTO taxonomy_cache (fingerprint, taxonomy, created_at) VALUES (?, ?, strftime('%s','now'))"), + fingerprint, $(%*taxonomy)) + except: + discard + +proc pruneTaxonomy*(taxonomy: Taxonomy, batch: seq[BookmarkEntry], + tfidfMap: Table[string, seq[string]], + topN = 15, minN = 5): Taxonomy = + var batchTokens = initHashSet[string]() + for b in batch: + for tok in tokenizeText(b.title): + batchTokens.incl(tok) + + var scored: seq[tuple[cat: TaxonomyCategory, overlap: int]] = @[] + for cat in taxonomy.categories: + let keywords = tfidfMap.getOrDefault(cat.folderId, @[]) + let overlap = keywords.filterIt(it in batchTokens).len + scored.add((cat, overlap)) + + sort(scored, proc(a, b: (TaxonomyCategory, int)): int = + result = cmp(b[1], a[1]) + if result == 0: result = cmp(a[0].folderId, b[0].folderId) + ) + + let count = min(max(topN, minN), scored.len) + var 
pruned: seq[TaxonomyCategory] = @[] + for s in scored[0 .. count - 1]: + pruned.add(s[0]) + return Taxonomy(categories: pruned) + +proc runTaxonomyPhase*(cfg: Config, folders: seq[FolderEntry], + folderBookmarks: Table[string, seq[BookmarkEntry]], + allBookmarks: seq[BookmarkEntry], + db: DbConn): Taxonomy = + let fingerprint = buildFingerprint(folders) + let (cached, taxonomy) = loadCachedTaxonomy(db, fingerprint) + if cached: + if cfg.verbose: + dimMsg &"Taxonomy cache hit ({taxonomy.categories.len} folders)" + return taxonomy + + if cfg.verbose: + dimMsg "Taxonomy cache miss, running Phase 1..." + + let tfidfMap = computeTFIDF(folderBookmarks, allBookmarks) + + var enriched: seq[tuple[id, path, count: string, domains, siblings, keywords, exemplars: string]] = @[] + for folder in folders: + let bookmarks = folderBookmarks.getOrDefault(folder.uuid, @[]) + let domains = extractDomainPatterns(bookmarks) + let keywords = tfidfMap.getOrDefault(folder.uuid, @[]) + let exemplars = sampleExemplars(bookmarks) + + enriched.add(( + id: folder.uuid, + path: folder.path, + count: $folder.bookmarkCount, + domains: domains.join(", "), + siblings: "", + keywords: keywords.join(", "), + exemplars: exemplars, + )) + + let prompt = buildTaxonomyPrompt(enriched) + let response = chatCompletionSimple(cfg, SystemPrompt, prompt, TaxonomySchemaJson) + + result = Taxonomy(categories: @[]) + for elem in response["categories"].getElems(): + var kws: seq[string] = @[] + for kw in elem["keywords"].getElems(): + kws.add(kw.getStr()) + result.categories.add(TaxonomyCategory( + folderId: elem["folderId"].getStr(), + folderPath: elem["folderPath"].getStr(), + description: elem["description"].getStr(), + keywords: kws, + )) + + saveTaxonomy(db, fingerprint, result) + if cfg.verbose: + dimMsg &"Taxonomy cached ({result.categories.len} folders)" + +proc runClusterPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], + taxonomy: Taxonomy, folders: seq[FolderEntry]): seq[ClusterSuggestion] = + var 
rootFolders: seq[tuple[id, title: string]] = @[] + for f in folders: + if f.parentId == 0: + rootFolders.add((id: f.uuid, title: f.path)) + + let batchTuples = uncategorized.mapIt((id: $it.id, title: it.title, url: it.url)) + let taxCats = taxonomy.categories.mapIt((id: it.folderId, path: it.folderPath)) + let rootIds = rootFolders.mapIt(it.id) + + let prompt = buildClusterPrompt(batchTuples, taxCats, rootFolders) + let schema = buildClusterSchemaJson(rootIds) + + let response = chatCompletionSimple(cfg, SystemPrompt, prompt, schema) + + if not response.hasKey("clusters"): + return @[] + + result = @[] + for elem in response["clusters"].getElems(): + var kws: seq[string] = @[] + for kw in elem["keywords"].getElems(): + kws.add(kw.getStr()) + result.add(ClusterSuggestion( + name: elem["name"].getStr(), + description: elem["description"].getStr(), + keywords: kws, + parentFolderId: elem["parentFolderId"].getStr(), + )) + +proc chunk*[T](s: seq[T], size: int): seq[seq[T]] = + if size <= 0 or s.len == 0: return @[] + result = @[] + var i = 0 + while i < s.len: + var batch: seq[T] = @[] + for j in 0 ..< min(size, s.len - i): + batch.add(s[i + j]) + result.add(batch) + i += size + +proc runClassificationPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], + taxonomy: Taxonomy, + folderBookmarks: Table[string, seq[BookmarkEntry]], + allBookmarks: seq[BookmarkEntry], + clusters: seq[ClusterSuggestion]): seq[Suggestion] = + var fullTaxonomy = taxonomy + + let newFolders = clusters.mapIt(TaxonomyCategory( + folderId: &"__new_{it.name}", + folderPath: it.name, + description: it.description, + keywords: it.keywords, + )) + fullTaxonomy.categories.add(newFolders) + + let tfidfMap = computeTFIDF(folderBookmarks, allBookmarks) + + var allSuggestions: seq[Suggestion] = @[] + let batches = uncategorized.chunk(cfg.batchSize) + + for i, batch in batches: + showProgressBar(i + 1, batches.len, "Classifying bookmarks") + + let pruned = pruneTaxonomy(fullTaxonomy, batch, tfidfMap) + 
let folderIds = pruned.categories.mapIt(it.folderId) + let bookmarkIds = batch.mapIt($it.id) + let schema = buildClassificationSchemaJson(folderIds, bookmarkIds) + + let taxCats = pruned.categories.mapIt( + (id: it.folderId, path: it.folderPath, description: it.description, keywords: it.keywords.join(", ")) + ) + let batchTuples = batch.mapIt((id: $it.id, title: it.title, url: it.url)) + let prompt = buildClassificationPrompt(taxCats, batchTuples) + + try: + let response = chatCompletionSimple(cfg, SystemPrompt, prompt, schema) + + if response.hasKey("moves"): + for move in response["moves"]: + let moveObj = move + let bmId = parseBiggestInt(moveObj["bookmarkId"].getStr()) + let targetId = moveObj["targetFolderId"].getStr() + let conf = moveObj["confidence"].getStr() + let reason = moveObj["reason"].getStr() + + if targetId == "__skip__": + continue + + let bmIdx = batch.findIt(it.id == bmId) + var bmTitle = "" + var bmUrl = "" + if bmIdx >= 0: + bmTitle = batch[bmIdx].title + bmUrl = batch[bmIdx].url + let targetIdx = pruned.categories.findIt(it.folderId == targetId) + var targetPath = targetId + if targetIdx >= 0: + targetPath = pruned.categories[targetIdx].folderPath + let isNew = targetId.startsWith("__new_") + + allSuggestions.add(Suggestion( + bookmarkId: bmId, + bookmarkTitle: bmTitle, + bookmarkUrl: bmUrl, + targetFolderId: targetId, + targetFolderPath: if isNew: targetPath & " (new)" else: targetPath, + confidence: conf, + reason: reason, + isNewFolder: isNew, + )) + except CatchableError as e: + if cfg.verbose: + errorMsg &"Batch {i + 1} failed: {e.msg}" + + echo "" + return allSuggestions + +proc organizeBookmarks*(cfg: Config, autoAcceptAll: bool = false): int = + let db = cfg.initDb() + defer: db.close() + + let uncategorized = getUnorganisedBookmarks(cfg) + let webUncategorized = uncategorized.filterIt(it.url.startsWith("http://") or it.url.startsWith("https://")) + + if webUncategorized.len == 0: + dimMsg "No unorganized bookmarks found." 
+ return 0 + + infoMsg &"Found {webUncategorized.len} unorganized bookmarks" + + let folders = getAllFolders(cfg) + + var folderBookmarks = initTable[string, seq[BookmarkEntry]]() + var allBookmarks: seq[BookmarkEntry] = @[] + + for row in db.fastRows(sql("SELECT id, url, title, raw_folder FROM bookmarks")): + let bm = BookmarkEntry( + id: parseBiggestInt(row[0]), + url: row[1], + title: row[2], + rawFolder: row[3], + ) + allBookmarks.add(bm) + if bm.rawFolder.len > 0: + let folderIdx = folders.findIt(it.path == bm.rawFolder) + if folderIdx >= 0: + let folder = folders[folderIdx] + if folder.uuid notin folderBookmarks: + folderBookmarks[folder.uuid] = @[] + folderBookmarks[folder.uuid].add(bm) + + headerMsg "Phase 1: Analyzing folder structure..." + let taxonomy = runTaxonomyPhase(cfg, folders, folderBookmarks, allBookmarks, db) + + headerMsg "Phase 1.5: Identifying new folder opportunities..." + var clusters: seq[ClusterSuggestion] = @[] + try: + clusters = runClusterPhase(cfg, webUncategorized, taxonomy, folders) + if clusters.len > 0 and cfg.verbose: + dimMsg &"Found {clusters.len} potential new folders" + except CatchableError as e: + if cfg.verbose: + warnMsg &"Cluster phase skipped: {e.msg}" + + headerMsg "Phase 2: Classifying bookmarks..." + let suggestions = runClassificationPhase(cfg, webUncategorized, taxonomy, folderBookmarks, allBookmarks, clusters) + + if suggestions.len == 0: + dimMsg "No suggestions generated." 
+ return 0 + + if autoAcceptAll or cfg.autoAcceptHigh: + var accepted = 0 + for s in suggestions: + if autoAcceptAll or s.confidence == "high": + applyClassification(cfg, s.bookmarkId, s.targetFolderPath, s.confidence, s.reason) + accepted.inc + infoMsg &"Applied {accepted} suggestions automatically" + return accepted + + var accepted = 0 + var skipped = 0 + var edited = 0 + + for s in suggestions: + let action = reviewSuggestion(s.bookmarkUrl, s.bookmarkTitle, s.targetFolderPath, s.confidence, s.reason) + + case action + of ReviewAction.accept: + applyClassification(cfg, s.bookmarkId, s.targetFolderPath, s.confidence, s.reason) + accepted.inc + of ReviewAction.skip: + skipped.inc + of ReviewAction.edit: + stdout.write " New folder path: " + stdout.flushFile() + let newPath = stdin.readLine().strip() + if newPath.len > 0: + applyClassification(cfg, s.bookmarkId, newPath, s.confidence, s.reason) + edited.inc + else: + skipped.inc + of ReviewAction.quitReview: + break + + infoMsg &"Done: {accepted} accepted, {skipped} skipped, {edited} edited" + return accepted diff --git a/src/lazybookmarks/prompts.nim b/src/lazybookmarks/prompts.nim new file mode 100644 index 0000000..3f149bc --- /dev/null +++ b/src/lazybookmarks/prompts.nim @@ -0,0 +1,118 @@ +import std/strutils + +const SystemPrompt* = "You are a bookmark classifier. Given a user's folder structure and uncategorized bookmarks, assign each to the most appropriate existing folder. If no folder fits well, set targetFolderId to \"__skip__\" instead of forcing a poor match. Respond with valid JSON matching the provided schema. Prefer the user's existing folder names. Only suggest new folders when necessary." 
+ +const TaxonomySchemaJson* = """{ + "type": "object", + "properties": { + "categories": { + "type": "array", + "items": { + "type": "object", + "properties": { + "folderId": { "type": "string" }, + "folderPath": { "type": "string" }, + "description": { "type": "string" }, + "keywords": { "type": "array", "items": { "type": "string" }, "maxItems": 10 } + }, + "required": ["folderId", "folderPath", "description", "keywords"], + "additionalProperties": false + } + } + }, + "required": ["categories"], + "additionalProperties": false +}""" + +proc buildClusterSchemaJson*(rootFolderIds: seq[string]): string = + var enumParts: seq[string] = @[] + for id in rootFolderIds: + enumParts.add("\"" & id & "\"") + let enumValues = enumParts.join(", ") + return "{\"type\":\"object\",\"properties\":{\"clusters\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"name\":{\"type\":\"string\"},\"description\":{\"type\":\"string\"},\"keywords\":{\"type\":\"array\",\"items\":{\"type\":\"string\"},\"maxItems\":8},\"parentFolderId\":{\"type\":\"string\",\"enum\":[" & enumValues & "]}}}}}}" + +proc buildClassificationSchemaJson*(folderIds: seq[string], bookmarkIds: seq[string]): string = + var folderParts: seq[string] = @[] + for id in folderIds: + folderParts.add("\"" & id & "\"") + let folderEnum = folderParts.join(", ") & ", \"__skip__\"" + var bookmarkParts: seq[string] = @[] + for id in bookmarkIds: + bookmarkParts.add("\"" & id & "\"") + let bookmarkEnum = bookmarkParts.join(", ") + return "{\"type\":\"object\",\"properties\":{\"moves\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"bookmarkId\":{\"type\":\"string\",\"enum\":[" & bookmarkEnum & "]},\"targetFolderId\":{\"type\":\"string\",\"enum\":[" & folderEnum & 
"]},\"confidence\":{\"type\":\"string\",\"enum\":[\"high\",\"medium\",\"low\"]},\"reason\":{\"type\":\"string\"}},\"required\":[\"bookmarkId\",\"targetFolderId\",\"confidence\",\"reason\"],\"additionalProperties\":false}}},\"required\":[\"moves\"]}" + +proc buildTaxonomyPrompt*(enrichedFolders: seq[tuple[id, path, count: string, domains, siblings, keywords, exemplars: string]]): string = + var lines: seq[string] = @[] + for f in enrichedFolders: + var parts: seq[string] = @[] + parts.add("[" & f.id & "] " & f.path & " (" & f.count & ")") + if f.domains.len > 0: parts.add("domains: " & f.domains) + if f.siblings.len > 0: parts.add("siblings: " & f.siblings) + if f.keywords.len > 0: parts.add("keywords: " & f.keywords) + if f.exemplars.len > 0: parts.add("examples: " & f.exemplars) + lines.add(parts.join(" | ")) + return "Analyze these bookmark folders. For each, describe what it contains and provide keywords.\n\n" & lines.join("\n") + +proc formatBookmarkBatch*(bookmarks: seq[tuple[id, title, url: string]]): string = + var lines: seq[string] = @[] + for b in bookmarks: + var shortUrl = b.url + try: + let idx = shortUrl.find("://") + if idx >= 0: + shortUrl = shortUrl[idx + 3 .. ^1] + let slashIdx = shortUrl.find('/') + if slashIdx >= 0: + shortUrl = shortUrl[0 .. slashIdx - 1] + if shortUrl.len > 60: + shortUrl = shortUrl[0 .. 56] & "..." 
+ except: + discard + let title = if b.title.len > 0: b.title else: "(untitled)" + lines.add("[" & b.id & "] \"" & title & "\" " & shortUrl) + return lines.join("\n") + +proc buildClusterPrompt*(uncategorizedBookmarks: seq[tuple[id, title, url: string]], + taxonomyCategories: seq[tuple[id, path: string]], + rootFolders: seq[tuple[id, title: string]]): string = + var existingParts: seq[string] = @[] + for c in taxonomyCategories: + existingParts.add("[" & c.id & "] " & c.path) + let existingList = existingParts.join("\n") + + var rootParts: seq[string] = @[] + for f in rootFolders: + rootParts.add("[" & f.id & "] " & f.title) + let rootList = rootParts.join("\n") + + let bookmarkList = formatBookmarkBatch(uncategorizedBookmarks) + + return "Analyze these uncategorized bookmarks and identify 2-6 thematic groups that would benefit from a new folder.\n\n" & + "Existing folders (for reference -- do NOT use these as parentFolderId):\n" & + existingList & "\n\n" & + "Valid locations for new folders (use one of these IDs as parentFolderId):\n" & + rootList & "\n\n" & + "Uncategorized bookmarks:\n" & + bookmarkList & "\n\n" & + "For each cluster, suggest a short folder name, a description, keywords, and which root location to create it in (parentFolderId).\n" & + "Only suggest clusters when a meaningful group of 2+ bookmarks shares a clear theme. Do not suggest clusters that duplicate an existing folder's purpose." 
+ +proc buildClassificationPrompt*(taxonomyCategories: seq[tuple[id, path, description, keywords: string]], + bookmarkBatch: seq[tuple[id, title, url: string]]): string = + var folderParts: seq[string] = @[] + for c in taxonomyCategories: + folderParts.add("[" & c.id & "] " & c.path & ": " & c.description & " (" & c.keywords & ")") + let folderList = folderParts.join("\n") + let bookmarkList = formatBookmarkBatch(bookmarkBatch) + + return "Classify these bookmarks into the most appropriate folders.\n\n" & + "Available folders:\n" & + folderList & "\n\n" & + "Bookmarks to classify (format: [id] \"title\" url):\n" & + bookmarkList & "\n\n" & + "For each bookmark:\n" & + "- Choose the best existing folder (targetFolderId)\n" & + "- Set confidence: \"high\" (obvious match), \"medium\" (reasonable), \"low\" (uncertain)\n" & + "- Give a brief reason\n\n" & + "Use targetFolderId=\"__skip__\" if no folder is a good match." diff --git a/src/lazybookmarks/runtime.nim b/src/lazybookmarks/runtime.nim new file mode 100644 index 0000000..bb3afd4 --- /dev/null +++ b/src/lazybookmarks/runtime.nim @@ -0,0 +1,131 @@ +import std/[os, osproc, strutils, strformat, httpclient, terminal, json, times] +import ./config + +const RuntimeAssetPattern* = "llama-server-{os}-{arch}-static" + +type Asset = object + name: string + browserDownloadUrl: string + +type LlamaRelease = object + tagName: string + assets: seq[Asset] + +proc detectAssetName(): string = + when defined(linux) and defined(arm64): + return "llama-server-linux-arm64-static" + elif defined(linux) and defined(amd64): + return "llama-server-linux-amd64-static" + elif defined(macosx) and defined(arm64): + return "llama-server-macos-arm64-static" + elif defined(macosx) and defined(amd64): + return "llama-server-macos-amd64-static" + else: + return "llama-server-linux-arm64-static" + +proc findRuntimeAsset*(tagName: string): (string, string) = + let assetName = detectAssetName() + let client = newHttpClient() + defer: client.close() 
+ + try: + let url = &"https://api.github.com/repos/ggml-org/llama.cpp/releases/{tagName}" + let body = client.getContent(url) + let jsn = parseJson(body) + for asset in jsn["assets"]: + let name = asset["name"].getStr() + if name == assetName: + return (name, asset["browser_download_url"].getStr()) + except CatchableError as e: + stderr.writeLine(&"Warning: could not query GitHub releases: {e.msg}") + + return ("", "") + +proc downloadRuntime*(cfg: Config): string = + let binPath = cfg.runtimeBinPath() + if fileExists(binPath): + return binPath + + ensureDir(cfg.dataDir) + ensureDir(cfg.binDir()) + + stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, "Downloading llama-server...") + + let (assetName, downloadUrl) = findRuntimeAsset("b5278") + if downloadUrl.len == 0: + stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, "Could not find llama-server binary for this platform") + quit(1) + + let response = newHttpClient().get(downloadUrl) + if response.code != Http200: + stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, &"Download failed: HTTP {response.code}") + quit(1) + + let partPath = binPath & ".part" + let partFile = open(partPath, fmWrite) + partFile.write(response.body) + partFile.close() + + setFilePermissions(partPath, {fpUserRead, fpUserWrite, fpUserExec}) + moveFile(partPath, binPath) + stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, "Runtime ready") + return binPath + +proc isRuntimeRunning*(cfg: Config): bool = + let pidPath = cfg.pidFilePath() + if not fileExists(pidPath): + return false + try: + discard readFile(pidPath).strip().parseInt() + let client = newHttpClient(timeout = 2000) + defer: client.close() + discard client.getContent("http://127.0.0.1:18080/health") + return true + except: + return false + +proc spawnRuntime*(cfg: Config, modelPath: string): int = + let binPath = cfg.runtimeBinPath() + if not fileExists(binPath): + 
stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, &"Runtime not found: {binPath}") + quit(1) + + ensureDir(cfg.logsDir()) + + let pid = startProcess(binPath, args = @[ + "--model", modelPath, + "--port", "18080", + "--host", "127.0.0.1", + "--ctx-size", "4096", + ], options = {poStdErrToStdOut}) + + writeFile(cfg.pidFilePath(), $pid.processID) + return pid.processID + +proc pollHealth*(cfg: Config, timeoutMs: int = 30000): bool = + let client = newHttpClient(timeout = 1000) + defer: client.close() + + let startTime = epochTime() * 1000 + while (epochTime() * 1000 - startTime) < timeoutMs.float: + try: + discard client.getContent("http://127.0.0.1:18080/health") + return true + except: + os.sleep(500) + return false + +proc stopRuntime*(cfg: Config) = + let pidPath = cfg.pidFilePath() + if not fileExists(pidPath): + return + try: + let pid = readFile(pidPath).strip().parseInt() + if pid > 0: + when defined(macosx): + discard execShellCmd(&"kill {pid}") + else: + discard execShellCmd(&"kill {pid}") + removeFile(pidPath) + except: + removeFile(pidPath) diff --git a/src/lazybookmarks/storage.nim b/src/lazybookmarks/storage.nim new file mode 100644 index 0000000..1b3fb54 --- /dev/null +++ b/src/lazybookmarks/storage.nim @@ -0,0 +1,291 @@ +import std/[re, strutils, strformat, random, times, json] +import db_connector/db_sqlite +import ./config + +randomize() + +type + BookmarkEntry* = object + id*: int64 + url*: string + title*: string + rawFolder*: string + category*: string + tags*: string + summary*: string + language*: string + confidence*: string + reason*: string + source*: string + importId*: int64 + organisedAt*: int64 + addedAt*: int64 + + FolderEntry* = object + id*: int64 + uuid*: string + path*: string + parentId*: int64 + bookmarkCount*: int + + ImportEntry* = object + id*: int64 + filename*: string + format*: string + importedAt*: int64 + bookmarkCount*: int + +const Schema = """ +CREATE TABLE IF NOT EXISTS bookmarks ( + id INTEGER 
PRIMARY KEY, + url TEXT NOT NULL UNIQUE, + title TEXT, + raw_folder TEXT, + category TEXT, + tags TEXT, + summary TEXT, + language TEXT, + confidence TEXT CHECK(confidence IN ('high','medium','low',NULL)), + reason TEXT, + source TEXT, + import_id INTEGER REFERENCES imports(id), + organised_at INTEGER, + added_at INTEGER +); + +CREATE TABLE IF NOT EXISTS imports ( + id INTEGER PRIMARY KEY, + filename TEXT, + format TEXT CHECK(format IN ('netscape','json','urllist')), + imported_at INTEGER, + bookmark_count INTEGER +); + +CREATE TABLE IF NOT EXISTS taxonomy_cache ( + fingerprint TEXT PRIMARY KEY, + taxonomy TEXT NOT NULL, + created_at INTEGER +); + +CREATE TABLE IF NOT EXISTS folders ( + id INTEGER PRIMARY KEY, + uuid TEXT NOT NULL UNIQUE, + path TEXT NOT NULL UNIQUE, + parent_path TEXT, + bookmark_count INTEGER DEFAULT 0 +); + +CREATE INDEX IF NOT EXISTS idx_bookmarks_organised ON bookmarks(organised_at); +CREATE INDEX IF NOT EXISTS idx_bookmarks_import ON bookmarks(import_id); +CREATE INDEX IF NOT EXISTS idx_bookmarks_folder ON bookmarks(raw_folder); +""" + +proc genUuid*: string = + const hexChars = "0123456789abcdef" + var s = "" + for i in 0..31: + if i == 8 or i == 12 or i == 16 or i == 20: + s.add '-' + s.add hexChars[rand(15)] + return s + +proc initDb*(cfg: Config): DbConn = + ensureDir(cfg.dataDir) + result = open(cfg.dbPath(), "", "", "") + for stmt in Schema.split(';'): + let trimmed = stmt.strip() + if trimmed.len > 0: + result.exec(sql(trimmed)) + +proc getOrCreateFolder(db: DbConn, path: string, parentPath: string = ""): FolderEntry = + let row = db.getRow(sql("SELECT id, uuid, path, parent_path, bookmark_count FROM folders WHERE path = ?"), path) + if row[0].len > 0: + return FolderEntry( + id: parseBiggestInt(row[0]), + uuid: row[1], + path: row[2], + parentId: if row[3].len > 0: parseBiggestInt(row[3]) else: 0, + bookmarkCount: if row[4].len > 0: parseBiggestInt(row[4]) else: 0, + ) + let uuid = genUuid() + result = FolderEntry( + uuid: uuid, + 
path: path, + parentId: 0, + bookmarkCount: 0, + ) + result.id = db.insertId(sql("INSERT INTO folders (uuid, path, parent_path, bookmark_count) VALUES (?, ?, ?, 0)"), uuid, path, parentPath) + return result + +proc importNetscapeHtml*(content: string): seq[tuple[url, title, folder: string]] = + result = @[] + var folderStack: seq[string] = @[""] + + for line in content.splitLines(): + let trimmed = line.strip() + if "
" in trimmed: + if "") + let titleEnd = trimmed.find("") + var title = "" + if titleStart >= 0 and titleEnd > titleStart: + title = trimmed[titleStart + 1 .. titleEnd - 1] + title = title.replace(re"<[^>]+>", "") + result.add((url, title, folderStack.join(" / "))) + elif "" in trimmed: + var matches: array[1, string] + if trimmed.match(re"""]*>(.*?)""", matches): + var folderName = matches[0].replace(re"<[^>]+>", "") + folderStack.add(folderName) + if "" in trimmed and folderStack.len > 1: + discard folderStack.pop() + +proc importJson*(content: string): seq[tuple[url, title, folder: string]] = + result = @[] + try: + let parsed = parseJson(content) + for item in parsed.getElems(): + let url = item{"url"}.getStr("") + let title = item{"title"}.getStr("") + let folder = item{"folder"}.getStr("") + if url.len > 0: + result.add((url, title, folder)) + except: + discard + +proc importUrlList*(content: string): seq[tuple[url, title, folder: string]] = + result = @[] + for line in content.splitLines(): + let trimmed = line.strip() + if trimmed.len == 0 or trimmed.startsWith("#"): + continue + if trimmed.startsWith("http://") or trimmed.startsWith("https://"): + result.add((trimmed, "", "")) + +proc detectFormat*(content: string, filename: string): string = + let parts = filename.split('.') + let ext = if parts.len > 1: parts[parts.len - 1].toLowerAscii() else: "" + if ext == "json": + return "json" + if ext == "txt" or ext == "url" or ext == "urls": + return "urllist" + if ext == "html" or ext == "htm": + return "netscape" + if "
" in content and " 0: + continue + if folder.len > 0: + discard db.getOrCreateFolder(folder) + try: + db.exec( + sql("INSERT INTO bookmarks (url, title, raw_folder, source, import_id, added_at) VALUES (?, ?, ?, 'import', ?, ?)"), + url, title, folder, importId, now + ) + count.inc + except DbError: + continue + + return count + +proc getUnorganisedBookmarks*(cfg: Config, limit: int = 0): seq[BookmarkEntry] = + let db = cfg.initDb() + defer: db.close() + + var query = "SELECT id, url, title, raw_folder, category, tags, summary, language, confidence, reason, source, import_id, organised_at, added_at FROM bookmarks WHERE organised_at IS NULL" + if limit > 0: + query.add &" LIMIT {limit}" + query.add " ORDER BY added_at DESC" + + for row in db.fastRows(sql(query)): + result.add(BookmarkEntry( + id: parseBiggestInt(row[0]), + url: row[1], + title: row[2], + rawFolder: row[3], + category: row[4], + tags: row[5], + summary: row[6], + language: row[7], + confidence: row[8], + reason: row[9], + source: row[10], + importId: if row[11].len > 0: parseBiggestInt(row[11]) else: 0, + organisedAt: if row[12].len > 0: parseBiggestInt(row[12]) else: 0, + addedAt: if row[13].len > 0: parseBiggestInt(row[13]) else: 0, + )) + +proc getAllFolders*(cfg: Config): seq[FolderEntry] = + let db = cfg.initDb() + defer: db.close() + + for row in db.fastRows(sql("SELECT id, uuid, path, parent_path, bookmark_count FROM folders")): + result.add(FolderEntry( + id: parseBiggestInt(row[0]), + uuid: row[1], + path: row[2], + parentId: if row[3].len > 0: parseBiggestInt(row[3]) else: 0, + bookmarkCount: if row[4].len > 0: parseBiggestInt(row[4]) else: 0, + )) + +proc applyClassification*(cfg: Config, bookmarkId: int64, category: string, confidence: string, reason: string) = + let db = cfg.initDb() + defer: db.close() + + let now = getTime().toUnix() + let folder = db.getOrCreateFolder(category) + db.exec(sql( + "UPDATE bookmarks SET category = ?, confidence = ?, reason = ?, organised_at = ? 
WHERE id = ?" + ), category, confidence, reason, now, bookmarkId) + +proc undoLastBatch*(cfg: Config): int = + let db = cfg.initDb() + defer: db.close() + + let now = getTime().toUnix() + result = db.execAffectedRows(sql( + "UPDATE bookmarks SET category = NULL, confidence = NULL, reason = NULL, organised_at = NULL" + ),) + # Actually undo: find the last batch by organised_at + let row = db.getRow(sql( + "SELECT organised_at FROM bookmarks WHERE organised_at IS NOT NULL ORDER BY organised_at DESC LIMIT 1" + )) + if row[0].len > 0: + let batchTime = parseBiggestInt(row[0]) + result = db.execAffectedRows(sql( + "UPDATE bookmarks SET category = NULL, confidence = NULL, reason = NULL, organised_at = NULL WHERE organised_at >= ?" + ), batchTime) diff --git a/src/lazybookmarks/ui.nim b/src/lazybookmarks/ui.nim new file mode 100644 index 0000000..b6cc50a --- /dev/null +++ b/src/lazybookmarks/ui.nim @@ -0,0 +1,61 @@ +import std/[terminal, strutils] + +proc infoMsg*(msg: string) = + stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, msg) + +proc warnMsg*(msg: string) = + stdout.styledWriteLine(styleBright, fgYellow, " ! 
", fgDefault, resetStyle, msg) + +proc errorMsg*(msg: string) = + stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, msg) + +proc dimMsg*(msg: string) = + stdout.styledWrite(styleDim, " " & msg, resetStyle, "\n") + +proc headerMsg*(msg: string) = + stdout.styledWriteLine(styleBright, fgCyan, "\n " & msg, resetStyle, "\n") + +proc showProgressBar*(current: int, total: int, prefix: string = "") = + stdout.write "\r\e[2K" + if total == 0: + stdout.write prefix & " 0/0" + stdout.flushFile() + return + let pct = (current * 100) div total + let width = 30 + let filled = (current * width) div total + let bar = repeat("#", filled) & repeat("-", width - filled) + stdout.write prefix & " [" & bar & "] " & $pct & "% (" & $current & "/" & $total & ")" + stdout.flushFile() + +type ReviewAction* = enum + accept, skip, edit, quitReview + +proc reviewSuggestion*(url: string, title: string, targetFolder: string, confidence: string, reason: string): ReviewAction = + echo "" + stdout.styledWrite(styleBright, " ┌─ ", resetStyle, url, "\n") + stdout.styledWrite(styleBright, " │ ", resetStyle) + stdout.write "\"" & title & "\"\n" + let confColor = case confidence + of "high": fgGreen + of "medium": fgYellow + else: fgRed + stdout.styledWrite(styleBright, " │ ", resetStyle, "→ ") + stdout.styledWrite(confColor, targetFolder, resetStyle) + stdout.write " [" & confidence.toUpperAscii() & "]\n" + stdout.styledWriteLine(styleBright, " │ ", resetStyle, styleDim, reason, resetStyle) + stdout.styledWriteLine(styleBright, " └─ ", resetStyle, styleDim, "[A]ccept [S]kip [e]dit [q]uit", resetStyle) + stdout.write " > " + stdout.flushFile() + + while true: + let input = stdin.readLine().strip().toLowerAscii() + case input + of "a", "accept": return ReviewAction.accept + of "s", "skip": return ReviewAction.skip + of "e", "edit": return ReviewAction.edit + of "q", "quit": return ReviewAction.quitReview + else: + stdout.write "\r\e[2K" + stdout.write " > " + stdout.flushFile() 
From b925468f0550caa16251e063bb147649cf74a0a0 Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 11:56:02 +0100 Subject: [PATCH 02/27] Add .gitignore for build/ and BUILD.md with build instructions --- .gitignore | 1 + BUILD.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 BUILD.md diff --git a/.gitignore b/.gitignore index 991ade9..39ef364 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.gguf /*.html +/build/ diff --git a/BUILD.md b/BUILD.md new file mode 100644 index 0000000..4e08a21 --- /dev/null +++ b/BUILD.md @@ -0,0 +1,57 @@ +# Building lazybookmarks + +## Prerequisites + +- **Nim** >= 2.0.0 — https://nim-lang.org/install.html +- **OpenSSL** (for SHA256 via nimcrypto) +- **curl** (for model downloads) + +### macOS + +```sh +brew install nim openssl +``` + +### Ubuntu/Debian + +```sh +sudo apt install nim libssl-dev curl +``` + +### Arch Linux + +```sh +sudo pacman -S nim openssl curl +``` + +## Install Nim dependencies + +```sh +nimble install cligen db_connector jsony nimcrypto +``` + +## Build + +```sh +# Release (optimised, smaller binary) +nimble build + +# Debug +nimble buildDebug +``` + +The binary will be at `build/lazybookmarks`. 
+ +## Cross-compiling for Linux (from macOS) + +Install a Linux cross-compiler, then: + +```sh +nim c -d:release --os:linux --cpu:arm64 -o:build/lazybookmarks-linux-arm64 src/lazybookmarks/main.nim +``` + +On Ubuntu with `musl` for a static binary: + +```sh +nim c -d:release --os:linux --cpu:arm64 --gc:orc -d:useMalloc -o:build/lazybookmarks src/lazybookmarks/main.nim +``` From b892790eb124792473e267201d85b9962b13350c Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 12:10:38 +0100 Subject: [PATCH 03/27] Fix SHA256 checksum mismatch: normalize hex to lowercase --- src/lazybookmarks/model.nim | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lazybookmarks/model.nim b/src/lazybookmarks/model.nim index 58a30a4..7ea47e2 100644 --- a/src/lazybookmarks/model.nim +++ b/src/lazybookmarks/model.nim @@ -49,7 +49,7 @@ proc sha256File*(path: string): string = ctx.update(cast[ptr byte](buf[0].addr), uint(n)) var digest: array[32, byte] ctx.finish(digest) - return digest.toHex() + return toLowerAscii(digest.toHex()) proc formatBytes*(bytes: int64): string = if bytes < 1024: return $bytes & " B" @@ -108,7 +108,7 @@ proc ensureModel*(cfg: Config, registry: ModelRegistry) = echo "" let actualHash = sha256File(partPath) - if actualHash != entry.digest.replace("sha256:", ""): + if actualHash != toLowerAscii(entry.digest.replace("sha256:", "")): stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, "Checksum verification failed") removeFile(partPath) quit(1) @@ -129,7 +129,7 @@ proc isEntryReady*(entry: ModelEntry, cfg: Config): bool = if not fileExists(modelPath): return false if fileExists(hashPath): return readFile(hashPath).strip() == entry.digest - return sha256File(modelPath) == entry.digest.replace("sha256:", "") + return sha256File(modelPath) == toLowerAscii(entry.digest.replace("sha256:", "")) except CatchableError: return false From e4988cdd3e74fb9a4c7cae0d5612b29e81e7539a Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 
Apr 2026 12:14:01 +0100 Subject: [PATCH 04/27] Fix nimble build: use modern task syntax and add nimcrypto dependency --- lazybookmarks.nimble | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lazybookmarks.nimble b/lazybookmarks.nimble index 02c956a..9344391 100644 --- a/lazybookmarks.nimble +++ b/lazybookmarks.nimble @@ -13,12 +13,13 @@ installDirs = @["lazybookmarks"] requires "nim >= 2.0.0" requires "cligen >= 1.6" requires "db_connector >= 0.1" +requires "nimcrypto" requires "jsony >= 1.1" # Tasks task build, "Build release binary": - self.exec "nim c -d:release -o:build/lazybookmarks src/lazybookmarks/main.nim" + exec "nim c -d:release -o:build/lazybookmarks src/lazybookmarks/main.nim" task buildDebug, "Build debug binary": - self.exec "nim c -o:build/lazybookmarks src/lazybookmarks/main.nim" + exec "nim c -o:build/lazybookmarks src/lazybookmarks/main.nim" From b0ca0a588c936713ecbd32c2e7e1ef86d47d08dc Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 12:26:24 +0100 Subject: [PATCH 05/27] Fix bookmark import: use re.find instead of re.match for substring regex --- src/lazybookmarks/storage.nim | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lazybookmarks/storage.nim b/src/lazybookmarks/storage.nim index 1b3fb54..668fdd5 100644 --- a/src/lazybookmarks/storage.nim +++ b/src/lazybookmarks/storage.nim @@ -126,7 +126,7 @@ proc importNetscapeHtml*(content: string): seq[tuple[url, title, folder: string] if "
" in trimmed: if "= 0: let url = matches[0] let titleStart = trimmed.find(">") let titleEnd = trimmed.find("") @@ -137,7 +137,7 @@ proc importNetscapeHtml*(content: string): seq[tuple[url, title, folder: string] result.add((url, title, folderStack.join(" / "))) elif "" in trimmed: var matches: array[1, string] - if trimmed.match(re"""]*>(.*?)""", matches): + if trimmed.find(re"""]*>(.*?)""", matches) >= 0: var folderName = matches[0].replace(re"<[^>]+>", "") folderStack.add(folderName) if "" in trimmed and folderStack.len > 1: From ac7bafb46f0a8087a3cbfcb60684f5c96cd231dc Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 12:28:04 +0100 Subject: [PATCH 06/27] Enable SSL support in build tasks and nim.cfg --- lazybookmarks.nimble | 4 ++-- nim.cfg | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lazybookmarks.nimble b/lazybookmarks.nimble index 9344391..6336325 100644 --- a/lazybookmarks.nimble +++ b/lazybookmarks.nimble @@ -19,7 +19,7 @@ requires "jsony >= 1.1" # Tasks task build, "Build release binary": - exec "nim c -d:release -o:build/lazybookmarks src/lazybookmarks/main.nim" + exec "nim c -d:release -d:ssl -o:build/lazybookmarks src/lazybookmarks/main.nim" task buildDebug, "Build debug binary": - exec "nim c -o:build/lazybookmarks src/lazybookmarks/main.nim" + exec "nim c -d:ssl -o:build/lazybookmarks src/lazybookmarks/main.nim" diff --git a/nim.cfg b/nim.cfg index a57e87d..33f3c3c 100644 --- a/nim.cfg +++ b/nim.cfg @@ -1,2 +1,3 @@ --opt:size --mm:orc +-d:ssl From 802b05f725c95a47960d3d1ab9438b750e07b7d7 Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 12:52:23 +0100 Subject: [PATCH 07/27] Switch runtime from llama-server to ollama Replace self-managed llama-server + GGUF model downloads with ollama as the LLM backend. Ollama handles model management (pull/list) and serves an OpenAI-compatible API, eliminating ~150 lines of download progress, tarball extraction, and SHA256 verification code. 
- runtime.nim: ollama serve/stop/health via shell + HTTP checks - model.nim: ollama pull/list API, remove nimcrypto/sha256/curl download - config.nim: default endpoint now 127.0.0.1:11434/v1, add modelName - client.nim: send actual model name instead of hardcoded 'local' - bootstrap.nim: simplified ensureReady (start ollama, pull model) - models.json: stripped digest/sizeBytes, just name/ollamaModel/ollamaTag - nimble: removed nimcrypto dependency (binary ~700KB, down from ~900KB) --- assets/models.json | 16 +--- lazybookmarks.nimble | 5 +- src/lazybookmarks/bootstrap.nim | 21 +++-- src/lazybookmarks/client.nim | 2 +- src/lazybookmarks/config.nim | 14 +-- src/lazybookmarks/main.nim | 12 +-- src/lazybookmarks/model.nim | 149 +++++++++----------------------- src/lazybookmarks/runtime.nim | 121 +++++++------------------- 8 files changed, 97 insertions(+), 243 deletions(-) diff --git a/assets/models.json b/assets/models.json index 49f3a89..31ac120 100644 --- a/assets/models.json +++ b/assets/models.json @@ -3,30 +3,22 @@ { "name": "qwen3.5-0.8b", "ollamaModel": "qwen3.5", - "ollamaTag": "0.8b", - "digest": "sha256:afb707b6b8fac6e475acc42bc8380fc0b8d2e0e4190be5a969fbf62fcc897db5", - "sizeBytes": 1036034688 + "ollamaTag": "0.8b" }, { "name": "qwen3.5-2b", "ollamaModel": "qwen3.5", - "ollamaTag": "2b", - "digest": "sha256:b709d81508a078a686961de6ca07a953b895d9b286c46e17f00fb267f4f2d297", - "sizeBytes": 2741180928 + "ollamaTag": "2b" }, { "name": "qwen3.5-4b", "ollamaModel": "qwen3.5", - "ollamaTag": "4b", - "digest": "sha256:81fb60c7daa80fc1123380b98970b320ae233409f0f71a72ed7b9b0d62f40490", - "sizeBytes": 3389971840 + "ollamaTag": "4b" }, { "name": "gemma4-e2b", "ollamaModel": "gemma4", - "ollamaTag": "e2b", - "digest": "sha256:4e30e2665218745ef463f722c0bf86be0cab6ee676320f1cfadf91e989107448", - "sizeBytes": 7162394016 + "ollamaTag": "e2b" } ] } diff --git a/lazybookmarks.nimble b/lazybookmarks.nimble index 6336325..3ce7ab9 100644 --- a/lazybookmarks.nimble +++ 
b/lazybookmarks.nimble @@ -13,13 +13,12 @@ installDirs = @["lazybookmarks"] requires "nim >= 2.0.0" requires "cligen >= 1.6" requires "db_connector >= 0.1" -requires "nimcrypto" requires "jsony >= 1.1" # Tasks -task build, "Build release binary": +task release, "Build release binary to build/": exec "nim c -d:release -d:ssl -o:build/lazybookmarks src/lazybookmarks/main.nim" -task buildDebug, "Build debug binary": +task debug, "Build debug binary to build/": exec "nim c -d:ssl -o:build/lazybookmarks src/lazybookmarks/main.nim" diff --git a/src/lazybookmarks/bootstrap.nim b/src/lazybookmarks/bootstrap.nim index f9f3b92..e081411 100644 --- a/src/lazybookmarks/bootstrap.nim +++ b/src/lazybookmarks/bootstrap.nim @@ -3,22 +3,25 @@ import ./model import ./runtime import ./ui -proc ensureReady*(cfg: Config, registry: ModelRegistry) = +proc ensureReady*(cfg: var Config, registry: ModelRegistry) = if not cfg.runtimeManaged: return if not isRuntimeRunning(cfg): - discard downloadRuntime(cfg) - ensureModel(cfg, registry) - let modelPath = getModelPath(cfg, registry) - discard spawnRuntime(cfg, modelPath) + discard spawnRuntime(cfg) - infoMsg "Waiting for runtime to start..." + infoMsg "Waiting for ollama to start..." if not pollHealth(cfg): - errorMsg "Runtime failed to start within timeout. Check logs:" + errorMsg "Ollama failed to start. 
Check logs:" dimMsg cfg.logFilePath() quit(1) - infoMsg "Runtime ready" + infoMsg "Ollama ready" + + let entry = findModel(registry, cfg.modelVariant) + cfg.modelName = ollamaRef(entry) + + if not isEntryReady(entry): + pullModel(entry) else: - ensureModel(cfg, registry) + infoMsg "Model ready: " & ollamaRef(entry) diff --git a/src/lazybookmarks/client.nim b/src/lazybookmarks/client.nim index 84f82b0..f1b55df 100644 --- a/src/lazybookmarks/client.nim +++ b/src/lazybookmarks/client.nim @@ -10,7 +10,7 @@ proc chatCompletion*(cfg: Config, messages: seq[Message], jsonSchema: string = "", maxRetries: int = 3): JsonNode = let body = %*{ - "model": "local", + "model": cfg.modelName, "messages": messages, "temperature": 0.1, "max_tokens": 1024, diff --git a/src/lazybookmarks/config.nim b/src/lazybookmarks/config.nim index 9e541ef..7d26330 100644 --- a/src/lazybookmarks/config.nim +++ b/src/lazybookmarks/config.nim @@ -3,13 +3,14 @@ import std/os type Config* = object llmUrl*: string modelVariant*: string + modelName*: string dataDir*: string runtimeManaged*: bool autoAcceptHigh*: bool batchSize*: int verbose*: bool -const DefaultLlmUrl* = "http://127.0.0.1:18080/v1" +const DefaultLlmUrl* = "http://127.0.0.1:11434/v1" const DefaultModelVariant* = "qwen3.5-0.8b" const DefaultBatchSize* = 1 @@ -77,23 +78,14 @@ proc loadConfig*(overrides: Config = Config()): Config = proc dbPath*(cfg: Config): string = cfg.dataDir / "bookmarks.db" -proc binDir*(cfg: Config): string = - cfg.dataDir / "bin" - -proc modelsDir*(cfg: Config): string = - cfg.dataDir / "models" - proc logsDir*(cfg: Config): string = cfg.dataDir / "logs" -proc runtimeBinPath*(cfg: Config): string = - cfg.binDir() / "llama-server" - proc pidFilePath*(cfg: Config): string = cfg.dataDir / "runtime.pid" proc logFilePath*(cfg: Config): string = - cfg.logsDir() / "llama-server.log" + cfg.logsDir() / "ollama.log" proc configFilePath*: string = defaultConfigDir() / "config.toml" diff --git a/src/lazybookmarks/main.nim 
b/src/lazybookmarks/main.nim index fd9b81f..abcb0d9 100644 --- a/src/lazybookmarks/main.nim +++ b/src/lazybookmarks/main.nim @@ -34,7 +34,7 @@ proc cmdImport(file: string, format = "auto", dryRun = false) = proc cmdOrganise(model = "", autoAcceptHigh = false, autoAcceptAll = false, limit = 0, verbose = false) = let overrides = Config(modelVariant: model, verbose: verbose) - let cfg = loadConfig(overrides) + var cfg = loadConfig(overrides) let registry = loadModelRegistry() ensureReady(cfg, registry) @@ -153,13 +153,13 @@ proc cmdDoctor = warnMsg "Database not found (will be created on first import)" issues.inc - let binPath = cfg.runtimeBinPath() - if fileExists(binPath): - infoMsg &"Runtime: {binPath}" + let ollamaBin = findOllamaBin() + if ollamaBin.len > 0: + infoMsg &"Ollama: {ollamaBin}" elif not cfg.runtimeManaged: dimMsg "Runtime: using external endpoint" else: - warnMsg "Runtime not downloaded (will download on first organise)" + warnMsg "Ollama not found in PATH (install from https://ollama.com)" issues.inc let registry = loadModelRegistry() @@ -174,7 +174,7 @@ proc cmdDoctor = try: let client = newHttpClient(timeout = 3000) defer: client.close() - discard client.getContent(&"{cfg.llmUrl}/models") + discard client.getContent("http://127.0.0.1:11434/api/tags") infoMsg &"Endpoint reachable: {cfg.llmUrl}" except: if cfg.runtimeManaged: diff --git a/src/lazybookmarks/model.nim b/src/lazybookmarks/model.nim index 7ea47e2..3268967 100644 --- a/src/lazybookmarks/model.nim +++ b/src/lazybookmarks/model.nim @@ -1,6 +1,4 @@ -import std/[os, strutils, strformat, streams, terminal, osproc] -import nimcrypto/sha2 -import nimcrypto/utils +import std/[os, strutils, strformat, httpclient, json, terminal] import jsony import ./config @@ -9,16 +7,12 @@ type name*: string ollamaModel*: string ollamaTag*: string - digest*: string - sizeBytes*: int64 ModelRegistry* = object entries*: seq[ModelEntry] const modelRegistryJson = staticRead("../../assets/models.json") -const 
OllamaRegistry* = "https://registry.ollama.ai/v2/library" - proc loadModelRegistry*: ModelRegistry = return modelRegistryJson.fromJson(ModelRegistry) @@ -28,123 +22,60 @@ proc findModel*(registry: ModelRegistry, variant: string): ModelEntry = return e raise newException(ValueError, &"Unknown model variant: {variant}") -proc modelFilename*(entry: ModelEntry): string = - entry.ollamaModel & "-" & entry.ollamaTag - -proc resolveDownloadUrl*(entry: ModelEntry): string = - let digest = entry.digest.replace("sha256:", "") - return OllamaRegistry & "/" & entry.ollamaModel & "/blobs/sha256:" & digest - -proc sha256File*(path: string): string = - var ctx: sha256 - ctx.init() - let file = newFileStream(path, fmRead) - if file == nil: - raise newException(IOError, &"Cannot open file: {path}") - defer: file.close() - var buf = newString(8192) - while true: - let n = file.readData(buf[0].addr, buf.len) - if n == 0: break - ctx.update(cast[ptr byte](buf[0].addr), uint(n)) - var digest: array[32, byte] - ctx.finish(digest) - return toLowerAscii(digest.toHex()) - -proc formatBytes*(bytes: int64): string = - if bytes < 1024: return $bytes & " B" - if bytes < 1024 * 1024: return $(bytes div 1024) & " KB" - if bytes < 1024 * 1024 * 1024: return &"{float(bytes) / (1024*1024):.1f} MB" - return &"{float(bytes) / (1024*1024*1024):.2f} GB" - -proc ensureModel*(cfg: Config, registry: ModelRegistry) = - let entry = findModel(registry, cfg.modelVariant) - let modelsDir = cfg.modelsDir() - let filename = modelFilename(entry) - let modelPath = modelsDir / filename - let hashPath = modelPath & ".sha256" - - ensureDir(modelsDir) - - if fileExists(modelPath): - if fileExists(hashPath): - let storedHash = readFile(hashPath).strip() - if storedHash == entry.digest: - return - let currentHash = sha256File(modelPath) - if currentHash == entry.digest: - writeFile(hashPath, entry.digest) - return - stdout.styledWriteLine(styleBright, fgYellow, " ! 
", fgDefault, resetStyle, "Model file corrupted, re-downloading...") - removeFile(modelPath) - if fileExists(hashPath): - removeFile(hashPath) +proc ollamaRef*(entry: ModelEntry): string = + entry.ollamaModel & ":" & entry.ollamaTag - let partPath = modelPath & ".part" - let resume = fileExists(partPath) - - if resume: - stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, - "Resuming " & entry.name & " (" & formatBytes(entry.sizeBytes) & ")...") - else: - stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, - "Downloading " & entry.name & " (" & formatBytes(entry.sizeBytes) & ")...") +proc pullModel*(entry: ModelEntry) = + let refStr = ollamaRef(entry) + stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, + "Pulling {refStr}...") - let downloadUrl = resolveDownloadUrl(entry) - let exitCode = execCmd("curl -fL -C - -o " & quoteShell(partPath) & " " & - quoteShell(downloadUrl) & " --progress-bar 2>&1") + let exitCode = execShellCmd("ollama pull " & quoteShell(refStr) & " 2>&1") if exitCode != 0: - echo "" - if fileExists(partPath): - let partSize = getFileSize(partPath) - if partSize > 0 and partSize < entry.sizeBytes: - stdout.styledWriteLine(styleBright, fgYellow, " ! ", fgDefault, resetStyle, - "Partial download saved (" & formatBytes(partSize) & "/" & - formatBytes(entry.sizeBytes) & "). 
Re-run to resume.") - else: - removeFile(partPath) + stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, + "Failed to pull {refStr}") quit(1) - echo "" - - let actualHash = sha256File(partPath) - if actualHash != toLowerAscii(entry.digest.replace("sha256:", "")): - stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, "Checksum verification failed") - removeFile(partPath) - quit(1) - - moveFile(partPath, modelPath) - writeFile(hashPath, entry.digest) - stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, - "Model ready: " & entry.name) - -proc getModelPath*(cfg: Config, registry: ModelRegistry): string = - let entry = findModel(registry, cfg.modelVariant) - return cfg.modelsDir() / modelFilename(entry) - -proc isEntryReady*(entry: ModelEntry, cfg: Config): bool = +proc listLocalModels*(): seq[string] = + result = @[] try: - let modelPath = cfg.modelsDir() / modelFilename(entry) - let hashPath = modelPath & ".sha256" - if not fileExists(modelPath): return false - if fileExists(hashPath): - return readFile(hashPath).strip() == entry.digest - return sha256File(modelPath) == toLowerAscii(entry.digest.replace("sha256:", "")) - except CatchableError: - return false + let client = newHttpClient(timeout = 5000) + defer: client.close() + let body = client.getContent("http://127.0.0.1:11434/api/tags") + let jsn = parseJson(body) + for m in jsn["models"]: + result.add(m["name"].getStr()) + except: + discard + +proc isEntryReady*(entry: ModelEntry): bool = + let refStr = ollamaRef(entry) + for localName in listLocalModels(): + if localName == refStr or localName.startsWith(refStr & ":"): + return true + return false proc isModelReady*(cfg: Config, registry: ModelRegistry): bool = try: let entry = findModel(registry, cfg.modelVariant) - return isEntryReady(entry, cfg) - except CatchableError: + return isEntryReady(entry) + except: return false +proc ensureModel*(cfg: Config, registry: ModelRegistry) = + let entry = 
findModel(registry, cfg.modelVariant) + if not isEntryReady(entry): + pullModel(entry) + else: + stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, + "Model ready: " & ollamaRef(entry)) + proc listModels*(cfg: Config, registry: ModelRegistry) = echo "" + let localModels = listLocalModels() for entry in registry.entries: let isCurrent = entry.name == cfg.modelVariant - let isReady = isEntryReady(entry, cfg) + let isReady = isEntryReady(entry) let marker = if isCurrent: " *" else: "" let status = if isReady: "[installed]" else: "[not installed]" let name = if isCurrent: entry.name & marker else: entry.name @@ -152,6 +83,6 @@ proc listModels*(cfg: Config, registry: ModelRegistry) = stdout.styledWrite(styleBright) stdout.write " " & name stdout.styledWrite(resetStyle) - stdout.styledWriteLine(" " & status & " " & formatBytes(entry.sizeBytes)) + stdout.styledWriteLine(" " & status & " " & ollamaRef(entry)) echo "" stdout.styledWriteLine(styleDim, " * = current selection", resetStyle) diff --git a/src/lazybookmarks/runtime.nim b/src/lazybookmarks/runtime.nim index bb3afd4..6fe2f03 100644 --- a/src/lazybookmarks/runtime.nim +++ b/src/lazybookmarks/runtime.nim @@ -1,106 +1,46 @@ -import std/[os, osproc, strutils, strformat, httpclient, terminal, json, times] +import std/[os, osproc, strutils, strformat, httpclient, terminal, times] import ./config -const RuntimeAssetPattern* = "llama-server-{os}-{arch}-static" - -type Asset = object - name: string - browserDownloadUrl: string - -type LlamaRelease = object - tagName: string - assets: seq[Asset] - -proc detectAssetName(): string = - when defined(linux) and defined(arm64): - return "llama-server-linux-arm64-static" - elif defined(linux) and defined(amd64): - return "llama-server-linux-amd64-static" - elif defined(macosx) and defined(arm64): - return "llama-server-macos-arm64-static" - elif defined(macosx) and defined(amd64): - return "llama-server-macos-amd64-static" - else: - return 
"llama-server-linux-arm64-static" - -proc findRuntimeAsset*(tagName: string): (string, string) = - let assetName = detectAssetName() - let client = newHttpClient() - defer: client.close() - - try: - let url = &"https://api.github.com/repos/ggml-org/llama.cpp/releases/{tagName}" - let body = client.getContent(url) - let jsn = parseJson(body) - for asset in jsn["assets"]: - let name = asset["name"].getStr() - if name == assetName: - return (name, asset["browser_download_url"].getStr()) - except CatchableError as e: - stderr.writeLine(&"Warning: could not query GitHub releases: {e.msg}") - - return ("", "") - -proc downloadRuntime*(cfg: Config): string = - let binPath = cfg.runtimeBinPath() - if fileExists(binPath): - return binPath - - ensureDir(cfg.dataDir) - ensureDir(cfg.binDir()) - - stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, "Downloading llama-server...") - - let (assetName, downloadUrl) = findRuntimeAsset("b5278") - if downloadUrl.len == 0: - stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, "Could not find llama-server binary for this platform") - quit(1) - - let response = newHttpClient().get(downloadUrl) - if response.code != Http200: - stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, &"Download failed: HTTP {response.code}") - quit(1) - - let partPath = binPath & ".part" - let partFile = open(partPath, fmWrite) - partFile.write(response.body) - partFile.close() - - setFilePermissions(partPath, {fpUserRead, fpUserWrite, fpUserExec}) - moveFile(partPath, binPath) - stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, "Runtime ready") - return binPath +proc findOllamaBin*(): string = + for dir in getEnv("PATH").split(PathSep): + let path = dir / "ollama" + if fileExists(path): + return path + return "" proc isRuntimeRunning*(cfg: Config): bool = - let pidPath = cfg.pidFilePath() - if not fileExists(pidPath): - return false try: - discard 
readFile(pidPath).strip().parseInt() let client = newHttpClient(timeout = 2000) defer: client.close() - discard client.getContent("http://127.0.0.1:18080/health") + discard client.getContent("http://127.0.0.1:11434/api/tags") return true except: return false -proc spawnRuntime*(cfg: Config, modelPath: string): int = - let binPath = cfg.runtimeBinPath() - if not fileExists(binPath): - stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, &"Runtime not found: {binPath}") +proc spawnRuntime*(cfg: Config): int = + let binPath = findOllamaBin() + if binPath.len == 0: + stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, "ollama not found in PATH") + when defined(macosx): + stdout.styledWriteLine(styleDim, " macOS: brew install ollama", resetStyle) + elif defined(linux): + stdout.styledWriteLine(styleDim, " Linux: curl -fsSL https://ollama.com/install.sh | sh", resetStyle) + stdout.styledWriteLine(styleDim, " Manual: https://ollama.com/download", resetStyle) quit(1) ensureDir(cfg.logsDir()) - let pid = startProcess(binPath, args = @[ - "--model", modelPath, - "--port", "18080", - "--host", "127.0.0.1", - "--ctx-size", "4096", - ], options = {poStdErrToStdOut}) + let logPath = cfg.logFilePath() + let pidPath = cfg.pidFilePath() + let cmd = quoteShell(binPath) & " serve" & + " >> " & quoteShell(logPath) & " 2>&1 & echo $! 
> " & quoteShell(pidPath) + + discard execShellCmd(cmd) - writeFile(cfg.pidFilePath(), $pid.processID) - return pid.processID + try: + result = readFile(pidPath).strip().parseInt() + except: + result = 0 proc pollHealth*(cfg: Config, timeoutMs: int = 30000): bool = let client = newHttpClient(timeout = 1000) @@ -109,7 +49,7 @@ proc pollHealth*(cfg: Config, timeoutMs: int = 30000): bool = let startTime = epochTime() * 1000 while (epochTime() * 1000 - startTime) < timeoutMs.float: try: - discard client.getContent("http://127.0.0.1:18080/health") + discard client.getContent("http://127.0.0.1:11434/api/tags") return true except: os.sleep(500) @@ -122,10 +62,7 @@ proc stopRuntime*(cfg: Config) = try: let pid = readFile(pidPath).strip().parseInt() if pid > 0: - when defined(macosx): - discard execShellCmd(&"kill {pid}") - else: - discard execShellCmd(&"kill {pid}") + discard execShellCmd(&"kill {pid}") removeFile(pidPath) except: removeFile(pidPath) From f15f488dc091a4b9958d7cf1bd38a8e5997d363c Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 12:56:39 +0100 Subject: [PATCH 08/27] Remove SSL dependency: all traffic is localhost to ollama --- BUILD.md | 23 +++++++++-------------- lazybookmarks.nimble | 4 ++-- nim.cfg | 1 - 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/BUILD.md b/BUILD.md index 4e08a21..71a7865 100644 --- a/BUILD.md +++ b/BUILD.md @@ -3,41 +3,42 @@ ## Prerequisites - **Nim** >= 2.0.0 — https://nim-lang.org/install.html -- **OpenSSL** (for SHA256 via nimcrypto) -- **curl** (for model downloads) +- **Ollama** — https://ollama.com/download (runtime dependency, not build-time) ### macOS ```sh -brew install nim openssl +brew install nim ollama ``` ### Ubuntu/Debian ```sh -sudo apt install nim libssl-dev curl +sudo apt install nim +curl -fsSL https://ollama.com/install.sh | sh ``` ### Arch Linux ```sh -sudo pacman -S nim openssl curl +sudo pacman -S nim +yay -S ollama-cuda # or ollama-rocm for AMD ``` ## Install Nim dependencies ```sh 
-nimble install cligen db_connector jsony nimcrypto +nimble install cligen db_connector jsony ``` ## Build ```sh # Release (optimised, smaller binary) -nimble build +nimble release # Debug -nimble buildDebug +nimble debug ``` The binary will be at `build/lazybookmarks`. @@ -49,9 +50,3 @@ Install a Linux cross-compiler, then: ```sh nim c -d:release --os:linux --cpu:arm64 -o:build/lazybookmarks-linux-arm64 src/lazybookmarks/main.nim ``` - -On Ubuntu with `musl` for a static binary: - -```sh -nim c -d:release --os:linux --cpu:arm64 --gc:orc -d:useMalloc -o:build/lazybookmarks src/lazybookmarks/main.nim -``` diff --git a/lazybookmarks.nimble b/lazybookmarks.nimble index 3ce7ab9..b79c9e0 100644 --- a/lazybookmarks.nimble +++ b/lazybookmarks.nimble @@ -18,7 +18,7 @@ requires "jsony >= 1.1" # Tasks task release, "Build release binary to build/": - exec "nim c -d:release -d:ssl -o:build/lazybookmarks src/lazybookmarks/main.nim" + exec "nim c -d:release -o:build/lazybookmarks src/lazybookmarks/main.nim" task debug, "Build debug binary to build/": - exec "nim c -d:ssl -o:build/lazybookmarks src/lazybookmarks/main.nim" + exec "nim c -o:build/lazybookmarks src/lazybookmarks/main.nim" diff --git a/nim.cfg b/nim.cfg index 33f3c3c..a57e87d 100644 --- a/nim.cfg +++ b/nim.cfg @@ -1,3 +1,2 @@ --opt:size --mm:orc --d:ssl From 853c92a63ad63148f4ace4017b7948380dd954d2 Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 13:21:56 +0100 Subject: [PATCH 09/27] Fix string interpolation, URL concat, and remove ollama lifecycle management - Fix {refStr} not interpolated in model pull messages - Use string concat (&) instead of path join (/) for API URLs - Remove spawn/poll/stop ollama lifecycle (treat as external service) - Add requireRuntime with platform-specific start/install hints - Remove unused pidFilePath/logFilePath/logsDir from config - Increase retry backoff delay for slow model loading --- src/lazybookmarks/bootstrap.nim | 11 +----- src/lazybookmarks/client.nim | 11 ++++-- 
src/lazybookmarks/config.nim | 9 ----- src/lazybookmarks/main.nim | 39 ++++++++++--------- src/lazybookmarks/model.nim | 4 +- src/lazybookmarks/runtime.nim | 66 +++++++++------------------------ 6 files changed, 48 insertions(+), 92 deletions(-) diff --git a/src/lazybookmarks/bootstrap.nim b/src/lazybookmarks/bootstrap.nim index e081411..25d3405 100644 --- a/src/lazybookmarks/bootstrap.nim +++ b/src/lazybookmarks/bootstrap.nim @@ -7,16 +7,7 @@ proc ensureReady*(cfg: var Config, registry: ModelRegistry) = if not cfg.runtimeManaged: return - if not isRuntimeRunning(cfg): - discard spawnRuntime(cfg) - - infoMsg "Waiting for ollama to start..." - if not pollHealth(cfg): - errorMsg "Ollama failed to start. Check logs:" - dimMsg cfg.logFilePath() - quit(1) - - infoMsg "Ollama ready" + requireRuntime(cfg) let entry = findModel(registry, cfg.modelVariant) cfg.modelName = ollamaRef(entry) diff --git a/src/lazybookmarks/client.nim b/src/lazybookmarks/client.nim index f1b55df..41d8705 100644 --- a/src/lazybookmarks/client.nim +++ b/src/lazybookmarks/client.nim @@ -1,4 +1,4 @@ -import std/[httpclient, json, os] +import std/[httpclient, json, os, osproc] import ./config type @@ -29,14 +29,17 @@ proc chatCompletion*(cfg: Config, messages: seq[Message], client.headers = newHttpHeaders([("Content-Type", "application/json")]) defer: client.close() + let url = cfg.llmUrl & "/chat/completions" + if cfg.verbose: + stderr.writeLine("[chat] POST " & url) + var lastError = "" for attempt in 1..maxRetries: try: - let url = cfg.llmUrl / "chat" / "completions" let response = client.postContent(url, body = $body) if cfg.verbose: - stderr.writeLine("[attempt " & $attempt & "] POST " & url & " -> " & $response.len & " bytes") + stderr.writeLine("[attempt " & $attempt & "] -> " & $response.len & " bytes") let parsed = parseJson(response) if parsed.hasKey("choices") and parsed["choices"].len > 0: @@ -50,7 +53,7 @@ proc chatCompletion*(cfg: Config, messages: seq[Message], 
stderr.writeLine("[attempt " & $attempt & "] Error: " & e.msg) if attempt < maxRetries: let delay = 1000 * (1 shl (attempt - 1)) - os.sleep(delay) + discard execShellCmd("sleep " & $(delay * 3 div 1000)) raise newException(CatchableError, "chatCompletion failed after " & $maxRetries & " attempts: " & lastError) diff --git a/src/lazybookmarks/config.nim b/src/lazybookmarks/config.nim index 7d26330..6f78b51 100644 --- a/src/lazybookmarks/config.nim +++ b/src/lazybookmarks/config.nim @@ -78,14 +78,5 @@ proc loadConfig*(overrides: Config = Config()): Config = proc dbPath*(cfg: Config): string = cfg.dataDir / "bookmarks.db" -proc logsDir*(cfg: Config): string = - cfg.dataDir / "logs" - -proc pidFilePath*(cfg: Config): string = - cfg.dataDir / "runtime.pid" - -proc logFilePath*(cfg: Config): string = - cfg.logsDir() / "ollama.log" - proc configFilePath*: string = defaultConfigDir() / "config.toml" diff --git a/src/lazybookmarks/main.nim b/src/lazybookmarks/main.nim index abcb0d9..474d321 100644 --- a/src/lazybookmarks/main.nim +++ b/src/lazybookmarks/main.nim @@ -1,4 +1,4 @@ -import std/[os, strutils, httpclient, terminal, strformat] +import std/[os, strutils, terminal, strformat] import cligen import db_connector/db_sqlite import ./config @@ -128,15 +128,14 @@ proc cmdStatus = echo "" styledWriteLine(stdout, styleBright, " Endpoint: ", resetStyle, cfg.llmUrl) - styledWriteLine(stdout, styleBright, " Managed: ", resetStyle, $cfg.runtimeManaged) styledWriteLine(stdout, styleBright, " Model: ", resetStyle, cfg.modelVariant) let modelReady = isModelReady(cfg, registry) - styledWriteLine(stdout, styleBright, " Model: ", resetStyle, if modelReady: "[ready]" else: "[not downloaded]") + styledWriteLine(stdout, styleBright, " Model: ", resetStyle, if modelReady: "[ready]" else: "[not pulled]") if cfg.runtimeManaged: let running = isRuntimeRunning(cfg) - styledWriteLine(stdout, styleBright, " Runtime: ", resetStyle, if running: "[running]" else: "[stopped]") + 
styledWriteLine(stdout, styleBright, " Ollama: ", resetStyle, if running: "[running]" else: "[not running]") styledWriteLine(stdout, styleBright, " Data dir: ", resetStyle, cfg.dataDir) echo "" @@ -159,7 +158,11 @@ proc cmdDoctor = elif not cfg.runtimeManaged: dimMsg "Runtime: using external endpoint" else: - warnMsg "Ollama not found in PATH (install from https://ollama.com)" + warnMsg "Ollama not found in PATH" + when defined(macosx): + stdout.styledWriteLine(styleDim, " Install: brew install ollama", resetStyle) + elif defined(linux): + stdout.styledWriteLine(styleDim, " Install: curl -fsSL https://ollama.com/install.sh | sh", resetStyle) issues.inc let registry = loadModelRegistry() @@ -168,20 +171,20 @@ proc cmdDoctor = elif not cfg.runtimeManaged: dimMsg "Model: using external endpoint" else: - warnMsg &"Model not downloaded: {cfg.modelVariant}" - issues.inc + warnMsg &"Model not pulled: {cfg.modelVariant}" - try: - let client = newHttpClient(timeout = 3000) - defer: client.close() - discard client.getContent("http://127.0.0.1:11434/api/tags") - infoMsg &"Endpoint reachable: {cfg.llmUrl}" - except: - if cfg.runtimeManaged: - warnMsg &"Endpoint not reachable: {cfg.llmUrl} (normal if not running)" - else: - errorMsg &"Endpoint not reachable: {cfg.llmUrl}" - issues.inc + let running = isRuntimeRunning(cfg) + if running: + infoMsg "Ollama: running" + elif not cfg.runtimeManaged: + dimMsg "Runtime: using external endpoint" + else: + warnMsg "Ollama not running" + when defined(macosx): + stdout.styledWriteLine(styleDim, " Start: open -a Ollama", resetStyle) + elif defined(linux): + stdout.styledWriteLine(styleDim, " Start: ollama serve &", resetStyle) + issues.inc echo "" if issues == 0: diff --git a/src/lazybookmarks/model.nim b/src/lazybookmarks/model.nim index 3268967..509077b 100644 --- a/src/lazybookmarks/model.nim +++ b/src/lazybookmarks/model.nim @@ -28,12 +28,12 @@ proc ollamaRef*(entry: ModelEntry): string = proc pullModel*(entry: ModelEntry) = let refStr 
= ollamaRef(entry) stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, - "Pulling {refStr}...") + "Pulling " & refStr & "...") let exitCode = execShellCmd("ollama pull " & quoteShell(refStr) & " 2>&1") if exitCode != 0: stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, - "Failed to pull {refStr}") + "Failed to pull " & refStr) quit(1) proc listLocalModels*(): seq[string] = diff --git a/src/lazybookmarks/runtime.nim b/src/lazybookmarks/runtime.nim index 6fe2f03..ac08275 100644 --- a/src/lazybookmarks/runtime.nim +++ b/src/lazybookmarks/runtime.nim @@ -1,4 +1,4 @@ -import std/[os, osproc, strutils, strformat, httpclient, terminal, times] +import std/[os, strutils, httpclient, terminal] import ./config proc findOllamaBin*(): string = @@ -17,52 +17,20 @@ proc isRuntimeRunning*(cfg: Config): bool = except: return false -proc spawnRuntime*(cfg: Config): int = - let binPath = findOllamaBin() - if binPath.len == 0: - stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, "ollama not found in PATH") - when defined(macosx): - stdout.styledWriteLine(styleDim, " macOS: brew install ollama", resetStyle) - elif defined(linux): - stdout.styledWriteLine(styleDim, " Linux: curl -fsSL https://ollama.com/install.sh | sh", resetStyle) - stdout.styledWriteLine(styleDim, " Manual: https://ollama.com/download", resetStyle) - quit(1) - - ensureDir(cfg.logsDir()) - - let logPath = cfg.logFilePath() - let pidPath = cfg.pidFilePath() - let cmd = quoteShell(binPath) & " serve" & - " >> " & quoteShell(logPath) & " 2>&1 & echo $! 
> " & quoteShell(pidPath) - - discard execShellCmd(cmd) - - try: - result = readFile(pidPath).strip().parseInt() - except: - result = 0 - -proc pollHealth*(cfg: Config, timeoutMs: int = 30000): bool = - let client = newHttpClient(timeout = 1000) - defer: client.close() - - let startTime = epochTime() * 1000 - while (epochTime() * 1000 - startTime) < timeoutMs.float: - try: - discard client.getContent("http://127.0.0.1:11434/api/tags") - return true - except: - os.sleep(500) - return false - -proc stopRuntime*(cfg: Config) = - let pidPath = cfg.pidFilePath() - if not fileExists(pidPath): +proc requireRuntime*(cfg: Config) = + if isRuntimeRunning(cfg): return - try: - let pid = readFile(pidPath).strip().parseInt() - if pid > 0: - discard execShellCmd(&"kill {pid}") - removeFile(pidPath) - except: - removeFile(pidPath) + if not cfg.runtimeManaged: + stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, + "Endpoint not reachable: " & cfg.llmUrl) + quit(1) + stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, + "Ollama is not running.") + when defined(macosx): + stdout.styledWriteLine(styleDim, " Start it with: open -a Ollama", resetStyle) + stdout.styledWriteLine(styleDim, " Or install: brew install ollama", resetStyle) + elif defined(linux): + stdout.styledWriteLine(styleDim, " Start it with: ollama serve &", resetStyle) + stdout.styledWriteLine(styleDim, " Or install: curl -fsSL https://ollama.com/install.sh | sh", resetStyle) + stdout.styledWriteLine(styleDim, " Manual: https://ollama.com/download", resetStyle) + quit(1) From 5c038244ccd0db7755d4425a7d803d92231ac630 Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 13:26:12 +0100 Subject: [PATCH 10/27] Handle qwen3.5 thinking mode and improve JSON extraction - Add options.think=false to suppress thinking tags in qwen3.5 - Strip thinking tags, system-reminder tags before JSON extraction - Increase max_tokens to 2048 - Include raw response in error messages for debugging 
--- src/lazybookmarks/client.nim | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/src/lazybookmarks/client.nim b/src/lazybookmarks/client.nim index 41d8705..08668ca 100644 --- a/src/lazybookmarks/client.nim +++ b/src/lazybookmarks/client.nim @@ -1,4 +1,4 @@ -import std/[httpclient, json, os, osproc] +import std/[httpclient, json, os, re, strutils] import ./config type @@ -6,6 +6,25 @@ type role*: string content*: string +proc stripThinkTags*(s: string): string = + result = s + result = result.replace(re"[\s\S]*?", "") + result = result.replace(re"[\s\S]*?", "") + result = result.replace(re"]*>", "") + result = result.strip() + +proc extractJson*(s: string): string = + let cleaned = stripThinkTags(s) + let start = cleaned.find('{') + if start < 0: + return "" + var endPos = cleaned.high + while endPos > start and cleaned[endPos] != '}': + dec endPos + if endPos <= start: + return "" + return cleaned[start .. endPos] + proc chatCompletion*(cfg: Config, messages: seq[Message], jsonSchema: string = "", maxRetries: int = 3): JsonNode = @@ -13,7 +32,10 @@ proc chatCompletion*(cfg: Config, messages: seq[Message], "model": cfg.modelName, "messages": messages, "temperature": 0.1, - "max_tokens": 1024, + "max_tokens": 2048, + "options": { + "think": false, + }, } if jsonSchema.len > 0: @@ -43,8 +65,14 @@ proc chatCompletion*(cfg: Config, messages: seq[Message], let parsed = parseJson(response) if parsed.hasKey("choices") and parsed["choices"].len > 0: - let content = parsed["choices"][0]["message"]["content"].getStr() - return parseJson(content) + let rawContent = parsed["choices"][0]["message"]["content"].getStr() + let content = extractJson(rawContent) + if content.len > 0: + if cfg.verbose: + stderr.writeLine("[chat] response: " & content[0..min(200, content.high)]) + return parseJson(content) + else: + lastError = "No JSON found in response: " & rawContent[0..min(200, rawContent.high)] else: lastError = "No choices 
in response: " & response[0..min(200, response.high)] except CatchableError as e: From 5428cb90411effb1beb29c9ae980aa9337474c6c Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 13:33:25 +0100 Subject: [PATCH 11/27] Add verbose request logging and set qwen3.5-2b as default model - Log model name and message previews in verbose mode - Switch default from qwen3.5-0.8b to qwen3.5-2b for more reliable output --- src/lazybookmarks/client.nim | 4 +++- src/lazybookmarks/config.nim | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lazybookmarks/client.nim b/src/lazybookmarks/client.nim index 08668ca..7e073de 100644 --- a/src/lazybookmarks/client.nim +++ b/src/lazybookmarks/client.nim @@ -53,7 +53,9 @@ proc chatCompletion*(cfg: Config, messages: seq[Message], let url = cfg.llmUrl & "/chat/completions" if cfg.verbose: - stderr.writeLine("[chat] POST " & url) + stderr.writeLine("[chat] POST " & url & " model=" & cfg.modelName) + for m in messages: + stderr.writeLine("[chat] " & m.role & ": " & m.content[0..min(300, m.content.high)]) var lastError = "" for attempt in 1..maxRetries: diff --git a/src/lazybookmarks/config.nim b/src/lazybookmarks/config.nim index 6f78b51..047958c 100644 --- a/src/lazybookmarks/config.nim +++ b/src/lazybookmarks/config.nim @@ -11,7 +11,7 @@ type Config* = object verbose*: bool const DefaultLlmUrl* = "http://127.0.0.1:11434/v1" -const DefaultModelVariant* = "qwen3.5-0.8b" +const DefaultModelVariant* = "qwen3.5-2b" const DefaultBatchSize* = 1 proc xdgDataHome*: string = From c859b627b733226658e2177810473fffc3faa383 Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 23:04:54 +0100 Subject: [PATCH 12/27] Fix undoLastBatch clearing all bookmarks and remove unused variables --- src/lazybookmarks/model.nim | 1 - src/lazybookmarks/storage.nim | 8 +------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/lazybookmarks/model.nim b/src/lazybookmarks/model.nim index 509077b..f96b703 100644 --- 
a/src/lazybookmarks/model.nim +++ b/src/lazybookmarks/model.nim @@ -72,7 +72,6 @@ proc ensureModel*(cfg: Config, registry: ModelRegistry) = proc listModels*(cfg: Config, registry: ModelRegistry) = echo "" - let localModels = listLocalModels() for entry in registry.entries: let isCurrent = entry.name == cfg.modelVariant let isReady = isEntryReady(entry) diff --git a/src/lazybookmarks/storage.nim b/src/lazybookmarks/storage.nim index 668fdd5..e642317 100644 --- a/src/lazybookmarks/storage.nim +++ b/src/lazybookmarks/storage.nim @@ -267,7 +267,6 @@ proc applyClassification*(cfg: Config, bookmarkId: int64, category: string, conf defer: db.close() let now = getTime().toUnix() - let folder = db.getOrCreateFolder(category) db.exec(sql( "UPDATE bookmarks SET category = ?, confidence = ?, reason = ?, organised_at = ? WHERE id = ?" ), category, confidence, reason, now, bookmarkId) @@ -276,15 +275,10 @@ proc undoLastBatch*(cfg: Config): int = let db = cfg.initDb() defer: db.close() - let now = getTime().toUnix() - result = db.execAffectedRows(sql( - "UPDATE bookmarks SET category = NULL, confidence = NULL, reason = NULL, organised_at = NULL" - ),) - # Actually undo: find the last batch by organised_at let row = db.getRow(sql( "SELECT organised_at FROM bookmarks WHERE organised_at IS NOT NULL ORDER BY organised_at DESC LIMIT 1" )) - if row[0].len > 0: + if row.len > 0 and row[0].len > 0: let batchTime = parseBiggestInt(row[0]) result = db.execAffectedRows(sql( "UPDATE bookmarks SET category = NULL, confidence = NULL, reason = NULL, organised_at = NULL WHERE organised_at >= ?" 
From 1b6cef6e68eb03840db2d51730ac2c797f5663ea Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 23:19:52 +0100 Subject: [PATCH 13/27] Improve small model (0.8b) JSON schema adherence with lightweight constraints and few-shot examples --- src/lazybookmarks/client.nim | 73 ++++++++++++++++++++++++++------- src/lazybookmarks/config.nim | 25 ++++++++++- src/lazybookmarks/organizer.nim | 6 ++- src/lazybookmarks/prompts.nim | 19 +++++++-- 4 files changed, 101 insertions(+), 22 deletions(-) diff --git a/src/lazybookmarks/client.nim b/src/lazybookmarks/client.nim index 7e073de..5af67e3 100644 --- a/src/lazybookmarks/client.nim +++ b/src/lazybookmarks/client.nim @@ -8,21 +8,63 @@ type proc stripThinkTags*(s: string): string = result = s - result = result.replace(re"[\s\S]*?", "") + result = result.replace(re"💭[\s\S]*?💭", "") + result = result.replace(re"]*>", "") result = result.replace(re"[\s\S]*?", "") - result = result.replace(re"]*>", "") + result = result.replace(re"```json\s*", "") + result = result.replace(re"```\s*$", "") result = result.strip() +proc closeJson*(s: string): string = + var opens: seq[char] = @[] + var inStr = false + var j = 0 + while j < s.len: + if not inStr: + case s[j] + of '{', '[': opens.add(s[j]) + of '}': + if opens.len > 0 and opens[opens.high] == '{': discard opens.pop() + of ']': + if opens.len > 0 and opens[opens.high] == '[': discard opens.pop() + of '"': inStr = true + else: discard + else: + if s[j] == '"' and (j == 0 or s[j - 1] != '\\'): + inStr = false + inc j + result = s + var k = opens.high + while k >= 0: + let closing = if opens[k] == '{': '}' else: ']' + result.add(closing) + dec k + proc extractJson*(s: string): string = - let cleaned = stripThinkTags(s) + var cleaned = stripThinkTags(s) let start = cleaned.find('{') if start < 0: return "" - var endPos = cleaned.high - while endPos > start and cleaned[endPos] != '}': - dec endPos - if endPos <= start: - return "" + var depth = 0 + var inStr = false + var endPos = -1 
+ for i in start .. cleaned.high: + let c = cleaned[i] + if inStr: + if c == '"' and (i == 0 or cleaned[i - 1] != '\\'): + inStr = false + else: + case c + of '"': inStr = true + of '{': inc depth + of '}': + dec depth + if depth == 0: + endPos = i + break + else: discard + if endPos < 0: + return closeJson(cleaned[start .. cleaned.high]) return cleaned[start .. endPos] proc chatCompletion*(cfg: Config, messages: seq[Message], @@ -39,13 +81,16 @@ proc chatCompletion*(cfg: Config, messages: seq[Message], } if jsonSchema.len > 0: - body["response_format"] = %*{ - "type": "json_schema", - "json_schema": { - "strict": true, - "schema": parseJson(jsonSchema), + if cfg.isSmallModel(): + body["response_format"] = %*{ "type": "json_object" } + else: + body["response_format"] = %*{ + "type": "json_schema", + "json_schema": { + "strict": true, + "schema": parseJson(jsonSchema), + } } - } let client = newHttpClient(timeout = 120000) client.headers = newHttpHeaders([("Content-Type", "application/json")]) diff --git a/src/lazybookmarks/config.nim b/src/lazybookmarks/config.nim index 047958c..46d1d8f 100644 --- a/src/lazybookmarks/config.nim +++ b/src/lazybookmarks/config.nim @@ -1,4 +1,6 @@ -import std/os +import std/[os, strutils, re] + +type ParamSize* = enum psSmall, psNormal type Config* = object llmUrl*: string @@ -9,6 +11,19 @@ type Config* = object autoAcceptHigh*: bool batchSize*: int verbose*: bool + paramSize*: ParamSize + +proc parseParamSize*(variant: string): ParamSize = + for m in variant.findAll(re"[\d.]+[bB]"): + let numPart = m[0 ..< m.len - 1] + try: + if parseFloat(numPart) < 1.5: return psSmall + except: + discard + return psNormal + +proc isSmallModel*(cfg: Config): bool = + cfg.paramSize == psSmall const DefaultLlmUrl* = "http://127.0.0.1:11434/v1" const DefaultModelVariant* = "qwen3.5-2b" @@ -34,14 +49,18 @@ proc ensureDir*(dir: string) = createDir(dir) proc loadConfig*(overrides: Config = Config()): Config = + let variant = if overrides.modelVariant.len 
> 0: overrides.modelVariant + elif getEnv("LB_MODEL").len > 0: getEnv("LB_MODEL") + else: DefaultModelVariant result = Config( llmUrl: DefaultLlmUrl, - modelVariant: DefaultModelVariant, + modelVariant: variant, dataDir: defaultDataDir(), runtimeManaged: true, autoAcceptHigh: false, batchSize: DefaultBatchSize, verbose: false, + paramSize: parseParamSize(variant), ) let envLlmUrl = getEnv("LLM_URL") @@ -52,6 +71,7 @@ proc loadConfig*(overrides: Config = Config()): Config = let envModel = getEnv("LB_MODEL") if envModel.len > 0: result.modelVariant = envModel + result.paramSize = parseParamSize(envModel) let envDataDir = getEnv("LB_DATA_DIR") if envDataDir.len > 0: @@ -66,6 +86,7 @@ proc loadConfig*(overrides: Config = Config()): Config = result.runtimeManaged = false if overrides.modelVariant.len > 0: result.modelVariant = overrides.modelVariant + result.paramSize = parseParamSize(overrides.modelVariant) if overrides.dataDir.len > 0: result.dataDir = overrides.dataDir if overrides.batchSize > 0: diff --git a/src/lazybookmarks/organizer.nim b/src/lazybookmarks/organizer.nim index 3067897..71d7993 100644 --- a/src/lazybookmarks/organizer.nim +++ b/src/lazybookmarks/organizer.nim @@ -246,7 +246,8 @@ proc runClusterPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], let rootIds = rootFolders.mapIt(it.id) let prompt = buildClusterPrompt(batchTuples, taxCats, rootFolders) - let schema = buildClusterSchemaJson(rootIds) + let schema = if cfg.isSmallModel(): buildClusterSchemaJsonSmall() + else: buildClusterSchemaJson(rootIds) let response = chatCompletionSimple(cfg, SystemPrompt, prompt, schema) @@ -302,7 +303,8 @@ proc runClassificationPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], let pruned = pruneTaxonomy(fullTaxonomy, batch, tfidfMap) let folderIds = pruned.categories.mapIt(it.folderId) let bookmarkIds = batch.mapIt($it.id) - let schema = buildClassificationSchemaJson(folderIds, bookmarkIds) + let schema = if cfg.isSmallModel(): 
buildClassificationSchemaJsonSmall() + else: buildClassificationSchemaJson(folderIds, bookmarkIds) let taxCats = pruned.categories.mapIt( (id: it.folderId, path: it.folderPath, description: it.description, keywords: it.keywords.join(", ")) diff --git a/src/lazybookmarks/prompts.nim b/src/lazybookmarks/prompts.nim index 3f149bc..5607465 100644 --- a/src/lazybookmarks/prompts.nim +++ b/src/lazybookmarks/prompts.nim @@ -1,6 +1,6 @@ import std/strutils -const SystemPrompt* = "You are a bookmark classifier. Given a user's folder structure and uncategorized bookmarks, assign each to the most appropriate existing folder. If no folder fits well, set targetFolderId to \"__skip__\" instead of forcing a poor match. Respond with valid JSON matching the provided schema. Prefer the user's existing folder names. Only suggest new folders when necessary." +const SystemPrompt* = "You are a bookmark classifier. Given a user's folder structure and uncategorized bookmarks, assign each to the most appropriate existing folder. If no folder fits well, set targetFolderId to \"__skip__\" instead of forcing a poor match. Respond with ONLY valid JSON matching the required structure. No explanation, no markdown, no other text. Prefer the user's existing folder names. Only suggest new folders when necessary." 
const TaxonomySchemaJson* = """{ "type": "object", @@ -42,6 +42,12 @@ proc buildClassificationSchemaJson*(folderIds: seq[string], bookmarkIds: seq[str let bookmarkEnum = bookmarkParts.join(", ") return "{\"type\":\"object\",\"properties\":{\"moves\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"bookmarkId\":{\"type\":\"string\",\"enum\":[" & bookmarkEnum & "]},\"targetFolderId\":{\"type\":\"string\",\"enum\":[" & folderEnum & "]},\"confidence\":{\"type\":\"string\",\"enum\":[\"high\",\"medium\",\"low\"]},\"reason\":{\"type\":\"string\"}},\"required\":[\"bookmarkId\",\"targetFolderId\",\"confidence\",\"reason\"],\"additionalProperties\":false}}},\"required\":[\"moves\"]}" +proc buildClassificationSchemaJsonSmall*(): string = + return "{\"type\":\"object\",\"properties\":{\"moves\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"bookmarkId\":{\"type\":\"string\"},\"targetFolderId\":{\"type\":\"string\"},\"confidence\":{\"type\":\"string\"},\"reason\":{\"type\":\"string\"}},\"required\":[\"bookmarkId\",\"targetFolderId\",\"confidence\",\"reason\"]}}},\"required\":[\"moves\"]}" + +proc buildClusterSchemaJsonSmall*(): string = + return "{\"type\":\"object\",\"properties\":{\"clusters\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"name\":{\"type\":\"string\"},\"description\":{\"type\":\"string\"},\"keywords\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"parentFolderId\":{\"type\":\"string\"}},\"required\":[\"name\",\"description\",\"keywords\",\"parentFolderId\"]}}},\"required\":[\"clusters\"]}" + proc buildTaxonomyPrompt*(enrichedFolders: seq[tuple[id, path, count: string, domains, siblings, keywords, exemplars: string]]): string = var lines: seq[string] = @[] for f in enrichedFolders: @@ -52,7 +58,10 @@ proc buildTaxonomyPrompt*(enrichedFolders: seq[tuple[id, path, count: string, do if f.keywords.len > 0: parts.add("keywords: " & f.keywords) if f.exemplars.len > 0: parts.add("examples: " & 
f.exemplars) lines.add(parts.join(" | ")) - return "Analyze these bookmark folders. For each, describe what it contains and provide keywords.\n\n" & lines.join("\n") + return "Analyze these bookmark folders. For each, describe what it contains and provide keywords.\n\n" & + lines.join("\n") & "\n\n" & + "Respond with a JSON object: {\"categories\": [{\"folderId\": \"\", \"folderPath\": \"\", \"description\": \"\", \"keywords\": [\"word1\", \"word2\"]}]}\n\n" & + "Example:\n{\"categories\": [{\"folderId\": \"a1b2c3\", \"folderPath\": \"Tech/Blogs\", \"description\": \"Programming and software development blogs\", \"keywords\": [\"programming\", \"software\", \"code\"]}]}" proc formatBookmarkBatch*(bookmarks: seq[tuple[id, title, url: string]]): string = var lines: seq[string] = @[] @@ -96,7 +105,8 @@ proc buildClusterPrompt*(uncategorizedBookmarks: seq[tuple[id, title, url: strin "Uncategorized bookmarks:\n" & bookmarkList & "\n\n" & "For each cluster, suggest a short folder name, a description, keywords, and which root location to create it in (parentFolderId).\n" & - "Only suggest clusters when a meaningful group of 2+ bookmarks shares a clear theme. Do not suggest clusters that duplicate an existing folder's purpose." + "Only suggest clusters when a meaningful group of 2+ bookmarks shares a clear theme. 
Do not suggest clusters that duplicate an existing folder's purpose.\n\n" & + "Respond with a JSON object: {\"clusters\": [{\"name\": \"\", \"description\": \"\", \"keywords\": [\"word1\", \"word2\"], \"parentFolderId\": \"\"}]}" proc buildClassificationPrompt*(taxonomyCategories: seq[tuple[id, path, description, keywords: string]], bookmarkBatch: seq[tuple[id, title, url: string]]): string = @@ -115,4 +125,5 @@ proc buildClassificationPrompt*(taxonomyCategories: seq[tuple[id, path, descript "- Choose the best existing folder (targetFolderId)\n" & "- Set confidence: \"high\" (obvious match), \"medium\" (reasonable), \"low\" (uncertain)\n" & "- Give a brief reason\n\n" & - "Use targetFolderId=\"__skip__\" if no folder is a good match." + "Use targetFolderId=\"__skip__\" if no folder is a good match.\n\n" & + "Respond with a JSON object: {\"moves\": [{\"bookmarkId\": \"\", \"targetFolderId\": \"\", \"confidence\": \"high|medium|low\", \"reason\": \"\"}]}" From c01bf72ade0cefc34b6a2570a78f65c5e906c4ac Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 23:31:56 +0100 Subject: [PATCH 14/27] Add parallel LLM requests (concurrency=4) and increase default batch size to 5/10 --- src/lazybookmarks/client.nim | 80 ++++++++++++++-- src/lazybookmarks/config.nim | 13 ++- src/lazybookmarks/main.nim | 9 +- src/lazybookmarks/organizer.nim | 159 +++++++++++++++++++++----------- 4 files changed, 193 insertions(+), 68 deletions(-) diff --git a/src/lazybookmarks/client.nim b/src/lazybookmarks/client.nim index 5af67e3..9c58bef 100644 --- a/src/lazybookmarks/client.nim +++ b/src/lazybookmarks/client.nim @@ -1,4 +1,4 @@ -import std/[httpclient, json, os, re, strutils] +import std/[httpclient, json, os, re, strutils, asyncdispatch] import ./config type @@ -67,10 +67,8 @@ proc extractJson*(s: string): string = return closeJson(cleaned[start .. cleaned.high]) return cleaned[start .. 
endPos] -proc chatCompletion*(cfg: Config, messages: seq[Message], - jsonSchema: string = "", - maxRetries: int = 3): JsonNode = - let body = %*{ +proc buildRequestBody(cfg: Config, messages: seq[Message], jsonSchema: string): JsonNode = + result = %*{ "model": cfg.modelName, "messages": messages, "temperature": 0.1, @@ -79,12 +77,11 @@ proc chatCompletion*(cfg: Config, messages: seq[Message], "think": false, }, } - if jsonSchema.len > 0: if cfg.isSmallModel(): - body["response_format"] = %*{ "type": "json_object" } + result["response_format"] = %*{ "type": "json_object" } else: - body["response_format"] = %*{ + result["response_format"] = %*{ "type": "json_schema", "json_schema": { "strict": true, @@ -92,6 +89,11 @@ proc chatCompletion*(cfg: Config, messages: seq[Message], } } +proc chatCompletion*(cfg: Config, messages: seq[Message], + jsonSchema: string = "", + maxRetries: int = 3): JsonNode = + let body = buildRequestBody(cfg, messages, jsonSchema) + let client = newHttpClient(timeout = 120000) client.headers = newHttpHeaders([("Content-Type", "application/json")]) defer: client.close() @@ -133,9 +135,69 @@ proc chatCompletion*(cfg: Config, messages: seq[Message], raise newException(CatchableError, "chatCompletion failed after " & $maxRetries & " attempts: " & lastError) proc chatCompletionSimple*(cfg: Config, systemPrompt: string, userMessage: string, - jsonSchema: string = ""): JsonNode = + jsonSchema: string = ""): JsonNode = let messages = @[ Message(role: "system", content: systemPrompt), Message(role: "user", content: userMessage), ] return chatCompletion(cfg, messages, jsonSchema) + +proc chatCompletionAsync*(cfg: Config, messages: seq[Message], + jsonSchema: string = "", + maxRetries: int = 3): Future[JsonNode] {.async.} = + let body = buildRequestBody(cfg, messages, jsonSchema) + let url = cfg.llmUrl & "/chat/completions" + + if cfg.verbose: + stderr.writeLine("[chat-async] POST " & url & " model=" & cfg.modelName) + + var lastError = "" + for attempt 
in 1..maxRetries: + try: + let client = newAsyncHttpClient() + client.headers = newHttpHeaders([("Content-Type", "application/json")]) + + let postFut = client.postContent(url, body = $body) + let timedOut = not await withTimeout(postFut, 120_000) + client.close() + + if timedOut: + lastError = "Request timed out (120s)" + if cfg.verbose: + stderr.writeLine("[attempt " & $attempt & "] Timeout") + else: + let response = postFut.read() + + if cfg.verbose: + stderr.writeLine("[attempt " & $attempt & "] -> " & $response.len & " bytes") + + let parsed = parseJson(response) + if parsed.hasKey("choices") and parsed["choices"].len > 0: + let rawContent = parsed["choices"][0]["message"]["content"].getStr() + let content = extractJson(rawContent) + if content.len > 0: + if cfg.verbose: + stderr.writeLine("[chat-async] response: " & content[0..min(200, content.high)]) + return parseJson(content) + else: + lastError = "No JSON found in response: " & rawContent[0..min(200, rawContent.high)] + else: + lastError = "No choices in response: " & response[0..min(200, response.high)] + except CatchableError as e: + lastError = e.msg + if cfg.verbose: + stderr.writeLine("[attempt " & $attempt & "] Error: " & e.msg) + + if attempt < maxRetries: + let delay = 1000 * (1 shl (attempt - 1)) + await sleepAsync(delay) + + raise newException(CatchableError, "chatCompletionAsync failed after " & $maxRetries & " attempts: " & lastError) + +proc chatCompletionSimpleAsync*(cfg: Config, systemPrompt: string, userMessage: string, + jsonSchema: string = ""): Future[JsonNode] {.async.} = + let messages = @[ + Message(role: "system", content: systemPrompt), + Message(role: "user", content: userMessage), + ] + return await chatCompletionAsync(cfg, messages, jsonSchema) diff --git a/src/lazybookmarks/config.nim b/src/lazybookmarks/config.nim index 46d1d8f..ef532f6 100644 --- a/src/lazybookmarks/config.nim +++ b/src/lazybookmarks/config.nim @@ -10,6 +10,7 @@ type Config* = object runtimeManaged*: bool 
autoAcceptHigh*: bool batchSize*: int + concurrency*: int verbose*: bool paramSize*: ParamSize @@ -27,7 +28,8 @@ proc isSmallModel*(cfg: Config): bool = const DefaultLlmUrl* = "http://127.0.0.1:11434/v1" const DefaultModelVariant* = "qwen3.5-2b" -const DefaultBatchSize* = 1 +const DefaultBatchSize* = 5 +const DefaultConcurrency* = 4 proc xdgDataHome*: string = result = getEnv("XDG_DATA_HOME") @@ -52,15 +54,18 @@ proc loadConfig*(overrides: Config = Config()): Config = let variant = if overrides.modelVariant.len > 0: overrides.modelVariant elif getEnv("LB_MODEL").len > 0: getEnv("LB_MODEL") else: DefaultModelVariant + let ps = parseParamSize(variant) + let defaultBatch = if ps == psSmall: 5 else: 10 result = Config( llmUrl: DefaultLlmUrl, modelVariant: variant, dataDir: defaultDataDir(), runtimeManaged: true, autoAcceptHigh: false, - batchSize: DefaultBatchSize, + batchSize: defaultBatch, + concurrency: DefaultConcurrency, verbose: false, - paramSize: parseParamSize(variant), + paramSize: ps, ) let envLlmUrl = getEnv("LLM_URL") @@ -91,6 +96,8 @@ proc loadConfig*(overrides: Config = Config()): Config = result.dataDir = overrides.dataDir if overrides.batchSize > 0: result.batchSize = overrides.batchSize + if overrides.concurrency > 0: + result.concurrency = overrides.concurrency if overrides.verbose: result.verbose = true if overrides.autoAcceptHigh: diff --git a/src/lazybookmarks/main.nim b/src/lazybookmarks/main.nim index 474d321..30d0cc4 100644 --- a/src/lazybookmarks/main.nim +++ b/src/lazybookmarks/main.nim @@ -32,8 +32,9 @@ proc cmdImport(file: string, format = "auto", dryRun = false) = infoMsg &"Imported {count} bookmarks from {file} (format: {detectedFormat})" proc cmdOrganise(model = "", autoAcceptHigh = false, autoAcceptAll = false, - limit = 0, verbose = false) = - let overrides = Config(modelVariant: model, verbose: verbose) + limit = 0, batchSize = 0, concurrency = 0, verbose = false) = + let overrides = Config(modelVariant: model, batchSize: batchSize, + 
concurrency: concurrency, verbose: verbose) var cfg = loadConfig(overrides) let registry = loadModelRegistry() @@ -198,7 +199,9 @@ when isMainModule: help = {"file": "Path to bookmark file", "format": "Format: auto|html|json|urllist", "dry-run": "Parse only, no database write"}], [cmdOrganise, cmdName = "organise", doc = "AI-organize unorganized bookmarks", help = {"model": "Override model variant", "auto-accept-high": "Skip review for high confidence", - "auto-accept-all": "Accept all suggestions", "limit": "Max bookmarks to process", "verbose": "Show debug output"}], + "auto-accept-all": "Accept all suggestions", "limit": "Max bookmarks to process", + "batch-size": "Bookmarks per LLM request (0=auto)", "concurrency": "Parallel LLM requests (0=auto)", + "verbose": "Show debug output"}], [cmdList, cmdName = "list", doc = "List bookmarks", help = {"category": "Filter by folder path", "unorganised": "Show only unorganized", "format": "table|json|csv"}], [cmdSearch, cmdName = "search", doc = "Search bookmarks", diff --git a/src/lazybookmarks/organizer.nim b/src/lazybookmarks/organizer.nim index 71d7993..389ed2d 100644 --- a/src/lazybookmarks/organizer.nim +++ b/src/lazybookmarks/organizer.nim @@ -1,4 +1,4 @@ -import std/[os, strutils, strformat, json, re, math, tables, algorithm, sequtils, sets] +import std/[os, strutils, strformat, json, re, math, tables, algorithm, sequtils, sets, asyncdispatch] import db_connector/db_sqlite import ./config import ./storage @@ -277,6 +277,66 @@ proc chunk*[T](s: seq[T], size: int): seq[seq[T]] = result.add(batch) i += size +proc classifyBatchAsync(cfg: Config, batch: seq[BookmarkEntry], + fullTaxonomy: Taxonomy, + tfidfMap: Table[string, seq[string]], + batchIndex: int): Future[seq[Suggestion]] {.async.} = + let pruned = pruneTaxonomy(fullTaxonomy, batch, tfidfMap) + let folderIds = pruned.categories.mapIt(it.folderId) + let bookmarkIds = batch.mapIt($it.id) + let schema = if cfg.isSmallModel(): buildClassificationSchemaJsonSmall() 
+ else: buildClassificationSchemaJson(folderIds, bookmarkIds) + + let taxCats = pruned.categories.mapIt( + (id: it.folderId, path: it.folderPath, description: it.description, keywords: it.keywords.join(", ")) + ) + let batchTuples = batch.mapIt((id: $it.id, title: it.title, url: it.url)) + let prompt = buildClassificationPrompt(taxCats, batchTuples) + + try: + let response = await chatCompletionSimpleAsync(cfg, SystemPrompt, prompt, schema) + var suggestions: seq[Suggestion] = @[] + + if response.hasKey("moves"): + for move in response["moves"]: + let moveObj = move + let bmId = parseBiggestInt(moveObj["bookmarkId"].getStr()) + let targetId = moveObj["targetFolderId"].getStr() + let conf = moveObj["confidence"].getStr() + let reason = moveObj["reason"].getStr() + + if targetId == "__skip__": + continue + + let bmIdx = batch.findIt(it.id == bmId) + var bmTitle = "" + var bmUrl = "" + if bmIdx >= 0: + bmTitle = batch[bmIdx].title + bmUrl = batch[bmIdx].url + let targetIdx = pruned.categories.findIt(it.folderId == targetId) + var targetPath = targetId + if targetIdx >= 0: + targetPath = pruned.categories[targetIdx].folderPath + let isNew = targetId.startsWith("__new_") + + suggestions.add(Suggestion( + bookmarkId: bmId, + bookmarkTitle: bmTitle, + bookmarkUrl: bmUrl, + targetFolderId: targetId, + targetFolderPath: if isNew: targetPath & " (new)" else: targetPath, + confidence: conf, + reason: reason, + isNewFolder: isNew, + )) + + return suggestions + except CatchableError as e: + if cfg.verbose: + errorMsg &"Batch {batchIndex + 1} failed: {e.msg}" + return @[] + proc runClassificationPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], taxonomy: Taxonomy, folderBookmarks: Table[string, seq[BookmarkEntry]], @@ -294,63 +354,56 @@ proc runClassificationPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], let tfidfMap = computeTFIDF(folderBookmarks, allBookmarks) - var allSuggestions: seq[Suggestion] = @[] let batches = uncategorized.chunk(cfg.batchSize) + let conc = 
cfg.concurrency - for i, batch in batches: - showProgressBar(i + 1, batches.len, "Classifying bookmarks") + if batches.len == 0: + return @[] - let pruned = pruneTaxonomy(fullTaxonomy, batch, tfidfMap) - let folderIds = pruned.categories.mapIt(it.folderId) - let bookmarkIds = batch.mapIt($it.id) - let schema = if cfg.isSmallModel(): buildClassificationSchemaJsonSmall() - else: buildClassificationSchemaJson(folderIds, bookmarkIds) + var completedCount = 0 + var allSuggestions: seq[Suggestion] = @[] - let taxCats = pruned.categories.mapIt( - (id: it.folderId, path: it.folderPath, description: it.description, keywords: it.keywords.join(", ")) - ) - let batchTuples = batch.mapIt((id: $it.id, title: it.title, url: it.url)) - let prompt = buildClassificationPrompt(taxCats, batchTuples) + if conc <= 1: + for i, batch in batches: + showProgressBar(i + 1, batches.len, "Classifying bookmarks") + let suggestions = classifyBatchAsync(cfg, batch, fullTaxonomy, tfidfMap, i).waitFor() + allSuggestions.add(suggestions) + echo "" + return allSuggestions + + var pending: seq[Future[seq[Suggestion]]] = @[] + var batchIdx = 0 + + proc drainPending(): int = + var drained = 0 + var i = 0 + while i < pending.len: + if pending[i].finished: + let batchResult = pending[i].read() + pending.delete(i) + inc completedCount + showProgressBar(completedCount, batches.len, "Classifying bookmarks") + allSuggestions.add(batchResult) + inc drained + else: + inc i + return drained - try: - let response = chatCompletionSimple(cfg, SystemPrompt, prompt, schema) - - if response.hasKey("moves"): - for move in response["moves"]: - let moveObj = move - let bmId = parseBiggestInt(moveObj["bookmarkId"].getStr()) - let targetId = moveObj["targetFolderId"].getStr() - let conf = moveObj["confidence"].getStr() - let reason = moveObj["reason"].getStr() - - if targetId == "__skip__": - continue - - let bmIdx = batch.findIt(it.id == bmId) - var bmTitle = "" - var bmUrl = "" - if bmIdx >= 0: - bmTitle = 
batch[bmIdx].title - bmUrl = batch[bmIdx].url - let targetIdx = pruned.categories.findIt(it.folderId == targetId) - var targetPath = targetId - if targetIdx >= 0: - targetPath = pruned.categories[targetIdx].folderPath - let isNew = targetId.startsWith("__new_") - - allSuggestions.add(Suggestion( - bookmarkId: bmId, - bookmarkTitle: bmTitle, - bookmarkUrl: bmUrl, - targetFolderId: targetId, - targetFolderPath: if isNew: targetPath & " (new)" else: targetPath, - confidence: conf, - reason: reason, - isNewFolder: isNew, - )) - except CatchableError as e: - if cfg.verbose: - errorMsg &"Batch {i + 1} failed: {e.msg}" + while batchIdx < batches.len: + while pending.len < conc and batchIdx < batches.len: + pending.add(classifyBatchAsync(cfg, batches[batchIdx], fullTaxonomy, tfidfMap, batchIdx)) + inc batchIdx + + while not pending[0].finished: + if drainPending() > 0 and pending.len == 0: break + + discard drainPending() + + while pending.len > 0: + if not pending[0].finished: + poll() + else: + discard drainPending() echo "" return allSuggestions From d4b03ca8f0259e8f3b32feb43d489cd3920f5fad Mon Sep 17 00:00:00 2001 From: Corv Date: Sun, 5 Apr 2026 23:39:51 +0100 Subject: [PATCH 15/27] Remove redundant deps install step and unnecessary installDirs from nimble --- BUILD.md | 10 ---------- lazybookmarks.nimble | 1 - 2 files changed, 11 deletions(-) diff --git a/BUILD.md b/BUILD.md index 71a7865..18cf49c 100644 --- a/BUILD.md +++ b/BUILD.md @@ -25,20 +25,10 @@ sudo pacman -S nim yay -S ollama-cuda # or ollama-rocm for AMD ``` -## Install Nim dependencies - -```sh -nimble install cligen db_connector jsony -``` - ## Build ```sh -# Release (optimised, smaller binary) nimble release - -# Debug -nimble debug ``` The binary will be at `build/lazybookmarks`. 
diff --git a/lazybookmarks.nimble b/lazybookmarks.nimble index b79c9e0..ae90c35 100644 --- a/lazybookmarks.nimble +++ b/lazybookmarks.nimble @@ -6,7 +6,6 @@ description = "CLI bookmark organizer powered by local LLM" license = "MIT" srcDir = "src" bin = @["lazybookmarks/main"] -installDirs = @["lazybookmarks"] # Dependencies From 8adc5d9d1a6e36688ce9cfbc5748374d895b0748 Mon Sep 17 00:00:00 2001 From: Corv Date: Mon, 6 Apr 2026 00:10:48 +0100 Subject: [PATCH 16/27] Fix 9 bugs, remove dead code, and extract shared utilities across 9 files --- src/lazybookmarks/bootstrap.nim | 2 +- src/lazybookmarks/client.nim | 2 +- src/lazybookmarks/config.nim | 12 +++--- src/lazybookmarks/main.nim | 69 +++++++++++++----------------- src/lazybookmarks/model.nim | 18 ++++---- src/lazybookmarks/organizer.nim | 40 ++++++------------ src/lazybookmarks/prompts.nim | 30 ++++++------- src/lazybookmarks/runtime.nim | 2 +- src/lazybookmarks/storage.nim | 74 +++++++++++++++++++++------------ 9 files changed, 125 insertions(+), 124 deletions(-) diff --git a/src/lazybookmarks/bootstrap.nim b/src/lazybookmarks/bootstrap.nim index 25d3405..f16cb91 100644 --- a/src/lazybookmarks/bootstrap.nim +++ b/src/lazybookmarks/bootstrap.nim @@ -12,7 +12,7 @@ proc ensureReady*(cfg: var Config, registry: ModelRegistry) = let entry = findModel(registry, cfg.modelVariant) cfg.modelName = ollamaRef(entry) - if not isEntryReady(entry): + if not isEntryReady(entry, cfg): pullModel(entry) else: infoMsg "Model ready: " & ollamaRef(entry) diff --git a/src/lazybookmarks/client.nim b/src/lazybookmarks/client.nim index 9c58bef..07b410b 100644 --- a/src/lazybookmarks/client.nim +++ b/src/lazybookmarks/client.nim @@ -130,7 +130,7 @@ proc chatCompletion*(cfg: Config, messages: seq[Message], stderr.writeLine("[attempt " & $attempt & "] Error: " & e.msg) if attempt < maxRetries: let delay = 1000 * (1 shl (attempt - 1)) - discard execShellCmd("sleep " & $(delay * 3 div 1000)) + discard execShellCmd("sleep " & $(delay div 
1000)) raise newException(CatchableError, "chatCompletion failed after " & $maxRetries & " attempts: " & lastError) diff --git a/src/lazybookmarks/config.nim b/src/lazybookmarks/config.nim index ef532f6..e346e87 100644 --- a/src/lazybookmarks/config.nim +++ b/src/lazybookmarks/config.nim @@ -1,4 +1,4 @@ -import std/[os, strutils, re] +import std/[os, re, strutils] type ParamSize* = enum psSmall, psNormal @@ -28,9 +28,14 @@ proc isSmallModel*(cfg: Config): bool = const DefaultLlmUrl* = "http://127.0.0.1:11434/v1" const DefaultModelVariant* = "qwen3.5-2b" -const DefaultBatchSize* = 5 const DefaultConcurrency* = 4 +proc ollamaApiUrl*(cfg: Config): string = + if cfg.llmUrl.endsWith("/v1"): + cfg.llmUrl[0 ..< cfg.llmUrl.len - 3] + else: + cfg.llmUrl + proc xdgDataHome*: string = result = getEnv("XDG_DATA_HOME") if result.len == 0: @@ -105,6 +110,3 @@ proc loadConfig*(overrides: Config = Config()): Config = proc dbPath*(cfg: Config): string = cfg.dataDir / "bookmarks.db" - -proc configFilePath*: string = - defaultConfigDir() / "config.toml" diff --git a/src/lazybookmarks/main.nim b/src/lazybookmarks/main.nim index 30d0cc4..e4699c1 100644 --- a/src/lazybookmarks/main.nim +++ b/src/lazybookmarks/main.nim @@ -1,6 +1,5 @@ import std/[os, strutils, terminal, strformat] import cligen -import db_connector/db_sqlite import ./config import ./storage import ./model @@ -34,37 +33,22 @@ proc cmdImport(file: string, format = "auto", dryRun = false) = proc cmdOrganise(model = "", autoAcceptHigh = false, autoAcceptAll = false, limit = 0, batchSize = 0, concurrency = 0, verbose = false) = let overrides = Config(modelVariant: model, batchSize: batchSize, - concurrency: concurrency, verbose: verbose) + concurrency: concurrency, verbose: verbose, + autoAcceptHigh: autoAcceptHigh) var cfg = loadConfig(overrides) let registry = loadModelRegistry() ensureReady(cfg, registry) - discard cfg.organizeBookmarks(autoAcceptAll = autoAcceptAll) + discard cfg.organizeBookmarks(autoAcceptAll = 
autoAcceptAll, limit = limit) -proc cmdList(category = "", unorganised = false, format = "table") = +proc cmdList(category = "", unorganised = false) = let cfg = loadConfig() - let db = cfg.initDb() - defer: db.close() - var bookmarks: seq[BookmarkEntry] if unorganised: bookmarks = getUnorganisedBookmarks(cfg) else: - var query = "SELECT id, url, title, raw_folder, category, confidence FROM bookmarks" - if category.len > 0: - let escaped = category.replace("'", "''") - query.add " WHERE raw_folder = '" & escaped & "'" - query.add " ORDER BY added_at DESC LIMIT 50" - for row in db.fastRows(sql(query)): - bookmarks.add(BookmarkEntry( - id: parseBiggestInt(row[0]), - url: row[1], - title: row[2], - rawFolder: row[3], - category: row[4], - confidence: row[5], - )) + bookmarks = listBookmarks(cfg, category) if bookmarks.len == 0: dimMsg "No bookmarks found." @@ -72,27 +56,24 @@ proc cmdList(category = "", unorganised = false, format = "table") = for b in bookmarks: let title = if b.title.len > 0: b.title else: "(untitled)" - var category = "-" - if b.category.len > 0: category = b.category - elif b.rawFolder.len > 0: category = b.rawFolder - echo &" {title:<50} {category}" + var cat = "-" + if b.category.len > 0: cat = b.category + elif b.rawFolder.len > 0: cat = b.rawFolder + echo &" {title:<50} {cat}" echo &"\n {bookmarks.len} bookmarks" proc cmdSearch(query: string) = let cfg = loadConfig() - let db = cfg.initDb() - defer: db.close() - - let searchPattern = query.replace("'", "''") - let sqlQuery = &"SELECT url, title, raw_folder, category FROM bookmarks WHERE title LIKE '%{searchPattern}%' OR url LIKE '%{searchPattern}%' OR category LIKE '%{searchPattern}%' LIMIT 20" + let bookmarks = searchBookmarks(cfg, query) - var found = 0 - for row in db.fastRows(sql(sqlQuery)): - let title = if row[1].len > 0: row[1] else: "(untitled)" - echo &" {title:<50} {row[0][0..min(79, row[0].high)]}" - found.inc + if bookmarks.len == 0: + dimMsg &"No results for \"{query}\"" + return 
- echo &"\n {found} results for \"{query}\"" + for b in bookmarks: + let title = if b.title.len > 0: b.title else: "(untitled)" + echo &" {title:<50} {b.url[0..min(79, b.url.high)]}" + echo &"\n {bookmarks.len} results for \"{query}\"" proc cmdUndo = let cfg = loadConfig() @@ -114,8 +95,18 @@ proc cmdModelSet(variant: string) = var content = "" if fileExists(configPath): content = readFile(configPath) - content.add &"\nmodelVariant = \"{variant}\"\n" - writeFile(configPath, content) + var lines = content.splitLines() + var replaced = false + var newLines: seq[string] = @[] + for line in lines: + if line.strip().startsWith("modelVariant"): + newLines.add(&"modelVariant = \"{variant}\"") + replaced = true + else: + newLines.add(line) + if not replaced: + newLines.add(&"modelVariant = \"{variant}\"") + writeFile(configPath, newLines.join("\n") & "\n") infoMsg &"Default model set to {variant}" proc cmdModelDownload = @@ -203,7 +194,7 @@ when isMainModule: "batch-size": "Bookmarks per LLM request (0=auto)", "concurrency": "Parallel LLM requests (0=auto)", "verbose": "Show debug output"}], [cmdList, cmdName = "list", doc = "List bookmarks", - help = {"category": "Filter by folder path", "unorganised": "Show only unorganized", "format": "table|json|csv"}], + help = {"category": "Filter by folder path", "unorganised": "Show only unorganized"}], [cmdSearch, cmdName = "search", doc = "Search bookmarks", help = {"query": "Search term"}], [cmdUndo, cmdName = "undo", doc = "Undo last batch of classifications"], diff --git a/src/lazybookmarks/model.nim b/src/lazybookmarks/model.nim index f96b703..9b57049 100644 --- a/src/lazybookmarks/model.nim +++ b/src/lazybookmarks/model.nim @@ -1,4 +1,4 @@ -import std/[os, strutils, strformat, httpclient, json, terminal] +import std/[os, strformat, httpclient, json, terminal] import jsony import ./config @@ -36,35 +36,35 @@ proc pullModel*(entry: ModelEntry) = "Failed to pull " & refStr) quit(1) -proc listLocalModels*(): seq[string] = +proc 
listLocalModels*(cfg: Config): seq[string] = result = @[] try: let client = newHttpClient(timeout = 5000) defer: client.close() - let body = client.getContent("http://127.0.0.1:11434/api/tags") + let body = client.getContent(cfg.ollamaApiUrl() & "/api/tags") let jsn = parseJson(body) for m in jsn["models"]: result.add(m["name"].getStr()) except: discard -proc isEntryReady*(entry: ModelEntry): bool = +proc isEntryReady*(entry: ModelEntry, cfg: Config): bool = let refStr = ollamaRef(entry) - for localName in listLocalModels(): - if localName == refStr or localName.startsWith(refStr & ":"): + for localName in listLocalModels(cfg): + if localName == refStr: return true return false proc isModelReady*(cfg: Config, registry: ModelRegistry): bool = try: let entry = findModel(registry, cfg.modelVariant) - return isEntryReady(entry) + return isEntryReady(entry, cfg) except: return false proc ensureModel*(cfg: Config, registry: ModelRegistry) = let entry = findModel(registry, cfg.modelVariant) - if not isEntryReady(entry): + if not isEntryReady(entry, cfg): pullModel(entry) else: stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, @@ -74,7 +74,7 @@ proc listModels*(cfg: Config, registry: ModelRegistry) = echo "" for entry in registry.entries: let isCurrent = entry.name == cfg.modelVariant - let isReady = isEntryReady(entry) + let isReady = isEntryReady(entry, cfg) let marker = if isCurrent: " *" else: "" let status = if isReady: "[installed]" else: "[not installed]" let name = if isCurrent: entry.name & marker else: entry.name diff --git a/src/lazybookmarks/organizer.nim b/src/lazybookmarks/organizer.nim index 389ed2d..915a325 100644 --- a/src/lazybookmarks/organizer.nim +++ b/src/lazybookmarks/organizer.nim @@ -1,4 +1,4 @@ -import std/[os, strutils, strformat, json, re, math, tables, algorithm, sequtils, sets, asyncdispatch] +import std/[strutils, strformat, json, re, math, tables, algorithm, sequtils, sets, asyncdispatch] import 
db_connector/db_sqlite import ./config import ./storage @@ -49,19 +49,9 @@ proc tokenizeText*(text: string): seq[string] = proc extractDomainPatterns*(bookmarks: seq[BookmarkEntry], threshold = 0.2): seq[string] = var counts: Table[string, int] for b in bookmarks: - try: - var url = b.url - let idx = url.find("://") - if idx >= 0: - url = url[idx + 3 .. ^1] - let slashIdx = url.find('/') - if slashIdx >= 0: - url = url[0 .. slashIdx - 1] - url = url.replace(re"^www\.", "") - if url.len > 0: - counts[url] = counts.getOrDefault(url, 0) + 1 - except: - discard + let domain = extractDomain(b.url).replace(re"^www\.", "") + if domain.len > 0: + counts[domain] = counts.getOrDefault(domain, 0) + 1 let total = max(1, bookmarks.len) result = @[] for domain, count in counts: @@ -109,14 +99,7 @@ proc sampleExemplars*(bookmarks: seq[BookmarkEntry], count = 2): string = sorted.sort(proc(a, b: BookmarkEntry): int = cmp(b.addedAt, a.addedAt)) var parts: seq[string] = @[] for i in 0 .. min(count - 1, sorted.high): - var host = sorted[i].url - try: - let idx = host.find("://") - if idx >= 0: host = host[idx + 3 .. ^1] - let slashIdx = host.find('/') - if slashIdx >= 0: host = host[0 .. slashIdx - 1] - except: - discard + let host = extractDomain(sorted[i].url) let title = if sorted[i].title.len > 40: sorted[i].title[0 .. 
39] else: sorted[i].title parts.add("\"" & title & "\" " & host) return parts.join(" | ") @@ -198,7 +181,7 @@ proc runTaxonomyPhase*(cfg: Config, folders: seq[FolderEntry], let tfidfMap = computeTFIDF(folderBookmarks, allBookmarks) - var enriched: seq[tuple[id, path, count: string, domains, siblings, keywords, exemplars: string]] = @[] + var enriched: seq[tuple[id, path, count: string, domains, keywords, exemplars: string]] = @[] for folder in folders: let bookmarks = folderBookmarks.getOrDefault(folder.uuid, @[]) let domains = extractDomainPatterns(bookmarks) @@ -210,7 +193,6 @@ proc runTaxonomyPhase*(cfg: Config, folders: seq[FolderEntry], path: folder.path, count: $folder.bookmarkCount, domains: domains.join(", "), - siblings: "", keywords: keywords.join(", "), exemplars: exemplars, )) @@ -325,7 +307,7 @@ proc classifyBatchAsync(cfg: Config, batch: seq[BookmarkEntry], bookmarkTitle: bmTitle, bookmarkUrl: bmUrl, targetFolderId: targetId, - targetFolderPath: if isNew: targetPath & " (new)" else: targetPath, + targetFolderPath: targetPath, confidence: conf, reason: reason, isNewFolder: isNew, @@ -408,11 +390,11 @@ proc runClassificationPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], echo "" return allSuggestions -proc organizeBookmarks*(cfg: Config, autoAcceptAll: bool = false): int = +proc organizeBookmarks*(cfg: Config, autoAcceptAll: bool = false, limit: int = 0): int = let db = cfg.initDb() defer: db.close() - let uncategorized = getUnorganisedBookmarks(cfg) + let uncategorized = getUnorganisedBookmarks(cfg, limit) let webUncategorized = uncategorized.filterIt(it.url.startsWith("http://") or it.url.startsWith("https://")) if webUncategorized.len == 0: @@ -476,7 +458,9 @@ proc organizeBookmarks*(cfg: Config, autoAcceptAll: bool = false): int = var edited = 0 for s in suggestions: - let action = reviewSuggestion(s.bookmarkUrl, s.bookmarkTitle, s.targetFolderPath, s.confidence, s.reason) + let displayPath = s.targetFolderPath & (if s.isNewFolder: " (new)" 
else: "") + let action = reviewSuggestion(s.bookmarkUrl, s.bookmarkTitle, + displayPath, s.confidence, s.reason) case action of ReviewAction.accept: diff --git a/src/lazybookmarks/prompts.nim b/src/lazybookmarks/prompts.nim index 5607465..479aac2 100644 --- a/src/lazybookmarks/prompts.nim +++ b/src/lazybookmarks/prompts.nim @@ -1,5 +1,17 @@ import std/strutils +proc extractDomain*(url: string): string = + result = url + try: + let idx = result.find("://") + if idx >= 0: + result = result[idx + 3 .. ^1] + let slashIdx = result.find('/') + if slashIdx >= 0: + result = result[0 .. slashIdx - 1] + except: + discard + const SystemPrompt* = "You are a bookmark classifier. Given a user's folder structure and uncategorized bookmarks, assign each to the most appropriate existing folder. If no folder fits well, set targetFolderId to \"__skip__\" instead of forcing a poor match. Respond with ONLY valid JSON matching the required structure. No explanation, no markdown, no other text. Prefer the user's existing folder names. Only suggest new folders when necessary." 
const TaxonomySchemaJson* = """{ @@ -48,13 +60,12 @@ proc buildClassificationSchemaJsonSmall*(): string = proc buildClusterSchemaJsonSmall*(): string = return "{\"type\":\"object\",\"properties\":{\"clusters\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"name\":{\"type\":\"string\"},\"description\":{\"type\":\"string\"},\"keywords\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"parentFolderId\":{\"type\":\"string\"}},\"required\":[\"name\",\"description\",\"keywords\",\"parentFolderId\"]}}},\"required\":[\"clusters\"]}" -proc buildTaxonomyPrompt*(enrichedFolders: seq[tuple[id, path, count: string, domains, siblings, keywords, exemplars: string]]): string = +proc buildTaxonomyPrompt*(enrichedFolders: seq[tuple[id, path, count: string, domains, keywords, exemplars: string]]): string = var lines: seq[string] = @[] for f in enrichedFolders: var parts: seq[string] = @[] parts.add("[" & f.id & "] " & f.path & " (" & f.count & ")") if f.domains.len > 0: parts.add("domains: " & f.domains) - if f.siblings.len > 0: parts.add("siblings: " & f.siblings) if f.keywords.len > 0: parts.add("keywords: " & f.keywords) if f.exemplars.len > 0: parts.add("examples: " & f.exemplars) lines.add(parts.join(" | ")) @@ -66,18 +77,9 @@ proc buildTaxonomyPrompt*(enrichedFolders: seq[tuple[id, path, count: string, do proc formatBookmarkBatch*(bookmarks: seq[tuple[id, title, url: string]]): string = var lines: seq[string] = @[] for b in bookmarks: - var shortUrl = b.url - try: - let idx = shortUrl.find("://") - if idx >= 0: - shortUrl = shortUrl[idx + 3 .. ^1] - let slashIdx = shortUrl.find('/') - if slashIdx >= 0: - shortUrl = shortUrl[0 .. slashIdx - 1] - if shortUrl.len > 60: - shortUrl = shortUrl[0 .. 56] & "..." - except: - discard + var shortUrl = extractDomain(b.url) + if shortUrl.len > 60: + shortUrl = shortUrl[0 .. 56] & "..." 
let title = if b.title.len > 0: b.title else: "(untitled)" lines.add("[" & b.id & "] \"" & title & "\" " & shortUrl) return lines.join("\n") diff --git a/src/lazybookmarks/runtime.nim b/src/lazybookmarks/runtime.nim index ac08275..2c0ab72 100644 --- a/src/lazybookmarks/runtime.nim +++ b/src/lazybookmarks/runtime.nim @@ -12,7 +12,7 @@ proc isRuntimeRunning*(cfg: Config): bool = try: let client = newHttpClient(timeout = 2000) defer: client.close() - discard client.getContent("http://127.0.0.1:11434/api/tags") + discard client.getContent(cfg.ollamaApiUrl() & "/api/tags") return true except: return false diff --git a/src/lazybookmarks/storage.nim b/src/lazybookmarks/storage.nim index e642317..3f058a0 100644 --- a/src/lazybookmarks/storage.nim +++ b/src/lazybookmarks/storage.nim @@ -11,12 +11,8 @@ type title*: string rawFolder*: string category*: string - tags*: string - summary*: string - language*: string confidence*: string reason*: string - source*: string importId*: int64 organisedAt*: int64 addedAt*: int64 @@ -28,13 +24,6 @@ type parentId*: int64 bookmarkCount*: int - ImportEntry* = object - id*: int64 - filename*: string - format*: string - importedAt*: int64 - bookmarkCount*: int - const Schema = """ CREATE TABLE IF NOT EXISTS bookmarks ( id INTEGER PRIMARY KEY, @@ -42,12 +31,8 @@ CREATE TABLE IF NOT EXISTS bookmarks ( title TEXT, raw_folder TEXT, category TEXT, - tags TEXT, - summary TEXT, - language TEXT, confidence TEXT CHECK(confidence IN ('high','medium','low',NULL)), reason TEXT, - source TEXT, import_id INTEGER REFERENCES imports(id), organised_at INTEGER, added_at INTEGER @@ -213,7 +198,7 @@ proc importBookmarks*(cfg: Config, content: string, format: string, filename: st discard db.getOrCreateFolder(folder) try: db.exec( - sql("INSERT INTO bookmarks (url, title, raw_folder, source, import_id, added_at) VALUES (?, ?, ?, 'import', ?, ?)"), + sql("INSERT INTO bookmarks (url, title, raw_folder, import_id, added_at) VALUES (?, ?, ?, ?, ?)"), url, title, 
folder, importId, now ) count.inc @@ -226,7 +211,7 @@ proc getUnorganisedBookmarks*(cfg: Config, limit: int = 0): seq[BookmarkEntry] = let db = cfg.initDb() defer: db.close() - var query = "SELECT id, url, title, raw_folder, category, tags, summary, language, confidence, reason, source, import_id, organised_at, added_at FROM bookmarks WHERE organised_at IS NULL" + var query = "SELECT id, url, title, raw_folder, category, confidence FROM bookmarks WHERE organised_at IS NULL" if limit > 0: query.add &" LIMIT {limit}" query.add " ORDER BY added_at DESC" @@ -238,15 +223,52 @@ proc getUnorganisedBookmarks*(cfg: Config, limit: int = 0): seq[BookmarkEntry] = title: row[2], rawFolder: row[3], category: row[4], - tags: row[5], - summary: row[6], - language: row[7], - confidence: row[8], - reason: row[9], - source: row[10], - importId: if row[11].len > 0: parseBiggestInt(row[11]) else: 0, - organisedAt: if row[12].len > 0: parseBiggestInt(row[12]) else: 0, - addedAt: if row[13].len > 0: parseBiggestInt(row[13]) else: 0, + confidence: row[5], + )) + +proc listBookmarks*(cfg: Config, category: string = ""): seq[BookmarkEntry] = + let db = cfg.initDb() + defer: db.close() + + if category.len > 0: + for row in db.fastRows(sql( + "SELECT id, url, title, raw_folder, category, confidence FROM bookmarks WHERE raw_folder = ? 
ORDER BY added_at DESC LIMIT 50"), + category): + result.add(BookmarkEntry( + id: parseBiggestInt(row[0]), + url: row[1], + title: row[2], + rawFolder: row[3], + category: row[4], + confidence: row[5], + )) + else: + for row in db.fastRows(sql( + "SELECT id, url, title, raw_folder, category, confidence FROM bookmarks ORDER BY added_at DESC LIMIT 50")): + result.add(BookmarkEntry( + id: parseBiggestInt(row[0]), + url: row[1], + title: row[2], + rawFolder: row[3], + category: row[4], + confidence: row[5], + )) + +proc searchBookmarks*(cfg: Config, query: string): seq[BookmarkEntry] = + let db = cfg.initDb() + defer: db.close() + + let pattern = "%" & query & "%" + for row in db.fastRows(sql( + "SELECT id, url, title, raw_folder, category, confidence FROM bookmarks WHERE title LIKE ? OR url LIKE ? OR category LIKE ? LIMIT 20"), + pattern, pattern, pattern): + result.add(BookmarkEntry( + id: parseBiggestInt(row[0]), + url: row[1], + title: row[2], + rawFolder: row[3], + category: row[4], + confidence: row[5], )) proc getAllFolders*(cfg: Config): seq[FolderEntry] = From aa12b49bd0e3009c0ea639101a4e1c13f46731a2 Mon Sep 17 00:00:00 2001 From: Corv Date: Mon, 6 Apr 2026 00:17:01 +0100 Subject: [PATCH 17/27] Add CI and release workflows for Linux and macOS (x86_64 + arm64) --- .github/workflows/ci.yml | 52 +++++++++++++++++++++++++ .github/workflows/release.yml | 73 +++++++++++++++++++++++++++++++++++ .gitignore | 2 + 3 files changed, 127 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..96879df --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,52 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + build: + name: ${{ matrix.os }}-${{ matrix.arch }} + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: false + matrix: + include: + - os: linux + arch: x86_64 + runner: 
ubuntu-24.04 + - os: linux + arch: arm64 + runner: ubuntu-24.04-arm + - os: macos + arch: x86_64 + runner: macos-13 + - os: macos + arch: arm64 + runner: macos-14 + steps: + - uses: actions/checkout@v4 + + - name: Install Nim + uses: jiro4989/setup-nim-action@v2 + with: + nim-version: stable + + - name: Install system deps (Linux) + if: runner.os == 'Linux' + run: sudo apt-get update && sudo apt-get install -y libsqlite3-dev + + - name: Build + run: nimble release + + - name: Smoke test + run: | + ./build/lazybookmarks --help + ./build/lazybookmarks status + + - uses: actions/upload-artifact@v4 + with: + name: lazybookmarks-${{ matrix.os }}-${{ matrix.arch }} + path: build/lazybookmarks diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..88fbc81 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,73 @@ +name: Release + +on: + push: + tags: ["v*"] + +permissions: + contents: write + +jobs: + build: + name: ${{ matrix.os }}-${{ matrix.arch }} + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: false + matrix: + include: + - os: linux + arch: x86_64 + runner: ubuntu-24.04 + - os: linux + arch: arm64 + runner: ubuntu-24.04-arm + - os: macos + arch: x86_64 + runner: macos-13 + - os: macos + arch: arm64 + runner: macos-14 + steps: + - uses: actions/checkout@v4 + + - name: Install Nim + uses: jiro4989/setup-nim-action@v2 + with: + nim-version: stable + + - name: Install system deps (Linux) + if: runner.os == 'Linux' + run: sudo apt-get update && sudo apt-get install -y libsqlite3-dev + + - name: Build + run: nimble release + + - name: Package + run: | + mkdir -p dist + tar -czf "dist/lazybookmarks-${{ matrix.os }}-${{ matrix.arch }}.tar.gz" -C build lazybookmarks + ( cd dist && shasum -a 256 "lazybookmarks-${{ matrix.os }}-${{ matrix.arch }}.tar.gz" > "lazybookmarks-${{ matrix.os }}-${{ matrix.arch }}.tar.gz.sha256" ) + + - uses: actions/upload-artifact@v4 + with: + name: lazybookmarks-${{ matrix.os 
}}-${{ matrix.arch }} + path: dist/* + + release: + name: Publish + needs: build + runs-on: ubuntu-24.04 + steps: + - uses: actions/download-artifact@v4 + with: + path: dist + merge-multiple: true + + - name: Generate SHA256SUMS + run: ( cd dist && cat *.sha256 > SHA256SUMS ) + + - name: Create release + uses: softprops/action-gh-release@v2 + with: + generate_release_notes: true + files: dist/* diff --git a/.gitignore b/.gitignore index 39ef364..7483dc4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.gguf /*.html /build/ +/dist/ +/lazybookmarks/ From d2a080ff4b7dba2541bfbaaaff9160dae021b8d1 Mon Sep 17 00:00:00 2001 From: Corv Date: Mon, 6 Apr 2026 00:39:51 +0100 Subject: [PATCH 18/27] Add dedup and check-links subcommands for bookmark cleanup --- src/lazybookmarks/linkchecker.nim | 119 ++++++++++++++++++++++ src/lazybookmarks/main.nim | 100 ++++++++++++++++++- src/lazybookmarks/prompts.nim | 13 +-- src/lazybookmarks/storage.nim | 158 +++++++++++++++++++++++++++++- src/lazybookmarks/ui.nim | 63 +++++++++++- 5 files changed, 438 insertions(+), 15 deletions(-) create mode 100644 src/lazybookmarks/linkchecker.nim diff --git a/src/lazybookmarks/linkchecker.nim b/src/lazybookmarks/linkchecker.nim new file mode 100644 index 0000000..f2e914c --- /dev/null +++ b/src/lazybookmarks/linkchecker.nim @@ -0,0 +1,119 @@ +import std/[osproc, strutils, sequtils, os, tables] +import ./config +import ./storage + +type LinkStatus* = enum + lsAlive, lsDead, lsUnknown, lsRedirected + +type LinkResult* = object + bookmark*: BookmarkEntry + status*: LinkStatus + statusCode*: int + redirectUrl*: string + +const TrackingParams = [ + "utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content", + "fbclid", "gclid", "msclkid", +] + +proc checkBatch(urls: seq[string]): string = + if urls.len == 0: + return "" + let tmpFile = getTempDir() / "lbcheck_urls.txt" + let outFile = getTempDir() / "lbcheck_results.txt" + try: + var f: File + if not open(f, tmpFile, fmWrite): + return 
"" + for u in urls: + f.writeLine(u) + f.close() + + let cmd = "cat '" & tmpFile & "' | xargs -P " & $urls.len & + " -I{} curl -sI -o /dev/null -w '%{http_code}\\t%{redirect_url}\\t{}\\n' " & + "--max-time 10 -L --max-redirs 5 -A 'Mozilla/5.0 (compatible; lazybookmarks/0.1)' '{}' > '" & outFile & "' 2>/dev/null" + discard execShellCmd(cmd) + + if not fileExists(outFile): + return "" + result = readFile(outFile) + except: + discard + finally: + try: removeFile(tmpFile) + except: discard + try: removeFile(outFile) + except: discard + +proc classifyCode(code: int, redirectUrl: string): LinkStatus = + if code >= 200 and code < 400: + if redirectUrl.len > 0: + var isTracking = false + for tp in TrackingParams: + if redirectUrl.contains("?" & tp & "=") or redirectUrl.contains("&" & tp & "="): + isTracking = true + break + if isTracking: + return lsAlive + return lsRedirected + return lsAlive + if code == 404 or code == 410: + return lsDead + return lsUnknown + +proc checkAllLinks*(cfg: Config, bookmarks: seq[BookmarkEntry], + concurrency: int = 8, + onProgress: proc(current, total: int) = nil): seq[LinkResult] = + result = newSeq[LinkResult](bookmarks.len) + let total = bookmarks.len + if total == 0: + return + + let batchSize = min(concurrency * 4, total) + var offset = 0 + var done = 0 + + while offset < total: + let endIdx = min(offset + batchSize, total) + var urls: seq[string] = @[] + for i in offset ..< endIdx: + urls.add(bookmarks[i].url) + + let content = checkBatch(urls) + var resultMap: Table[string, tuple[code: int, redirectUrl: string]] = initTable[string, tuple[code: int, redirectUrl: string]]() + + for line in content.splitLines(): + if line.len == 0: + continue + let parts = line.split("\t", maxsplit = 2) + if parts.len < 3: + continue + var code = 0 + try: code = parseInt(parts[0]) + except: continue + let redirect = parts[1] + let url = parts[2] + if url.len > 0: + resultMap[url] = (code, redirect) + + for i in offset ..< endIdx: + let url = 
bookmarks[i].url + if url in resultMap: + let (code, redirect) = resultMap[url] + result[i] = LinkResult( + bookmark: bookmarks[i], + status: classifyCode(code, redirect), + statusCode: code, + redirectUrl: redirect, + ) + else: + result[i] = LinkResult( + bookmark: bookmarks[i], + status: lsUnknown, + statusCode: 0, + ) + + done = endIdx + offset = endIdx + if onProgress != nil: + onProgress(done, total) diff --git a/src/lazybookmarks/main.nim b/src/lazybookmarks/main.nim index e4699c1..72a00d9 100644 --- a/src/lazybookmarks/main.nim +++ b/src/lazybookmarks/main.nim @@ -1,4 +1,4 @@ -import std/[os, strutils, terminal, strformat] +import std/[os, strutils, terminal, strformat, sequtils, algorithm] import cligen import ./config import ./storage @@ -6,6 +6,7 @@ import ./model import ./runtime import ./bootstrap import ./organizer +import ./linkchecker import ./ui proc cmdImport(file: string, format = "auto", dryRun = false) = @@ -184,6 +185,98 @@ proc cmdDoctor = else: warnMsg &"{issues} issue(s) found" +proc cmdDedup(interactive = true, autoRemove = false) = + let cfg = loadConfig() + let groups = findDuplicates(cfg) + + if groups.len == 0: + infoMsg "No duplicate bookmarks found." 
+ return + + var totalDupes = 0 + for g in groups: + totalDupes.inc g.dupes.len + dimMsg &"Found {groups.len} duplicate group(s) ({totalDupes} total duplicates)" + echo "" + + var totalRemoved = 0 + + if interactive: + for i, g in groups: + let remove = reviewDuplicateGroup(i + 1, groups.len, g) + if remove: + let ids = g.dupes.mapIt(it.id) + let removed = cfg.removeDuplicates(ids) + totalRemoved.inc removed + if removed > 0: + infoMsg &"Removed {removed} duplicate(s)" + else: + dimMsg "Skipped" + else: + if autoRemove: + for g in groups: + let ids = g.dupes.mapIt(it.id) + let removed = cfg.removeDuplicates(ids) + totalRemoved.inc removed + infoMsg &"Removed {totalRemoved} duplicate(s) across {groups.len} group(s)" + else: + for i, g in groups: + stdout.styledWriteLine(styleBright, &" Group {i+1}/{groups.len} ", resetStyle, styleDim, &"({g.reason})", resetStyle) + let title = if g.keep.title.len > 0: g.keep.title else: "(untitled)" + stdout.styledWriteLine(" Keep: ", fgGreen, title, resetStyle, styleDim, &" [{g.keep.url[0..min(79, g.keep.url.high)]}]", resetStyle) + for d in g.dupes: + let dt = if d.title.len > 0: d.title else: "(untitled)" + stdout.styledWriteLine(styleDim, " - ", resetStyle, dt, styleDim, &" [{d.url[0..min(79, d.url.high)]}]", resetStyle) + echo "" + dimMsg &"Run with --auto-remove to delete duplicates, or --interactive to review each group" + +proc cmdCheckLinks(concurrency = 8, deadOnly = false, deleteDead = false, + unorganised = false) = + let cfg = loadConfig() + + var bookmarks: seq[BookmarkEntry] + if unorganised: + bookmarks = getUnorganisedBookmarks(cfg) + else: + bookmarks = getAllBookmarks(cfg) + + if bookmarks.len == 0: + dimMsg "No bookmarks to check." + return + + infoMsg &"Checking {bookmarks.len} bookmark(s) (concurrency: {concurrency})..." 
+ echo "" + + var results: seq[LinkResult] + + proc onProgress(current, total: int) = + showProgressBar(current, total, " Checking") + + results = checkAllLinks(cfg, bookmarks, concurrency, onProgress) + + stdout.write "\n" + + var filtered: seq[LinkResult] + if deadOnly: + filtered = results.filterIt(it.status == lsDead) + else: + filtered = results + + if filtered.len > 0 and not deadOnly: + filtered.sort(proc(a, b: LinkResult): int = + result = ord(a.status) - ord(b.status)) + + for r in filtered: + showLinkResult(r) + + showLinkSummary(results) + + if deleteDead: + let deadIds = results.filterIt(it.status == lsDead).mapIt(it.bookmark.id) + if deadIds.len > 0: + let removed = cfg.deleteBookmarks(deadIds) + infoMsg &"Deleted {removed} dead bookmark(s)" + when isMainModule: dispatchMulti( [cmdImport, cmdName = "import", doc = "Import bookmarks from a file", @@ -204,4 +297,9 @@ when isMainModule: [cmdModelDownload, cmdName = "model-download", doc = "Download model without running organise"], [cmdStatus, cmdName = "status", doc = "Show runtime and model status"], [cmdDoctor, cmdName = "doctor", doc = "Run self-diagnostic checks"], + [cmdDedup, cmdName = "dedup", doc = "Find and remove duplicate bookmarks", + help = {"interactive": "Review each duplicate group", "auto-remove": "Remove all duplicates without prompting"}], + [cmdCheckLinks, cmdName = "check-links", doc = "Check bookmarks for dead links", + help = {"concurrency": "Parallel requests", "dead-only": "Show only dead links", + "delete-dead": "Delete dead bookmarks", "unorganised": "Only check unorganized bookmarks"}], ) diff --git a/src/lazybookmarks/prompts.nim b/src/lazybookmarks/prompts.nim index 479aac2..8f3d0aa 100644 --- a/src/lazybookmarks/prompts.nim +++ b/src/lazybookmarks/prompts.nim @@ -1,16 +1,5 @@ import std/strutils - -proc extractDomain*(url: string): string = - result = url - try: - let idx = result.find("://") - if idx >= 0: - result = result[idx + 3 .. 
^1] - let slashIdx = result.find('/') - if slashIdx >= 0: - result = result[0 .. slashIdx - 1] - except: - discard +import ./storage const SystemPrompt* = "You are a bookmark classifier. Given a user's folder structure and uncategorized bookmarks, assign each to the most appropriate existing folder. If no folder fits well, set targetFolderId to \"__skip__\" instead of forcing a poor match. Respond with ONLY valid JSON matching the required structure. No explanation, no markdown, no other text. Prefer the user's existing folder names. Only suggest new folders when necessary." diff --git a/src/lazybookmarks/storage.nim b/src/lazybookmarks/storage.nim index 3f058a0..288d320 100644 --- a/src/lazybookmarks/storage.nim +++ b/src/lazybookmarks/storage.nim @@ -1,4 +1,4 @@ -import std/[re, strutils, strformat, random, times, json] +import std/[re, strutils, strformat, random, times, json, uri, sequtils] import db_connector/db_sqlite import ./config @@ -65,6 +65,8 @@ CREATE INDEX IF NOT EXISTS idx_bookmarks_import ON bookmarks(import_id); CREATE INDEX IF NOT EXISTS idx_bookmarks_folder ON bookmarks(raw_folder); """ +proc initDb*(cfg: Config): DbConn + proc genUuid*: string = const hexChars = "0123456789abcdef" var s = "" @@ -74,6 +76,145 @@ proc genUuid*: string = s.add hexChars[rand(15)] return s +const TrackingParams = [ + "utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content", + "fbclid", "gclid", "msclkid", "ref", "source", "mc_cid", "mc_eid", +] + +proc normalizeUrl*(url: string): string = + try: + var u = parseUri(url) + var host = u.hostname.toLowerAscii() + var path = u.path + if path.len > 1 and path.endsWith("/"): + path = path[0 ..< path.len - 1] + var query = u.query + if query.len > 0: + var pairs: seq[string] = @[] + for part in query.split('&'): + let eqIdx = part.find('=') + let key = if eqIdx >= 0: part[0 ..< eqIdx].toLowerAscii() else: part.toLowerAscii() + var isTracking = false + for tp in TrackingParams: + if key == tp: + isTracking = true 
+ break + if not isTracking: + pairs.add(part) + if pairs.len > 0: + query = pairs.join("&") + result = host & path & "?" & query + else: + result = host & path + else: + result = host & path + except: + result = url.toLowerAscii() + +proc extractDomain*(url: string): string = + try: + let u = parseUri(url) + result = u.hostname.toLowerAscii() + except: + let idx = url.find("://") + if idx >= 0: + let rest = url[idx + 3 .. url.high] + let slashIdx = rest.find('/') + result = if slashIdx >= 0: rest[0 ..< slashIdx] else: rest + else: + result = url + +type + DuplicateGroup* = object + keep*: BookmarkEntry + dupes*: seq[BookmarkEntry] + reason*: string + +proc findDuplicates*(cfg: Config): seq[DuplicateGroup] = + let db = cfg.initDb() + defer: db.close() + + for row in db.fastRows(sql("SELECT id, url, title, raw_folder, category, confidence FROM bookmarks ORDER BY added_at DESC")): + let b = BookmarkEntry( + id: parseBiggestInt(row[0]), + url: row[1], + title: row[2], + rawFolder: row[3], + category: row[4], + confidence: row[5], + ) + + let normUrl = normalizeUrl(b.url) + var matched = false + + for i in 0 ..< result.len: + let g = result[i] + let keepNorm = normalizeUrl(g.keep.url) + if keepNorm == normUrl and normUrl.len > 0: + result[i].dupes.add(b) + matched = true + break + + if not matched: + for i in 0 ..< result.len: + let g = result[i] + let keepDomain = extractDomain(g.keep.url) + let bDomain = extractDomain(b.url) + if keepDomain == bDomain and keepDomain.len > 0: + let keepTitle = g.keep.title.strip().toLowerAscii() + let bTitle = b.title.strip().toLowerAscii() + if keepTitle.len > 3 and keepTitle == bTitle: + result[i].dupes.add(b) + matched = true + break + + if not matched: + result.add(DuplicateGroup(keep: b, dupes: @[], reason: "")) + + for i in 0 ..< result.len: + if result[i].dupes.len > 0: + let keepNorm = normalizeUrl(result[i].keep.url) + var allNormMatch = true + for d in result[i].dupes: + if normalizeUrl(d.url) != keepNorm: + allNormMatch = 
false + break + result[i].reason = if allNormMatch: "normalized URL match" else: "same domain + title" + + result = result.filterIt(it.dupes.len > 0) + + for i in 0 ..< result.len: + var g = result[i] + if g.dupes.len > 0: + var bestIdx = 0 + for j in 0 ..< g.dupes.len: + if g.dupes[j].category.len > 0 and g.keep.category.len == 0: + bestIdx = j + 1 + break + if bestIdx > 0: + let oldKeep = g.keep + g.keep = g.dupes[bestIdx - 1] + g.dupes[bestIdx - 1] = oldKeep + result[i] = g + +proc removeDuplicates*(cfg: Config, ids: seq[int64]): int = + if ids.len == 0: + return 0 + let db = cfg.initDb() + defer: db.close() + let placeholders = repeat("?", ids.len).join(",") + result = db.execAffectedRows( + sql(&"DELETE FROM bookmarks WHERE id IN ({placeholders})"), ids) + +proc deleteBookmarks*(cfg: Config, ids: seq[int64]): int = + if ids.len == 0: + return 0 + let db = cfg.initDb() + defer: db.close() + let placeholders = repeat("?", ids.len).join(",") + result = db.execAffectedRows( + sql(&"DELETE FROM bookmarks WHERE id IN ({placeholders})"), ids) + proc initDb*(cfg: Config): DbConn = ensureDir(cfg.dataDir) result = open(cfg.dbPath(), "", "", "") @@ -226,6 +367,21 @@ proc getUnorganisedBookmarks*(cfg: Config, limit: int = 0): seq[BookmarkEntry] = confidence: row[5], )) +proc getAllBookmarks*(cfg: Config): seq[BookmarkEntry] = + let db = cfg.initDb() + defer: db.close() + + for row in db.fastRows(sql( + "SELECT id, url, title, raw_folder, category, confidence FROM bookmarks ORDER BY added_at DESC")): + result.add(BookmarkEntry( + id: parseBiggestInt(row[0]), + url: row[1], + title: row[2], + rawFolder: row[3], + category: row[4], + confidence: row[5], + )) + proc listBookmarks*(cfg: Config, category: string = ""): seq[BookmarkEntry] = let db = cfg.initDb() defer: db.close() diff --git a/src/lazybookmarks/ui.nim b/src/lazybookmarks/ui.nim index b6cc50a..3f54db9 100644 --- a/src/lazybookmarks/ui.nim +++ b/src/lazybookmarks/ui.nim @@ -1,4 +1,6 @@ -import std/[terminal, 
strutils] +import std/[terminal, strutils, strformat] +import ./storage +import ./linkchecker proc infoMsg*(msg: string) = stdout.styledWriteLine(styleBright, fgGreen, " ✓ ", fgDefault, resetStyle, msg) @@ -59,3 +61,62 @@ proc reviewSuggestion*(url: string, title: string, targetFolder: string, confide stdout.write "\r\e[2K" stdout.write " > " stdout.flushFile() + +proc reviewDuplicateGroup*(idx: int, total: int, group: DuplicateGroup): bool = + echo "" + stdout.styledWriteLine(styleBright, fgCyan, &" Duplicate group {idx}/{total} ", resetStyle, styleDim, &"({group.reason})", resetStyle) + stdout.styledWriteLine(styleBright, " Keep: ", fgGreen, group.keep.title, resetStyle, styleDim, &" [{group.keep.url[0..min(79, group.keep.url.high)]}]", resetStyle) + for i, d in group.dupes: + let title = if d.title.len > 0: d.title else: "(untitled)" + stdout.styledWriteLine(styleDim, " - ", resetStyle, title, styleDim, &" [{d.url[0..min(79, d.url.high)]}]", resetStyle) + stdout.styledWriteLine(styleBright, " └─ ", resetStyle, styleDim, "[R]emove dupes [S]kip [q]uit", resetStyle) + stdout.write " > " + stdout.flushFile() + + while true: + let input = stdin.readLine().strip().toLowerAscii() + case input + of "r", "remove": return true + of "s", "skip": return false + of "q", "quit": return false + else: + stdout.write "\r\e[2K" + stdout.write " > " + stdout.flushFile() + +proc showLinkResult*(r: LinkResult) = + let title = if r.bookmark.title.len > 0: r.bookmark.title else: "(untitled)" + let (label, color) = case r.status + of lsAlive: ("OK", fgGreen) + of lsDead: ("DEAD", fgRed) + of lsUnknown: ("???", fgYellow) + of lsRedirected: ("REDIR", fgCyan) + var extra = "" + if r.status == lsRedirected and r.redirectUrl.len > 0: + extra = &" -> {r.redirectUrl[0..min(60, r.redirectUrl.high)]}" + if r.statusCode > 0: + extra = &" [{r.statusCode}]{extra}" + stdout.styledWrite(" ", color, &"{label:<5}", resetStyle, &" {title:<50}", styleDim, &" {r.bookmark.url[0..min(60, 
r.bookmark.url.high)]}{extra}", resetStyle, "\n") + +proc showLinkSummary*(results: seq[LinkResult]) = + var alive = 0 + var dead = 0 + var unknown = 0 + var redirected = 0 + for r in results: + case r.status + of lsAlive: inc alive + of lsDead: inc dead + of lsUnknown: inc unknown + of lsRedirected: inc redirected + echo "" + stdout.styledWriteLine(styleBright, " Summary:", resetStyle) + if alive > 0: + stdout.styledWriteLine(" ", fgGreen, &"{alive} alive", resetStyle) + if dead > 0: + stdout.styledWriteLine(" ", fgRed, &"{dead} dead", resetStyle) + if redirected > 0: + stdout.styledWriteLine(" ", fgCyan, &"{redirected} redirected", resetStyle) + if unknown > 0: + stdout.styledWriteLine(" ", fgYellow, &"{unknown} unknown", resetStyle) + echo "" From fe4e6e7863b578ad1009264e42227a9067cb5079 Mon Sep 17 00:00:00 2001 From: Corv Date: Mon, 6 Apr 2026 08:25:47 +0100 Subject: [PATCH 19/27] Fix loadConfig ignoring config.toml written by model-set --- src/lazybookmarks/config.nim | 32 ++++++++++++++++++++++++++++++- src/lazybookmarks/linkchecker.nim | 2 +- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/lazybookmarks/config.nim b/src/lazybookmarks/config.nim index e346e87..69ad5d8 100644 --- a/src/lazybookmarks/config.nim +++ b/src/lazybookmarks/config.nim @@ -55,9 +55,39 @@ proc defaultConfigDir*: string = proc ensureDir*(dir: string) = createDir(dir) +proc readTomlString(content: string, key: string): string = + for line in content.splitLines(): + let stripped = line.strip() + if stripped.startsWith(key & " = "): + var val = stripped[key.len + 3 .. 
stripped.high].strip() + if val.startsWith("\"") and val.endsWith("\""): + val = val[1 ..< val.high] + return val + return "" + +proc readTomlInt(content: string, key: string): int = + let val = readTomlString(content, key) + if val.len > 0: + try: return parseInt(val) + except: discard + return 0 + +proc readTomlBool(content: string, key: string): bool = + let val = readTomlString(content, key) + return val == "true" + proc loadConfig*(overrides: Config = Config()): Config = + let configPath = defaultConfigDir() / "config.toml" + var fileModel = "" + var fileAutoAccept = false + if fileExists(configPath): + let content = readFile(configPath) + fileModel = readTomlString(content, "modelVariant") + fileAutoAccept = readTomlBool(content, "autoAcceptHigh") + let variant = if overrides.modelVariant.len > 0: overrides.modelVariant elif getEnv("LB_MODEL").len > 0: getEnv("LB_MODEL") + elif fileModel.len > 0: fileModel else: DefaultModelVariant let ps = parseParamSize(variant) let defaultBatch = if ps == psSmall: 5 else: 10 @@ -66,7 +96,7 @@ proc loadConfig*(overrides: Config = Config()): Config = modelVariant: variant, dataDir: defaultDataDir(), runtimeManaged: true, - autoAcceptHigh: false, + autoAcceptHigh: fileAutoAccept, batchSize: defaultBatch, concurrency: DefaultConcurrency, verbose: false, diff --git a/src/lazybookmarks/linkchecker.nim b/src/lazybookmarks/linkchecker.nim index f2e914c..a48e3d0 100644 --- a/src/lazybookmarks/linkchecker.nim +++ b/src/lazybookmarks/linkchecker.nim @@ -1,4 +1,4 @@ -import std/[osproc, strutils, sequtils, os, tables] +import std/[strutils, os, tables] import ./config import ./storage From 83ac10369f1ad555d17c7b3230e1c87c37768410 Mon Sep 17 00:00:00 2001 From: Corv Date: Mon, 6 Apr 2026 08:36:36 +0100 Subject: [PATCH 20/27] Switch to Ollama native /api/chat endpoint with constrained decoding Use the parameter for schema enforcement at the inference layer (llama.cpp grammar-based constrained decoding) instead of the OpenAI- compatible 
/v1/chat/completions endpoint's post-hoc response_format hint. This fixes the small model (qwen3.5:0.8b) returning non-JSON responses. - client.nim: native /api/chat with param for Ollama, fall back to OpenAI /v1/chat/completions when LLM_URL is set (runtimeManaged=false) - config.nim: default URL changed to native base (no /v1 suffix), strip trailing slashes in ollamaApiUrl(), remove unused readTomlInt - organizer.nim: use full schemas for all model sizes (constrained decoding eliminates the need for simplified small-model schemas) --- src/lazybookmarks/client.nim | 107 +++++++++++++++++++++----------- src/lazybookmarks/config.nim | 11 +--- src/lazybookmarks/organizer.nim | 6 +- 3 files changed, 77 insertions(+), 47 deletions(-) diff --git a/src/lazybookmarks/client.nim b/src/lazybookmarks/client.nim index 07b410b..22d2339 100644 --- a/src/lazybookmarks/client.nim +++ b/src/lazybookmarks/client.nim @@ -68,19 +68,30 @@ proc extractJson*(s: string): string = return cleaned[start .. endPos] proc buildRequestBody(cfg: Config, messages: seq[Message], jsonSchema: string): JsonNode = - result = %*{ - "model": cfg.modelName, - "messages": messages, - "temperature": 0.1, - "max_tokens": 2048, - "options": { - "think": false, - }, - } - if jsonSchema.len > 0: - if cfg.isSmallModel(): - result["response_format"] = %*{ "type": "json_object" } - else: + let native = cfg.runtimeManaged # true selects the Ollama native /api/chat request shape + if native: + result = %*{ + "model": cfg.modelName, + "messages": messages, + "stream": false, + "think": false, # a top-level request field in Ollama /api/chat, not a model option + "options": { + "temperature": 0.1, # sampling parameters are only read from options on the native endpoint + }, + } + if jsonSchema.len > 0: + result["format"] = parseJson(jsonSchema) + else: + result = %*{ + "model": cfg.modelName, + "messages": messages, + "temperature": 0.1, + "max_tokens": 2048, + "options": { + "think": false, + }, + } + if jsonSchema.len > 0: result["response_format"] = %*{ "type": "json_schema", "json_schema": { @@ -89,16 +100,29 @@ proc buildRequestBody(cfg: Config, messages: seq[Message], jsonSchema: string): } } +proc
chatUrl(cfg: Config): string = + if cfg.runtimeManaged: + cfg.ollamaApiUrl() & "/api/chat" + else: + cfg.llmUrl & "/chat/completions" + +proc extractContent(parsed: JsonNode, native: bool): string = + if native: + parsed["message"]["content"].getStr() + else: + parsed["choices"][0]["message"]["content"].getStr() + proc chatCompletion*(cfg: Config, messages: seq[Message], jsonSchema: string = "", maxRetries: int = 3): JsonNode = let body = buildRequestBody(cfg, messages, jsonSchema) + let native = cfg.runtimeManaged let client = newHttpClient(timeout = 120000) client.headers = newHttpHeaders([("Content-Type", "application/json")]) defer: client.close() - let url = cfg.llmUrl & "/chat/completions" + let url = chatUrl(cfg) if cfg.verbose: stderr.writeLine("[chat] POST " & url & " model=" & cfg.modelName) for m in messages: @@ -113,17 +137,23 @@ proc chatCompletion*(cfg: Config, messages: seq[Message], stderr.writeLine("[attempt " & $attempt & "] -> " & $response.len & " bytes") let parsed = parseJson(response) - if parsed.hasKey("choices") and parsed["choices"].len > 0: - let rawContent = parsed["choices"][0]["message"]["content"].getStr() - let content = extractJson(rawContent) - if content.len > 0: - if cfg.verbose: - stderr.writeLine("[chat] response: " & content[0..min(200, content.high)]) - return parseJson(content) - else: - lastError = "No JSON found in response: " & rawContent[0..min(200, rawContent.high)] + var rawContent = "" + try: + rawContent = extractContent(parsed, native) + except CatchableError: + lastError = "Unexpected response format: " & response[0..min(200, response.high)] + if attempt < maxRetries: + let delay = 1000 * (1 shl (attempt - 1)) + discard execShellCmd("sleep " & $(delay div 1000)) + continue + + let content = extractJson(rawContent) + if content.len > 0: + if cfg.verbose: + stderr.writeLine("[chat] response: " & content[0..min(200, content.high)]) + return parseJson(content) else: - lastError = "No choices in response: " & 
response[0..min(200, response.high)] + lastError = "No JSON found in response: " & rawContent[0..min(200, rawContent.high)] except CatchableError as e: lastError = e.msg if cfg.verbose: @@ -146,7 +176,8 @@ proc chatCompletionAsync*(cfg: Config, messages: seq[Message], jsonSchema: string = "", maxRetries: int = 3): Future[JsonNode] {.async.} = let body = buildRequestBody(cfg, messages, jsonSchema) - let url = cfg.llmUrl & "/chat/completions" + let native = cfg.runtimeManaged + let url = chatUrl(cfg) if cfg.verbose: stderr.writeLine("[chat-async] POST " & url & " model=" & cfg.modelName) @@ -172,17 +203,23 @@ proc chatCompletionAsync*(cfg: Config, messages: seq[Message], stderr.writeLine("[attempt " & $attempt & "] -> " & $response.len & " bytes") let parsed = parseJson(response) - if parsed.hasKey("choices") and parsed["choices"].len > 0: - let rawContent = parsed["choices"][0]["message"]["content"].getStr() - let content = extractJson(rawContent) - if content.len > 0: - if cfg.verbose: - stderr.writeLine("[chat-async] response: " & content[0..min(200, content.high)]) - return parseJson(content) - else: - lastError = "No JSON found in response: " & rawContent[0..min(200, rawContent.high)] + var rawContent = "" + try: + rawContent = extractContent(parsed, native) + except CatchableError: + lastError = "Unexpected response format: " & response[0..min(200, response.high)] + if attempt < maxRetries: + let delay = 1000 * (1 shl (attempt - 1)) + await sleepAsync(delay) + continue + + let content = extractJson(rawContent) + if content.len > 0: + if cfg.verbose: + stderr.writeLine("[chat-async] response: " & content[0..min(200, content.high)]) + return parseJson(content) else: - lastError = "No choices in response: " & response[0..min(200, response.high)] + lastError = "No JSON found in response: " & rawContent[0..min(200, rawContent.high)] except CatchableError as e: lastError = e.msg if cfg.verbose: diff --git a/src/lazybookmarks/config.nim b/src/lazybookmarks/config.nim 
index 69ad5d8..390cbc0 100644 --- a/src/lazybookmarks/config.nim +++ b/src/lazybookmarks/config.nim @@ -26,13 +26,15 @@ proc parseParamSize*(variant: string): ParamSize = proc isSmallModel*(cfg: Config): bool = cfg.paramSize == psSmall -const DefaultLlmUrl* = "http://127.0.0.1:11434/v1" +const DefaultLlmUrl* = "http://127.0.0.1:11434" const DefaultModelVariant* = "qwen3.5-2b" const DefaultConcurrency* = 4 proc ollamaApiUrl*(cfg: Config): string = if cfg.llmUrl.endsWith("/v1"): cfg.llmUrl[0 ..< cfg.llmUrl.len - 3] + elif cfg.llmUrl.endsWith("/"): + cfg.llmUrl[0 ..< cfg.llmUrl.len - 1] else: cfg.llmUrl @@ -65,13 +67,6 @@ proc readTomlString(content: string, key: string): string = return val return "" -proc readTomlInt(content: string, key: string): int = - let val = readTomlString(content, key) - if val.len > 0: - try: return parseInt(val) - except: discard - return 0 - proc readTomlBool(content: string, key: string): bool = let val = readTomlString(content, key) return val == "true" diff --git a/src/lazybookmarks/organizer.nim b/src/lazybookmarks/organizer.nim index 915a325..b5812d6 100644 --- a/src/lazybookmarks/organizer.nim +++ b/src/lazybookmarks/organizer.nim @@ -228,8 +228,7 @@ proc runClusterPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], let rootIds = rootFolders.mapIt(it.id) let prompt = buildClusterPrompt(batchTuples, taxCats, rootFolders) - let schema = if cfg.isSmallModel(): buildClusterSchemaJsonSmall() - else: buildClusterSchemaJson(rootIds) + let schema = buildClusterSchemaJson(rootIds) let response = chatCompletionSimple(cfg, SystemPrompt, prompt, schema) @@ -266,8 +265,7 @@ proc classifyBatchAsync(cfg: Config, batch: seq[BookmarkEntry], let pruned = pruneTaxonomy(fullTaxonomy, batch, tfidfMap) let folderIds = pruned.categories.mapIt(it.folderId) let bookmarkIds = batch.mapIt($it.id) - let schema = if cfg.isSmallModel(): buildClassificationSchemaJsonSmall() - else: buildClassificationSchemaJson(folderIds, bookmarkIds) + let schema = 
buildClassificationSchemaJson(folderIds, bookmarkIds) let taxCats = pruned.categories.mapIt( (id: it.folderId, path: it.folderPath, description: it.description, keywords: it.keywords.join(", ")) From c5ae8c4b20022e1476a08e399213910e5b7831de Mon Sep 17 00:00:00 2001 From: Corv Date: Mon, 6 Apr 2026 08:42:50 +0100 Subject: [PATCH 21/27] Add export subcommand for Netscape HTML bookmark output Outputs bookmarks grouped by AI-assigned category, with unorganized bookmarks in an 'Unorganized' folder. Supports --output for file output and --category for filtering a single folder. --- src/lazybookmarks/main.nim | 14 ++++++++++ src/lazybookmarks/storage.nim | 50 +++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/src/lazybookmarks/main.nim b/src/lazybookmarks/main.nim index 72a00d9..af19ba9 100644 --- a/src/lazybookmarks/main.nim +++ b/src/lazybookmarks/main.nim @@ -277,6 +277,18 @@ proc cmdCheckLinks(concurrency = 8, deadOnly = false, deleteDead = false, let removed = cfg.deleteBookmarks(deadIds) infoMsg &"Deleted {removed} dead bookmark(s)" +proc cmdExport(output = "", category = "") = + let cfg = loadConfig() + let html = exportBookmarksHtml(cfg, category) + if html.len == 0: + dimMsg "No bookmarks to export." 
+ return + if output.len > 0: + writeFile(output, html) + infoMsg &"Exported {getBookmarksForExport(cfg, category).len} bookmarks to {output}" + else: + stdout.write(html) + when isMainModule: dispatchMulti( [cmdImport, cmdName = "import", doc = "Import bookmarks from a file", @@ -290,6 +302,8 @@ when isMainModule: help = {"category": "Filter by folder path", "unorganised": "Show only unorganized"}], [cmdSearch, cmdName = "search", doc = "Search bookmarks", help = {"query": "Search term"}], + [cmdExport, cmdName = "export", doc = "Export bookmarks to Netscape HTML", + help = {"output": "Write to file instead of stdout", "category": "Filter by category"}], [cmdUndo, cmdName = "undo", doc = "Undo last batch of classifications"], [cmdModelList, cmdName = "model-list", doc = "List available models"], [cmdModelSet, cmdName = "model-set", doc = "Set default model variant", diff --git a/src/lazybookmarks/storage.nim b/src/lazybookmarks/storage.nim index 288d320..c8251c3 100644 --- a/src/lazybookmarks/storage.nim +++ b/src/lazybookmarks/storage.nim @@ -461,3 +461,53 @@ proc undoLastBatch*(cfg: Config): int = result = db.execAffectedRows(sql( "UPDATE bookmarks SET category = NULL, confidence = NULL, reason = NULL, organised_at = NULL WHERE organised_at >= ?" ), batchTime) + +proc htmlEscape*(s: string): string = ## Returns `s` with the ampersand, angle brackets and double quote replaced by HTML entities, so it can be embedded in markup or a quoted attribute. + result = s + result = result.replace("&", "&amp;") # ampersand first, so the entities produced below are not escaped a second time + result = result.replace("<", "&lt;") + result = result.replace(">", "&gt;") + result = result.replace("\"", "&quot;") + +proc getBookmarksForExport*(cfg: Config, categoryFilter = ""): seq[tuple[url, title, category: string]] = + let db = cfg.initDb() + defer: db.close() + + let query = if categoryFilter.len > 0: + sql("SELECT url, title, category FROM bookmarks WHERE category = ?
ORDER BY category, title") + else: + sql("SELECT url, title, category FROM bookmarks ORDER BY category, title") + + for row in db.fastRows(query, categoryFilter): + result.add(( + url: row[0], + title: row[1], + category: if row[2].len > 0: row[2] else: "Unorganized", + )) + +proc exportBookmarksHtml*(cfg: Config, categoryFilter = ""): string = ## Renders the rows from getBookmarksForExport as a Netscape bookmark file with one folder per category; returns "" when there is nothing to export. + let bookmarks = getBookmarksForExport(cfg, categoryFilter) + if bookmarks.len == 0: + return "" + + var lines: seq[string] = @[] + lines.add("""<!DOCTYPE NETSCAPE-Bookmark-file-1>""") + lines.add("""<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">""") + lines.add("<TITLE>Bookmarks</TITLE>") + lines.add("<H1>Bookmarks</H1>") + lines.add("<DL><p>") + + var currentCat = "" + for bm in bookmarks: + if bm.category != currentCat: + if currentCat.len > 0: + lines.add("</DL><p>") # close the previous category folder before opening the next + currentCat = bm.category + lines.add("<DT><H3>" & htmlEscape(currentCat) & "</H3>") + lines.add("<DL><p>") + let title = if bm.title.len > 0: bm.title else: bm.url + lines.add("<DT><A HREF=\"" & htmlEscape(bm.url) & "\">" & htmlEscape(title) & "</A>") + lines.add("</DL><p>") # close the last category folder + lines.add("</DL><p>") # close the root list + return lines.join("\n") From 6b51c4ffd31f08f053ced7980f0560953f80ea84 Mon Sep 17 00:00:00 2001 From: Corv Date: Mon, 6 Apr 2026 08:48:03 +0100 Subject: [PATCH 22/27] Add AGENTS.md with Nim dev guide, pipeline docs, and hard-won pitfalls --- AGENTS.md | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..8df15ae --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,141 @@ +# AGENTS.md — Lazybookmarks Development Guide + +## Project Overview +- Chrome extension (AI-powered bookmark organizer using Gemini Nano) ported to standalone Nim CLI +- Target: Linux arm64, dev on macOS arm64 +- Dependencies: cligen, db_connector, jsony (only 3) +- Build: `nimble release` (NOT `nimble build`) + +## Build & Toolchain +- Nim 2.2.8 via Homebrew +- `nimble release` outputs to `build/lazybookmarks` +- Do NOT use `nimble build` — it ignores custom tasks +- `nim.cfg`: --opt:size, --mm:orc, NO -d:ssl +- `nimble release` auto-resolves dependencies + +## LLM Backend +- Default: Ollama native `/api/chat` (constrained decoding via `format` param) +- OpenAI-compatible `/v1/chat/completions` fallback when `runtimeManaged=false` (LLM_URL set) +- `format` param = grammar-based constrained decoding (model physically cannot generate invalid tokens) +- `"options": {"think": false}` suppresses qwen3.5 thinking mode +- Model lineup: qwen3.5:0.8b, qwen3.5:2b (default), qwen3.5:4b, gemma4:e2b +- Model managed via `ollama pull`, not custom download code + +## Architecture +- Config priority: CLI > env vars > config.toml > defaults +- `runtimeManaged` flag: true = Ollama (native endpoints), false = custom LLM_URL (OpenAI endpoints) +- Link checking: `curl` + `xargs -P` (Nim SSL broken with OpenSSL 3.6+) +- `__skip__` handling: bookmarks classified as `__skip__` remain `organised_at = NULL` + +## 3-Phase Pipeline Internals + +The organize command (`organizer.nim`) runs a
3-phase pipeline to classify bookmarks into folders. + +### Phase 1: Taxonomy Analysis (`runTaxonomyPhase`) +- **Input:** All folders with their bookmarks, enriched with TF-IDF keywords, domain patterns, and exemplar bookmarks +- **Process:** Single LLM call asking the model to describe each folder and provide keywords +- **Schema:** `TaxonomySchemaJson` — array of `{folderId, folderPath, description, keywords[]}` +- **Caching:** Results keyed by a fingerprint of folder UUIDs + bookmark counts (`buildFingerprint`). Cache stored in `taxonomy_cache` table. Survives across runs unless folder structure changes. +- **Key helpers:** `computeTFIDF` (term frequency-inverse document frequency per folder), `extractDomainPatterns` (top domains per folder above 20% threshold), `sampleExemplars` (2 most recent bookmark titles/urls per folder) + +### Phase 1.5: Cluster/Theme Grouping (`runClusterPhase`) +- **Input:** All unorganized bookmarks, existing taxonomy categories, root-level folders +- **Process:** Single LLM call to identify 2-6 thematic groups among unorganized bookmarks that deserve a new folder +- **Schema:** `buildClusterSchemaJson(rootFolderIds)` — array of `{name, description, keywords[], parentFolderId}`. The `parentFolderId` is constrained to root folder UUIDs via JSON enum. +- **Output:** `seq[ClusterSuggestion]` — these become synthetic folders prefixed with `__new_` (e.g., `__new_Hardware`) in the taxonomy for Phase 2 + +### Phase 2: Per-Bookmark Classification (`runClassificationPhase`) +- **Input:** Unorganized bookmarks (chunked into batches), full taxonomy (original + new cluster folders) +- **Process:** For each batch, calls `pruneTaxonomy` to reduce the folder list to the most relevant ~15 folders (based on keyword overlap with the batch's titles via TF-IDF), then asks the LLM to classify each bookmark +- **Schema:** `buildClassificationSchemaJson(folderIds, bookmarkIds)` — array of `{bookmarkId, targetFolderId, confidence, reason}`. 
Both `bookmarkId` and `targetFolderId` are constrained to exact IDs via JSON enum, plus `"__skip__"` as a valid target. +- **Concurrency:** Uses `classifyBatchAsync` with sliding window — up to `concurrency` (default 4) batches in flight simultaneously via `AsyncHttpClient`. Batch size auto-set by model size (5 for small, 10 for normal). +- **`pruneTaxonomy`:** Scores each taxonomy category by how many of its TF-IDF keywords appear in the batch's token set. Keeps top N (max 15, min 5). This prevents overwhelming small models with 30+ folder options. +- **Bookmarks classified as `__skip__`** are silently dropped (not applied). Low-confidence matches can be reviewed interactively or auto-skipped. + +### Data Flow +1. `organizeBookmarks` loads all bookmarks, builds `folderBookmarks` table mapping folder UUID → bookmark entries +2. Phase 1 enriches folders with TF-IDF/domain/exemplar data, runs LLM, caches result +3. Phase 1.5 takes unorganized bookmarks + taxonomy + root folders, suggests new folders +4. Phase 2 merges new folders into taxonomy, chunks unorganized bookmarks, classifies each batch with pruned taxonomy +5. Results become `seq[Suggestion]` with `bookmarkId`, `targetFolderId`, `targetFolderPath`, `confidence`, `reason`, `isNewFolder` +6. 
Suggestions are applied via `applyClassification` (sets `category`, `confidence`, `reason`, `organised_at` on the bookmark row) + +### Key Types +- `TaxonomyCategory`: folderId, folderPath, description, keywords +- `ClusterSuggestion`: name, description, keywords, parentFolderId +- `Classification`: bookmarkId, targetFolderId, confidence, reason +- `Suggestion`: bookmarkId, bookmarkTitle, bookmarkUrl, targetFolderId, targetFolderPath, confidence, reason, isNewFolder + +## Nim Language Pitfalls (The Hard-Won Lessons) + +### Syntax +- `.[^1]` is Python, not Nim — use `seq[seq.len - 1]` +- `mapIt` cannot have multi-line blocks — use explicit `for` loops +- `=>` lambda syntax doesn't exist in Nim +- Anonymous tuple fields can't be accessed by name (`.score`) — use `[0]`, `[1]` +- `findIt` on seqs returns `int` (index), not the element — use `seq[index]` +- `{}` set literals only support values 0..255 — HTTP codes 302/404/410 must use `==` chains +- `re.match` requires full-string match — use `re.find` for substring matching +- `&"..."` format strings require `strformat` import +- Variable names can't conflict with keywords (e.g., `file` parameter) + +### Standard Library +- `std/terminal` has `hideCursor`/`showCursor` templates that conflict with custom procs +- `postContent` doesn't take a `headers` param — set `client.headers` before calling +- `HttpClient` has no `onProgress` field +- `filterIt`/`mapIt` are in `std/sequtils`, NOT `std/sugar` in Nim 2.2.8 +- `split()` requires `strutils` import +- `parseFloat` requires `strutils` import +- `sort()` requires `std/algorithm` import +- `sum()` doesn't exist as a standalone proc — use manual loop with `.inc` +- `sleep` → `os.sleep` or `execShellCmd` +- `execShellCmd` is in `std/os`, not `std/osproc` +- `/` operator is for filesystem paths, not URL concatenation — use `&` +- `reversed()` doesn't exist — use manual reverse loop with index +- `rfind` doesn't exist — use manual loop or reverse approach +- `chunk` doesn't 
exist — implement manually +- `formatFloat` uses `precision` not `ffDecimal` named param + +### Async +- `AsyncHttpClient` is inside `std/httpclient`, no built-in timeout +- `withTimeout(fut, ms)` returns `Future[bool]` — true if completed +- `one()` doesn't exist — use polling with `sleepAsync` + `.finished` +- Async macro can't capture `var` parameters — use return values +- `pump()` closures capturing locals from enclosing procs violate borrow checker — inline +- `std/channels` doesn't exist in 2.2.8; `threadpool` is deprecated +- `waitFor()` requires `std/asyncdispatch` + +### JSON & Data +- `HttpHeaders` doesn't have 3-arg `get(key, default)` — use `hasKey` + `[]` +- `HttpCode` is `range[0..255]` — can't use in `{}` +- `toHex` from `nimcrypto/utils` returns uppercase — `toLowerAscii()` for comparisons + +### Build System +- `nimble build` always runs its own default build via `bin` field — custom `build` tasks are ignored +- `self.exec` is old nimble syntax — use just `exec` +- `--out:path` doesn't work in `nim.cfg` — only `--outdir:dir` +- Circular imports cause "undeclared identifier" — restructure to avoid cycles +- Forward declarations needed for procs called before their definition +- Inline `if` in string concatenation within proc call arguments doesn't work — extract to `let` binding + +### SSL +- Nim 2.2.8 SSL bindings are incompatible with OpenSSL 3.6+ +- Do NOT add `-d:ssl` — use `curl` via `execShellCmd` for HTTPS + +## Code Conventions +- No comments unless asked +- Imports grouped: std/*, then local ./* modules +- Procs use `*` export marker for public API +- CLI subcommands use `cmdXxx` naming, flat names in dispatchMulti (e.g., `model-list`) + +## Testing +- Manual e2e testing with real bookmark data (692 bookmarks) +- `./build/lazybookmarks --verbose` for debug output +- Smoke test: `./build/lazybookmarks status` / `./build/lazybookmarks --help` + +## Ollama API Reference (Native Endpoints) +- Chat: `POST /api/chat` with `format` for structured 
output, `stream: false` +- Models: `GET /api/tags` +- Health: `GET /api/tags` (200 = running) +- Pull: `ollama pull :` (CLI, not API) +- Response: `{"message": {"content": "..."}}` (not OpenAI's `choices[0].message.content`) From c284c9377d4df7cb11f40e80d6919401ac9ae38c Mon Sep 17 00:00:00 2001 From: Corv Date: Mon, 6 Apr 2026 08:52:18 +0100 Subject: [PATCH 23/27] Add curl to runtime dependencies in BUILD.md --- BUILD.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/BUILD.md b/BUILD.md index 18cf49c..78132d9 100644 --- a/BUILD.md +++ b/BUILD.md @@ -4,24 +4,25 @@ - **Nim** >= 2.0.0 — https://nim-lang.org/install.html - **Ollama** — https://ollama.com/download (runtime dependency, not build-time) +- **curl** — required at runtime for `check-links` (link health checking) ### macOS ```sh -brew install nim ollama +brew install nim ollama curl ``` ### Ubuntu/Debian ```sh -sudo apt install nim +sudo apt install nim curl curl -fsSL https://ollama.com/install.sh | sh ``` ### Arch Linux ```sh -sudo pacman -S nim +sudo pacman -S nim curl yay -S ollama-cuda # or ollama-rocm for AMD ``` From d4f5fbeaa87a69759ddfcbb5fb896df730dcee81 Mon Sep 17 00:00:00 2001 From: Corv Date: Mon, 6 Apr 2026 08:55:16 +0100 Subject: [PATCH 24/27] Remove dead small-model schema procs and consolidate chat response parsing --- src/lazybookmarks/client.nim | 65 +++++++++++------------------------ src/lazybookmarks/prompts.nim | 6 ---- 2 files changed, 20 insertions(+), 51 deletions(-) diff --git a/src/lazybookmarks/client.nim b/src/lazybookmarks/client.nim index 22d2339..95d1f55 100644 --- a/src/lazybookmarks/client.nim +++ b/src/lazybookmarks/client.nim @@ -112,6 +112,17 @@ proc extractContent(parsed: JsonNode, native: bool): string = else: parsed["choices"][0]["message"]["content"].getStr() +proc parseChatResponse(response: string, native: bool, verbose: bool, prefix: string): JsonNode = + let parsed = parseJson(response) + let rawContent = extractContent(parsed, native) + 
let content = extractJson(rawContent) + if content.len > 0: + if verbose: + stderr.writeLine("[" & prefix & "] response: " & content[0..min(200, content.high)]) + return parseJson(content) + else: + raise newException(CatchableError, "No JSON found in response: " & rawContent[0..min(200, rawContent.high)]) + proc chatCompletion*(cfg: Config, messages: seq[Message], jsonSchema: string = "", maxRetries: int = 3): JsonNode = @@ -136,24 +147,7 @@ proc chatCompletion*(cfg: Config, messages: seq[Message], if cfg.verbose: stderr.writeLine("[attempt " & $attempt & "] -> " & $response.len & " bytes") - let parsed = parseJson(response) - var rawContent = "" - try: - rawContent = extractContent(parsed, native) - except CatchableError: - lastError = "Unexpected response format: " & response[0..min(200, response.high)] - if attempt < maxRetries: - let delay = 1000 * (1 shl (attempt - 1)) - discard execShellCmd("sleep " & $(delay div 1000)) - continue - - let content = extractJson(rawContent) - if content.len > 0: - if cfg.verbose: - stderr.writeLine("[chat] response: " & content[0..min(200, content.high)]) - return parseJson(content) - else: - lastError = "No JSON found in response: " & rawContent[0..min(200, rawContent.high)] + return parseChatResponse(response, native, cfg.verbose, "chat") except CatchableError as e: lastError = e.msg if cfg.verbose: @@ -193,33 +187,14 @@ proc chatCompletionAsync*(cfg: Config, messages: seq[Message], client.close() if timedOut: - lastError = "Request timed out (120s)" - if cfg.verbose: - stderr.writeLine("[attempt " & $attempt & "] Timeout") - else: - let response = postFut.read() - - if cfg.verbose: - stderr.writeLine("[attempt " & $attempt & "] -> " & $response.len & " bytes") - - let parsed = parseJson(response) - var rawContent = "" - try: - rawContent = extractContent(parsed, native) - except CatchableError: - lastError = "Unexpected response format: " & response[0..min(200, response.high)] - if attempt < maxRetries: - let delay = 1000 * 
(1 shl (attempt - 1)) - await sleepAsync(delay) - continue - - let content = extractJson(rawContent) - if content.len > 0: - if cfg.verbose: - stderr.writeLine("[chat-async] response: " & content[0..min(200, content.high)]) - return parseJson(content) - else: - lastError = "No JSON found in response: " & rawContent[0..min(200, rawContent.high)] + raise newException(CatchableError, "Request timed out (120s)") + + let response = postFut.read() + + if cfg.verbose: + stderr.writeLine("[attempt " & $attempt & "] -> " & $response.len & " bytes") + + return parseChatResponse(response, native, cfg.verbose, "chat-async") except CatchableError as e: lastError = e.msg if cfg.verbose: diff --git a/src/lazybookmarks/prompts.nim b/src/lazybookmarks/prompts.nim index 8f3d0aa..0f47ce6 100644 --- a/src/lazybookmarks/prompts.nim +++ b/src/lazybookmarks/prompts.nim @@ -43,12 +43,6 @@ proc buildClassificationSchemaJson*(folderIds: seq[string], bookmarkIds: seq[str let bookmarkEnum = bookmarkParts.join(", ") return "{\"type\":\"object\",\"properties\":{\"moves\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"bookmarkId\":{\"type\":\"string\",\"enum\":[" & bookmarkEnum & "]},\"targetFolderId\":{\"type\":\"string\",\"enum\":[" & folderEnum & "]},\"confidence\":{\"type\":\"string\",\"enum\":[\"high\",\"medium\",\"low\"]},\"reason\":{\"type\":\"string\"}},\"required\":[\"bookmarkId\",\"targetFolderId\",\"confidence\",\"reason\"],\"additionalProperties\":false}}},\"required\":[\"moves\"]}" -proc buildClassificationSchemaJsonSmall*(): string = - return "{\"type\":\"object\",\"properties\":{\"moves\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"bookmarkId\":{\"type\":\"string\"},\"targetFolderId\":{\"type\":\"string\"},\"confidence\":{\"type\":\"string\"},\"reason\":{\"type\":\"string\"}},\"required\":[\"bookmarkId\",\"targetFolderId\",\"confidence\",\"reason\"]}}},\"required\":[\"moves\"]}" - -proc buildClusterSchemaJsonSmall*(): string = - 
return "{\"type\":\"object\",\"properties\":{\"clusters\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"name\":{\"type\":\"string\"},\"description\":{\"type\":\"string\"},\"keywords\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"parentFolderId\":{\"type\":\"string\"}},\"required\":[\"name\",\"description\",\"keywords\",\"parentFolderId\"]}}},\"required\":[\"clusters\"]}" - proc buildTaxonomyPrompt*(enrichedFolders: seq[tuple[id, path, count: string, domains, keywords, exemplars: string]]): string = var lines: seq[string] = @[] for f in enrichedFolders: From 19d6dd1ce4adbd5828e970c332be77f46463f995 Mon Sep 17 00:00:00 2001 From: Corv Date: Mon, 6 Apr 2026 09:24:28 +0100 Subject: [PATCH 25/27] Fix progress bar stall by adding poll() calls in concurrent classification loop --- src/lazybookmarks/organizer.nim | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/lazybookmarks/organizer.nim b/src/lazybookmarks/organizer.nim index b5812d6..c82fe45 100644 --- a/src/lazybookmarks/organizer.nim +++ b/src/lazybookmarks/organizer.nim @@ -369,21 +369,18 @@ proc runClassificationPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], inc i return drained - while batchIdx < batches.len: + while batchIdx < batches.len or pending.len > 0: while pending.len < conc and batchIdx < batches.len: pending.add(classifyBatchAsync(cfg, batches[batchIdx], fullTaxonomy, tfidfMap, batchIdx)) inc batchIdx - while not pending[0].finished: - if drainPending() > 0 and pending.len == 0: break - - discard drainPending() - - while pending.len > 0: - if not pending[0].finished: + while pending.len > 0: poll() - else: discard drainPending() + if pending.len > 0 and not pending[0].finished: + poll() + else: + break echo "" return allSuggestions From 9700100690b1210264a246220e3eedd4d8df0dce Mon Sep 17 00:00:00 2001 From: Corv Date: Mon, 6 Apr 2026 09:50:24 +0100 Subject: [PATCH 26/27] Add resume support: cluster cache and per-batch 
auto-commit in Phase 2 --- src/lazybookmarks/organizer.nim | 83 +++++++++++++++++++++++++-------- src/lazybookmarks/storage.nim | 6 +++ 2 files changed, 70 insertions(+), 19 deletions(-) diff --git a/src/lazybookmarks/organizer.nim b/src/lazybookmarks/organizer.nim index c82fe45..d5c33a2 100644 --- a/src/lazybookmarks/organizer.nim +++ b/src/lazybookmarks/organizer.nim @@ -140,6 +140,34 @@ proc saveTaxonomy*(db: DbConn, fingerprint: string, taxonomy: Taxonomy) = except: discard +proc loadCachedClusters*(db: DbConn, fingerprint: string): (bool, seq[ClusterSuggestion]) = + try: + let row = db.getRow(sql("SELECT clusters FROM cluster_cache WHERE fingerprint = ?"), fingerprint) + if row[0].len == 0: + return (false, @[]) + let json = parseJson(row[0]) + var clusters: seq[ClusterSuggestion] = @[] + for elem in json.getElems(): + var kws: seq[string] = @[] + for kw in elem["keywords"].getElems(): + kws.add(kw.getStr()) + clusters.add(ClusterSuggestion( + name: elem["name"].getStr(), + description: elem["description"].getStr(), + keywords: kws, + parentFolderId: elem["parentFolderId"].getStr(), + )) + return (true, clusters) + except: + return (false, @[]) + +proc saveClusters*(db: DbConn, fingerprint: string, clusters: seq[ClusterSuggestion]) = + try: + db.exec(sql("INSERT OR REPLACE INTO cluster_cache (fingerprint, clusters, created_at) VALUES (?, ?, strftime('%s','now'))"), + fingerprint, $(%*clusters)) + except: + discard + proc pruneTaxonomy*(taxonomy: Taxonomy, batch: seq[BookmarkEntry], tfidfMap: Table[string, seq[string]], topN = 15, minN = 5): Taxonomy = @@ -321,7 +349,8 @@ proc runClassificationPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], taxonomy: Taxonomy, folderBookmarks: Table[string, seq[BookmarkEntry]], allBookmarks: seq[BookmarkEntry], - clusters: seq[ClusterSuggestion]): seq[Suggestion] = + clusters: seq[ClusterSuggestion], + autoApply: bool = false): (int, seq[Suggestion]) = var fullTaxonomy = taxonomy let newFolders = 
clusters.mapIt(TaxonomyCategory( @@ -338,18 +367,27 @@ proc runClassificationPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], let conc = cfg.concurrency if batches.len == 0: - return @[] + return (0, @[]) var completedCount = 0 + var appliedCount = 0 var allSuggestions: seq[Suggestion] = @[] + proc commitBatch(suggestions: seq[Suggestion]) = + for s in suggestions: + if s.confidence != "low": + applyClassification(cfg, s.bookmarkId, s.targetFolderPath, s.confidence, s.reason) + inc appliedCount + if conc <= 1: for i, batch in batches: showProgressBar(i + 1, batches.len, "Classifying bookmarks") let suggestions = classifyBatchAsync(cfg, batch, fullTaxonomy, tfidfMap, i).waitFor() allSuggestions.add(suggestions) + if autoApply: + commitBatch(suggestions) echo "" - return allSuggestions + return (appliedCount, allSuggestions) var pending: seq[Future[seq[Suggestion]]] = @[] var batchIdx = 0 @@ -364,6 +402,8 @@ proc runClassificationPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], inc completedCount showProgressBar(completedCount, batches.len, "Classifying bookmarks") allSuggestions.add(batchResult) + if autoApply: + commitBatch(batchResult) inc drained else: inc i @@ -383,7 +423,7 @@ proc runClassificationPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], break echo "" - return allSuggestions + return (appliedCount, allSuggestions) proc organizeBookmarks*(cfg: Config, autoAcceptAll: bool = false, limit: int = 0): int = let db = cfg.initDb() @@ -423,30 +463,35 @@ proc organizeBookmarks*(cfg: Config, autoAcceptAll: bool = false, limit: int = 0 let taxonomy = runTaxonomyPhase(cfg, folders, folderBookmarks, allBookmarks, db) headerMsg "Phase 1.5: Identifying new folder opportunities..." 
+ let fingerprint = buildFingerprint(folders) var clusters: seq[ClusterSuggestion] = @[] - try: - clusters = runClusterPhase(cfg, webUncategorized, taxonomy, folders) + let (clusterCached, cachedClusters) = loadCachedClusters(db, fingerprint) + if clusterCached: + clusters = cachedClusters if clusters.len > 0 and cfg.verbose: - dimMsg &"Found {clusters.len} potential new folders" - except CatchableError as e: - if cfg.verbose: - warnMsg &"Cluster phase skipped: {e.msg}" + dimMsg &"Cluster cache hit ({clusters.len} folders)" + else: + try: + clusters = runClusterPhase(cfg, webUncategorized, taxonomy, folders) + if clusters.len > 0: + saveClusters(db, fingerprint, clusters) + if cfg.verbose: + dimMsg &"Found {clusters.len} potential new folders" + except CatchableError as e: + if cfg.verbose: + warnMsg &"Cluster phase skipped: {e.msg}" headerMsg "Phase 2: Classifying bookmarks..." - let suggestions = runClassificationPhase(cfg, webUncategorized, taxonomy, folderBookmarks, allBookmarks, clusters) + let shouldAutoApply = autoAcceptAll or cfg.autoAcceptHigh + let (appliedCount, suggestions) = runClassificationPhase(cfg, webUncategorized, taxonomy, folderBookmarks, allBookmarks, clusters, autoApply = shouldAutoApply) if suggestions.len == 0: dimMsg "No suggestions generated." 
return 0 - if autoAcceptAll or cfg.autoAcceptHigh: - var accepted = 0 - for s in suggestions: - if autoAcceptAll or s.confidence == "high": - applyClassification(cfg, s.bookmarkId, s.targetFolderPath, s.confidence, s.reason) - accepted.inc - infoMsg &"Applied {accepted} suggestions automatically" - return accepted + if shouldAutoApply: + infoMsg &"Applied {appliedCount} suggestions automatically" + return appliedCount var accepted = 0 var skipped = 0 diff --git a/src/lazybookmarks/storage.nim b/src/lazybookmarks/storage.nim index c8251c3..c215391 100644 --- a/src/lazybookmarks/storage.nim +++ b/src/lazybookmarks/storage.nim @@ -52,6 +52,12 @@ CREATE TABLE IF NOT EXISTS taxonomy_cache ( created_at INTEGER ); +CREATE TABLE IF NOT EXISTS cluster_cache ( + fingerprint TEXT PRIMARY KEY, + clusters TEXT NOT NULL, + created_at INTEGER +); + CREATE TABLE IF NOT EXISTS folders ( id INTEGER PRIMARY KEY, uuid TEXT NOT NULL UNIQUE, From 3a3207647eb38e5f645d36878fc41211bac94f16 Mon Sep 17 00:00:00 2001 From: Corv Date: Tue, 7 Apr 2026 09:42:53 +0100 Subject: [PATCH 27/27] Fix Phase 2 deadlock, add progress indicator, and handle Ctrl+C cleanup The concurrent classification loop had two separate while loops causing an infinite deadlock after submitting the first batch of futures. Merged into a single fill-poll-drain loop. Added bookmark-count progress bar with elapsed time heartbeat, per-batch result logging, and Phase 2 summary. Ctrl+C now propagates correctly to run ollama stop and free GPU memory. Also fixes macOS ollama start hint. 
--- .gitignore | 1 + src/lazybookmarks/main.nim | 10 ++- src/lazybookmarks/organizer.nim | 119 ++++++++++++++++++++++++++------ src/lazybookmarks/runtime.nim | 2 +- src/lazybookmarks/ui.nim | 14 +++- 5 files changed, 119 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 7483dc4..fade97d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ /build/ /dist/ /lazybookmarks/ +/.opencode/plans diff --git a/src/lazybookmarks/main.nim b/src/lazybookmarks/main.nim index af19ba9..668a7a6 100644 --- a/src/lazybookmarks/main.nim +++ b/src/lazybookmarks/main.nim @@ -40,7 +40,15 @@ proc cmdOrganise(model = "", autoAcceptHigh = false, autoAcceptAll = false, let registry = loadModelRegistry() ensureReady(cfg, registry) - discard cfg.organizeBookmarks(autoAcceptAll = autoAcceptAll, limit = limit) + try: + discard cfg.organizeBookmarks(autoAcceptAll = autoAcceptAll, limit = limit) + except CatchableError as e: + if e.name == "EKeyboardInterrupt": + if cfg.runtimeManaged and cfg.modelName.len > 0: + let bin = findOllamaBin() + if bin.len > 0: + discard execShellCmd(bin & " stop " & cfg.modelName & " 2>/dev/null") + raise proc cmdList(category = "", unorganised = false) = let cfg = loadConfig() diff --git a/src/lazybookmarks/organizer.nim b/src/lazybookmarks/organizer.nim index d5c33a2..9f2431c 100644 --- a/src/lazybookmarks/organizer.nim +++ b/src/lazybookmarks/organizer.nim @@ -1,10 +1,11 @@ -import std/[strutils, strformat, json, re, math, tables, algorithm, sequtils, sets, asyncdispatch] +import std/[strutils, strformat, json, re, math, tables, algorithm, sequtils, sets, asyncdispatch, os, times] import db_connector/db_sqlite import ./config import ./storage import ./client import ./prompts import ./ui +import ./runtime type TaxonomyCategory* = object @@ -38,6 +39,11 @@ type reason*: string isNewFolder*: bool + BatchResult* = object + suggestions*: seq[Suggestion] + skipped*: int + lowConf*: int + const StopWords = 
["the","a","an","and","or","of","to","in","for","is","on","with","at","by","from","this","that","it","as","are","was","be","has","have"] proc tokenizeText*(text: string): seq[string] = @@ -289,7 +295,7 @@ proc chunk*[T](s: seq[T], size: int): seq[seq[T]] = proc classifyBatchAsync(cfg: Config, batch: seq[BookmarkEntry], fullTaxonomy: Taxonomy, tfidfMap: Table[string, seq[string]], - batchIndex: int): Future[seq[Suggestion]] {.async.} = + batchIndex: int): Future[BatchResult] {.async.} = let pruned = pruneTaxonomy(fullTaxonomy, batch, tfidfMap) let folderIds = pruned.categories.mapIt(it.folderId) let bookmarkIds = batch.mapIt($it.id) @@ -304,6 +310,8 @@ proc classifyBatchAsync(cfg: Config, batch: seq[BookmarkEntry], try: let response = await chatCompletionSimpleAsync(cfg, SystemPrompt, prompt, schema) var suggestions: seq[Suggestion] = @[] + var skipCount = 0 + var lowCount = 0 if response.hasKey("moves"): for move in response["moves"]: @@ -314,8 +322,12 @@ proc classifyBatchAsync(cfg: Config, batch: seq[BookmarkEntry], let reason = moveObj["reason"].getStr() if targetId == "__skip__": + inc skipCount continue + if conf == "low": + inc lowCount + let bmIdx = batch.findIt(it.id == bmId) var bmTitle = "" var bmUrl = "" @@ -339,11 +351,13 @@ proc classifyBatchAsync(cfg: Config, batch: seq[BookmarkEntry], isNewFolder: isNew, )) - return suggestions + return BatchResult(suggestions: suggestions, skipped: skipCount, lowConf: lowCount) except CatchableError as e: + if e.name == "EKeyboardInterrupt": + raise if cfg.verbose: errorMsg &"Batch {batchIndex + 1} failed: {e.msg}" - return @[] + return BatchResult(suggestions: @[], skipped: 0, lowConf: 0) proc runClassificationPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], taxonomy: Taxonomy, @@ -365,13 +379,18 @@ proc runClassificationPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], let batches = uncategorized.chunk(cfg.batchSize) let conc = cfg.concurrency + let totalBookmarks = uncategorized.len if batches.len == 
0: return (0, @[]) var completedCount = 0 var appliedCount = 0 + var totalSkipped = 0 + var totalLowConf = 0 + var totalFailed = 0 var allSuggestions: seq[Suggestion] = @[] + var completedBookmarks = 0 proc commitBatch(suggestions: seq[Suggestion]) = for s in suggestions: @@ -380,48 +399,93 @@ proc runClassificationPhase*(cfg: Config, uncategorized: seq[BookmarkEntry], inc appliedCount if conc <= 1: + let startTime = epochTime() for i, batch in batches: - showProgressBar(i + 1, batches.len, "Classifying bookmarks") - let suggestions = classifyBatchAsync(cfg, batch, fullTaxonomy, tfidfMap, i).waitFor() - allSuggestions.add(suggestions) + let elapsed = int(epochTime() - startTime) + let prefix = &"Classifying {completedBookmarks}/{totalBookmarks} bookmarks" + showProgressBar(i + 1, batches.len, prefix, elapsed) + let br = classifyBatchAsync(cfg, batch, fullTaxonomy, tfidfMap, i).waitFor() + allSuggestions.add(br.suggestions) + completedBookmarks += batch.len + totalSkipped += br.skipped + totalLowConf += br.lowConf if autoApply: - commitBatch(suggestions) + commitBatch(br.suggestions) + dimMsg &"Batch {i + 1}/{batches.len}: {br.suggestions.len} classified, {br.skipped} skipped, {br.lowConf} low-confidence" + echo "" + dimMsg &"Phase 2 complete: {allSuggestions.len} classified, {totalSkipped} skipped, {totalLowConf} low-confidence, {totalFailed} failed" echo "" return (appliedCount, allSuggestions) - var pending: seq[Future[seq[Suggestion]]] = @[] + var pending: seq[Future[BatchResult]] = @[] var batchIdx = 0 + let startTime = epochTime() + var lastHeartbeat = 0.0 + + proc hasUnfinished(): bool = + for f in pending: + if not f.finished: return true + return false - proc drainPending(): int = - var drained = 0 + proc drainPending() = var i = 0 while i < pending.len: if pending[i].finished: - let batchResult = pending[i].read() + let br = pending[i].read() pending.delete(i) inc completedCount - showProgressBar(completedCount, batches.len, "Classifying bookmarks") - 
allSuggestions.add(batchResult) + let bmCount = if completedCount <= batches.len: min(cfg.batchSize, totalBookmarks - completedBookmarks + cfg.batchSize) else: cfg.batchSize + completedBookmarks += bmCount + allSuggestions.add(br.suggestions) + totalSkipped += br.skipped + totalLowConf += br.lowConf if autoApply: - commitBatch(batchResult) - inc drained + commitBatch(br.suggestions) + let elapsed = int(epochTime() - startTime) + let prefix = &"Classifying {completedBookmarks}/{totalBookmarks} bookmarks" + showProgressBar(completedCount, batches.len, prefix, elapsed) + dimMsg &"Batch {completedCount}/{batches.len}: {br.suggestions.len} classified, {br.skipped} skipped, {br.lowConf} low-confidence" + inc i else: inc i - return drained while batchIdx < batches.len or pending.len > 0: while pending.len < conc and batchIdx < batches.len: pending.add(classifyBatchAsync(cfg, batches[batchIdx], fullTaxonomy, tfidfMap, batchIdx)) inc batchIdx - while pending.len > 0: - poll() - discard drainPending() - if pending.len > 0 and not pending[0].finished: + if hasUnfinished(): + try: poll() - else: - break + except CatchableError as e: + if e.name == "EKeyboardInterrupt": + raise + sleep(50) + + drainPending() + + if hasUnfinished(): + let now = epochTime() + if now - lastHeartbeat >= 1.0: + lastHeartbeat = now + let elapsed = int(epochTime() - startTime) + let prefix = &"Classifying {completedBookmarks}/{totalBookmarks} bookmarks" + showProgressBar(completedCount, batches.len, prefix, elapsed) + + for f in pending: + if f.finished: + try: + let br = f.read() + allSuggestions.add(br.suggestions) + totalSkipped += br.skipped + totalLowConf += br.lowConf + if autoApply: + commitBatch(br.suggestions) + except CatchableError: + inc totalFailed + echo "" + dimMsg &"Phase 2 complete: {allSuggestions.len} classified, {totalSkipped} skipped, {totalLowConf} low-confidence, {totalFailed} failed" echo "" return (appliedCount, allSuggestions) @@ -429,6 +493,15 @@ proc organizeBookmarks*(cfg: 
Config, autoAcceptAll: bool = false, limit: int = 0 let db = cfg.initDb() defer: db.close() + defer: + if cfg.runtimeManaged and cfg.modelName.len > 0: + let bin = findOllamaBin() + if bin.len > 0: + try: + discard execShellCmd(bin & " stop " & cfg.modelName & " 2>/dev/null") + except: + discard + let uncategorized = getUnorganisedBookmarks(cfg, limit) let webUncategorized = uncategorized.filterIt(it.url.startsWith("http://") or it.url.startsWith("https://")) diff --git a/src/lazybookmarks/runtime.nim b/src/lazybookmarks/runtime.nim index 2c0ab72..2d64cda 100644 --- a/src/lazybookmarks/runtime.nim +++ b/src/lazybookmarks/runtime.nim @@ -27,7 +27,7 @@ proc requireRuntime*(cfg: Config) = stdout.styledWriteLine(styleBright, fgRed, " ✗ ", fgDefault, resetStyle, "Ollama is not running.") when defined(macosx): - stdout.styledWriteLine(styleDim, " Start it with: open -a Ollama", resetStyle) + stdout.styledWriteLine(styleDim, " Start it with: ollama serve &", resetStyle) stdout.styledWriteLine(styleDim, " Or install: brew install ollama", resetStyle) elif defined(linux): stdout.styledWriteLine(styleDim, " Start it with: ollama serve &", resetStyle) diff --git a/src/lazybookmarks/ui.nim b/src/lazybookmarks/ui.nim index 3f54db9..6be3656 100644 --- a/src/lazybookmarks/ui.nim +++ b/src/lazybookmarks/ui.nim @@ -17,7 +17,14 @@ proc dimMsg*(msg: string) = proc headerMsg*(msg: string) = stdout.styledWriteLine(styleBright, fgCyan, "\n " & msg, resetStyle, "\n") -proc showProgressBar*(current: int, total: int, prefix: string = "") = +proc formatElapsed*(seconds: int): string = + if seconds < 60: + return &"{seconds}s" + let m = seconds div 60 + let s = seconds mod 60 + return &"{m}m{s:02d}s" + +proc showProgressBar*(current: int, total: int, prefix: string = "", elapsed: int = -1) = stdout.write "\r\e[2K" if total == 0: stdout.write prefix & " 0/0" @@ -27,7 +34,10 @@ proc showProgressBar*(current: int, total: int, prefix: string = "") = let width = 30 let filled = (current * width) 
div total let bar = repeat("#", filled) & repeat("-", width - filled) - stdout.write prefix & " [" & bar & "] " & $pct & "% (" & $current & "/" & $total & ")" + var suffix = " (" & $current & "/" & $total & ")" + if elapsed >= 0: + suffix.add(" " & formatElapsed(elapsed)) + stdout.write prefix & " [" & bar & "] " & $pct & "%" & suffix stdout.flushFile() type ReviewAction* = enum