diff --git a/.docker/docker-compose.prod.yml b/.docker/docker-compose.prod.yml index 5ae9238..fd058bb 100644 --- a/.docker/docker-compose.prod.yml +++ b/.docker/docker-compose.prod.yml @@ -15,6 +15,7 @@ services: - CACHE_HEADER_TIME=0 - CACHE_TIME=0 - db_table=us_code_2025 + - CHROMA_HOST=congress_chromadb restart: unless-stopped congress_viewer_app: volumes: @@ -27,6 +28,8 @@ services: restart: unless-stopped congress_postgres: image: tianon/true + congress_chromadb: + restart: unless-stopped networks: parser: external: diff --git a/.docker/docker-compose.yml b/.docker/docker-compose.yml index e597af0..df7ad2c 100644 --- a/.docker/docker-compose.yml +++ b/.docker/docker-compose.yml @@ -39,6 +39,7 @@ services: - db_user=parser - db_pass=parser - db_table=us_code_2025 + - CHROMA_HOST=congress_chromadb build: context: ../backend dockerfile: .docker/Dockerfile @@ -48,6 +49,7 @@ services: - "9091:8080" depends_on: - congress_postgres + - congress_chromadb networks: parser: entrypoint: "uvicorn" @@ -85,6 +87,8 @@ services: container_name: congress_hillstack tty: true stdin_open: true + environment: + - FASTAPI_URL=http://congress_parser_fastapi:8080 build: context: ../hillstack dockerfile: .docker/Dockerfile @@ -92,6 +96,7 @@ services: - "3001:3001" depends_on: - congress_postgres + - congress_parser_fastapi networks: parser: congress_postgres: @@ -107,9 +112,28 @@ services: - postgres-volume:/var/lib/postgresql/data networks: parser: + congress_chromadb: + image: chromadb/chroma:0.6.3 + container_name: congress_chromadb + environment: + - IS_PERSISTENT=TRUE + - PERSIST_DIRECTORY=/chroma/chroma + - ANONYMIZED_TELEMETRY=False + volumes: + - chromadb-volume:/chroma/chroma + ports: + - "8000:8000" + networks: + parser: + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/heartbeat"] + interval: 30s + timeout: 10s + retries: 3 networks: parser: external: name: docker_parser volumes: postgres-volume: + chromadb-volume: diff --git a/backend/.alembic/versions/a1b2c3d4e5f6_add_user_interest.py b/backend/.alembic/versions/a1b2c3d4e5f6_add_user_interest.py new file mode 100644 index 0000000..695b449 --- /dev/null +++ b/backend/.alembic/versions/a1b2c3d4e5f6_add_user_interest.py @@ -0,0 +1,107 @@ +"""Add user_interest and user_interest_usc_content tables + +Revision ID: a1b2c3d4e5f6 +Revises: c9d7e37be069 +Create Date: 2026-02-26 00:00:00.000000 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'a1b2c3d4e5f6' +down_revision: Union[str, None] = 'c9d7e37be069' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + 'user_interest', + sa.Column('user_interest_id', sa.Integer(), nullable=False), + sa.Column('user_id', sa.String(), nullable=True), + sa.Column('interest_text', sa.String(), nullable=True), + sa.Column( + 'created_at', + sa.DateTime(), + server_default=sa.text('now()'), + nullable=True, + ), + sa.Column( + 'updated_at', + sa.DateTime(), + server_default=sa.text('now()'), + nullable=True, + ), + sa.ForeignKeyConstraint( + ['user_id'], + ['sensitive.user_ident.user_id'], + ondelete='CASCADE', + ), + sa.PrimaryKeyConstraint('user_interest_id'), + schema='sensitive', + ) + op.create_index( + 'ix_sensitive_user_interest_user_id', + 'user_interest', + ['user_id'], + schema='sensitive', + ) + + op.create_table( + 'user_interest_usc_content', + sa.Column('user_interest_usc_content_id', sa.Integer(), nullable=False), + sa.Column('user_interest_id', sa.Integer(), nullable=True), + sa.Column('usc_ident', sa.String(), nullable=True), + sa.Column('match_source', sa.String(), nullable=True), + sa.Column('is_active', sa.Boolean(), nullable=True, server_default=sa.text('true')), + sa.Column('match_rank', sa.Integer(), nullable=True), + sa.Column( + 'created_at', + sa.DateTime(), + server_default=sa.text('now()'), + nullable=True, + ), + sa.ForeignKeyConstraint( + ['user_interest_id'], + ['sensitive.user_interest.user_interest_id'], + ondelete='CASCADE', + ), + sa.PrimaryKeyConstraint('user_interest_usc_content_id'), + schema='sensitive', + ) + op.create_index( + 'ix_sensitive_user_interest_usc_content_user_interest_id', + 'user_interest_usc_content', + ['user_interest_id'], + schema='sensitive', + ) + op.create_index( + 'ix_sensitive_user_interest_usc_content_usc_ident', + 'user_interest_usc_content', + ['usc_ident'], + schema='sensitive', + ) + + +def downgrade() -> None: + op.drop_index( + 'ix_sensitive_user_interest_usc_content_usc_ident', + table_name='user_interest_usc_content', + schema='sensitive', + ) + op.drop_index( + 'ix_sensitive_user_interest_usc_content_user_interest_id', + table_name='user_interest_usc_content', + schema='sensitive', + ) + op.drop_table('user_interest_usc_content', schema='sensitive') + op.drop_index( + 'ix_sensitive_user_interest_user_id', + table_name='user_interest', + schema='sensitive', + ) + op.drop_table('user_interest', schema='sensitive') diff --git a/backend/congress_db/models.py b/backend/congress_db/models.py index dfb8924..fb2ca6d 100644 --- a/backend/congress_db/models.py +++ b/backend/congress_db/models.py @@ -321,6 +321,49 @@ class UserUSCContent(SensitiveBase): usc_ident = Column(String) +class UserInterest(SensitiveBase): + """ + Natural language interest statement for a user, used to auto-match USC sections + """ + + __tablename__ = "user_interest" + + user_interest_id = Column(Integer, primary_key=True) + + user_id = Column( + String, + ForeignKey("sensitive.user_ident.user_id", ondelete="CASCADE"), + index=True, + ) + interest_text = Column(String) + created_at = Column(DateTime, server_default=func.now()) + updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now()) + + +class UserInterestUscContent(SensitiveBase): + """ + USC content sections matched to a user's interest, either automatically or manually + """ + + __tablename__ = "user_interest_usc_content" + + user_interest_usc_content_id = Column(Integer, primary_key=True) + + user_interest_id = Column( + Integer, + ForeignKey( + "sensitive.user_interest.user_interest_id", + ondelete="CASCADE", + ), + index=True, + ) + usc_ident = Column(String, index=True) + match_source = Column(String, default="auto") # 'auto' or 'manual' + is_active = Column(Boolean, default=True) + match_rank = Column(Integer, nullable=True) + created_at = Column(DateTime, server_default=func.now()) + + class UserLLMQuery(SensitiveBase): """ Acts as a log of all user queries into legislation, for tracking purposes diff --git a/backend/congress_fastapi/handlers/interest.py b/backend/congress_fastapi/handlers/interest.py new file mode 100644 index 0000000..87c056c --- /dev/null +++ b/backend/congress_fastapi/handlers/interest.py @@ -0,0 +1,235 @@ +from typing import List + +from sqlalchemy import select, insert, update, or_, func + +from congress_fastapi.db.postgres import get_database +from congress_fastapi.handlers.uscode import search_chroma +from congress_db.models import ( + UserInterest, + UserInterestUscContent, + USCContent, + USCContentDiff, + LegislationVersion, + Legislation, + Congress, +) + + +async def handle_get_interest(user_id: str) -> dict: + database = await get_database() + + interest = await database.fetch_one( + select(UserInterest).where(UserInterest.user_id == user_id) + ) + if not interest: + return {"interest": None, "matches": []} + + matches = await database.fetch_all( + select(UserInterestUscContent) + .where( + UserInterestUscContent.user_interest_id + == interest["user_interest_id"] + ) + .order_by(UserInterestUscContent.match_rank) + ) + return { + "interest": dict(interest), + "matches": [dict(m) for m in matches], + } + + +async def handle_save_interest(user_id: str, interest_text: str) -> dict: + database = await get_database() + + existing = await database.fetch_one( + select(UserInterest).where(UserInterest.user_id == user_id) + ) + if existing: + await database.execute( + update(UserInterest) + .where(UserInterest.user_id == user_id) + .values(interest_text=interest_text, updated_at=func.now()) + ) + interest_id = existing["user_interest_id"] + else: + interest_id = await database.execute( + insert(UserInterest).values( + user_id=user_id, interest_text=interest_text + ) + ) + + # Run ChromaDB semantic search + try: + chroma_matches = await search_chroma(interest_text, 50) + except Exception: + chroma_matches = [] + + # Deactivate all previous auto-matched sections + await database.execute( + update(UserInterestUscContent) + .where( + UserInterestUscContent.user_interest_id == interest_id + ) + .where(UserInterestUscContent.match_source == "auto") + .values(is_active=False) + ) + + # Upsert new auto-matched sections + for rank, match in enumerate(chroma_matches): + usc_ident = match.get("usc_ident") + if not usc_ident: + continue + + existing_match = await database.fetch_one( + select(UserInterestUscContent) + .where( + UserInterestUscContent.user_interest_id == interest_id + ) + .where(UserInterestUscContent.usc_ident == usc_ident) + ) + if existing_match: + await database.execute( + update(UserInterestUscContent) + .where( + UserInterestUscContent.user_interest_usc_content_id + == existing_match["user_interest_usc_content_id"] + ) + .values(is_active=True, match_rank=rank, match_source="auto") + ) + else: + await database.execute( + insert(UserInterestUscContent).values( + user_interest_id=interest_id, + usc_ident=usc_ident, + match_source="auto", + is_active=True, + match_rank=rank, + ) + ) + + return await handle_get_interest(user_id) + + +async def handle_toggle_interest_section( + user_id: str, usc_ident: str, is_active: bool +) -> None: + database = await get_database() + + interest = await database.fetch_one( + select(UserInterest).where(UserInterest.user_id == user_id) + ) + if not interest: + return + + await database.execute( + update(UserInterestUscContent) + .where( + UserInterestUscContent.user_interest_id + == interest["user_interest_id"] + ) + .where(UserInterestUscContent.usc_ident == usc_ident) + .values(is_active=is_active) + ) + + +async def handle_add_interest_section(user_id: str, usc_ident: str) -> None: + database = await get_database() + + interest = await database.fetch_one( + select(UserInterest).where(UserInterest.user_id == user_id) + ) + if not interest: + return + + existing = await database.fetch_one( + select(UserInterestUscContent) + .where( + UserInterestUscContent.user_interest_id + == interest["user_interest_id"] + ) + .where(UserInterestUscContent.usc_ident == usc_ident) + ) + if existing: + await database.execute( + update(UserInterestUscContent) + .where( + UserInterestUscContent.user_interest_usc_content_id + == existing["user_interest_usc_content_id"] + ) + .values(is_active=True, match_source="manual") + ) + else: + await database.execute( + insert(UserInterestUscContent).values( + user_interest_id=interest["user_interest_id"], + usc_ident=usc_ident, + match_source="manual", + is_active=True, + match_rank=None, + ) + ) + + +async def handle_get_interest_legislation(user_id: str) -> dict: + database = await get_database() + + interest = await database.fetch_one( + select(UserInterest).where(UserInterest.user_id == user_id) + ) + if not interest: + return {"legislation": []} + + match_rows = await database.fetch_all( + select(UserInterestUscContent.usc_ident) + .where( + UserInterestUscContent.user_interest_id + == interest["user_interest_id"] + ) + .where(UserInterestUscContent.is_active == True) # noqa: E712 + .distinct() + ) + idents: List[str] = [r["usc_ident"] for r in match_rows if r["usc_ident"]] + if not idents: + return {"legislation": []} + + query = ( + select( + Legislation.legislation_id, + Legislation.title, + Legislation.number, + Congress.session_number, + Legislation.legislation_type, + Legislation.chamber, + func.min(LegislationVersion.effective_date).label("effective_date"), + ) + .select_from(USCContent) + .join( + USCContentDiff, + USCContentDiff.usc_content_id == USCContent.usc_content_id, + ) + .join( + LegislationVersion, + USCContentDiff.version_id == LegislationVersion.version_id, + ) + .join( + Legislation, + LegislationVersion.legislation_id == Legislation.legislation_id, + ) + .join(Congress, Congress.congress_id == Legislation.congress_id) + .where( + or_(*[USCContent.usc_ident.ilike(f"{ident}%") for ident in idents]) + ) + .group_by( + Legislation.legislation_id, + Legislation.title, + Legislation.number, + Congress.session_number, + Legislation.legislation_type, + Legislation.chamber, + ) + .order_by(func.min(LegislationVersion.effective_date).desc()) + .limit(100) + ) + + results = await database.fetch_all(query) + return {"legislation": [dict(r) for r in results]} diff --git a/backend/congress_fastapi/handlers/uscode.py b/backend/congress_fastapi/handlers/uscode.py index bb2fe96..7a3e7c3 100644 --- a/backend/congress_fastapi/handlers/uscode.py +++ b/backend/congress_fastapi/handlers/uscode.py @@ -14,7 +14,12 @@ ) chroma_host = ( - os.environ.get("LLM_HOST", "10.0.0.120").split("http://")[-1].split(":")[0] + os.environ.get( + "CHROMA_HOST", + os.environ.get("LLM_HOST", "10.0.0.120"), + ) + .split("http://")[-1] + .split(":")[0] ) @@ -108,5 +113,6 @@ async def search_chroma(query: str, num: int) -> List[dict]: result["title"] = short_title.capitalize() result["section_display"] = content.heading.strip() result["usc_link"] = f"{short_title[1:]}/{content.number}" + result["usc_ident"] = ident results_by_id[ident] = result return [results_by_id[ident] for ident in response["ids"][0]] diff --git a/backend/congress_fastapi/routes/user.py b/backend/congress_fastapi/routes/user.py index ccfeaee..c58911d 100644 --- a/backend/congress_fastapi/routes/user.py +++ b/backend/congress_fastapi/routes/user.py @@ -1,6 +1,7 @@ import traceback from typing import List, Optional from congress_db.models import UserIdent +from pydantic import BaseModel from fastapi import ( APIRouter, HTTPException, @@ -28,6 +29,13 @@ InvalidTokenException, handle_get_usc_tracking_folders ) +from congress_fastapi.handlers.interest import ( + handle_get_interest, + handle_save_interest, + handle_toggle_interest_section, + handle_add_interest_section, + handle_get_interest_legislation, +) from congress_fastapi.models.errors import Error from congress_fastapi.models.user import ( UserLoginRequest, @@ -277,3 +285,78 @@ async def user_usc_tracking_folder_results( ) return await handle_get_usc_tracking_results(user.user_id, folder_id) + + +class InterestSaveRequest(BaseModel): + interest_text: str + + +class InterestToggleRequest(BaseModel): + usc_ident: str + is_active: bool + + +class InterestAddSectionRequest(BaseModel): + usc_ident: str + + +@router.get("/user/interest") +async def user_interest_get( + user: UserIdent = Depends(user_from_cookie), +) -> dict: + if user is None: + raise HTTPException( + status_code=403, detail="Invalid or expired authentication token" + ) + return await handle_get_interest(user.user_id) + + +@router.post("/user/interest") +async def user_interest_save( + body: InterestSaveRequest, + user: UserIdent = Depends(user_from_cookie), +) -> dict: + if user is None: + raise HTTPException( + status_code=403, detail="Invalid or expired authentication token" + ) + return await handle_save_interest(user.user_id, body.interest_text) + + +@router.patch("/user/interest/section") +async def user_interest_toggle_section( + body: InterestToggleRequest, + user: UserIdent = Depends(user_from_cookie), +) -> dict: + if user is None: + raise HTTPException( + status_code=403, detail="Invalid or expired authentication token" + ) + await handle_toggle_interest_section( + user.user_id, body.usc_ident, body.is_active + ) + return {"ok": True} + + +@router.post("/user/interest/section") +async def user_interest_add_section( + body: InterestAddSectionRequest, + user: UserIdent = Depends(user_from_cookie), +) -> dict: + if user is None: + raise HTTPException( + status_code=403, detail="Invalid or expired authentication token" + ) + await handle_add_interest_section(user.user_id, body.usc_ident) + return {"ok": True} + + +@router.get("/user/interest/legislation") +async def user_interest_legislation( + user: UserIdent = Depends(user_from_cookie), +) -> dict: + if user is None: + raise HTTPException( + status_code=403, detail="Invalid or expired authentication token" + ) + return await handle_get_interest_legislation(user.user_id) diff --git a/backend/congress_parser/importers/chroma_uscode.py b/backend/congress_parser/importers/chroma_uscode.py new file mode 100644 index 0000000..2a0b697 --- /dev/null +++ b/backend/congress_parser/importers/chroma_uscode.py @@ -0,0 +1,403 @@ +""" +ChromaDB importer for US Code sections. + +Reads top-level USC sections from PostgreSQL and indexes them into the ChromaDB +"uscode" collection so that the interest-based code linking feature can perform +semantic search against them. + +Usage +----- +# Index the latest USC release (auto-detected): + python3 -m congress_parser.importers.chroma_uscode + +# Wipe the collection and re-index from scratch: + python3 -m congress_parser.importers.chroma_uscode --reset + +# Specify an explicit version_id (matches the hardcoded value in uscode.py): + python3 -m congress_parser.importers.chroma_uscode --version-id 74573 + +# Check what would be indexed without writing anything: + python3 -m congress_parser.importers.chroma_uscode --dry-run + +# Tune throughput (default batch size is 200): + python3 -m congress_parser.importers.chroma_uscode --batch-size 500 + +Prerequisites +------------- +- US Code data must already be in PostgreSQL (run congress_parser.importers.releases first) +- ChromaDB service must be running (docker-compose up congress_chromadb) +- CHROMA_HOST env var must point to the ChromaDB host (default: congress_chromadb) +""" + +import argparse +import asyncio +import json +import os +import sys +from typing import Optional +from urllib import request as urllib_request +from urllib.error import HTTPError, URLError + +import chromadb +from chromadb.config import Settings + +from congress_fastapi.db.postgres import get_database + +# ── Configuration ──────────────────────────────────────────────────────────── + +CHROMA_HOST = ( + os.environ.get( + "CHROMA_HOST", + os.environ.get("LLM_HOST", "localhost"), + ) + .split("http://")[-1] + .split(":")[0] +) +CHROMA_PORT = int(os.environ.get("CHROMA_PORT", "8000")) +CHROMA_TENANT = "congress-dev" +CHROMA_DATABASE = "usc-chat" +COLLECTION_NAME = "uscode" +DEFAULT_BATCH_SIZE = 200 + + +# ── Database helpers ────────────────────────────────────────────────────────── + + +async def get_latest_version_id(database) -> int: + """Return the version_id associated with the most recent USC release point.""" + row = await database.fetch_one( + """ + SELECT v.version_id + FROM version v + JOIN usc_release ur ON ur.version_id = v.version_id + WHERE ur.effective_date IS NOT NULL + ORDER BY ur.effective_date DESC + LIMIT 1 + """ + ) + if row is None: + raise RuntimeError( + "No USC release points found in the database.\n" + "Run 'python3 -m congress_parser.importers.releases' first." + ) + return row[0] + + +async def count_sections(database, version_id: int) -> int: + """Count indexable top-level USC sections for this version.""" + row = await database.fetch_one( + """ + SELECT COUNT(*) + FROM usc_content + WHERE version_id = :vid + AND usc_ident ~ '^/us/usc/t[0-9]+/s[^/]+$' + AND heading IS NOT NULL + AND heading != '' + """, + values={"vid": version_id}, + ) + return row[0] + + +async def fetch_sections_batch( + database, version_id: int, offset: int, batch_size: int +) -> list: + """ + Fetch one page of top-level USC sections, joined to their title name. + + Filters to identifiers of the form /us/usc/t{n}/s{identifier} — these are + the IDs stored in ChromaDB and resolved back to Postgres in search_chroma(). + """ + return await database.fetch_all( + """ + SELECT + uc.usc_ident, + uc.heading, + uc.content_str, + uc.number, + uc.section_display, + ch.long_title AS chapter_title, + ch.short_title AS chapter_short_title + FROM usc_content uc + JOIN usc_section us_sec ON us_sec.usc_section_id = uc.usc_section_id + JOIN usc_chapter ch ON ch.usc_chapter_id = us_sec.usc_chapter_id + WHERE uc.version_id = :vid + AND uc.usc_ident ~ '^/us/usc/t[0-9]+/s[^/]+$' + AND uc.heading IS NOT NULL + AND uc.heading != '' + ORDER BY uc.usc_ident + LIMIT :lim + OFFSET :off + """, + values={"vid": version_id, "lim": batch_size, "off": offset}, + ) + + +def build_document(row) -> str: + """ + Build the text that ChromaDB will embed for a USC section. + + Format: " — §<number>. <heading>\\n<content>" + The title name helps the embedding model place sections in context. + """ + parts = [] + chapter_title = (row["chapter_title"] or "").strip().capitalize() + if chapter_title: + parts.append(chapter_title) + if row["section_display"]: + section_label = row["section_display"].strip() + heading = (row["heading"] or "").strip() + if heading: + parts.append(f"{section_label}. {heading}") + else: + parts.append(section_label) + elif row["heading"]: + parts.append(row["heading"].strip()) + header = " — ".join(parts) + + content = (row["content_str"] or "").strip() + if content: + # Truncate very long sections to stay within typical embedding limits + content = content[:8000] + return f"{header}\n{content}" if header else content + return header + + +# ── ChromaDB setup ───────────────────────────────────────────────────────────── + + +def _chroma_rest(method: str, path: str, body: Optional[dict] = None): + """Make a raw HTTP call to the ChromaDB admin REST API.""" + url = f"http://{CHROMA_HOST}:{CHROMA_PORT}{path}" + data = json.dumps(body).encode() if body else None + req = urllib_request.Request( + url, + data=data, + headers={"Content-Type": "application/json"} if data else {}, + method=method, + ) + try: + with urllib_request.urlopen(req, timeout=10) as resp: + return resp.status + except HTTPError as exc: + return exc.code # Return status code; callers decide what's acceptable + + +def ensure_chroma_tenant_and_db(): + """ + Create the ChromaDB tenant and database if they do not already exist. + + HTTP 422 means the resource already exists (ChromaDB's behaviour); we + treat that the same as HTTP 200. + """ + # Tenant + status = _chroma_rest("POST", "/api/v1/tenants", {"name": CHROMA_TENANT}) + if status not in (200, 201, 422, 409): + print( + f"Warning: unexpected HTTP {status} when creating tenant '{CHROMA_TENANT}'. " + "Proceeding anyway.", + file=sys.stderr, + ) + + # Database (scoped to tenant via query param) + status = _chroma_rest( + "POST", + f"/api/v1/databases?tenant={CHROMA_TENANT}", + {"name": CHROMA_DATABASE}, + ) + if status not in (200, 201, 422, 409): + print( + f"Warning: unexpected HTTP {status} when creating database '{CHROMA_DATABASE}'. " + "Proceeding anyway.", + file=sys.stderr, + ) + + +async def open_chroma_collection(reset: bool): + """Connect to ChromaDB and return the 'uscode' collection.""" + client = await chromadb.AsyncHttpClient( + host=CHROMA_HOST, + port=CHROMA_PORT, + ssl=False, + headers=None, + settings=Settings(), + tenant=CHROMA_TENANT, + database=CHROMA_DATABASE, + ) + + if reset: + try: + await client.delete_collection(COLLECTION_NAME) + print(f"Deleted existing collection '{COLLECTION_NAME}'.") + except Exception: + pass # Didn't exist yet + + collection = await client.get_or_create_collection( + COLLECTION_NAME, + metadata={"hnsw:space": "cosine"}, + ) + count = await collection.count() + if count > 0 and not reset: + print( + f"Collection '{COLLECTION_NAME}' already contains {count:,} documents. " + "Upserting will add new and update changed sections.\n" + "Use --reset to wipe and rebuild from scratch." + ) + return collection + + +# ── Main import logic ────────────────────────────────────────────────────────── + + +async def run_import( + version_id: Optional[int], + batch_size: int, + reset: bool, + dry_run: bool, +): + database = await get_database() + + # Determine version + if version_id is None: + version_id = await get_latest_version_id(database) + print(f"Auto-detected latest USC version_id: {version_id}") + else: + print(f"Using specified version_id: {version_id}") + + total = await count_sections(database, version_id) + print(f"Sections eligible for indexing: {total:,}") + + if total == 0: + print( + "\nNo sections found. Make sure the USC data has been loaded:\n" + " python3 -m congress_parser.importers.releases", + file=sys.stderr, + ) + sys.exit(1) + + if dry_run: + print("Dry-run mode — exiting without writing to ChromaDB.") + return + + # ── ChromaDB setup ────────────────────────────────────────── + print(f"\nConnecting to ChromaDB at {CHROMA_HOST}:{CHROMA_PORT} …") + try: + ensure_chroma_tenant_and_db() + except URLError as exc: + print( + f"Could not reach ChromaDB at {CHROMA_HOST}:{CHROMA_PORT}: {exc}\n" + "Is the container running? docker-compose up congress_chromadb", + file=sys.stderr, + ) + sys.exit(1) + + collection = await open_chroma_collection(reset=reset) + print(f"Opened collection '{COLLECTION_NAME}'. Starting import…\n") + + # ── Batch loop ────────────────────────────────────────────── + offset = 0 + indexed = 0 + skipped = 0 + + while offset < total: + rows = await fetch_sections_batch(database, version_id, offset, batch_size) + if not rows: + break + + ids: list[str] = [] + documents: list[str] = [] + metadatas: list[dict] = [] + + for row in rows: + usc_ident: str = row["usc_ident"] + doc_text = build_document(row) + if not doc_text.strip(): + skipped += 1 + continue + + # Extract title number from ident: /us/usc/t42/s1395 → "42" + parts = usc_ident.split("/") + title_num = parts[3].lstrip("t") if len(parts) > 3 else "" + + ids.append(usc_ident) + documents.append(doc_text) + metadatas.append( + { + "title": title_num, + "number": row["number"] or "", + "section_display": row["section_display"] or "", + "heading": (row["heading"] or "")[:200], + } + ) + + if ids: + await collection.upsert(ids=ids, documents=documents, metadatas=metadatas) + indexed += len(ids) + + offset += batch_size + done = min(offset, total) + pct = done / total * 100 + print( + f" {done:>{len(str(total))}}/{total} ({pct:5.1f}%) " + f"indexed: {indexed:,}", + end="\r", + flush=True, + ) + + print(f"\n\nDone. Indexed {indexed:,} sections, skipped {skipped:,} empty.") + final_count = await collection.count() + print(f"Collection '{COLLECTION_NAME}' now contains {final_count:,} documents.") + + +# ── CLI entry point ──────────────────────────────────────────────────────────── + + +def main(): + parser = argparse.ArgumentParser( + description=( + "Index US Code sections into ChromaDB for semantic interest-based search." + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument( + "--reset", + action="store_true", + help="Delete and recreate the ChromaDB collection before indexing.", + ) + parser.add_argument( + "--batch-size", + type=int, + default=DEFAULT_BATCH_SIZE, + metavar="N", + help=f"Documents per upsert batch (default: {DEFAULT_BATCH_SIZE}).", + ) + parser.add_argument( + "--version-id", + type=int, + default=None, + metavar="ID", + help=( + "Specific usc_content.version_id to index " + "(default: latest usc_release)." + ), + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Count eligible sections and exit without writing to ChromaDB.", + ) + args = parser.parse_args() + + asyncio.run( + run_import( + version_id=args.version_id, + batch_size=args.batch_size, + reset=args.reset, + dry_run=args.dry_run, + ) + ) + + +if __name__ == "__main__": + main() diff --git a/hillstack/prisma/schema.prisma b/hillstack/prisma/schema.prisma index 87d6d30..de61f11 100644 --- a/hillstack/prisma/schema.prisma +++ b/hillstack/prisma/schema.prisma @@ -554,6 +554,7 @@ model user_ident { user_auth_google String? @db.VarChar user_auth_expiration DateTime? @db.Timestamp(6) user_auth_cookie String? @db.VarChar + user_interest user_interest[] user_legislation user_legislation[] user_legislator user_legislator[] user_llm_query user_llm_query[] @@ -604,6 +605,34 @@ model user_llm_query { @@schema("sensitive") } +model user_interest { + user_interest_id Int @id @default(autoincrement()) + user_id String? @db.VarChar + interest_text String? @db.Text + created_at DateTime? @default(now()) @db.Timestamp(6) + updated_at DateTime? @default(now()) @db.Timestamp(6) + user_ident user_ident? @relation(fields: [user_id], references: [user_id], onDelete: Cascade, onUpdate: NoAction) + user_interest_usc_content user_interest_usc_content[] + + @@index([user_id], map: "ix_sensitive_user_interest_user_id") + @@schema("sensitive") +} + +model user_interest_usc_content { + user_interest_usc_content_id Int @id @default(autoincrement()) + user_interest_id Int? + usc_ident String? @db.VarChar + match_source String? @db.VarChar + is_active Boolean? @default(true) + match_rank Int? + created_at DateTime? @default(now()) @db.Timestamp(6) + user_interest user_interest? @relation(fields: [user_interest_id], references: [user_interest_id], onDelete: Cascade, onUpdate: NoAction) + + @@index([user_interest_id], map: "ix_sensitive_user_interest_usc_content_user_interest_id") + @@index([usc_ident], map: "ix_sensitive_user_interest_usc_content_usc_ident") + @@schema("sensitive") +} + model user_usc_content { user_usc_content_id Int @id @default(autoincrement()) user_id String? @db.VarChar diff --git a/hillstack/src/app/_home/dashboard.tsx b/hillstack/src/app/_home/dashboard.tsx index 2542112..96cd220 100644 --- a/hillstack/src/app/_home/dashboard.tsx +++ b/hillstack/src/app/_home/dashboard.tsx @@ -9,11 +9,11 @@ import SellIcon from '@mui/icons-material/Sell'; import { Card, Grid, Toolbar } from '@mui/material'; import Box from '@mui/material/Box'; import type React from 'react'; -import { DashboardWidgetNoContent } from '~/app/_home/widgets'; import { LegislationCalendar } from '~/app/_home/widgets/legislationCalendar'; import { LegislationFollowed } from '~/app/_home/widgets/legislationFollowed'; import { LegislationTags } from '~/app/_home/widgets/legislationTags'; import { LegislatorFollowed } from '~/app/_home/widgets/legislatorFollowed'; +import { InterestFeed } from '~/app/_home/widgets/interestFeed'; function DashboardWidget({ title, @@ -82,9 +82,9 @@ export function Dashboard() { <Grid size={{ xs: 12, md: 12 }}> <DashboardWidget Icon={LocalPoliceIcon} - title='USC Tracking' + title='Your Interest Areas' > - <DashboardWidgetNoContent /> + <InterestFeed /> </DashboardWidget> </Grid> </Grid> diff --git a/hillstack/src/app/_home/widgets/interestFeed.tsx b/hillstack/src/app/_home/widgets/interestFeed.tsx new file mode 100644 index 0000000..863c50f --- /dev/null +++ b/hillstack/src/app/_home/widgets/interestFeed.tsx @@ -0,0 +1,117 @@ +'use client'; + +import { Box, Button, List, ListItem, ListItemButton, Typography } from '@mui/material'; +import Link from 'next/link'; +import { useSession } from 'next-auth/react'; +import type { RouterOutputs } from '~/trpc/react'; +import { api } from '~/trpc/react'; +import { DashboardWidgetContent } from './'; + +type InterestBill = RouterOutputs['user']['interestLegislation'][number]; + +export function InterestFeed() { + const { data: session } = useSession(); + const { data, isLoading, isError } = api.user.interestLegislation.useQuery( + undefined, + { enabled: Boolean(session) }, + ); + + if (!session) { + return ( + <Box + sx={{ + display: 'flex', + flexDirection: 'column', + alignItems: 'center', + justifyContent: 'center', + height: 220, + gap: 1, + }} + > + <Typography color='textDisabled' variant='body2'> + Log in to track legislation by interest + </Typography> + </Box> + ); + } + + if (!isLoading && !isError && data?.length === 0) { + return ( + <Box + sx={{ + display: 'flex', + flexDirection: 'column', + alignItems: 'center', + justifyContent: 'center', + height: 220, + gap: 1, + px: 2, + }} + > + <Typography + color='textDisabled' + textAlign='center' + variant='body2' + > + No bills found yet. Set up your interests to track + relevant legislation. + </Typography> + <Button + component={Link} + href='/user/interests' + size='small' + variant='outlined' + > + Set up interests + </Button> + </Box> + ); + } + + return ( + <DashboardWidgetContent + isEmpty={(data?.length ?? 0) === 0} + isError={isError} + isLoading={isLoading} + > + {data && ( + <List dense> + {data.slice(0, 8).map((bill: InterestBill) => ( + <ListItem + disablePadding + key={bill.legislation_id} + > + <ListItemButton> + <Link + href={`/congress/bills/${bill.legislation_id}`} + style={{ + width: '100%', + display: 'block', + }} + > + <Box + sx={{ + display: 'flex', + flexDirection: 'column', + }} + > + <Typography color='primary'> + {bill.title} + </Typography> + <Typography variant='caption'> + {`${bill.session_number}th · `} + {bill.chamber === 'house' + ? 'H.R.' + : 'S.'} + {` #${bill.number}`} + </Typography> + </Box> + </Link> + </ListItemButton> + </ListItem> + ))} + </List> + )} + </DashboardWidgetContent> + ); +} diff --git a/hillstack/src/app/congress/bills/[billId]/interestBadge.tsx b/hillstack/src/app/congress/bills/[billId]/interestBadge.tsx new file mode 100644 index 0000000..0821438 --- /dev/null +++ b/hillstack/src/app/congress/bills/[billId]/interestBadge.tsx @@ -0,0 +1,31 @@ +'use client'; + +import TrackChangesIcon from '@mui/icons-material/TrackChanges'; +import { Chip, Tooltip } from '@mui/material'; +import { useSession } from 'next-auth/react'; +import { api } from '~/trpc/react'; + +export function InterestBadge({ legislationId }: { legislationId: number }) { + const { data: session } = useSession(); + + const { data } = api.user.interestBillMatch.useQuery( + { legislation_id: legislationId }, + { enabled: Boolean(session) }, + ); + + if (!session || !data?.matches) return null; + + return ( + <Tooltip + title={`Touches your interest areas: ${data.matchedIdents.join(', ')}`} + > + <Chip + color='success' + icon={<TrackChangesIcon />} + label='Matches your interests' + size='small' + sx={{ px: 1, mr: 1, mt: { xs: 0.5, md: 0 } }} + /> + </Tooltip> + ); +} diff --git a/hillstack/src/app/congress/bills/[billId]/layout.tsx b/hillstack/src/app/congress/bills/[billId]/layout.tsx index 8beccb1..b657dcf 100644 --- a/hillstack/src/app/congress/bills/[billId]/layout.tsx +++ b/hillstack/src/app/congress/bills/[billId]/layout.tsx @@ -3,6 +3,7 @@ import MergeTypeIcon from '@mui/icons-material/MergeType'; import { Box, Chip, Container, Paper, Typography } from '@mui/material'; import type { Params } from 'next/dist/server/request/params'; +import { InterestBadge } from '~/app/congress/bills/[billId]/interestBadge'; import { BillTabs } from '~/app/congress/bills/[billId]/tabs'; import { BillVersionEnum } from '~/enums'; import { api, HydrateClient } from '~/trpc/server'; @@ -118,7 +119,8 @@ export default async function BillLayout({ )} </Box> - <BillTabs /> + <InterestBadge legislationId={Number(billId)} /> + <BillTabs /> <Paper elevation={0} sx={{ diff --git a/hillstack/src/app/user/interests/page.tsx b/hillstack/src/app/user/interests/page.tsx new file mode 100644 index 0000000..a525bab --- /dev/null +++ b/hillstack/src/app/user/interests/page.tsx @@ -0,0 +1,337 @@ +'use client'; + +import AddIcon from '@mui/icons-material/Add'; +import AutorenewIcon from '@mui/icons-material/Autorenew'; +import CheckBoxIcon from '@mui/icons-material/CheckBox'; +import CheckBoxOutlineBlankIcon from '@mui/icons-material/CheckBoxOutlineBlank'; +import { + Alert, + Box, + Button, + Chip, + CircularProgress, + Container, + Divider, + IconButton, + Paper, + TextField, + Tooltip, + Typography, +} from '@mui/material'; +import { useSession } from 'next-auth/react'; +import { type ChangeEvent, useEffect, useMemo, useState } from 'react'; +import { api } from '~/trpc/react'; + +type MatchItem = { + user_interest_usc_content_id: number; + usc_ident: string | null; + match_source: string | null; + is_active: boolean | null; + match_rank: number | null; +}; + +// Group matched sections by USC title (e.g. "t42" → "Title 42") +function groupByTitle(matches: MatchItem[]): Record<string, MatchItem[]> { + const groups: Record<string, MatchItem[]> = {}; + for (const m of matches) { + const parts = m.usc_ident?.split('/') ?? []; + // usc_ident looks like /us/usc/t42/s1395/... + const titlePart = parts[3] ?? 'unknown'; + const titleNum = titlePart.replace('t', 'Title '); + if (!groups[titleNum]) groups[titleNum] = []; + groups[titleNum].push(m); + } + return groups; +} + +export default function InterestsPage() { + const { data: session, status } = useSession(); + const utils = api.useUtils(); + + const { data: interest, isLoading } = api.user.interestGet.useQuery( + undefined, + { enabled: Boolean(session) }, + ); + + const [text, setText] = useState(''); + const [addIdent, setAddIdent] = useState(''); + const [showAddField, setShowAddField] = useState(false); + + useEffect(() => { + if (interest?.interest_text) { + setText(interest.interest_text); + } + }, [interest?.interest_text]); + + const saveMutation = api.user.interestSave.useMutation({ + onSuccess: () => utils.user.interestGet.invalidate(), + }); + + const toggleMutation = api.user.interestToggleSection.useMutation({ + onSuccess: () => utils.user.interestGet.invalidate(), + }); + + const addMutation = api.user.interestAddSection.useMutation({ + onSuccess: () => { + utils.user.interestGet.invalidate(); + setAddIdent(''); + setShowAddField(false); + }, + }); + + const rawMatches: MatchItem[] = ( + interest?.user_interest_usc_content ?? [] + ).map((m) => ({ + user_interest_usc_content_id: m.user_interest_usc_content_id, + usc_ident: m.usc_ident ?? null, + match_source: m.match_source ?? null, + is_active: m.is_active ?? null, + match_rank: m.match_rank ?? null, + })); + + const grouped = useMemo( + () => groupByTitle(rawMatches), + // eslint-disable-next-line react-hooks/exhaustive-deps + [interest?.user_interest_usc_content], + ); + + const activeCount = rawMatches.filter((m) => m.is_active).length; + + if (status === 'loading') { + return ( + <Container maxWidth='md' sx={{ py: 4 }}> + <CircularProgress /> + </Container> + ); + } + + if (!session) { + return ( + <Container maxWidth='md' sx={{ py: 4 }}> + <Alert severity='info'> + Please log in to manage your policy interests. + </Alert> + </Container> + ); + } + + return ( + <Container maxWidth='md' sx={{ py: 4 }}> + <Typography gutterBottom variant='h1'> + Your Policy Interests + </Typography> + <Typography color='textSecondary' sx={{ mb: 3 }} variant='body2'> + Describe what policy areas you care about in plain language. + The system will find matching US Code sections and alert you + when new legislation touches them. + </Typography> + + <Paper elevation={2} sx={{ p: 3, mb: 3 }}> + <TextField + fullWidth + helperText='Be specific for better matches. Up to 500 characters.' + label='What policy areas do you care about?' + maxRows={6} + minRows={3} + multiline + onChange={(e: ChangeEvent<HTMLInputElement>) => + setText(e.target.value.slice(0, 500)) + } + placeholder='e.g. "Medicare billing and reimbursement policy for rural critical access hospitals"' + value={text} + /> + <Box + sx={{ mt: 2, display: 'flex', alignItems: 'center', gap: 1 }} + > + <Button + disabled={ + !text.trim() || + saveMutation.isPending || + text === interest?.interest_text + } + onClick={() => + saveMutation.mutate({ interest_text: text }) + } + startIcon={ + saveMutation.isPending ? ( + <CircularProgress size={16} /> + ) : ( + <AutorenewIcon /> + ) + } + variant='contained' + > + {saveMutation.isPending + ? 'Finding matches…' + : interest?.interest_text + ? 'Update interests' + : 'Find matching sections'} + </Button> + {saveMutation.isError && ( + <Typography color='error' variant='caption'> + Failed to save. Please try again. + </Typography> + )} + </Box> + </Paper> + + {isLoading && <CircularProgress />} + + {interest && rawMatches.length > 0 && ( + <Paper elevation={2} sx={{ p: 3 }}> + <Box + sx={{ + display: 'flex', + alignItems: 'center', + justifyContent: 'space-between', + mb: 2, + }} + > + <Typography variant='h2'> + Matched US Code Sections + </Typography> + <Chip + label={`${activeCount} active`} + size='small' + variant='outlined' + /> + </Box> + + {(Object.entries(grouped) as [string, MatchItem[]][]).map( + ([titleLabel, sections]) => ( + <Box key={titleLabel} sx={{ mb: 2 }}> + <Typography + color='textSecondary' + sx={{ mb: 0.5 }} + variant='subtitle2' + > + {titleLabel} + </Typography> + {sections.map((match) => { + const sectionSlug = + match.usc_ident + ?.split('/') + .slice(3) + .join('/') ?? ''; + return ( + <Box + key={ + match.user_interest_usc_content_id + } + sx={{ + display: 'flex', + alignItems: 'center', + py: 0.5, + opacity: match.is_active + ? 1 + : 0.45, + }} + > + <Tooltip + title={ + match.is_active + ? 'Click to deselect this section' + : 'Click to restore this section' + } + > + <IconButton + onClick={() => + toggleMutation.mutate({ + user_interest_usc_content_id: + match.user_interest_usc_content_id, + is_active: + !match.is_active, + }) + } + size='small' + sx={{ mr: 1 }} + > + {match.is_active ? ( + <CheckBoxIcon + color='primary' + fontSize='small' + /> + ) : ( + <CheckBoxOutlineBlankIcon fontSize='small' /> + )} + </IconButton> + </Tooltip> + <Typography variant='body2'> + {sectionSlug} + </Typography> + {match.match_source === + 'manual' && ( + <Chip + label='manual' + size='small' + sx={{ ml: 1 }} + variant='outlined' + /> + )} + </Box> + ); + })} + <Divider sx={{ mt: 1 }} /> + </Box> + ), + )} + + <Box sx={{ mt: 2 }}> + {showAddField ? ( + <Box sx={{ display: 'flex', gap: 1 }}> + <TextField + label='USC citation' + onChange={( + e: ChangeEvent<HTMLInputElement>, + ) => setAddIdent(e.target.value)} + placeholder='/us/usc/t42/s1395' + size='small' + value={addIdent} + /> + <Button + disabled={ + !addIdent.trim() || + addMutation.isPending + } + onClick={() => + addMutation.mutate({ + usc_ident: addIdent, + }) + } + variant='contained' + > + Add + </Button> + <Button + onClick={() => setShowAddField(false)} + variant='outlined' + > + Cancel + </Button> + </Box> + ) : ( + <Button + onClick={() => setShowAddField(true)} + size='small' + startIcon={<AddIcon />} + variant='text' + > + Add section manually + </Button> + )} + </Box> + </Paper> + )} + + {interest && + rawMatches.length === 0 && + !isLoading && ( + <Alert severity='info'> + No matching sections found. Try a more specific + interest description, or make sure ChromaDB has been + indexed. + </Alert> + )} + </Container> + ); +} diff --git a/hillstack/src/server/api/routers/user.ts b/hillstack/src/server/api/routers/user.ts index 8786189..1eb467a 100644 --- a/hillstack/src/server/api/routers/user.ts +++ b/hillstack/src/server/api/routers/user.ts @@ -1,6 +1,9 @@ +import { Prisma } from '@prisma/client'; import { z } from 'zod'; import { createTRPCRouter, privateProcedure } from '~/server/api/trpc'; +const FASTAPI_URL = process.env.FASTAPI_URL ?? 'http://localhost:8080'; + export const userRouter = createTRPCRouter({ legislationFollowing: privateProcedure .input( @@ -208,4 +211,303 @@ export const userRouter = createTRPCRouter({ return userLegislation.map((leg) => leg); }), + + // ── Interest procedures ────────────────────────────────────────────────── + + interestGet: privateProcedure.query(async ({ ctx }) => { + return ctx.db.user_interest.findFirst({ + where: { user_id: ctx.user.email }, + include: { + user_interest_usc_content: { + orderBy: { match_rank: 'asc' }, + }, + }, + }); + }), + + interestSave: privateProcedure + .input(z.object({ interest_text: z.string().min(1).max(500) })) + .mutation(async ({ input, ctx }) => { + // 1. Upsert the interest record + const existing = await ctx.db.user_interest.findFirst({ + where: { user_id: ctx.user.email }, + }); + + let interestId: number; + if (existing) { + await ctx.db.user_interest.update({ + where: { user_interest_id: existing.user_interest_id }, + data: { + interest_text: input.interest_text, + updated_at: new Date(), + }, + }); + interestId = existing.user_interest_id; + } else { + const created = await ctx.db.user_interest.create({ + data: { + user_id: ctx.user.email, + interest_text: input.interest_text, + }, + }); + interestId = created.user_interest_id; + } + + // 2. Call the FastAPI ChromaDB search endpoint (no auth needed) + let chromaMatches: Array<{ + usc_ident?: string; + title?: string; + section_display?: string; + usc_link?: string; + }> = []; + try { + const res = await fetch(`${FASTAPI_URL}/uscode/search`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + query: input.interest_text, + results: 50, + }), + }); + if (res.ok) { + chromaMatches = await res.json(); + } + } catch { + // ChromaDB unavailable — interest saved, matches will be empty + } + + // 3. Deactivate old auto-matched sections + await ctx.db.user_interest_usc_content.updateMany({ + where: { + user_interest_id: interestId, + match_source: 'auto', + }, + data: { is_active: false }, + }); + + // 4. Upsert new auto-matched sections + for (let rank = 0; rank < chromaMatches.length; rank++) { + const match = chromaMatches[rank]; + const usc_ident = match?.usc_ident; + if (!usc_ident) continue; + + const existingMatch = + await ctx.db.user_interest_usc_content.findFirst({ + where: { user_interest_id: interestId, usc_ident }, + }); + + if (existingMatch) { + await ctx.db.user_interest_usc_content.update({ + where: { + user_interest_usc_content_id: + existingMatch.user_interest_usc_content_id, + }, + data: { + is_active: true, + match_rank: rank, + match_source: 'auto', + }, + }); + } else { + await ctx.db.user_interest_usc_content.create({ + data: { + user_interest_id: interestId, + usc_ident, + match_source: 'auto', + is_active: true, + match_rank: rank, + }, + }); + } + } + + return ctx.db.user_interest.findFirst({ + where: { user_id: ctx.user.email }, + include: { + user_interest_usc_content: { + orderBy: { match_rank: 'asc' }, + }, + }, + }); + }), + + interestToggleSection: privateProcedure + .input( + z.object({ + user_interest_usc_content_id: z.number(), + is_active: z.boolean(), + }), + ) + .mutation(async ({ input, ctx }) => { + // Verify ownership before updating + const interest = await ctx.db.user_interest.findFirst({ + where: { user_id: ctx.user.email }, + }); + if (!interest) return; + + await ctx.db.user_interest_usc_content.updateMany({ + where: { + user_interest_usc_content_id: + input.user_interest_usc_content_id, + user_interest_id: interest.user_interest_id, + }, + data: { is_active: input.is_active }, + }); + }), + + interestAddSection: privateProcedure + .input(z.object({ usc_ident: z.string() })) + .mutation(async ({ input, ctx }) => { + const interest = await ctx.db.user_interest.findFirst({ + where: { user_id: ctx.user.email }, + }); + if (!interest) return; + + const existing = + await ctx.db.user_interest_usc_content.findFirst({ + where: { + user_interest_id: interest.user_interest_id, + usc_ident: input.usc_ident, + }, + }); + + if (existing) { + await ctx.db.user_interest_usc_content.update({ + where: { + user_interest_usc_content_id: + existing.user_interest_usc_content_id, + }, + data: { is_active: true, match_source: 'manual' }, + }); + } else { + await ctx.db.user_interest_usc_content.create({ + data: { + user_interest_id: interest.user_interest_id, + usc_ident: input.usc_ident, + match_source: 'manual', + is_active: true, + }, + }); + } + }), + + interestLegislation: privateProcedure.query(async ({ ctx }) => { + const interest = await ctx.db.user_interest.findFirst({ + where: { user_id: ctx.user.email }, + include: { + user_interest_usc_content: { + where: { is_active: true }, + select: { usc_ident: true }, + }, + }, + }); + + if ( + !interest || + interest.user_interest_usc_content.length === 0 + ) { + return []; + } + + const idents = interest.user_interest_usc_content + .map((m) => m.usc_ident) + .filter((id): id is string => Boolean(id)); + + if (idents.length === 0) return []; + + // Build a raw SQL query since Prisma doesn't support ILIKE prefix arrays + const identsWithWildcard = idents.map((id) => `${id}%`); + const orClauses = identsWithWildcard + .map((_, i) => `uc.usc_ident ILIKE $${i + 1}`) + .join(' OR '); + + type LegislationRow = { + legislation_id: bigint; + title: string; + number: number; + session_number: number; + legislation_type: string; + chamber: string; + effective_date: Date | null; + }; + + const rows = await ctx.db.$queryRawUnsafe<LegislationRow[]>( + `SELECT + l.legislation_id, + l.title, + l.number, + c.session_number, + l.legislation_type, + l.chamber, + MIN(lv.effective_date) AS effective_date + FROM usc_content uc + JOIN usc_content_diff ucd ON ucd.usc_content_id = uc.usc_content_id + JOIN legislation_version lv ON lv.version_id = ucd.version_id + JOIN legislation l ON l.legislation_id = lv.legislation_id + JOIN congress c ON c.congress_id = l.congress_id + WHERE ${orClauses} + GROUP BY l.legislation_id, l.title, l.number, l.legislation_type, l.chamber, c.session_number + ORDER BY MIN(lv.effective_date) DESC + LIMIT 50`, + ...identsWithWildcard, + ); + + // BigInt → number for JSON serialisation + return rows.map((r) => ({ + ...r, + legislation_id: Number(r.legislation_id), + })); + }), + + interestBillMatch: privateProcedure + .input(z.object({ legislation_id: z.number() })) + .query(async ({ input, ctx }) => { + const interest = await ctx.db.user_interest.findFirst({ + where: { user_id: ctx.user.email }, + include: { + user_interest_usc_content: { + where: { is_active: true }, + select: { usc_ident: true }, + }, + }, + }); + + if ( + !interest || + interest.user_interest_usc_content.length === 0 + ) { + return { matches: false, matchedIdents: [] as string[] }; + } + + const idents = interest.user_interest_usc_content + .map((m) => m.usc_ident) + .filter((id): id is string => Boolean(id)); + + if (idents.length === 0) { + return { matches: false, matchedIdents: [] as string[] }; + } + + const identsWithWildcard = idents.map((id) => `${id}%`); + const orClauses = identsWithWildcard + .map((_, i) => `uc.usc_ident ILIKE $${i + 2}`) + .join(' OR '); + + type IdentRow = { usc_ident: string }; + const rows = await ctx.db.$queryRawUnsafe<IdentRow[]>( + `SELECT DISTINCT uc.usc_ident + FROM usc_content uc + JOIN usc_content_diff ucd ON ucd.usc_content_id = uc.usc_content_id + JOIN legislation_version lv ON lv.version_id = ucd.version_id + WHERE lv.legislation_id = $1 + AND (${orClauses}) + LIMIT 10`, + input.legislation_id, + ...identsWithWildcard, + ); + + return { + matches: rows.length > 0, + matchedIdents: rows.map((r) => r.usc_ident), + }; + }), });