diff --git a/backend/app/api/v1/endpoints/library.py b/backend/app/api/v1/endpoints/library.py
index e2b9b95..fef850f 100644
--- a/backend/app/api/v1/endpoints/library.py
+++ b/backend/app/api/v1/endpoints/library.py
@@ -25,6 +25,7 @@
ensure_unique_storage_name,
normalize_original_filename,
sanitize_storage_filename,
+ build_storage_name_with_email,
)
settings = get_settings()
@@ -290,6 +291,7 @@ async def ensure_uploaded_paper_local(
content = await _download_pdf_from_url(candidate_url)
stored_filename, file_url, file_size, file_hash = await _save_pdf_bytes(
current_user.id,
+ current_user.email,
content,
preferred_name=record.original_filename,
)
@@ -375,12 +377,14 @@ async def _download_pdf_from_url(url: str) -> bytes:
async def _save_pdf_bytes(
user_id: int,
+ user_email: str,
content: bytes,
*,
preferred_name: str | None = None,
) -> tuple[str, str, int, str]:
display_name = normalize_original_filename(preferred_name or f"user_{user_id}.pdf")
- storage_candidate = sanitize_storage_filename(display_name)
+ storage_candidate = build_storage_name_with_email(display_name, user_email)
+ storage_candidate = sanitize_storage_filename(storage_candidate)
stored_filename, destination = ensure_unique_storage_name(UPLOAD_DIR, storage_candidate)
await asyncio.to_thread(destination.write_bytes, content)
file_url = f"/media/uploads/{stored_filename}"
diff --git a/backend/app/api/v1/endpoints/papers.py b/backend/app/api/v1/endpoints/papers.py
index 0d117ad..763f3a7 100644
--- a/backend/app/api/v1/endpoints/papers.py
+++ b/backend/app/api/v1/endpoints/papers.py
@@ -27,6 +27,7 @@
ParsedPaperCacheRepository,
UploadedPaperRepository,
)
+from app.db.note_repository import NoteRepository
from app.db.conversation_repository import ConversationRepository
from app.db.session import get_db
from app.dependencies.auth import get_current_user
@@ -46,6 +47,7 @@
ensure_unique_storage_name,
normalize_original_filename,
sanitize_storage_filename,
+ build_storage_name_with_email,
)
if TYPE_CHECKING:
@@ -151,6 +153,8 @@ async def list_uploaded_papers(
async def upload_paper(
file: UploadFile = File(..., description="需要上传的 PDF 文件"),
folder_id: int | None = Form(None, description="文件夹 ID,不填则保存在未分类"),
+ conflict_resolution: str | None = Form(None, description="冲突处理方式:overwrite 或 rename"),
+ new_filename: str | None = Form(None, description="当 conflict_resolution=rename 时的新文件名"),
current_user=Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> upload_schema.UploadedPaperRead:
@@ -170,42 +174,108 @@ async def upload_paper(
)
cleaned_bytes = raw_bytes[:MAX_UPLOAD_BYTES]
- file_hash = _calculate_file_hash(cleaned_bytes)
-
- original_display_name = normalize_original_filename(file.filename)
- storage_candidate = sanitize_storage_filename(original_display_name)
- stored_filename, destination = ensure_unique_storage_name(UPLOAD_DIR, storage_candidate)
-
- await asyncio.to_thread(destination.write_bytes, cleaned_bytes)
- relative_url = f"/media/uploads/{stored_filename}"
-
- folder_repo = LibraryFolderRepository(db)
repo = UploadedPaperRepository(db)
+ folder_repo = LibraryFolderRepository(db)
resolved_folder_id = await _ensure_folder_access(
folder_repo,
folder_id=folder_id if folder_id and folder_id > 0 else None,
user_id=current_user.id,
)
+
+ target_display_name = normalize_original_filename(file.filename)
+ existing = await repo.get_by_original_name(current_user.id, target_display_name)
+
+ # 解析冲突处理策略
+ resolution = (conflict_resolution or "").strip().lower() or None
+ if existing and not resolution:
+ raise HTTPException(
+ status_code=status.HTTP_409_CONFLICT,
+ detail={
+ "message": "当前用户已存在同名文件",
+ "conflict": True,
+ "filename": target_display_name,
+ "options": ["overwrite", "rename"],
+ },
+ )
+
+ if resolution == "rename":
+ if not new_filename:
+ raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="重命名上传时必须提供新文件名")
+ target_display_name = normalize_original_filename(new_filename)
+ existing = await repo.get_by_original_name(current_user.id, target_display_name)
+ if existing:
+ raise HTTPException(
+ status_code=status.HTTP_409_CONFLICT,
+ detail={
+ "message": "新的文件名仍然存在冲突,请更换名称",
+ "conflict": True,
+ "filename": target_display_name,
+ "options": ["overwrite", "rename"],
+ },
+ )
+ elif resolution not in {None, "overwrite"}:
+ raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="无效的冲突处理方式")
+
+ if existing and resolution == "overwrite":
+ stored_filename = existing.stored_filename
+ destination = (UPLOAD_DIR / stored_filename).resolve()
+ relative_url = existing.file_url or f"/media/uploads/{stored_filename}"
+ # 先删除旧文件,再写入新内容,保持物理名不变
+ try:
+ if destination.exists():
+ await asyncio.to_thread(destination.unlink)
+ except Exception:
+ logger.warning("Failed to remove existing file before overwrite: %s", destination)
+ else:
+ storage_candidate = build_storage_name_with_email(target_display_name, current_user.email)
+ storage_candidate = sanitize_storage_filename(storage_candidate)
+ stored_filename, destination = ensure_unique_storage_name(UPLOAD_DIR, storage_candidate)
+ relative_url = f"/media/uploads/{stored_filename}"
+
+ await asyncio.to_thread(destination.write_bytes, cleaned_bytes)
+ file_hash = _calculate_file_hash(cleaned_bytes)
+
metadata_json: dict | None = None
try:
- metadata_json = await extract_pdf_metadata_async(destination, original_display_name)
+ metadata_json = await extract_pdf_metadata_async(destination, target_display_name)
except Exception as exc: # pragma: no cover - best effort only
logger.warning("Failed to extract metadata for uploaded PDF: %s", exc)
metadata_json = None
try:
- record = await repo.create(
- user_id=current_user.id,
- stored_filename=stored_filename,
- original_filename=original_display_name,
- content_type=file.content_type or "application/pdf",
- file_size=len(cleaned_bytes),
- file_url=relative_url,
- file_hash=file_hash,
- folder_id=resolved_folder_id,
- metadata_json=metadata_json,
- )
+ if existing and resolution == "overwrite":
+ await repo.purge_cached_artifacts(existing)
+
+ conv_repo = ConversationRepository(db)
+ note_repo = NoteRepository(db)
+ await conv_repo.delete_conversations_for_paper(current_user.id, existing.id)
+ await note_repo.detach_uploaded_paper(current_user.id, existing.id)
+
+ # 更新记录为新的文件
+ await repo.update_file_fields(
+ existing,
+ stored_filename=stored_filename,
+ file_url=relative_url,
+ file_size=len(cleaned_bytes),
+ file_hash=file_hash,
+ content_type=file.content_type or "application/pdf",
+ )
+ await repo.update_metadata(existing, metadata_json)
+ record = existing
+ else:
+ # 新建记录(无冲突或重命名)
+ record = await repo.create(
+ user_id=current_user.id,
+ stored_filename=stored_filename,
+ original_filename=target_display_name,
+ content_type=file.content_type or "application/pdf",
+ file_size=len(cleaned_bytes),
+ file_url=relative_url,
+ file_hash=file_hash,
+ folder_id=resolved_folder_id,
+ metadata_json=metadata_json,
+ )
await db.commit()
except Exception:
await db.rollback()
@@ -834,21 +904,29 @@ async def _handle_paper_qa(
conversation_id = request.conversation_id
if conversation_id:
- # 验证对话存在且属于当前用户,并属于智能阅读
+ # 验证对话存在且属于当前用户,并属于智能阅读,且绑定到该文档
conversation = await conv_repo.get_conversation(conversation_id, current_user.id)
if not conversation or conversation.category != "reading":
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Conversation not found or access denied"
)
+ if conversation.paper_id not in (None, request.paper_id):
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail="Conversation not found for this paper",
+ )
+ if conversation.paper_id is None:
+ conversation.paper_id = request.paper_id
+ await db.flush()
# 获取历史消息
history_messages = await conv_repo.get_conversation_messages(conversation_id, current_user.id)
else:
- # 创建新对话
+ # 创建新对话并绑定该文档
paper_title = parse_result.get("metadata", {}).get("title", "未命名文档")
conversation = await conv_repo.create_conversation(
current_user.id,
- ConversationCreate(title=f"关于《{paper_title}》的讨论", category="reading")
+ ConversationCreate(title=f"关于《{paper_title}》的讨论", category="reading", paper_id=request.paper_id)
)
conversation_id = conversation.id
history_messages = []
diff --git a/backend/app/api/v1/endpoints/users.py b/backend/app/api/v1/endpoints/users.py
index 8dd7104..9bb903d 100644
--- a/backend/app/api/v1/endpoints/users.py
+++ b/backend/app/api/v1/endpoints/users.py
@@ -2,16 +2,20 @@
from __future__ import annotations
import asyncio
+import shutil
from pathlib import Path
from typing import Final
from uuid import uuid4
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status
+from sqlalchemy import delete, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import get_settings
from app.core.security import hash_password, verify_password
from app.db.repository import UserRepository
+from app.models.uploaded_paper import UploadedPaper
+from app.models.parsed_paper_cache import ParsedPaperCache
from app.db.session import get_db
from app.dependencies.auth import get_current_user
from app.schemas import user as user_schema
@@ -45,6 +49,16 @@ def _remove_avatar_file(avatar_url: str | None) -> None:
pass
+def _remove_path_safely(target: Path) -> None:
+ try:
+ if target.is_file() or target.is_symlink():
+ target.unlink()
+ elif target.is_dir():
+ shutil.rmtree(target)
+ except OSError:
+ pass
+
+
@router.post(
"",
response_model=user_schema.UserRead,
@@ -179,13 +193,35 @@ async def delete_account(
current_user=Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
- """Soft-delete the user account by marking it inactive."""
+ """Hard delete user and all related data/files so the email can be reused."""
+
+ # Collect uploaded papers before DB deletion (to remove files/cache/parsed dirs)
+ result = await db.execute(
+ select(UploadedPaper.id, UploadedPaper.stored_filename, UploadedPaper.file_hash).where(
+ UploadedPaper.user_id == current_user.id
+ )
+ )
+ uploads = list(result.all())
+    # Remove physical files and parsed outputs (NOTE(review): ParsedPaperCache is keyed by the global file_hash, so this also drops cache entries shared with other users' identical files — best-effort cleanup, they will re-parse on demand)
+ for paper_id, stored_filename, file_hash in uploads:
+ upload_path = settings.media_path / "uploads" / stored_filename
+ parse_dir = settings.media_path / "parsed" / f"paper_{paper_id}"
+ _remove_path_safely(upload_path)
+ _remove_path_safely(parse_dir)
+
+ if file_hash:
+ await db.execute(delete(ParsedPaperCache).where(ParsedPaperCache.file_hash == file_hash))
+
+ # Remove avatar if under media
_remove_avatar_file(getattr(current_user, "avatar_url", None))
- repo = UserRepository(db)
- await repo.update(current_user, {"is_active": False})
+ # 删除用户前先删除上传记录,避免 ORM 删除流程尝试将 user_id 置空导致约束错误
+ await db.execute(delete(UploadedPaper).where(UploadedPaper.user_id == current_user.id))
+
+ # Finally delete the user (FK cascades will clean remaining dependencies)
+ await db.delete(current_user)
await db.commit()
- return {"message": "账户已成功注销"}
+ return {"message": "账户已彻底删除,可使用该邮箱重新注册"}
diff --git a/backend/app/db/conversation_repository.py b/backend/app/db/conversation_repository.py
index f49f213..d938187 100644
--- a/backend/app/db/conversation_repository.py
+++ b/backend/app/db/conversation_repository.py
@@ -26,13 +26,14 @@ async def create_conversation(self, user_id: int, data: ConversationCreate) -> C
user_id=user_id,
title=data.title,
category=data.category or "search",
+ paper_id=getattr(data, "paper_id", None),
)
self.db.add(conversation)
await self.db.commit()
await self.db.refresh(conversation)
return conversation
- async def get_conversation(self, conversation_id: int, user_id: int) -> Optional[Conversation]:
+ async def get_conversation(self, conversation_id: int, user_id: int, *, paper_id: int | None = None) -> Optional[Conversation]:
"""获取特定对话(含消息)"""
stmt = (
select(Conversation)
@@ -43,6 +44,8 @@ async def get_conversation(self, conversation_id: int, user_id: int) -> Optional
)
.options(selectinload(Conversation.messages))
)
+ if paper_id is not None:
+ stmt = stmt.where(Conversation.paper_id == paper_id)
result = await self.db.execute(stmt)
return result.scalar_one_or_none()
@@ -111,6 +114,28 @@ async def delete_conversation(self, conversation_id: int, user_id: int) -> bool:
await self.db.commit()
return True
+ async def delete_conversations_for_paper(self, user_id: int, paper_id: int) -> int:
+ """软删除绑定到指定文档的阅读类对话,返回删除数量"""
+ stmt = (
+ select(Conversation)
+ .where(
+ Conversation.user_id == user_id,
+ Conversation.paper_id == paper_id,
+ Conversation.category == "reading",
+ Conversation.is_deleted == False,
+ )
+ .options(selectinload(Conversation.messages))
+ )
+ result = await self.db.execute(stmt)
+ conversations = result.scalars().all()
+ deleted = 0
+ for conv in conversations:
+ conv.is_deleted = True
+ deleted += 1
+ if deleted:
+ await self.db.commit()
+ return deleted
+
async def add_message(
self,
conversation_id: int,
diff --git a/backend/app/db/note_repository.py b/backend/app/db/note_repository.py
index d4e1425..a1f06b1 100644
--- a/backend/app/db/note_repository.py
+++ b/backend/app/db/note_repository.py
@@ -4,7 +4,7 @@
from collections.abc import Mapping
from typing import Any
-from sqlalchemy import func, select
+from sqlalchemy import func, select, update
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.note import Note
@@ -85,3 +85,16 @@ async def update(self, note: Note, updates: Mapping[str, Any]) -> Note:
async def delete(self, note: Note) -> None:
await self._session.delete(note)
await self._session.flush()
+
+ async def detach_uploaded_paper(self, user_id: int, paper_id: int) -> int:
+    """Set uploaded_paper_id to NULL for notes linked to the given paper, returning the number of affected rows."""
+
+ stmt = (
+ update(Note)
+ .where(Note.user_id == user_id, Note.uploaded_paper_id == paper_id)
+ .values(uploaded_paper_id=None)
+ .execution_options(synchronize_session="fetch")
+ )
+ result = await self._session.execute(stmt)
+ await self._session.flush()
+ return result.rowcount or 0
diff --git a/backend/app/db/repository.py b/backend/app/db/repository.py
index d9acefe..2c03b43 100644
--- a/backend/app/db/repository.py
+++ b/backend/app/db/repository.py
@@ -20,6 +20,14 @@ class UserRepository:
def __init__(self, session: AsyncSession) -> None:
self._session = session
+ async def get_by_original_name(self, user_id: int, original_filename: str) -> UploadedPaper | None:
+ stmt = select(UploadedPaper).where(
+ UploadedPaper.user_id == user_id,
+ UploadedPaper.original_filename == original_filename,
+ )
+ result = await self._session.execute(stmt)
+ return result.scalar_one_or_none()
+
async def get_by_email(self, email: str) -> User | None:
stmt = select(User).where(User.email == email)
result = await self._session.execute(stmt)
@@ -110,6 +118,14 @@ class UploadedPaperRepository:
def __init__(self, session: AsyncSession) -> None:
self._session = session
+ async def get_by_original_name(self, user_id: int, original_filename: str) -> UploadedPaper | None:
+ stmt = select(UploadedPaper).where(
+ UploadedPaper.user_id == user_id,
+ UploadedPaper.original_filename == original_filename,
+ )
+ result = await self._session.execute(stmt)
+ return result.scalar_one_or_none()
+
def _deduped_uploads_subquery(
self,
user_id: int,
@@ -318,6 +334,18 @@ async def delete(self, record: UploadedPaper) -> None:
await self._session.delete(record)
await self._session.flush()
+ async def purge_cached_artifacts(self, record: UploadedPaper) -> None:
+ """Delete cached parse artifacts (cache + jobs) without removing the record itself."""
+
+ if record.file_hash:
+ await self._session.execute(
+ delete(ParsedPaperCache).where(ParsedPaperCache.file_hash == record.file_hash)
+ )
+ await self._session.execute(
+ delete(MineruParseJob).where(MineruParseJob.file_hash == record.file_hash)
+ )
+ await self._session.flush()
+
class ParsedPaperCacheRepository:
"""Cache MinerU parse outputs by file hash."""
diff --git a/backend/app/models/conversation.py b/backend/app/models/conversation.py
index f2bbf71..85d6299 100644
--- a/backend/app/models/conversation.py
+++ b/backend/app/models/conversation.py
@@ -15,6 +15,7 @@ class Conversation(Base):
id = Column(Integer, primary_key=True, index=True)
user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True)
+ paper_id = Column(Integer, ForeignKey("uploaded_papers.id", ondelete="SET NULL"), nullable=True, index=True)
title = Column(String(500), nullable=False)
category = Column(String(32), nullable=False, server_default="search", index=True)
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False, index=True)
@@ -23,6 +24,7 @@ class Conversation(Base):
# 关系
user = relationship("User", back_populates="conversations")
+ paper = relationship("UploadedPaper", back_populates="conversations")
messages = relationship("ConversationMessage", back_populates="conversation", cascade="all, delete-orphan", order_by="ConversationMessage.created_at")
diff --git a/backend/app/models/uploaded_paper.py b/backend/app/models/uploaded_paper.py
index 0a5e4e6..3dadecc 100644
--- a/backend/app/models/uploaded_paper.py
+++ b/backend/app/models/uploaded_paper.py
@@ -27,3 +27,4 @@ class UploadedPaper(Base):
user = relationship("User", backref="uploaded_papers")
folder = relationship("LibraryFolder", back_populates="papers")
+ conversations = relationship("Conversation", back_populates="paper")
diff --git a/backend/app/schemas/conversation.py b/backend/app/schemas/conversation.py
index 65b8b5b..f390257 100644
--- a/backend/app/schemas/conversation.py
+++ b/backend/app/schemas/conversation.py
@@ -34,6 +34,7 @@ class ConversationBase(BaseModel):
category: Literal["search", "reading"] = Field(
"search", description="对话类别:search(智能搜索)或 reading(智能阅读)"
)
+ paper_id: int | None = Field(None, description="可选的关联上传文献 ID,仅用于智能阅读场景")
class ConversationCreate(ConversationBase):
diff --git a/backend/app/services/mineru_service.py b/backend/app/services/mineru_service.py
index 3690569..6a04f1d 100644
--- a/backend/app/services/mineru_service.py
+++ b/backend/app/services/mineru_service.py
@@ -102,14 +102,25 @@ def parse_pdf_direct(pdf_path: Path, output_dir: Path, lang="ch") -> dict[str, A
# Generate outputs
# 1. Markdown
md_content = union_make(pdf_info, MakeMode.MM_MD, "images")
- md_writer.write_string(f"{stem}.md", md_content)
+ # md_writer.write_string(f"{stem}.md", md_content)
+ (target_dir / f"{stem}.md").write_text(md_content, encoding="utf-8", errors="replace")
# 2. Content List (JSON)
content_list = union_make(pdf_info, MakeMode.CONTENT_LIST, "images")
- md_writer.write_string(f"{stem}_content_list.json", json.dumps(content_list, ensure_ascii=False, indent=4))
+ # md_writer.write_string(f"{stem}_content_list.json", json.dumps(content_list, ensure_ascii=False, indent=4))
+ (target_dir / f"{stem}_content_list.json").write_text(
+ json.dumps(content_list, ensure_ascii=False, indent=4),
+ encoding="utf-8",
+ errors="replace"
+ )
# 3. Middle JSON
- md_writer.write_string(f"{stem}_middle.json", json.dumps(middle_json, ensure_ascii=False, indent=4))
+ # md_writer.write_string(f"{stem}_middle.json", json.dumps(middle_json, ensure_ascii=False, indent=4))
+ (target_dir / f"{stem}_middle.json").write_text(
+ json.dumps(middle_json, ensure_ascii=False, indent=4),
+ encoding="utf-8",
+ errors="replace"
+ )
loguru_logger.info(f"Parsing completed for {pdf_path.name}. Output in {target_dir}")
diff --git a/backend/app/utils/file_naming.py b/backend/app/utils/file_naming.py
index 1644d9f..55ddea1 100644
--- a/backend/app/utils/file_naming.py
+++ b/backend/app/utils/file_naming.py
@@ -2,6 +2,7 @@
from __future__ import annotations
import re
+import hashlib
from pathlib import Path
SAFE_PATTERN = re.compile(r"[^A-Za-z0-9._-]+")
@@ -45,3 +46,21 @@ def ensure_unique_storage_name(directory: Path, filename: str) -> tuple[str, Pat
counter += 1
return candidate.name, candidate
+
+def build_storage_name_with_email(original_filename: str, user_email: str, *, hash_length: int = 12) -> str:
+ """Generate a storage filename using the original name plus a stable hash of the user's email.
+
+ This keeps the display name unchanged while ensuring cross-user uniqueness in the uploads folder.
+ """
+
+ normalized = normalize_original_filename(original_filename)
+ sanitized = Path(sanitize_storage_filename(normalized))
+
+ stem = sanitized.stem or "paper"
+ suffix = sanitized.suffix or ".pdf"
+
+ email_bytes = (user_email or "").encode("utf-8", "ignore")
+ email_hash = hashlib.sha256(email_bytes).hexdigest()[:hash_length]
+
+ return f"{stem}_{email_hash}{suffix}"
+
diff --git a/backend/app/workers/mineru_runner.py b/backend/app/workers/mineru_runner.py
index 73b42f2..3d5dcdf 100644
--- a/backend/app/workers/mineru_runner.py
+++ b/backend/app/workers/mineru_runner.py
@@ -23,15 +23,17 @@ def main() -> int:
try:
parse_result = parse_pdf_direct(pdf_path, output_dir, lang=args.lang)
+ # Use ensure_ascii=True to avoid UnicodeEncodeError if result contains surrogates
result_path.write_text(
- json.dumps({"success": True, "result": parse_result}, ensure_ascii=False),
+ json.dumps({"success": True, "result": parse_result}, ensure_ascii=True),
encoding="utf-8",
)
return 0
except Exception as exc: # pragma: no cover - helper executed in subprocess
error_payload = {"success": False, "error": str(exc), "error_type": type(exc).__name__}
try:
- result_path.write_text(json.dumps(error_payload, ensure_ascii=False), encoding="utf-8")
+ # Use ensure_ascii=True for error payload too
+ result_path.write_text(json.dumps(error_payload, ensure_ascii=True), encoding="utf-8")
except Exception:
print(f"[MinerU Runner] Failed to write error payload: {error_payload}", file=sys.stderr)
print(f"[MinerU Runner] Error: {exc}", file=sys.stderr)
diff --git a/backend/config/magic-pdf.config.json b/backend/config/magic-pdf.config.json
index aa7bad7..2596261 100644
--- a/backend/config/magic-pdf.config.json
+++ b/backend/config/magic-pdf.config.json
@@ -4,7 +4,7 @@
},
"models-dir": "/tmp/magic-pdf/models",
"layoutreader-model-dir": "/tmp/magic-pdf/models/Layout",
- "device-mode": "cpu",
+ "device-mode": "cuda",
"ocr-config": {
"enable": true
},
diff --git a/backend/migrations/versions/20251223_upload_conflict_and_conversation_binding.py b/backend/migrations/versions/20251223_upload_conflict_and_conversation_binding.py
new file mode 100644
index 0000000..2e23311
--- /dev/null
+++ b/backend/migrations/versions/20251223_upload_conflict_and_conversation_binding.py
@@ -0,0 +1,47 @@
+"""Add unique constraint for uploads and bind conversations to papers
+
+Revision ID: 20251223_upload_conflict
+Revises: 20251114_conversation_category
+Create Date: 2025-12-23 00:00:00
+"""
+
+from __future__ import annotations
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "20251223_upload_conflict"
+down_revision = "20251114_conversation_category"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+ # Conversations: add paper binding (nullable) for smart-reading cleanup
+ op.add_column("conversations", sa.Column("paper_id", sa.Integer(), nullable=True))
+ op.create_index("ix_conversations_paper_id", "conversations", ["paper_id"], unique=False)
+ op.create_foreign_key(
+ "fk_conversations_paper",
+ "conversations",
+ "uploaded_papers",
+ ["paper_id"],
+ ["id"],
+ ondelete="SET NULL",
+ )
+
+ # Uploaded papers: enforce per-user filename uniqueness
+ op.create_unique_constraint(
+ "uq_uploaded_papers_user_original",
+ "uploaded_papers",
+ ["user_id", "original_filename"],
+ )
+
+
+def downgrade() -> None:
+ op.drop_constraint("uq_uploaded_papers_user_original", "uploaded_papers", type_="unique")
+
+ op.drop_constraint("fk_conversations_paper", "conversations", type_="foreignkey")
+ op.drop_index("ix_conversations_paper_id", table_name="conversations")
+ op.drop_column("conversations", "paper_id")
diff --git a/docker-compose.yml b/docker-compose.yml
index 61ad785..1e7e8ec 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -49,7 +49,7 @@ services:
profiles: ["infra", "frontend"]
environment:
POSTGRES_USER: postgres
- POSTGRES_PASSWORD: postgres
+ POSTGRES_PASSWORD: shizhiyuan_hanxujie_liushiao
POSTGRES_DB: papers
ports:
- "5432:5432"
diff --git a/frontend/src/app/academic/page.tsx b/frontend/src/app/academic/page.tsx
index 44d8b90..2439933 100644
--- a/frontend/src/app/academic/page.tsx
+++ b/frontend/src/app/academic/page.tsx
@@ -518,7 +518,7 @@ export default function AcademicSearchPage() {
messages={conversation}
loading={loading}
userAvatar={currentUser?.avatar_url}
- userName={currentUser?.full_name ?? undefined}
+ userName={currentUser?.full_name || currentUser?.email || undefined}
/>
)}
diff --git a/frontend/src/app/library/page.tsx b/frontend/src/app/library/page.tsx
index c5a21c5..161ab8c 100644
--- a/frontend/src/app/library/page.tsx
+++ b/frontend/src/app/library/page.tsx
@@ -512,12 +512,12 @@ export default function LibraryPage() {
你当前未登录,无法查看个人上传的论文。
-
前往登录
-
+
);
diff --git a/frontend/src/app/notes/[id]/page.tsx b/frontend/src/app/notes/[id]/page.tsx
index 7066b77..5bb124a 100644
--- a/frontend/src/app/notes/[id]/page.tsx
+++ b/frontend/src/app/notes/[id]/page.tsx
@@ -63,6 +63,7 @@ export default function NoteDetailPage() {
}
async function bootstrap() {
+ if (!token) return;
try {
const [noteRes, paperRes] = await Promise.all([
fetchNote(token, noteId),
@@ -155,34 +156,34 @@ export default function NoteDetailPage() {
contentContainerClassName="items-stretch !px-0 !py-0"
contentClassName="w-full h-full min-h-screen flex flex-col gap-0"
>
-
+
{savedMessage && {savedMessage}}
{error && {error}}
- 更新于 {formatDate(note.updated_at)}
+ 更新于 {formatDate(note.updated_at)}
-
+
-
+
setTitle(e.target.value)}
/>
-
+
-
+
) : (
-
未关联文件
+
未关联文件
)}
@@ -224,7 +225,7 @@ export default function NoteDetailPage() {
)}
{/* 最小化状态 */}
diff --git a/frontend/src/lib/api-client.ts b/frontend/src/lib/api-client.ts
index 690f361..1a23617 100644
--- a/frontend/src/lib/api-client.ts
+++ b/frontend/src/lib/api-client.ts
@@ -362,16 +362,48 @@ export async function deleteAccount(token: string): Promise<{ message: string }>
return response.json();
}
+export type UploadConflictOption = "overwrite" | "rename";
+
+export interface UploadConflictDetail {
+ message?: string;
+ conflict?: boolean;
+ filename?: string;
+ options?: UploadConflictOption[];
+}
+
+export class UploadConflictError extends Error {
+ detail: UploadConflictDetail;
+ constructor(detail: UploadConflictDetail, message?: string) {
+ super(message ?? detail.message ?? "上传发生同名冲突");
+ this.name = "UploadConflictError";
+ this.detail = detail;
+ }
+}
+
+export function isUploadConflictError(error: unknown): error is UploadConflictError {
+ return error instanceof UploadConflictError;
+}
+
export async function uploadPaper(
token: string,
file: File,
- options?: { folderId?: number | null },
+ options?: {
+ folderId?: number | null;
+ conflictResolution?: UploadConflictOption;
+ newFilename?: string;
+ },
): Promise {
const formData = new FormData();
formData.append("file", file);
if (options?.folderId && options.folderId > 0) {
formData.append("folder_id", options.folderId.toString());
}
+ if (options?.conflictResolution) {
+ formData.append("conflict_resolution", options.conflictResolution);
+ }
+ if (options?.newFilename) {
+ formData.append("new_filename", options.newFilename);
+ }
const response = await fetch(`${BACKEND_URL}/api/v1/papers/upload`, {
method: "POST",
@@ -382,8 +414,19 @@ export async function uploadPaper(
});
if (!response.ok) {
- const detail = (await response.json().catch(() => ({}))) as { detail?: string };
- throw new Error(detail.detail ?? "上传论文失败");
+ const detail = (await response.json().catch(() => ({}))) as {
+ detail?: string | UploadConflictDetail;
+ };
+
+ if (response.status === 409) {
+ const conflictDetail = (typeof detail.detail === "object" ? detail.detail : {}) as UploadConflictDetail;
+ throw new UploadConflictError(
+ conflictDetail,
+ conflictDetail.message ?? (typeof detail.detail === "string" ? detail.detail : "存在同名文件"),
+ );
+ }
+
+ throw new Error(typeof detail.detail === "string" ? detail.detail : "上传论文失败");
}
return response.json();
diff --git a/redis.conf b/redis.conf
index b79f83b..f4dc752 100644
--- a/redis.conf
+++ b/redis.conf
@@ -2,7 +2,7 @@
# Bind to local interfaces so services in Docker network
# 和宿主机本地开发进程(uvicorn/celery)都能访问
#bind 127.0.0.1
-bind 127.0.0.1
+bind 0.0.0.0
# Disable protected mode because access is restricted by Docker network
diff --git a/scripts/deploy.sh b/scripts/deploy.sh
new file mode 100755
index 0000000..8cdadb5
--- /dev/null
+++ b/scripts/deploy.sh
@@ -0,0 +1,173 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# --- 基础配置 ---
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+FRONTEND_DIR="$ROOT_DIR/frontend"
+BACKEND_DIR="$ROOT_DIR/backend"
+COMPOSE_FILE="$ROOT_DIR/docker-compose.yml"
+
+# 颜色定义
+info() { printf "\\033[1;34m[info]\\033[0m %s\\n" "$*"; }
+warn() { printf "\\033[1;33m[warn]\\033[0m %s\\n" "$*"; }
+error() { printf "\\033[1;31m[error]\\033[0m %s\\n" "$*" >&2; }
+success() { printf "\\033[1;32m[success]\\033[0m %s\\n" "$*"; }
+
+# --- 0) 防火墙白名单配置 (针对校外服务器限制清华访问) ---
+setup_firewall() {
+ info "正在配置校外服务器的访问白名单 (UFW)..."
+
+ # 1. 检查 ufw 是否安装
+ if ! command -v ufw &>/dev/null; then
+ error "未找到 ufw,请先安装: sudo apt install ufw"
+ exit 1
+ fi
+
+ # 2. 核心安全防护:允许 SSH (端口22) 从任何地方访问,或者从你当前的 IP 访问
+ # 建议先开放全网 SSH 端口,防止失联。如果追求极高安全,可以只写你现在的 IP。
+ sudo ufw allow 22/tcp comment 'Allow SSH'
+
+ # 3. 清理旧规则并设置默认拦截
+ sudo ufw deny 3000 > /dev/null 2>&1 || true
+ sudo ufw deny 8000 > /dev/null 2>&1 || true
+
+ # 4. 允许清华大学公网出口 IP 段
+ # 这些是清华访问外网时,外网服务器看到的来源 IP
+ local THU_RANGES=("166.111.0.0/16" "101.5.0.0/16" "183.172.0.0/15" "59.66.0.0/16")
+
+ for range in "${THU_RANGES[@]}"; do
+ sudo ufw allow from "$range" to any port 3000 comment 'THU WhiteList'
+ sudo ufw allow from "$range" to any port 8000 comment 'THU WhiteList'
+ done
+
+ # 5. 【重要】如果你现在不在校内,请手动允许你现在的 IP
+ # CURRENT_MY_IP=$(curl -s ifconfig.me)
+ # sudo ufw allow from "$CURRENT_MY_IP" to any port 3000 comment 'My Current IP'
+ # sudo ufw allow from "$CURRENT_MY_IP" to any port 8000 comment 'My Current IP'
+
+ # 6. 激活防火墙
+ echo "y" | sudo ufw enable
+ success "防火墙配置完成!仅限清华校内 IP 访问 3000/8000 端口。"
+}
+
+# 调用函数
+setup_firewall
+
+
+# --- 获取公网 IP ---
+# 尝试自动获取公网 IP,如果失败则使用你提供的默认 IP
+info "正在获取公网 IP 地址..."
+PUBLIC_IP=$(curl -s --max-time 3 ifconfig.me || echo "36.103.203.223")
+success "检测到公网 IP: $PUBLIC_IP"
+
+# --- 1) 检查并启动基础设施 (Docker) ---
+if command -v docker &>/dev/null; then
+ info "正在启动 Postgres/Redis (使用 infra profile)..."
+ # 加上 --profile infra 确保带 profile 的容器能启动
+ docker compose -f "$COMPOSE_FILE" --profile infra up -d postgres redis
+
+ info "等待数据库就绪..."
+ for i in {1..30}; do
+ if docker compose -f "$COMPOSE_FILE" exec -T postgres pg_isready -U postgres -d papers &>/dev/null; then
+ success "数据库已就绪。"
+ break
+ fi
+ [ "$i" -eq 30 ] && warn "数据库启动超时,后续迁移可能失败。"
+ sleep 1
+ done
+else
+ error "未找到 Docker,请确保已执行 sudo usermod -aG docker $USER 并生效。"
+ exit 1
+fi
+
+if ! command -v npm &>/dev/null; then
+ error "未找到 npm,请先安装 Node.js 和 npm (例如: sudo apt install nodejs npm)。"
+ exit 1
+fi
+
+# --- 2) 并行安装后端与前端依赖 ---
+info "开始同步安装前后端依赖 (显示进度)..."
+
+install_backend() {
+ cd "$BACKEND_DIR"
+ [ ! -d ".venv" ] && python3 -m venv .venv
+ source .venv/bin/activate
+ pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple
+ # 显示安装进度
+ pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple
+}
+
+install_frontend() {
+ cd "$FRONTEND_DIR"
+ if [ -d "node_modules" ]; then
+ info "前端依赖已存在 (node_modules),跳过安装..."
+ else
+ info "正在安装前端依赖 (npm)..."
+ npm install --registry=https://registry.npmmirror.com
+ fi
+}
+
+install_backend &
+PID_BE=$!
+install_frontend &
+PID_FE=$!
+
+wait $PID_BE && success "后端依赖安装完成。"
+wait $PID_FE && success "前端依赖安装完成。"
+
+# --- 3) 启动服务 ---
+info "正在启动所有服务..."
+
+# 环境变量设置 - 显式指定密码和地址
+export POSTGRES_HOST="127.0.0.1"
+export POSTGRES_PASSWORD="shizhiyuan_hanxujie_liushiao" # 对应你刚才改的密码
+export REDIS_HOST="127.0.0.1"
+export PYTHONPATH="$BACKEND_DIR:${PYTHONPATH:-}"
+export HF_ENDPOINT="https://hf-mirror.com"
+export HF_HUB_OFFLINE=1 # 既然你权重下好了,强制离线防止卡顿
+
+# A. 启动后端 FastAPI
+(
+ cd "$BACKEND_DIR"
+ source .venv/bin/activate
+ python -m alembic upgrade head
+ exec uvicorn app.main:app --host 0.0.0.0 --port 8000
+) &
+BACKEND_PID=$!
+
+# B. 启动 Celery Worker
+(
+ cd "$BACKEND_DIR"
+ source .venv/bin/activate
+ CPU_CORES="$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 4)"
+ # 解决 macOS/Ubuntu 上 Celery 的某些 fork 安全问题
+ export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
+ exec celery -A app.workers.celery_app.celery_app worker \
+ --loglevel=info --queues default --pool threads --concurrency "$CPU_CORES"
+) &
+CELERY_PID=$!
+
+# C. 启动前端 Next.js
+(
+ cd "$FRONTEND_DIR"
+ # 将后端地址注入前端
+ # 使用 -H 0.0.0.0 确保对外网开放
+ NEXT_PUBLIC_BACKEND_URL="http://$PUBLIC_IP:8000" npm run dev -- -H 0.0.0.0
+) &
+FRONTEND_PID=$!
+
+# --- 4) 最终信息输出 ---
+# 清屏并显示最终访问地址
+sleep 2
+success "================================================"
+success " InsightReading 服务已全部在公网启动!"
+success "================================================"
+info "▶ 前端访问地址: http://$PUBLIC_IP:3000"
+info "▶ 后端接口地址: http://$PUBLIC_IP:8000/docs"
+info "▶ 数据库状态: 已连接 (127.0.0.1:5432)"
+info "▶ Redis 状态: 已连接 (127.0.0.1:6379)"
+success "================================================"
+info "按 Ctrl+C 停止所有后台进程。"
+
+trap 'info "正在关闭服务..."; kill $BACKEND_PID $FRONTEND_PID $CELERY_PID 2>/dev/null || true; exit' INT TERM
+wait
\ No newline at end of file
diff --git a/scripts/dev1.sh b/scripts/dev1.sh
deleted file mode 100644
index 806460b..0000000
--- a/scripts/dev1.sh
+++ /dev/null
@@ -1,102 +0,0 @@
-#!/usr/bin/env bash
-# Helper script to bring up InsightReading's dev stack (databases + backend + frontend)
-set -euo pipefail
-
-ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
-FRONTEND_DIR="$ROOT_DIR/frontend"
-BACKEND_DIR="$ROOT_DIR/backend"
-COMPOSE_FILE="$ROOT_DIR/docker-compose.yml"
-
-info() { printf "\\033[1;34m[info]\\033[0m %s\\n" "$*"; }
-warn() { printf "\\033[1;33m[warn]\\033[0m %s\\n" "$*"; }
-error() { printf "\\033[1;31m[error]\\033[0m %s\\n" "$*" >&2; }
-
-# --- 1) Start infrastructure -------------------------------------------------
-if command -v docker >/dev/null 2>&1; then
- info "Starting postgres/redis via docker compose…"
- docker compose -f "$COMPOSE_FILE" up -d postgres redis
-else
- warn "docker not found; skipping postgres/redis startup"
-fi
-
-# --- 2) Backend ---------------------------------------------------------------
-info "Ensuring backend virtualenv exists…"
-if [ ! -d "$BACKEND_DIR/.venv" ]; then
- python -m venv "$BACKEND_DIR/.venv"
-fi
-
-info "Installing backend dependencies (if needed)…"
-(
- cd "$BACKEND_DIR"
- source .venv/bin/activate
-
- # Check if uv is available for faster installation
- if command -v uv >/dev/null 2>&1; then
- info "Using uv for fast installation…"
- uv pip install -e . --python .venv >/dev/null
- else
- info "uv not found, falling back to pip…"
- pip install --upgrade pip >/dev/null
- pip install -e . >/dev/null
- fi
-)
-
-# --- 3) Run database migrations ----------------------------------------------
-info "Running database migrations…"
-(
- cd "$BACKEND_DIR"
- source .venv/bin/activate
- export POSTGRES_HOST="localhost"
- export REDIS_HOST="localhost"
- export PYTHONPATH="$BACKEND_DIR:${PYTHONPATH:-}"
- alembic upgrade head
-)
-
-# --- 4) Launch Services ------------------------------------------------------
-info "Starting FastAPI backend on :8000…"
-(
- cd "$BACKEND_DIR"
- source .venv/bin/activate
- # 本地开发时,backend 运行在宿主机,需要通过 localhost 访问 docker 启动的 postgres/redis
- export POSTGRES_HOST="localhost"
- export REDIS_HOST="localhost"
- export PYTHONPATH="$BACKEND_DIR:${PYTHONPATH:-}"
- uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
-) &
-BACKEND_PID=$!
-
-info "Starting Celery worker (MinerU queue)…"
-(
- cd "$BACKEND_DIR"
- source .venv/bin/activate
- export POSTGRES_HOST="localhost"
- export REDIS_HOST="localhost"
- export PYTHONPATH="$BACKEND_DIR:${PYTHONPATH:-}"
- # Fixed: removed duplicate .celery_app
- celery -A app.workers.celery_app worker --loglevel=info --queues default
-) &
-CELERY_PID=$!
-
-# --- 5) Frontend --------------------------------------------------------------
-info "Installing frontend dependencies…"
-(
- cd "$FRONTEND_DIR"
- npm install
-) >/dev/null
-
-info "Starting Next.js frontend on :3000…"
-(
- cd "$FRONTEND_DIR"
- npm run dev
-) &
-FRONTEND_PID=$!
-
-info "Backend PID: $BACKEND_PID"
-info "Celery PID: $CELERY_PID"
-info "Frontend PID: $FRONTEND_PID"
-info "Backend API: http://localhost:8000/api/v1"
-info "Frontend: http://localhost:3000"
-info "Press Ctrl+C to stop all services."
-
-trap 'info "Shutting down…"; kill $BACKEND_PID $FRONTEND_PID $CELERY_PID 2>/dev/null || true' INT TERM
-wait $BACKEND_PID $FRONTEND_PID $CELERY_PID
diff --git a/scripts/docker-infra.sh b/scripts/docker-infra.sh
deleted file mode 100644
index d10607f..0000000
--- a/scripts/docker-infra.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
-
-info() { printf "\033[1;34m[info]\033[0m %s\n" "$*"; }
-
-info "Starting Postgres and Redis via docker compose (profile=infra)…"
-docker compose -f "$ROOT_DIR/docker-compose.yml" --profile infra up -d postgres redis
-
-info "Postgres + Redis are running. Use 'docker compose --profile infra down' to stop them."
diff --git a/scripts/local.sh b/scripts/local.sh
new file mode 100644
index 0000000..e69de29