diff --git a/.gitignore b/.gitignore index b6edfa8..8f3fcd3 100644 --- a/.gitignore +++ b/.gitignore @@ -104,3 +104,6 @@ solutions/ tmp/ .claude/ openai.json + +test-reports/ +.pip-cache/ \ No newline at end of file diff --git a/backend/.coverage b/backend/.coverage deleted file mode 100644 index 065f944..0000000 Binary files a/backend/.coverage and /dev/null differ diff --git a/backend/.coveragerc b/backend/.coveragerc new file mode 100644 index 0000000..1929f7e --- /dev/null +++ b/backend/.coveragerc @@ -0,0 +1,31 @@ +[run] +source = app + +# Exclude integration-heavy modules that require external services and are not +# part of the core business logic unit-test scope. +omit = + # Transport layer (FastAPI routers/endpoints). + app/api/* + app/main.py + + app/workers/* + app/services/mineru_*.py + app/services/email.py + app/services/verification_codes.py + app/core/redis.py + # External search/LLM integrations (require network keys/services). + app/services/academic/* + app/services/ai/* + app/api/v1/arxiv.py + + # Infra glue (covered by integration tests in deployment, not unit tests). + app/db/session.py + app/db/paper_repository.py + +[report] +show_missing = True +skip_covered = True +exclude_lines = + pragma: no cover + if TYPE_CHECKING: + raise NotImplementedError diff --git a/backend/.coveragerc.external b/backend/.coveragerc.external new file mode 100644 index 0000000..34ea5b4 --- /dev/null +++ b/backend/.coveragerc.external @@ -0,0 +1,28 @@ +[run] +source = app + +# External-API test suite still avoids MinerU/Celery/email/redis integrations. 
+omit = + app/workers/* + app/services/mineru_*.py + app/services/email.py + app/services/verification_codes.py + app/core/redis.py + +[report] +show_missing = True +skip_covered = True +include = + app/services/academic/providers/* + app/services/academic/search_service.py + app/services/academic/query_parser.py + app/services/ai/llm_client.py + app/services/ai/agent_controller.py + app/services/ai/tool_executor.py + app/services/ai/tool_interfaces.py + app/services/ai/tools.py + app/api/v1/arxiv.py +exclude_lines = + pragma: no cover + if TYPE_CHECKING: + raise NotImplementedError diff --git a/backend/.coveragerc.mineru b/backend/.coveragerc.mineru new file mode 100644 index 0000000..68a8fc8 --- /dev/null +++ b/backend/.coveragerc.mineru @@ -0,0 +1,24 @@ +[run] +# Only measure MinerU-related code for the MinerU test group. +source = + app.services.mineru_cli + app.services.mineru_runtime + +# MinerU test suite focuses on MinerU-related code; omit other external integrations. +omit = + app/workers/* + app/services/email.py + app/services/verification_codes.py + app/core/redis.py + app/services/academic/* + app/services/ai/* + app/api/* + app/main.py + +[report] +show_missing = True +skip_covered = True +exclude_lines = + pragma: no cover + if TYPE_CHECKING: + raise NotImplementedError diff --git a/backend/.pip-cache/http-v2/2/9/0/6/8/290688371d9c486157e39b8996a79b0ca5e03c4b74cfdc03d0b70c2d b/backend/.pip-cache/http-v2/2/9/0/6/8/290688371d9c486157e39b8996a79b0ca5e03c4b74cfdc03d0b70c2d new file mode 100644 index 0000000..ce89806 Binary files /dev/null and b/backend/.pip-cache/http-v2/2/9/0/6/8/290688371d9c486157e39b8996a79b0ca5e03c4b74cfdc03d0b70c2d differ diff --git a/backend/.pip-cache/http-v2/2/9/0/6/8/290688371d9c486157e39b8996a79b0ca5e03c4b74cfdc03d0b70c2d.body b/backend/.pip-cache/http-v2/2/9/0/6/8/290688371d9c486157e39b8996a79b0ca5e03c4b74cfdc03d0b70c2d.body new file mode 100644 index 0000000..2029b88 Binary files /dev/null and 
b/backend/.pip-cache/http-v2/2/9/0/6/8/290688371d9c486157e39b8996a79b0ca5e03c4b74cfdc03d0b70c2d.body differ diff --git a/backend/.pip-cache/http-v2/7/9/5/1/c/7951c90562b5f02d14af0752098311d45a24bc94b49010d480f5c761 b/backend/.pip-cache/http-v2/7/9/5/1/c/7951c90562b5f02d14af0752098311d45a24bc94b49010d480f5c761 new file mode 100644 index 0000000..ca0f0f2 Binary files /dev/null and b/backend/.pip-cache/http-v2/7/9/5/1/c/7951c90562b5f02d14af0752098311d45a24bc94b49010d480f5c761 differ diff --git a/backend/.pip-cache/http-v2/7/9/5/1/c/7951c90562b5f02d14af0752098311d45a24bc94b49010d480f5c761.body b/backend/.pip-cache/http-v2/7/9/5/1/c/7951c90562b5f02d14af0752098311d45a24bc94b49010d480f5c761.body new file mode 100644 index 0000000..aff6b08 --- /dev/null +++ b/backend/.pip-cache/http-v2/7/9/5/1/c/7951c90562b5f02d14af0752098311d45a24bc94b49010d480f5c761.body @@ -0,0 +1,235 @@ +Metadata-Version: 2.1 +Name: pytest-metadata +Version: 3.1.1 +Summary: pytest plugin for test session metadata +Project-URL: Homepage, https://github.com/pytest-dev/pytest-metadata +Project-URL: Tracker, https://github.com/pytest-dev/pytest-metadata/issues +Project-URL: Source, https://github.com/pytest-dev/pytest-metadata +Author-email: Dave Hunt , Jim Brannlund +License-Expression: MPL-2.0 +License-File: AUTHORS +License-File: LICENSE +Keywords: metadata,pytest +Classifier: Development Status :: 5 - Production/Stable +Classifier: Framework :: Pytest +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0) +Classifier: Natural Language :: English +Classifier: Operating System :: MacOS :: MacOS X +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 
3.12 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Software Development :: Quality Assurance +Classifier: Topic :: Software Development :: Testing +Classifier: Topic :: Utilities +Requires-Python: >=3.8 +Requires-Dist: pytest>=7.0.0 +Provides-Extra: test +Requires-Dist: black>=22.1.0; extra == 'test' +Requires-Dist: flake8>=4.0.1; extra == 'test' +Requires-Dist: pre-commit>=2.17.0; extra == 'test' +Requires-Dist: tox>=3.24.5; extra == 'test' +Description-Content-Type: text/x-rst + +pytest-metadata +=============== + +pytest-metadata is a plugin for `pytest `_ that provides +access to test session metadata. + +.. image:: https://img.shields.io/badge/license-MPL%202.0-blue.svg + :target: https://github.com/pytest-dev/pytest-metadata/blob/master/LICENSE + :alt: License +.. image:: https://img.shields.io/pypi/v/pytest-metadata.svg + :target: https://pypi.python.org/pypi/pytest-metadata/ + :alt: PyPI +.. image:: https://img.shields.io/travis/pytest-dev/pytest-metadata.svg + :target: https://travis-ci.org/pytest-dev/pytest-metadata/ +.. image:: https://img.shields.io/badge/code%20style-black-000000.svg + :target: https://github.com/ambv/black + :alt: Travis +.. image:: https://img.shields.io/github/issues-raw/pytest-dev/pytest-metadata.svg + :target: https://github.com/pytest-dev/pytest-metadata/issues + :alt: Issues +.. image:: https://img.shields.io/requires/github/pytest-dev/pytest-metadata.svg + :target: https://requires.io/github/pytest-dev/pytest-metadata/requirements/?branch=master + :alt: Requirements + +Requirements +------------ + +You will need the following in order to use pytest-metadata: + +- Python 3.8+ or PyPy3 + +Installation +------------ + +To install pytest-metadata: + +.. code-block:: bash + + $ pip install pytest-metadata + +Contributing +------------ + +We welcome contributions. 
+ +To learn more, see `Development `_ + +Available metadata +------------------ + +The following metadata is gathered by this plugin: + +======== =============== =================================== +Key Description Example +======== =============== =================================== +Python Python version 3.6.4 +Platform Platform Darwin-17.4.0-x86_64-i386-64bit +Packages pytest packages {'py': '1.5.2', 'pytest': '3.4.1'} +Plugins pytest plugins {'metadata': '1.6.0'} +======== =============== =================================== + +Additional metadata +------------------- + +You can provide your own metadata (key, value pair) by specifying ``--metadata`` on the commandline:: + + pytest --metadata foo bar + +Note: You can provide multiple sets of ``--metadata``:: + + pytest --metadata foo bar --metadata baz zoo + +There's also the possibility of passing in metadata as a JSON string:: + + pytest --metadata-from-json '{"cat_says": "bring the cat nip", "human_says": "yes kitty"}' + +Alternatively a JSON can be read from a given file:: + + pytest --metadata-from-json-file path/to/valid/file.json + +Continuous integration +---------------------- + +When run in a continuous integration environment, additional metadata is added +from environment variables. Below is a list of the supported continuous +integration providers, along with links to the environment variables that are +added to metadata if they're present. + +* `AppVeyor `_ +* `Bitbucket `_ +* `CircleCI `_ +* `GitLab CI `_ +* `Jenkins `_ +* `TaskCluster `_ +* `Travis CI `_ + +Note that if you're using `Tox `_ to run your tests +then you will need to `pass down any additional environment variables `_ +for these to be picked up. 
+ +Viewing metadata +---------------- + +If you pass ``--verbose`` on the command line when running your tests, then the +metadata will be displayed in the terminal report header:: + + pytest --verbose + ============================ test session starts ============================ + platform darwin -- Python 3.6.4, pytest-3.4.1, py-1.5.2, pluggy-0.6.0 -- /usr/bin/python + cachedir: .pytest_cache + metadata: {'Python': '3.6.4', 'Platform': 'Darwin-17.4.0-x86_64-i386-64bit', 'Packages': {'pytest': '3.4.1', 'py': '1.5.2', 'pluggy': '0.6.0'}, 'Plugins': {'metadata': '1.6.0'}} + plugins: metadata-1.6.0 + +Including metadata in Junit XML +------------------------------- + +Pytest-metadata provides the session scoped fixture :code:`include_metadata_in_junit_xml` that you may use to include any metadata in Junit XML as ``property`` tags. +For example the following test module + +.. code-block:: python + + import pytest + + pytestmark = pytest.mark.usefixtures('include_metadata_in_junit_xml') + + def test(): + pass + +when called with + +.. code-block:: bash + + pytest --metadata Daffy Duck --junit-xml=results.xml + +would produce the following XML + +.. code-block:: xml + + + + + + + ... + +Accessing metadata +------------------ + +To add/modify/delete metadata at the end of metadata collection, you can use the ``pytest_metadata`` hook: + +.. code-block:: python + + import pytest + @pytest.hookimpl(optionalhook=True) + def pytest_metadata(metadata): + metadata.pop("password", None) + +To access the metadata from a test or fixture, you can use the ``metadata`` +fixture: + +.. code-block:: python + + def test_metadata(metadata): + assert 'metadata' in metadata['Plugins'] + +To access the metadata from a plugin, you can use the ``stash`` attribute of +the ``config`` object. This can be used to read/add/modify the metadata: + +.. 
code-block:: python + + def pytest_configure(config): + metadata = config.pluginmanager.getplugin("metadata") + if metadata: + from pytest_metadata.plugin import metadata_key + config.stash[metadata_key]['foo'] = 'bar' + +Plugin integrations +------------------- + +Here's a handy list of plugins that either read or contribute to the metadata: + +* `pytest-base-url `_ - Adds the + base URL to the metadata. +* `pytest-html `_ - Displays the + metadata at the start of each report. +* `pytest-reporter-html1 `_ - + Presents metadata as part of the report. +* `pytest-selenium `_ - Adds the + driver, capabilities, and remote server to the metadata. + +Resources +--------- + +- `Release Notes `_ +- `Issue Tracker `_ +- `Code `_ diff --git a/backend/.pip-cache/http-v2/8/9/4/f/5/894f59f36e2085235dbff4af2ceae8a8f2e6c40b3f8daecb497ea868 b/backend/.pip-cache/http-v2/8/9/4/f/5/894f59f36e2085235dbff4af2ceae8a8f2e6c40b3f8daecb497ea868 new file mode 100644 index 0000000..f41ed8a Binary files /dev/null and b/backend/.pip-cache/http-v2/8/9/4/f/5/894f59f36e2085235dbff4af2ceae8a8f2e6c40b3f8daecb497ea868 differ diff --git a/backend/.pip-cache/http-v2/8/9/4/f/5/894f59f36e2085235dbff4af2ceae8a8f2e6c40b3f8daecb497ea868.body b/backend/.pip-cache/http-v2/8/9/4/f/5/894f59f36e2085235dbff4af2ceae8a8f2e6c40b3f8daecb497ea868.body new file mode 100644 index 0000000..26b75ee Binary files /dev/null and b/backend/.pip-cache/http-v2/8/9/4/f/5/894f59f36e2085235dbff4af2ceae8a8f2e6c40b3f8daecb497ea868.body differ diff --git a/backend/.pip-cache/http-v2/9/1/2/c/9/912c9d74ac964be0a36abe844794cb951b87674c85cabe4fb453015c b/backend/.pip-cache/http-v2/9/1/2/c/9/912c9d74ac964be0a36abe844794cb951b87674c85cabe4fb453015c new file mode 100644 index 0000000..134a5bc Binary files /dev/null and b/backend/.pip-cache/http-v2/9/1/2/c/9/912c9d74ac964be0a36abe844794cb951b87674c85cabe4fb453015c differ diff --git a/backend/.pip-cache/http-v2/9/1/2/c/9/912c9d74ac964be0a36abe844794cb951b87674c85cabe4fb453015c.body 
b/backend/.pip-cache/http-v2/9/1/2/c/9/912c9d74ac964be0a36abe844794cb951b87674c85cabe4fb453015c.body new file mode 100644 index 0000000..01ac1a4 Binary files /dev/null and b/backend/.pip-cache/http-v2/9/1/2/c/9/912c9d74ac964be0a36abe844794cb951b87674c85cabe4fb453015c.body differ diff --git a/backend/.pip-cache/http-v2/a/1/9/5/3/a19537d3cf37c122db841d6fe4cd322bc10d1a558bb00d146b85cb9a b/backend/.pip-cache/http-v2/a/1/9/5/3/a19537d3cf37c122db841d6fe4cd322bc10d1a558bb00d146b85cb9a new file mode 100644 index 0000000..c7b5650 Binary files /dev/null and b/backend/.pip-cache/http-v2/a/1/9/5/3/a19537d3cf37c122db841d6fe4cd322bc10d1a558bb00d146b85cb9a differ diff --git a/backend/.pip-cache/http-v2/a/1/9/5/3/a19537d3cf37c122db841d6fe4cd322bc10d1a558bb00d146b85cb9a.body b/backend/.pip-cache/http-v2/a/1/9/5/3/a19537d3cf37c122db841d6fe4cd322bc10d1a558bb00d146b85cb9a.body new file mode 100644 index 0000000..001a730 Binary files /dev/null and b/backend/.pip-cache/http-v2/a/1/9/5/3/a19537d3cf37c122db841d6fe4cd322bc10d1a558bb00d146b85cb9a.body differ diff --git a/backend/.pip-cache/http-v2/a/1/c/c/2/a1cc232a9b083f372d273dd711e2957b2cbefc9f4888c1e3d5e58c8a b/backend/.pip-cache/http-v2/a/1/c/c/2/a1cc232a9b083f372d273dd711e2957b2cbefc9f4888c1e3d5e58c8a new file mode 100644 index 0000000..749354f Binary files /dev/null and b/backend/.pip-cache/http-v2/a/1/c/c/2/a1cc232a9b083f372d273dd711e2957b2cbefc9f4888c1e3d5e58c8a differ diff --git a/backend/.pip-cache/http-v2/a/1/c/c/2/a1cc232a9b083f372d273dd711e2957b2cbefc9f4888c1e3d5e58c8a.body b/backend/.pip-cache/http-v2/a/1/c/c/2/a1cc232a9b083f372d273dd711e2957b2cbefc9f4888c1e3d5e58c8a.body new file mode 100644 index 0000000..de8afce --- /dev/null +++ b/backend/.pip-cache/http-v2/a/1/c/c/2/a1cc232a9b083f372d273dd711e2957b2cbefc9f4888c1e3d5e58c8a.body @@ -0,0 +1,92 @@ +Metadata-Version: 2.1 +Name: pytest-html +Version: 4.1.1 +Summary: pytest plugin for generating HTML reports +Project-URL: Homepage, https://github.com/pytest-dev/pytest-html 
+Project-URL: Tracker, https://github.com/pytest-dev/pytest-html/issues +Project-URL: Source, https://github.com/pytest-dev/pytest-html +Author-email: Dave Hunt , Jim Brannlund +License-Expression: MPL-2.0 +License-File: LICENSE +Keywords: html,pytest,report +Classifier: Development Status :: 5 - Production/Stable +Classifier: Framework :: Pytest +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Natural Language :: English +Classifier: Operating System :: MacOS :: MacOS X +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Software Development :: Quality Assurance +Classifier: Topic :: Software Development :: Testing +Classifier: Topic :: Utilities +Requires-Python: >=3.8 +Requires-Dist: jinja2>=3.0.0 +Requires-Dist: pytest-metadata>=2.0.0 +Requires-Dist: pytest>=7.0.0 +Provides-Extra: docs +Requires-Dist: pip-tools>=6.13.0; extra == 'docs' +Provides-Extra: test +Requires-Dist: assertpy>=1.1; extra == 'test' +Requires-Dist: beautifulsoup4>=4.11.1; extra == 'test' +Requires-Dist: black>=22.1.0; extra == 'test' +Requires-Dist: flake8>=4.0.1; extra == 'test' +Requires-Dist: pre-commit>=2.17.0; extra == 'test' +Requires-Dist: pytest-mock>=3.7.0; extra == 'test' +Requires-Dist: pytest-rerunfailures>=11.1.2; extra == 'test' +Requires-Dist: pytest-xdist>=2.4.0; extra == 'test' +Requires-Dist: selenium>=4.3.0; extra == 'test' +Requires-Dist: tox>=3.24.5; extra == 'test' +Description-Content-Type: text/x-rst + +pytest-html +=========== + +pytest-html is a plugin for `pytest `_ that generates a HTML report for 
test results. + +.. image:: https://img.shields.io/badge/license-MPL%202.0-blue.svg + :target: https://github.com/pytest-dev/pytest-html/blob/master/LICENSE + :alt: License +.. image:: https://img.shields.io/pypi/v/pytest-html.svg + :target: https://pypi.python.org/pypi/pytest-html/ + :alt: PyPI +.. image:: https://img.shields.io/conda/vn/conda-forge/pytest-html.svg + :target: https://anaconda.org/conda-forge/pytest-html + :alt: Conda Forge +.. image:: https://github.com/pytest-dev/pytest-html/workflows/gh/badge.svg + :target: https://github.com/pytest-dev/pytest-html/actions + :alt: CI +.. image:: https://img.shields.io/requires/github/pytest-dev/pytest-html.svg + :target: https://requires.io/github/pytest-dev/pytest-html/requirements/?branch=master + :alt: Requirements +.. image:: https://codecov.io/gh/pytest-dev/pytest-html/branch/master/graph/badge.svg?token=Y0myNKkdbi + :target: https://codecov.io/gh/pytest-dev/pytest-html + :alt: Codecov + +Resources +--------- + +- `Documentation `_ +- `Release Notes `_ +- `Issue Tracker `_ +- `Code `_ + +Contributing +------------ + +We welcome contributions. + +To learn more, see `Development `_ + +Screenshots +----------- + +.. 
image:: https://cloud.githubusercontent.com/assets/122800/11952194/62daa964-a88e-11e5-9745-2aa5b714c8bb.png + :target: https://cloud.githubusercontent.com/assets/122800/11951695/f371b926-a88a-11e5-91c2-499166776bd3.png + :alt: Enhanced HTML report diff --git a/backend/.pip-cache/http-v2/a/9/1/2/d/a912d0a742f8df4ab4b6c17a9cfc4e9cfe988a8488ccf38ab1845514 b/backend/.pip-cache/http-v2/a/9/1/2/d/a912d0a742f8df4ab4b6c17a9cfc4e9cfe988a8488ccf38ab1845514 new file mode 100644 index 0000000..1c7b2cb Binary files /dev/null and b/backend/.pip-cache/http-v2/a/9/1/2/d/a912d0a742f8df4ab4b6c17a9cfc4e9cfe988a8488ccf38ab1845514 differ diff --git a/backend/.pip-cache/http-v2/a/9/1/2/d/a912d0a742f8df4ab4b6c17a9cfc4e9cfe988a8488ccf38ab1845514.body b/backend/.pip-cache/http-v2/a/9/1/2/d/a912d0a742f8df4ab4b6c17a9cfc4e9cfe988a8488ccf38ab1845514.body new file mode 100644 index 0000000..2b512e3 Binary files /dev/null and b/backend/.pip-cache/http-v2/a/9/1/2/d/a912d0a742f8df4ab4b6c17a9cfc4e9cfe988a8488ccf38ab1845514.body differ diff --git a/backend/.pip-cache/http-v2/b/7/b/b/7/b7bb760318228b0f3304354dc78af978c52426722e445cfd33e30c5d b/backend/.pip-cache/http-v2/b/7/b/b/7/b7bb760318228b0f3304354dc78af978c52426722e445cfd33e30c5d new file mode 100644 index 0000000..b90c7a0 Binary files /dev/null and b/backend/.pip-cache/http-v2/b/7/b/b/7/b7bb760318228b0f3304354dc78af978c52426722e445cfd33e30c5d differ diff --git a/backend/.pip-cache/http-v2/b/7/b/b/7/b7bb760318228b0f3304354dc78af978c52426722e445cfd33e30c5d.body b/backend/.pip-cache/http-v2/b/7/b/b/7/b7bb760318228b0f3304354dc78af978c52426722e445cfd33e30c5d.body new file mode 100644 index 0000000..84ef0a6 --- /dev/null +++ b/backend/.pip-cache/http-v2/b/7/b/b/7/b7bb760318228b0f3304354dc78af978c52426722e445cfd33e30c5d.body @@ -0,0 +1,673 @@ +Metadata-Version: 2.4 +Name: pytest-cov +Version: 7.0.0 +Summary: Pytest plugin for measuring coverage. 
+Project-URL: Sources, https://github.com/pytest-dev/pytest-cov +Project-URL: Documentation, https://pytest-cov.readthedocs.io/ +Project-URL: Changelog, https://pytest-cov.readthedocs.io/en/latest/changelog.html +Project-URL: Issue Tracker, https://github.com/pytest-dev/pytest-cov/issues +Author-email: Marc Schlaich +Maintainer-email: Ionel Cristian Mărieș +License-Expression: MIT +License-File: AUTHORS.rst +License-File: LICENSE +Keywords: cover,coverage,distributed,parallel,py.test,pytest +Classifier: Development Status :: 5 - Production/Stable +Classifier: Framework :: Pytest +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX +Classifier: Operating System :: Unix +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Software Development :: Testing +Classifier: Topic :: Utilities +Requires-Python: >=3.9 +Requires-Dist: coverage[toml]>=7.10.6 +Requires-Dist: pluggy>=1.2 +Requires-Dist: pytest>=7 +Provides-Extra: testing +Requires-Dist: process-tests; extra == 'testing' +Requires-Dist: pytest-xdist; extra == 'testing' +Requires-Dist: virtualenv; extra == 'testing' +Description-Content-Type: text/x-rst + +======== +Overview +======== + +.. start-badges + +.. 
list-table:: + :stub-columns: 1 + + * - docs + - |docs| + * - tests + - |github-actions| + * - package + - |version| |conda-forge| |wheel| |supported-versions| |supported-implementations| |commits-since| + +.. |docs| image:: https://readthedocs.org/projects/pytest-cov/badge/?style=flat + :target: https://readthedocs.org/projects/pytest-cov/ + :alt: Documentation Status + +.. |github-actions| image:: https://github.com/pytest-dev/pytest-cov/actions/workflows/test.yml/badge.svg + :alt: GitHub Actions Status + :target: https://github.com/pytest-dev/pytest-cov/actions + +.. |version| image:: https://img.shields.io/pypi/v/pytest-cov.svg + :alt: PyPI Package latest release + :target: https://pypi.org/project/pytest-cov + +.. |conda-forge| image:: https://img.shields.io/conda/vn/conda-forge/pytest-cov.svg + :target: https://anaconda.org/conda-forge/pytest-cov +.. |wheel| image:: https://img.shields.io/pypi/wheel/pytest-cov.svg + :alt: PyPI Wheel + :target: https://pypi.org/project/pytest-cov + +.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/pytest-cov.svg + :alt: Supported versions + :target: https://pypi.org/project/pytest-cov + +.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/pytest-cov.svg + :alt: Supported implementations + :target: https://pypi.org/project/pytest-cov + +.. |commits-since| image:: https://img.shields.io/github/commits-since/pytest-dev/pytest-cov/v7.0.0.svg + :alt: Commits since latest release + :target: https://github.com/pytest-dev/pytest-cov/compare/v7.0.0...master + +.. end-badges + +This plugin provides coverage functionality as a pytest plugin. Compared to just using ``coverage run`` this plugin does some extras: + +* Automatic erasing and combination of .coverage files and default reporting. +* Support for detailed coverage contexts (add ``--cov-context=test`` to have the full test name including parametrization as the context). 
+* Xdist support: you can use all of pytest-xdist's features including remote interpreters and still get coverage. +* Consistent pytest behavior. If you run ``coverage run -m pytest`` you will have slightly different ``sys.path`` (CWD will be + in it, unlike when running ``pytest``). + +All features offered by the coverage package should work, either through pytest-cov's command line options or +through coverage's config file. + +* Free software: MIT license + +Installation +============ + +Install with pip:: + + pip install pytest-cov + +For distributed testing support install pytest-xdist:: + + pip install pytest-xdist + +Upgrading from pytest-cov 6.3 +----------------------------- + +`pytest-cov 6.3` and older were using a ``.pth`` file to enable coverage measurements in subprocesses. This was removed in `pytest-cov 7` - use `coverage's patch options `_ to enable subprocess measurements. + +Uninstalling +------------ + +Uninstall with pip:: + + pip uninstall pytest-cov + +Under certain scenarios a stray ``.pth`` file may be left around in site-packages. + +* `pytest-cov 2.0` may leave a ``pytest-cov.pth`` if you installed without wheels + (``easy_install``, ``setup.py install`` etc). +* `pytest-cov 1.8 or older` will leave a ``init_cov_core.pth``. + +Usage +===== + +:: + + pytest --cov=myproj tests/ + +Would produce a report like:: + + -------------------- coverage: ... --------------------- + Name Stmts Miss Cover + ---------------------------------------- + myproj/__init__ 2 0 100% + myproj/myproj 257 13 94% + myproj/feature4286 94 7 92% + ---------------------------------------- + TOTAL 353 20 94% + +Documentation +============= + +https://pytest-cov.readthedocs.io/en/latest/ + + +Coverage Data File +================== + +The data file is erased at the beginning of testing to ensure clean data for each test run. 
If you +need to combine the coverage of several test runs you can use the ``--cov-append`` option to append +this coverage data to coverage data from previous test runs. + +The data file is left at the end of testing so that it is possible to use normal coverage tools to +examine it. + +Limitations +=========== + +For distributed testing the workers must have the pytest-cov package installed. This is needed since +the plugin must be registered through setuptools for pytest to start the plugin on the +worker. + +Security +======== + +To report a security vulnerability please use the `Tidelift security contact `_. +Tidelift will coordinate the fix and disclosure. + +Acknowledgements +================ + +Whilst this plugin has been built fresh from the ground up it has been influenced by the work done +on pytest-coverage (Ross Lawley, James Mills, Holger Krekel) and nose-cover (Jason Pellerin) which are +other coverage plugins. + +Ned Batchelder for coverage and its ability to combine the coverage results of parallel runs. + +Holger Krekel for pytest with its distributed testing support. + +Jason Pellerin for nose. + +Michael Foord for unittest2. + +No doubt others have contributed to these tools as well. + +Changelog +========= + +7.0.0 (2025-09-09) +------------------ + +* Dropped support for subprocesses measurement. + + It was a feature added long time ago when coverage lacked a nice way to measure subprocesses created in tests. + It relied on a ``.pth`` file, there was no way to opt-out and it created bad interations + with `coverage's new patch system `_ added + in `7.10 `_. + + To migrate to this release you might need to enable the suprocess patch, example for ``.coveragerc``: + + .. code-block:: ini + + [run] + patch = subprocess + + This release also requires at least coverage 7.10.6. +* Switched packaging to have metadata completely in ``pyproject.toml`` and use `hatchling `_ for + building. 
+ Contributed by Ofek Lev in `#551 `_ + with some extras in `#716 `_. +* Removed some not really necessary testing deps like ``six``. + +6.3.0 (2025-09-06) +------------------ + +* Added support for markdown reports. + Contributed by Marcos Boger in `#712 `_ + and `#714 `_. +* Fixed some formatting issues in docs. + Anonymous contribution in `#706 `_. + +6.2.1 (2025-06-12) +------------------ + +* Added a version requirement for pytest's pluggy dependency (1.2.0, released 2023-06-21) that has the required new-style hookwrapper API. +* Removed deprecated license classifier (packaging). +* Disabled coverage warnings in two more situations where they have no value: + + * "module-not-measured" in workers + * "already-imported" in subprocesses + +6.2.0 (2025-06-11) +------------------ + +* The plugin now adds 3 rules in the filter warnings configuration to prevent common coverage warnings being raised as obscure errors:: + + default:unclosed database in `_. +* Removed unnecessary CovFailUnderWarning. Fixes `#675 `_. +* Fixed the term report not using the precision specified via ``--cov-precision``. + + +6.0.0 (2024-10-29) +------------------ + +* Updated various documentation inaccuracies, especially on subprocess handling. +* Changed fail under checks to use the precision set in the coverage configuration. + Now it will perform the check just like ``coverage report`` would. +* Added a ``--cov-precision`` cli option that can override the value set in your coverage configuration. +* Dropped support for now EOL Python 3.8. + +5.0.0 (2024-03-24) +------------------ + +* Removed support for xdist rsync (now deprecated). + Contributed by Matthias Reichenbach in `#623 `_. +* Switched docs theme to Furo. +* Various legacy Python cleanup and CI improvements. + Contributed by Christian Clauss and Hugo van Kemenade in + `#630 `_, + `#631 `_, + `#632 `_ and + `#633 `_. +* Added a ``pyproject.toml`` example in the docs. + Contributed by Dawn James in `#626 `_. 
+* Modernized project's pre-commit hooks to use ruff. Initial POC contributed by + Christian Clauss in `#584 `_. +* Dropped support for Python 3.7. + +4.1.0 (2023-05-24) +------------------ + +* Updated CI with new Pythons and dependencies. +* Removed rsyncdir support. This makes pytest-cov compatible with xdist 3.0. + Contributed by Sorin Sbarnea in `#558 `_. +* Optimized summary generation to not be performed if no reporting is active (for example, + when ``--cov-report=''`` is used without ``--cov-fail-under``). + Contributed by Jonathan Stewmon in `#589 `_. +* Added support for JSON reporting. + Contributed by Matthew Gamble in `#582 `_. +* Refactored code to use f-strings. + Contributed by Mark Mayo in `#572 `_. +* Fixed a skip in the test suite for some old xdist. + Contributed by a bunch of people in `#565 `_. +* Dropped support for Python 3.6. + + +4.0.0 (2022-09-28) +------------------ + +**Note that this release drops support for multiprocessing.** + + +* `--cov-fail-under` no longer causes `pytest --collect-only` to fail + Contributed by Zac Hatfield-Dodds in `#511 `_. +* Dropped support for multiprocessing (mostly because `issue 82408 `_). This feature was + mostly working but very broken in certain scenarios and made the test suite very flaky and slow. + + There is builtin multiprocessing support in coverage and you can migrate to that. All you need is this in your + ``.coveragerc``:: + + [run] + concurrency = multiprocessing + parallel = true + sigterm = true +* Fixed deprecation in ``setup.py`` by trying to import setuptools before distutils. + Contributed by Ben Greiner in `#545 `_. +* Removed undesirable new lines that were displayed while reporting was disabled. + Contributed by Delgan in `#540 `_. +* Documentation fixes. + Contributed by Andre Brisco in `#543 `_ + and Colin O'Dell in `#525 `_. +* Added support for LCOV output format via `--cov-report=lcov`. Only works with coverage 6.3+. + Contributed by Christian Fetzer in `#536 `_. 
+* Modernized pytest hook implementation. + Contributed by Bruno Oliveira in `#549 `_ + and Ronny Pfannschmidt in `#550 `_. + + +3.0.0 (2021-10-04) +------------------- + +**Note that this release drops support for Python 2.7 and Python 3.5.** + +* Added support for Python 3.10 and updated various test dependencies. + Contributed by Hugo van Kemenade in + `#500 `_. +* Switched from Travis CI to GitHub Actions. Contributed by Hugo van Kemenade in + `#494 `_ and + `#495 `_. +* Add a ``--cov-reset`` CLI option. + Contributed by Danilo Šegan in + `#459 `_. +* Improved validation of ``--cov-fail-under`` CLI option. + Contributed by ... Ronny Pfannschmidt's desire for skark in + `#480 `_. +* Dropped Python 2.7 support. + Contributed by Thomas Grainger in + `#488 `_. +* Updated trove classifiers. Contributed by Michał Bielawski in + `#481 `_. +* Reverted change for `toml` requirement. + Contributed by Thomas Grainger in + `#477 `_. + +2.12.1 (2021-06-01) +------------------- + +* Changed the `toml` requirement to be always be directly required (instead of being required through a coverage extra). + This fixes issues with pip-compile (`pip-tools#1300 `_). + Contributed by Sorin Sbarnea in `#472 `_. +* Documented ``show_contexts``. + Contributed by Brian Rutledge in `#473 `_. + +2.12.0 (2021-05-14) +------------------- + +* Added coverage's `toml` extra to install requirements in setup.py. + Contributed by Christian Riedel in `#410 `_. +* Fixed ``pytest_cov.__version__`` to have the right value (string with version instead of a string + including ``__version__ =``). +* Fixed license classifier in ``setup.py``. + Contributed by Chris Sreesangkom in `#467 `_. +* Fixed *commits since* badge. + Contributed by Terence Honles in `#470 `_. + +2.11.1 (2021-01-20) +------------------- + +* Fixed support for newer setuptools (v42+). + Contributed by Michał Górny in `#451 `_. + +2.11.0 (2021-01-18) +------------------- + +* Bumped minimum coverage requirement to 5.2.1. 
This prevents reporting issues. + Contributed by Mateus Berardo de Souza Terra in `#433 `_. +* Improved sample projects (from the `examples `_ + directory) to support running `tox -e pyXY`. Now the example configures a suffixed coverage data file, + and that makes the cleanup environment unnecessary. + Contributed by Ganden Schaffner in `#435 `_. +* Removed the empty `console_scripts` entrypoint that confused some Gentoo build script. + I didn't ask why it was so broken cause I didn't want to ruin my day. + Contributed by Michał Górny in `#434 `_. +* Fixed the missing `coverage context `_ + when using subprocesses. + Contributed by Bernát Gábor in `#443 `_. +* Updated the config section in the docs. + Contributed by Pamela McA'Nulty in `#429 `_. +* Migrated CI to travis-ci.com (from .org). + +2.10.1 (2020-08-14) +------------------- + +* Support for ``pytest-xdist`` 2.0, which breaks compatibility with ``pytest-xdist`` before 1.22.3 (from 2017). + Contributed by Zac Hatfield-Dodds in `#412 `_. +* Fixed the ``LocalPath has no attribute startswith`` failure that occurred when using the ``pytester`` plugin + in inline mode. + +2.10.0 (2020-06-12) +------------------- + +* Improved the ``--no-cov`` warning. Now it's only shown if ``--no-cov`` is present before ``--cov``. +* Removed legacy pytest support. Changed ``setup.py`` so that ``pytest>=4.6`` is required. + +2.9.0 (2020-05-22) +------------------ + +* Fixed ``RemovedInPytest4Warning`` when using Pytest 3.10. + Contributed by Michael Manganiello in `#354 `_. +* Made pytest startup faster when plugin not active by lazy-importing. + Contributed by Anders Hovmöller in `#339 `_. +* Various CI improvements. + Contributed by Daniel Hahler in `#363 `_ and + `#364 `_. +* Various Python support updates (drop EOL 3.4, test against 3.8 final). + Contributed by Hugo van Kemenade in + `#336 `_ and + `#367 `_. +* Changed ``--cov-append`` to always enable ``data_suffix`` (a coverage setting). 
+ Contributed by Harm Geerts in + `#387 `_. +* Changed ``--cov-append`` to handle loading previous data better + (fixes various path aliasing issues). +* Various other testing improvements, github issue templates, example updates. +* Fixed internal failures that are caused by tests that change the current working directory by + ensuring a consistent working directory when coverage is called. + See `#306 `_ and + `coveragepy#881 `_ + +2.8.1 (2019-10-05) +------------------ + +* Fixed `#348 `_ - + regression when only certain reports (html or xml) are used then ``--cov-fail-under`` always fails. + +2.8.0 (2019-10-04) +------------------ + +* Fixed ``RecursionError`` that can occur when using + `cleanup_on_signal `__ or + `cleanup_on_sigterm `__. + See: `#294 `_. + The 2.7.x releases of pytest-cov should be considered broken regarding aforementioned cleanup API. +* Added compatibility with future xdist release that deprecates some internals + (match pytest-xdist master/worker terminology). + Contributed by Thomas Grainger in `#321 `_ +* Fixed breakage that occurs when multiple reporting options are used. + Contributed by Thomas Grainger in `#338 `_. +* Changed internals to use a stub instead of ``os.devnull``. + Contributed by Thomas Grainger in `#332 `_. +* Added support for Coverage 5.0. + Contributed by Ned Batchelder in `#319 `_. +* Added support for float values in ``--cov-fail-under``. + Contributed by Martín Gaitán in `#311 `_. +* Various documentation fixes. Contributed by + Juanjo Bazán, + Andrew Murray and + Albert Tugushev in + `#298 `_, + `#299 `_ and + `#307 `_. +* Various testing improvements. Contributed by + Ned Batchelder, + Daniel Hahler, + Ionel Cristian Mărieș and + Hugo van Kemenade in + `#313 `_, + `#314 `_, + `#315 `_, + `#316 `_, + `#325 `_, + `#326 `_, + `#334 `_ and + `#335 `_. +* Added the ``--cov-context`` CLI options that enables coverage contexts. Only works with coverage 5.0+. + Contributed by Ned Batchelder in `#345 `_. 
+ +2.7.1 (2019-05-03) +------------------ + +* Fixed source distribution manifest so that garbage ain't included in the tarball. + +2.7.0 (2019-05-03) +------------------ + +* Fixed ``AttributeError: 'NoneType' object has no attribute 'configure_node'`` error when ``--no-cov`` is used. + Contributed by Alexander Shadchin in `#263 `_. +* Various testing and CI improvements. Contributed by Daniel Hahler in + `#255 `_, + `#266 `_, + `#272 `_, + `#271 `_ and + `#269 `_. +* Improved ``pytest_cov.embed.cleanup_on_sigterm`` to be reentrant (signal deliveries while signal handling is + running won't break stuff). +* Added ``pytest_cov.embed.cleanup_on_signal`` for customized cleanup. +* Improved cleanup code and fixed various issues with leftover data files. All contributed in + `#265 `_ or + `#262 `_. +* Improved examples. Now there are two examples for the common project layouts, complete with working coverage + configuration. The examples have CI testing. Contributed in + `#267 `_. +* Improved help text for CLI options. + +2.6.1 (2019-01-07) +------------------ + +* Added support for Pytest 4.1. Contributed by Daniel Hahler and Семён Марьясин in + `#253 `_ and + `#230 `_. +* Various test and docs fixes. Contributed by Daniel Hahler in + `#224 `_ and + `#223 `_. +* Fixed the "Module already imported" issue (`#211 `_). + Contributed by Daniel Hahler in `#228 `_. + +2.6.0 (2018-09-03) +------------------ + +* Dropped support for Python 3 < 3.4, Pytest < 3.5 and Coverage < 4.4. +* Fixed some documentation formatting. Contributed by Jean Jordaan and Julian. +* Added an example with ``addopts`` in documentation. Contributed by Samuel Giffard in + `#195 `_. +* Fixed ``TypeError: 'NoneType' object is not iterable`` in certain xdist configurations. Contributed by Jeremy Bowman in + `#213 `_. +* Added a ``no_cover`` marker and fixture. Fixes + `#78 `_. +* Fixed broken ``no_cover`` check when running doctests. Contributed by Terence Honles in + `#200 `_. 
+* Fixed various issues with path normalization in reports (when combining coverage data from parallel mode). Fixes + `#130 `_. + Contributed by Ryan Hiebert & Ionel Cristian Mărieș in + `#178 `_. +* Report generation failures don't raise exceptions anymore. A warning will be logged instead. Fixes + `#161 `_. +* Fixed multiprocessing issue on Windows (empty env vars are not passed). Fixes + `#165 `_. + +2.5.1 (2017-05-11) +------------------ + +* Fixed xdist breakage (regression in ``2.5.0``). + Fixes `#157 `_. +* Allow setting custom ``data_file`` name in ``.coveragerc``. + Fixes `#145 `_. + Contributed by Jannis Leidel & Ionel Cristian Mărieș in + `#156 `_. + +2.5.0 (2017-05-09) +------------------ + +* Always show a summary when ``--cov-fail-under`` is used. Contributed by Francis Niu in `PR#141 + `_. +* Added ``--cov-branch`` option. Fixes `#85 `_. +* Improve exception handling in subprocess setup. Fixes `#144 `_. +* Fixed handling when ``--cov`` is used multiple times. Fixes `#151 `_. + +2.4.0 (2016-10-10) +------------------ + +* Added a "disarm" option: ``--no-cov``. It will disable coverage measurements. Contributed by Zoltan Kozma in + `PR#135 `_. + + **WARNING: Do not put this in your configuration files, it's meant to be an one-off for situations where you want to + disable coverage from command line.** +* Fixed broken exception handling on ``.pth`` file. See `#136 `_. + +2.3.1 (2016-08-07) +------------------ + +* Fixed regression causing spurious errors when xdist was used. See `#124 + `_. +* Fixed DeprecationWarning about incorrect `addoption` use. Contributed by Florian Bruhin in `PR#127 + `_. +* Fixed deprecated use of funcarg fixture API. Contributed by Daniel Hahler in `PR#125 + `_. + +2.3.0 (2016-07-05) +------------------ + +* Add support for specifying output location for html, xml, and annotate report. + Contributed by Patrick Lannigan in `PR#113 `_. +* Fix bug hiding test failure when cov-fail-under failed. 
+* For coverage >= 4.0, match the default behaviour of `coverage report` and + error if coverage fails to find the source instead of just printing a warning. + Contributed by David Szotten in `PR#116 `_. +* Fixed bug occurred when bare ``--cov`` parameter was used with xdist. + Contributed by Michael Elovskikh in `PR#120 `_. +* Add support for ``skip_covered`` and added ``--cov-report=term-skip-covered`` command + line options. Contributed by Saurabh Kumar in `PR#115 `_. + +2.2.1 (2016-01-30) +------------------ + +* Fixed incorrect merging of coverage data when xdist was used and coverage was ``>= 4.0``. + +2.2.0 (2015-10-04) +------------------ + +* Added support for changing working directory in tests. Previously changing working + directory would disable coverage measurements in suprocesses. +* Fixed broken handling for ``--cov-report=annotate``. + +2.1.0 (2015-08-23) +------------------ + +* Added support for `coverage 4.0b2`. +* Added the ``--cov-append`` command line options. Contributed by Christian Ledermann + in `PR#80 `_. + +2.0.0 (2015-07-28) +------------------ + +* Added ``--cov-fail-under``, akin to the new ``fail_under`` option in `coverage-4.0` + (automatically activated if there's a ``[report] fail_under = ...`` in ``.coveragerc``). +* Changed ``--cov-report=term`` to automatically upgrade to ``--cov-report=term-missing`` + if there's ``[run] show_missing = True`` in ``.coveragerc``. +* Changed ``--cov`` so it can be used with no path argument (in which case the source + settings from ``.coveragerc`` will be used instead). +* Fixed `.pth` installation to work in all cases (install, easy_install, wheels, develop etc). +* Fixed `.pth` uninstallation to work for wheel installs. +* Support for coverage 4.0. +* Data file suffixing changed to use coverage's ``data_suffix=True`` option (instead of the + custom suffixing). +* Avoid warning about missing coverage data (just like ``coverage.control.process_startup``). 
+* Fixed a race condition when running with xdist (all the workers tried to combine the files). + It's possible that this issue is not present in `pytest-cov 1.8.X`. + +1.8.2 (2014-11-06) +------------------ + +* N/A diff --git a/backend/.pip-cache/http-v2/b/8/d/3/b/b8d3be6e547361e5710b25307da2acea4600ea5ca3a598aa67310379 b/backend/.pip-cache/http-v2/b/8/d/3/b/b8d3be6e547361e5710b25307da2acea4600ea5ca3a598aa67310379 new file mode 100644 index 0000000..9f86777 Binary files /dev/null and b/backend/.pip-cache/http-v2/b/8/d/3/b/b8d3be6e547361e5710b25307da2acea4600ea5ca3a598aa67310379 differ diff --git a/backend/.pip-cache/http-v2/b/8/d/3/b/b8d3be6e547361e5710b25307da2acea4600ea5ca3a598aa67310379.body b/backend/.pip-cache/http-v2/b/8/d/3/b/b8d3be6e547361e5710b25307da2acea4600ea5ca3a598aa67310379.body new file mode 100644 index 0000000..d4cf466 Binary files /dev/null and b/backend/.pip-cache/http-v2/b/8/d/3/b/b8d3be6e547361e5710b25307da2acea4600ea5ca3a598aa67310379.body differ diff --git a/backend/.pip-cache/http-v2/c/8/e/9/c/c8e9c9575b76dc7560f19a6f67bb475254f4f4259cf71e96521b67d9 b/backend/.pip-cache/http-v2/c/8/e/9/c/c8e9c9575b76dc7560f19a6f67bb475254f4f4259cf71e96521b67d9 new file mode 100644 index 0000000..f09323a Binary files /dev/null and b/backend/.pip-cache/http-v2/c/8/e/9/c/c8e9c9575b76dc7560f19a6f67bb475254f4f4259cf71e96521b67d9 differ diff --git a/backend/.pip-cache/http-v2/c/8/e/9/c/c8e9c9575b76dc7560f19a6f67bb475254f4f4259cf71e96521b67d9.body b/backend/.pip-cache/http-v2/c/8/e/9/c/c8e9c9575b76dc7560f19a6f67bb475254f4f4259cf71e96521b67d9.body new file mode 100644 index 0000000..f72c1e3 Binary files /dev/null and b/backend/.pip-cache/http-v2/c/8/e/9/c/c8e9c9575b76dc7560f19a6f67bb475254f4f4259cf71e96521b67d9.body differ diff --git a/backend/.pip-cache/http-v2/d/4/9/a/3/d49a31e60e488c77a07c2738fe2c5e9d61396c62dad0d8e365e2c2cb b/backend/.pip-cache/http-v2/d/4/9/a/3/d49a31e60e488c77a07c2738fe2c5e9d61396c62dad0d8e365e2c2cb new file mode 100644 index 
0000000..aa129ae Binary files /dev/null and b/backend/.pip-cache/http-v2/d/4/9/a/3/d49a31e60e488c77a07c2738fe2c5e9d61396c62dad0d8e365e2c2cb differ diff --git a/backend/.pip-cache/http-v2/d/4/9/a/3/d49a31e60e488c77a07c2738fe2c5e9d61396c62dad0d8e365e2c2cb.body b/backend/.pip-cache/http-v2/d/4/9/a/3/d49a31e60e488c77a07c2738fe2c5e9d61396c62dad0d8e365e2c2cb.body new file mode 100644 index 0000000..53fec6d Binary files /dev/null and b/backend/.pip-cache/http-v2/d/4/9/a/3/d49a31e60e488c77a07c2738fe2c5e9d61396c62dad0d8e365e2c2cb.body differ diff --git a/backend/.pip-cache/http-v2/d/7/0/d/d/d70dde26ba84abf6f30b04b36d6763f71cafd30071ad52e0336a3d42 b/backend/.pip-cache/http-v2/d/7/0/d/d/d70dde26ba84abf6f30b04b36d6763f71cafd30071ad52e0336a3d42 new file mode 100644 index 0000000..ffb29c7 Binary files /dev/null and b/backend/.pip-cache/http-v2/d/7/0/d/d/d70dde26ba84abf6f30b04b36d6763f71cafd30071ad52e0336a3d42 differ diff --git a/backend/.pip-cache/http-v2/d/7/0/d/d/d70dde26ba84abf6f30b04b36d6763f71cafd30071ad52e0336a3d42.body b/backend/.pip-cache/http-v2/d/7/0/d/d/d70dde26ba84abf6f30b04b36d6763f71cafd30071ad52e0336a3d42.body new file mode 100644 index 0000000..c73db65 Binary files /dev/null and b/backend/.pip-cache/http-v2/d/7/0/d/d/d70dde26ba84abf6f30b04b36d6763f71cafd30071ad52e0336a3d42.body differ diff --git a/backend/.pip-cache/http-v2/d/b/5/9/f/db59f9c692e9970580c620144c4d270aa50895d4ad8c8dd6cc4dc47e b/backend/.pip-cache/http-v2/d/b/5/9/f/db59f9c692e9970580c620144c4d270aa50895d4ad8c8dd6cc4dc47e new file mode 100644 index 0000000..1937ffe Binary files /dev/null and b/backend/.pip-cache/http-v2/d/b/5/9/f/db59f9c692e9970580c620144c4d270aa50895d4ad8c8dd6cc4dc47e differ diff --git a/backend/.pip-cache/http-v2/d/b/5/9/f/db59f9c692e9970580c620144c4d270aa50895d4ad8c8dd6cc4dc47e.body b/backend/.pip-cache/http-v2/d/b/5/9/f/db59f9c692e9970580c620144c4d270aa50895d4ad8c8dd6cc4dc47e.body new file mode 100644 index 0000000..b7b920f --- /dev/null +++ 
b/backend/.pip-cache/http-v2/d/b/5/9/f/db59f9c692e9970580c620144c4d270aa50895d4ad8c8dd6cc4dc47e.body @@ -0,0 +1,200 @@ +Metadata-Version: 2.4 +Name: coverage +Version: 7.13.0 +Summary: Code coverage measurement for Python +Home-page: https://github.com/coveragepy/coveragepy +Author: Ned Batchelder and 246 others +Author-email: ned@nedbatchelder.com +License: Apache-2.0 +Project-URL: Documentation, https://coverage.readthedocs.io/en/7.13.0 +Project-URL: Funding, https://tidelift.com/subscription/pkg/pypi-coverage?utm_source=pypi-coverage&utm_medium=referral&utm_campaign=pypi +Project-URL: Issues, https://github.com/coveragepy/coveragepy/issues +Project-URL: Mastodon, https://hachyderm.io/@coveragepy +Project-URL: Mastodon (nedbat), https://hachyderm.io/@nedbat +Keywords: code coverage testing +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Console +Classifier: Intended Audience :: Developers +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 +Classifier: Programming Language :: Python :: 3.15 +Classifier: Programming Language :: Python :: Free Threading :: 3 - Stable +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Software Development :: Quality Assurance +Classifier: Topic :: Software Development :: Testing +Requires-Python: >=3.10 +Description-Content-Type: text/x-rst +License-File: LICENSE.txt +Provides-Extra: toml +Requires-Dist: tomli; python_full_version <= "3.11.0a6" and extra == "toml" +Dynamic: author +Dynamic: author-email +Dynamic: classifier +Dynamic: 
description +Dynamic: description-content-type +Dynamic: home-page +Dynamic: keywords +Dynamic: license +Dynamic: license-file +Dynamic: project-url +Dynamic: provides-extra +Dynamic: requires-python +Dynamic: summary + +.. Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 +.. For details: https://github.com/coveragepy/coveragepy/blob/main/NOTICE.txt + +=========== +Coverage.py +=========== + +Code coverage measurement for Python. + +.. image:: https://raw.githubusercontent.com/vshymanskyy/StandWithUkraine/main/banner2-direct.svg + :target: https://vshymanskyy.github.io/StandWithUkraine + :alt: Stand with Ukraine + +------------- + +| |kit| |license| |versions| +| |test-status| |quality-status| |docs| |metacov| +| |tidelift| |sponsor| |stars| |mastodon-coveragepy| |mastodon-nedbat| + |bluesky-nedbat| + +Coverage.py measures code coverage, typically during test execution. It uses +the code analysis tools and tracing hooks provided in the Python standard +library to determine which lines are executable, and which have been executed. + +Coverage.py runs on these versions of Python: + +.. PYVERSIONS + +* Python 3.10 through 3.15 alpha, including free-threading. +* PyPy3 versions 3.10 and 3.11. + +Documentation is on `Read the Docs`_. Code repository and issue tracker are on +`GitHub`_. + +.. _Read the Docs: https://coverage.readthedocs.io/en/7.13.0/ +.. _GitHub: https://github.com/coveragepy/coveragepy + + +For Enterprise +-------------- + +.. |tideliftlogo| image:: https://nedbatchelder.com/pix/Tidelift_Logo_small.png + :alt: Tidelift + :target: https://tidelift.com/subscription/pkg/pypi-coverage?utm_source=pypi-coverage&utm_medium=referral&utm_campaign=readme + +.. list-table:: + :widths: 10 100 + + * - |tideliftlogo| + - `Available as part of the Tidelift Subscription. `_ + Coverage and thousands of other packages are working with + Tidelift to deliver one enterprise subscription that covers all of the open + source you use. 
If you want the flexibility of open source and the confidence + of commercial-grade software, this is for you. + `Learn more. `_ + + +Getting Started +--------------- + +Looking to run ``coverage`` on your test suite? See the `Quick Start section`_ +of the docs. + +.. _Quick Start section: https://coverage.readthedocs.io/en/7.13.0/#quick-start + + +Change history +-------------- + +The complete history of changes is on the `change history page`_. + +.. _change history page: https://coverage.readthedocs.io/en/7.13.0/changes.html + + +Code of Conduct +--------------- + +Everyone participating in the coverage.py project is expected to treat other +people with respect and to follow the guidelines articulated in the `Python +Community Code of Conduct`_. + +.. _Python Community Code of Conduct: https://www.python.org/psf/codeofconduct/ + + +Contributing +------------ + +Found a bug? Want to help improve the code or documentation? See the +`Contributing section`_ of the docs. + +.. _Contributing section: https://coverage.readthedocs.io/en/7.13.0/contributing.html + + +Security +-------- + +To report a security vulnerability, please use the `Tidelift security +contact`_. Tidelift will coordinate the fix and disclosure. + +.. _Tidelift security contact: https://tidelift.com/security + + +License +------- + +Licensed under the `Apache 2.0 License`_. For details, see `NOTICE.txt`_. + +.. _Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0 +.. _NOTICE.txt: https://github.com/coveragepy/coveragepy/blob/main/NOTICE.txt + + +.. |test-status| image:: https://github.com/coveragepy/coveragepy/actions/workflows/testsuite.yml/badge.svg?branch=main&event=push + :target: https://github.com/coveragepy/coveragepy/actions/workflows/testsuite.yml + :alt: Test suite status +.. 
|quality-status| image:: https://github.com/coveragepy/coveragepy/actions/workflows/quality.yml/badge.svg?branch=main&event=push + :target: https://github.com/coveragepy/coveragepy/actions/workflows/quality.yml + :alt: Quality check status +.. |docs| image:: https://readthedocs.org/projects/coverage/badge/?version=latest&style=flat + :target: https://coverage.readthedocs.io/en/7.13.0/ + :alt: Documentation +.. |kit| image:: https://img.shields.io/pypi/v/coverage + :target: https://pypi.org/project/coverage/ + :alt: PyPI status +.. |versions| image:: https://img.shields.io/pypi/pyversions/coverage.svg?logo=python&logoColor=FBE072 + :target: https://pypi.org/project/coverage/ + :alt: Python versions supported +.. |license| image:: https://img.shields.io/pypi/l/coverage.svg + :target: https://github.com/coveragepy/coveragepy/blob/main/LICENSE.txt + :alt: License +.. |metacov| image:: https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/nedbat/8c6980f77988a327348f9b02bbaf67f5/raw/metacov.json + :target: https://coveragepy.github.io/metacov-reports/latest.html + :alt: Coverage reports +.. |tidelift| image:: https://tidelift.com/badges/package/pypi/coverage + :target: https://tidelift.com/subscription/pkg/pypi-coverage?utm_source=pypi-coverage&utm_medium=referral&utm_campaign=readme + :alt: Tidelift +.. |stars| image:: https://img.shields.io/github/stars/coveragepy/coveragepy.svg?logo=github&style=flat + :target: https://github.com/coveragepy/coveragepy/stargazers + :alt: GitHub stars +.. |mastodon-nedbat| image:: https://img.shields.io/badge/dynamic/json?style=flat&labelColor=450657&logo=mastodon&logoColor=ffffff&label=@nedbat&query=followers_count&url=https%3A%2F%2Fhachyderm.io%2Fapi%2Fv1%2Faccounts%2Flookup%3Facct=nedbat + :target: https://hachyderm.io/@nedbat + :alt: nedbat on Mastodon +.. 
|mastodon-coveragepy| image:: https://img.shields.io/badge/dynamic/json?style=flat&labelColor=450657&logo=mastodon&logoColor=ffffff&label=@coveragepy&query=followers_count&url=https%3A%2F%2Fhachyderm.io%2Fapi%2Fv1%2Faccounts%2Flookup%3Facct=coveragepy + :target: https://hachyderm.io/@coveragepy + :alt: coveragepy on Mastodon +.. |bluesky-nedbat| image:: https://img.shields.io/badge/dynamic/json?style=flat&color=96a3b0&labelColor=3686f7&logo=icloud&logoColor=white&label=@nedbat&url=https%3A%2F%2Fpublic.api.bsky.app%2Fxrpc%2Fapp.bsky.actor.getProfile%3Factor=nedbat.com&query=followersCount + :target: https://bsky.app/profile/nedbat.com + :alt: nedbat on Bluesky +.. |sponsor| image:: https://img.shields.io/badge/%E2%9D%A4-Sponsor%20me-brightgreen?style=flat&logo=GitHub + :target: https://github.com/sponsors/nedbat + :alt: Sponsor me on GitHub diff --git a/backend/.pip-cache/selfcheck/f912d6fdcf5d5ad7ee8e291510436cde50c9c15212260f2cb62f8d0e b/backend/.pip-cache/selfcheck/f912d6fdcf5d5ad7ee8e291510436cde50c9c15212260f2cb62f8d0e new file mode 100644 index 0000000..2b70d50 --- /dev/null +++ b/backend/.pip-cache/selfcheck/f912d6fdcf5d5ad7ee8e291510436cde50c9c15212260f2cb62f8d0e @@ -0,0 +1 @@ +{"key":"/Users/xujiehan/course/InsightReading/backend/.venv","last_check":"2025-12-23T06:42:22.829045+00:00","pypi_version":"25.3"} \ No newline at end of file diff --git a/backend/README.md b/backend/README.md index 6696a53..b6afb49 100644 --- a/backend/README.md +++ b/backend/README.md @@ -28,3 +28,52 @@ MinerU 服务现在会在运行时自动探测 CUDA,如果服务器中存在 export MINERU_DEVICE_MODE=cpu # 强制 CPU export MINERU_DEVICE_MODE=cuda # 强制 GPU ``` + +## 测试分组(3 套) + +目前将测试按依赖拆成 3 组(用 pytest marker 选择),并分别产出 HTML 测试报告 + 行覆盖率报告。 + +- 详细说明见:`backend/TEST_GROUPS.md` +- 测试用例索引(每个 `test_xxx.py` 在测什么):`backend/TEST_INDEX.md` + +- **第 1 组:core(仅后端内部依赖)** + - 运行:`bash backend/scripts/test_core.sh` + - 覆盖率配置:`backend/.coveragerc`(排除 FastAPI 传输层、外部 API、MinerU 等) + - 报告目录:`backend/test-reports/core/` + +- **第 2 
组:external_api(依赖外部 API,但测试里通常会 mock/stub 掉网络)** + - 运行:`bash backend/scripts/test_external_api.sh` + - 覆盖率配置:`backend/.coveragerc.external` + - 报告目录:`backend/test-reports/external-api/` + +- **第 3 组:mineru(依赖 MinerU)** + - 运行:`bash backend/scripts/test_mineru.sh` + - 覆盖率配置:`backend/.coveragerc.mineru` + - 报告目录:`backend/test-reports/mineru/` + - 真实解析/性能检查不属于单测:`backend/scripts/mineru_comprehensive_check.py` + +## 压力测试(load/stress) + +仓库内置了一个轻量压力测试脚本:`backend/scripts/stress_test.py`,默认用 ASGITransport 在进程内压测(不需要起 uvicorn)。 + +- 一键跑整套压测(服务器推荐,自动输出 JSON + summary.md):`bash backend/scripts/run_stress_suite.sh --url http://127.0.0.1:8000 --token-file tokens.txt` + +- 健康检查(纯框架开销基线):`cd backend && .venv/bin/python scripts/stress_test.py --scenario health --requests 20000 --concurrency 200 --warmup 200` +- 带鉴权的 `/api/v1/users/me`(会自动绕过验证码依赖并批量注册用户,仅限进程内模式):`cd backend && .venv/bin/python scripts/stress_test.py --scenario me --users 200 --requests 20000 --concurrency 200 --warmup 200` +- 论文推荐/检索并发请求(`/api/v1/academic/search`,进程内模式默认 stub 外部 provider):`cd backend && .venv/bin/python scripts/stress_test.py --scenario academic_search --query "vision transformer" --sources semantic_scholar,arxiv,openalex --limit 10 --requests 20000 --concurrency 200 --warmup 200` +- 对已启动的线上/本地服务做真实 HTTP 压测:`cd backend && .venv/bin/python scripts/stress_test.py --url http://127.0.0.1:8000 --scenario health --requests 20000 --concurrency 200` +- 对已启动服务压测 `/api/v1/users/me`:先准备 `tokens.txt`(每行一个 access token),再运行:`cd backend && .venv/bin/python scripts/stress_test.py --url http://127.0.0.1:8000 --scenario me --token-file tokens.txt --requests 20000 --concurrency 200` + +### DeepSeek API 压测 + +直接对 DeepSeek Chat Completions 做并发压测的脚本:`backend/scripts/deepseek_stress_test.py`(注意会产生 API 调用成本)。 + +- 示例:`cd backend && DEEPSEEK_API_KEY=... 
.venv/bin/python scripts/deepseek_stress_test.py --requests 200 --concurrency 20 --warmup 10 --prompt "用一句话解释什么是 transformer" --max-tokens 128` +- 多条 prompt:`cd backend && DEEPSEEK_API_KEY=... .venv/bin/python scripts/deepseek_stress_test.py --prompts-file prompts.txt --requests 500 --concurrency 50 --warmup 20` + +### 并发解析多个 PDF(MinerU) + +用于测试 MinerU 实际解析吞吐的脚本:`backend/scripts/mineru_comprehensive_check.py` + +- 并发解析(输出落盘到指定目录):`cd backend && .venv/bin/python scripts/mineru_comprehensive_check.py --concurrency 4 --output-dir test-reports/mineru/concurrent path/to/a.pdf path/to/b.pdf path/to/c.pdf` +- 大批量时建议降日志:`--log-level WARNING` diff --git a/backend/TEST_GROUPS.md b/backend/TEST_GROUPS.md new file mode 100644 index 0000000..fd6143a --- /dev/null +++ b/backend/TEST_GROUPS.md @@ -0,0 +1,79 @@ +# 测试分组说明(3 套) + +为了在**不引入外部依赖**(网络、MinerU、队列等)的情况下稳定产出覆盖率,我们将后端测试分成 3 组,并分别生成: + +- HTML 测试报告(pytest-html) +- JUnit XML(CI/IDE 友好) +- 行覆盖率报告(coverage + htmlcov) + +三组测试通过 pytest marker 区分,marker 定义见 `backend/pytest.ini`。 + +更细粒度的“每个 `test_xxx.py` 在测什么”索引见:`backend/TEST_INDEX.md`。 + +--- + +## 第 1 组:core(仅后端内部依赖) + +**目标** + +- 覆盖主要业务逻辑/工具函数/内部服务层(不依赖外部网络与 MinerU)。 +- 作为日常开发的默认快速回归集。 + +**特点** + +- 不应访问外部 API(网络)。 +- 不需要 MinerU CLI/模型文件。 +- 覆盖率统计口径:`backend/.coveragerc`(排除 FastAPI 传输层、外部 API、MinerU、Celery/Redis/email 等集成模块)。 + +**如何运行** + +- 命令:`bash backend/scripts/test_core.sh` +- 报告目录:`backend/test-reports/core/` + - `pytest-report.html`:测试用例结果(可直接用浏览器打开) + - `htmlcov/index.html`:行覆盖率网页报告 + +--- + +## 第 2 组:external_api(外部 API 客户端/集成,但测试中通常会 mock/stub 网络) + +**目标** + +- 覆盖与外部检索/LLM 等相关的“客户端与适配层”逻辑(请求构造、参数处理、响应解析、错误处理、重试分支等)。 +- 让我们在**不真的联网**的情况下,仍能对外部依赖相关代码做可靠回归。 + +**特点** + +- 测试一般会对网络请求做 mock/stub,因此不需要真实 API Key,也不依赖网络可用性。 +- 覆盖率统计口径:`backend/.coveragerc.external`(只统计外部集成相关模块,避免把整个 app 都算进去)。 + +**如何运行** + +- 命令:`bash backend/scripts/test_external_api.sh` +- 报告目录:`backend/test-reports/external-api/` + - `pytest-report.html` + - `htmlcov/index.html` + +--- + +## 第 3 
组:mineru(MinerU 集成层) + +**目标** + +- 覆盖 MinerU 集成层的“可测试部分”:CLI 探测、输出路径定位、JSON 读取、内容条目与 bbox 归一化、运行时环境变量(device/cache)处理等。 +- 避免把真实 PDF 解析(耗时、依赖模型/算力/环境)塞进单元测试,保证稳定与速度。 + +**特点** + +- 单测以 mock/stub 为主,不要求本机安装可用的 `mineru` 二进制或模型。 +- 覆盖率统计口径:`backend/.coveragerc.mineru`(只统计 `app.services.mineru_cli` / `app.services.mineru_runtime`)。 + +**如何运行** + +- 命令:`bash backend/scripts/test_mineru.sh` +- 报告目录:`backend/test-reports/mineru/` + - `pytest-report.html` + - `htmlcov/index.html` + +**真实解析 / 性能检查(不属于单元测试)** + +- 如果需要跑“真实解析流程”(更接近端到端/性能测试),请使用脚本:`backend/scripts/mineru_comprehensive_check.py` diff --git a/backend/TEST_INDEX.md b/backend/TEST_INDEX.md new file mode 100644 index 0000000..fe6d168 --- /dev/null +++ b/backend/TEST_INDEX.md @@ -0,0 +1,209 @@ +# 测试脚本与用例索引 + +这份文档用于回答一个常见问题:**每个测试脚本 / `test_xxx.py` 到底在测什么**。 + +> 说明:测试分组(core / external_api / mineru)的高层解释见 `backend/TEST_GROUPS.md`。 + +--- + +## 运行脚本(3 个) + +这些脚本的共同点: + +- 都使用 `coverage run + pytest` 运行,并产出 `pytest-report.html`(测试结果)与 `htmlcov/index.html`(行覆盖率)。 +- 都把覆盖率口径写在各自的 `coverage rcfile` 里(`--rcfile=...`),避免把“非本组范围”的代码算进来。 + +### `backend/scripts/test_core.sh` + +- **做什么**:运行“core 组”测试:`pytest -m "not external_api and not mineru"`。 +- **覆盖率口径**:`backend/.coveragerc`(`source = app`,并 `omit` 传输层、外部 API、MinerU 等集成代码)。 +- **报告输出**:`backend/test-reports/core/`。 + +### `backend/scripts/test_external_api.sh` + +- **做什么**:运行“external_api 组”测试:`pytest -m external_api`。 +- **覆盖率口径**:`backend/.coveragerc.external`(只统计外部检索/LLM/工具等模块;网络请求通常会在测试里 mock/stub)。 +- **报告输出**:`backend/test-reports/external-api/`。 + +### `backend/scripts/test_mineru.sh` + +- **做什么**:运行“mineru 组”测试:`pytest -m mineru`。 +- **覆盖率口径**:`backend/.coveragerc.mineru`(只统计 `app.services.mineru_cli` / `app.services.mineru_runtime`)。 +- **报告输出**:`backend/test-reports/mineru/`。 + +--- + +## 通用测试夹具(fixtures) + +### `backend/tests/conftest.py` + +- **`async_client`**:用 `httpx.AsyncClient + ASGITransport` 直接在进程内跑 FastAPI 应用(不需要起真实 HTTP 服务)。 +- **数据库隔离**:使用 
SQLite 内存库(`sqlite+aiosqlite:///:memory:`),并通过 `dependency_overrides` 覆盖 `get_db`。 +- **文件隔离**:设置 `MEDIA_ROOT` 到 `backend/tests/tmp_media`,并创建 `uploads/`、`avatars/` 目录。 +- **跳过 Redis 验证码依赖**:自动 monkeypatch `verify_code` 永远返回 True,避免注册流程依赖 Redis。 +- **运行 async test**:实现 `pytest_pyfunc_call`,让 vanilla pytest 能直接执行 `async def test_...`(不依赖 pytest-asyncio)。 + +--- + +## MinerU 组(`pytest.mark.mineru`) + +### `backend/tests/test_mineru_cli_adapters.py` + +- **作用**:覆盖 MinerU 集成层的“适配/胶水代码”分支(不跑真实 MinerU)。 +- **重点覆盖**:CLI 探测(PATH/解释器目录 fallback)、产物目录定位、JSON 读取失败处理、CLI 调用异常包装、`parse_pdf` 结果结构与 URL 生成、临时目录清理、`parse_pdf_async` 线程委托。 + +### `backend/tests/test_mineru_cli_normalization.py` + +- **作用**:覆盖 MinerU 输出结构的“归一化逻辑”。 +- **重点覆盖**:block type 映射、page 字段兼容、bbox 缩放/裁剪/非法过滤、content_list 清洗、`/media/...` URL 生成。 + +### `backend/tests/test_mineru_cli_utils.py` + +- **作用**:覆盖 MinerU CLI wrapper 的“辅助工具函数”。 +- **重点覆盖**:plain text 聚合、metadata 构建(含 PyMuPDF 不可用时的容错)、运行时缓存目录与环境变量注入(含 mkdir 失败容错)。 + +### `backend/tests/test_mineru_runtime.py` + +- **作用**:覆盖 MinerU runtime 设备选择逻辑(CPU/CUDA)。 +- **重点覆盖**:`MINERU_DEVICE_MODE` 归一化与写回、`apply_device_env` 合并策略、CUDA 探测分支(通过 mock 控制)。 + +--- + +## external_api 组(`pytest.mark.external_api`) + +### `backend/tests/test_academic_providers.py` + +- **作用**:分别验证 Semantic Scholar / OpenAlex / arXiv provider 的“结果归一化 + 异常处理”。 + +### `backend/tests/test_external_provider_branches.py` + +- **作用**:更细粒度地覆盖 provider 的内部 HTTP/重试/限流分支。 +- **例子**:429 + Retry-After、OpenAlex 非 200 报错、arXiv SSL connector/CA bundle 分支等。 + +### `backend/tests/test_arxiv_provider_http_branches.py` + +- **作用**:专注覆盖 `ArxivProvider` 的重试、timeout、rate-limit、404 的边界路径。 + +### `backend/tests/test_academic_search_service.py` + +- **作用**:验证 `AcademicSearchService` 的多源聚合策略:合并、去重、排序、分类映射。 + +### `backend/tests/test_external_search_service_branches.py` + +- **作用**:补齐 `AcademicSearchService` 的“分支覆盖”:默认 source、错误收集、429 重试、过滤与排序边界。 + +### `backend/tests/test_query_parser.py` + +- **作用**:验证 
`QueryParser` 在“LLM 有输出 / LLM 不可用 / LLM 输出非法”时的解析与回退策略。 + +### `backend/tests/test_query_parser_branches.py` + +- **作用**:补齐 `QueryParser` 的辅助函数分支:去 markdown fence、字段归一化、fallback intent 检测、空 query 拒绝。 + +### `backend/tests/test_llm_client.py` + +- **作用**:验证 `DeepSeekClient` 的请求组装、错误处理、SSE streaming 解析、SSL 配置分支。 +- **说明**:网络请求都用 fake session/response stub,不会真实联网。 + +### `backend/tests/test_agent_controller.py` + +- **作用**:验证 `AgentController` 的 ReAct/工具调用循环:action 解析、调用工具、异常/无法解析时的兜底回答。 + +### `backend/tests/test_tool_executor.py` + +- **作用**:验证 `ToolExecutor` 的工具路由与 JSON 分发:找不到工具、JSON 非法等错误路径。 + +### `backend/tests/test_academic_search_tool.py` + +- **作用**:验证 `AcademicSearchTool` 的输入校验与输出格式化(作者截断、摘要截断、source 过滤等)。 + +### `backend/tests/test_finish_tool.py` + +- **作用**:验证 `FinishTool` 的入参校验与输出结构(最终回答封装)。 + +### `backend/tests/test_intelligent_service.py` + +- **作用**:验证 `IntelligentSearchService` 的旧逻辑 fallback、工具参数归一化、function-call 计划执行与错误传播。 + +### `backend/tests/test_arxiv.py` + +- **作用**:验证 FastAPI 的 `/api/v1/arxiv/search` endpoint:query 拼接、返回结构、API 错误与 XML 解析错误处理。 + +--- + +## core 组(默认:不带 `external_api`/`mineru` marker 的测试) + +> 注:很多 core 测试带 `pytest.mark.asyncio`,只是表示它们是 async test,并不影响分组;分组只看 `external_api`/`mineru`。 + +### `backend/tests/test_auth.py` + +- **作用**:覆盖注册/登录/获取当前用户等核心认证流程(接口级)。 + +### `backend/tests/test_auth_dependencies.py` + +- **作用**:覆盖认证依赖的边界场景:非法 token、缺少 `sub`、用户禁用后的访问、optional auth 的 query token fallback 等。 + +### `backend/tests/test_auth_endpoints_extra.py` + +- **作用**:补齐 auth endpoints 分支:未知用户登录、禁用账号登录、邮件服务未配置、重置密码流程。 + +### `backend/tests/test_security.py` + +- **作用**:安全回归:过期 token、`alg=none` 伪造 token、token 对应用户不存在等应被拒绝的场景。 + +### `backend/tests/test_profile.py` + +- **作用**:用户资料更新与头像上传(接口级),并验证头像文件落盘到 `MEDIA_ROOT/avatars/`。 + +### `backend/tests/test_conversations_endpoints.py` + +- **作用**:对话会话与消息的增删改查、筛选、404 等(接口级)。 + +### `backend/tests/test_library_endpoints.py` + +- **作用**:文献库文件夹 CRUD、上传记录归档/移动、download/ensure-local、删除等(接口级)。 + +### 
`backend/tests/test_notes_endpoints.py` + +- **作用**:笔记 CRUD + 筛选查询(接口级)。 + +### `backend/tests/test_papers_upload.py` + +- **作用**:论文上传与上传列表(接口级):只允许 PDF、大小限制、返回字段正确等。 + +### `backend/tests/test_papers_qa.py` + +- **作用**:论文 QA 接口(接口级):对解析缓存与 LLM client 做 stub,验证正常返回与缺少解析结果时的错误。 + +### `backend/tests/test_parse_utils.py` + +- **作用**:papers endpoint 的辅助函数:文件 hash 计算、补齐 hash、parse result DTO 构建。 + +### `backend/tests/test_db_repositories.py` + +- **作用**:核心仓储层(Repository)CRUD 与业务语义:去重统计、级联删除、解析缓存/任务状态流转等。 + +### `backend/tests/test_db_utils.py` + +- **作用**:数据库初始化工具 `ensure_database_exists` 的逻辑:存在/创建/重试/失败分支(全 mock,不需要真实 Postgres)。 + +### `backend/tests/test_config.py` + +- **作用**:配置对象 `Settings` 的派生属性与环境变量覆盖逻辑。 + +### `backend/tests/test_file_naming_utils.py` + +- **作用**:上传文件命名工具:原始文件名归一化、存储文件名清洗、同名冲突自动加后缀。 + +### `backend/tests/test_schema_validators.py` + +- **作用**:Pydantic schema 的业务校验(密码强度/长度等)。 + +### `backend/tests/test_pdf_metadata.py` + +- **作用**:PDF 元信息提取(标题/作者/摘要/主题词),含同步/异步一致性与边界条件。 + +### `backend/tests/test_annotations_service.py` + +- **作用**:PDF 批注能力(高亮/便签)以及异步 wrapper。 + diff --git a/backend/app/services/ai/llm_client.py b/backend/app/services/ai/llm_client.py index 788cb03..ab951a9 100644 --- a/backend/app/services/ai/llm_client.py +++ b/backend/app/services/ai/llm_client.py @@ -204,7 +204,7 @@ async def chat_content_stream( ) -> AsyncIterator[Dict[str, Any]]: """[EN] Stream message content chunks instead of waiting for completion. 
[ZH] 以增量形式返回消息内容片段。""" - + seen_done = False async for event in self.chat_stream( messages, temperature=temperature, @@ -215,8 +215,13 @@ async def chat_content_stream( parallel_tool_calls=parallel_tool_calls, ): if event.get("type") == "done": - yield {"type": "done"} - break + if not seen_done: + yield {"type": "done"} + seen_done = True + continue + + if seen_done: + continue data = event.get("data") if not data: diff --git a/backend/pytest.ini b/backend/pytest.ini index 2184bc5..1764594 100644 --- a/backend/pytest.ini +++ b/backend/pytest.ini @@ -1,3 +1,5 @@ [pytest] markers = asyncio: async test executed via event loop + external_api: tests that exercise external API clients (network mocked/stubbed) + mineru: tests that require MinerU (CLI/runtime or E2E) diff --git a/backend/scripts/deepseek_stress_test.py b/backend/scripts/deepseek_stress_test.py new file mode 100644 index 0000000..4626b7e --- /dev/null +++ b/backend/scripts/deepseek_stress_test.py @@ -0,0 +1,279 @@ +"""DeepSeek API stress/load test runner. + +This script sends concurrent requests to DeepSeek chat completions and reports +latency percentiles, RPS, and error rates. + +Notes: + - This will incur API cost. Start small (requests/concurrency) and scale up. + - Requires network access and a valid DeepSeek API key. 
+""" + +from __future__ import annotations + +import argparse +import asyncio +import contextlib +import json +import math +import random +import statistics +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import aiohttp + +from app.core.config import settings +from app.services.ai.llm_client import DeepSeekClient + + +@dataclass +class Result: + ok: int + errors: int + status_counts: dict[int, int] + latencies_ms: list[float] + elapsed_s: float + prompt_tokens: int + completion_tokens: int + total_tokens: int + + +def _percentile(sorted_values: list[float], p: float) -> float: + if not sorted_values: + return 0.0 + if p <= 0: + return float(sorted_values[0]) + if p >= 100: + return float(sorted_values[-1]) + k = (len(sorted_values) - 1) * (p / 100.0) + f = math.floor(k) + c = math.ceil(k) + if f == c: + return float(sorted_values[int(k)]) + d0 = sorted_values[f] * (c - k) + d1 = sorted_values[c] * (k - f) + return float(d0 + d1) + + +def _print_summary(result: Result) -> None: + total = result.ok + result.errors + rps = total / result.elapsed_s if result.elapsed_s > 0 else 0.0 + lat_sorted = sorted(result.latencies_ms) + avg = statistics.fmean(lat_sorted) if lat_sorted else 0.0 + p50 = _percentile(lat_sorted, 50) + p95 = _percentile(lat_sorted, 95) + p99 = _percentile(lat_sorted, 99) + worst = lat_sorted[-1] if lat_sorted else 0.0 + + print(f"total={total} ok={result.ok} errors={result.errors} elapsed_s={result.elapsed_s:.3f} rps={rps:.1f}") + print(f"latency_ms avg={avg:.2f} p50={p50:.2f} p95={p95:.2f} p99={p99:.2f} max={worst:.2f}") + codes = " ".join(f"{code}:{count}" for code, count in sorted(result.status_counts.items())) + print(f"status_codes {codes}") + if result.total_tokens: + tps = result.total_tokens / result.elapsed_s if result.elapsed_s > 0 else 0.0 + print( + f"tokens prompt={result.prompt_tokens} completion={result.completion_tokens} " + f"total={result.total_tokens} tokens_per_s={tps:.1f}" + ) + 
+ +def _load_prompts(path: str) -> list[str]: + data = Path(path).read_text(encoding="utf-8") + prompts = [line.strip() for line in data.splitlines() if line.strip()] + if not prompts: + raise ValueError(f"prompts file is empty: {path}") + return prompts + + +async def _run_fixed_requests( + *, + session: aiohttp.ClientSession, + url: str, + headers: dict[str, str], + request_count: int, + concurrency: int, + payload_factory, +) -> Result: + sem = asyncio.Semaphore(concurrency) + latencies_ms: list[float] = [0.0] * request_count + status_counts: dict[int, int] = {} + ok = 0 + errors = 0 + prompt_tokens = 0 + completion_tokens = 0 + total_tokens = 0 + lock = asyncio.Lock() + + async def one(i: int) -> None: + nonlocal ok, errors, prompt_tokens, completion_tokens, total_tokens + async with sem: + start = time.perf_counter() + status = 0 + usage: dict[str, Any] | None = None + try: + async with session.post(url, headers=headers, json=payload_factory()) as resp: + status = resp.status + text = await resp.text() + if status < 400: + try: + data = json.loads(text) + usage_value = data.get("usage") + if isinstance(usage_value, dict): + usage = usage_value + except json.JSONDecodeError: + usage = None + else: + # Keep error lightweight; full body can be large. 
+ usage = None + except Exception: + status = 0 + usage = None + + elapsed_ms = (time.perf_counter() - start) * 1000 + latencies_ms[i] = elapsed_ms + + async with lock: + status_counts[status] = status_counts.get(status, 0) + 1 + if 200 <= status < 400: + ok += 1 + else: + errors += 1 + + if usage: + p = int(usage.get("prompt_tokens") or 0) + c = int(usage.get("completion_tokens") or 0) + t = int(usage.get("total_tokens") or (p + c)) + prompt_tokens += p + completion_tokens += c + total_tokens += t + + started = time.perf_counter() + async with asyncio.TaskGroup() as tg: + for i in range(request_count): + tg.create_task(one(i)) + elapsed_s = time.perf_counter() - started + return Result( + ok=ok, + errors=errors, + status_counts=status_counts, + latencies_ms=latencies_ms, + elapsed_s=elapsed_s, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + ) + + +async def main() -> int: + parser = argparse.ArgumentParser(description="DeepSeek API stress/load test.") + parser.add_argument("--requests", type=int, default=50) + parser.add_argument("--concurrency", type=int, default=5) + parser.add_argument("--warmup", type=int, default=5) + parser.add_argument("--timeout", type=int, default=settings.DEEPSEEK_TIMEOUT) + parser.add_argument("--base-url", default=settings.DEEPSEEK_BASE_URL) + parser.add_argument("--model", default=settings.DEEPSEEK_MODEL) + parser.add_argument("--max-tokens", type=int, default=128) + parser.add_argument("--temperature", type=float, default=0.2) + parser.add_argument("--system", default="You are a helpful assistant.") + parser.add_argument("--prompt", default="用一句话解释什么是 transformer。") + parser.add_argument("--prompts-file", default="", help="One prompt per line; randomly selected.") + parser.add_argument("--api-key", default="", help="Defaults to env/settings; avoid passing on CLI history.") + parser.add_argument("--json", dest="json_output", action="store_true", help="Print JSON summary") + args = 
parser.parse_args() + + if args.requests <= 0 or args.concurrency <= 0: + raise SystemExit("--requests/--concurrency must be > 0") + + api_key = args.api_key or settings.DEEPSEEK_API_KEY or "" + if not api_key: + raise SystemExit("DeepSeek API key is not configured. Set DEEPSEEK_API_KEY or pass --api-key.") + + prompts = _load_prompts(args.prompts_file) if args.prompts_file else [args.prompt] + + base_url = str(args.base_url).rstrip("/") + url = f"{base_url}/chat/completions" + + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + + deepseek = DeepSeekClient( + api_key=api_key, + model=args.model, + base_url=base_url, + timeout=args.timeout, + max_tokens=args.max_tokens, + ) + + def payload_factory() -> dict[str, Any]: + prompt = random.choice(prompts) + return { + "model": deepseek.model, + "messages": [ + {"role": "system", "content": args.system}, + {"role": "user", "content": prompt}, + ], + "temperature": args.temperature, + "max_tokens": args.max_tokens, + } + + timeout_cfg = aiohttp.ClientTimeout(total=deepseek.timeout) + connector = aiohttp.TCPConnector(ssl=deepseek._ssl_context) + + async with aiohttp.ClientSession(timeout=timeout_cfg, connector=connector, trust_env=deepseek.trust_env) as session: + # Warmup (sequential) + for _ in range(args.warmup): + try: + async with session.post(url, headers=headers, json=payload_factory()) as resp: + await resp.read() + except Exception: + pass + + result = await _run_fixed_requests( + session=session, + url=url, + headers=headers, + request_count=args.requests, + concurrency=args.concurrency, + payload_factory=payload_factory, + ) + + if args.json_output: + lat_sorted = sorted(result.latencies_ms) + payload = { + "requests": args.requests, + "concurrency": args.concurrency, + "ok": result.ok, + "errors": result.errors, + "elapsed_s": result.elapsed_s, + "rps": (args.requests / result.elapsed_s) if result.elapsed_s else 0.0, + "status_counts": result.status_counts, + 
"latency_ms": { + "avg": statistics.fmean(result.latencies_ms) if result.latencies_ms else 0.0, + "p50": _percentile(lat_sorted, 50), + "p95": _percentile(lat_sorted, 95), + "p99": _percentile(lat_sorted, 99), + "max": max(result.latencies_ms) if result.latencies_ms else 0.0, + }, + "tokens": { + "prompt": result.prompt_tokens, + "completion": result.completion_tokens, + "total": result.total_tokens, + "tokens_per_s": (result.total_tokens / result.elapsed_s) if result.elapsed_s else 0.0, + }, + "model": args.model, + "base_url": base_url, + } + print(json.dumps(payload, ensure_ascii=False, indent=2)) + else: + _print_summary(result) + + return 0 + + +if __name__ == "__main__": + with contextlib.suppress(KeyboardInterrupt): + raise SystemExit(asyncio.run(main())) diff --git a/backend/scripts/mineru_comprehensive_check.py b/backend/scripts/mineru_comprehensive_check.py new file mode 100644 index 0000000..3365d6c --- /dev/null +++ b/backend/scripts/mineru_comprehensive_check.py @@ -0,0 +1,303 @@ +"""Comprehensive MinerU parsing check (manual/performance use). + +This script is intentionally NOT part of the pytest suite. It exercises the +actual MinerU CLI + our wrapper (`app.services.mineru_cli.parse_pdf_async`). 
+ +Usage: + backend/.venv/bin/python backend/scripts/mineru_comprehensive_check.py path/to/a.pdf path/to/b.pdf +""" + +from __future__ import annotations + +import argparse +import asyncio +import json +import logging +import sys +import time +from pathlib import Path +from typing import Any + +from app.services.mineru_cli import MineruCLIError, parse_pdf_async + +logger = logging.getLogger(__name__) + + +def analyze_content_types(content_list: list[dict[str, Any]]) -> dict[str, int]: + type_counts: dict[str, int] = {} + for item in content_list: + item_type = item.get("type", "unknown") + type_counts[item_type] = type_counts.get(item_type, 0) + 1 + return type_counts + + +def analyze_bboxes(content_list: list[dict[str, Any]]) -> dict[str, Any]: + total_items = len(content_list) + items_with_bbox = 0 + valid_bboxes = 0 + invalid_bboxes: list[dict[str, Any]] = [] + + for idx, item in enumerate(content_list): + bbox = item.get("bbox") + if bbox is None: + continue + + items_with_bbox += 1 + if isinstance(bbox, (list, tuple)) and len(bbox) >= 4: + x0, y0, x1, y1 = bbox[:4] + if ( + 0 <= x0 <= 1000 + and 0 <= y0 <= 1000 + and 0 <= x1 <= 1000 + and 0 <= y1 <= 1000 + and x0 < x1 + and y0 < y1 + ): + valid_bboxes += 1 + else: + invalid_bboxes.append( + { + "index": idx, + "type": item.get("type"), + "bbox": bbox, + "reason": "Out of range or invalid coordinates", + } + ) + else: + invalid_bboxes.append( + { + "index": idx, + "type": item.get("type"), + "bbox": bbox, + "reason": "Invalid bbox format", + } + ) + + return { + "total_items": total_items, + "items_with_bbox": items_with_bbox, + "valid_bboxes": valid_bboxes, + "bbox_coverage": f"{items_with_bbox / total_items * 100:.1f}%" if total_items else "0%", + "valid_bbox_rate": f"{valid_bboxes / items_with_bbox * 100:.1f}%" if items_with_bbox else "0%", + "invalid_bboxes": invalid_bboxes[:5], + } + + +def analyze_pages(content_list: list[dict[str, Any]]) -> dict[str, Any]: + page_items: dict[int, int] = {} + 
items_without_page = 0 + + for item in content_list: + page = item.get("page") + if page is not None: + page_items[int(page)] = page_items.get(int(page), 0) + 1 + else: + items_without_page += 1 + + return { + "total_pages": len(page_items), + "page_distribution": dict(sorted(page_items.items())), + "items_without_page": items_without_page, + } + + +def extract_sample_content( + content_list: list[dict[str, Any]], max_samples: int = 3 +) -> dict[str, list[dict[str, Any]]]: + samples: dict[str, list[dict[str, Any]]] = {} + + for item in content_list: + item_type = item.get("type", "unknown") + if item_type not in samples: + samples[item_type] = [] + if len(samples[item_type]) >= max_samples: + continue + + sample: dict[str, Any] = { + "type": item_type, + "page": item.get("page"), + "bbox": item.get("bbox"), + "content_preview": None, + } + + if "text" in item and item["text"]: + text = str(item["text"]) + sample["content_preview"] = text[:100] + "..." if len(text) > 100 else text + elif "markdown" in item and item["markdown"]: + md = str(item["markdown"]) + sample["content_preview"] = md[:100] + "..." if len(md) > 100 else md + elif "content" in item and item["content"] is not None: + content = str(item["content"]) + sample["content_preview"] = content[:100] + "..." 
if len(content) > 100 else content + + samples[item_type].append(sample) + + return samples + + +async def check_pdf(pdf_path: Path, *, output_dir: Path | None) -> dict[str, Any]: + logger.info("%s", "=" * 80) + logger.info("Testing: %s", pdf_path) + logger.info("%s", "=" * 80) + + started = time.perf_counter() + try: + result = await parse_pdf_async(pdf_path, output_dir=output_dir) + elapsed_s = time.perf_counter() - started + if not result.get("success"): + return { + "file": pdf_path.name, + "success": False, + "elapsed_s": elapsed_s, + "error": result.get("error", "Unknown error"), + } + + content_list = result.get("content") or [] + metadata = result.get("metadata") or {} + markdown = result.get("markdown") or "" + plain_text = result.get("plain_text") or "" + + type_counts = analyze_content_types(content_list) + bbox_analysis = analyze_bboxes(content_list) + page_analysis = analyze_pages(content_list) + content_samples = extract_sample_content(content_list) + + logger.info("✅ Parsing successful") + logger.info("Metadata: title=%s pages=%s size=%s", metadata.get("title"), metadata.get("total_pages"), metadata.get("file_size")) + logger.info("Content: items=%s types=%s", len(content_list), ", ".join(sorted(type_counts.keys()))) + logger.info( + "BBox: %s/%s (%s), valid=%s/%s (%s)", + bbox_analysis["items_with_bbox"], + bbox_analysis["total_items"], + bbox_analysis["bbox_coverage"], + bbox_analysis["valid_bboxes"], + bbox_analysis["items_with_bbox"], + bbox_analysis["valid_bbox_rate"], + ) + logger.info("Pages: total=%s missing=%s", page_analysis["total_pages"], page_analysis["items_without_page"]) + logger.info("Text: markdown=%s chars plain=%s chars", len(markdown), len(plain_text)) + if markdown: + logger.info("Markdown preview: %s", markdown[:200].replace("\n", " ")) + + for content_type, samples in content_samples.items(): + logger.info("Samples: %s", content_type) + for sample in samples: + logger.info(" - page=%s bbox=%s preview=%s", sample.get("page"), 
sample.get("bbox"), sample.get("content_preview")) + + return { + "file": pdf_path.name, + "success": True, + "elapsed_s": elapsed_s, + "metadata": metadata, + "type_counts": type_counts, + "bbox_analysis": { + "total_items": bbox_analysis["total_items"], + "items_with_bbox": bbox_analysis["items_with_bbox"], + "valid_bboxes": bbox_analysis["valid_bboxes"], + "bbox_coverage": bbox_analysis["bbox_coverage"], + "valid_bbox_rate": bbox_analysis["valid_bbox_rate"], + }, + "page_analysis": { + "total_pages": page_analysis["total_pages"], + "items_without_page": page_analysis["items_without_page"], + }, + "text_lengths": { + "markdown": len(markdown), + "plain_text": len(plain_text), + }, + "output_dir": result.get("output_dir"), + } + except MineruCLIError as exc: + elapsed_s = time.perf_counter() - started + logger.error("❌ MinerU CLI error: %s", exc) + return {"file": pdf_path.name, "success": False, "elapsed_s": elapsed_s, "error": str(exc)} + + +def main(argv: list[str]) -> int: + parser = argparse.ArgumentParser() + parser.add_argument("pdfs", nargs="+", help="PDF file paths to parse") + parser.add_argument("--output-dir", default="", help="Persist MinerU outputs under this directory") + parser.add_argument("--concurrency", type=int, default=1, help="Parse up to N PDFs concurrently") + parser.add_argument( + "--log-level", + default="INFO", + choices=("DEBUG", "INFO", "WARNING", "ERROR"), + help="Logging verbosity (use WARNING/ERROR for large batches)", + ) + parser.add_argument("--json-out", default="", help="Write JSON summary to this file") + args = parser.parse_args(argv) + + logging.basicConfig(level=getattr(logging, args.log_level), format="%(asctime)s - %(levelname)s - %(message)s") + + pdf_paths: list[Path] = [Path(p).expanduser().resolve() for p in args.pdfs] + missing = [p for p in pdf_paths if not p.exists()] + if missing: + logger.error("Missing PDF files: %s", ", ".join(str(p) for p in missing)) + return 2 + + output_dir: Path | None = None + if 
args.output_dir: + output_dir = Path(args.output_dir).expanduser().resolve() + output_dir.mkdir(parents=True, exist_ok=True) + + async def run_all() -> list[dict[str, Any]]: + if args.concurrency <= 1: + results: list[dict[str, Any]] = [] + for pdf_path in pdf_paths: + results.append(await check_pdf(pdf_path, output_dir=output_dir)) + return results + + sem = asyncio.Semaphore(args.concurrency) + results: list[dict[str, Any] | None] = [None] * len(pdf_paths) + + async def one(index: int, pdf_path: Path) -> None: + async with sem: + per_pdf_out: Path | None = None + if output_dir is not None: + # Avoid output collisions when different PDFs share the same stem. + per_pdf_out = output_dir / f"{pdf_path.stem}-{index:03d}" + per_pdf_out.mkdir(parents=True, exist_ok=True) + results[index] = await check_pdf(pdf_path, output_dir=per_pdf_out) + + async with asyncio.TaskGroup() as tg: + for i, pdf_path in enumerate(pdf_paths): + tg.create_task(one(i, pdf_path)) + + return [item for item in results if item is not None] + + results = asyncio.run(run_all()) + ok = [r for r in results if r.get("success")] + failed = [r for r in results if not r.get("success")] + + elapsed_values = [float(r.get("elapsed_s") or 0.0) for r in results] + elapsed_sorted = sorted(value for value in elapsed_values if value > 0) + if elapsed_sorted: + def _pct(p: float) -> float: + k = int(round((len(elapsed_sorted) - 1) * (p / 100.0))) + k = max(0, min(len(elapsed_sorted) - 1, k)) + return elapsed_sorted[k] + + logger.info( + "Timing (s): avg=%.2f p50=%.2f p95=%.2f max=%.2f", + sum(elapsed_sorted) / len(elapsed_sorted), + _pct(50), + _pct(95), + elapsed_sorted[-1], + ) + + logger.info("%s", "=" * 80) + logger.info("Summary: total=%s ok=%s failed=%s", len(results), len(ok), len(failed)) + for r in failed: + logger.info(" ❌ %s: %s", r.get("file"), r.get("error")) + + if args.json_out: + out = Path(args.json_out).expanduser().resolve() + out.parent.mkdir(parents=True, exist_ok=True) + 
out.write_text(json.dumps(results, ensure_ascii=False, indent=2), encoding="utf-8") + logger.info("Wrote JSON: %s", out) + + return 0 if not failed else 1 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/backend/scripts/run_stress_suite.sh b/backend/scripts/run_stress_suite.sh new file mode 100644 index 0000000..a003d64 --- /dev/null +++ b/backend/scripts/run_stress_suite.sh @@ -0,0 +1,230 @@ +#!/usr/bin/env bash +set -euo pipefail + +BACKEND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$BACKEND_DIR" + +PY="${BACKEND_DIR}/.venv/bin/python" + +URL="" +TOKEN_FILE="" +OUT_DIR="" + +REQUESTS=20000 +CONCURRENCY=200 +WARMUP=200 + +ME_REQUESTS=20000 +ME_CONCURRENCY=200 +ME_WARMUP=200 + +ACADEMIC_REQUESTS=20000 +ACADEMIC_CONCURRENCY=200 +ACADEMIC_WARMUP=200 +ACADEMIC_QUERY="vision transformer" +ACADEMIC_SOURCES="semantic_scholar" +ACADEMIC_LIMIT=10 +ACADEMIC_QUERIES_FILE="" + +PDF_DIR="" +PDF_LIMIT=0 +MINERU_CONCURRENCY=4 +MINERU_LOG_LEVEL="WARNING" + +RUN_DEEPSEEK=0 +DEEPSEEK_REQUESTS=200 +DEEPSEEK_CONCURRENCY=20 +DEEPSEEK_WARMUP=10 +DEEPSEEK_PROMPT="用一句话解释什么是 transformer。" +DEEPSEEK_PROMPTS_FILE="" +DEEPSEEK_MAX_TOKENS=128 + +usage() { + cat <<'EOF' +One-click stress test suite runner. + +Default: runs health + /users/me + /academic/search. 
+ +Usage: + bash backend/scripts/run_stress_suite.sh --url http://127.0.0.1:8000 --token-file tokens.txt + +Options: + --url URL Base URL of a running backend (recommended on server) + --token-file PATH Required for /users/me in live server mode + --out-dir PATH Output directory (default: backend/test-reports/load/) + + --requests N Health requests (default: 20000) + --concurrency N Health concurrency (default: 200) + --warmup N Health warmup (default: 200) + + --me-requests N /users/me requests (default: 20000) + --me-concurrency N /users/me concurrency (default: 200) + --me-warmup N /users/me warmup (default: 200) + + --academic-requests N /academic/search requests (default: 20000) + --academic-concurrency N /academic/search concurrency (default: 200) + --academic-warmup N /academic/search warmup (default: 200) + --academic-query TEXT Query string (default: "vision transformer") + --academic-queries-file PATH One query per line; randomly selected + --academic-sources CSV semantic_scholar,arxiv,openalex (default: semantic_scholar) + --academic-limit N Result limit (default: 10) + + --pdf-dir PATH Run MinerU concurrent parsing over PDFs in a directory (optional) + --pdf-limit N Limit number of PDFs (0 = all) + --mineru-concurrency N MinerU parse concurrency (default: 4) + + --deepseek Run DeepSeek API stress (requires DEEPSEEK_API_KEY) + --deepseek-requests N DeepSeek requests (default: 200) + --deepseek-concurrency N DeepSeek concurrency (default: 20) + --deepseek-warmup N DeepSeek warmup (default: 10) + --deepseek-prompt TEXT DeepSeek single prompt + --deepseek-prompts-file PATH DeepSeek prompts file (one per line) + --deepseek-max-tokens N DeepSeek max tokens (default: 128) +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --url) URL="${2:-}"; shift 2 ;; + --token-file) TOKEN_FILE="${2:-}"; shift 2 ;; + --out-dir) OUT_DIR="${2:-}"; shift 2 ;; + + --requests) REQUESTS="${2:-}"; shift 2 ;; + --concurrency) CONCURRENCY="${2:-}"; shift 2 ;; + --warmup) 
WARMUP="${2:-}"; shift 2 ;; + + --me-requests) ME_REQUESTS="${2:-}"; shift 2 ;; + --me-concurrency) ME_CONCURRENCY="${2:-}"; shift 2 ;; + --me-warmup) ME_WARMUP="${2:-}"; shift 2 ;; + + --academic-requests) ACADEMIC_REQUESTS="${2:-}"; shift 2 ;; + --academic-concurrency) ACADEMIC_CONCURRENCY="${2:-}"; shift 2 ;; + --academic-warmup) ACADEMIC_WARMUP="${2:-}"; shift 2 ;; + --academic-query) ACADEMIC_QUERY="${2:-}"; shift 2 ;; + --academic-queries-file) ACADEMIC_QUERIES_FILE="${2:-}"; shift 2 ;; + --academic-sources) ACADEMIC_SOURCES="${2:-}"; shift 2 ;; + --academic-limit) ACADEMIC_LIMIT="${2:-}"; shift 2 ;; + + --pdf-dir) PDF_DIR="${2:-}"; shift 2 ;; + --pdf-limit) PDF_LIMIT="${2:-}"; shift 2 ;; + --mineru-concurrency) MINERU_CONCURRENCY="${2:-}"; shift 2 ;; + + --deepseek) RUN_DEEPSEEK=1; shift ;; + --deepseek-requests) DEEPSEEK_REQUESTS="${2:-}"; shift 2 ;; + --deepseek-concurrency) DEEPSEEK_CONCURRENCY="${2:-}"; shift 2 ;; + --deepseek-warmup) DEEPSEEK_WARMUP="${2:-}"; shift 2 ;; + --deepseek-prompt) DEEPSEEK_PROMPT="${2:-}"; shift 2 ;; + --deepseek-prompts-file) DEEPSEEK_PROMPTS_FILE="${2:-}"; shift 2 ;; + --deepseek-max-tokens) DEEPSEEK_MAX_TOKENS="${2:-}"; shift 2 ;; + + -h|--help) usage; exit 0 ;; + *) echo "Unknown option: $1" >&2; usage; exit 2 ;; + esac +done + +if [[ ! -x "$PY" ]]; then + echo "Missing venv python: $PY" >&2 + echo "Create venv first (backend/.venv) and install dependencies." 
>&2 + exit 2 +fi + +timestamp="$(date +%Y%m%d-%H%M%S)" +if [[ -z "$OUT_DIR" ]]; then + OUT_DIR="$BACKEND_DIR/test-reports/load/$timestamp" +fi +mkdir -p "$OUT_DIR" + +echo "# Load Test Summary" > "$OUT_DIR/summary.md" +echo "" >> "$OUT_DIR/summary.md" +echo "- generated_at: $(date -Iseconds)" >> "$OUT_DIR/summary.md" +echo "- out_dir: $OUT_DIR" >> "$OUT_DIR/summary.md" +echo "" >> "$OUT_DIR/summary.md" + +run_json() { + local name="$1"; shift + local out_json="$OUT_DIR/${name}.json" + local out_err="$OUT_DIR/${name}.log" + + echo "==> $name" + if "$@" >"$out_json" 2>"$out_err"; then + echo "- $name: ok (\`$out_json\`)" >> "$OUT_DIR/summary.md" + return 0 + fi + local code=$? + echo "- $name: failed (exit=$code, log=\`$out_err\`)" >> "$OUT_DIR/summary.md" + return "$code" +} + +echo "==> health" +if [[ -n "$URL" ]]; then + run_json health "$PY" scripts/stress_test.py --url "$URL" --scenario health --requests "$REQUESTS" --concurrency "$CONCURRENCY" --warmup "$WARMUP" --json +else + run_json health "$PY" scripts/stress_test.py --scenario health --requests "$REQUESTS" --concurrency "$CONCURRENCY" --warmup "$WARMUP" --json +fi + +echo "==> users_me" +if [[ -n "$URL" ]]; then + if [[ -z "$TOKEN_FILE" ]]; then + echo "- users_me: skipped (missing --token-file for live server mode)" >> "$OUT_DIR/summary.md" + else + run_json users_me "$PY" scripts/stress_test.py --url "$URL" --scenario me --token-file "$TOKEN_FILE" --requests "$ME_REQUESTS" --concurrency "$ME_CONCURRENCY" --warmup "$ME_WARMUP" --json + fi +else + run_json users_me "$PY" scripts/stress_test.py --scenario me --users 50 --requests "$ME_REQUESTS" --concurrency "$ME_CONCURRENCY" --warmup "$ME_WARMUP" --json +fi + +echo "==> academic_search" +academic_args=(--scenario academic_search --requests "$ACADEMIC_REQUESTS" --concurrency "$ACADEMIC_CONCURRENCY" --warmup "$ACADEMIC_WARMUP" --sources "$ACADEMIC_SOURCES" --limit "$ACADEMIC_LIMIT" --json) +if [[ -n "$ACADEMIC_QUERIES_FILE" ]]; then + 
academic_args+=(--queries-file "$ACADEMIC_QUERIES_FILE") +else + academic_args+=(--query "$ACADEMIC_QUERY") +fi +if [[ -n "$URL" ]]; then + academic_args=(--url "$URL" "${academic_args[@]}") +fi +run_json academic_search "$PY" scripts/stress_test.py "${academic_args[@]}" + +if [[ -n "$PDF_DIR" ]]; then + echo "==> mineru_concurrent" + if [[ ! -d "$PDF_DIR" ]]; then + echo "- mineru_concurrent: skipped (pdf-dir not found: $PDF_DIR)" >> "$OUT_DIR/summary.md" + else + mapfile -t pdfs < <(find "$PDF_DIR" -type f \( -iname "*.pdf" -o -iname "*.PDF" \) | sort) + if [[ "${#pdfs[@]}" -eq 0 ]]; then + echo "- mineru_concurrent: skipped (no PDFs under $PDF_DIR)" >> "$OUT_DIR/summary.md" + else + if [[ "$PDF_LIMIT" -gt 0 && "${#pdfs[@]}" -gt "$PDF_LIMIT" ]]; then + pdfs=("${pdfs[@]:0:$PDF_LIMIT}") + fi + mineru_out="$OUT_DIR/mineru_outputs" + mkdir -p "$mineru_out" + mineru_json="$OUT_DIR/mineru_concurrent.json" + mineru_log="$OUT_DIR/mineru_concurrent.log" + if "$PY" scripts/mineru_comprehensive_check.py --concurrency "$MINERU_CONCURRENCY" --log-level "$MINERU_LOG_LEVEL" --output-dir "$mineru_out" --json-out "$mineru_json" "${pdfs[@]}" >"$mineru_log" 2>&1; then + echo "- mineru_concurrent: ok (\`$mineru_json\`)" >> "$OUT_DIR/summary.md" + else + code=$? 
+ echo "- mineru_concurrent: failed (exit=$code, log=\`$mineru_log\`)" >> "$OUT_DIR/summary.md" + fi + fi + fi +fi + +if [[ "$RUN_DEEPSEEK" -eq 1 ]]; then + echo "==> deepseek" + if [[ -z "${DEEPSEEK_API_KEY:-}" ]]; then + echo "- deepseek: skipped (missing DEEPSEEK_API_KEY env)" >> "$OUT_DIR/summary.md" + else + deepseek_args=(--requests "$DEEPSEEK_REQUESTS" --concurrency "$DEEPSEEK_CONCURRENCY" --warmup "$DEEPSEEK_WARMUP" --max-tokens "$DEEPSEEK_MAX_TOKENS" --json) + if [[ -n "$DEEPSEEK_PROMPTS_FILE" ]]; then + deepseek_args+=(--prompts-file "$DEEPSEEK_PROMPTS_FILE") + else + deepseek_args+=(--prompt "$DEEPSEEK_PROMPT") + fi + run_json deepseek "$PY" scripts/deepseek_stress_test.py "${deepseek_args[@]}" + fi +fi + +echo "" >> "$OUT_DIR/summary.md" +echo "Done. Summary: $OUT_DIR/summary.md" diff --git a/backend/scripts/stress_test.py b/backend/scripts/stress_test.py index d0c3273..783ea16 100644 --- a/backend/scripts/stress_test.py +++ b/backend/scripts/stress_test.py @@ -19,7 +19,7 @@ import time from dataclasses import dataclass from pathlib import Path -from typing import Any, AsyncIterator +from typing import Any, AsyncIterator, Callable from uuid import uuid4 from httpx import ASGITransport, AsyncClient @@ -93,8 +93,124 @@ async def override_get_db() -> AsyncIterator[AsyncSession]: return AsyncClient(transport=ASGITransport(app=app), base_url="http://test") -async def _register_and_login(client: AsyncClient, email: str, password: str) -> str: - resp = await client.post("/api/v1/users", json={"email": email, "password": password}) +def _patch_verify_code_for_asgi() -> Callable[[], None]: + """Bypass Redis verification code dependency for in-process load tests.""" + from app.api.v1.endpoints import users as users_endpoints + + original = getattr(users_endpoints, "verify_code", None) + + async def _always_valid(*args, **kwargs) -> bool: + return True + + users_endpoints.verify_code = _always_valid # type: ignore[assignment] + + def restore() -> None: + if original 
is not None: + users_endpoints.verify_code = original # type: ignore[assignment] + + return restore + + +def _patch_academic_providers_for_asgi() -> Callable[[], None]: + """Stub external provider calls so /api/v1/academic/search can be load tested offline.""" + from app.services.academic.providers.arxiv import ArxivProvider + from app.services.academic.providers.openalex import OpenAlexProvider + from app.services.academic.providers.semantic_scholar import SemanticScholarProvider + + original_semantic = SemanticScholarProvider.search + original_arxiv = ArxivProvider.search + original_openalex = OpenAlexProvider.search + + async def semantic_search(self, query: str, **kwargs) -> list[dict[str, Any]]: # noqa: ANN001 + limit = int(kwargs.get("limit") or 20) + return [ + { + "paper_id": f"S{i}", + "title": f"{query} - Semantic Scholar Result {i}", + "authors": ["Alice", "Bob"], + "abstract": f"This paper discusses {query} and related methods.", + "year": 2024, + "venue": "NeurIPS", + "url": "https://example.org/ss", + "citation_count": i, + "published_date": "2024-01-01", + "source": "semantic_scholar", + } + for i in range(1, max(2, min(limit, 10)) + 1) + ] + + async def arxiv_search(self, query: str, **kwargs) -> list[dict[str, Any]]: # noqa: ANN001 + max_results = int(kwargs.get("max_results") or 20) + return [ + { + "paper_id": f"A{i}", + "title": f"{query} - arXiv Result {i}", + "authors": ["Carol"], + "abstract": f"arXiv entry about {query}.", + "year": 2023, + "venue": None, + "url": "https://arxiv.org/abs/1234.5678", + "citation_count": 0, + "published": "2023-01-01T00:00:00Z", + "source": "arxiv", + } + for i in range(1, max(2, min(max_results, 6)) + 1) + ] + + async def openalex_search(self, query: str, **kwargs) -> list[dict[str, Any]]: # noqa: ANN001 + limit = int(kwargs.get("limit") or 20) + return [ + { + "paper_id": f"O{i}", + "title": f"{query} - OpenAlex Result {i}", + "authors": ["Dave"], + "abstract": f"OpenAlex result covering {query}.", + "year": 
2022, + "venue": "ICLR", + "url": "https://openalex.org/W123", + "citation_count": 2 * i, + "published": "2022-01-01T00:00:00Z", + "source": "openalex", + } + for i in range(1, max(2, min(limit, 6)) + 1) + ] + + SemanticScholarProvider.search = semantic_search # type: ignore[assignment] + ArxivProvider.search = arxiv_search # type: ignore[assignment] + OpenAlexProvider.search = openalex_search # type: ignore[assignment] + + def restore() -> None: + SemanticScholarProvider.search = original_semantic # type: ignore[assignment] + ArxivProvider.search = original_arxiv # type: ignore[assignment] + OpenAlexProvider.search = original_openalex # type: ignore[assignment] + + return restore + + +def _load_tokens(token_file: str) -> list[str]: + path = Path(token_file) + raw = path.read_text(encoding="utf-8").strip() + if not raw: + return [] + if path.suffix.lower() == ".json": + tokens = json.loads(raw) + if not isinstance(tokens, list) or not all(isinstance(item, str) for item in tokens): + raise ValueError("--token-file JSON must be a list of strings") + return [token.strip() for token in tokens if token.strip()] + return [line.strip() for line in raw.splitlines() if line.strip()] + + +async def _register_and_login( + client: AsyncClient, + *, + email: str, + password: str, + verification_code: str, +) -> str: + resp = await client.post( + "/api/v1/users", + json={"email": email, "password": password, "verification_code": verification_code}, + ) # allow re-runs with same user count / db if resp.status_code not in (201, 409): raise RuntimeError(f"register failed: {resp.status_code} {resp.text}") @@ -149,12 +265,26 @@ async def one(i: int) -> None: async def main() -> int: parser = argparse.ArgumentParser(description="Backend stress/load test (ASGI in-process by default).") - parser.add_argument("--scenario", choices=("health", "me"), default="health") + parser.add_argument("--scenario", choices=("health", "me", "academic_search"), default="health") 
parser.add_argument("--requests", type=int, default=2000) parser.add_argument("--concurrency", type=int, default=50) parser.add_argument("--warmup", type=int, default=50) parser.add_argument("--users", type=int, default=50, help="Only for scenario=me") parser.add_argument("--password", default="StrongPass1!") + parser.add_argument("--verification-code", default="000000", help="Only for scenario=me when registering users") + parser.add_argument( + "--token-file", + default="", + help="Only for scenario=me. Provide existing access tokens (newline-separated or JSON array) to skip registration.", + ) + parser.add_argument("--query", default="transformer", help="Only for scenario=academic_search") + parser.add_argument("--queries-file", default="", help="Only for scenario=academic_search (one query per line)") + parser.add_argument( + "--sources", + default="semantic_scholar", + help="Only for scenario=academic_search (comma-separated: semantic_scholar,arxiv,openalex)", + ) + parser.add_argument("--limit", type=int, default=10, help="Only for scenario=academic_search") parser.add_argument("--json", dest="json_output", action="store_true", help="Print JSON summary") parser.add_argument( "--url", @@ -169,12 +299,17 @@ async def main() -> int: if args.scenario == "me" and args.users <= 0: raise SystemExit("--users must be > 0 for scenario=me") + if args.scenario == "academic_search" and args.limit <= 0: + raise SystemExit("--limit must be > 0 for scenario=academic_search") + # Build client tmpdir = Path(tempfile.mkdtemp(prefix="ir-stress-")) db_path = tmpdir / "stress.sqlite3" client: AsyncClient | None = None engine: AsyncEngine | None = None using_asgi = not bool(args.url) + restore_verify_code: Callable[[], None] | None = None + restore_academic_providers: Callable[[], None] | None = None try: if args.url: @@ -187,23 +322,62 @@ async def main() -> int: os.environ.setdefault("YOLO_CONFIG_DIR", str(tmpdir / "ultralytics")) engine, session_factory = await 
_setup_sqlite_db(db_path) + restore_verify_code = _patch_verify_code_for_asgi() + restore_academic_providers = _patch_academic_providers_for_asgi() client = await _build_asgi_client(session_factory) # Scenario setup headers_list: list[dict[str, str]] = [] if args.scenario == "me": - # pre-create tokens so hot path is only authenticated GET - for _ in range(args.users): - email = f"stress-{uuid4().hex[:12]}@example.com" - token = await _register_and_login(client, email, args.password) - headers_list.append({"Authorization": f"Bearer {token}"}) + tokens: list[str] = [] + if args.token_file: + tokens = _load_tokens(args.token_file) + if not tokens: + raise RuntimeError(f"--token-file is empty: {args.token_file}") + elif args.url: + raise RuntimeError("Live server mode requires --token-file for scenario=me (or pre-create users manually).") + + if tokens: + headers_list = [{"Authorization": f"Bearer {token}"} for token in tokens] + else: + # pre-create tokens so hot path is only authenticated GET + for _ in range(args.users): + email = f"stress-{uuid4().hex[:12]}@example.com" + token = await _register_and_login( + client, + email=email, + password=args.password, + verification_code=args.verification_code, + ) + headers_list.append({"Authorization": f"Bearer {token}"}) + + queries: list[str] = [] + if args.scenario == "academic_search": + if args.queries_file: + path = Path(args.queries_file) + queries = [line.strip() for line in path.read_text(encoding="utf-8").splitlines() if line.strip()] + if not queries: + raise RuntimeError(f"--queries-file is empty: {args.queries_file}") + else: + queries = [args.query] + + sources = [s.strip() for s in args.sources.split(",") if s.strip()] + if not sources: + sources = ["semantic_scholar"] # Warmup async def warmup_request(c: AsyncClient) -> int: if args.scenario == "health": return (await c.get("/health")).status_code - header = random.choice(headers_list) - return (await c.get("/api/v1/users/me", headers=header)).status_code + 
if args.scenario == "me": + header = random.choice(headers_list) + return (await c.get("/api/v1/users/me", headers=header)).status_code + query = random.choice(queries) + resp = await c.post( + "/api/v1/academic/search", + json={"query": query, "limit": args.limit, "sources": sources}, + ) + return resp.status_code for _ in range(args.warmup): await warmup_request(client) @@ -212,8 +386,15 @@ async def warmup_request(c: AsyncClient) -> int: async def make_request(c: AsyncClient) -> int: if args.scenario == "health": return (await c.get("/health")).status_code - header = random.choice(headers_list) - return (await c.get("/api/v1/users/me", headers=header)).status_code + if args.scenario == "me": + header = random.choice(headers_list) + return (await c.get("/api/v1/users/me", headers=header)).status_code + query = random.choice(queries) + resp = await c.post( + "/api/v1/academic/search", + json={"query": query, "limit": args.limit, "sources": sources}, + ) + return resp.status_code result = await _run_fixed_requests(client, args.requests, args.concurrency, make_request) @@ -251,6 +432,12 @@ async def make_request(c: AsyncClient) -> int: from app.main import app app.dependency_overrides.clear() + if restore_verify_code is not None: + with contextlib.suppress(Exception): + restore_verify_code() + if restore_academic_providers is not None: + with contextlib.suppress(Exception): + restore_academic_providers() with contextlib.suppress(Exception): shutil.rmtree(tmpdir, ignore_errors=True) diff --git a/backend/scripts/test_core.sh b/backend/scripts/test_core.sh new file mode 100644 index 0000000..5483ac0 --- /dev/null +++ b/backend/scripts/test_core.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +set -euo pipefail + +BACKEND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +cd "$BACKEND_DIR" + +mkdir -p test-reports/core + +rm -f test-reports/core/.coverage +rm -f .coverage + +.venv/bin/python -m coverage run \ + --rcfile=.coveragerc \ + --data-file=test-reports/core/.coverage \ + -m pytest tests -ra \ + -m "not external_api and not mineru" \ + --junitxml=test-reports/core/pytest-junit.xml \ + --html=test-reports/core/pytest-report.html --self-contained-html \ + | tee test-reports/core/pytest-output.txt + +.venv/bin/python -m coverage report \ + --rcfile=.coveragerc \ + --data-file=test-reports/core/.coverage \ + | tee test-reports/core/coverage-output.txt + +.venv/bin/python -m coverage xml \ + --rcfile=.coveragerc \ + --data-file=test-reports/core/.coverage \ + -o test-reports/core/coverage.xml + +.venv/bin/python -m coverage html \ + --rcfile=.coveragerc \ + --data-file=test-reports/core/.coverage \ + -d test-reports/core/htmlcov diff --git a/backend/scripts/test_external_api.sh b/backend/scripts/test_external_api.sh new file mode 100644 index 0000000..387e9e5 --- /dev/null +++ b/backend/scripts/test_external_api.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +set -euo pipefail + +BACKEND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$BACKEND_DIR" + +mkdir -p test-reports/external-api + +rm -f test-reports/external-api/.coverage +rm -f .coverage + +# NOTE: On Python 3.13, combining coverage tracing with pytest-html can crash (SIGSEGV). +# Workaround: run pytest-html/junit without coverage, then run coverage without pytest-html. 
+.venv/bin/python -m pytest tests -ra \ + -m external_api \ + --junitxml=test-reports/external-api/pytest-junit.xml \ + --html=test-reports/external-api/pytest-report.html --self-contained-html \ + | tee test-reports/external-api/pytest-output.txt + +.venv/bin/python -m coverage run \ + --timid \ + --rcfile=.coveragerc.external \ + --data-file=test-reports/external-api/.coverage \ + -m pytest tests -q \ + -m external_api \ + | tee test-reports/external-api/pytest-output-coverage.txt + +.venv/bin/python -m coverage report \ + --rcfile=.coveragerc.external \ + --data-file=test-reports/external-api/.coverage \ + | tee test-reports/external-api/coverage-output.txt + +.venv/bin/python -m coverage xml \ + --rcfile=.coveragerc.external \ + --data-file=test-reports/external-api/.coverage \ + -o test-reports/external-api/coverage.xml + +.venv/bin/python -m coverage html \ + --rcfile=.coveragerc.external \ + --data-file=test-reports/external-api/.coverage \ + -d test-reports/external-api/htmlcov diff --git a/backend/scripts/test_mineru.sh b/backend/scripts/test_mineru.sh new file mode 100644 index 0000000..9ac15b6 --- /dev/null +++ b/backend/scripts/test_mineru.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +set -euo pipefail + +BACKEND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +cd "$BACKEND_DIR" + +mkdir -p test-reports/mineru + +rm -f test-reports/mineru/.coverage +rm -f .coverage + +.venv/bin/python -m coverage run \ + --rcfile=.coveragerc.mineru \ + --data-file=test-reports/mineru/.coverage \ + -m pytest tests -ra \ + -m mineru \ + --junitxml=test-reports/mineru/pytest-junit.xml \ + --html=test-reports/mineru/pytest-report.html --self-contained-html \ + | tee test-reports/mineru/pytest-output.txt + +.venv/bin/python -m coverage report \ + --rcfile=.coveragerc.mineru \ + --data-file=test-reports/mineru/.coverage \ + | tee test-reports/mineru/coverage-output.txt + +.venv/bin/python -m coverage xml \ + --rcfile=.coveragerc.mineru \ + --data-file=test-reports/mineru/.coverage \ + -o test-reports/mineru/coverage.xml + +.venv/bin/python -m coverage html \ + --rcfile=.coveragerc.mineru \ + --data-file=test-reports/mineru/.coverage \ + -d test-reports/mineru/htmlcov diff --git a/backend/test-reports/pytest-junit.xml b/backend/test-reports/pytest-junit.xml new file mode 100644 index 0000000..a3c7261 --- /dev/null +++ b/backend/test-reports/pytest-junit.xml @@ -0,0 +1 @@ +('/Users/xujiehan/course/InsightReading/backend/tests/test_mineru_comprehensive.py', 12, 'Skipped: MinerU integration test is disabled by default; set RUN_MINERU_E2E=1 to run manually.') \ No newline at end of file diff --git a/backend/test-reports/pytest-output.txt b/backend/test-reports/pytest-output.txt new file mode 100644 index 0000000..677fb31 --- /dev/null +++ b/backend/test-reports/pytest-output.txt @@ -0,0 +1,154 @@ +============================= test session starts ============================== +platform darwin -- Python 3.13.9, pytest-9.0.2, pluggy-1.6.0 +rootdir: /Users/xujiehan/course/InsightReading/backend +configfile: pytest.ini +plugins: anyio-4.11.0, html-4.1.1, metadata-3.1.1, cov-7.0.0 +collected 98 items / 1 skipped + +tests/test_academic_providers.py ..... [ 5%] +tests/test_academic_search_service.py .. 
[ 7%] +tests/test_academic_search_tool.py .. [ 9%] +tests/test_agent_controller.py .. [ 11%] +tests/test_annotations_service.py .... [ 15%] +tests/test_arxiv.py .... [ 19%] +tests/test_auth.py ...... [ 25%] +tests/test_auth_dependencies.py ..... [ 30%] +tests/test_auth_endpoints_extra.py .... [ 34%] +tests/test_config.py .... [ 38%] +tests/test_conversations_endpoints.py .. [ 40%] +tests/test_db_repositories.py ..... [ 45%] +tests/test_db_utils.py .... [ 50%] +tests/test_file_naming_utils.py ... [ 53%] +tests/test_finish_tool.py .. [ 55%] +tests/test_intelligent_service.py .... [ 59%] +tests/test_library_endpoints.py .. [ 61%] +tests/test_llm_client.py ... [ 64%] +tests/test_mineru_cli_utils.py ... [ 67%] +tests/test_notes_endpoints.py . [ 68%] +tests/test_papers_qa.py .. [ 70%] +tests/test_papers_upload.py .... [ 74%] +tests/test_parse_utils.py .... [ 78%] +tests/test_pdf_metadata.py ..... [ 83%] +tests/test_profile.py .. [ 85%] +tests/test_query_parser.py ... [ 88%] +tests/test_schema_validators.py .... [ 92%] +tests/test_security.py ... [ 95%] +tests/test_tool_executor.py .... 
[100%] + +=============================== warnings summary =============================== +app/api/v1/arxiv.py:226 + /Users/xujiehan/course/InsightReading/backend/app/api/v1/arxiv.py:226: DeprecationWarning: `regex` has been deprecated, please use `pattern` instead + sortBy: str = Query("relevance", regex="^(relevance|lastUpdatedDate|submittedDate)$", description="Sorting criterion"), + +app/api/v1/arxiv.py:227 + /Users/xujiehan/course/InsightReading/backend/app/api/v1/arxiv.py:227: DeprecationWarning: `regex` has been deprecated, please use `pattern` instead + sortOrder: str = Query("descending", regex="^(ascending|descending)$", description="Sort order") + +.venv/lib/python3.13/site-packages/pydantic/_internal/_config.py:295 +.venv/lib/python3.13/site-packages/pydantic/_internal/_config.py:295 +.venv/lib/python3.13/site-packages/pydantic/_internal/_config.py:295 +.venv/lib/python3.13/site-packages/pydantic/_internal/_config.py:295 +.venv/lib/python3.13/site-packages/pydantic/_internal/_config.py:295 +.venv/lib/python3.13/site-packages/pydantic/_internal/_config.py:295 +.venv/lib/python3.13/site-packages/pydantic/_internal/_config.py:295 +.venv/lib/python3.13/site-packages/pydantic/_internal/_config.py:295 + /Users/xujiehan/course/InsightReading/backend/.venv/lib/python3.13/site-packages/pydantic/_internal/_config.py:295: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. 
See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/ + warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning) + +:488 +:488 + :488: DeprecationWarning: builtin type SwigPyPacked has no __module__ attribute + +:488 +:488 + :488: DeprecationWarning: builtin type SwigPyObject has no __module__ attribute + +:488 + :488: DeprecationWarning: builtin type swigvarlink has no __module__ attribute + +app/schemas/library.py:34 + /Users/xujiehan/course/InsightReading/backend/app/schemas/library.py:34: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/ + @validator("paper_count", pre=True, always=True) + +tests/test_annotations_service.py::test_apply_annotation_to_pdf_adds_highlight +tests/test_annotations_service.py::test_apply_annotation_to_pdf_adds_note +tests/test_annotations_service.py::test_apply_annotation_async_wrapper + /Users/xujiehan/course/InsightReading/backend/app/services/annotations.py:88: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC). 
+ timestamp = datetime.utcnow() + +tests/test_auth.py: 5 warnings +tests/test_auth_dependencies.py: 2 warnings +tests/test_auth_endpoints_extra.py: 2 warnings +tests/test_conversations_endpoints.py: 3 warnings +tests/test_db_repositories.py: 26 warnings +tests/test_library_endpoints.py: 7 warnings +tests/test_notes_endpoints.py: 5 warnings +tests/test_papers_qa.py: 4 warnings +tests/test_papers_upload.py: 6 warnings +tests/test_profile.py: 2 warnings + /Users/xujiehan/course/InsightReading/backend/.venv/lib/python3.13/site-packages/sqlalchemy/sql/schema.py:3624: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC). + return util.wrap_callable(lambda ctx: fn(), fn) # type: ignore + +tests/test_conversations_endpoints.py::test_conversation_flow +tests/test_papers_qa.py::test_paper_qa_returns_answer +tests/test_papers_qa.py::test_paper_qa_returns_answer + /Users/xujiehan/course/InsightReading/backend/app/db/conversation_repository.py:133: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC). + conversation.updated_at = datetime.utcnow() + +tests/test_conversations_endpoints.py::test_conversation_flow + /Users/xujiehan/course/InsightReading/backend/app/db/conversation_repository.py:99: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC). 
+ conversation.updated_at = datetime.utcnow() + +tests/test_db_repositories.py::test_parse_cache_and_mineru_job_repositories + /Users/xujiehan/course/InsightReading/backend/app/db/repository.py:402: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC). + job.started_at = datetime.utcnow() + +tests/test_db_repositories.py::test_parse_cache_and_mineru_job_repositories + /Users/xujiehan/course/InsightReading/backend/app/db/repository.py:412: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC). + job.finished_at = datetime.utcnow() + +tests/test_db_repositories.py::test_parse_cache_and_mineru_job_repositories + /Users/xujiehan/course/InsightReading/backend/app/db/repository.py:426: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC). + job.finished_at = datetime.utcnow() + +tests/test_library_endpoints.py: 2 warnings +tests/test_notes_endpoints.py: 1 warning +tests/test_papers_qa.py: 2 warnings +tests/test_papers_upload.py: 2 warnings +tests/test_pdf_metadata.py: 4 warnings + /Users/xujiehan/course/InsightReading/backend/app/services/pdf_metadata.py:90: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC). 
+ "extracted_at": datetime.utcnow().isoformat(), + +tests/test_papers_upload.py::test_upload_honors_size_limit + /Users/xujiehan/course/InsightReading/backend/.venv/lib/python3.13/site-packages/fastapi/routing.py:290: DeprecationWarning: 'HTTP_413_REQUEST_ENTITY_TOO_LARGE' is deprecated. Use 'HTTP_413_CONTENT_TOO_LARGE' instead. + return await dependant.call(**values) + +-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html +- generated xml file: /Users/xujiehan/course/InsightReading/backend/test-reports/pytest-junit.xml - +================================ tests coverage ================================ +_______________ coverage: platform darwin, python 3.13.9-final-0 _______________ + +Name Stmts Miss Cover Missing +----------------------------------------------------------------- +app/core/config.py 101 3 97% 115, 122, 136 +app/db/conversation_repository.py 65 32 51% 32-33, 47, 71-83, 93-102, 107-112, 131-137, 147-150 +app/db/note_repository.py 44 5 89% 27, 52, 57, 77, 83 +app/db/repository.py 215 25 88% 26, 55, 137, 149-157, 243, 267, 274, 300-305, 387-398, 417-421, 431-433 +app/dependencies/auth.py 39 11 72% 43-46, 60-70 +app/models/types.py 14 1 93% 16 +app/schemas/annotation.py 38 3 92% 23, 31, 49 +app/schemas/auth.py 24 2 92% 33-34 +app/schemas/user.py 44 4 91% 58-61 +app/services/annotations.py 53 4 92% 62, 71, 75, 86 +app/services/pdf_metadata.py 59 5 92% 54, 64, 68, 110-111 +app/utils/file_naming.py 33 1 97% 32 +----------------------------------------------------------------- +TOTAL 1256 96 92% + +25 files skipped due to complete coverage. 
+Coverage HTML written to dir test-reports/htmlcov +Coverage XML written to file test-reports/coverage.xml +- Generated html report: file:///Users/xujiehan/course/InsightReading/backend/test-reports/pytest-report.html - +=========================== short test summary info ============================ +SKIPPED [1] tests/test_mineru_comprehensive.py:12: MinerU integration test is disabled by default; set RUN_MINERU_E2E=1 to run manually. +================= 98 passed, 1 skipped, 100 warnings in 9.21s ================== diff --git a/backend/test-reports/summary.md b/backend/test-reports/summary.md new file mode 100644 index 0000000..c8781ac --- /dev/null +++ b/backend/test-reports/summary.md @@ -0,0 +1,17 @@ +# Pytest Report + +- Generated: `2025-12-23T14:59:44+08:00` +- Pytest HTML: `test-reports/pytest-report.html` +- Coverage HTML: `test-reports/htmlcov/index.html` +- Coverage XML: `test-reports/coverage.xml` +- JUnit XML: `test-reports/pytest-junit.xml` +- Full log: `test-reports/pytest-output.txt` + +## Summary + +- Total: **84** +- Passed: **83** +- Failed: **0** +- Errors: **0** +- Skipped: **1** +- Time (sum suites): **8.79s** diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index fd3ffc7..8984242 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -21,6 +21,7 @@ os.environ.setdefault("MEDIA_ROOT", str(TEST_MEDIA_ROOT)) from app.main import app +from app.api.v1.endpoints import users as users_endpoints @pytest.fixture(scope="session") @@ -32,6 +33,21 @@ def event_loop() -> Generator[asyncio.AbstractEventLoop, None, None]: loop.close() +@pytest.fixture(autouse=True) +def _bypass_email_verification_codes(monkeypatch: pytest.MonkeyPatch) -> None: + """Avoid Redis dependency while exercising user flows. + + User registration requires a verification code stored in Redis. Unit/integration + tests run against an in-memory SQLite DB and should not require an external + Redis instance, so we stub the verification check to succeed. 
+ """ + + async def _always_valid(*args, **kwargs) -> bool: + return True + + monkeypatch.setattr(users_endpoints, "verify_code", _always_valid) + + @pytest.fixture() def async_client(event_loop: asyncio.AbstractEventLoop) -> AsyncIterator[AsyncClient]: """Yield an AsyncClient wired to an isolated SQLite database. @@ -46,6 +62,8 @@ async def setup() -> AsyncClient: if TEST_MEDIA_ROOT.exists(): shutil.rmtree(TEST_MEDIA_ROOT) TEST_MEDIA_ROOT.mkdir(parents=True, exist_ok=True) + (TEST_MEDIA_ROOT / "uploads").mkdir(parents=True, exist_ok=True) + (TEST_MEDIA_ROOT / "avatars").mkdir(parents=True, exist_ok=True) async with engine.begin() as conn: await conn.run_sync(Base.metadata.create_all) diff --git a/backend/tests/test_academic_providers.py b/backend/tests/test_academic_providers.py index daa1593..5ce8f1c 100644 --- a/backend/tests/test_academic_providers.py +++ b/backend/tests/test_academic_providers.py @@ -9,6 +9,8 @@ from app.services.academic.providers.openalex import OpenAlexProvider from app.services.academic.providers.arxiv import ArxivProvider +pytestmark = pytest.mark.external_api + @pytest.mark.asyncio async def test_semantic_scholar_search_normalizes(monkeypatch): diff --git a/backend/tests/test_academic_search_service.py b/backend/tests/test_academic_search_service.py index 45239ae..f1d8ff2 100644 --- a/backend/tests/test_academic_search_service.py +++ b/backend/tests/test_academic_search_service.py @@ -6,6 +6,8 @@ from app.schemas.academic import PaperSearchRequest from app.services.academic.search_service import AcademicSearchService +pytestmark = pytest.mark.external_api + class DummyAcademicSearchService(AcademicSearchService): """Override provider lookups with deterministic data.""" diff --git a/backend/tests/test_academic_search_tool.py b/backend/tests/test_academic_search_tool.py index 8a22b3b..d1b047a 100644 --- a/backend/tests/test_academic_search_tool.py +++ b/backend/tests/test_academic_search_tool.py @@ -5,6 +5,8 @@ from app.services.ai.tools 
import AcademicSearchTool +pytestmark = pytest.mark.external_api + class DummySearchService: """Fake academic search backend returning deterministic data.""" diff --git a/backend/tests/test_agent_controller.py b/backend/tests/test_agent_controller.py index 318f428..2ea5c83 100644 --- a/backend/tests/test_agent_controller.py +++ b/backend/tests/test_agent_controller.py @@ -6,6 +6,8 @@ from app.services.ai.agent_controller import AgentController from app.services.ai.tool_interfaces import ToolRegistry, ToolResult +pytestmark = pytest.mark.external_api + class StubLLMClient: """Deterministic LLM stub that returns scripted responses.""" @@ -77,3 +79,88 @@ async def test_agent_controller_returns_fallback_when_llm_missing() -> None: assert "explain transformers" in result.answer assert result.steps == [] + + +@pytest.mark.asyncio +async def test_agent_controller_recovers_from_unparseable_then_finish() -> None: + llm = StubLLMClient( + responses=[ + "not a json response", + '{"thought": "done", "action": "finish", "action_input": {"answer": "ok"}}', + ] + ) + controller = AgentController( + llm_client=llm, + tool_executor=StubToolExecutor(), + registry=ToolRegistry(), + max_iterations=3, + ) + + result = await controller.run("tell me about llms") + + assert result.answer == "ok" + assert len(result.steps) == 1 + assert result.steps[0].action == "finish" + + +@pytest.mark.asyncio +async def test_agent_controller_generates_final_answer_when_unparseable_after_tool() -> None: + llm = StubLLMClient( + responses=[ + '{"thought": "need data", "action": "search_academic_papers", "action_input": {"query": "LLM"}}', + "malformed", + ] + ) + controller = AgentController( + llm_client=llm, + tool_executor=StubToolExecutor(), + registry=ToolRegistry(), + max_iterations=3, + ) + + result = await controller.run("tell me about llms") + + assert result.answer.startswith("##") + assert "搜索结果" in result.answer + assert len(result.steps) == 1 + assert result.steps[0].action == 
"search_academic_papers" + + +@pytest.mark.asyncio +async def test_agent_controller_records_tool_errors() -> None: + class FailingToolExecutor(StubToolExecutor): + async def execute(self, tool_name: str, arguments: dict): + self.calls.append((tool_name, arguments)) + return ToolResult(success=False, error="boom") + + llm = StubLLMClient( + responses=[ + '{"thought": "need data", "action": "search_academic_papers", "action_input": {"query": "LLM"}}', + '{"thought": "done", "action": "finish", "action_input": {"answer": "ok"}}', + ] + ) + controller = AgentController( + llm_client=llm, + tool_executor=FailingToolExecutor(), + registry=ToolRegistry(), + max_iterations=3, + ) + + result = await controller.run("tell me about llms") + + assert "错误: boom" == result.steps[0].observation + + +def test_parse_action_supports_multiple_formats() -> None: + controller = AgentController( + llm_client=StubLLMClient(responses=[]), + tool_executor=StubToolExecutor(), + registry=ToolRegistry(), + ) + + assert controller._parse_action('```json {"action": "finish"}```') == {"action": "finish"} + assert controller._parse_action('``` {"action": "finish"}```') == {"action": "finish"} + assert controller._parse_action('prefix {"action": "finish"} suffix') == {"action": "finish"} + assert controller._parse_action("no braces") is None + assert controller._parse_action("{") is None + assert controller._parse_action('```json {bad}```') is None diff --git a/backend/tests/test_arxiv.py b/backend/tests/test_arxiv.py index a7d2ac4..63ce961 100644 --- a/backend/tests/test_arxiv.py +++ b/backend/tests/test_arxiv.py @@ -4,13 +4,26 @@ import types import pytest -from httpx import AsyncClient # type: ignore[import-not-found] +from httpx import ASGITransport, AsyncClient # type: ignore[import-not-found] from app.api.v1 import arxiv +from app.main import app + +pytestmark = pytest.mark.external_api + + +@pytest.fixture() +def arxiv_client(event_loop) -> AsyncClient: + transport = ASGITransport(app=app) + 
client = AsyncClient(transport=transport, base_url="http://test") + try: + yield client + finally: + event_loop.run_until_complete(client.aclose()) @pytest.mark.asyncio -async def test_arxiv_search_success(async_client: AsyncClient, monkeypatch: pytest.MonkeyPatch) -> None: +async def test_arxiv_search_success(arxiv_client: AsyncClient, monkeypatch: pytest.MonkeyPatch) -> None: sample_xml = """ @@ -40,7 +53,7 @@ def fake_get(url: str, timeout: int) -> types.SimpleNamespace: monkeypatch.setattr(arxiv.requests, "get", fake_get) - response = await async_client.get( + response = await arxiv_client.get( "/api/v1/arxiv/search", params={"query": "Quantum Machine", "max_results": 2}, ) @@ -61,13 +74,13 @@ def fake_get(url: str, timeout: int) -> types.SimpleNamespace: @pytest.mark.asyncio -async def test_arxiv_search_handles_api_error(async_client: AsyncClient, monkeypatch: pytest.MonkeyPatch) -> None: +async def test_arxiv_search_handles_api_error(arxiv_client: AsyncClient, monkeypatch: pytest.MonkeyPatch) -> None: def fake_get(url: str, timeout: int) -> types.SimpleNamespace: return types.SimpleNamespace(status_code=503, text="Service Unavailable") monkeypatch.setattr(arxiv.requests, "get", fake_get) - response = await async_client.get( + response = await arxiv_client.get( "/api/v1/arxiv/search", params={"query": "test"}, ) @@ -77,13 +90,13 @@ def fake_get(url: str, timeout: int) -> types.SimpleNamespace: @pytest.mark.asyncio -async def test_arxiv_search_handles_parse_error(async_client: AsyncClient, monkeypatch: pytest.MonkeyPatch) -> None: +async def test_arxiv_search_handles_parse_error(arxiv_client: AsyncClient, monkeypatch: pytest.MonkeyPatch) -> None: def fake_get(url: str, timeout: int) -> types.SimpleNamespace: return types.SimpleNamespace(status_code=200, text="") monkeypatch.setattr(arxiv.requests, "get", fake_get) - response = await async_client.get( + response = await arxiv_client.get( "/api/v1/arxiv/search", params={"query": "anything"}, ) @@ -93,13 +106,13 
@@ def fake_get(url: str, timeout: int) -> types.SimpleNamespace: @pytest.mark.asyncio -async def test_arxiv_search_handles_request_exception(async_client: AsyncClient, monkeypatch: pytest.MonkeyPatch) -> None: +async def test_arxiv_search_handles_request_exception(arxiv_client: AsyncClient, monkeypatch: pytest.MonkeyPatch) -> None: def fake_get(url: str, timeout: int) -> None: raise arxiv.requests.exceptions.Timeout("timed out") monkeypatch.setattr(arxiv.requests, "get", fake_get) - response = await async_client.get( + response = await arxiv_client.get( "/api/v1/arxiv/search", params={"query": "timeout"}, ) diff --git a/backend/tests/test_arxiv_provider_http_branches.py b/backend/tests/test_arxiv_provider_http_branches.py new file mode 100644 index 0000000..f884582 --- /dev/null +++ b/backend/tests/test_arxiv_provider_http_branches.py @@ -0,0 +1,164 @@ +"""Branch-focused tests for ArxivProvider retry/ratelimit logic.""" +from __future__ import annotations + +import asyncio + +import pytest + +from app.services.academic.providers import arxiv as arxiv_module +from app.services.academic.providers.arxiv import ArxivProvider + +pytestmark = pytest.mark.external_api + + +class DummyRequestContext: + def __init__(self, response): + self._response = response + + async def __aenter__(self): + return self._response + + async def __aexit__(self, exc_type, exc, tb): + return False + + +class DummyTextResponse: + def __init__(self, *, status: int, text: str = "ok", headers=None): + self.status = status + self._text = text + self.headers = headers or {} + + async def text(self) -> str: + return self._text + + +class DummySession: + def __init__(self, items): + self._items = list(items) + self.calls = 0 + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return False + + def get(self, url, headers=None): + self.calls += 1 + item = self._items.pop(0) + if isinstance(item, Exception): + raise item + return DummyRequestContext(item) + 
+ +@pytest.mark.asyncio +async def test_fetch_feed_retries_retryable_status(monkeypatch): + provider = ArxivProvider() + + async def noop(): + return None + + monkeypatch.setattr(provider, "_respect_rate_limit", noop) + monkeypatch.setattr(arxiv_module.settings, "ARXIV_MAX_RETRIES", 2) + monkeypatch.setattr(arxiv_module.settings, "ARXIV_RETRY_BACKOFF_SECONDS", 0.5) + + session = DummySession( + [ + DummyTextResponse(status=429, headers={"Retry-After": "1"}), + DummyTextResponse(status=200, text="xml"), + ] + ) + monkeypatch.setattr(arxiv_module.aiohttp, "ClientSession", lambda **kwargs: session) + + xml = await provider._fetch_feed("https://example.org") + assert xml == "xml" + assert session.calls == 2 + + +@pytest.mark.asyncio +async def test_fetch_feed_retries_timeout_then_succeeds(monkeypatch): + provider = ArxivProvider() + + async def noop(): + return None + + sleep_calls: list[float] = [] + + async def fake_sleep(duration: float): + sleep_calls.append(duration) + + monkeypatch.setattr(provider, "_respect_rate_limit", noop) + monkeypatch.setattr(arxiv_module.asyncio, "sleep", fake_sleep) + monkeypatch.setattr(arxiv_module.settings, "ARXIV_MAX_RETRIES", 2) + + session = DummySession( + [ + asyncio.TimeoutError(), + DummyTextResponse(status=200, text="xml"), + ] + ) + monkeypatch.setattr(arxiv_module.aiohttp, "ClientSession", lambda **kwargs: session) + + assert await provider._fetch_feed("https://example.org") == "xml" + assert sleep_calls + + +@pytest.mark.asyncio +async def test_fetch_feed_raises_after_last_attempt(monkeypatch): + provider = ArxivProvider() + + async def noop(): + return None + + monkeypatch.setattr(provider, "_respect_rate_limit", noop) + monkeypatch.setattr(arxiv_module.settings, "ARXIV_MAX_RETRIES", 1) + + session = DummySession([DummyTextResponse(status=500)]) + monkeypatch.setattr(arxiv_module.aiohttp, "ClientSession", lambda **kwargs: session) + + with pytest.raises(Exception, match=r"arXiv API 错误: 500"): + await 
provider._fetch_feed("https://example.org") + + +@pytest.mark.asyncio +async def test_get_by_id_returns_none_on_404_fetch(monkeypatch): + provider = ArxivProvider() + + async def fake_fetch(url: str) -> str: + raise Exception("404 not found") + + monkeypatch.setattr(provider, "_fetch_feed", fake_fetch) + assert await provider.get_by_id("arXiv:1234v1") is None + + +@pytest.mark.asyncio +async def test_respect_rate_limit_returns_when_disabled(monkeypatch): + provider = ArxivProvider() + monkeypatch.setattr(provider, "RATE_LIMIT_SECONDS", 0.0) + await provider._respect_rate_limit() + + +@pytest.mark.asyncio +async def test_respect_rate_limit_sleeps_until_next_slot(monkeypatch): + provider = ArxivProvider() + provider.RATE_LIMIT_SECONDS = 1.0 + provider._last_request_ts = 0.0 + provider._next_available_ts = 0.0 + provider._rate_limit_lock = asyncio.Lock() + + times = iter([0.0, 1.1, 1.1]) + + def fake_monotonic(): + return next(times) + + sleep_calls: list[float] = [] + + async def fake_sleep(duration: float): + sleep_calls.append(duration) + + monkeypatch.setattr(arxiv_module.time, "monotonic", fake_monotonic) + monkeypatch.setattr(arxiv_module.asyncio, "sleep", fake_sleep) + + await provider._respect_rate_limit() + assert sleep_calls == [1.0] + diff --git a/backend/tests/test_auth.py b/backend/tests/test_auth.py index 4e2d479..b2dd7b0 100644 --- a/backend/tests/test_auth.py +++ b/backend/tests/test_auth.py @@ -6,6 +6,8 @@ pytestmark = pytest.mark.asyncio +DEFAULT_VERIFICATION_CODE = "000000" + async def register_user( client: AsyncClient, @@ -13,7 +15,11 @@ async def register_user( password: str, full_name: str | None = None, ) -> Response: - payload: dict[str, str] = {"email": email, "password": password} + payload: dict[str, str] = { + "email": email, + "password": password, + "verification_code": DEFAULT_VERIFICATION_CODE, + } if full_name is not None: payload["full_name"] = full_name diff --git a/backend/tests/test_auth_dependencies.py 
b/backend/tests/test_auth_dependencies.py new file mode 100644 index 0000000..13f9a88 --- /dev/null +++ b/backend/tests/test_auth_dependencies.py @@ -0,0 +1,87 @@ +"""Tests that exercise authentication dependency edge-cases.""" +from __future__ import annotations + +import pytest + +from app.dependencies import auth as auth_deps + +DEFAULT_VERIFICATION_CODE = "000000" + +pytestmark = pytest.mark.asyncio + + +async def _register(async_client, email: str, password: str) -> None: + response = await async_client.post( + "/api/v1/users", + json={"email": email, "password": password, "verification_code": DEFAULT_VERIFICATION_CODE}, + ) + assert response.status_code == 201 + + +async def _login(async_client, email: str, password: str) -> str: + response = await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +async def test_invalid_token_rejected(async_client) -> None: + response = await async_client.get( + "/api/v1/users/me", + headers={"Authorization": "Bearer not-a-jwt"}, + ) + assert response.status_code == 401 + + +async def test_token_without_subject_rejected(async_client, monkeypatch) -> None: + monkeypatch.setattr(auth_deps, "decode_access_token", lambda token: {}) + response = await async_client.get( + "/api/v1/users/me", + headers={"Authorization": "Bearer whatever"}, + ) + assert response.status_code == 401 + assert response.json()["detail"] == "无法验证凭据" + + +async def test_inactive_user_rejected(async_client) -> None: + await _register(async_client, "inactive@example.com", "StrongPass1") + token = await _login(async_client, "inactive@example.com", "StrongPass1") + + ok = await async_client.get( + "/api/v1/users/me", + headers={"Authorization": f"Bearer {token}"}, + ) + assert ok.status_code == 200 + + deleted = await async_client.delete( + "/api/v1/users/me", + 
headers={"Authorization": f"Bearer {token}"}, + ) + assert deleted.status_code == 200 + + rejected = await async_client.get( + "/api/v1/users/me", + headers={"Authorization": f"Bearer {token}"}, + ) + assert rejected.status_code == 401 + + +async def test_optional_auth_uses_fallback_access_token(async_client) -> None: + await _register(async_client, "fallback@example.com", "StrongPass1") + token = await _login(async_client, "fallback@example.com", "StrongPass1") + + # No Authorization header; dependency returns None and endpoint falls back to query token. + response = await async_client.get(f"/api/v1/library/uploads/999/download?access_token={token}") + assert response.status_code in {404, 401} + + +async def test_optional_auth_invalid_bearer_returns_unauthorized(async_client) -> None: + response = await async_client.get( + "/api/v1/library/uploads/999/download", + headers={"Authorization": "Bearer not-a-jwt"}, + ) + assert response.status_code == 401 + diff --git a/backend/tests/test_auth_endpoints_extra.py b/backend/tests/test_auth_endpoints_extra.py new file mode 100644 index 0000000..df3cd70 --- /dev/null +++ b/backend/tests/test_auth_endpoints_extra.py @@ -0,0 +1,87 @@ +"""Extra auth endpoint tests to improve coverage.""" +from __future__ import annotations + +import pytest + +from app.api.v1.endpoints import auth as auth_endpoints + +DEFAULT_VERIFICATION_CODE = "000000" + +pytestmark = pytest.mark.asyncio + + +async def _register(async_client, email: str, password: str) -> None: + response = await async_client.post( + "/api/v1/users", + json={"email": email, "password": password, "verification_code": DEFAULT_VERIFICATION_CODE}, + ) + assert response.status_code == 201 + + +async def _login(async_client, email: str, password: str) -> dict: + response = await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + return {"status_code": 
response.status_code, "json": response.json() if response.content else {}} + + +async def test_login_unknown_user_returns_404(async_client) -> None: + result = await _login(async_client, "missing@example.com", "WhateverPass1") + assert result["status_code"] == 404 + assert result["json"]["detail"] == "账号不存在" + + +async def test_disabled_account_cannot_login(async_client) -> None: + await _register(async_client, "disabled@example.com", "StrongPass1") + token = (await _login(async_client, "disabled@example.com", "StrongPass1"))["json"]["access_token"] + + deleted = await async_client.delete( + "/api/v1/users/me", + headers={"Authorization": f"Bearer {token}"}, + ) + assert deleted.status_code == 200 + + result = await _login(async_client, "disabled@example.com", "StrongPass1") + assert result["status_code"] == 403 + assert result["json"]["detail"] == "账号已被禁用" + + +async def test_send_email_code_requires_config(async_client, monkeypatch) -> None: + monkeypatch.setattr(auth_endpoints, "is_email_configured", lambda: False) + response = await async_client.post( + "/api/v1/auth/email/code", + json={"email": "foo@example.com", "purpose": "register"}, + ) + assert response.status_code == 503 + assert response.json()["detail"] == "邮件服务未配置" + + +async def test_reset_password_flows(async_client, monkeypatch) -> None: + await _register(async_client, "reset@example.com", "StrongPass1") + + async def invalid_code(*args, **kwargs) -> bool: + return False + + monkeypatch.setattr(auth_endpoints, "verify_code", invalid_code) + bad = await async_client.post( + "/api/v1/auth/password/reset", + json={"email": "reset@example.com", "code": "111111", "new_password": "NewStrong1"}, + ) + assert bad.status_code == 400 + assert bad.json()["detail"] == "验证码不正确或已过期" + + async def valid_code(*args, **kwargs) -> bool: + return True + + monkeypatch.setattr(auth_endpoints, "verify_code", valid_code) + good = await async_client.post( + "/api/v1/auth/password/reset", + json={"email": 
"reset@example.com", "code": "222222", "new_password": "NewStrong1"}, + ) + assert good.status_code == 200 + assert good.json()["message"] == "密码重置成功" + + result = await _login(async_client, "reset@example.com", "NewStrong1") + assert result["status_code"] == 200 diff --git a/backend/tests/test_conversations_endpoints.py b/backend/tests/test_conversations_endpoints.py new file mode 100644 index 0000000..1d26d17 --- /dev/null +++ b/backend/tests/test_conversations_endpoints.py @@ -0,0 +1,142 @@ +"""Integration tests for /api/v1/conversations endpoints.""" +from __future__ import annotations + +import pytest + +DEFAULT_VERIFICATION_CODE = "000000" + +pytestmark = pytest.mark.asyncio + + +async def _register(async_client, email: str, password: str) -> None: + response = await async_client.post( + "/api/v1/users", + json={"email": email, "password": password, "verification_code": DEFAULT_VERIFICATION_CODE}, + ) + assert response.status_code == 201 + + +async def _login(async_client, email: str, password: str) -> str: + response = await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +async def test_conversation_flow(async_client) -> None: + await _register(async_client, "conv@example.com", "StrongPass1") + token = await _login(async_client, "conv@example.com", "StrongPass1") + + created = await async_client.post( + "/api/v1/conversations/", + headers={"Authorization": f"Bearer {token}"}, + json={"title": "Search session", "category": "search"}, + ) + assert created.status_code == 201 + conversation = created.json() + conversation_id = conversation["id"] + + listed = await async_client.get( + "/api/v1/conversations/", + headers={"Authorization": f"Bearer {token}"}, + ) + assert listed.status_code == 200 + payload = listed.json() + assert payload["total"] == 1 + assert 
payload["conversations"][0]["message_count"] == 0 + + long_content = "x" * 80 + added = await async_client.post( + f"/api/v1/conversations/{conversation_id}/messages", + headers={"Authorization": f"Bearer {token}"}, + json={"role": "user", "content": long_content}, + ) + assert added.status_code == 201 + + listed = await async_client.get( + "/api/v1/conversations/", + headers={"Authorization": f"Bearer {token}"}, + ) + assert listed.status_code == 200 + payload = listed.json() + assert payload["conversations"][0]["message_count"] == 1 + assert payload["conversations"][0]["last_message_preview"].endswith("...") + + updated = await async_client.patch( + f"/api/v1/conversations/{conversation_id}", + headers={"Authorization": f"Bearer {token}"}, + json={"title": "Renamed"}, + ) + assert updated.status_code == 200 + assert updated.json()["title"] == "Renamed" + + mismatch = await async_client.get( + f"/api/v1/conversations/{conversation_id}?category=reading", + headers={"Authorization": f"Bearer {token}"}, + ) + assert mismatch.status_code == 404 + + detail = await async_client.get( + f"/api/v1/conversations/{conversation_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert detail.status_code == 200 + assert detail.json()["messages"] + + deleted = await async_client.delete( + f"/api/v1/conversations/{conversation_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert deleted.status_code == 204 + + missing = await async_client.get( + f"/api/v1/conversations/{conversation_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert missing.status_code == 404 + + +async def test_conversation_filters_and_not_found_paths(async_client) -> None: + await _register(async_client, "owner@example.com", "StrongPass1") + owner_token = await _login(async_client, "owner@example.com", "StrongPass1") + + await _register(async_client, "other@example.com", "StrongPass1") + other_token = await _login(async_client, "other@example.com", "StrongPass1") + + 
created = await async_client.post( + "/api/v1/conversations/", + headers={"Authorization": f"Bearer {owner_token}"}, + json={"title": "Reading", "category": "reading"}, + ) + assert created.status_code == 201 + conversation_id = created.json()["id"] + + filtered = await async_client.get( + "/api/v1/conversations/?category=reading", + headers={"Authorization": f"Bearer {owner_token}"}, + ) + assert filtered.status_code == 200 + payload = filtered.json() + assert payload["total"] == 1 + + forbidden = await async_client.get( + f"/api/v1/conversations/{conversation_id}", + headers={"Authorization": f"Bearer {other_token}"}, + ) + assert forbidden.status_code == 404 + + update_missing = await async_client.patch( + "/api/v1/conversations/99999", + headers={"Authorization": f"Bearer {owner_token}"}, + json={"title": "Nope"}, + ) + assert update_missing.status_code == 404 + + delete_missing = await async_client.delete( + "/api/v1/conversations/99999", + headers={"Authorization": f"Bearer {owner_token}"}, + ) + assert delete_missing.status_code == 404 diff --git a/backend/tests/test_db_repositories.py b/backend/tests/test_db_repositories.py new file mode 100644 index 0000000..aa47ed1 --- /dev/null +++ b/backend/tests/test_db_repositories.py @@ -0,0 +1,261 @@ +"""Unit tests for DB repositories (core business persistence logic).""" +from __future__ import annotations + +import pytest +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine +from sqlalchemy import select + +from app.db.repository import ( + LibraryFolderRepository, + MineruParseJobRepository, + ParsedPaperCacheRepository, + UploadedPaperRepository, + UserRepository, +) +from app.models import mineru_parse_job as mineru_job_model # noqa: F401 +from app.models import parsed_paper_cache as parsed_cache_model # noqa: F401 +from app.models import uploaded_paper as uploaded_paper_model # noqa: F401 +from app.models import library_folder as library_folder_model # noqa: F401 +from 
app.models import user as user_model # noqa: F401 +from app.models.base import Base +from app.models.mineru_parse_job import MineruParseJob +from app.models.parsed_paper_cache import ParsedPaperCache +from app.models.uploaded_paper import UploadedPaper + + +pytestmark = pytest.mark.asyncio + +TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:?cache=shared" + + +async def _make_session() -> AsyncSession: + engine = create_async_engine(TEST_DATABASE_URL, future=True, connect_args={"uri": True}) + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + session_factory = async_sessionmaker(engine, expire_on_commit=False, class_=AsyncSession) + session = session_factory() + # stash engine on session for teardown + session.info["__engine__"] = engine + return session + + +async def _close_session(session: AsyncSession) -> None: + engine = session.info.pop("__engine__", None) + await session.close() + if engine is not None: + await engine.dispose() + + +async def test_user_repository_oauth_lookup() -> None: + session = await _make_session() + try: + repo = UserRepository(session) + user = await repo.create( + email="oauth@example.com", + hashed_password="hashed", + full_name=None, + oauth_provider="github", + oauth_account_id="123", + ) + await session.commit() + + found = await repo.get_by_oauth_account("github", "123") + assert found is not None and found.id == user.id + + missing = await repo.get_by_oauth_account("google", "123") + assert missing is None + finally: + await _close_session(session) + + +async def test_library_folder_repository_crud() -> None: + session = await _make_session() + try: + user_repo = UserRepository(session) + user = await user_repo.create(email="folder@example.com", hashed_password="hashed", full_name=None) + await session.commit() + + repo = LibraryFolderRepository(session) + folder = await repo.create(user_id=user.id, name="Folder", color="#fff") + await session.commit() + + fetched = await 
repo.get_for_user(folder.id, user.id) + assert fetched is not None and fetched.name == "Folder" + + folders = await repo.list_for_user(user.id) + assert [f.id for f in folders] == [folder.id] + + renamed = await repo.rename(folder, name="Renamed", color=None) + assert renamed.name == "Renamed" + + await repo.delete(folder) + await session.commit() + assert await repo.get_for_user(folder.id, user.id) is None + finally: + await _close_session(session) + + +async def test_uploaded_paper_repository_dedupe_counts_and_updates() -> None: + session = await _make_session() + try: + user_repo = UserRepository(session) + user = await user_repo.create(email="uploads@example.com", hashed_password="hashed", full_name=None) + await session.commit() + + upload_repo = UploadedPaperRepository(session) + + # Two records with the same hash should dedupe for unique listing/counting. + a = await upload_repo.create( + user_id=user.id, + stored_filename="a.pdf", + original_filename="doc.pdf", + content_type="application/pdf", + file_size=1, + file_url="/media/uploads/a.pdf", + file_hash="hash1", + folder_id=None, + metadata_json=None, + ) + b = await upload_repo.create( + user_id=user.id, + stored_filename="b.pdf", + original_filename="doc.pdf", + content_type="application/pdf", + file_size=1, + file_url="/media/uploads/b.pdf", + file_hash="hash1", + folder_id=None, + metadata_json=None, + ) + await session.commit() + + assert await upload_repo.count_for_user(user.id) == 2 + assert await upload_repo.count_unique_for_user(user.id) == 1 + + items = await upload_repo.list_unique_for_user_paginated(user.id, offset=0, limit=10) + assert len(items) == 1 + + # Folder update should apply to all deduped records by hash. 
+ folder_repo = LibraryFolderRepository(session) + folder = await folder_repo.create(user_id=user.id, name="NLP", color=None) + await session.commit() + + updated = await upload_repo.update_folder(a, folder.id) + await session.commit() + assert updated.folder_id == folder.id + + refreshed_b = await upload_repo.get_for_user(b.id, user.id) + assert refreshed_b is not None and refreshed_b.folder_id == folder.id + + counts = await upload_repo.count_by_folder(user.id) + assert counts.get(folder.id) == 1 + + # Metadata update round-trip. + await upload_repo.update_metadata(updated, {"title": "T"}) + await session.commit() + refreshed = await upload_repo.get_for_user(a.id, user.id) + assert refreshed is not None and refreshed.metadata_json == {"title": "T"} + finally: + await _close_session(session) + + +async def test_uploaded_paper_delete_cascades_parse_cache_and_jobs() -> None: + session = await _make_session() + try: + user_repo = UserRepository(session) + user = await user_repo.create(email="delete@example.com", hashed_password="hashed", full_name=None) + await session.commit() + + upload_repo = UploadedPaperRepository(session) + record = await upload_repo.create( + user_id=user.id, + stored_filename="x.pdf", + original_filename="x.pdf", + content_type="application/pdf", + file_size=1, + file_url="/media/uploads/x.pdf", + file_hash="hash-delete", + folder_id=None, + metadata_json=None, + ) + + session.add(ParsedPaperCache(file_hash="hash-delete", result_json={"plain_text": "hi"})) + session.add( + MineruParseJob( + paper_id=record.id, + user_id=user.id, + pdf_path="/tmp/x.pdf", + output_dir="/tmp/out", + file_hash="hash-delete", + status="queued", + ) + ) + await session.commit() + + await upload_repo.delete(record) + await session.commit() + + remaining_cache = (await session.execute(select(ParsedPaperCache))).scalars().all() + remaining_jobs = (await session.execute(select(MineruParseJob))).scalars().all() + remaining_uploads = (await 
session.execute(select(UploadedPaper))).scalars().all() + assert remaining_cache == [] + assert remaining_jobs == [] + assert remaining_uploads == [] + finally: + await _close_session(session) + + +async def test_parse_cache_and_mineru_job_repositories() -> None: + session = await _make_session() + try: + user_repo = UserRepository(session) + user = await user_repo.create(email="mineru@example.com", hashed_password="hashed", full_name=None) + upload_repo = UploadedPaperRepository(session) + upload = await upload_repo.create( + user_id=user.id, + stored_filename="m.pdf", + original_filename="m.pdf", + content_type="application/pdf", + file_size=1, + file_url="/media/uploads/m.pdf", + file_hash="hash", + folder_id=None, + metadata_json=None, + ) + await session.commit() + + cache_repo = ParsedPaperCacheRepository(session) + first = await cache_repo.upsert(file_hash="hash", result_json={"plain_text": "a"}) + await session.commit() + assert first.file_hash == "hash" + + updated = await cache_repo.upsert(file_hash="hash", result_json={"plain_text": "b"}) + await session.commit() + assert updated.result_json["plain_text"] == "b" + + job_repo = MineruParseJobRepository(session) + job = await job_repo.create( + paper_id=upload.id, + user_id=user.id, + pdf_path="/tmp/a.pdf", + output_dir="/tmp/out", + file_hash="hash", + ) + await session.commit() + + fetched = await job_repo.get(job.id) + assert fetched is not None and fetched.id == job.id + + running = await job_repo.mark_running(job, task_id="task-1") + succeeded = await job_repo.mark_succeeded(running, result_json={"ok": True}) + await session.commit() + assert succeeded.status == "succeeded" + + latest = await job_repo.get_latest_for_paper(upload.id, user.id) + assert latest is not None and latest.id == job.id + + cancelled = await job_repo.mark_cancelled(job, reason="stop") + await session.commit() + assert cancelled.status == "cancelled" + finally: + await _close_session(session) diff --git 
a/backend/tests/test_external_provider_branches.py b/backend/tests/test_external_provider_branches.py new file mode 100644 index 0000000..a854b8c --- /dev/null +++ b/backend/tests/test_external_provider_branches.py @@ -0,0 +1,450 @@ +"""Branch-focused tests for external academic providers.""" +from __future__ import annotations + +import asyncio +import logging + +import pytest + +from app.services.academic.providers import arxiv as arxiv_module +from app.services.academic.providers import openalex as openalex_module +from app.services.academic.providers import semantic_scholar as semantic_module +from app.services.academic.providers.arxiv import ArxivProvider +from app.services.academic.providers.openalex import OpenAlexProvider +from app.services.academic.providers.semantic_scholar import SemanticScholarProvider + +pytestmark = pytest.mark.external_api + + +class DummyRequestContext: + def __init__(self, response): + self._response = response + + async def __aenter__(self): + return self._response + + async def __aexit__(self, exc_type, exc, tb): + return False + + +class QueueSession: + def __init__(self, responses): + self._responses = list(responses) + self.calls = 0 + + def get(self, url, params=None, headers=None): + self.calls += 1 + return DummyRequestContext(self._responses.pop(0)) + + +class DummyJSONResponse: + def __init__(self, *, status: int, payload=None, headers=None, text_error: bool = False): + self.status = status + self._payload = payload if payload is not None else {} + self.headers = headers or {} + self._text_error = text_error + + async def json(self): + return self._payload + + async def text(self): + if self._text_error: + raise RuntimeError("boom") + return "detail" + + +@pytest.mark.asyncio +async def test_semantic_scholar_perform_request_retries_429_retry_after(monkeypatch): + provider = SemanticScholarProvider() + + async def noop(): + return None + + sleep_calls: list[float] = [] + + async def fake_sleep(duration: float): + 
sleep_calls.append(duration) + + monkeypatch.setattr(provider, "_respect_rate_limit", noop) + monkeypatch.setattr(semantic_module.asyncio, "sleep", fake_sleep) + + session = QueueSession( + [ + DummyJSONResponse(status=429, headers={"Retry-After": "2"}), + DummyJSONResponse(status=200, payload={"ok": True}), + ] + ) + data = await provider._perform_request(session, "url", {}, {}) + assert data == {"ok": True} + assert session.calls == 2 + assert sleep_calls == [2.0] + + +@pytest.mark.asyncio +async def test_semantic_scholar_perform_request_retries_429_invalid_header(monkeypatch): + provider = SemanticScholarProvider() + + async def noop(): + return None + + sleep_calls: list[float] = [] + + async def fake_sleep(duration: float): + sleep_calls.append(duration) + + monkeypatch.setattr(provider, "_respect_rate_limit", noop) + monkeypatch.setattr(semantic_module.asyncio, "sleep", fake_sleep) + + session = QueueSession( + [ + DummyJSONResponse(status=429, headers={"Retry-After": "abc"}), + DummyJSONResponse(status=200, payload={}), + ] + ) + await provider._perform_request(session, "url", {}, {}) + assert session.calls == 2 + assert sleep_calls == [pytest.approx(1.5)] + + +@pytest.mark.asyncio +async def test_semantic_scholar_perform_request_returns_empty_on_404(monkeypatch): + provider = SemanticScholarProvider() + + async def noop(): + return None + + monkeypatch.setattr(provider, "_respect_rate_limit", noop) + session = QueueSession([DummyJSONResponse(status=404)]) + data = await provider._perform_request(session, "url", {}, {}) + assert data == {} + + +@pytest.mark.asyncio +async def test_semantic_scholar_perform_request_raises_with_missing_body(monkeypatch): + provider = SemanticScholarProvider() + + async def noop(): + return None + + monkeypatch.setattr(provider, "_respect_rate_limit", noop) + session = QueueSession([DummyJSONResponse(status=500, text_error=True)]) + with pytest.raises(Exception, match=r"Semantic Scholar API 错误: 500"): + await 
provider._perform_request(session, "url", {}, {}) + + +class DummySessionManager: + def __init__(self, session): + self._session = session + + async def __aenter__(self): + return self._session + + async def __aexit__(self, exc_type, exc, tb): + return False + + +class DummyOpenAlexSession: + def __init__(self, response): + self._response = response + self.captured: list[tuple[str, dict]] = [] + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return False + + def get(self, url, params=None): + self.captured.append((url, params or {})) + return DummyRequestContext(self._response) + + +@pytest.mark.asyncio +async def test_openalex_request_raises_on_non_200(monkeypatch): + provider = OpenAlexProvider() + provider.BASE_URL = "https://openalex.example/works" + + response = DummyJSONResponse(status=500) + session = DummyOpenAlexSession(response) + monkeypatch.setattr(openalex_module.aiohttp, "ClientSession", lambda **kwargs: session) + + with pytest.raises(RuntimeError, match=r"OpenAlex API 错误: 500"): + await provider._request({"search": "x"}) + + +@pytest.mark.asyncio +async def test_openalex_get_by_id_returns_none_and_logs_warning(monkeypatch, caplog): + provider = OpenAlexProvider() + provider.BASE_URL = "https://openalex.example/works" + + response = DummyJSONResponse(status=404) + session = DummyOpenAlexSession(response) + monkeypatch.setattr(openalex_module.aiohttp, "ClientSession", lambda **kwargs: session) + + with caplog.at_level(logging.WARNING): + assert await provider.get_by_id("W1") is None + + assert session.captured[0][0] == "https://openalex.example/works/W1" + assert "OpenAlex get_by_id failed" in caplog.text + + +def test_openalex_extract_primary_location_prefers_best_location() -> None: + item = { + "best_oa_location": { + "source": {"display_name": "TestConf"}, + "landing_page_url": "https://example.org/landing", + }, + "locations": [ + { + "source": {"display_name": "Other"}, + "landing_page_url": 
"https://example.org/other", + } + ], + } + assert OpenAlexProvider._extract_primary_location(item) == { + "source_display_name": "TestConf", + "landing_page_url": "https://example.org/landing", + } + + +def test_arxiv_build_query_formats_phrases_and_category() -> None: + provider = ArxivProvider() + query = 'deep learning "vision transformer"' + built = provider._build_query(query, "cs.CV") + assert "cat:cs.CV" in built + assert 'all:"vision transformer"' in built + assert "all:deep" in built + + +def test_arxiv_build_query_defaults_to_all_star() -> None: + provider = ArxivProvider() + assert provider._build_query("", None) == "all:*" + + +def test_arxiv_build_connector_disables_ssl(monkeypatch): + provider = ArxivProvider() + monkeypatch.setattr(arxiv_module.settings, "ARXIV_VERIFY_SSL", False) + captured: dict[str, object] = {} + + def fake_connector(*, ssl): + captured["ssl"] = ssl + return object() + + monkeypatch.setattr(arxiv_module.aiohttp, "TCPConnector", fake_connector) + assert provider._build_connector() is not None + assert captured["ssl"] is False + + +def test_arxiv_build_connector_warns_on_missing_ca_bundle(monkeypatch, caplog): + provider = ArxivProvider() + monkeypatch.setattr(arxiv_module.settings, "ARXIV_VERIFY_SSL", True) + monkeypatch.setattr(arxiv_module.settings, "ARXIV_CA_BUNDLE", "/no/such/ca.pem") + + with caplog.at_level(logging.WARNING): + assert provider._build_connector() is None + + assert "ARXIV_CA_BUNDLE does not exist" in caplog.text + + +@pytest.mark.asyncio +async def test_arxiv_respect_retry_after_uses_header(monkeypatch): + provider = ArxivProvider() + captured: list[float] = [] + + async def fake_extend(delay: float) -> None: + captured.append(delay) + + monkeypatch.setattr(provider, "_extend_cooldown", fake_extend) + response = type("Resp", (), {"headers": {"Retry-After": "3"}})() + await provider._respect_retry_after(response, attempt=0, backoff=1.0) + assert captured == [3.0] + + +def 
test_arxiv_extract_venue_prefers_journal_ref() -> None: + assert ArxivProvider._extract_venue("NeurIPS 2024", "Nature") == "Nature" + + +def test_arxiv_extract_venue_parses_comment_year() -> None: + assert ArxivProvider._extract_venue("Accepted at NeurIPS 2022", None) == "NeurIPS 2022" + + +@pytest.mark.asyncio +async def test_semantic_scholar_search_includes_optional_filters(monkeypatch): + provider = SemanticScholarProvider() + + captured: dict[str, object] = {} + + async def fake_perform(self, session, url, params, headers, allow_retry=True): + captured["params"] = params + return {"data": []} + + monkeypatch.setattr(SemanticScholarProvider, "_perform_request", fake_perform, raising=False) + results = await provider.search("q", venue="Conf", year=2024, limit=1) + assert results == [] + assert captured["params"]["venue"] == "Conf" + assert captured["params"]["year"] == "2024" + + +def test_semantic_scholar_build_headers_includes_api_key(monkeypatch): + monkeypatch.setattr(semantic_module.settings, "SEMANTIC_SCHOLAR_API_KEY", "k") + provider = SemanticScholarProvider() + headers = provider._build_headers() + assert headers["x-api-key"] == "k" + + +@pytest.mark.asyncio +async def test_semantic_scholar_get_by_id_returns_none_for_empty_payload(monkeypatch): + provider = SemanticScholarProvider() + + async def fake_perform(self, session, url, params, headers, allow_retry=True): + return {} + + monkeypatch.setattr(SemanticScholarProvider, "_perform_request", fake_perform, raising=False) + assert await provider.get_by_id("p1") is None + + +@pytest.mark.asyncio +async def test_openalex_search_returns_empty_when_query_missing(): + provider = OpenAlexProvider() + assert await provider.search("") == [] + + +@pytest.mark.asyncio +async def test_openalex_search_builds_filters_and_search_terms(monkeypatch): + provider = OpenAlexProvider() + provider.BASE_URL = "https://openalex.example/works" + + captured: dict[str, object] = {} + + async def fake_request(self, params): + 
captured["params"] = params + return {"results": []} + + monkeypatch.setattr(OpenAlexProvider, "_request", fake_request, raising=False) + await provider.search("llm", year=2024, venue=" ICLR ", domain=" AI ", limit=200) + + params = captured["params"] + assert "filter" in params + assert "from_publication_date:2024-01-01" in params["filter"] + assert "\"ICLR\"" in params["search"] + assert "AI" in params["search"] + + +@pytest.mark.asyncio +async def test_openalex_get_by_id_accepts_full_url(monkeypatch): + provider = OpenAlexProvider() + provider.BASE_URL = "https://openalex.example/works" + + response = DummyJSONResponse( + status=200, + payload={ + "id": "https://openalex.org/W1", + "title": "Paper", + "authorships": [], + "open_access": {}, + }, + ) + session = DummyOpenAlexSession(response) + monkeypatch.setattr(openalex_module.aiohttp, "ClientSession", lambda **kwargs: session) + + data = await provider.get_by_id("https://openalex.org/W1") + assert data is not None + assert session.captured[0][0] == "https://openalex.org/W1" + + +def test_openalex_extract_primary_location_falls_back_to_locations() -> None: + item = { + "best_oa_location": {}, + "locations": [{"source": {"display_name": "Venue"}, "pdf_url": "https://example.org/p.pdf"}], + } + assert OpenAlexProvider._extract_primary_location(item) == { + "source_display_name": "Venue", + "landing_page_url": "https://example.org/p.pdf", + } + + +def test_openalex_extract_abstract_handles_invalid_types() -> None: + assert OpenAlexProvider._extract_abstract(None) is None + assert OpenAlexProvider._extract_abstract("bad") is None + + +def test_semantic_scholar_build_connector_disables_ssl(monkeypatch): + monkeypatch.setattr(semantic_module.settings, "SEMANTIC_SCHOLAR_VERIFY_SSL", False) + captured: dict[str, object] = {} + + def fake_connector(*, ssl): + captured["ssl"] = ssl + return object() + + monkeypatch.setattr(semantic_module.aiohttp, "TCPConnector", fake_connector) + provider = SemanticScholarProvider() + 
assert provider._build_connector() is not None + assert captured["ssl"] is False + + +def test_semantic_scholar_build_connector_uses_ca_bundle(monkeypatch, tmp_path): + monkeypatch.setattr(semantic_module.settings, "SEMANTIC_SCHOLAR_VERIFY_SSL", True) + monkeypatch.setattr(semantic_module.settings, "SEMANTIC_SCHOLAR_CA_BUNDLE", str(tmp_path / "ca.pem")) + monkeypatch.setattr(semantic_module.ssl, "create_default_context", lambda cafile=None: object()) + captured: dict[str, object] = {} + + def fake_connector(*, ssl): + captured["ssl"] = ssl + return object() + + monkeypatch.setattr(semantic_module.aiohttp, "TCPConnector", fake_connector) + provider = SemanticScholarProvider() + assert provider._build_connector() is not None + assert captured["ssl"] is not False + + +@pytest.mark.asyncio +async def test_semantic_scholar_respect_rate_limit_sleeps(monkeypatch): + provider = SemanticScholarProvider() + provider._last_request_ts = 0.0 + provider._throttle_lock = None + + times = iter([0.1, 1.6, 1.6]) + + def fake_monotonic(): + return next(times) + + sleep_calls: list[float] = [] + + async def fake_sleep(duration: float): + sleep_calls.append(duration) + + monkeypatch.setattr(semantic_module.time, "monotonic", fake_monotonic) + monkeypatch.setattr(semantic_module.asyncio, "sleep", fake_sleep) + + await provider._respect_rate_limit() + assert sleep_calls == [pytest.approx(provider.MIN_REQUEST_INTERVAL - 0.1)] + assert provider._last_request_ts == pytest.approx(1.6) + + +@pytest.mark.asyncio +async def test_semantic_scholar_search_translates_timeout_and_client_error(monkeypatch): + provider = SemanticScholarProvider() + + class DummySession: + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return False + + monkeypatch.setattr(semantic_module.aiohttp, "ClientSession", lambda **kwargs: DummySession()) + + async def raise_timeout(*args, **kwargs): + raise asyncio.TimeoutError() + + monkeypatch.setattr(provider, 
"_perform_request", raise_timeout) + with pytest.raises(Exception, match="请求超时"): + await provider.search("q") + + async def raise_client_error(*args, **kwargs): + raise semantic_module.aiohttp.ClientError("down") + + monkeypatch.setattr(provider, "_perform_request", raise_client_error) + with pytest.raises(Exception, match="网络错误"): + await provider.search("q") diff --git a/backend/tests/test_external_search_service_branches.py b/backend/tests/test_external_search_service_branches.py new file mode 100644 index 0000000..287b6ec --- /dev/null +++ b/backend/tests/test_external_search_service_branches.py @@ -0,0 +1,110 @@ +"""Branch-focused tests for AcademicSearchService scoring and retries.""" +from __future__ import annotations + +import pytest + +from app.schemas.academic import PaperSearchRequest +from app.services.academic import search_service as search_service_module +from app.services.academic.search_service import AcademicSearchService + +pytestmark = pytest.mark.external_api + + +@pytest.mark.asyncio +async def test_search_defaults_sources_skips_unknown_and_collects_errors(monkeypatch): + service = AcademicSearchService() + + async def fake_semantic(request): + return [ + {"title": "Same", "paper_id": "1", "authors": ["Alice"], "year": 2024, "source": "semantic_scholar"}, + {"title": "Same", "paper_id": "1", "authors": ["Alice"], "year": 2024, "source": "semantic_scholar"}, + ] + + async def fake_arxiv(request): + raise RuntimeError("boom") + + monkeypatch.setattr(service, "_search_semantic_scholar", fake_semantic) + monkeypatch.setattr(service, "_search_arxiv", fake_arxiv) + monkeypatch.setattr(service, "_search_openalex", lambda request: []) + + request = PaperSearchRequest(query="llm", limit=10) + result = await service.search(request, sources=["semantic_scholar", "arxiv", "unknown"]) + + assert result["total"] == 1 + assert result["hits_per_source"]["semantic_scholar"] == 1 + assert result["source_errors"]["arxiv"] == "boom" + + +@pytest.mark.asyncio 
+async def test_search_semantic_scholar_retries_on_429(monkeypatch): + service = AcademicSearchService() + + calls = 0 + + async def fake_search(**kwargs): + nonlocal calls + calls += 1 + if calls == 1: + raise Exception("429 too many requests") + return [{"title": "ok"}] + + async def fake_sleep(duration: float): + return None + + monkeypatch.setattr(service.providers["semantic_scholar"], "search", fake_search) + monkeypatch.setattr(search_service_module.asyncio, "sleep", fake_sleep) + + request = PaperSearchRequest(query="llm", limit=10) + results = await service._search_semantic_scholar(request) + + assert calls == 2 + assert results == [{"title": "ok"}] + + +def test_apply_request_filters_year_and_venue() -> None: + service = AcademicSearchService() + papers = [ + {"title": "A", "year": 2024, "venue": "NeurIPS", "journal_ref": None, "comment": None}, + {"title": "B", "year": 2023, "venue": "ICLR", "journal_ref": None, "comment": None}, + {"title": "C", "year": 2024, "venue": None, "journal_ref": "NeurIPS 2024", "comment": None}, + ] + request = PaperSearchRequest(query="x", limit=10, year=2024, venue="neurips") + + filtered = service._apply_request_filters(papers, request) + + assert [p["title"] for p in filtered] == ["A", "C"] + + +def test_normalize_paper_generates_id_and_authors_list() -> None: + normalized = AcademicSearchService._normalize_paper({"title": "Paper", "authors": "Alice"}) + assert "id" in normalized + assert normalized["authors"] == [] + + +def test_score_and_rank_papers_handles_empty_and_zero_scores() -> None: + assert AcademicSearchService._score_and_rank_papers([], "q") == [] + + papers = [ + {"title": "A", "authors": [], "abstract": None, "year": None, "citation_count": 0}, + {"title": "B", "authors": [], "abstract": None, "year": None, "citation_count": 0}, + ] + ranked = AcademicSearchService._score_and_rank_papers(papers, "query") + assert all(p["score"] == 0.0 for p in ranked) + + +def 
test_score_and_rank_papers_normalizes_when_scores_identical() -> None: + papers = [ + {"title": "LLM Paper", "authors": ["Alice"], "abstract": "about llm", "year": 2024, "venue": "ICLR", "citation_count": 1}, + {"title": "LLM Paper", "authors": ["Alice"], "abstract": "about llm", "year": 2024, "venue": "ICLR", "citation_count": 2}, + ] + ranked = AcademicSearchService._score_and_rank_papers(papers, "llm") + assert ranked[0]["score"] == ranked[-1]["score"] + + +def test_score_and_rank_papers_handles_invalid_year_values() -> None: + papers = [ + {"title": "A", "authors": [], "abstract": "x", "year": "not-a-date", "citation_count": 0}, + {"title": "B", "authors": [], "abstract": "x", "year": 2022, "venue": "NeurIPS 2022", "primary_category": "cs.LG", "citation_count": 0}, + ] + ranked = AcademicSearchService._score_and_rank_papers(papers, "x") + assert ranked[0]["title"] in {"A", "B"} diff --git a/backend/tests/test_finish_tool.py b/backend/tests/test_finish_tool.py index 171d6c1..62a60fa 100644 --- a/backend/tests/test_finish_tool.py +++ b/backend/tests/test_finish_tool.py @@ -5,6 +5,8 @@ from app.services.ai.tools import FinishTool +pytestmark = pytest.mark.external_api + @pytest.mark.asyncio async def test_finish_tool_returns_payload() -> None: diff --git a/backend/tests/test_intelligent_service.py b/backend/tests/test_intelligent_service.py index c26b366..364186f 100644 --- a/backend/tests/test_intelligent_service.py +++ b/backend/tests/test_intelligent_service.py @@ -10,6 +10,8 @@ ) from app.services.academic.intelligent_service import IntelligentSearchService +pytestmark = pytest.mark.external_api + class StubLLM: def __init__(self, configured: bool = True) -> None: diff --git a/backend/tests/test_library_endpoints.py b/backend/tests/test_library_endpoints.py new file mode 100644 index 0000000..9bf437e --- /dev/null +++ b/backend/tests/test_library_endpoints.py @@ -0,0 +1,168 @@ +"""Integration tests for /api/v1/library endpoints.""" +from __future__ import 
annotations + +from pathlib import Path + +import pytest + +DEFAULT_VERIFICATION_CODE = "000000" + +pytestmark = pytest.mark.asyncio + + +async def _register(async_client, email: str, password: str, full_name: str | None = None) -> None: + payload: dict[str, str] = { + "email": email, + "password": password, + "verification_code": DEFAULT_VERIFICATION_CODE, + } + if full_name is not None: + payload["full_name"] = full_name + response = await async_client.post("/api/v1/users", json=payload) + assert response.status_code == 201 + + +async def _login(async_client, email: str, password: str) -> str: + response = await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +async def _upload_pdf(async_client, token: str, filename: str = "library.pdf") -> dict: + pdf_bytes = b"%PDF-1.4 library content" + response = await async_client.post( + "/api/v1/papers/upload", + headers={"Authorization": f"Bearer {token}"}, + files={"file": (filename, pdf_bytes, "application/pdf")}, + ) + assert response.status_code == 201 + return response.json() + + +async def test_library_folder_crud_and_assignment(async_client) -> None: + await _register(async_client, "lib@example.com", "StrongPass1") + token = await _login(async_client, "lib@example.com", "StrongPass1") + upload = await _upload_pdf(async_client, token) + + list_response = await async_client.get( + "/api/v1/library/folders", + headers={"Authorization": f"Bearer {token}"}, + ) + assert list_response.status_code == 200 + payload = list_response.json() + assert payload["unfiled_count"] == 1 + + create_folder = await async_client.post( + "/api/v1/library/folders", + headers={"Authorization": f"Bearer {token}"}, + json={"name": "NLP", "color": "#ff00ff"}, + ) + assert create_folder.status_code == 201 + folder = create_folder.json() + assert 
folder["name"] == "NLP" + folder_id = folder["id"] + + assign = await async_client.patch( + f"/api/v1/library/uploads/{upload['id']}/folder", + headers={"Authorization": f"Bearer {token}"}, + json={"folder_id": folder_id}, + ) + assert assign.status_code == 200 + assert assign.json()["folder_id"] == folder_id + + list_response = await async_client.get( + "/api/v1/library/folders", + headers={"Authorization": f"Bearer {token}"}, + ) + assert list_response.status_code == 200 + payload = list_response.json() + assert payload["unfiled_count"] == 0 + assert payload["folders"][0]["paper_count"] == 1 + + rename = await async_client.patch( + f"/api/v1/library/folders/{folder_id}", + headers={"Authorization": f"Bearer {token}"}, + json={"name": "NLP Papers", "color": "#000000"}, + ) + assert rename.status_code == 200 + assert rename.json()["name"] == "NLP Papers" + + delete_folder = await async_client.delete( + f"/api/v1/library/folders/{folder_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert delete_folder.status_code == 400 + assert delete_folder.json()["detail"] == "请先移动或删除文件夹中的文献" + + move_back = await async_client.patch( + f"/api/v1/library/uploads/{upload['id']}/folder", + headers={"Authorization": f"Bearer {token}"}, + json={"folder_id": None}, + ) + assert move_back.status_code == 200 + assert move_back.json()["folder_id"] is None + + delete_folder = await async_client.delete( + f"/api/v1/library/folders/{folder_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert delete_folder.status_code == 204 + + +async def test_library_download_head_ensure_local_and_delete(async_client, monkeypatch) -> None: + await _register(async_client, "download@example.com", "StrongPass1") + token = await _login(async_client, "download@example.com", "StrongPass1") + upload = await _upload_pdf(async_client, token, filename="download.pdf") + + head = await async_client.head(f"/api/v1/library/uploads/{upload['id']}/download?access_token={token}") + assert 
head.status_code == 200 + + download = await async_client.get(f"/api/v1/library/uploads/{upload['id']}/download?access_token={token}") + assert download.status_code == 200 + assert download.headers.get("content-disposition") + + file_url = upload["file_url"] + stored_filename = file_url.rsplit("/", 1)[-1] + + from app.core.config import get_settings + + settings = get_settings() + local_path = settings.media_path / "uploads" / stored_filename + assert local_path.is_file() + + local_path.unlink() + missing = await async_client.post( + f"/api/v1/library/uploads/{upload['id']}/ensure-local", + headers={"Authorization": f"Bearer {token}"}, + json={"pdf_url": None}, + ) + assert missing.status_code == 404 + assert missing.json()["detail"] == "本地 PDF 文件已丢失,请提供远程链接" + + async def fake_download(url: str) -> bytes: + return b"%PDF-1.4 fake remote" + + from app.api.v1.endpoints import library as library_endpoints + + monkeypatch.setattr(library_endpoints, "_download_pdf_from_url", fake_download) + + ensured = await async_client.post( + f"/api/v1/library/uploads/{upload['id']}/ensure-local", + headers={"Authorization": f"Bearer {token}"}, + json={"pdf_url": "https://example.com/remote.pdf"}, + ) + assert ensured.status_code == 200 + ensured_payload = ensured.json() + assert ensured_payload["file_size"] == len(b"%PDF-1.4 fake remote") + assert ensured_payload["file_url"].startswith("/media/uploads/") + + delete_upload = await async_client.delete( + f"/api/v1/library/uploads/{upload['id']}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert delete_upload.status_code == 204 + diff --git a/backend/tests/test_llm_client.py b/backend/tests/test_llm_client.py index 31c9d1b..27296a5 100644 --- a/backend/tests/test_llm_client.py +++ b/backend/tests/test_llm_client.py @@ -2,11 +2,14 @@ from __future__ import annotations import json +import logging import pytest from app.services.ai import llm_client +pytestmark = pytest.mark.external_api + class DummyResponse: def 
__init__(self, *, status: int, payload: dict[str, object]): @@ -40,13 +43,51 @@ async def __aexit__(self, exc_type, exc, tb): def post(self, url, headers=None, json=None): payload = { - "choices": [ - {"message": {"content": "result"}} - ] + "choices": [{"message": {"content": "result"}}] } return DummyRequestContext(DummyResponse(status=200, payload=payload)) +class DummyStreamContent: + def __init__(self, chunks: list[bytes]): + self._chunks = chunks + + def iter_chunked(self, size: int): + return DummyChunkIterator(self._chunks) + + +class DummyChunkIterator: + def __init__(self, chunks: list[bytes]): + self._iterator = iter(chunks) + + def __aiter__(self): + return self + + async def __anext__(self): + try: + return next(self._iterator) + except StopIteration as exc: + raise StopAsyncIteration from exc + + +class DummyStreamResponse: + def __init__(self, *, status: int, chunks: list[bytes]): + self.status = status + self.content = DummyStreamContent(chunks) + + async def text(self) -> str: + return "" + + +class DummyStreamSession(DummySession): + def __init__(self, response: DummyStreamResponse, *args, **kwargs): + super().__init__(*args, **kwargs) + self._response = response + + def post(self, url, headers=None, json=None): + return DummyRequestContext(self._response) + + @pytest.mark.asyncio async def test_chat_success(monkeypatch): monkeypatch.setattr(llm_client.aiohttp, "ClientSession", DummySession) @@ -76,3 +117,147 @@ def test_client_reports_configuration_state(monkeypatch): monkeypatch.setattr(llm_client.settings, "DEEPSEEK_API_KEY", "") client = llm_client.DeepSeekClient(api_key="") assert client.is_configured is False + + +@pytest.mark.asyncio +async def test_chat_raises_when_api_key_missing(monkeypatch): + monkeypatch.setattr(llm_client.settings, "DEEPSEEK_API_KEY", "") + client = llm_client.DeepSeekClient(api_key="") + with pytest.raises(ValueError, match="API key"): + await client.chat([{"role": "user", "content": "hi"}]) + + +def 
test_extract_sse_data_handles_multiline_blocks() -> None: + block = "event: message\ndata: {\"a\":1}\ndata: {\"b\":2}\n\n" + assert llm_client.DeepSeekClient._extract_sse_data(block) == "{\"a\":1}\n{\"b\":2}" + + +@pytest.mark.asyncio +async def test_chat_content_stream_yields_content_usage_and_done(monkeypatch): + chunks = [ + b"data: {\"choices\": [{\"delta\": {\"content\": \"Hel\"}}]}\n\n", + b"data: {not-json}\n\n", + b"data: {\"choices\": [{\"delta\": {\"content\": \"lo\"}}], \"usage\": {\"prompt_tokens\": 1}}\n\n", + b"data: [DONE]\n\n", + ] + response = DummyStreamResponse(status=200, chunks=chunks) + monkeypatch.setattr( + llm_client.aiohttp, + "ClientSession", + lambda *args, **kwargs: DummyStreamSession(response, *args, **kwargs), + ) + monkeypatch.setattr(llm_client.aiohttp, "TCPConnector", lambda **kwargs: object()) + client = llm_client.DeepSeekClient(api_key="test", base_url="http://mock") + + events = [] + async for event in client.chat_content_stream([{"role": "user", "content": "hi"}]): + events.append(event) + + assert events[:2] == [ + {"type": "content", "content": "Hel"}, + {"type": "content", "content": "lo"}, + ] + assert {"type": "usage", "usage": {"prompt_tokens": 1}} in events + assert events[-1] == {"type": "done"} + + +@pytest.mark.asyncio +async def test_chat_includes_optional_payload_fields(monkeypatch): + captured: dict[str, object] = {} + + class CapturingSession(DummySession): + def post(self, url, headers=None, json=None): + captured["json"] = json + return DummyRequestContext(DummyResponse(status=200, payload={"choices": [{"message": {"content": "ok"}}]})) + + monkeypatch.setattr(llm_client.aiohttp, "ClientSession", CapturingSession) + monkeypatch.setattr(llm_client.aiohttp, "TCPConnector", lambda **kwargs: object()) + client = llm_client.DeepSeekClient(api_key="test", base_url="http://mock") + + await client.chat( + [{"role": "user", "content": "hi"}], + response_format={"type": "json_object"}, + tools=[{"type": "function", 
"function": {"name": "noop"}}], + tool_choice="auto", + parallel_tool_calls=False, + ) + + payload = captured["json"] + assert payload["response_format"] == {"type": "json_object"} + assert payload["tools"][0]["type"] == "function" + assert payload["tool_choice"] == "auto" + assert payload["parallel_tool_calls"] is False + + +@pytest.mark.asyncio +async def test_chat_stream_raises_on_http_error(monkeypatch): + class ErrorResponse(DummyStreamResponse): + async def text(self) -> str: + return "bad" + + response = ErrorResponse(status=400, chunks=[]) + + monkeypatch.setattr( + llm_client.aiohttp, + "ClientSession", + lambda *args, **kwargs: DummyStreamSession(response, *args, **kwargs), + ) + monkeypatch.setattr(llm_client.aiohttp, "TCPConnector", lambda **kwargs: object()) + client = llm_client.DeepSeekClient(api_key="test", base_url="http://mock") + + with pytest.raises(RuntimeError, match="DeepSeek API error 400"): + async for _ in client.chat_stream([{"role": "user", "content": "hi"}]): + pass + + +@pytest.mark.asyncio +async def test_iterate_sse_skips_empty_chunks_and_non_data_blocks(): + response = DummyStreamResponse( + status=200, + chunks=[ + b"", + b"event: ping\n\n", + b"data: [DONE]\n\n", + ], + ) + events = [] + async for event in llm_client.DeepSeekClient._iterate_sse(response): # noqa: SLF001 + events.append(event) + + assert events == [{"type": "done"}] + + +@pytest.mark.asyncio +async def test_chat_content_stream_ignores_events_after_done(monkeypatch): + async def fake_chat_stream(*args, **kwargs): + yield {"type": "done"} + yield {"type": "data", "data": {"choices": [{"delta": {"content": "late"}}]}} + + client = llm_client.DeepSeekClient(api_key="test", base_url="http://mock") + monkeypatch.setattr(client, "chat_stream", fake_chat_stream) + + events = [] + async for event in client.chat_content_stream([{"role": "user", "content": "hi"}]): + events.append(event) + + assert events == [{"type": "done"}] + + +def 
test_build_ssl_context_warns_on_missing_ca_bundle(monkeypatch, caplog): + monkeypatch.setattr(llm_client.settings, "DEEPSEEK_API_KEY", "x") + monkeypatch.setattr(llm_client.settings, "DEEPSEEK_VERIFY_SSL", True) + monkeypatch.setattr(llm_client.settings, "DEEPSEEK_CA_BUNDLE", "/no/such/ca.pem") + + with caplog.at_level(logging.WARNING): + llm_client.DeepSeekClient(api_key="test", base_url="http://mock") + + assert "CA bundle not found" in caplog.text + + +def test_build_ssl_context_disables_verification(monkeypatch): + monkeypatch.setattr(llm_client.settings, "DEEPSEEK_API_KEY", "x") + monkeypatch.setattr(llm_client.settings, "DEEPSEEK_VERIFY_SSL", False) + monkeypatch.setattr(llm_client.settings, "DEEPSEEK_CA_BUNDLE", None) + + client = llm_client.DeepSeekClient(api_key="test", base_url="http://mock") + assert client._ssl_context.check_hostname is False diff --git a/backend/tests/test_mineru_cli_adapters.py b/backend/tests/test_mineru_cli_adapters.py new file mode 100644 index 0000000..bb8e8e9 --- /dev/null +++ b/backend/tests/test_mineru_cli_adapters.py @@ -0,0 +1,289 @@ +"""Tests for MinerU CLI adapter code paths that do not require the real MinerU binary.""" +from __future__ import annotations + +import subprocess +import sys +import types +from pathlib import Path + +import pytest + +from app.services import mineru_cli + +pytestmark = pytest.mark.mineru + + +def test_ensure_cli_available_prefers_path(monkeypatch, tmp_path: Path) -> None: + mineru = tmp_path / "mineru" + mineru.write_text("#!/bin/sh\necho ok\n", encoding="utf-8") + + monkeypatch.setattr(mineru_cli.shutil, "which", lambda _: str(mineru)) + + assert mineru_cli._ensure_cli_available() == mineru + + +def test_ensure_cli_available_uses_python_dir_fallback(monkeypatch, tmp_path: Path) -> None: + monkeypatch.setattr(mineru_cli.shutil, "which", lambda _: None) + + python_dir = tmp_path / "bin" + python_dir.mkdir() + python_exe = python_dir / "python" + python_exe.write_text("", encoding="utf-8") + + 
mineru = python_dir / "mineru" + mineru.write_text("", encoding="utf-8") + + monkeypatch.setattr(mineru_cli.sys, "executable", str(python_exe)) + + assert mineru_cli._ensure_cli_available() == mineru + + +def test_ensure_cli_available_uses_windows_exe_fallback(monkeypatch, tmp_path: Path) -> None: + monkeypatch.setattr(mineru_cli.shutil, "which", lambda _: None) + + python_dir = tmp_path / "Scripts" + python_dir.mkdir() + python_exe = python_dir / "python" + python_exe.write_text("", encoding="utf-8") + + mineru_exe = python_dir / "mineru.exe" + mineru_exe.write_text("", encoding="utf-8") + + monkeypatch.setattr(mineru_cli.sys, "executable", str(python_exe)) + + assert mineru_cli._ensure_cli_available() == mineru_exe + + +def test_ensure_cli_available_missing_raises(monkeypatch, tmp_path: Path) -> None: + monkeypatch.setattr(mineru_cli.shutil, "which", lambda _: None) + + python_dir = tmp_path / "bin" + python_dir.mkdir() + python_exe = python_dir / "python" + python_exe.write_text("", encoding="utf-8") + monkeypatch.setattr(mineru_cli.sys, "executable", str(python_exe)) + + with pytest.raises(mineru_cli.MineruCLIError): + mineru_cli._ensure_cli_available() + + +def test_locate_artifacts_root_prefers_auto_dir(tmp_path: Path) -> None: + base_dir = tmp_path / "outputs" + auto_dir = base_dir / "paper" / "auto" + auto_dir.mkdir(parents=True) + + assert mineru_cli._locate_artifacts_root(base_dir, "paper") == auto_dir + + +def test_locate_artifacts_root_fallback_uses_rglob(monkeypatch, tmp_path: Path) -> None: + base_dir = tmp_path / "missing-dir" + expected = tmp_path / "somewhere" / "paper.md" + + def fake_rglob(self: Path, pattern: str): + assert pattern == "paper.md" + return iter([expected]) + + monkeypatch.setattr(Path, "rglob", fake_rglob, raising=True) + + assert mineru_cli._locate_artifacts_root(base_dir, "paper") == expected.parent + + +def test_locate_artifacts_root_missing_raises(monkeypatch, tmp_path: Path) -> None: + base_dir = tmp_path / "missing-dir" + + 
monkeypatch.setattr(Path, "rglob", lambda self, pattern: iter([]), raising=True) + + with pytest.raises(mineru_cli.MineruCLIError): + mineru_cli._locate_artifacts_root(base_dir, "paper") + + +def test_load_json_invalid_returns_none(tmp_path: Path) -> None: + invalid = tmp_path / "bad.json" + invalid.write_text("{not-json", encoding="utf-8") + + assert mineru_cli._load_json(invalid) is None + + +def test_load_json_missing_returns_none(tmp_path: Path) -> None: + assert mineru_cli._load_json(None) is None + assert mineru_cli._load_json(tmp_path / "missing.json") is None + + +def test_coerce_float_handles_none_and_invalid() -> None: + assert mineru_cli._coerce_float(None) is None + assert mineru_cli._coerce_float("bad") is None + + +def test_build_public_media_url_none_returns_none() -> None: + assert mineru_cli._build_public_media_url(None) is None + + +def test_run_mineru_cli_success_collects_artifacts(monkeypatch, tmp_path: Path) -> None: + monkeypatch.setattr(mineru_cli, "_prepare_runtime_env", lambda: {"MINERU_DEVICE_MODE": "cpu"}) + + artifacts_root = tmp_path / "outputs" / "paper" / "auto" + artifacts_root.mkdir(parents=True) + (artifacts_root / "paper.md").write_text("# ok", encoding="utf-8") + (artifacts_root / "paper_content_list.json").write_text("[]", encoding="utf-8") + + monkeypatch.setattr(mineru_cli, "_locate_artifacts_root", lambda *_: artifacts_root) + + called: dict[str, object] = {} + + def fake_run(command, **kwargs): + called["command"] = command + called["env"] = kwargs.get("env") + return subprocess.CompletedProcess(command, 0, stdout="ok", stderr="") + + monkeypatch.setattr(mineru_cli.subprocess, "run", fake_run) + + pdf_path = tmp_path / "paper.pdf" + pdf_path.write_text("", encoding="utf-8") + output_dir = tmp_path / "outputs" + mineru_path = tmp_path / "mineru" + mineru_path.write_text("", encoding="utf-8") + + artifacts = mineru_cli._run_mineru_cli(pdf_path, output_dir, mineru_path) + + assert called["command"] == [str(mineru_path), "-p", 
str(pdf_path), "-o", str(output_dir)] + assert called["env"] == {"MINERU_DEVICE_MODE": "cpu"} + assert artifacts.root_dir == artifacts_root + assert artifacts.markdown == artifacts_root / "paper.md" + assert artifacts.content_list == artifacts_root / "paper_content_list.json" + assert artifacts.middle_json is None + + +def test_run_mineru_cli_error_raises_minery_cli_error(monkeypatch, tmp_path: Path) -> None: + monkeypatch.setattr(mineru_cli, "_prepare_runtime_env", lambda: {"MINERU_DEVICE_MODE": "cpu"}) + monkeypatch.setattr(mineru_cli, "_locate_artifacts_root", lambda *_: tmp_path) + + def fake_run(command, **_kwargs): + raise subprocess.CalledProcessError(2, command, output="out", stderr="boom") + + monkeypatch.setattr(mineru_cli.subprocess, "run", fake_run) + + pdf_path = tmp_path / "paper.pdf" + pdf_path.write_text("", encoding="utf-8") + + with pytest.raises(mineru_cli.MineruCLIError, match="exit code 2"): + mineru_cli._run_mineru_cli(pdf_path, tmp_path / "outputs", tmp_path / "mineru") + + +def test_parse_pdf_with_stubbed_cli_runner_generates_urls(monkeypatch, tmp_path: Path) -> None: + media_root = tmp_path / "media" + media_root.mkdir() + monkeypatch.setenv("MEDIA_ROOT", str(media_root)) + + pdf_path = tmp_path / "paper.pdf" + pdf_path.write_bytes(b"%PDF-1.4 test content") + + mineru_path = tmp_path / "mineru" + mineru_path.write_text("", encoding="utf-8") + monkeypatch.setattr(mineru_cli, "_ensure_cli_available", lambda: mineru_path) + + output_dir = media_root / "mineru_outputs" + + def fake_run(pdf: Path, out: Path, _mineru: Path) -> mineru_cli.MineruParseArtifacts: + artifacts_root = out / pdf.stem / "auto" + artifacts_root.mkdir(parents=True) + markdown = artifacts_root / f"{pdf.stem}.md" + markdown.write_text("# ok", encoding="utf-8") + content_list = artifacts_root / f"{pdf.stem}_content_list.json" + content_list.write_text( + '[{"type":"paragraph","page_idx":0,"text":"Hello"},{"type":"table","markdown":"|a|"}]', + encoding="utf-8", + ) + return 
mineru_cli.MineruParseArtifacts( + markdown=markdown, + content_list=content_list, + middle_json=None, + root_dir=artifacts_root, + ) + + monkeypatch.setattr(mineru_cli, "_run_mineru_cli", fake_run) + + class DummyDoc: + page_count = 2 + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + dummy_fitz = types.SimpleNamespace(open=lambda path: DummyDoc()) + monkeypatch.setitem(sys.modules, "fitz", dummy_fitz) + + result = mineru_cli.parse_pdf(pdf_path, output_dir=output_dir) + + artifacts_root = output_dir / pdf_path.stem / "auto" + expected_output_url = f"/media/{artifacts_root.relative_to(media_root).as_posix()}" + expected_markdown_url = f"/media/{(artifacts_root / f'{pdf_path.stem}.md').relative_to(media_root).as_posix()}" + + assert result["success"] is True + assert result["output_dir"] == str(artifacts_root) + assert result["output_url"] == expected_output_url + assert result["markdown_url"] == expected_markdown_url + assert result["metadata"]["title"] == "paper" + assert result["metadata"]["total_pages"] == 2 + assert result["metadata"]["parsed_at"] + assert "Hello" in result["plain_text"] + assert "|a|" in result["plain_text"] + + +def test_parse_pdf_cleanup_removes_temp_dir(monkeypatch, tmp_path: Path) -> None: + pdf_path = tmp_path / "paper.pdf" + pdf_path.write_bytes(b"%PDF-1.4 test content") + + mineru_path = tmp_path / "mineru" + mineru_path.write_text("", encoding="utf-8") + monkeypatch.setattr(mineru_cli, "_ensure_cli_available", lambda: mineru_path) + + created_dirs: list[Path] = [] + + def fake_run(pdf: Path, out: Path, _mineru: Path) -> mineru_cli.MineruParseArtifacts: + created_dirs.append(out) + artifacts_root = out / pdf.stem / "auto" + artifacts_root.mkdir(parents=True) + markdown = artifacts_root / f"{pdf.stem}.md" + markdown.write_text("# ok", encoding="utf-8") + content_list = artifacts_root / f"{pdf.stem}_content_list.json" + content_list.write_text("[]", encoding="utf-8") + return 
mineru_cli.MineruParseArtifacts( + markdown=markdown, + content_list=content_list, + middle_json=None, + root_dir=artifacts_root, + ) + + monkeypatch.setattr(mineru_cli, "_run_mineru_cli", fake_run) + monkeypatch.setattr(mineru_cli, "_build_metadata", lambda _: {"title": "paper", "author": "", "total_pages": 0, "file_size": 0}) + + result = mineru_cli.parse_pdf(pdf_path) + + assert result["success"] is True + assert result["output_dir"] is None + assert created_dirs + assert not created_dirs[0].exists() + + +def test_parse_pdf_missing_file_raises(tmp_path: Path) -> None: + with pytest.raises(FileNotFoundError): + mineru_cli.parse_pdf(tmp_path / "missing.pdf") + + +@pytest.mark.asyncio +async def test_parse_pdf_async_delegates_to_thread_worker(monkeypatch, tmp_path: Path) -> None: + pdf_path = tmp_path / "paper.pdf" + pdf_path.write_text("", encoding="utf-8") + + expected = {"success": True, "content": []} + + def fake_parse(pdf: Path, output_dir: Path | None = None): + assert pdf == pdf_path + assert output_dir is None + return expected + + monkeypatch.setattr(mineru_cli, "parse_pdf", fake_parse) + + assert await mineru_cli.parse_pdf_async(pdf_path) == expected diff --git a/backend/tests/test_mineru_cli_normalization.py b/backend/tests/test_mineru_cli_normalization.py new file mode 100644 index 0000000..9867a4c --- /dev/null +++ b/backend/tests/test_mineru_cli_normalization.py @@ -0,0 +1,84 @@ +"""Tests for MinerU CLI wrapper normalization helpers.""" +from __future__ import annotations + +from pathlib import Path + +import pytest + +from app.services import mineru_cli + +pytestmark = pytest.mark.mineru + + +def test_normalize_item_type_maps_aliases_and_discards_noise() -> None: + assert mineru_cli._normalize_item_type(None) == "text" + assert mineru_cli._normalize_item_type("Heading") == "text" + assert mineru_cli._normalize_item_type("Equation") == "equation" + assert mineru_cli._normalize_item_type("discarded") is None + assert 
mineru_cli._normalize_item_type(123) == "text" + + +def test_resolve_page_number_supports_multiple_fields() -> None: + assert mineru_cli._resolve_page_number({"page": 2}) == 2 + assert mineru_cli._resolve_page_number({"page": 0}) == 1 + assert mineru_cli._resolve_page_number({"page_idx": 0}) == 1 + assert mineru_cli._resolve_page_number({"page_index": 3}) == 4 + assert mineru_cli._resolve_page_number({"page_no": "5"}) == 5 + assert mineru_cli._resolve_page_number({"page": "bad"}) is None + + +def test_normalize_bbox_scales_unit_coords_to_1000() -> None: + bbox = mineru_cli._normalize_bbox([0.1, 0.2, 0.3, 0.4], {}) + assert bbox == [100.0, 200.0, 300.0, 400.0] + + +def test_normalize_bbox_scales_large_coords_by_page_size() -> None: + item = {"page_width": 2000, "page_height": 1000} + bbox = mineru_cli._normalize_bbox([0, 0, 2000, 1000], item) + assert bbox == [0.0, 0.0, 1000.0, 1000.0] + + +def test_normalize_bbox_invalid_or_reversed_returns_none() -> None: + assert mineru_cli._normalize_bbox(None, {}) is None + assert mineru_cli._normalize_bbox([0, 0, 1], {}) is None + assert mineru_cli._normalize_bbox(["a", "b", "c", "d"], {}) is None + assert mineru_cli._normalize_bbox([10, 10, 5, 5], {}) is None + + +def test_normalize_content_items_resolves_page_and_removes_aliases() -> None: + raw_items = [ + {"type": "paragraph", "page_idx": 0, "bbox": [0.1, 0.2, 0.3, 0.4]}, + {"type": "noise", "page": 1}, + "not-a-dict", + ] + normalized = mineru_cli._normalize_content_items(raw_items) + assert len(normalized) == 1 + assert normalized[0]["type"] == "text" + assert normalized[0]["page"] == 1 + assert "page_idx" not in normalized[0] + assert normalized[0]["bbox"] == [100.0, 200.0, 300.0, 400.0] + + +def test_normalize_content_items_none_returns_empty() -> None: + assert mineru_cli._normalize_content_items(None) == [] + + +def test_build_public_media_url(monkeypatch, tmp_path: Path) -> None: + media_root = tmp_path / "media" + media_root.mkdir() + target = media_root / "mineru" 
/ "out.md" + target.parent.mkdir(parents=True) + target.write_text("x", encoding="utf-8") + + monkeypatch.setenv("MEDIA_ROOT", str(media_root)) + assert mineru_cli._build_public_media_url(target) == "/media/mineru/out.md" + + +def test_build_public_media_url_outside_media_returns_none(monkeypatch, tmp_path: Path) -> None: + media_root = tmp_path / "media" + media_root.mkdir() + outside = tmp_path / "outside.txt" + outside.write_text("x", encoding="utf-8") + + monkeypatch.setenv("MEDIA_ROOT", str(media_root)) + assert mineru_cli._build_public_media_url(outside) is None diff --git a/backend/tests/test_mineru_cli_utils.py b/backend/tests/test_mineru_cli_utils.py index edf08dd..4070d41 100644 --- a/backend/tests/test_mineru_cli_utils.py +++ b/backend/tests/test_mineru_cli_utils.py @@ -7,6 +7,8 @@ from app.services import mineru_cli +pytestmark = pytest.mark.mineru + def test_extract_plain_text_only_text_and_tables() -> None: content = [ @@ -41,3 +43,41 @@ def test_prepare_runtime_env_sets_directories(tmp_path: Path, monkeypatch) -> No assert "MPLCONFIGDIR" in env assert (Path(env["MPLCONFIGDIR"]).is_dir()) assert env["MINERU_DEVICE_MODE"] in {"cpu", "cuda"} + + +def test_prepare_runtime_env_respects_existing_env(monkeypatch, tmp_path: Path) -> None: + media_root = tmp_path / "media" + media_root.mkdir() + existing = tmp_path / "mpl" + existing.mkdir() + + monkeypatch.setenv("MEDIA_ROOT", str(media_root)) + monkeypatch.setenv("MPLCONFIGDIR", str(existing)) + monkeypatch.setenv("MINERU_DEVICE_MODE", "cpu") + + env = mineru_cli._prepare_runtime_env() + + assert env["MPLCONFIGDIR"] == str(existing) + + +def test_prepare_runtime_env_handles_mkdir_errors(monkeypatch, tmp_path: Path) -> None: + media_root = tmp_path / "media" + media_root.mkdir() + + monkeypatch.setenv("MEDIA_ROOT", str(media_root)) + monkeypatch.setenv("MINERU_DEVICE_MODE", "cpu") + monkeypatch.delenv("MPLCONFIGDIR", raising=False) + + original_mkdir = Path.mkdir + fail_dir = media_root / "runtime_cache" / 
"matplotlib" + + def guarded_mkdir(self: Path, *args, **kwargs): + if self == fail_dir: + raise OSError("simulated mkdir failure") + return original_mkdir(self, *args, **kwargs) + + monkeypatch.setattr(Path, "mkdir", guarded_mkdir, raising=True) + + env = mineru_cli._prepare_runtime_env() + + assert "MPLCONFIGDIR" not in env diff --git a/backend/tests/test_mineru_comprehensive.py b/backend/tests/test_mineru_comprehensive.py deleted file mode 100644 index 437fdf6..0000000 --- a/backend/tests/test_mineru_comprehensive.py +++ /dev/null @@ -1,315 +0,0 @@ -"""Comprehensive test suite for MinerU PDF parser.""" -import asyncio -import json -import logging -import os -from pathlib import Path -from typing import Any, Dict, List - -import pytest - -if os.getenv("RUN_MINERU_E2E") != "1": - pytest.skip( - "MinerU integration test is disabled by default; set RUN_MINERU_E2E=1 to run manually.", - allow_module_level=True, - ) - -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) - -from backend.app.services.mineru_cli import MineruCLIError, parse_pdf_async - - -def analyze_content_types(content_list: List[Dict[str, Any]]) -> Dict[str, int]: - """Analyze and count different content types.""" - type_counts = {} - for item in content_list: - item_type = item.get('type', 'unknown') - type_counts[item_type] = type_counts.get(item_type, 0) + 1 - return type_counts - - -def analyze_bboxes(content_list: List[Dict[str, Any]]) -> Dict[str, Any]: - """Analyze bbox coverage and validity.""" - total_items = len(content_list) - items_with_bbox = 0 - valid_bboxes = 0 - invalid_bboxes = [] - - for idx, item in enumerate(content_list): - bbox = item.get('bbox') - if bbox is not None: - items_with_bbox += 1 - - # Validate bbox - if isinstance(bbox, (list, tuple)) and len(bbox) >= 4: - x0, y0, x1, y1 = bbox[:4] - # Check if bbox is valid (within 0-1000 range and x0 < x1, y0 < y1) - if (0 <= x0 <= 1000 
and 0 <= y0 <= 1000 and - 0 <= x1 <= 1000 and 0 <= y1 <= 1000 and - x0 < x1 and y0 < y1): - valid_bboxes += 1 - else: - invalid_bboxes.append({ - 'index': idx, - 'type': item.get('type'), - 'bbox': bbox, - 'reason': 'Out of range or invalid coordinates' - }) - else: - invalid_bboxes.append({ - 'index': idx, - 'type': item.get('type'), - 'bbox': bbox, - 'reason': 'Invalid bbox format' - }) - - return { - 'total_items': total_items, - 'items_with_bbox': items_with_bbox, - 'valid_bboxes': valid_bboxes, - 'bbox_coverage': f"{items_with_bbox / total_items * 100:.1f}%" if total_items > 0 else "0%", - 'valid_bbox_rate': f"{valid_bboxes / items_with_bbox * 100:.1f}%" if items_with_bbox > 0 else "0%", - 'invalid_bboxes': invalid_bboxes[:5] # Show first 5 invalid bboxes - } - - -def analyze_pages(content_list: List[Dict[str, Any]]) -> Dict[str, Any]: - """Analyze page distribution.""" - page_items = {} - items_without_page = 0 - - for item in content_list: - page = item.get('page') - if page is not None: - page_items[page] = page_items.get(page, 0) + 1 - else: - items_without_page += 1 - - return { - 'total_pages': len(page_items), - 'page_distribution': dict(sorted(page_items.items())), - 'items_without_page': items_without_page - } - - -def extract_sample_content(content_list: List[Dict[str, Any]], max_samples: int = 3) -> Dict[str, List[Dict]]: - """Extract sample content for each type.""" - samples = {} - - for item in content_list: - item_type = item.get('type', 'unknown') - if item_type not in samples: - samples[item_type] = [] - - if len(samples[item_type]) < max_samples: - sample = { - 'type': item_type, - 'page': item.get('page'), - 'bbox': item.get('bbox'), - 'content_preview': None - } - - # Get content preview - if 'text' in item: - text = item['text'] - sample['content_preview'] = text[:100] + '...' if len(text) > 100 else text - elif 'markdown' in item: - md = item['markdown'] - sample['content_preview'] = md[:100] + '...' 
if len(md) > 100 else md - elif 'content' in item: - content = str(item['content']) - sample['content_preview'] = content[:100] + '...' if len(content) > 100 else content - - samples[item_type].append(sample) - - return samples - - -async def test_pdf(pdf_path: Path) -> Dict[str, Any]: - """Test parsing a single PDF file.""" - logger.info(f"\n{'='*80}") - logger.info(f"Testing: {pdf_path.name}") - logger.info(f"{'='*80}") - - try: - result = await parse_pdf_async(pdf_path) - - if not result['success']: - logger.error(f"❌ Parsing failed: {result.get('error', 'Unknown error')}") - return { - 'file': pdf_path.name, - 'success': False, - 'error': result.get('error', 'Unknown error') - } - - content_list = result['content'] - metadata = result['metadata'] - markdown = result['markdown'] - plain_text = result['plain_text'] - - # Analyze results - type_counts = analyze_content_types(content_list) - bbox_analysis = analyze_bboxes(content_list) - page_analysis = analyze_pages(content_list) - content_samples = extract_sample_content(content_list) - - # Print summary - logger.info(f"\n✅ Parsing successful!") - logger.info(f"\nMetadata:") - logger.info(f" - Title: {metadata.get('title', 'N/A')}") - logger.info(f" - Total Pages: {metadata.get('total_pages', 0)}") - logger.info(f" - File Size: {metadata.get('file_size', 0)} bytes") - logger.info(f" - Parsed At: {metadata.get('parsed_at', 'N/A')}") - - logger.info(f"\nContent Analysis:") - logger.info(f" - Total Items: {len(content_list)}") - logger.info(f" - Content Types:") - for content_type, count in sorted(type_counts.items()): - logger.info(f" • {content_type}: {count}") - - logger.info(f"\nBounding Box Analysis:") - logger.info(f" - Items with bbox: {bbox_analysis['items_with_bbox']}/{bbox_analysis['total_items']} ({bbox_analysis['bbox_coverage']})") - logger.info(f" - Valid bboxes: {bbox_analysis['valid_bboxes']}/{bbox_analysis['items_with_bbox']} ({bbox_analysis['valid_bbox_rate']})") - if 
bbox_analysis['invalid_bboxes']: - logger.info(f" - Invalid bbox samples (first 5):") - for invalid in bbox_analysis['invalid_bboxes'][:3]: - logger.info(f" • Index {invalid['index']}: {invalid['type']} - {invalid['reason']}") - - logger.info(f"\nPage Distribution:") - logger.info(f" - Total Pages: {page_analysis['total_pages']}") - for page, count in list(page_analysis['page_distribution'].items())[:5]: - logger.info(f" • Page {page}: {count} items") - if page_analysis['items_without_page'] > 0: - logger.info(f" - Items without page: {page_analysis['items_without_page']}") - - logger.info(f"\nText Content:") - logger.info(f" - Markdown length: {len(markdown)} chars") - logger.info(f" - Plain text length: {len(plain_text)} chars") - logger.info(f" - Markdown preview (first 200 chars):") - logger.info(f" {markdown[:200].replace(chr(10), ' ')}") - - logger.info(f"\nSample Content by Type:") - for content_type, samples in content_samples.items(): - logger.info(f" - {content_type}:") - for i, sample in enumerate(samples, 1): - logger.info(f" {i}. 
Page {sample['page']}, BBox: {sample['bbox']}") - if sample['content_preview']: - preview = sample['content_preview'].replace('\n', ' ') - logger.info(f" Preview: {preview}") - - # Save detailed results - output_dir = result.get('output_dir') - if output_dir: - logger.info(f"\nOutput Directory: {output_dir}") - - return { - 'file': pdf_path.name, - 'success': True, - 'metadata': metadata, - 'type_counts': type_counts, - 'bbox_analysis': { - 'total_items': bbox_analysis['total_items'], - 'items_with_bbox': bbox_analysis['items_with_bbox'], - 'valid_bboxes': bbox_analysis['valid_bboxes'], - 'bbox_coverage': bbox_analysis['bbox_coverage'], - 'valid_bbox_rate': bbox_analysis['valid_bbox_rate'] - }, - 'page_analysis': { - 'total_pages': page_analysis['total_pages'], - 'items_without_page': page_analysis['items_without_page'] - }, - 'text_lengths': { - 'markdown': len(markdown), - 'plain_text': len(plain_text) - } - } - - except MineruCLIError as exc: - logger.error(f"❌ MinerU CLI exception during parsing: {exc}", exc_info=True) - return { - 'file': pdf_path.name, - 'success': False, - 'error': str(exc) - } - except Exception as exc: - logger.error(f"❌ Exception during parsing: {exc}", exc_info=True) - return { - 'file': pdf_path.name, - 'success': False, - 'error': str(exc) - } - - -async def main(): - """Run comprehensive tests on multiple PDF files.""" - logger.info("="*80) - logger.info("MinerU Parser - Comprehensive Test Suite") - logger.info("="*80) - - # Test files - test_files = [ - Path('backend/tests/tmp_media/sample_table.pdf'), - Path('backend/tests/tmp_media/complex_test.pdf'), - ] - - # Check if files exist - available_files = [f for f in test_files if f.exists()] - missing_files = [f for f in test_files if not f.exists()] - - if missing_files: - logger.warning(f"\n⚠️ Missing test files:") - for f in missing_files: - logger.warning(f" - {f}") - - if not available_files: - logger.error("\n❌ No test files available!") - return - - logger.info(f"\nTesting 
{len(available_files)} PDF file(s)...\n") - - # Run tests - results = [] - for pdf_path in available_files: - result = await test_pdf(pdf_path) - results.append(result) - - # Print overall summary - logger.info(f"\n{'='*80}") - logger.info("Overall Test Summary") - logger.info(f"{'='*80}") - - successful = [r for r in results if r['success']] - failed = [r for r in results if not r['success']] - - logger.info(f"\nTotal Tests: {len(results)}") - logger.info(f" ✅ Successful: {len(successful)}") - logger.info(f" ❌ Failed: {len(failed)}") - - if successful: - logger.info(f"\nSuccessful Parses:") - for r in successful: - logger.info(f" ✅ {r['file']}") - if 'type_counts' in r: - logger.info(f" Content items: {sum(r['type_counts'].values())}") - logger.info(f" Types: {', '.join(r['type_counts'].keys())}") - if 'bbox_analysis' in r: - logger.info(f" BBox coverage: {r['bbox_analysis']['bbox_coverage']}") - - if failed: - logger.info(f"\nFailed Parses:") - for r in failed: - logger.info(f" ❌ {r['file']}: {r.get('error', 'Unknown error')}") - - # Save results to JSON - output_file = Path('backend/tests/tmp_media/test_results.json') - with open(output_file, 'w', encoding='utf-8') as f: - json.dump(results, f, indent=2, ensure_ascii=False) - logger.info(f"\n📄 Detailed results saved to: {output_file}") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/backend/tests/test_mineru_runtime.py b/backend/tests/test_mineru_runtime.py new file mode 100644 index 0000000..86f31a3 --- /dev/null +++ b/backend/tests/test_mineru_runtime.py @@ -0,0 +1,49 @@ +"""Tests for MinerU runtime/device helpers.""" +from __future__ import annotations + +import pytest + +from app.services import mineru_runtime + +pytestmark = pytest.mark.mineru + + +def test_normalize_device_sets_env_and_returns_normalized(monkeypatch) -> None: + monkeypatch.setenv("MINERU_DEVICE_MODE", " CUDA ") + + assert mineru_runtime.ensure_device_env() == "cuda" + assert mineru_runtime.select_device() == "cuda" + + 
+def test_normalize_device_blank_returns_none(monkeypatch) -> None: + monkeypatch.setenv("MINERU_DEVICE_MODE", " ") + mineru_runtime._auto_detect_device.cache_clear() + monkeypatch.setattr(mineru_runtime, "_cuda_available", lambda: False) + assert mineru_runtime.select_device() == "cpu" + + +def test_apply_device_env_sets_default(monkeypatch) -> None: + monkeypatch.delenv("MINERU_DEVICE_MODE", raising=False) + mineru_runtime._auto_detect_device.cache_clear() + monkeypatch.setattr(mineru_runtime, "_cuda_available", lambda: False) + + env = mineru_runtime.apply_device_env({"OTHER": "1"}) + + assert env["OTHER"] == "1" + assert env["MINERU_DEVICE_MODE"] == "cpu" + + +def test_auto_detect_device_returns_cuda_when_available(monkeypatch) -> None: + monkeypatch.delenv("MINERU_DEVICE_MODE", raising=False) + mineru_runtime._auto_detect_device.cache_clear() + monkeypatch.setattr(mineru_runtime, "_cuda_available", lambda: True) + + assert mineru_runtime.select_device(prefer_cuda=True) == "cuda" + + +def test_auto_detect_device_returns_cpu_when_unavailable(monkeypatch) -> None: + monkeypatch.delenv("MINERU_DEVICE_MODE", raising=False) + mineru_runtime._auto_detect_device.cache_clear() + monkeypatch.setattr(mineru_runtime, "_cuda_available", lambda: False) + + assert mineru_runtime.select_device(prefer_cuda=True) == "cpu" diff --git a/backend/tests/test_notes_endpoints.py b/backend/tests/test_notes_endpoints.py new file mode 100644 index 0000000..762b014 --- /dev/null +++ b/backend/tests/test_notes_endpoints.py @@ -0,0 +1,111 @@ +"""Integration tests for /api/v1/notes endpoints.""" +from __future__ import annotations + +import pytest + +DEFAULT_VERIFICATION_CODE = "000000" + +pytestmark = pytest.mark.asyncio + + +async def _register(async_client, email: str, password: str) -> None: + response = await async_client.post( + "/api/v1/users", + json={"email": email, "password": password, "verification_code": DEFAULT_VERIFICATION_CODE}, + ) + assert response.status_code == 201 + + 
+async def _login(async_client, email: str, password: str) -> str: + response = await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +async def _upload_pdf(async_client, token: str, filename: str = "note.pdf") -> dict: + pdf_bytes = b"%PDF-1.4 note content" + response = await async_client.post( + "/api/v1/papers/upload", + headers={"Authorization": f"Bearer {token}"}, + files={"file": (filename, pdf_bytes, "application/pdf")}, + ) + assert response.status_code == 201 + return response.json() + + +async def test_notes_crud_and_filters(async_client) -> None: + await _register(async_client, "note@example.com", "StrongPass1") + token = await _login(async_client, "note@example.com", "StrongPass1") + upload = await _upload_pdf(async_client, token) + + missing_paper = await async_client.post( + "/api/v1/notes", + headers={"Authorization": f"Bearer {token}"}, + json={"title": "Oops", "content": "bad", "uploaded_paper_id": 999}, + ) + assert missing_paper.status_code == 404 + assert missing_paper.json()["detail"] == "未找到关联的上传论文" + + created = await async_client.post( + "/api/v1/notes", + headers={"Authorization": f"Bearer {token}"}, + json={ + "title": "LLM notes", + "content": "Something about transformers", + "uploaded_paper_id": upload["id"], + "tags": ["nlp", "reading"], + }, + ) + assert created.status_code == 201 + note = created.json() + note_id = note["id"] + + listed = await async_client.get( + "/api/v1/notes?page=1&page_size=20", + headers={"Authorization": f"Bearer {token}"}, + ) + assert listed.status_code == 200 + payload = listed.json() + assert payload["total"] == 1 + assert payload["items"][0]["id"] == note_id + + filtered = await async_client.get( + f"/api/v1/notes?paper_id={upload['id']}&search=transform", + headers={"Authorization": f"Bearer {token}"}, + ) + 
assert filtered.status_code == 200 + payload = filtered.json() + assert payload["total"] == 1 + + fetched = await async_client.get( + f"/api/v1/notes/{note_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert fetched.status_code == 200 + assert fetched.json()["title"] == "LLM notes" + + updated = await async_client.patch( + f"/api/v1/notes/{note_id}", + headers={"Authorization": f"Bearer {token}"}, + json={"title": "Updated", "tags": ["updated"]}, + ) + assert updated.status_code == 200 + assert updated.json()["title"] == "Updated" + assert updated.json()["tags"] == ["updated"] + + deleted = await async_client.delete( + f"/api/v1/notes/{note_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert deleted.status_code == 204 + + missing = await async_client.get( + f"/api/v1/notes/{note_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + assert missing.status_code == 404 + diff --git a/backend/tests/test_papers_qa.py b/backend/tests/test_papers_qa.py index ce5bc50..3f238e7 100644 --- a/backend/tests/test_papers_qa.py +++ b/backend/tests/test_papers_qa.py @@ -5,9 +5,11 @@ from app.api.v1.endpoints import papers +DEFAULT_VERIFICATION_CODE = "000000" + async def _register(async_client, email: str, password: str) -> None: - payload = {"email": email, "password": password} + payload = {"email": email, "password": password, "verification_code": DEFAULT_VERIFICATION_CODE} response = await async_client.post("/api/v1/users", json=payload) assert response.status_code == 201 diff --git a/backend/tests/test_papers_upload.py b/backend/tests/test_papers_upload.py index b74328b..ff9dcd2 100644 --- a/backend/tests/test_papers_upload.py +++ b/backend/tests/test_papers_upload.py @@ -7,9 +7,11 @@ from app.api.v1.endpoints import papers +DEFAULT_VERIFICATION_CODE = "000000" + async def _register(async_client, email: str, password: str, full_name: str | None = None) -> None: - payload = {"email": email, "password": password} + payload = {"email": email, 
"password": password, "verification_code": DEFAULT_VERIFICATION_CODE} if full_name: payload["full_name"] = full_name response = await async_client.post("/api/v1/users", json=payload) @@ -77,7 +79,7 @@ async def test_upload_honors_size_limit(async_client, monkeypatch) -> None: ) assert response.status_code == 413 - assert "超过 25MB" in response.json()["detail"] + assert response.json()["detail"] == "文件体积超过 25MB 限制" @pytest.mark.asyncio diff --git a/backend/tests/test_profile.py b/backend/tests/test_profile.py index bd16136..73ed0b5 100644 --- a/backend/tests/test_profile.py +++ b/backend/tests/test_profile.py @@ -14,10 +14,15 @@ ) MEDIA_ROOT = Path(os.environ["MEDIA_ROOT"]) +DEFAULT_VERIFICATION_CODE = "000000" async def _register_user(client, email: str, password: str, full_name: str | None = None) -> None: - payload: dict[str, str] = {"email": email, "password": password} + payload: dict[str, str] = { + "email": email, + "password": password, + "verification_code": DEFAULT_VERIFICATION_CODE, + } if full_name is not None: payload["full_name"] = full_name @@ -68,4 +73,4 @@ async def test_upload_avatar(async_client) -> None: filename = avatar_url.rsplit("/", 1)[-1] stored_file = MEDIA_ROOT / "avatars" / filename - assert stored_file.is_file() \ No newline at end of file + assert stored_file.is_file() diff --git a/backend/tests/test_query_parser.py b/backend/tests/test_query_parser.py index bbcf4bc..09933e1 100644 --- a/backend/tests/test_query_parser.py +++ b/backend/tests/test_query_parser.py @@ -8,6 +8,8 @@ from app.schemas.academic import QueryInterpretation from app.services.academic.query_parser import QueryParser +pytestmark = pytest.mark.external_api + class StubLLM: def __init__(self, payload: str, configured: bool = True, should_raise: bool = False) -> None: diff --git a/backend/tests/test_query_parser_branches.py b/backend/tests/test_query_parser_branches.py new file mode 100644 index 0000000..91df6ab --- /dev/null +++ 
b/backend/tests/test_query_parser_branches.py @@ -0,0 +1,50 @@ +"""Branch-focused tests for QueryParser helpers.""" +from __future__ import annotations + +import pytest + +from app.services.academic.query_parser import QueryParser + +pytestmark = pytest.mark.external_api + + +class UnconfiguredClient: + is_configured = False + + +@pytest.mark.asyncio +async def test_parse_rejects_empty_query() -> None: + parser = QueryParser(client=UnconfiguredClient()) + with pytest.raises(ValueError): + await parser.parse(" ") + + +@pytest.mark.asyncio +async def test_parse_json_payload_strips_markdown_fences() -> None: + assert QueryParser._parse_json_payload("```json\n{\"a\": 1}\n```") == {"a": 1} + assert QueryParser._parse_json_payload("") == {} + + +def test_normalize_helpers_cover_common_types() -> None: + parser = QueryParser(client=UnconfiguredClient()) + + assert parser._normalize_list("a") == ["a"] + assert parser._normalize_list(["a", " ", "b"]) == ["a", "b"] + assert parser._normalize_list(None, fallback="x") == ["x"] + assert parser._normalize_year_range([2020, "2024"]) == [2020, 2024] + assert parser._normalize_year_range(["bad", "2024"]) is None + assert parser._normalize_intent("SEARCH") == "search" + assert parser._normalize_intent("unknown") == "chat" + assert parser._normalize_bool(" yes ") is True + assert parser._normalize_bool("no") is False + + +def test_fallback_interpretation_detects_search_and_summarize_intents() -> None: + parser = QueryParser(client=UnconfiguredClient()) + search = parser._fallback_interpretation("search paper about LLM") + assert search["intent"] == "search" + assert search["needs_search"] is True + + summarize = parser._fallback_interpretation("请帮我总结一下") + assert summarize["intent"] == "summarize" + assert summarize["needs_search"] is True diff --git a/backend/tests/test_schema_validators.py b/backend/tests/test_schema_validators.py new file mode 100644 index 0000000..947cad0 --- /dev/null +++ 
b/backend/tests/test_schema_validators.py @@ -0,0 +1,29 @@ +"""Unit tests for Pydantic schema validators (business rules).""" +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from app.schemas.auth import PasswordResetConfirm +from app.schemas.user import PasswordChange, UserCreate + + +def test_user_create_rejects_short_password() -> None: + with pytest.raises(ValidationError): + UserCreate(email="a@example.com", password="short", verification_code="000000") + + +def test_user_create_rejects_password_over_72_bytes() -> None: + with pytest.raises(ValidationError): + UserCreate(email="a@example.com", password="a" * 73, verification_code="000000") + + +def test_password_change_rejects_short_new_password() -> None: + with pytest.raises(ValidationError): + PasswordChange(current_password="StrongPass1", new_password="short") + + +def test_password_reset_rejects_short_new_password() -> None: + with pytest.raises(ValidationError): + PasswordResetConfirm(email="a@example.com", code="000000", new_password="short") + diff --git a/backend/tests/test_tool_executor.py b/backend/tests/test_tool_executor.py index 230c30f..76b9506 100644 --- a/backend/tests/test_tool_executor.py +++ b/backend/tests/test_tool_executor.py @@ -14,6 +14,8 @@ ToolResult, ) +pytestmark = pytest.mark.external_api + class EchoTool(Tool): """Simple tool used to verify ToolExecutor wiring."""