From a97b97f40a6d0015fe0340bf224d1281e52691fc Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Fri, 20 Jun 2025 11:26:06 -0700 Subject: [PATCH 01/46] updating dependencies after upgrading... pip freeze > requirements.txt the old requirements are saved in oldrequirements.txt for reference --- oldrequirements.txt | 8 +++++++ requirements.txt | 58 ++++++++++++++++++++++++++++++++++++++------- 2 files changed, 57 insertions(+), 9 deletions(-) create mode 100644 oldrequirements.txt diff --git a/oldrequirements.txt b/oldrequirements.txt new file mode 100644 index 0000000..495f0a2 --- /dev/null +++ b/oldrequirements.txt @@ -0,0 +1,8 @@ +xmltodict==0.12.0 +urllib3==1.25.9 +dicttoxml==1.7.4 +tqdm==4.46.0 +bibtexparser==1.2.0 +monty==3.0.2 +nbformat==5.0.7 +nbconvert==5.6.1 diff --git a/requirements.txt b/requirements.txt index 630db42..ec0f36e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,49 @@ -xmltodict==0.12.0 -urllib3==1.25.9 -dicttoxml==1.7.4 -tqdm==4.46.0 -bibtexparser==1.2.0 -monty==3.0.2 -nbformat==5.0.7 -nbconvert==5.6.1 - +attrs==25.3.0 +beautifulsoup4==4.13.4 +bibtexparser==1.4.3 +bleach==6.2.0 +colorama==0.4.6 +defusedxml==0.7.1 +dicttoxml==1.7.16 +entrypoints==0.4 +fastjsonschema==2.21.1 +future==1.0.0 +ipython-genutils==0.2.0 +Jinja2==3.1.6 +jsonschema==4.24.0 +jsonschema-specifications==2025.4.1 +jupyter_client==8.6.3 +jupyter_core==5.8.1 +jupyterlab_pygments==0.3.0 +MarkupSafe==3.0.2 +mistune==3.1.3 +monty==2025.3.3 +nbclient==0.10.2 +nbconvert==7.16.6 +nbformat==5.10.4 +numpy==2.3.0 +packaging==25.0 +pandocfilters==1.5.1 +platformdirs==4.3.8 +Pygments==2.19.1 +pyparsing==3.2.3 +python-dateutil==2.9.0.post0 +pywin32==310 +pyzmq==27.0.0 +referencing==0.36.2 +rpds-py==0.25.1 +ruamel.yaml==0.18.14 +ruamel.yaml.clib==0.2.12 +setuptools==80.9.0 +six==1.17.0 +soupsieve==2.7 +testpath==0.6.0 +tinycss2==1.4.0 +tornado==6.5.1 +tqdm==4.67.1 +traitlets==5.14.3 +typing_extensions==4.14.0 +urllib3==2.5.0 +webencodings==0.5.1 +wheel==0.45.1 
+xmltodict==0.14.2 From afa51f76eece8a2650bafbd2b9e2b67199f231d7 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Fri, 20 Jun 2025 18:42:41 -0700 Subject: [PATCH 02/46] Trying to get test_doi_builder to work - deserialization works - re-serialization does not, there are additional dictionary keys that exist that don't match the provided config_file. will need to discuss the goal of this doi_builder a bit more to understand it --- files/config-example.json | 10 +--------- mpcite/doi_builder.py | 2 +- mpcite/models.py | 8 ++++++++ tests/test_doi_builder.py | 18 +++++++++--------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/files/config-example.json b/files/config-example.json index 8719649..a18b14b 100644 --- a/files/config-example.json +++ b/files/config-example.json @@ -45,14 +45,6 @@ "username": "", "password": "" }, - "elsevier": { - "endpoint": "https://push-feature.datasearch.elsevier.com/container", - "username": "", - "password": "" - }, "max_doi_requests": 0, - "sync": false, - "@module": "mpcite.doi_builder", - "@class": "DoiBuilder", - "@version": null + "sync": false } diff --git a/mpcite/doi_builder.py b/mpcite/doi_builder.py index e3e6708..c42e45d 100644 --- a/mpcite/doi_builder.py +++ b/mpcite/doi_builder.py @@ -218,7 +218,7 @@ def from_dict(cls, d: dict): json.dumps(d["robocrys_collection"]), cls=MontyDecoder ) doi_store = json.loads(json.dumps(d["dois_collection"]), cls=MontyDecoder) - report_emails = d["report_emails"] + report_emails = d["report_emails"] if "report_emails" in d else None max_doi_requests = d["max_doi_requests"] sync = d["sync"] diff --git a/mpcite/models.py b/mpcite/models.py index b2fab65..1e989e8 100644 --- a/mpcite/models.py +++ b/mpcite/models.py @@ -317,3 +317,11 @@ class ExplorerGetJSONResponseModel(BaseModel): sponsor_orgs: List[str] research_orgs: List[str] links: List[Dict[str, str]] + +# Added to resolve failed import in test_doi_builder.py +class MongoConnectionModel(ConnectionModel): + database: str = 
Field(..., title="MongoDB Database Name") + collection: str = Field(..., title="MongoDB Collection Name") + + def get_connection_string(self) -> str: + return f"mongodb://{self.username}:{self.password}@{self.endpoint}/{self.database}" \ No newline at end of file diff --git a/tests/test_doi_builder.py b/tests/test_doi_builder.py index 00a1fab..8d17885 100644 --- a/tests/test_doi_builder.py +++ b/tests/test_doi_builder.py @@ -2,13 +2,12 @@ import os import pytest import json -from mpcite.doi_builder import DoiBuilder -from mpcite.models import OSTIModel, MongoConnectionModel, ConnectionModel - +from mpcite.doi_builder import DOIBuilder +from mpcite.models import OSTIDOIRecordModel, MongoConnectionModel, ConnectionModel @pytest.fixture def config_file_path(): - return Path(os.getcwd()) / "files" / "config_test.json" + return Path(os.getcwd()) / "files" / "config-example.json" def test_builder_serialization(config_file_path: Path): @@ -17,17 +16,18 @@ def test_builder_serialization(config_file_path: Path): # test deserialize d: dict = json.load(config_file) try: - doi_builder = DoiBuilder.from_dict(d=d) + doi_builder = DOIBuilder.from_dict(d=d) except Exception as e: assert False, f"Unable to build DOI Builder from config file. 
Error: {e}" # test serialization - new_d = doi_builder.as_dict() + print(new_d.keys()) + print(d.keys()) assert new_d.keys() == d.keys() - new_osti = OSTIModel.parse_obj(new_d["osti"]) + new_osti = OSTIDOIRecordModel.parse_obj(new_d["osti"]) new_elsevier = ConnectionModel.parse_obj(new_d["elsevier"]) new_materials_connection = MongoConnectionModel.parse_obj( new_d["materials_collection"] @@ -39,7 +39,7 @@ def test_builder_serialization(config_file_path: Path): new_d["robocrys_collection"] ) - true_osti = OSTIModel.parse_obj(d["osti"]) + true_osti = OSTIDOIRecordModel.parse_obj(d["osti"]) true_elsevier = ConnectionModel.parse_obj(new_d["elsevier"]) true_materials_connection = MongoConnectionModel.parse_obj( new_d["materials_collection"] @@ -59,4 +59,4 @@ def test_builder_serialization(config_file_path: Path): assert ( new_dois_collection_connection.dict() == true_dois_collection_connection.dict() ) - assert new_robocrys_collection_connection == true_robocrys_collection_connection + assert new_robocrys_collection_connection == true_robocrys_collection_connection \ No newline at end of file From 79c48f22be3375fe75efd5478a06ec3045d0a008 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Mon, 23 Jun 2025 13:34:12 -0700 Subject: [PATCH 03/46] Revert "Trying to get test_doi_builder to work" This reverts commit afa51f76eece8a2650bafbd2b9e2b67199f231d7. 
--- files/config-example.json | 10 +++++++++- mpcite/doi_builder.py | 2 +- mpcite/models.py | 8 -------- tests/test_doi_builder.py | 18 +++++++++--------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/files/config-example.json b/files/config-example.json index a18b14b..8719649 100644 --- a/files/config-example.json +++ b/files/config-example.json @@ -45,6 +45,14 @@ "username": "", "password": "" }, + "elsevier": { + "endpoint": "https://push-feature.datasearch.elsevier.com/container", + "username": "", + "password": "" + }, "max_doi_requests": 0, - "sync": false + "sync": false, + "@module": "mpcite.doi_builder", + "@class": "DoiBuilder", + "@version": null } diff --git a/mpcite/doi_builder.py b/mpcite/doi_builder.py index c42e45d..e3e6708 100644 --- a/mpcite/doi_builder.py +++ b/mpcite/doi_builder.py @@ -218,7 +218,7 @@ def from_dict(cls, d: dict): json.dumps(d["robocrys_collection"]), cls=MontyDecoder ) doi_store = json.loads(json.dumps(d["dois_collection"]), cls=MontyDecoder) - report_emails = d["report_emails"] if "report_emails" in d else None + report_emails = d["report_emails"] max_doi_requests = d["max_doi_requests"] sync = d["sync"] diff --git a/mpcite/models.py b/mpcite/models.py index 1e989e8..b2fab65 100644 --- a/mpcite/models.py +++ b/mpcite/models.py @@ -317,11 +317,3 @@ class ExplorerGetJSONResponseModel(BaseModel): sponsor_orgs: List[str] research_orgs: List[str] links: List[Dict[str, str]] - -# Added to resolve failed import in test_doi_builder.py -class MongoConnectionModel(ConnectionModel): - database: str = Field(..., title="MongoDB Database Name") - collection: str = Field(..., title="MongoDB Collection Name") - - def get_connection_string(self) -> str: - return f"mongodb://{self.username}:{self.password}@{self.endpoint}/{self.database}" \ No newline at end of file diff --git a/tests/test_doi_builder.py b/tests/test_doi_builder.py index 8d17885..00a1fab 100644 --- a/tests/test_doi_builder.py +++ b/tests/test_doi_builder.py @@ 
-2,12 +2,13 @@ import os import pytest import json -from mpcite.doi_builder import DOIBuilder -from mpcite.models import OSTIDOIRecordModel, MongoConnectionModel, ConnectionModel +from mpcite.doi_builder import DoiBuilder +from mpcite.models import OSTIModel, MongoConnectionModel, ConnectionModel + @pytest.fixture def config_file_path(): - return Path(os.getcwd()) / "files" / "config-example.json" + return Path(os.getcwd()) / "files" / "config_test.json" def test_builder_serialization(config_file_path: Path): @@ -16,18 +17,17 @@ def test_builder_serialization(config_file_path: Path): # test deserialize d: dict = json.load(config_file) try: - doi_builder = DOIBuilder.from_dict(d=d) + doi_builder = DoiBuilder.from_dict(d=d) except Exception as e: assert False, f"Unable to build DOI Builder from config file. Error: {e}" # test serialization + new_d = doi_builder.as_dict() - print(new_d.keys()) - print(d.keys()) assert new_d.keys() == d.keys() - new_osti = OSTIDOIRecordModel.parse_obj(new_d["osti"]) + new_osti = OSTIModel.parse_obj(new_d["osti"]) new_elsevier = ConnectionModel.parse_obj(new_d["elsevier"]) new_materials_connection = MongoConnectionModel.parse_obj( new_d["materials_collection"] @@ -39,7 +39,7 @@ def test_builder_serialization(config_file_path: Path): new_d["robocrys_collection"] ) - true_osti = OSTIDOIRecordModel.parse_obj(d["osti"]) + true_osti = OSTIModel.parse_obj(d["osti"]) true_elsevier = ConnectionModel.parse_obj(new_d["elsevier"]) true_materials_connection = MongoConnectionModel.parse_obj( new_d["materials_collection"] @@ -59,4 +59,4 @@ def test_builder_serialization(config_file_path: Path): assert ( new_dois_collection_connection.dict() == true_dois_collection_connection.dict() ) - assert new_robocrys_collection_connection == true_robocrys_collection_connection \ No newline at end of file + assert new_robocrys_collection_connection == true_robocrys_collection_connection From 7bb812e7e139a02abffb225e3fd18239d0c51f9e Mon Sep 17 00:00:00 2001 From: 
HugoOnghai Date: Tue, 24 Jun 2025 17:09:32 -0700 Subject: [PATCH 04/46] preliminary version of ELinkGetResponseModel made, using ELinkAPI RecordResponse --- .gitignore | 1 + mpcite/models.py | 302 +++-------------- ...utility2.py => recordresponse_example.txt} | 0 .../Visualizations.ipynb | 0 {mpcite => mpcite_legacy}/Visualizations.pdf | Bin mpcite_legacy/__init__.py | 8 + {mpcite => mpcite_legacy}/config_ipynb.txt | 0 {mpcite => mpcite_legacy}/doi_builder.py | 0 {mpcite => mpcite_legacy}/main.py | 0 mpcite_legacy/models.py | 319 ++++++++++++++++++ {mpcite => mpcite_legacy}/send_collection.py | 0 {mpcite => mpcite_legacy}/utility.py | 0 mpcite_legacy/utility2.py | 0 tests/test_elink_api.py | 59 ++++ 14 files changed, 433 insertions(+), 256 deletions(-) rename mpcite/{utility2.py => recordresponse_example.txt} (100%) rename {mpcite => mpcite_legacy}/Visualizations.ipynb (100%) rename {mpcite => mpcite_legacy}/Visualizations.pdf (100%) create mode 100644 mpcite_legacy/__init__.py rename {mpcite => mpcite_legacy}/config_ipynb.txt (100%) rename {mpcite => mpcite_legacy}/doi_builder.py (100%) rename {mpcite => mpcite_legacy}/main.py (100%) create mode 100644 mpcite_legacy/models.py rename {mpcite => mpcite_legacy}/send_collection.py (100%) rename {mpcite => mpcite_legacy}/utility.py (100%) create mode 100644 mpcite_legacy/utility2.py create mode 100644 tests/test_elink_api.py diff --git a/.gitignore b/.gitignore index 9ea9034..a0311c9 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ files/config_test.json .pytest_cache .ipynb_checkpoints *.json +.env diff --git a/mpcite/models.py b/mpcite/models.py index b2fab65..e4d055f 100644 --- a/mpcite/models.py +++ b/mpcite/models.py @@ -1,101 +1,65 @@ -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ConfigDict from typing import List, Dict, Optional -from datetime import datetime +import datetime from enum import Enum import bibtexparser - - -class ConnectionModel(BaseModel): - endpoint: 
str = Field(..., title="URL Endpoint of the connection") - username: str = Field(..., title="User Name") - password: str = Field(..., title="Password") - - -class RoboCrysModel(BaseModel): - material_id: str - last_updated: datetime - description: Optional[str] = None - error: Optional[str] = None - - @classmethod - def get_default_description(cls): - return ( - "Computed materials data using density " - "functional theory calculations. These calculations determine " - "the electronic structure of bulk materials by solving " - "approximations to the Schrodinger equation. For more " - "information, see https://materialsproject.org/docs/calculations" - ) - - -class MaterialModel(BaseModel): - last_updated: datetime = Field( - None, title="timestamp for the most recent calculation" - ) - updated_at: datetime = Field(None, title="alternative to last_updated") - created_at: datetime = Field( - None, - description="creation time for this material defined by when the first structure " - "optimization calculation was run", - ) - task_id: str = Field( - "", title="task id for this material. Also called the material id" - ) - # pretty_formula: str = Field(..., title="clean representation of the formula") - pretty_formula: str = Field(..., title="clean representation of the formula") - chemsys: str - +from elinkapi import Elink, Record +from elinkapi.record import RecordResponse, AccessLimitation, JournalType +from elinkapi.geolocation import Geolocation +from elinkapi.identifier import Identifier +from elinkapi.related_identifier import RelatedIdentifier +from elinkapi.person import Person +from elinkapi.organization import Organization + +class TestClass(RecordResponse): + ... + # stuff class ELinkGetResponseModel(BaseModel): - osti_id: Optional[str] = Field(...) + osti_id: Optional[int] = Field(...) dataset_type: str = Field(default="SM") title: str = Field(...) 
- creators: str = Field(default="Kristin Persson") # replace with authors + persons: List[Person] contributors: List[Dict[str, str]] = Field( default=[{"first_name": "Materials", "last_name": "Project"}], description="List of Dict of first name, last name mapping", ) # no contributor - product_nos: str = Field(..., title="MP id") - accession_num: str = Field(..., title="MP id") - contract_nos: str = Field("AC02-05CH11231; EDCBEE") - originating_research_org: str = Field( - default="Lawrence Berkeley National Laboratory (LBNL), Berkeley, CA (United States)" - ) - publication_date: str = Field(...) - language: str = Field(default="English") - country: str = Field(default="US") - sponsor_org: str = Field( - default="USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22)" - ) + publication_date: datetime.date site_url: str = Field(...) - contact_name: str = Field(default="Kristin Persson") - contact_org: str = Field(default="LBNL") - contact_email: str = Field(default="feedback@materialsproject.org") - contact_phone: str = Field(default="+1(510)486-7218") - related_resource: str = Field("https://materialsproject.org/citing") - contributor_organizations: str = Field(default="MIT; UC Berkeley; Duke; U Louvain") - subject_categories_code: str = Field(default="36 MATERIALS SCIENCE") - keywords: str = Field(...) - description: str = Field(default="") doi: dict = Field( {}, title="DOI info", description="Mainly used during GET request" ) + mp_id: str | None = None + keywords: List[str] = None @classmethod - def get_title(cls, material: MaterialModel): - formula = material.pretty_formula + def from_elinkapi_record(cls, R): + gotResponse = ELinkGetResponseModel( + osti_id = R.osti_id, + title = R.title, + persons = R.persons, + # assume default contributors for now, creators vs contributors? 
+ publication_date = R.publication_date, + site_url = R.site_url, + doi = {"doi": R.doi}, + mp_id = next((id.value for id in R.identifiers if id.type == 'RN'), None), + keywords = R.keywords + ) + + return gotResponse + + def get_title(self): + formula = self.keywords[1] return "Materials Data on %s by Materials Project" % formula - @classmethod - def get_site_url(cls, mp_id): - return "https://materialsproject.org/materials/%s" % mp_id + def get_site_url(self): + return "https://materialsproject.org/materials/%s" % self.mp_id - @classmethod - def get_keywords(cls, material): - keywords = "; ".join( - ["crystal structure", material.pretty_formula, material.chemsys] - ) - return keywords + def get_keywords(self): + # keywords = "; ".join( + # ["crystal structure", material.pretty_formula, material.chemsys] + # ) + return self.keywords @classmethod def get_default_description(cls): @@ -113,11 +77,11 @@ def custom_to_dict(cls, elink_record) -> dict: return elink_record.dict(exclude={"osti_id", "doi"}) else: return elink_record.dict(exclude={"doi"}) - + class ElinkResponseStatusEnum(Enum): - SUCCESS = "SUCCESS" - FAILED = "FAILURE" + SUCCESS = "SUCCESS" + FAILED = "FAILURE" class ELinkPostResponseModel(BaseModel): @@ -142,178 +106,4 @@ def generate_doi_record(self): ) doi_collection_record.set_status(status=self.doi["@status"]) doi_collection_record.last_validated_on = datetime.now() - return doi_collection_record - - -class DOIRecordStatusEnum(str, Enum): - COMPLETED = "COMPLETED" - PENDING = "PENDING" - FAILURE = "FAILURE" - INIT = "INIT" - - -class DOIRecordModel(BaseModel): - material_id: str = Field(...) 
- doi: str = Field(default="") - bibtex: Optional[str] = None - status: DOIRecordStatusEnum - valid: bool = Field(False) - last_updated: datetime = Field( - default=datetime.now(), - title="DOI last updated time.", - description="Last updated is defined as either a Bibtex or status change.", - ) - created_at: datetime = Field( - default=datetime.now(), - title="DOI Created At", - description="creation time for this DOI record", - ) - last_validated_on: datetime = Field( - default=datetime.now(), - title="Date Last Validated", - description="Date that this data is last validated, " "not necessarily updated", - ) - elsevier_updated_on: datetime = Field( - default=datetime.now(), - title="Date Elsevier is updated", - description="If None, means never uploaded to elsevier", - ) - error: Optional[str] = Field( - default=None, description="None if no error, else error message" - ) - - class Config: - use_enum_values = True - - def set_status(self, status): - self.status = status - - def get_osti_id(self): - if self.doi is None or self.doi == "": - return "" - else: - return self.doi.split("/")[-1] - - def get_bibtex_abstract(self): - try: - if self.bibtex is None: - return "" - bib_db: bibtexparser.bibdatabase.BibDatabase = bibtexparser.loads( - self.bibtex - ) - if bib_db.entries: - return bib_db.entries[0]["abstractnote"] - except Exception as e: - print(e) - return "" - - -class OSTIDOIRecordModel(DOIRecordModel): - material_id: str = Field(...) 
- doi: str = Field(default="") - bibtex: Optional[str] = None - valid: bool = Field(False) - last_updated: datetime = Field( - default=datetime.now(), - title="DOI last updated time.", - description="Last updated is defined as either a Bibtex or status change.", - ) - - -class ElsevierPOSTContainerModel(BaseModel): - identifier: str = Field(default="", title="mp_id") - source: str = "MATERIALS_PROJECT" - date: str = datetime.now().date().isoformat().__str__() - title: str - description: str = "" - doi: str - authors: List[str] = ["Kristin Persson"] - url: str - type: str = "dataset" - dateAvailable: str = datetime.now().date().isoformat().__str__() - dateCreated: str = datetime.now().date().isoformat().__str__() - version: str = "1.0.0" - funding: str = "USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22)" - language: str = "en" - method: str = "Materials Project" - accessRights: str = "Public" - contact: str = "Kristin Persson " - dataStandard: str = "https://materialsproject.org/citing" - howToCite: str = "https://materialsproject.org/citing" - subjectAreas: List[str] = ["36 MATERIALS SCIENCE"] - keywords: List[str] - institutions: List[str] = ["Lawrence Berkeley National Laboratory"] - institutionIds: List[str] = ["AC02-05CH11231; EDCBEE"] - spatialCoverage: List[str] = [] - temporalCoverage: List[str] = [] - references: List[str] = ["https://materialsproject.org/citing"] - relatedResources: List[str] = ["https://materialsproject.org/citing"] - location: str = "1 Cyclotron Rd, Berkeley, CA 94720" - childContainerIds: List[str] = [] - - @classmethod - def get_url(cls, mp_id): - return "https://materialsproject.org/materials/%s" % mp_id - - @classmethod - def get_keywords(cls, material: MaterialModel): - return ["crystal structure", material.pretty_formula, material.chemsys] - - @classmethod - def get_default_description(cls): - return ( - "Computed materials data using density " - "functional theory calculations. 
These calculations determine " - "the electronic structure of bulk materials by solving " - "approximations to the Schrodinger equation. For more " - "information, see https://materialsproject.org/docs/calculations" - ) - - @classmethod - def get_date_created(cls, material: MaterialModel) -> str: - return material.created_at.date().__str__() - - @classmethod - def get_date_available(cls, material: MaterialModel) -> str: - return material.created_at.date().__str__() - - @classmethod - def get_title(cls, material: MaterialModel) -> str: - return material.pretty_formula - - @classmethod - def from_material_model(cls, material: MaterialModel, doi: str, description: str): - model = ElsevierPOSTContainerModel( - identifier=material.task_id, - title=material.pretty_formula, - doi=doi, - url="https://materialsproject.org/materials/%s" % material.task_id, - keywords=["crystal structure", material.pretty_formula, material.chemsys], - date=datetime.now().date().__str__(), - dateCreated=material.created_at.date().__str__(), - dateAvailable=ElsevierPOSTContainerModel.get_date_available(material), - description=description, - ) - return model - - -class ExplorerGetJSONResponseModel(BaseModel): - osti_id: str - title: str - report_number: str - doi: str - product_type: str - language: str - country_publication: str - description: str - site_ownership_code: str - publication_date: str - entry_date: str - contributing_organizations: str - authors: List[str] - subjects: List[str] - contributing_org: str - doe_contract_number: str - sponsor_orgs: List[str] - research_orgs: List[str] - links: List[Dict[str, str]] + return doi_collection_record \ No newline at end of file diff --git a/mpcite/utility2.py b/mpcite/recordresponse_example.txt similarity index 100% rename from mpcite/utility2.py rename to mpcite/recordresponse_example.txt diff --git a/mpcite/Visualizations.ipynb b/mpcite_legacy/Visualizations.ipynb similarity index 100% rename from mpcite/Visualizations.ipynb rename to 
mpcite_legacy/Visualizations.ipynb diff --git a/mpcite/Visualizations.pdf b/mpcite_legacy/Visualizations.pdf similarity index 100% rename from mpcite/Visualizations.pdf rename to mpcite_legacy/Visualizations.pdf diff --git a/mpcite_legacy/__init__.py b/mpcite_legacy/__init__.py new file mode 100644 index 0000000..ac9c1ea --- /dev/null +++ b/mpcite_legacy/__init__.py @@ -0,0 +1,8 @@ +"""The Materials Project's Citation Framework""" + +__version__ = "1.0.0" +__url__ = "https://github.com/materialsproject/MPCite" +__author__ = "Patrick Huck & Michael Wu" +__email__ = "phuck@lbl.gov" +__copyright__ = "Copyright 2020, The Materials Project" +__maintainer__ = "Patrick Huck" diff --git a/mpcite/config_ipynb.txt b/mpcite_legacy/config_ipynb.txt similarity index 100% rename from mpcite/config_ipynb.txt rename to mpcite_legacy/config_ipynb.txt diff --git a/mpcite/doi_builder.py b/mpcite_legacy/doi_builder.py similarity index 100% rename from mpcite/doi_builder.py rename to mpcite_legacy/doi_builder.py diff --git a/mpcite/main.py b/mpcite_legacy/main.py similarity index 100% rename from mpcite/main.py rename to mpcite_legacy/main.py diff --git a/mpcite_legacy/models.py b/mpcite_legacy/models.py new file mode 100644 index 0000000..b2fab65 --- /dev/null +++ b/mpcite_legacy/models.py @@ -0,0 +1,319 @@ +from pydantic import BaseModel, Field +from typing import List, Dict, Optional +from datetime import datetime +from enum import Enum +import bibtexparser + + +class ConnectionModel(BaseModel): + endpoint: str = Field(..., title="URL Endpoint of the connection") + username: str = Field(..., title="User Name") + password: str = Field(..., title="Password") + + +class RoboCrysModel(BaseModel): + material_id: str + last_updated: datetime + description: Optional[str] = None + error: Optional[str] = None + + @classmethod + def get_default_description(cls): + return ( + "Computed materials data using density " + "functional theory calculations. 
These calculations determine " + "the electronic structure of bulk materials by solving " + "approximations to the Schrodinger equation. For more " + "information, see https://materialsproject.org/docs/calculations" + ) + + +class MaterialModel(BaseModel): + last_updated: datetime = Field( + None, title="timestamp for the most recent calculation" + ) + updated_at: datetime = Field(None, title="alternative to last_updated") + created_at: datetime = Field( + None, + description="creation time for this material defined by when the first structure " + "optimization calculation was run", + ) + task_id: str = Field( + "", title="task id for this material. Also called the material id" + ) + # pretty_formula: str = Field(..., title="clean representation of the formula") + pretty_formula: str = Field(..., title="clean representation of the formula") + chemsys: str + + +class ELinkGetResponseModel(BaseModel): + osti_id: Optional[str] = Field(...) + dataset_type: str = Field(default="SM") + title: str = Field(...) + creators: str = Field(default="Kristin Persson") # replace with authors + contributors: List[Dict[str, str]] = Field( + default=[{"first_name": "Materials", "last_name": "Project"}], + description="List of Dict of first name, last name mapping", + ) # no contributor + product_nos: str = Field(..., title="MP id") + accession_num: str = Field(..., title="MP id") + contract_nos: str = Field("AC02-05CH11231; EDCBEE") + originating_research_org: str = Field( + default="Lawrence Berkeley National Laboratory (LBNL), Berkeley, CA (United States)" + ) + publication_date: str = Field(...) + language: str = Field(default="English") + country: str = Field(default="US") + sponsor_org: str = Field( + default="USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22)" + ) + site_url: str = Field(...) 
+ contact_name: str = Field(default="Kristin Persson") + contact_org: str = Field(default="LBNL") + contact_email: str = Field(default="feedback@materialsproject.org") + contact_phone: str = Field(default="+1(510)486-7218") + related_resource: str = Field("https://materialsproject.org/citing") + contributor_organizations: str = Field(default="MIT; UC Berkeley; Duke; U Louvain") + subject_categories_code: str = Field(default="36 MATERIALS SCIENCE") + keywords: str = Field(...) + description: str = Field(default="") + doi: dict = Field( + {}, title="DOI info", description="Mainly used during GET request" + ) + + @classmethod + def get_title(cls, material: MaterialModel): + formula = material.pretty_formula + return "Materials Data on %s by Materials Project" % formula + + @classmethod + def get_site_url(cls, mp_id): + return "https://materialsproject.org/materials/%s" % mp_id + + @classmethod + def get_keywords(cls, material): + keywords = "; ".join( + ["crystal structure", material.pretty_formula, material.chemsys] + ) + return keywords + + @classmethod + def get_default_description(cls): + return ( + "Computed materials data using density " + "functional theory calculations. These calculations determine " + "the electronic structure of bulk materials by solving " + "approximations to the Schrodinger equation. 
For more " + "information, see https://materialsproject.org/docs/calculations" + ) + + @classmethod + def custom_to_dict(cls, elink_record) -> dict: + if elink_record.osti_id is None or elink_record.osti_id == "": + return elink_record.dict(exclude={"osti_id", "doi"}) + else: + return elink_record.dict(exclude={"doi"}) + + +class ElinkResponseStatusEnum(Enum): + SUCCESS = "SUCCESS" + FAILED = "FAILURE" + + +class ELinkPostResponseModel(BaseModel): + osti_id: str + accession_num: str + product_nos: str + title: str + contract_nos: str + other_identifying_nos: Optional[str] + doi: Dict[str, str] + status: ElinkResponseStatusEnum + status_message: Optional[str] + + def generate_doi_record(self): + doi_collection_record = DOIRecordModel( + material_id=self.accession_num, + doi=self.doi["#text"], + status=self.doi["@status"], + bibtex=None, + valid=True, + last_validated_on=datetime.now(), + ) + doi_collection_record.set_status(status=self.doi["@status"]) + doi_collection_record.last_validated_on = datetime.now() + return doi_collection_record + + +class DOIRecordStatusEnum(str, Enum): + COMPLETED = "COMPLETED" + PENDING = "PENDING" + FAILURE = "FAILURE" + INIT = "INIT" + + +class DOIRecordModel(BaseModel): + material_id: str = Field(...) 
+ doi: str = Field(default="") + bibtex: Optional[str] = None + status: DOIRecordStatusEnum + valid: bool = Field(False) + last_updated: datetime = Field( + default=datetime.now(), + title="DOI last updated time.", + description="Last updated is defined as either a Bibtex or status change.", + ) + created_at: datetime = Field( + default=datetime.now(), + title="DOI Created At", + description="creation time for this DOI record", + ) + last_validated_on: datetime = Field( + default=datetime.now(), + title="Date Last Validated", + description="Date that this data is last validated, " "not necessarily updated", + ) + elsevier_updated_on: datetime = Field( + default=datetime.now(), + title="Date Elsevier is updated", + description="If None, means never uploaded to elsevier", + ) + error: Optional[str] = Field( + default=None, description="None if no error, else error message" + ) + + class Config: + use_enum_values = True + + def set_status(self, status): + self.status = status + + def get_osti_id(self): + if self.doi is None or self.doi == "": + return "" + else: + return self.doi.split("/")[-1] + + def get_bibtex_abstract(self): + try: + if self.bibtex is None: + return "" + bib_db: bibtexparser.bibdatabase.BibDatabase = bibtexparser.loads( + self.bibtex + ) + if bib_db.entries: + return bib_db.entries[0]["abstractnote"] + except Exception as e: + print(e) + return "" + + +class OSTIDOIRecordModel(DOIRecordModel): + material_id: str = Field(...) 
+ doi: str = Field(default="") + bibtex: Optional[str] = None + valid: bool = Field(False) + last_updated: datetime = Field( + default=datetime.now(), + title="DOI last updated time.", + description="Last updated is defined as either a Bibtex or status change.", + ) + + +class ElsevierPOSTContainerModel(BaseModel): + identifier: str = Field(default="", title="mp_id") + source: str = "MATERIALS_PROJECT" + date: str = datetime.now().date().isoformat().__str__() + title: str + description: str = "" + doi: str + authors: List[str] = ["Kristin Persson"] + url: str + type: str = "dataset" + dateAvailable: str = datetime.now().date().isoformat().__str__() + dateCreated: str = datetime.now().date().isoformat().__str__() + version: str = "1.0.0" + funding: str = "USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22)" + language: str = "en" + method: str = "Materials Project" + accessRights: str = "Public" + contact: str = "Kristin Persson " + dataStandard: str = "https://materialsproject.org/citing" + howToCite: str = "https://materialsproject.org/citing" + subjectAreas: List[str] = ["36 MATERIALS SCIENCE"] + keywords: List[str] + institutions: List[str] = ["Lawrence Berkeley National Laboratory"] + institutionIds: List[str] = ["AC02-05CH11231; EDCBEE"] + spatialCoverage: List[str] = [] + temporalCoverage: List[str] = [] + references: List[str] = ["https://materialsproject.org/citing"] + relatedResources: List[str] = ["https://materialsproject.org/citing"] + location: str = "1 Cyclotron Rd, Berkeley, CA 94720" + childContainerIds: List[str] = [] + + @classmethod + def get_url(cls, mp_id): + return "https://materialsproject.org/materials/%s" % mp_id + + @classmethod + def get_keywords(cls, material: MaterialModel): + return ["crystal structure", material.pretty_formula, material.chemsys] + + @classmethod + def get_default_description(cls): + return ( + "Computed materials data using density " + "functional theory calculations. 
These calculations determine " + "the electronic structure of bulk materials by solving " + "approximations to the Schrodinger equation. For more " + "information, see https://materialsproject.org/docs/calculations" + ) + + @classmethod + def get_date_created(cls, material: MaterialModel) -> str: + return material.created_at.date().__str__() + + @classmethod + def get_date_available(cls, material: MaterialModel) -> str: + return material.created_at.date().__str__() + + @classmethod + def get_title(cls, material: MaterialModel) -> str: + return material.pretty_formula + + @classmethod + def from_material_model(cls, material: MaterialModel, doi: str, description: str): + model = ElsevierPOSTContainerModel( + identifier=material.task_id, + title=material.pretty_formula, + doi=doi, + url="https://materialsproject.org/materials/%s" % material.task_id, + keywords=["crystal structure", material.pretty_formula, material.chemsys], + date=datetime.now().date().__str__(), + dateCreated=material.created_at.date().__str__(), + dateAvailable=ElsevierPOSTContainerModel.get_date_available(material), + description=description, + ) + return model + + +class ExplorerGetJSONResponseModel(BaseModel): + osti_id: str + title: str + report_number: str + doi: str + product_type: str + language: str + country_publication: str + description: str + site_ownership_code: str + publication_date: str + entry_date: str + contributing_organizations: str + authors: List[str] + subjects: List[str] + contributing_org: str + doe_contract_number: str + sponsor_orgs: List[str] + research_orgs: List[str] + links: List[Dict[str, str]] diff --git a/mpcite/send_collection.py b/mpcite_legacy/send_collection.py similarity index 100% rename from mpcite/send_collection.py rename to mpcite_legacy/send_collection.py diff --git a/mpcite/utility.py b/mpcite_legacy/utility.py similarity index 100% rename from mpcite/utility.py rename to mpcite_legacy/utility.py diff --git a/mpcite_legacy/utility2.py 
b/mpcite_legacy/utility2.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_elink_api.py b/tests/test_elink_api.py new file mode 100644 index 0000000..d1ade7d --- /dev/null +++ b/tests/test_elink_api.py @@ -0,0 +1,59 @@ +import os +from dotenv import load_dotenv + +from elinkapi import Elink, Record, exceptions +import pytest +from mpcite.models import ELinkGetResponseModel, TestClass + +from pymongo import MongoClient + + +load_dotenv() + +atlas_user = os.environ.get("atlas_user") +atlas_password = os.environ.get("atlas_password") +atlas_host = os.environ.get("atlas_host") +mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" + +api = Elink(token=os.environ.get("elink_api_key")) # target default is production E-link service. + +record = api.get_single_record(1190959) +type(record) + +ELinkGotRecordModel = ELinkGetResponseModel.from_elinkapi_record(record) + +print(ELinkGotRecordModel.get_title()) +print(ELinkGotRecordModel.get_site_url()) +print(ELinkGotRecordModel.get_keywords()) +print(ELinkGotRecordModel.get_default_description()) + + + +ELinkTestGetRecordModel = TestClass(**record.model_dump()) + +with MongoClient(mongo_uri) as client: + #get all material_ids and dois from doi collection + doi_collection = client["mp_core"]["dois"] + materials_to_update = list(doi_collection.find({}, {"_id": 0, "material_id": 1, "doi": 1}, limit=10)) + material_ids = [entry["material_id"] for entry in materials_to_update] + + # check # of material_ids from DOI collection vs amount in robocrys + + # get description for material_ids from robocrys collection + coll = client["mp_core_blue"]["robocrys"] + res = list(coll.find({"material_id": {"$in": material_ids}}, {"_id": 0, "material_id": 1, "description": 1})) + + # join on material_id + for doc in res: + mat = next(filter(lambda x: x["material_id"] == doc["material_id"], materials_to_update)) + doc["doi"] = mat["doi"] + + +# {"material_id": ..., "doi": ..., "description": ...} -> +# 
Record( +# template_fields ..., +# doi: ..., +# description: ..., +# fields_where_material_id_makes_sense: ..., +# ) + From 2005e5d2209f6953ea18b84f53a10434d9883cc8 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 26 Jun 2025 18:12:34 -0700 Subject: [PATCH 05/46] queried all desired data entries (stored as batched json files) on ELink found bug with rows greater than 100 on ElinkAPI query_records (144845 dois under 10.17188, 12 are not titled Materials Data On... (edge cases), 144833 Materials have DOIs) --- .gitignore | 1 + mpcite/recordresponse_example.txt | 92 +++++++++++++++++++++++ tests/manage_backfills.py | 49 ++++++++++++ tests/outputs.txt | 46 ++++++++++++ tests/prod_to_review.py | 120 ++++++++++++++++++++++++++++++ tests/test_elink_api.py | 96 ++++++++++++++++++------ 6 files changed, 381 insertions(+), 23 deletions(-) create mode 100644 tests/manage_backfills.py create mode 100644 tests/outputs.txt create mode 100644 tests/prod_to_review.py diff --git a/.gitignore b/.gitignore index a0311c9..ed84b8e 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ files/config_test.json .ipynb_checkpoints *.json .env +/json_pages \ No newline at end of file diff --git a/mpcite/recordresponse_example.txt b/mpcite/recordresponse_example.txt index e69de29..059edb8 100644 --- a/mpcite/recordresponse_example.txt +++ b/mpcite/recordresponse_example.txt @@ -0,0 +1,92 @@ +osti_id=1190959 +workflow_status='R' +access_limitations=['UNL'] +access_limitation_other=None +announcement_codes=None +availability=None +edition=None +volume=None +conference_information=None +conference_type=None +contract_award_date=None +country_publication_code='US' +doe_funded_flag=None +doe_supported_flag=False +doi='10.17188/1190959' +doi_infix=None +edit_reason=None +geolocations=None +format_information='' +invention_disclosure_flag=None +issue=None +journal_license_url=None +journal_name=None +journal_open_access_flag=None +journal_type=None +keywords=['crystal structure', 'Si', 'Si'] 
+languages=['English'] +monographic_title=None +opn_addressee=None +opn_declassified_date=None +opn_declassified_status=None +opn_document_categories=None +opn_document_location=None +opn_fieldoffice_acronym_code=None +other_information=None +ouo_release_date=None +pams_publication_status=None +pams_publication_status_other=None +pams_authors=None +pams_editors=None +pams_product_sub_type=None +pams_patent_country_code=None +pams_transnational_patent_office=None +paper_flag=False +patent_assignee=None +patent_file_date=None +patent_priority_date=None +pdouo_exemption_number=None +peer_reviewed_flag=False +product_size=None +product_type='DA' +product_type_other=None +prot_flag=None +prot_data_other=None +prot_release_date=None +publication_date=datetime.date(2020, 7, 15) +publication_date_text='07/15/2020' +publisher_information=None +related_doc_info='https://materialsproject.org/citing' +released_to_osti_date=None +releasing_official_comments=None +report_period_end_date=None +report_period_start_date=None +report_types=None +report_type_other=None +sbiz_flag=None +sbiz_phase=None +sbiz_previous_contract_number=None +sbiz_release_date=None +site_ownership_code='LBNL-MP' +site_unique_id='mp-149' +subject_category_code=['36'] +subject_category_code_legacy=None +title='Materials Data on Si by Materials Project' +description='Si is diamond structured and crystallizes in the cubic Fd-3m space group. The structure is three-dimensional. Si is bonded to four equivalent Si atoms to form corner-sharing SiSi4 tetrahedra. All Si–Si bond lengths are 2.37 Å.' 
+identifiers=[Identifier(type='CN_DOE', value='AC02-05CH11231'), Identifier(type='CN_NONDOE', value='EDCBEE'), Identifier(type='RN', value='mp-149')] +persons=[Person(type='CONTACT', first_name='Kristin', middle_name=None, last_name='Persson', orcid=None, phone='+1(510)486-7218', email=['feedback@materialsproject.org'], affiliations=[Affiliation(name='LBNL', ror_id=None)], contributor_type=None)] +organizations=[Organization(type='CONTRIBUTING', name='The Materials Project', contributor_type='ResearchGroup', identifiers=[], ror_id=None), Organization(type='RESEARCHING', name='LBNL Materials Project', contributor_type=None, identifiers=[], ror_id=None), Organization(type='SPONSOR', name='USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22)', contributor_type=None, identifiers=[], ror_id=None), Organization(type='RESEARCHING', name='Lawrence Berkeley National Laboratory (LBNL), Berkeley, CA (United States)', contributor_type=None, identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='MIT', contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='UC Berkeley', contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='Duke', contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='U Louvain', contributor_type='Other', identifiers=[], ror_id=None)] related_identifiers=[RelatedIdentifier(type='DOI', relation='IsReferencedBy', value='10.1103/physrevmaterials.4.013401')] +site_url='https://materialsproject.org/materials/mp-149' +revision=18 +added_by=234169 +edited_by=None +collection_type='DOE_LAB' +date_metadata_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 275000, tzinfo=TzInfo(UTC)) +date_metadata_updated=datetime.datetime(2021, 7, 15, 2, 10, 43, 372000, tzinfo=TzInfo(UTC)) +date_submitted_to_osti_first=datetime.datetime(2015, 7, 7, 22, 9, 5, 808000, tzinfo=TzInfo(UTC)) +date_submitted_to_osti_last=datetime.datetime(2021, 7, 
15, 2, 10, 42, 407000, tzinfo=TzInfo(UTC)) +date_released=datetime.datetime(2021, 7, 15, 2, 10, 43, 240000, tzinfo=TzInfo(UTC)) +sensitivity_flag='U' +hidden_flag=False +media=[MediaInfo(media_id=841489, revision=1, osti_id=1190959, status='C', added_by=None, document_page_count=1, mime_type='text/html', media_title=None, media_location='O', media_source='DOE2416API', date_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 875000, tzinfo=TzInfo(UTC)), date_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 78000, tzinfo=TzInfo(UTC)), date_valid_start=None, date_valid_end=None, files=[MediaFile(media_file_id=4514486, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='O', url_type='O', url='https://materialsproject.org/materials/mp-149', added_by=None, document_page_count=None, file_size_bytes=None, duration_seconds=None, subtitle_tracks=None, video_tracks=None, mime_type='text/html', media_source='DOE2416API', date_file_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 875000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 52, 857000, tzinfo=TzInfo(UTC))), MediaFile(media_file_id=4515065, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='C', url_type='L', url=None, added_by=None, document_page_count=1, file_size_bytes=15546, duration_seconds=0, subtitle_tracks=None, video_tracks=None, mime_type='text/html', media_source='OFF_SITE_DOWNLOAD', date_file_added=datetime.datetime(2015, 7, 8, 2, 50, 52, 877000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 96000, tzinfo=TzInfo(UTC))), MediaFile(media_file_id=4515066, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='T', url_type='L', url=None, added_by=None, document_page_count=1, file_size_bytes=5593, duration_seconds=0, subtitle_tracks=None, video_tracks=None, mime_type='text/plain', media_source='TEXT_EXTRACTION', date_file_added=datetime.datetime(2015, 7, 8, 2, 50, 53, 
78000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 83000, tzinfo=TzInfo(UTC)))])] +audit_logs=[] \ No newline at end of file diff --git a/tests/manage_backfills.py b/tests/manage_backfills.py new file mode 100644 index 0000000..a835456 --- /dev/null +++ b/tests/manage_backfills.py @@ -0,0 +1,49 @@ +# This script will see how many documents in ELink, i.e. ones with a DOI, are not accounted for in the internal DOI collection. + +from elinkapi import Elink, Query, Record + +import os +from dotenv import load_dotenv + +load_dotenv() # depends on the root directory from which you run your python scripts. + +api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) + + +query1 = api.query_records(rows=1000) + +materials_with_dois : list[Record] = [] + +for page in query1: + print(f"Now on Page: {page.title}") + print(f"Material_ID: {page.site_unique_id} and DOI: http://doi.org/{page.doi}") + + if page.site_unique_id.startswith("mp-"): + materials_with_dois.append(page) + + # for record in page.data: + # if record.site_unique_id.startswith("mp-"): + # materials_with_dois.append(record) + + + +# set_q1 = [page for page in query1] +# set_q2 = [page for page in query2] + +# set_diffq1q2 = set(set_q1) - set(set_q2) +# print (f"Difference matched {len(set)} records") + +# filtered = [ +# page for page in query1 +# if page.title.lower().startswith("materials data on") +# ] + +# print (f"Filtered Query1 has {len(filtered)} records") + +# paginate through ALL results +# for page in query1: +# print(page.title) +# print(f"Material_ID: {page.site_unique_id} and DOI: http://doi.org/{page.doi}") + +# for record in page.data: +# print (f"OSTI ID: {record.osti_id} Title: {record.title}") \ No newline at end of file diff --git a/tests/outputs.txt b/tests/outputs.txt new file mode 100644 index 0000000..8d188e7 --- /dev/null +++ b/tests/outputs.txt @@ -0,0 +1,46 @@ +(mpcite-env) 
C:\Users\ongha\OneDrive\Documents\GitHub\MPCite>C:/Users/ongha/anaconda3/envs/mpcite-env/python.exe c:/Users/ongha/OneDrive/Documents/GitHub/MPCite/tests/prod_to_review.py + +Query retrieved 144845 record(s) +Page finished. Now at 500 data entries. 0 edge cases found. +Page finished. Now at 1000 data entries. 0 edge cases found. +Page finished. Now at 1500 data entries. 0 edge cases found. +Page finished. Now at 2000 data entries. 0 edge cases found. +Page finished. Now at 2500 data entries. 0 edge cases found. +Page finished. Now at 3000 data entries. 0 edge cases found. +Page finished. Now at 3500 data entries. 0 edge cases found. +Page finished. Now at 4000 data entries. 0 edge cases found. +Page finished. Now at 4500 data entries. 0 edge cases found. +Page finished. Now at 5000 data entries. 0 edge cases found. +Page finished. Now at 5500 data entries. 0 edge cases found. +Page finished. Now at 6000 data entries. 0 edge cases found. +Page finished. Now at 6500 data entries. 0 edge cases found. +Page finished. Now at 7000 data entries. 0 edge cases found. +Page finished. Now at 7500 data entries. 0 edge cases found. +Page finished. Now at 8000 data entries. 0 edge cases found. +Page finished. Now at 8500 data entries. 0 edge cases found. +Page finished. Now at 9000 data entries. 0 edge cases found. +Page finished. Now at 9500 data entries. 0 edge cases found. +Page finished. Now at 10000 data entries. 0 edge cases found. +Page finished. Now at 10500 data entries. 0 edge cases found. +Page finished. Now at 11000 data entries. 0 edge cases found. +Page finished. Now at 11500 data entries. 0 edge cases found. +Page finished. Now at 12000 data entries. 0 edge cases found. +Page finished. Now at 12500 data entries. 0 edge cases found. +Page finished. Now at 13000 data entries. 0 edge cases found. +Page finished. Now at 13500 data entries. 0 edge cases found. +Page finished. Now at 14000 data entries. 0 edge cases found. +Page finished. Now at 14500 data entries. 
0 edge cases found. + +Traceback (most recent call last): + File "C:\Users\ongha\anaconda3\envs\mpcite-env\Lib\site-packages\elinkapi\query.py", line 95, in __next__ + record = self.data.pop() +IndexError: pop from empty list + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "c:\Users\ongha\OneDrive\Documents\GitHub\MPCite\tests\prod_to_review.py", line 29, in + record = next(query) + File "C:\Users\ongha\anaconda3\envs\mpcite-env\Lib\site-packages\elinkapi\query.py", line 108, in __next__ + raise StopIteration +StopIteration \ No newline at end of file diff --git a/tests/prod_to_review.py b/tests/prod_to_review.py new file mode 100644 index 0000000..87e311d --- /dev/null +++ b/tests/prod_to_review.py @@ -0,0 +1,120 @@ +from elinkapi import Elink, Query, Record + +import os +from dotenv import load_dotenv + +import json + +load_dotenv() # depends on the root directory from which you run your python scripts. + +review_endpoint = "https://review.osti.gov/elink2api/" + +prod_api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) +review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) + +print(prod_api.query_records()) + +rows_per_page = 100 + +# query production +query = prod_api.query_records(rows=rows_per_page) +print(f"Query retrieved {query.total_rows} record(s)") + +count_materials_data = 0 +count_MaterialsDataOn = 0 +cwd = os.getcwd() +page_number = 0 +page_json_list = [] + +for record in query: + # increment counter + count_materials_data = count_materials_data + 1 + print(f"On record #{count_materials_data}, next url is {query.next_url}, previous url is {query.previous_url}") + + # see if the record is a Materials Data on record + if record.title.startswith("Materials Data on"): + # increment the MaterialsDataOn counter + count_MaterialsDataOn = count_MaterialsDataOn + 1 + + # prepare the new record for the review environment, remove the OSTI ID, 
and add its model_dump to the list of json objects for the page. + new_record = record + new_record_dict = new_record.model_dump(exclude_none=True) + + new_record_osti_id = new_record_dict.pop("osti_id") # now new_record_dict does not have the osti_id key. + js = json.dumps(new_record_dict, default=str) # datetime objects are not JSON serializable, so we use default=str to convert them to strings. + + page_json_list.append(js) + + # TODO: take the new_record_dict and make it into a new post to the review environment and save the RecordResponse. + + else: + print(f"Found edge case: {record.title}") + + if count_materials_data % rows_per_page == 0: + # create/open, write, and close new json file + page_number = count_materials_data / rows_per_page + path = f'/json_pages/page_number_{page_number}' + fp = open(cwd+path, 'a') + + for js in page_json_list: + fp.write(js) + fp.write("\n") + + fp.close() + page_json_list = [] + + print(f"Page {page_number} finished. Now at {count_materials_data} data entries. 
{count_materials_data - count_MaterialsDataOn} edge cases found.") + +# print remainder of records if not a full page after for loop exits +page_number = page_number + 1 +path = f'/json_pages/page_number_{page_number}' +fp = open(cwd+path, 'a') +for js in page_json_list: + fp.write(js) + fp.write("\n") +fp.close() + +# # if contains materials data on, then add to batch +# for count_materials_data < query.total_rows: + +# # print(f"The length of the query is now {len(query.data)}") +# record = next(query) +# count_materials_data = count_materials_data + 1 + +# if record.title.startswith("Materials Data on"): +# count_MaterialsDataOn = count_MaterialsDataOn + 1 + +# new_record = record +# new_record_dict = new_record.model_dump(exclude_none=True) + +# new_record_osti_id = new_record_dict.pop("osti_id") + +# page_dict[f"Entry OSTI_ID {new_record_osti_id}"] = new_record_dict + +# # TODO: take the new_record_dict and make it into a new post to the review environment and save the RecordResponse. + + + +# if count_materials_data % rows_per_page == 0: +# # if a page has been fully consummed, then print the new batched dictionary to a json file. + +# js = json.dumps(page_dict, default=str) + +# # open new json file if not exist it will create +# cwd = os.getcwd() +# path = f'/json_pages/page_number_{count_materials_data/rows_per_page}' +# fp = open(cwd+path, 'a') + +# # write to json file +# fp.write(js) + +# # close the connection to the file and empty the dict +# fp.close() +# page_dict = {} + +# print(f"Page {(count_materials_data / rows_per_page)} finished. Now at {count_materials_data} data entries. 
{count_materials_data - count_MaterialsDataOn} edge cases found.") + +# model_dump exclude_none=True, remove null keys +# pop osti_id --> save batch to json files +# make new record +# post to review_api diff --git a/tests/test_elink_api.py b/tests/test_elink_api.py index d1ade7d..80afba7 100644 --- a/tests/test_elink_api.py +++ b/tests/test_elink_api.py @@ -6,7 +6,7 @@ from mpcite.models import ELinkGetResponseModel, TestClass from pymongo import MongoClient - +import pymongo load_dotenv() @@ -15,38 +15,41 @@ atlas_host = os.environ.get("atlas_host") mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" -api = Elink(token=os.environ.get("elink_api_key")) # target default is production E-link service. +api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) # target default is production E-link service. -record = api.get_single_record(1190959) -type(record) +### Grabbing an existing record -ELinkGotRecordModel = ELinkGetResponseModel.from_elinkapi_record(record) +# record = api.get_single_record(mp-id) # test for silicon -print(ELinkGotRecordModel.get_title()) -print(ELinkGotRecordModel.get_site_url()) -print(ELinkGotRecordModel.get_keywords()) -print(ELinkGotRecordModel.get_default_description()) +# type(record) +# ELinkGotRecordModel = ELinkGetResponseModel.from_elinkapi_record(record) +# print(ELinkGotRecordModel.get_title()) +# print(ELinkGotRecordModel.get_site_url()) +# print(ELinkGotRecordModel.get_keywords()) +# print(ELinkGotRecordModel.get_default_description()) -ELinkTestGetRecordModel = TestClass(**record.model_dump()) +# ELinkTestGetRecordModel = TestClass(**record.model_dump()) -with MongoClient(mongo_uri) as client: - #get all material_ids and dois from doi collection - doi_collection = client["mp_core"]["dois"] - materials_to_update = list(doi_collection.find({}, {"_id": 0, "material_id": 1, "doi": 1}, limit=10)) - material_ids = [entry["material_id"] for entry in materials_to_update] +### Making a new record + +# with 
MongoClient(mongo_uri) as client: +# #get all material_ids and dois from doi collection +# doi_collection = client["mp_core"]["dois"] +# materials_to_update = list(doi_collection.find({}, {"_id": 0, "material_id": 1, "doi": 1}, limit=10)) +# material_ids = [entry["material_id"] for entry in materials_to_update] - # check # of material_ids from DOI collection vs amount in robocrys +# # check # of material_ids from DOI collection vs amount in robocrys - # get description for material_ids from robocrys collection - coll = client["mp_core_blue"]["robocrys"] - res = list(coll.find({"material_id": {"$in": material_ids}}, {"_id": 0, "material_id": 1, "description": 1})) +# # get description for material_ids from robocrys collection +# coll = client["mp_core_blue"]["robocrys"] +# res = list(coll.find({"material_id": {"$in": material_ids}}, {"_id": 0, "material_id": 1, "description": 1})) - # join on material_id - for doc in res: - mat = next(filter(lambda x: x["material_id"] == doc["material_id"], materials_to_update)) - doc["doi"] = mat["doi"] +# # join on material_id +# for doc in res: +# mat = next(filter(lambda x: x["material_id"] == doc["material_id"], materials_to_update)) +# doc["doi"] = mat["doi"] # {"material_id": ..., "doi": ..., "description": ...} -> @@ -57,3 +60,50 @@ # fields_where_material_id_makes_sense: ..., # ) +# with the client open +with MongoClient(mongo_uri) as client: + # get all dois from the collection + doi_collection = client["mp_core"]["dois"] + materials_to_update = list(doi_collection.find({}, {"_id": 0, "doi": 1, "material_id": 1}, limit=2)) + + # from the doi collection, grab the material_id and doi of each material + material_ids = [entry["material_id"] for entry in materials_to_update] + + # additionally, gain the osti id from the doi + osti_ids = [entry["doi"].split("10.17188/")[1] for entry in materials_to_update] + + # additionally, grab the description of each material from the robocrys + coll = client["mp_core_blue"]["robocrys"] # 
grabs robocrys collection from active database + res = list(coll.find({"material_id": {"$in": material_ids}}, {"_id": 0, "material_id": 1, "description": 1})) # grabs the material id and description of entries in the collection + descriptions = [entry["description"] for entry in res] + + # for each material (and its material_id, doi, and osti_id) + for i in range(len(materials_to_update)): + internal_material_id = material_ids[i] + internal_osti_id = osti_ids[i] + internal_description = descriptions[i] + + # get_single_record(osti_id) + record = api.get_single_record(internal_osti_id) + + print(f"\n \n \nPrinting what is currently on ELINK for {internal_material_id}*****************************************") + print(record) + + if internal_material_id == record.site_unique_id: + # update description + record.description = "testTESTtestTESTtest" + + print(f"\n \n \nPrinting record for {internal_material_id}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + print(record) + + # # post updated record + # try: + # saved_record = api.post_new_record(record, "save") + # except exceptions.BadRequestException as ve: + # ... + # # ve.message = "Site Code AAAA is not valid." 
+ # # ve.errors provides more details: + # # [{"status":"400", "detail":"Site Code AAAA is not valid.", "source":{"pointer":"site_ownership_code"}}] + + + From 8700859bc3ce6f3c84e43f61d58ff7fddc83ebd6 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 3 Jul 2025 10:49:34 -0700 Subject: [PATCH 06/46] almost done with pipeline, need to incorporate pymongo steps + new doi collection --- mpcite/doi_builder.py | 155 +++++++++++++++++++++++++++++++++ mpcite/pipeline.py | 84 ++++++++++++++++++ tests/file_to_jsonForUpload.py | 152 ++++++++++++++++++++++++++++++++ 3 files changed, 391 insertions(+) create mode 100644 mpcite/doi_builder.py create mode 100644 mpcite/pipeline.py create mode 100644 tests/file_to_jsonForUpload.py diff --git a/mpcite/doi_builder.py b/mpcite/doi_builder.py new file mode 100644 index 0000000..8e3bde9 --- /dev/null +++ b/mpcite/doi_builder.py @@ -0,0 +1,155 @@ +''' +doi_builder.py +A doi collection must store the following information about a document: +- doi number +- title +- osti id (ELink's Unique Identifier) +- material id (MP's Unique Identifier) +- date of system entry date (Date (UTC) of this revision's inception) +- date of last update (date edited or date_submitted_to_osti_last) (take from ELink) +- workflow status and the date (?) 
of each step: + - SA, saved, in a holding state, not to be processed + - SR, submit to releasing official "released_to_osti_date, as entered by releasing official" + - SO, submit to OSTI + - SF, submitted but failed validation + - SX, submitted but failed to release + - SV, submitted and validated + - R, released +- + +Here is an example of RecordResponse +RecordResponse( + osti_id=2523296, + workflow_status='SA', + access_limitations=['UNL'], + access_limitation_other=None, + announcement_codes=None, + availability=None, + edition=None, + volume=None, + + # Identifiers + identifiers=[ + Identifier(type='CN_NONDOE', value='EDCBEE'), + Identifier(type='CN_DOE', value='AC02-05CH11231'), + Identifier(type='RN', value='mp-1037659'), + ], + + # People involved + persons=[ + Person( + type='CONTACT', + first_name='Kristin', + last_name='Persson', + phone='+1(510)486-7218', + email=['feedback@materialsproject.org'], + affiliations=[ + Affiliation(name='LBNL') + ] + ) + ], + + # Organizations + organizations=[ + Organization(name='The Materials Project', type='CONTRIBUTING', contributor_type='ResearchGroup'), + Organization(name='LBNL Materials Project', type='RESEARCHING'), + Organization(name='Lawrence Berkeley National Laboratory (LBNL), Berkeley, CA (United States)', type='RESEARCHING'), + Organization(name='USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22)', type='SPONSOR'), + Organization(name='MIT', type='CONTRIBUTING', contributor_type='Other'), + Organization(name='UC Berkeley', type='CONTRIBUTING', contributor_type='Other'), + Organization(name='Duke', type='CONTRIBUTING', contributor_type='Other'), + Organization(name='U Louvain', type='CONTRIBUTING', contributor_type='Other'), + ], + + # Metadata + country_publication_code='US', + doe_supported_flag=False, + doi='10.17188/1714845', + edit_reason='Record updated upon request of LBNL-MP to remove authors and replace with a single collaborator.', + format_information='', + 
invention_disclosure_flag=None, + paper_flag=False, + peer_reviewed_flag=False, + product_type='DA', + publication_date=datetime.date(2020, 4, 30), + publication_date_text='04/30/2020', + site_url='https://materialsproject.org/materials/mp-1037659', + site_ownership_code='LBNL-MP', + site_unique_id='mp-1037659', + subject_category_code=['36'], + title='Materials Data on RbYMg30O32 by Materials Project', + + # Description + description=""" + RbMg₃₀YO₃₂ is Molybdenum Carbide MAX Phase-derived and crystallizes in the tetragonal P4/mmm space group. + Rb¹⁺ is bonded to six O²⁻ atoms to form RbO₆ octahedra... + (Truncated here for brevity, full description is included in original) + """, + + keywords=['crystal structure', 'RbYMg30O32', 'Mg-O-Rb-Y'], + languages=['English'], + related_doc_info='https://materialsproject.org/citing', + + # Media + media=[ + MediaInfo( + media_id=1908478, + osti_id=2523296, + status='C', + mime_type='text/html', + files=[ + MediaFile( + media_file_id=12017281, + media_type='O', + url='https://materialsproject.org/materials/mp-1037659' + ), + MediaFile( + media_file_id=12017284, + media_type='C', + mime_type='text/html', + media_source='OFF_SITE_DOWNLOAD' + ) + ] + ) + ], + + # Audit logs + audit_logs=[ + AuditLog( + messages=['Revision status is not correct, found SA'], + status='FAIL', + type='RELEASER', + audit_date=datetime.datetime(2025, 6, 30, 22, 30, 24, 865000, tzinfo=TzInfo(UTC)) + ) + ], + + # Timestamps + date_metadata_added=datetime.datetime(2025, 6, 30, 22, 30, 20, 495000, tzinfo=TzInfo(UTC)), + date_metadata_updated=datetime.datetime(2025, 6, 30, 22, 30, 22, 247000, tzinfo=TzInfo(UTC)), + + # Misc + revision=2, + added_by=139001, + edited_by=139001, + collection_type='DOE_LAB', + hidden_flag=False +) +''' + +from pydantic import BaseModel, ConfigDict +import datetime +from maggma.core.builder import Builder +from maggma.stores import Store + +class doi_model(BaseModel): + # identifiers + doi: str + title: str + osti_id: int + 
material_id: str + description: str + + # time stamps + date_last_update: datetime.datetime + publication_date : datetime.datetime + elink_workflow_status: str diff --git a/mpcite/pipeline.py b/mpcite/pipeline.py new file mode 100644 index 0000000..67fe15e --- /dev/null +++ b/mpcite/pipeline.py @@ -0,0 +1,84 @@ +import os +import json +from elinkapi import Elink, Record +from dotenv import load_dotenv + +import requests +from elinkapi.utils import Validation + +from pymongo import MongoClient +import pymongo + +from timeit import default_timer as timer +import logging +import datetime +from doi_builder import * + +load_dotenv() # depends on the root directory from which you run your python scripts. + +review_endpoint = "https://review.osti.gov/elink2api/" + +prod_api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) +review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) + +atlas_user = os.environ.get("atlas_user") +atlas_password = os.environ.get("atlas_password") +atlas_host = os.environ.get("atlas_host") +mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" + +failed_osti_ids = [] + +query = prod_api.query_records() + +# for every record in the OSTI production environment: +for record in query: + # flag for update performance + update_success = False + + material_id = record.site_unique_id + + with MongoClient(mongo_uri) as client: + coll = client["mp_core_blue"]["robocrys"] + res = coll.find_one({"material_id" : material_id}) + + if res != None: + robocrys_description = res["description"] + + # what if there is no document in robocrys found? 
+ + # if the description of the record on Elink doesnt match what is in the robocrys collection: + if record.description != robocrys_description: + # directly update the description of the record via the record response + record.description = robocrys_description + + # and directly update the identifier for sponsoring org + for entry in record.organizations: + if entry["type"] == "SPONSOR": + entry["identifiers"] = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + + try: + # send update to the record with the record response # update_record(osti_id, record, state="save") + record_response = prod_api.update_record(record.osti_id, record, state="save") + update_success = True + + except: + logging.debug("The update failed to save!") + # add the osti_id of the failed update to failed_osti_ids + failed_osti_ids.append(record.osti_id) + + # if the update worked... + if update_success == True: + # save the record response returned with sending the update, done above + # convert that record response into a doi_model + doi_model = RecordResponse_to_doi_model(record_response) + + # upload that doi_model as a document to the new doi collection in mp_core + upload_doi_document_model_to_collection(doi_model, MongoClient, collection_name) + +# else if the description on Elink matches what is in the robocrys collection: +# convert that record into a doi_model +# upload that doi_model as a document to the new doi collection in mp_core, no updated needed! + +with open(f"failed_osti_ids_{str(datetime.datetime.now())}.txt", 'w') as output: # change filepath as needed + for id in failed_osti_ids: + output.write(str(id) + '\n') # i'm pretty sure it's a string already though... 
\ No newline at end of file diff --git a/tests/file_to_jsonForUpload.py b/tests/file_to_jsonForUpload.py new file mode 100644 index 0000000..aaa0886 --- /dev/null +++ b/tests/file_to_jsonForUpload.py @@ -0,0 +1,152 @@ +import os +import json +from elinkapi import Elink, Record +from dotenv import load_dotenv + +import requests +from elinkapi.utils import Validation + +from pymongo import MongoClient +import pymongo + +from timeit import default_timer as timer + +load_dotenv() # depends on the root directory from which you run your python scripts. + +review_endpoint = "https://review.osti.gov/elink2api/" + +prod_api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) +review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) + + +print(review_api.get_single_record(2525340)) +raise + +atlas_user = os.environ.get("atlas_user") +atlas_password = os.environ.get("atlas_password") +atlas_host = os.environ.get("atlas_host") +mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" + +cwd = os.getcwd() +path = "/json_pages/page_number_4.0" # IT'S ONLY DOING ONE FILE RIGHT NOW LOL +file = open(cwd + path, "r") + +update_counter = 0 +records_checked = 0 + +def delete_record(api, osti_id, reason): + """Delete a record by its OSTI ID.""" + response = requests.delete(f"{api.target}records/{osti_id}?reason={reason}", headers={"Authorization": f"Bearer {api.token}"}) + Validation.handle_response(response) + return response.status_code == 204 # True if deleted successfully + +def emptyReviewAPI(reason): + allDeleted = True + for record in review_api.query_records(): + delete_record(review_api, record.osti_id, reason) + +start = timer() + +# Post an updated json + +postUnedited = False + +for line in file: + js = json.loads(line.strip()) + + for entry in js["organizations"]: + if entry["type"] == "SPONSOR": + entry["identifiers"] = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + + material_id = js["site_unique_id"] + + 
robocrys_description = js["description"] + + with MongoClient(mongo_uri) as client: + coll = client["mp_core_blue"]["robocrys"] + res = coll.find_one({"material_id" : material_id}) + records_checked += 1 + + if res != None: + robocrys_description = res["description"] + + # see if an update to the description is necessary, if it is, then update the description and post a new record. + if postUnedited or (robocrys_description != None and js["description"] != robocrys_description): #if a robocrys_description was found internally and it doesn't match what ELink has record... + js["description"] = "OLD WAS UPDATED, THEN IT WAS POSTED: " + robocrys_description + my_record = Record(**js) + + saved_record = None + try: + # The API will now return an error code on this call + # because "AAAA" is not a valid site_ownership_code + + saved_record = review_api.post_new_record(my_record, state="submit") + update_counter += 1 + + print(f"NEW RECORD POSTED: {saved_record.osti_id}") + raise + except: + print(f"Record failed to post!: {my_record.doi}. Robocrys Collection Had Description {robocrys_description[0:50]}... Prod_Env ELink Had {my_record.description[37:87]}...") + raise + + if update_counter >= 10000: + break + +end = timer() +print(f"Records Updated and/or Posted: {update_counter} \nRecords Checked in Total: {records_checked}. 
\nIt took {end - start} seconds") + +####################################################### +# JUST POST JSON, Then update posted json Later +# post_counter = 0 +# records_checked = 0 + +# for line in file: +# js = json.loads(line.strip()) + +# material_id = js["site_unique_id"] + +# # always post, no update +# my_record = Record(**js) + +# saved_record = None +# try: +# # The API will now return an error code on this call +# # because "AAAA" is not a valid site_ownership_code + +# # posts an unupdated record +# saved_record = review_api.post_new_record(my_record, "save") +# post_counter += 1 + +# print("\n\n NEW RECORD POSTED") +# print(saved_record) + +# robocrys_description = js["description"] + +# with MongoClient(mongo_uri) as client: +# coll = client["mp_core_blue"]["robocrys"] +# res = coll.find_one({"material_id" : material_id}) +# records_checked += 1 + +# if res != None: +# robocrys_description = res["description"] + +# if robocrys_description != None and js["description"] != robocrys_description: # if an update is needed +# # update the js["description"] +# js["description"] = "OLD WAS POSTED, THEN RECORD WITH NEW DESCRIPTION UPDATED IT: " + robocrys_description + +# # turn it into a new record +# new_updated_record = Record(**js) + +# # use that new record to update what was just posted +# review_api.update_record(saved_record.osti_id, new_updated_record, "save") + +# except: +# print("Record failed to post!") + +# if post_counter >= 10000: +# break + +# end = timer() +# print(f"Records Updated and/or Posted: {update_counter} \n Records Checked in Total: {records_checked}. 
It took {end - start} seconds") + +###################################################### \ No newline at end of file From 11c48e518fc1f510c50f280f6c1cd196dacecf46 Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Thu, 3 Jul 2025 12:15:20 -0700 Subject: [PATCH 07/46] template for core --- mpcite/core.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 mpcite/core.py diff --git a/mpcite/core.py b/mpcite/core.py new file mode 100644 index 0000000..1159b66 --- /dev/null +++ b/mpcite/core.py @@ -0,0 +1,36 @@ +from typing import TypeAlias + +from elinkapi import Elink +from elinkapi.record import RecordResponse +from pymongo import MongoClient + +OstiID: TypeAlias = int + + +def find_out_of_date_records( + client: MongoClient, + robocrys_db: str, + robocrys_collection: str, + doi_db: str, + doi_collection, +) -> list[OstiID]: + robocrys = client.robocrys_db.robocrys_collection + doi = client.doi_db.doi_collection + + out_of_data_osti_ids = [] + + # robocrys docs newer than in doi + + return out_of_data_osti_ids + + +def update_existing_osti_record(*args, **kwargs) -> RecordResponse: ... + + +def submit_new_osti_record(*args, **kwargs) -> RecordResponse: ... + + +def update_state_of_osti_record(*args, **kwargs) -> RecordResponse: ... + + +def delete_osti_record(*args, **kwargs) -> RecordResponse: ... 
From ba3c372b31dc2eab8c5b0b6a25bad30233646f09 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 3 Jul 2025 13:56:05 -0700 Subject: [PATCH 08/46] push latest doi_builder and other thigns --- mpcite/doi_builder.py | 51 ++++++++++++++---- mpcite/pipeline.py | 117 +++++++++++++++++++++++++----------------- 2 files changed, 110 insertions(+), 58 deletions(-) diff --git a/mpcite/doi_builder.py b/mpcite/doi_builder.py index 8e3bde9..8ddd4c4 100644 --- a/mpcite/doi_builder.py +++ b/mpcite/doi_builder.py @@ -138,18 +138,49 @@ from pydantic import BaseModel, ConfigDict import datetime -from maggma.core.builder import Builder -from maggma.stores import Store class doi_model(BaseModel): # identifiers - doi: str - title: str - osti_id: int - material_id: str - description: str + doi: str # can be taken from ELink API + title: str # can be taken from ELink API + osti_id: str # can be taken from ELink API + material_id: str # can be taken from Robocrys Collection or ELink API # time stamps - date_last_update: datetime.datetime - publication_date : datetime.datetime - elink_workflow_status: str + date_record_entered_onto_ELink: datetime.datetime # can be taken from ELink API response + date_record_last_updated_on_Elink: datetime.datetime + + # status + elink_workflow_status: str # can be taken from ELink API + date_released: datetime.datetime + date_submitted_to_osti_first: datetime.datetime + date_submitted_to_osti_last: datetime.datetime + date_published: datetime.datetime # labelled as publication_date in RecordResponse of ELink API + +# hypothetically post an update or submit a new record and receive the RecordResponse +def RecordResponse_to_doi_model(recordresponse): + ''' + turns a recordresponse, which is returned from a save, submission, post, etc. 
into a doi_model object + ''' + params = { + "doi": recordresponse.doi, + "title": recordresponse.title, + "osti_id": str(recordresponse.osti_id), + "material_id": recordresponse.site_unique_id, + + "date_record_entered_onto_ELink": recordresponse.date_metadata_added, + "date_record_last_updated_on_Elink": recordresponse.date_metadata_updated, + + "elink_workflow_status": recordresponse.workflow_status, + "date_released": recordresponse.date_released, + # date_released_to_osti = recordresponse.released_to_osti_date, # what is the difference between these??? "Date record information was released to OSTI, as entered by releasing official." always seems to be none + "date_submitted_to_osti_first": recordresponse.date_submitted_to_osti_first, # date record was first submitted to OSTI for publication, maintained internally by E-Link + "date_submitted_to_osti_last": recordresponse.date_submitted_to_osti_last, # most recent date record information was submitted to OSTI. Maintained internally by E-Link. 
+ "date_published": recordresponse.publication_date + } + + return doi_model(**params) + +def upload_doi_document_model_to_collection(doi_model, client, collection): + x = collection.insert_one(doi_model).inserted_id + return x \ No newline at end of file diff --git a/mpcite/pipeline.py b/mpcite/pipeline.py index 67fe15e..869e65f 100644 --- a/mpcite/pipeline.py +++ b/mpcite/pipeline.py @@ -1,6 +1,7 @@ import os import json from elinkapi import Elink, Record +from elinkapi.record import RecordResponse from dotenv import load_dotenv import requests @@ -28,57 +29,77 @@ failed_osti_ids = [] -query = prod_api.query_records() +cwd = os.getcwd() +path = "/json_pages/" -# for every record in the OSTI production environment: -for record in query: - # flag for update performance - update_success = False +for filename in os.listdir(cwd+path): + logging.debug(f"Now extracting {filename}") + file = open(cwd + path + filename, "r") + for line in file: + record = RecordResponse(**json.loads(line.strip())) + record.osti_id = record.doi.split('/')[1] + # for every record in the OSTI production environment: + # flag for update performance + update_success = False - material_id = record.site_unique_id + material_id = record.site_unique_id - with MongoClient(mongo_uri) as client: - coll = client["mp_core_blue"]["robocrys"] - res = coll.find_one({"material_id" : material_id}) - - if res != None: - robocrys_description = res["description"] - - # what if there is no document in robocrys found? - - # if the description of the record on Elink doesnt match what is in the robocrys collection: - if record.description != robocrys_description: - # directly update the description of the record via the record response - record.description = robocrys_description + with MongoClient(mongo_uri) as client: # should I open this in or outside of the for loop? 
+ coll = client["mp_core_blue"]["robocrys"] + res = coll.find_one({"material_id" : material_id}) - # and directly update the identifier for sponsoring org - for entry in record.organizations: - if entry["type"] == "SPONSOR": - entry["identifiers"] = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] - - try: - # send update to the record with the record response # update_record(osti_id, record, state="save") - record_response = prod_api.update_record(record.osti_id, record, state="save") - update_success = True - - except: - logging.debug("The update failed to save!") - # add the osti_id of the failed update to failed_osti_ids - failed_osti_ids.append(record.osti_id) - - # if the update worked... - if update_success == True: - # save the record response returned with sending the update, done above - # convert that record response into a doi_model - doi_model = RecordResponse_to_doi_model(record_response) - - # upload that doi_model as a document to the new doi collection in mp_core - upload_doi_document_model_to_collection(doi_model, MongoClient, collection_name) - -# else if the description on Elink matches what is in the robocrys collection: -# convert that record into a doi_model -# upload that doi_model as a document to the new doi collection in mp_core, no updated needed! - -with open(f"failed_osti_ids_{str(datetime.datetime.now())}.txt", 'w') as output: # change filepath as needed + if res != None: + robocrys_description = res["description"] + + # what if there is no document in robocrys found? 
+ else: + logging.warning(f"No robocrys document was found to match the OSTI record: {record.osti_id}!") + + # if the description of the record on Elink doesnt match what is in the robocrys collection: + if res != None and record.description != robocrys_description: + # directly update the description of the record via the record response + record.description = robocrys_description + + # and directly update the identifier for sponsoring org + for entry in record.organizations: + if entry.type == "SPONSOR": + entry.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + break + + try: + # send update to the record with the record response # update_record(osti_id, record, state="save") + # record_response = prod_api.update_record(record.osti_id, record, state="save") + update_success = True + + except: + logging.debug("The update failed to save!") + # add the osti_id of the failed update to failed_osti_ids + failed_osti_ids.append(record.osti_id) + + # if the update worked... + if update_success == True: + # save the record response returned with sending the update, done above + # convert that record response into a doi_model + doi_model = RecordResponse_to_doi_model(record) #change later to record response + + # upload that doi_model as a document to the new doi collection in mp_core + # what is the collection + with MongoClient() as local_client: + collection = local_client["dois_test"]["dois"] + x = collection.insert_one(doi_model.dict(by_alias=True)).inserted_id + + # else if the description on Elink matches what is in the robocrys collection: + elif record.description == robocrys_description: + # convert that record into a doi_model + doi_model = RecordResponse_to_doi_model(record) + + # upload that doi_model as a document to the new doi collection in mp_core, no updated needed! 
+ with MongoClient() as local_client: + collection = local_client["dois_test"]["dois"] + x = collection.insert_one(doi_model).inserted_id + +cwd = os.getcwd() +path = f"/files/failed_osti_ids_{str(datetime.datetime.now())}.txt" +with open(cwd+path, 'w') as output: # change filepath as needed for id in failed_osti_ids: output.write(str(id) + '\n') # i'm pretty sure it's a string already though... \ No newline at end of file From a7473264a37d8e0c8623bbd0804498b365618990 Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Thu, 3 Jul 2025 14:05:10 -0700 Subject: [PATCH 09/46] find_out_of_date func --- mpcite/core.py | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/mpcite/core.py b/mpcite/core.py index 1159b66..88f1399 100644 --- a/mpcite/core.py +++ b/mpcite/core.py @@ -7,21 +7,44 @@ OstiID: TypeAlias = int -def find_out_of_date_records( +def find_out_of_date_doi_entries( client: MongoClient, robocrys_db: str, robocrys_collection: str, doi_db: str, - doi_collection, + doi_collection: str, ) -> list[OstiID]: - robocrys = client.robocrys_db.robocrys_collection - doi = client.doi_db.doi_collection - - out_of_data_osti_ids = [] - - # robocrys docs newer than in doi - - return out_of_data_osti_ids + robocrys = client[robocrys_db][robocrys_collection] + dois = client[doi_db][doi_collection] + + latest_doi = next( + dois.aggregate( + [ + {"$project": {"_id": 0, "date_record_last_updated_on_Elink": 1}}, + {"$sort": {"date_record_last_updated_on_Elink": -1}}, + {"$limit": 1}, + ] + ) + )["date_record_last_updated_on_Elink"] + + material_ids_to_update = list( + map( + lambda x: x["material_id"], + robocrys.find( + {"last_updated": {"$gt": latest_doi}}, {"_id": 0, "material_id": 1} + ), + ) + ) + + return list( + map( + lambda x: x["osti_id"], + dois.find( + {"material_id": {"$in": material_ids_to_update}}, + {"_id": 0, "osti_id": 1}, + ), + ), + ) def 
update_existing_osti_record(*args, **kwargs) -> RecordResponse: ... From 1020d1f5da6cf1dd9dd9bfe52de81ef833334cfc Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Tue, 8 Jul 2025 12:23:54 -0700 Subject: [PATCH 10/46] remaining core functions --- mpcite/core.py | 66 +++++++++++++++++++++++++++++++--- mpcite/doi_builder.py | 17 ++++----- mpcite/pipeline.py | 2 +- tests/file_to_jsonForUpload.py | 3 +- tests/github_bug_report.py | 31 ++++++++++++++++ 5 files changed, 103 insertions(+), 16 deletions(-) create mode 100644 tests/github_bug_report.py diff --git a/mpcite/core.py b/mpcite/core.py index 88f1399..188d80e 100644 --- a/mpcite/core.py +++ b/mpcite/core.py @@ -1,9 +1,12 @@ from typing import TypeAlias from elinkapi import Elink -from elinkapi.record import RecordResponse +from elinkapi.record import RecordResponse, Record from pymongo import MongoClient +import requests +from elinkapi.utils import Validation + OstiID: TypeAlias = int @@ -47,13 +50,66 @@ def find_out_of_date_doi_entries( ) -def update_existing_osti_record(*args, **kwargs) -> RecordResponse: ... 
+def update_existing_osti_record( + elinkapi: Elink, + osti_id: OstiID, + new_values: dict +) -> RecordResponse: + record_on_elink = elinkapi.get_single_record(osti_id) + + for keyword in new_values.keys(): + try: + setattr(record_on_elink, keyword, new_values[keyword]) + except ValueError: + print("Extraneous keywords found in the dictionary that do not correspond to attributes in the ELink API's record class.") + + # assume the use with fix the sponsor identifier bug before calling the update function + # # fix the issue with the sponsor organization's identifiers + # for entry in record_on_elink.organizations: + # if entry.type == "SPONSOR": + # entry.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + # break + + return elinkapi.update_record(osti_id, record_on_elink, state="save") # user should use update_state_of_osti_record to submit instead + + +def submit_new_osti_record( + elinkapi: Elink, + new_record: Record, + state = "submit", # assuming there is no need to both with saving. just send new record to osti when its ready for submission. also assume bug with DOE contract number identifier in sponsor organization is accounted for +) -> RecordResponse: + # template for all repeated stuff + # only submit + record_response = elinkapi.post_new_record(new_record, state) + + return record_response + +def update_state_of_osti_record( + elinkapi: Elink, + osti_id: OstiID, + new_state = "submit" +) -> RecordResponse: + record = elinkapi.get_single_record(osti_id) -def submit_new_osti_record(*args, **kwargs) -> RecordResponse: ... 
+ # assuming that the user will handle the sponsor identifier bug before calling this function + # # fix the issue with the sponsor organization's identifiers + # for entry in record.organizations: + # if entry.type == "SPONSOR": + # entry.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + # break + return elinkapi.update_record(osti_id, record, new_state) -def update_state_of_osti_record(*args, **kwargs) -> RecordResponse: ... +def delete_osti_record( + elinkapi: Elink, + osti_id: OstiID, + reason: str +) -> RecordResponse: + """Delete a record by its OSTI ID.""" + response = requests.delete(f"{elinkapi.target}records/{osti_id}?reason={reason}", headers={"Authorization": f"Bearer {api.token}"}) + Validation.handle_response(response) + return response.status_code == 204 # True if deleted successfully -def delete_osti_record(*args, **kwargs) -> RecordResponse: ... +# TODO: make the github error thing for the weird issue with identifiers not being allocated to the sponsoring organization \ No newline at end of file diff --git a/mpcite/doi_builder.py b/mpcite/doi_builder.py index 8ddd4c4..5e25c64 100644 --- a/mpcite/doi_builder.py +++ b/mpcite/doi_builder.py @@ -136,14 +136,15 @@ ) ''' -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field import datetime +# TODO: change the field names to match ELINK class doi_model(BaseModel): # identifiers - doi: str # can be taken from ELink API - title: str # can be taken from ELink API - osti_id: str # can be taken from ELink API + doi: str = Field(description="") # can be taken from ELink API + title: str = Field(description="") # can be taken from ELink API + osti_id: str = Field(description="") # can be taken from ELink API material_id: str # can be taken from Robocrys Collection or ELink API # time stamps @@ -152,10 +153,10 @@ class doi_model(BaseModel): # status elink_workflow_status: str # can be taken from ELink API - date_released: datetime.datetime - 
date_submitted_to_osti_first: datetime.datetime - date_submitted_to_osti_last: datetime.datetime - date_published: datetime.datetime # labelled as publication_date in RecordResponse of ELink API + date_released: datetime.datetime = Field(description="") + date_submitted_to_osti_first: datetime.datetime = Field(description="") + date_submitted_to_osti_last: datetime.datetime = Field(description="") + date_published: datetime.datetime = Field(description="") # labelled as publication_date in RecordResponse of ELink API # hypothetically post an update or submit a new record and receive the RecordResponse def RecordResponse_to_doi_model(recordresponse): diff --git a/mpcite/pipeline.py b/mpcite/pipeline.py index 869e65f..3d2c8c7 100644 --- a/mpcite/pipeline.py +++ b/mpcite/pipeline.py @@ -68,7 +68,7 @@ try: # send update to the record with the record response # update_record(osti_id, record, state="save") - # record_response = prod_api.update_record(record.osti_id, record, state="save") + record_response = prod_api.update_record(record.osti_id, record, state="save") update_success = True except: diff --git a/tests/file_to_jsonForUpload.py b/tests/file_to_jsonForUpload.py index aaa0886..f633305 100644 --- a/tests/file_to_jsonForUpload.py +++ b/tests/file_to_jsonForUpload.py @@ -20,7 +20,6 @@ print(review_api.get_single_record(2525340)) -raise atlas_user = os.environ.get("atlas_user") atlas_password = os.environ.get("atlas_password") @@ -28,7 +27,7 @@ mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" cwd = os.getcwd() -path = "/json_pages/page_number_4.0" # IT'S ONLY DOING ONE FILE RIGHT NOW LOL +path = "/json_pages/page_number_4.0" # IT'S ONLY DOING ONE FILE RIGHT NOW file = open(cwd + path, "r") update_counter = 0 diff --git a/tests/github_bug_report.py b/tests/github_bug_report.py new file mode 100644 index 0000000..c1eec4c --- /dev/null +++ b/tests/github_bug_report.py @@ -0,0 +1,31 @@ +from elinkapi import Elink +from elinkapi.record import 
Record +import os +from dotenv import load_dotenv + +load_dotenv() + +prod_api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) +review_endpoint = "https://review.osti.gov/elink2api/" +review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) + +raise + +record_response = prod_api.get_single_record(1190959) # returns OSTI record response with OSTI ID = 1190959, which has a DOE Contract Number saved (AC02-05CH11231; EDCBEE) +record_response_dict = record_response.model_dump(exclude_none=True) +record_response_dict.pop("osti_id") # remove osti_id to allow post function + +new_record = Record(**record_response_dict) # identical record with removed OSTI_ID +for org in new_record.organizations: + if org.type == "SPONSOR": + print(org) + org.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + +# attempt to submit exact same record to review environment +record_response_after_post = review_api.post_new_record(new_record, "save") # works after re-providing the DOE contract number + +# next, attempt updating this record +record_to_update = review_api.get_single_record(record_response_after_post.osti_id) +record_to_update.title = "Updated Title For Materials Data" +review_api.update_record(record_response_after_post.osti_id, record_to_update, "submit") + From b34ae35368d5bbae5a1e28aa106c0ecc5dc55193 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 10 Jul 2025 15:18:23 -0700 Subject: [PATCH 11/46] testing core functionalities on review environment and local mongo database --- mpcite/core.py | 37 +++++++++++---- mpcite/doi_builder.py | 34 +++++++------- mpcite/reset.py | 26 +++++++++++ mpcite/test_core.py | 84 ++++++++++++++++++++++++++++++++++ tests/file_to_jsonForUpload.py | 4 +- tests/github_bug_report.py | 62 ++++++++++++++++--------- 6 files changed, 197 insertions(+), 50 deletions(-) create mode 100644 mpcite/reset.py create mode 100644 mpcite/test_core.py diff --git a/mpcite/core.py b/mpcite/core.py index 
188d80e..24be6b3 100644 --- a/mpcite/core.py +++ b/mpcite/core.py @@ -1,34 +1,37 @@ from typing import TypeAlias from elinkapi import Elink -from elinkapi.record import RecordResponse, Record +from elinkapi.record import RecordResponse, Record, Organization, Person from pymongo import MongoClient import requests from elinkapi.utils import Validation +from datetime import datetime + OstiID: TypeAlias = int def find_out_of_date_doi_entries( - client: MongoClient, + rc_client: MongoClient, + doi_client: MongoClient, robocrys_db: str, robocrys_collection: str, doi_db: str, doi_collection: str, ) -> list[OstiID]: - robocrys = client[robocrys_db][robocrys_collection] - dois = client[doi_db][doi_collection] + robocrys = rc_client[robocrys_db][robocrys_collection] + dois = doi_client[doi_db][doi_collection] latest_doi = next( dois.aggregate( [ - {"$project": {"_id": 0, "date_record_last_updated_on_Elink": 1}}, - {"$sort": {"date_record_last_updated_on_Elink": -1}}, + {"$project": {"_id": 0, "date_metadata_updated": 1}}, + {"$sort": {"date_metadata_updated": -1}}, {"$limit": 1}, ] ) - )["date_record_last_updated_on_Elink"] + )["date_metadata_updated"] material_ids_to_update = list( map( @@ -108,8 +111,24 @@ def delete_osti_record( reason: str ) -> RecordResponse: """Delete a record by its OSTI ID.""" - response = requests.delete(f"{elinkapi.target}records/{osti_id}?reason={reason}", headers={"Authorization": f"Bearer {api.token}"}) + response = requests.delete(f"{elinkapi.target}records/{osti_id}?reason={reason}", headers={"Authorization": f"Bearer {elinkapi.token}"}) Validation.handle_response(response) return response.status_code == 204 # True if deleted successfully -# TODO: make the github error thing for the weird issue with identifiers not being allocated to the sponsoring organization \ No newline at end of file +def emptyReviewAPI(reason, review_api): + allDeleted = True + for record in review_api.query_records(): + delete_osti_record(review_api, record.osti_id, 
reason) + +def make_minimum_record_to_fully_release( + title, # required to make record + product_type = "DA", # required to make record + organizations = [Organization(type='RESEARCHING', name='LBNL Materials Project (LBNL-MP)'), + Organization(type='SPONSOR', name='TEST SPONSOR ORG', identifiers=[{"type": 'CN_DOE', "value": 'AC02-05CH11231'}])], # sponsor org is necessary for submission + persons = [Person(type='AUTHOR', last_name='Perrson')], + site_ownership_code = "LBNL-MP", + access_limitations = ['UNL'], + publication_date = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0), # what should this be? + site_url = "https://next-gen.materialsproject.org/materials" +) -> Record: + return Record(product_type, title, persons, site_ownership_code, access_limitations, publication_date, site_url) \ No newline at end of file diff --git a/mpcite/doi_builder.py b/mpcite/doi_builder.py index 5e25c64..713724c 100644 --- a/mpcite/doi_builder.py +++ b/mpcite/doi_builder.py @@ -137,26 +137,26 @@ ''' from pydantic import BaseModel, ConfigDict, Field -import datetime +from datetime import datetime # TODO: change the field names to match ELINK class doi_model(BaseModel): # identifiers - doi: str = Field(description="") # can be taken from ELink API - title: str = Field(description="") # can be taken from ELink API - osti_id: str = Field(description="") # can be taken from ELink API + doi: str = Field(description="The DOI number as allocated by OSTI") # can be taken from ELink API + title: str = Field(description="The title of the record") # can be taken from ELink API + osti_id: str = Field(description="The OSTI ID number allocated by OSTI to make the DOI number") # can be taken from ELink API material_id: str # can be taken from Robocrys Collection or ELink API # time stamps - date_record_entered_onto_ELink: datetime.datetime # can be taken from ELink API response - date_record_last_updated_on_Elink: datetime.datetime + date_metadata_added: datetime | None = 
Field(description="date_record_entered_onto_ELink") # can be taken from ELink API response + date_metadata_updated: datetime | None = Field(description="date_record_last_updated_on_Elink") # status - elink_workflow_status: str # can be taken from ELink API - date_released: datetime.datetime = Field(description="") - date_submitted_to_osti_first: datetime.datetime = Field(description="") - date_submitted_to_osti_last: datetime.datetime = Field(description="") - date_published: datetime.datetime = Field(description="") # labelled as publication_date in RecordResponse of ELink API + workflow_status: str # can be taken from ELink API + date_released: datetime | None = Field(description="") + date_submitted_to_osti_first: datetime = Field(description="date record was first submitted to OSTI for publication, maintained internally by E-Link") + date_submitted_to_osti_last: datetime = Field(description="most recent date record information was submitted to OSTI. Maintained internally by E-Link") + publication_date: datetime | None = Field(description="") # labelled as publication_date in RecordResponse of ELink API # hypothetically post an update or submit a new record and receive the RecordResponse def RecordResponse_to_doi_model(recordresponse): @@ -169,19 +169,19 @@ def RecordResponse_to_doi_model(recordresponse): "osti_id": str(recordresponse.osti_id), "material_id": recordresponse.site_unique_id, - "date_record_entered_onto_ELink": recordresponse.date_metadata_added, - "date_record_last_updated_on_Elink": recordresponse.date_metadata_updated, + "date_metadata_added": recordresponse.date_metadata_added, + "date_metadata_updated": recordresponse.date_metadata_updated, - "elink_workflow_status": recordresponse.workflow_status, + "workflow_status": recordresponse.workflow_status, "date_released": recordresponse.date_released, # date_released_to_osti = recordresponse.released_to_osti_date, # what is the difference between these??? 
"Date record information was released to OSTI, as entered by releasing official." always seems to be none "date_submitted_to_osti_first": recordresponse.date_submitted_to_osti_first, # date record was first submitted to OSTI for publication, maintained internally by E-Link "date_submitted_to_osti_last": recordresponse.date_submitted_to_osti_last, # most recent date record information was submitted to OSTI. Maintained internally by E-Link. - "date_published": recordresponse.publication_date + "publication_date": recordresponse.publication_date } return doi_model(**params) -def upload_doi_document_model_to_collection(doi_model, client, collection): - x = collection.insert_one(doi_model).inserted_id +def upload_doi_document_model_to_collection(doi_model, collection): + x = collection.insert_one(doi_model.model_dump()).inserted_id return x \ No newline at end of file diff --git a/mpcite/reset.py b/mpcite/reset.py new file mode 100644 index 0000000..350b50c --- /dev/null +++ b/mpcite/reset.py @@ -0,0 +1,26 @@ +from mpcite.core import * +from mpcite.doi_builder import RecordResponse_to_doi_model, upload_doi_document_model_to_collection +import os +import json +from dotenv import load_dotenv + +load_dotenv() # depends on the root directory from which you run your python scripts. 
+ +review_endpoint = "https://review.osti.gov/elink2api/" + +prod_api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) +review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) + +cwd = os.getcwd() +path = "/json_pages/page_number_1000.0" # IT'S ONLY DOING ONE FILE RIGHT NOW +file = open(cwd + path, "r") + +atlas_user = os.environ.get("atlas_user") +atlas_password = os.environ.get("atlas_password") +atlas_host = os.environ.get("atlas_host") +mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" + +# emptyReviewAPI("Testing", review_api) + +with MongoClient() as client: + client.dois_test.dois.delete_many({}, comment="Testing") \ No newline at end of file diff --git a/mpcite/test_core.py b/mpcite/test_core.py new file mode 100644 index 0000000..948d83c --- /dev/null +++ b/mpcite/test_core.py @@ -0,0 +1,84 @@ +from mpcite.core import * +from mpcite.doi_builder import RecordResponse_to_doi_model, upload_doi_document_model_to_collection +import os +import json +from dotenv import load_dotenv + +load_dotenv() # depends on the root directory from which you run your python scripts. 
+ +review_endpoint = "https://review.osti.gov/elink2api/" + +prod_api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) +review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) + +cwd = os.getcwd() +path = "/json_pages/page_number_1000.0" # IT'S ONLY DOING ONE FILE RIGHT NOW +file = open(cwd + path, "r") + +atlas_user = os.environ.get("atlas_user") +atlas_password = os.environ.get("atlas_password") +atlas_host = os.environ.get("atlas_host") +mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" + +with MongoClient(mongo_uri) as real_client: + with MongoClient() as doi_client: # open the mongoclient outside of the for loop, is more efficient than opening and closing it repeatedly + dois = doi_client["dois_test"]["dois"] + + # for line in file: + # js = json.loads(line.strip()) + + # # temporarily fix the sponsor organization bug + # for entry in js["organizations"]: + # if entry["type"] == "SPONSOR": + # entry["identifiers"] = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + + # my_record = Record(**js) + + # # make a post to the elink review environment + # saved_record = review_api.post_new_record(my_record, state="submit") + + # # make a doi document with saved_record + # doi_model = RecordResponse_to_doi_model(saved_record) + + # # now, add that doi to the local doi collection + # upload_doi_document_model_to_collection(doi_model, dois) + + # all_material_ids = [doc["material_id"] for doc in dois.find({}, {"_id": 0, "material_id": 1})] + + # for material_id in all_material_ids: + + # # query prod env for record with materials_id == site_unique_id + # record_from_prod = prod_api.query_records(site_unique_id=material_id) + + # if record_from_prod.total_rows != 1: + # print(f"ERROR: not unique Material_ID! 
{material_id}") + # raise + + # # make a doi_model from that data + # recordresponse_from_prod = RecordResponse_to_doi_model(record_from_prod.data[0]) + + # query_filter = {"material_id": material_id} + + # # Find existing document to preserve the osti_id + # existing_doc = dois.find_one(query_filter, {"osti_id": 1}) # only retrieve osti_id + + # if not existing_doc: + # print(f"ERROR: document with material_id {material_id} not found in `dois` collection.") + # raise + + # replacement_doc = recordresponse_from_prod.model_dump() + # replacement_doc["osti_id"] = existing_doc["osti_id"] + + # dois.replace_one(query_filter, replacement_doc) + + osti_OOD_list = find_out_of_date_doi_entries(real_client, doi_client, "mp_core_blue", "robocrys", "dois_test", "dois") + print(osti_OOD_list) + + for osti_id in osti_OOD_list: + material_id_to_update = review_api.get_single_record(osti_id).site_unique_id + + new_values = { + "description": "UPDATED ROBOCRYS DESCRIPTION: " + next(real_client["mp_core_blue"]["robocrys"].find({"material_id": material_id_to_update}, {"_id": 0, "description": 1}))["description"] + } + + update_existing_osti_record(review_api, osti_id, new_values) \ No newline at end of file diff --git a/tests/file_to_jsonForUpload.py b/tests/file_to_jsonForUpload.py index f633305..aa864e0 100644 --- a/tests/file_to_jsonForUpload.py +++ b/tests/file_to_jsonForUpload.py @@ -19,8 +19,6 @@ review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) -print(review_api.get_single_record(2525340)) - atlas_user = os.environ.get("atlas_user") atlas_password = os.environ.get("atlas_password") atlas_host = os.environ.get("atlas_host") @@ -44,6 +42,8 @@ def emptyReviewAPI(reason): for record in review_api.query_records(): delete_record(review_api, record.osti_id, reason) +raise + start = timer() # Post an updated json diff --git a/tests/github_bug_report.py b/tests/github_bug_report.py index c1eec4c..3151e6d 100644 --- 
a/tests/github_bug_report.py +++ b/tests/github_bug_report.py @@ -1,7 +1,7 @@ -from elinkapi import Elink -from elinkapi.record import Record +from elinkapi import Elink, Organization, Person, exceptions, Record import os from dotenv import load_dotenv +from datetime import datetime load_dotenv() @@ -9,23 +9,41 @@ review_endpoint = "https://review.osti.gov/elink2api/" review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) -raise - -record_response = prod_api.get_single_record(1190959) # returns OSTI record response with OSTI ID = 1190959, which has a DOE Contract Number saved (AC02-05CH11231; EDCBEE) -record_response_dict = record_response.model_dump(exclude_none=True) -record_response_dict.pop("osti_id") # remove osti_id to allow post function - -new_record = Record(**record_response_dict) # identical record with removed OSTI_ID -for org in new_record.organizations: - if org.type == "SPONSOR": - print(org) - org.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] - -# attempt to submit exact same record to review environment -record_response_after_post = review_api.post_new_record(new_record, "save") # works after re-providing the DOE contract number - -# next, attempt updating this record -record_to_update = review_api.get_single_record(record_response_after_post.osti_id) -record_to_update.title = "Updated Title For Materials Data" -review_api.update_record(record_response_after_post.osti_id, record_to_update, "submit") - +# record_response = prod_api.get_single_record(1190959) # returns OSTI record response with OSTI ID = 1190959, which has a DOE Contract Number saved (AC02-05CH11231; EDCBEE) +# record_response_dict = record_response.model_dump(exclude_none=True) +# record_response_dict.pop("osti_id") # remove osti_id to allow post function + +# new_record = Record(**record_response_dict) # identical record with removed OSTI_ID +# for org in new_record.organizations: +# if org.type == "SPONSOR": +# print(org) +# 
org.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + +# # attempt to submit exact same record to review environment +# record_response_after_post = review_api.post_new_record(new_record, "save") # works after re-providing the DOE contract number + +# # next, attempt updating this record +# record_to_update = review_api.get_single_record(record_response_after_post.osti_id) +# record_to_update.title = "Updated Title For Materials Data" +# review_api.update_record(record_response_after_post.osti_id, record_to_update, "submit") + +required_fields = { + "product_type": "DA", + "title": "Testing if CN_DOE can be random", + "organizations": [Organization(type='RESEARCHING', name='LBNL Materials Project (LBNL-MP)'), + Organization(type='SPONSOR', name='TEST SPONSOR ORG', identifiers=[{"type": 'CN_DOE', "value": 'oiajdiwjdiwj'}])], + "persons": [Person(type='AUTHOR', last_name='Schmoe')], + "site_ownership_code": "LBNL-MP", + "access_limitations": ['UNL'], + "publication_date": datetime.now().replace(hour=0, minute=0, second=0, microsecond=0), + "site_url": "https://next-gen.materialsproject.org/materials" +} + +empty_record = Record(**required_fields) +print(f"SUBMITTED TO OSTI, FULLY VALIDATED:\n{review_api.get_single_record(2525614)}\n\n\nTRYING TO SUBMIT:\n{empty_record}") + +try: + saved_record = review_api.post_new_record(empty_record, "submit") +except exceptions.BadRequestException as ve: + print(ve.message) + print(ve.errors) \ No newline at end of file From 52382ff4345f4cb8506af79e8c0db6e63b24a99e Mon Sep 17 00:00:00 2001 From: HugoOnghai <99376417+HugoOnghai@users.noreply.github.com> Date: Fri, 11 Jul 2025 17:01:29 -0700 Subject: [PATCH 12/46] Merged upstream (#1) * move old code to 'legacy' * setup project using uv * add license * testing skeleton * gh actions skeleton * remove old reqs file to prevent dependabot alerts --------- Co-authored-by: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> --- .github/dependabot.yml | 6 + 
.github/workflows/lint.yml | 8 + .github/workflows/release.yml | 8 + .github/workflows/testing.yml | 36 ++ .gitignore | 227 ++++++++++- .python-version | 1 + LICENSE | 46 +++ README.md | 95 ----- legacy/README.md | 95 +++++ activate_conda => legacy/activate_conda | 0 cron.sh => legacy/cron.sh | 0 {files => legacy/files}/config-example.json | 0 .../mpcite}/Visualizations.ipynb | 0 .../mpcite}/Visualizations.pdf | Bin {mpcite => legacy/mpcite}/__init__.py | 0 .../mpcite}/config_ipynb.txt | 0 .../mpcite}/doi_builder.py | 0 {mpcite_legacy => legacy/mpcite}/main.py | 0 {mpcite_legacy => legacy/mpcite}/models.py | 0 .../mpcite}/send_collection.py | 0 {mpcite_legacy => legacy/mpcite}/utility.py | 0 {mpcite_legacy => legacy/mpcite}/utility2.py | 0 setup.py => legacy/setup.py | 0 output.txt => legacy/tests/__init__.py | 0 {tests => legacy/tests}/test_doi_builder.py | 0 mpcite_legacy/__init__.py | 8 - pyproject.toml | 40 ++ requirements.txt | 49 --- src/mp_cite/__init__.py | 0 {mpcite => src/mp_cite}/core.py | 0 {mpcite => src/mp_cite}/doi_builder.py | 0 {mpcite => src/mp_cite}/models.py | 0 {mpcite => src/mp_cite}/pipeline.py | 0 .../mp_cite}/recordresponse_example.txt | 0 {mpcite => src/mp_cite}/reset.py | 0 src/mp_cite/send_collection.py | 79 ++++ {mpcite => src/mp_cite}/test_core.py | 0 tests/conftest.py | 11 + tests/elink_service_test.py | 16 + uv.lock | 372 ++++++++++++++++++ 40 files changed, 930 insertions(+), 167 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/release.yml create mode 100644 .github/workflows/testing.yml create mode 100644 .python-version create mode 100644 LICENSE create mode 100644 legacy/README.md rename activate_conda => legacy/activate_conda (100%) rename cron.sh => legacy/cron.sh (100%) rename {files => legacy/files}/config-example.json (100%) rename {mpcite_legacy => legacy/mpcite}/Visualizations.ipynb (100%) rename {mpcite_legacy => 
legacy/mpcite}/Visualizations.pdf (100%) rename {mpcite => legacy/mpcite}/__init__.py (100%) rename {mpcite_legacy => legacy/mpcite}/config_ipynb.txt (100%) rename {mpcite_legacy => legacy/mpcite}/doi_builder.py (100%) rename {mpcite_legacy => legacy/mpcite}/main.py (100%) rename {mpcite_legacy => legacy/mpcite}/models.py (100%) rename {mpcite_legacy => legacy/mpcite}/send_collection.py (100%) rename {mpcite_legacy => legacy/mpcite}/utility.py (100%) rename {mpcite_legacy => legacy/mpcite}/utility2.py (100%) rename setup.py => legacy/setup.py (100%) rename output.txt => legacy/tests/__init__.py (100%) rename {tests => legacy/tests}/test_doi_builder.py (100%) delete mode 100644 mpcite_legacy/__init__.py create mode 100644 pyproject.toml delete mode 100644 requirements.txt create mode 100644 src/mp_cite/__init__.py rename {mpcite => src/mp_cite}/core.py (100%) rename {mpcite => src/mp_cite}/doi_builder.py (100%) rename {mpcite => src/mp_cite}/models.py (100%) rename {mpcite => src/mp_cite}/pipeline.py (100%) rename {mpcite => src/mp_cite}/recordresponse_example.txt (100%) rename {mpcite => src/mp_cite}/reset.py (100%) create mode 100644 src/mp_cite/send_collection.py rename {mpcite => src/mp_cite}/test_core.py (100%) create mode 100644 tests/conftest.py create mode 100644 tests/elink_service_test.py create mode 100644 uv.lock diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..5ace460 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..dfc4fef --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,8 @@ +name: linting + +on: + push: + branches: [master] + pull_request: + branches: [master] +# TODO: setup linting with uv/ruff diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml 
new file mode 100644 index 0000000..5dc867c --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,8 @@ +name: release + +on: + push: + branches: [master] + pull_request: + branches: [master] +# TODO: setup release to pypi diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml new file mode 100644 index 0000000..9d761c9 --- /dev/null +++ b/.github/workflows/testing.yml @@ -0,0 +1,36 @@ +name: testing + +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + test: + strategy: + matrix: + os: ["ubuntu-latest"] + python-version: ["3.11", "3.12", "3.13"] + + name: mp-cite (${{ matrix.os }}/py${{ matrix.python-version }}) + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install uv + uses: astral-sh/setup-uv@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Install the project + run: uv sync --locked --all-extras --dev + + - name: Run tests + env: + ELINK_REVIEW_API_TOKEN: ${{ secrets.ELINK_REVIEW_API_TOKEN }} + ELINK_REVIEW_ENDPOINT: ${{ secrets.ELINK_REVIEW_ENDPOINT }} + run: uv run pytest tests +# codecov? diff --git a/.gitignore b/.gitignore index ed84b8e..895ed96 100644 --- a/.gitignore +++ b/.gitignore @@ -1,19 +1,216 @@ -mpcite.egg-info/* -*.html -*.pyc -*.yaml +# https://github.com/github/gitignore/blob/main/Python.gitignore +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: *.log -!files/config_test.json -!files/config_prod.json -!files -.idea -dist -build -mpcite.egg-info -.DS_store -files/config_test.json -.pytest_cache +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook .ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock +#poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
+# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +#pdm.lock +#pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +#pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. 
However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +.vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml + +# json files for storing production records *.json .env -/json_pages \ No newline at end of file +/json_pages diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..2c07333 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..173313f --- /dev/null +++ b/LICENSE @@ -0,0 +1,46 @@ +MPCite Copyright (c) 2025, The Regents of the University of +California, through Lawrence Berkeley National Laboratory (subject +to receipt of any required approvals from the U.S. Dept. of Energy). +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +(1) Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +(2) Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following +disclaimer in the documentation and/or other materials provided with +the distribution. + +(3) Neither the name of the University of California, Lawrence +Berkeley National Laboratory, U.S. Dept. of Energy nor the names of +its contributors may be used to endorse or promote products derived +from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +You are under no obligation whatsoever to provide any bug fixes, +patches, or upgrades to the features, functionality or performance +of the source code ("Enhancements") to anyone; however, if you +choose to make your Enhancements available either publicly, or +directly to Lawrence Berkeley National Laboratory or its +contributors, without imposing a separate written license agreement +for such Enhancements, then you hereby grant the following license: +a non-exclusive, royalty-free perpetual license to install, use, +modify, prepare derivative works, incorporate into other computer +software, distribute, and sublicense such enhancements or derivative +works thereof, in binary and source code form. 
diff --git a/README.md b/README.md index 3832f5c..e69de29 100644 --- a/README.md +++ b/README.md @@ -1,95 +0,0 @@ -# MPCite - -``` -Continuous and High-Throughput Allocation of Digital Object Identifiers -for computed and contributed Materials Data in the Materials Project - - Accepted as invited talk at “Reproducibility" mini-symposium of - SciPy16 (http://scipy2016.scipy.org/ehome/146062/332963/) -``` - -## Quick Start -`pip install -r requirements.txt` - -`python setup.py install` - -`mpcite --config-file YOUR_PROJ_DIR/files/config.json` - - -## Brief Description - -“MPCite” enables the continuous request, validation, and dissemination of -Digital Object Identifiers (DOIs) for all inorganic materials currently -available in the Materials Project (MP, www.materialsproject.org). The library -provides MP's users with the necessary software infrastructure to achieve a new -level of reproducibility in their research: (i) convenient and persistent -citation of MP's materials data in online and print publications, and (ii) -facilitated sharing amongst collaborators. "MPCite" can also be employed for -the assignment of DOIs to non-core database entries such as theoretical and -experimental data contributed through "MPContribs" or suggested by the user for -calculation via the “MPComplete” service. The fundamental principle underlying -MPCite can easily be extended to other scientific domains where the number of -data records demands high-throughput and continuous allocation of DOIs. - -## Long Description - -The new open-source software package, “MPCite” [1] enables the continuous -request, validation, and dissemination of Digital Object Identifiers (DOIs) for -all >70k inorganic materials currently available in the Materials Project (MP, -www.materialsproject.org) database. Materials defined by a set of similar -inorganic crystal structures are a good match for DOIs because they have a -unique and stable definition. 
The functionality provided by MPCite is -increasingly important in support of “MPComplete”, a service where users -suggest new compounds for which MP will calculate detailed electronic structure -properties. MPComplete then automatically integrates the results of each -calculation with MP’s core dataset. Users are increasingly willing to delegate -computation to MP because they quickly get reproducible results from a trusted -analysis pipeline with DOIs they can cite in their follow-up analysis papers. - -The DOE Office of Scientific and Technical Information (OSTI) [2] provides the -E-Link service and programming interface free of charge to DOE-funded -scientific projects. It allows researchers to submit information about OSTI -products (in form of XML meta-data records) and retrieve a persistent DOI to -identify it on the world wide web. DOIs are most commonly used for referencing -and locating journal papers because they provide a unique URL linking to the -journal’s online landing page with more information about the publication. The -landing page might change over time, but the DOI - once requested - is -immutable. - -With MPCite, we are expanding and applying the use of DOIs from papers, reports -and small singular/static datasets to the ever-growing set of materials data -available in MP. For OSTI, the tens of thousands of requests from a single -client constitutes an unprecedented scale. The resulting workload can only be -managed with a continuously running task manager which sends requests to OSTI -in chunks to initially achieve full DOI coverage within a few months. Not only -does the manager subsequently keep requesting DOIs as new materials become -available, it also assures the propagation of updates in materials data to OSTI -without duplicating DOIs. 
To support such a “high-throughput” mode, MPCite -includes self-healing error handlers and monitoring capabilities that are -usually not required when dealing with up to a few dozen DOI requests and one -entry at a time. Another integral task of the DOI manager is the automated -generation of BibTeX strings for each material, which are also used to validate -that the DOIs successfully resolve to the appropriate landing page. This -functionality is exposed to the user on the materials details page in our -portal [3]. MPCite interactively live-monitors the overall status of requested -versus validated DOIs in comparison to the total number of materials through -Plotly’s Streaming API [4]. In recognition that user analyses will often use -many related materials, the user can also manually request a representative DOI -through our portal to reference a collection of materials used in his analysis, -or to share it with collaborators. - -In summary, our efforts to assign DOIs to all materials available in MP -provides our users with the necessary software infrastructure to achieve a new -level of reproducibility in their research. This is not only evident in the -convenient and persistent citation of our materials data in online and print -publications, but also in the facilitated sharing amongst collaborators. In the -future, we plan to extend the use of DOIs to non-core database entries such as -theoretical and experimental data contributed by our users through "MPContribs" -[5]. Once established in MP, MPCite can also be easily extended to other -scientific domains where the number of data records demands the high-throughput -and continuous allocation of DOIs. 
- -[1] MPCite, https://github.com/materialsproject/MPCite -[2] OSTI, https://www.osti.gov -[3] Example Materials Detail Page for As (mp-10), http://dx.doi.org/10.17188/1184812 -[4] Plotly, https://plot.ly -[5] MPContribs, arXiv:1510.05024, arXiv:1510.05727, MRS Spring 2016 diff --git a/legacy/README.md b/legacy/README.md new file mode 100644 index 0000000..3832f5c --- /dev/null +++ b/legacy/README.md @@ -0,0 +1,95 @@ +# MPCite + +``` +Continuous and High-Throughput Allocation of Digital Object Identifiers +for computed and contributed Materials Data in the Materials Project + - Accepted as invited talk at “Reproducibility" mini-symposium of + SciPy16 (http://scipy2016.scipy.org/ehome/146062/332963/) +``` + +## Quick Start +`pip install -r requirements.txt` + +`python setup.py install` + +`mpcite --config-file YOUR_PROJ_DIR/files/config.json` + + +## Brief Description + +“MPCite” enables the continuous request, validation, and dissemination of +Digital Object Identifiers (DOIs) for all inorganic materials currently +available in the Materials Project (MP, www.materialsproject.org). The library +provides MP's users with the necessary software infrastructure to achieve a new +level of reproducibility in their research: (i) convenient and persistent +citation of MP's materials data in online and print publications, and (ii) +facilitated sharing amongst collaborators. "MPCite" can also be employed for +the assignment of DOIs to non-core database entries such as theoretical and +experimental data contributed through "MPContribs" or suggested by the user for +calculation via the “MPComplete” service. The fundamental principle underlying +MPCite can easily be extended to other scientific domains where the number of +data records demands high-throughput and continuous allocation of DOIs. 
+ +## Long Description + +The new open-source software package, “MPCite” [1] enables the continuous +request, validation, and dissemination of Digital Object Identifiers (DOIs) for +all >70k inorganic materials currently available in the Materials Project (MP, +www.materialsproject.org) database. Materials defined by a set of similar +inorganic crystal structures are a good match for DOIs because they have a +unique and stable definition. The functionality provided by MPCite is +increasingly important in support of “MPComplete”, a service where users +suggest new compounds for which MP will calculate detailed electronic structure +properties. MPComplete then automatically integrates the results of each +calculation with MP’s core dataset. Users are increasingly willing to delegate +computation to MP because they quickly get reproducible results from a trusted +analysis pipeline with DOIs they can cite in their follow-up analysis papers. + +The DOE Office of Scientific and Technical Information (OSTI) [2] provides the +E-Link service and programming interface free of charge to DOE-funded +scientific projects. It allows researchers to submit information about OSTI +products (in form of XML meta-data records) and retrieve a persistent DOI to +identify it on the world wide web. DOIs are most commonly used for referencing +and locating journal papers because they provide a unique URL linking to the +journal’s online landing page with more information about the publication. The +landing page might change over time, but the DOI - once requested - is +immutable. + +With MPCite, we are expanding and applying the use of DOIs from papers, reports +and small singular/static datasets to the ever-growing set of materials data +available in MP. For OSTI, the tens of thousands of requests from a single +client constitutes an unprecedented scale. 
The resulting workload can only be +managed with a continuously running task manager which sends requests to OSTI +in chunks to initially achieve full DOI coverage within a few months. Not only +does the manager subsequently keep requesting DOIs as new materials become +available, it also ensures the propagation of updates in materials data to OSTI +without duplicating DOIs. To support such a “high-throughput” mode, MPCite +includes self-healing error handlers and monitoring capabilities that are +usually not required when dealing with up to a few dozen DOI requests and one +entry at a time. Another integral task of the DOI manager is the automated +generation of BibTeX strings for each material, which are also used to validate +that the DOIs successfully resolve to the appropriate landing page. This +functionality is exposed to the user on the materials details page in our +portal [3]. MPCite interactively live-monitors the overall status of requested +versus validated DOIs in comparison to the total number of materials through +Plotly’s Streaming API [4]. In recognition that user analyses will often use +many related materials, the user can also manually request a representative DOI +through our portal to reference a collection of materials used in their analysis, +or to share it with collaborators. + +In summary, our efforts to assign DOIs to all materials available in MP +provide our users with the necessary software infrastructure to achieve a new +level of reproducibility in their research. This is not only evident in the +convenient and persistent citation of our materials data in online and print +publications, but also in the facilitated sharing amongst collaborators. In the +future, we plan to extend the use of DOIs to non-core database entries such as +theoretical and experimental data contributed by our users through "MPContribs" +[5]. 
Once established in MP, MPCite can also be easily extended to other +scientific domains where the number of data records demands the high-throughput +and continuous allocation of DOIs. + +[1] MPCite, https://github.com/materialsproject/MPCite +[2] OSTI, https://www.osti.gov +[3] Example Materials Detail Page for As (mp-10), http://dx.doi.org/10.17188/1184812 +[4] Plotly, https://plot.ly +[5] MPContribs, arXiv:1510.05024, arXiv:1510.05727, MRS Spring 2016 diff --git a/activate_conda b/legacy/activate_conda similarity index 100% rename from activate_conda rename to legacy/activate_conda diff --git a/cron.sh b/legacy/cron.sh similarity index 100% rename from cron.sh rename to legacy/cron.sh diff --git a/files/config-example.json b/legacy/files/config-example.json similarity index 100% rename from files/config-example.json rename to legacy/files/config-example.json diff --git a/mpcite_legacy/Visualizations.ipynb b/legacy/mpcite/Visualizations.ipynb similarity index 100% rename from mpcite_legacy/Visualizations.ipynb rename to legacy/mpcite/Visualizations.ipynb diff --git a/mpcite_legacy/Visualizations.pdf b/legacy/mpcite/Visualizations.pdf similarity index 100% rename from mpcite_legacy/Visualizations.pdf rename to legacy/mpcite/Visualizations.pdf diff --git a/mpcite/__init__.py b/legacy/mpcite/__init__.py similarity index 100% rename from mpcite/__init__.py rename to legacy/mpcite/__init__.py diff --git a/mpcite_legacy/config_ipynb.txt b/legacy/mpcite/config_ipynb.txt similarity index 100% rename from mpcite_legacy/config_ipynb.txt rename to legacy/mpcite/config_ipynb.txt diff --git a/mpcite_legacy/doi_builder.py b/legacy/mpcite/doi_builder.py similarity index 100% rename from mpcite_legacy/doi_builder.py rename to legacy/mpcite/doi_builder.py diff --git a/mpcite_legacy/main.py b/legacy/mpcite/main.py similarity index 100% rename from mpcite_legacy/main.py rename to legacy/mpcite/main.py diff --git a/mpcite_legacy/models.py b/legacy/mpcite/models.py similarity index 
100% rename from mpcite_legacy/models.py rename to legacy/mpcite/models.py diff --git a/mpcite_legacy/send_collection.py b/legacy/mpcite/send_collection.py similarity index 100% rename from mpcite_legacy/send_collection.py rename to legacy/mpcite/send_collection.py diff --git a/mpcite_legacy/utility.py b/legacy/mpcite/utility.py similarity index 100% rename from mpcite_legacy/utility.py rename to legacy/mpcite/utility.py diff --git a/mpcite_legacy/utility2.py b/legacy/mpcite/utility2.py similarity index 100% rename from mpcite_legacy/utility2.py rename to legacy/mpcite/utility2.py diff --git a/setup.py b/legacy/setup.py similarity index 100% rename from setup.py rename to legacy/setup.py diff --git a/output.txt b/legacy/tests/__init__.py similarity index 100% rename from output.txt rename to legacy/tests/__init__.py diff --git a/tests/test_doi_builder.py b/legacy/tests/test_doi_builder.py similarity index 100% rename from tests/test_doi_builder.py rename to legacy/tests/test_doi_builder.py diff --git a/mpcite_legacy/__init__.py b/mpcite_legacy/__init__.py deleted file mode 100644 index ac9c1ea..0000000 --- a/mpcite_legacy/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -"""The Materials Project's Citation Framework""" - -__version__ = "1.0.0" -__url__ = "https://github.com/materialsproject/MPCite" -__author__ = "Patrick Huck & Michael Wu" -__email__ = "phuck@lbl.gov" -__copyright__ = "Copyright 2020, The Materials Project" -__maintainer__ = "Patrick Huck" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..0637adb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,40 @@ +[project] +name = "mp-cite" +version = "0.0.1" +description = "Add your description here" +readme = "README.md" +authors = [ + { name = "The Materials Project", email = "feedback@materialsproject.org" }, + { name = "Hugo Onghai", email = "" }, + { name = "Tyler Mathis", email = "35553152+tsmathis@users.noreply.github.com" }, +] +maintainers = [ + { name = "Tyler Mathis", email = 
"35553152+tsmathis@users.noreply.github.com" }, + { name = "Patrick Huck", email = "phuck@lbl.gov" }, +] +requires-python = ">=3.11" +dependencies = [ + "elinkapi>=0.4.9", + "pydantic>=2.11.7", + "pymongo>=4.13.2", +] + +[dependency-groups] +dev = [ + "pytest>=8.4.1", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.sdist] +include=["src/mp_cite"] +exclude=["/tests", "/legacy"] + +[tool.hatch.build.targets.wheel] +packages = ["src/mp_cite"] + +[project.urls] +Homepage = "https://github.com/materialsproject/MPCite" +Issues = "https://github.com/materialsproject/MPCite/issues" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index ec0f36e..0000000 --- a/requirements.txt +++ /dev/null @@ -1,49 +0,0 @@ -attrs==25.3.0 -beautifulsoup4==4.13.4 -bibtexparser==1.4.3 -bleach==6.2.0 -colorama==0.4.6 -defusedxml==0.7.1 -dicttoxml==1.7.16 -entrypoints==0.4 -fastjsonschema==2.21.1 -future==1.0.0 -ipython-genutils==0.2.0 -Jinja2==3.1.6 -jsonschema==4.24.0 -jsonschema-specifications==2025.4.1 -jupyter_client==8.6.3 -jupyter_core==5.8.1 -jupyterlab_pygments==0.3.0 -MarkupSafe==3.0.2 -mistune==3.1.3 -monty==2025.3.3 -nbclient==0.10.2 -nbconvert==7.16.6 -nbformat==5.10.4 -numpy==2.3.0 -packaging==25.0 -pandocfilters==1.5.1 -platformdirs==4.3.8 -Pygments==2.19.1 -pyparsing==3.2.3 -python-dateutil==2.9.0.post0 -pywin32==310 -pyzmq==27.0.0 -referencing==0.36.2 -rpds-py==0.25.1 -ruamel.yaml==0.18.14 -ruamel.yaml.clib==0.2.12 -setuptools==80.9.0 -six==1.17.0 -soupsieve==2.7 -testpath==0.6.0 -tinycss2==1.4.0 -tornado==6.5.1 -tqdm==4.67.1 -traitlets==5.14.3 -typing_extensions==4.14.0 -urllib3==2.5.0 -webencodings==0.5.1 -wheel==0.45.1 -xmltodict==0.14.2 diff --git a/src/mp_cite/__init__.py b/src/mp_cite/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mpcite/core.py b/src/mp_cite/core.py similarity index 100% rename from mpcite/core.py rename to src/mp_cite/core.py diff --git 
a/mpcite/doi_builder.py b/src/mp_cite/doi_builder.py similarity index 100% rename from mpcite/doi_builder.py rename to src/mp_cite/doi_builder.py diff --git a/mpcite/models.py b/src/mp_cite/models.py similarity index 100% rename from mpcite/models.py rename to src/mp_cite/models.py diff --git a/mpcite/pipeline.py b/src/mp_cite/pipeline.py similarity index 100% rename from mpcite/pipeline.py rename to src/mp_cite/pipeline.py diff --git a/mpcite/recordresponse_example.txt b/src/mp_cite/recordresponse_example.txt similarity index 100% rename from mpcite/recordresponse_example.txt rename to src/mp_cite/recordresponse_example.txt diff --git a/mpcite/reset.py b/src/mp_cite/reset.py similarity index 100% rename from mpcite/reset.py rename to src/mp_cite/reset.py diff --git a/src/mp_cite/send_collection.py b/src/mp_cite/send_collection.py new file mode 100644 index 0000000..0ce65a3 --- /dev/null +++ b/src/mp_cite/send_collection.py @@ -0,0 +1,79 @@ +from pathlib import Path +from xml.dom.minidom import parseString +from dicttoxml import dicttoxml +from mpcite.doi_builder import DOIBuilder +import json +from monty.json import MontyDecoder +from pydantic import BaseModel, Field +from typing import List + +default_description = ( + "Computed materials data using density functional theory calculations. These " + "calculations determine the electronic structure of bulk materials by solving " + "approximations to the Schrodinger equation. 
For more information, " + "see https://materialsproject.org/docs/calculations" +) + + +class CollectionsModel(BaseModel): + title: str = Field(default="Sample Title") + product_type: str = Field(default="DC") + relidentifiersblock: List[List[str]] = Field() + contributors: List[dict] + description: str = Field(default=default_description) + site_url: str = Field(default="https://materialsproject.org/") + + +config_file = Path("/Users/michaelwu/Desktop/projects/MPCite/files/config_prod.json") + +bld: DOIBuilder = json.load(config_file.open("r"), cls=MontyDecoder) +bld.config_file_path = config_file.as_posix() + +records = [ + CollectionsModel( + relidentifiersblock=[["mp-1", "mp-2", "mp-1"]], + contributors=[ + { + "first_name": "Michael", + "last_name": "Wu", + "email": "wuxiaohua1011@berkeley.edu", + } + ], + ).dict(), + CollectionsModel( + relidentifiersblock=[["mp-21"], ["mp-22"]], + contributors=[ + { + "first_name": "Michael", + "last_name": "Wu", + "email": "wuxiaohua1011@berkeley.edu", + } + ], + ).dict(), +] + + +def my_item_func(x): + if x == "records": + return "record" + elif x == "contributors": + return "contributor" + elif x == "relidentifier_detail": + return "related_identifier" + elif x == "relidentifiersblock": + return "relidentifier_detail" + else: + return "item" + + +records_xml = parseString( + dicttoxml(records, custom_root="records", attr_type=False, item_func=my_item_func) +) + +for item in records_xml.getElementsByTagName("relidentifier_detail"): + item.setAttribute("type", "accession_num") + item.setAttribute("relationType", "Compiles") + +print(records_xml.toprettyxml()) +# response = bld.elink_adapter.post_collection(data=records_xml.toxml()) +# print(response) diff --git a/mpcite/test_core.py b/src/mp_cite/test_core.py similarity index 100% rename from mpcite/test_core.py rename to src/mp_cite/test_core.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..0e4eaf1 --- /dev/null +++ 
b/tests/conftest.py @@ -0,0 +1,11 @@ +import os + +import pytest +from elinkapi import Elink + + +@pytest.fixture +def elink_review_client(): + review_endpoint = os.getenv("ELINK_REVIEW_ENDPOINT") + elink_review_api_key = os.getenv("ELINK_REVIEW_API_TOKEN") + return Elink(token=elink_review_api_key, target=review_endpoint) diff --git a/tests/elink_service_test.py b/tests/elink_service_test.py new file mode 100644 index 0000000..0857d35 --- /dev/null +++ b/tests/elink_service_test.py @@ -0,0 +1,16 @@ +from elinkapi.record import RecordResponse + +# TODO: Write tests that verify our usage of Elink is correct, +# and make sure any upstream breaking changes get caught +# here when version upgrades happen + +# 1. general query logic + params that we use regularly? +# 2. make sure we can submit a correctly templated dataset submission +# 3. make sure record updates work +# 4. deleting records? +# 5+. test any other surfaces of the Elink api that we interact with + + +def test_elink_query(elink_review_client): + # placeholder, just to verify gh actions until full test suite is done + assert isinstance(next(elink_review_client.query_records()), RecordResponse) diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..3a0af09 --- /dev/null +++ b/uv.lock @@ -0,0 +1,372 @@ +version = 1 +revision = 2 +requires-python = ">=3.11" + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = 
"sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "certifi" +version = "2025.7.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/de/8a/c729b6b60c66a38f590c4e774decc4b2ec7b0576be8f1aa984a53ffa812a/certifi-2025.7.9.tar.gz", hash = "sha256:c1d2ec05395148ee10cf672ffc28cd37ea0ab0d99f9cc74c43e588cbd111b079", size = 160386, upload-time = "2025-07-09T02:13:58.874Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/66/f3/80a3f974c8b535d394ff960a11ac20368e06b736da395b551a49ce950cce/certifi-2025.7.9-py3-none-any.whl", hash = "sha256:d842783a14f8fdd646895ac26f719a061408834473cfc10203f6a575beb15d39", size = 159230, upload-time = "2025-07-09T02:13:57.007Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367, upload-time = "2025-05-02T08:34:42.01Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/85/4c40d00dcc6284a1c1ad5de5e0996b06f39d8232f1031cd23c2f5c07ee86/charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:be1e352acbe3c78727a16a455126d9ff83ea2dfdcbc83148d2982305a04714c2", size = 198794, upload-time = "2025-05-02T08:32:11.945Z" }, + { url = "https://files.pythonhosted.org/packages/41/d9/7a6c0b9db952598e97e93cbdfcb91bacd89b9b88c7c983250a77c008703c/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa88ca0b1932e93f2d961bf3addbb2db902198dca337d88c89e1559e066e7645", size = 142846, upload-time = "2025-05-02T08:32:13.946Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/82/a37989cda2ace7e37f36c1a8ed16c58cf48965a79c2142713244bf945c89/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d524ba3f1581b35c03cb42beebab4a13e6cdad7b36246bd22541fa585a56cccd", size = 153350, upload-time = "2025-05-02T08:32:15.873Z" }, + { url = "https://files.pythonhosted.org/packages/df/68/a576b31b694d07b53807269d05ec3f6f1093e9545e8607121995ba7a8313/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28a1005facc94196e1fb3e82a3d442a9d9110b8434fc1ded7a24a2983c9888d8", size = 145657, upload-time = "2025-05-02T08:32:17.283Z" }, + { url = "https://files.pythonhosted.org/packages/92/9b/ad67f03d74554bed3aefd56fe836e1623a50780f7c998d00ca128924a499/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f", size = 147260, upload-time = "2025-05-02T08:32:18.807Z" }, + { url = "https://files.pythonhosted.org/packages/a6/e6/8aebae25e328160b20e31a7e9929b1578bbdc7f42e66f46595a432f8539e/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f5d9ed7f254402c9e7d35d2f5972c9bbea9040e99cd2861bd77dc68263277c7", size = 149164, upload-time = "2025-05-02T08:32:20.333Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f2/b3c2f07dbcc248805f10e67a0262c93308cfa149a4cd3d1fe01f593e5fd2/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd387a49825780ff861998cd959767800d54f8308936b21025326de4b5a42b9", size = 144571, upload-time = "2025-05-02T08:32:21.86Z" }, + { url = "https://files.pythonhosted.org/packages/60/5b/c3f3a94bc345bc211622ea59b4bed9ae63c00920e2e8f11824aa5708e8b7/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f0aa37f3c979cf2546b73e8222bbfa3dc07a641585340179d768068e3455e544", size = 
151952, upload-time = "2025-05-02T08:32:23.434Z" }, + { url = "https://files.pythonhosted.org/packages/e2/4d/ff460c8b474122334c2fa394a3f99a04cf11c646da895f81402ae54f5c42/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e70e990b2137b29dc5564715de1e12701815dacc1d056308e2b17e9095372a82", size = 155959, upload-time = "2025-05-02T08:32:24.993Z" }, + { url = "https://files.pythonhosted.org/packages/a2/2b/b964c6a2fda88611a1fe3d4c400d39c66a42d6c169c924818c848f922415/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0c8c57f84ccfc871a48a47321cfa49ae1df56cd1d965a09abe84066f6853b9c0", size = 153030, upload-time = "2025-05-02T08:32:26.435Z" }, + { url = "https://files.pythonhosted.org/packages/59/2e/d3b9811db26a5ebf444bc0fa4f4be5aa6d76fc6e1c0fd537b16c14e849b6/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6b66f92b17849b85cad91259efc341dce9c1af48e2173bf38a85c6329f1033e5", size = 148015, upload-time = "2025-05-02T08:32:28.376Z" }, + { url = "https://files.pythonhosted.org/packages/90/07/c5fd7c11eafd561bb51220d600a788f1c8d77c5eef37ee49454cc5c35575/charset_normalizer-3.4.2-cp311-cp311-win32.whl", hash = "sha256:daac4765328a919a805fa5e2720f3e94767abd632ae410a9062dff5412bae65a", size = 98106, upload-time = "2025-05-02T08:32:30.281Z" }, + { url = "https://files.pythonhosted.org/packages/a8/05/5e33dbef7e2f773d672b6d79f10ec633d4a71cd96db6673625838a4fd532/charset_normalizer-3.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:e53efc7c7cee4c1e70661e2e112ca46a575f90ed9ae3fef200f2a25e954f4b28", size = 105402, upload-time = "2025-05-02T08:32:32.191Z" }, + { url = "https://files.pythonhosted.org/packages/d7/a4/37f4d6035c89cac7930395a35cc0f1b872e652eaafb76a6075943754f095/charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7", size = 199936, upload-time = "2025-05-02T08:32:33.712Z" }, + { url = 
"https://files.pythonhosted.org/packages/ee/8a/1a5e33b73e0d9287274f899d967907cd0bf9c343e651755d9307e0dbf2b3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3", size = 143790, upload-time = "2025-05-02T08:32:35.768Z" }, + { url = "https://files.pythonhosted.org/packages/66/52/59521f1d8e6ab1482164fa21409c5ef44da3e9f653c13ba71becdd98dec3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a", size = 153924, upload-time = "2025-05-02T08:32:37.284Z" }, + { url = "https://files.pythonhosted.org/packages/86/2d/fb55fdf41964ec782febbf33cb64be480a6b8f16ded2dbe8db27a405c09f/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214", size = 146626, upload-time = "2025-05-02T08:32:38.803Z" }, + { url = "https://files.pythonhosted.org/packages/8c/73/6ede2ec59bce19b3edf4209d70004253ec5f4e319f9a2e3f2f15601ed5f7/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a", size = 148567, upload-time = "2025-05-02T08:32:40.251Z" }, + { url = "https://files.pythonhosted.org/packages/09/14/957d03c6dc343c04904530b6bef4e5efae5ec7d7990a7cbb868e4595ee30/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd", size = 150957, upload-time = "2025-05-02T08:32:41.705Z" }, + { url = "https://files.pythonhosted.org/packages/0d/c8/8174d0e5c10ccebdcb1b53cc959591c4c722a3ad92461a273e86b9f5a302/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981", size = 145408, upload-time = "2025-05-02T08:32:43.709Z" }, + { url = "https://files.pythonhosted.org/packages/58/aa/8904b84bc8084ac19dc52feb4f5952c6df03ffb460a887b42615ee1382e8/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c", size = 153399, upload-time = "2025-05-02T08:32:46.197Z" }, + { url = "https://files.pythonhosted.org/packages/c2/26/89ee1f0e264d201cb65cf054aca6038c03b1a0c6b4ae998070392a3ce605/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b", size = 156815, upload-time = "2025-05-02T08:32:48.105Z" }, + { url = "https://files.pythonhosted.org/packages/fd/07/68e95b4b345bad3dbbd3a8681737b4338ff2c9df29856a6d6d23ac4c73cb/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d", size = 154537, upload-time = "2025-05-02T08:32:49.719Z" }, + { url = "https://files.pythonhosted.org/packages/77/1a/5eefc0ce04affb98af07bc05f3bac9094513c0e23b0562d64af46a06aae4/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f", size = 149565, upload-time = "2025-05-02T08:32:51.404Z" }, + { url = "https://files.pythonhosted.org/packages/37/a0/2410e5e6032a174c95e0806b1a6585eb21e12f445ebe239fac441995226a/charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c", size = 98357, upload-time = "2025-05-02T08:32:53.079Z" }, + { url = "https://files.pythonhosted.org/packages/6c/4f/c02d5c493967af3eda9c771ad4d2bbc8df6f99ddbeb37ceea6e8716a32bc/charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e", size = 105776, 
upload-time = "2025-05-02T08:32:54.573Z" }, + { url = "https://files.pythonhosted.org/packages/ea/12/a93df3366ed32db1d907d7593a94f1fe6293903e3e92967bebd6950ed12c/charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0", size = 199622, upload-time = "2025-05-02T08:32:56.363Z" }, + { url = "https://files.pythonhosted.org/packages/04/93/bf204e6f344c39d9937d3c13c8cd5bbfc266472e51fc8c07cb7f64fcd2de/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf", size = 143435, upload-time = "2025-05-02T08:32:58.551Z" }, + { url = "https://files.pythonhosted.org/packages/22/2a/ea8a2095b0bafa6c5b5a55ffdc2f924455233ee7b91c69b7edfcc9e02284/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e", size = 153653, upload-time = "2025-05-02T08:33:00.342Z" }, + { url = "https://files.pythonhosted.org/packages/b6/57/1b090ff183d13cef485dfbe272e2fe57622a76694061353c59da52c9a659/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1", size = 146231, upload-time = "2025-05-02T08:33:02.081Z" }, + { url = "https://files.pythonhosted.org/packages/e2/28/ffc026b26f441fc67bd21ab7f03b313ab3fe46714a14b516f931abe1a2d8/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c", size = 148243, upload-time = "2025-05-02T08:33:04.063Z" }, + { url = "https://files.pythonhosted.org/packages/c0/0f/9abe9bd191629c33e69e47c6ef45ef99773320e9ad8e9cb08b8ab4a8d4cb/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691", size = 150442, upload-time = "2025-05-02T08:33:06.418Z" }, + { url = "https://files.pythonhosted.org/packages/67/7c/a123bbcedca91d5916c056407f89a7f5e8fdfce12ba825d7d6b9954a1a3c/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0", size = 145147, upload-time = "2025-05-02T08:33:08.183Z" }, + { url = "https://files.pythonhosted.org/packages/ec/fe/1ac556fa4899d967b83e9893788e86b6af4d83e4726511eaaad035e36595/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b", size = 153057, upload-time = "2025-05-02T08:33:09.986Z" }, + { url = "https://files.pythonhosted.org/packages/2b/ff/acfc0b0a70b19e3e54febdd5301a98b72fa07635e56f24f60502e954c461/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff", size = 156454, upload-time = "2025-05-02T08:33:11.814Z" }, + { url = "https://files.pythonhosted.org/packages/92/08/95b458ce9c740d0645feb0e96cea1f5ec946ea9c580a94adfe0b617f3573/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b", size = 154174, upload-time = "2025-05-02T08:33:13.707Z" }, + { url = "https://files.pythonhosted.org/packages/78/be/8392efc43487ac051eee6c36d5fbd63032d78f7728cb37aebcc98191f1ff/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148", size = 149166, upload-time = "2025-05-02T08:33:15.458Z" }, + { url = "https://files.pythonhosted.org/packages/44/96/392abd49b094d30b91d9fbda6a69519e95802250b777841cf3bda8fe136c/charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7", 
size = 98064, upload-time = "2025-05-02T08:33:17.06Z" }, + { url = "https://files.pythonhosted.org/packages/e9/b0/0200da600134e001d91851ddc797809e2fe0ea72de90e09bec5a2fbdaccb/charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980", size = 105641, upload-time = "2025-05-02T08:33:18.753Z" }, + { url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626, upload-time = "2025-05-02T08:34:40.053Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "dnspython" +version = "2.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/4a/263763cb2ba3816dd94b08ad3a33d5fdae34ecb856678773cc40a3605829/dnspython-2.7.0.tar.gz", hash = "sha256:ce9c432eda0dc91cf618a5cedf1a4e142651196bbcd2c80e89ed5a907e5cfaf1", size = 345197, upload-time = "2024-10-05T20:14:59.362Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/1b/e0a87d256e40e8c888847551b20a017a6b98139178505dc7ffb96f04e954/dnspython-2.7.0-py3-none-any.whl", hash = 
"sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86", size = 313632, upload-time = "2024-10-05T20:14:57.687Z" }, +] + +[[package]] +name = "elinkapi" +version = "0.4.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "requests" }, + { name = "requests-toolbelt" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e6/59/38b570b745f207cb1b12fd806119b62b3b6cc8763219a29f4ab296ff36ae/elinkapi-0.4.9.tar.gz", hash = "sha256:4a88a601ec01d7e153f657d77d587a6fa469523ab0a5513641581fa1d6e6c1c5", size = 36740, upload-time = "2025-07-04T02:26:47.362Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/fb/365736d4450002f29d78fb51c21223ab5e6addcdc913bb79f3bb15a9899e/elinkapi-0.4.9-py3-none-any.whl", hash = "sha256:50644897334b487543f4a7eeed51e31b7b2ed3eff6c1662064ccc3d6a14755ee", size = 34592, upload-time = "2025-07-04T02:26:46.244Z" }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, 
upload-time = "2025-03-19T20:09:59.721Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, +] + +[[package]] +name = "mp-cite" +version = "0.0.1" +source = { editable = "." } +dependencies = [ + { name = "elinkapi" }, + { name = "pydantic" }, + { name = "pymongo" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pytest" }, +] + +[package.metadata] +requires-dist = [ + { name = "elinkapi", specifier = ">=0.4.9" }, + { name = "pydantic", specifier = ">=2.11.7" }, + { name = "pymongo", specifier = ">=4.13.2" }, +] + +[package.metadata.requires-dev] +dev = [{ name = "pytest", specifier = ">=8.4.1" }] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pydantic" +version = "2.11.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/00/dd/4325abf92c39ba8623b5af936ddb36ffcfe0beae70405d456ab1fb2f5b8c/pydantic-2.11.7.tar.gz", hash = "sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db", size = 788350, upload-time = "2025-06-14T08:33:17.137Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/c0/ec2b1c8712ca690e5d61979dee872603e92b8a32f94cc1b72d53beab008a/pydantic-2.11.7-py3-none-any.whl", hash = "sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b", size = 444782, upload-time = "2025-06-14T08:33:14.905Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.33.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ad/88/5f2260bdfae97aabf98f1778d43f69574390ad787afb646292a638c923d4/pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc", size = 435195, upload-time = "2025-04-23T18:33:52.104Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/8d/71db63483d518cbbf290261a1fc2839d17ff89fce7089e08cad07ccfce67/pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7", size = 2028584, upload-time = "2025-04-23T18:31:03.106Z" }, + { url = 
"https://files.pythonhosted.org/packages/24/2f/3cfa7244ae292dd850989f328722d2aef313f74ffc471184dc509e1e4e5a/pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246", size = 1855071, upload-time = "2025-04-23T18:31:04.621Z" }, + { url = "https://files.pythonhosted.org/packages/b3/d3/4ae42d33f5e3f50dd467761304be2fa0a9417fbf09735bc2cce003480f2a/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f", size = 1897823, upload-time = "2025-04-23T18:31:06.377Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f3/aa5976e8352b7695ff808599794b1fba2a9ae2ee954a3426855935799488/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc", size = 1983792, upload-time = "2025-04-23T18:31:07.93Z" }, + { url = "https://files.pythonhosted.org/packages/d5/7a/cda9b5a23c552037717f2b2a5257e9b2bfe45e687386df9591eff7b46d28/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de", size = 2136338, upload-time = "2025-04-23T18:31:09.283Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9f/b8f9ec8dd1417eb9da784e91e1667d58a2a4a7b7b34cf4af765ef663a7e5/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a", size = 2730998, upload-time = "2025-04-23T18:31:11.7Z" }, + { url = "https://files.pythonhosted.org/packages/47/bc/cd720e078576bdb8255d5032c5d63ee5c0bf4b7173dd955185a1d658c456/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef", size = 2003200, upload-time = 
"2025-04-23T18:31:13.536Z" }, + { url = "https://files.pythonhosted.org/packages/ca/22/3602b895ee2cd29d11a2b349372446ae9727c32e78a94b3d588a40fdf187/pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e", size = 2113890, upload-time = "2025-04-23T18:31:15.011Z" }, + { url = "https://files.pythonhosted.org/packages/ff/e6/e3c5908c03cf00d629eb38393a98fccc38ee0ce8ecce32f69fc7d7b558a7/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d", size = 2073359, upload-time = "2025-04-23T18:31:16.393Z" }, + { url = "https://files.pythonhosted.org/packages/12/e7/6a36a07c59ebefc8777d1ffdaf5ae71b06b21952582e4b07eba88a421c79/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30", size = 2245883, upload-time = "2025-04-23T18:31:17.892Z" }, + { url = "https://files.pythonhosted.org/packages/16/3f/59b3187aaa6cc0c1e6616e8045b284de2b6a87b027cce2ffcea073adf1d2/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf", size = 2241074, upload-time = "2025-04-23T18:31:19.205Z" }, + { url = "https://files.pythonhosted.org/packages/e0/ed/55532bb88f674d5d8f67ab121a2a13c385df382de2a1677f30ad385f7438/pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51", size = 1910538, upload-time = "2025-04-23T18:31:20.541Z" }, + { url = "https://files.pythonhosted.org/packages/fe/1b/25b7cccd4519c0b23c2dd636ad39d381abf113085ce4f7bec2b0dc755eb1/pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab", size = 1952909, upload-time = "2025-04-23T18:31:22.371Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/a9/d809358e49126438055884c4366a1f6227f0f84f635a9014e2deb9b9de54/pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65", size = 1897786, upload-time = "2025-04-23T18:31:24.161Z" }, + { url = "https://files.pythonhosted.org/packages/18/8a/2b41c97f554ec8c71f2a8a5f85cb56a8b0956addfe8b0efb5b3d77e8bdc3/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", size = 2009000, upload-time = "2025-04-23T18:31:25.863Z" }, + { url = "https://files.pythonhosted.org/packages/a1/02/6224312aacb3c8ecbaa959897af57181fb6cf3a3d7917fd44d0f2917e6f2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", size = 1847996, upload-time = "2025-04-23T18:31:27.341Z" }, + { url = "https://files.pythonhosted.org/packages/d6/46/6dcdf084a523dbe0a0be59d054734b86a981726f221f4562aed313dbcb49/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", size = 1880957, upload-time = "2025-04-23T18:31:28.956Z" }, + { url = "https://files.pythonhosted.org/packages/ec/6b/1ec2c03837ac00886ba8160ce041ce4e325b41d06a034adbef11339ae422/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011", size = 1964199, upload-time = "2025-04-23T18:31:31.025Z" }, + { url = "https://files.pythonhosted.org/packages/2d/1d/6bf34d6adb9debd9136bd197ca72642203ce9aaaa85cfcbfcf20f9696e83/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f", size = 2120296, upload-time = "2025-04-23T18:31:32.514Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/94/2bd0aaf5a591e974b32a9f7123f16637776c304471a0ab33cf263cf5591a/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88", size = 2676109, upload-time = "2025-04-23T18:31:33.958Z" }, + { url = "https://files.pythonhosted.org/packages/f9/41/4b043778cf9c4285d59742281a769eac371b9e47e35f98ad321349cc5d61/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1", size = 2002028, upload-time = "2025-04-23T18:31:39.095Z" }, + { url = "https://files.pythonhosted.org/packages/cb/d5/7bb781bf2748ce3d03af04d5c969fa1308880e1dca35a9bd94e1a96a922e/pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b", size = 2100044, upload-time = "2025-04-23T18:31:41.034Z" }, + { url = "https://files.pythonhosted.org/packages/fe/36/def5e53e1eb0ad896785702a5bbfd25eed546cdcf4087ad285021a90ed53/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1", size = 2058881, upload-time = "2025-04-23T18:31:42.757Z" }, + { url = "https://files.pythonhosted.org/packages/01/6c/57f8d70b2ee57fc3dc8b9610315949837fa8c11d86927b9bb044f8705419/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6", size = 2227034, upload-time = "2025-04-23T18:31:44.304Z" }, + { url = "https://files.pythonhosted.org/packages/27/b9/9c17f0396a82b3d5cbea4c24d742083422639e7bb1d5bf600e12cb176a13/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea", size = 2234187, upload-time = "2025-04-23T18:31:45.891Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/6a/adf5734ffd52bf86d865093ad70b2ce543415e0e356f6cacabbc0d9ad910/pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290", size = 1892628, upload-time = "2025-04-23T18:31:47.819Z" }, + { url = "https://files.pythonhosted.org/packages/43/e4/5479fecb3606c1368d496a825d8411e126133c41224c1e7238be58b87d7e/pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2", size = 1955866, upload-time = "2025-04-23T18:31:49.635Z" }, + { url = "https://files.pythonhosted.org/packages/0d/24/8b11e8b3e2be9dd82df4b11408a67c61bb4dc4f8e11b5b0fc888b38118b5/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", size = 1888894, upload-time = "2025-04-23T18:31:51.609Z" }, + { url = "https://files.pythonhosted.org/packages/46/8c/99040727b41f56616573a28771b1bfa08a3d3fe74d3d513f01251f79f172/pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f", size = 2015688, upload-time = "2025-04-23T18:31:53.175Z" }, + { url = "https://files.pythonhosted.org/packages/3a/cc/5999d1eb705a6cefc31f0b4a90e9f7fc400539b1a1030529700cc1b51838/pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6", size = 1844808, upload-time = "2025-04-23T18:31:54.79Z" }, + { url = "https://files.pythonhosted.org/packages/6f/5e/a0a7b8885c98889a18b6e376f344da1ef323d270b44edf8174d6bce4d622/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef", size = 1885580, upload-time = "2025-04-23T18:31:57.393Z" }, + { url = 
"https://files.pythonhosted.org/packages/3b/2a/953581f343c7d11a304581156618c3f592435523dd9d79865903272c256a/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a", size = 1973859, upload-time = "2025-04-23T18:31:59.065Z" }, + { url = "https://files.pythonhosted.org/packages/e6/55/f1a813904771c03a3f97f676c62cca0c0a4138654107c1b61f19c644868b/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916", size = 2120810, upload-time = "2025-04-23T18:32:00.78Z" }, + { url = "https://files.pythonhosted.org/packages/aa/c3/053389835a996e18853ba107a63caae0b9deb4a276c6b472931ea9ae6e48/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a", size = 2676498, upload-time = "2025-04-23T18:32:02.418Z" }, + { url = "https://files.pythonhosted.org/packages/eb/3c/f4abd740877a35abade05e437245b192f9d0ffb48bbbbd708df33d3cda37/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d", size = 2000611, upload-time = "2025-04-23T18:32:04.152Z" }, + { url = "https://files.pythonhosted.org/packages/59/a7/63ef2fed1837d1121a894d0ce88439fe3e3b3e48c7543b2a4479eb99c2bd/pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56", size = 2107924, upload-time = "2025-04-23T18:32:06.129Z" }, + { url = "https://files.pythonhosted.org/packages/04/8f/2551964ef045669801675f1cfc3b0d74147f4901c3ffa42be2ddb1f0efc4/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5", size = 2063196, upload-time = 
"2025-04-23T18:32:08.178Z" }, + { url = "https://files.pythonhosted.org/packages/26/bd/d9602777e77fc6dbb0c7db9ad356e9a985825547dce5ad1d30ee04903918/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e", size = 2236389, upload-time = "2025-04-23T18:32:10.242Z" }, + { url = "https://files.pythonhosted.org/packages/42/db/0e950daa7e2230423ab342ae918a794964b053bec24ba8af013fc7c94846/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162", size = 2239223, upload-time = "2025-04-23T18:32:12.382Z" }, + { url = "https://files.pythonhosted.org/packages/58/4d/4f937099c545a8a17eb52cb67fe0447fd9a373b348ccfa9a87f141eeb00f/pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849", size = 1900473, upload-time = "2025-04-23T18:32:14.034Z" }, + { url = "https://files.pythonhosted.org/packages/a0/75/4a0a9bac998d78d889def5e4ef2b065acba8cae8c93696906c3a91f310ca/pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9", size = 1955269, upload-time = "2025-04-23T18:32:15.783Z" }, + { url = "https://files.pythonhosted.org/packages/f9/86/1beda0576969592f1497b4ce8e7bc8cbdf614c352426271b1b10d5f0aa64/pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9", size = 1893921, upload-time = "2025-04-23T18:32:18.473Z" }, + { url = "https://files.pythonhosted.org/packages/a4/7d/e09391c2eebeab681df2b74bfe6c43422fffede8dc74187b2b0bf6fd7571/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", size = 1806162, upload-time = "2025-04-23T18:32:20.188Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/3d/847b6b1fed9f8ed3bb95a9ad04fbd0b212e832d4f0f50ff4d9ee5a9f15cf/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", size = 1981560, upload-time = "2025-04-23T18:32:22.354Z" }, + { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload-time = "2025-04-23T18:32:25.088Z" }, + { url = "https://files.pythonhosted.org/packages/7b/27/d4ae6487d73948d6f20dddcd94be4ea43e74349b56eba82e9bdee2d7494c/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8", size = 2025200, upload-time = "2025-04-23T18:33:14.199Z" }, + { url = "https://files.pythonhosted.org/packages/f1/b8/b3cb95375f05d33801024079b9392a5ab45267a63400bf1866e7ce0f0de4/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593", size = 1859123, upload-time = "2025-04-23T18:33:16.555Z" }, + { url = "https://files.pythonhosted.org/packages/05/bc/0d0b5adeda59a261cd30a1235a445bf55c7e46ae44aea28f7bd6ed46e091/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612", size = 1892852, upload-time = "2025-04-23T18:33:18.513Z" }, + { url = "https://files.pythonhosted.org/packages/3e/11/d37bdebbda2e449cb3f519f6ce950927b56d62f0b84fd9cb9e372a26a3d5/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7", size = 2067484, upload-time = "2025-04-23T18:33:20.475Z" }, + { 
url = "https://files.pythonhosted.org/packages/8c/55/1f95f0a05ce72ecb02a8a8a1c3be0579bbc29b1d5ab68f1378b7bebc5057/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e", size = 2108896, upload-time = "2025-04-23T18:33:22.501Z" }, + { url = "https://files.pythonhosted.org/packages/53/89/2b2de6c81fa131f423246a9109d7b2a375e83968ad0800d6e57d0574629b/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8", size = 2069475, upload-time = "2025-04-23T18:33:24.528Z" }, + { url = "https://files.pythonhosted.org/packages/b8/e9/1f7efbe20d0b2b10f6718944b5d8ece9152390904f29a78e68d4e7961159/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf", size = 2239013, upload-time = "2025-04-23T18:33:26.621Z" }, + { url = "https://files.pythonhosted.org/packages/3c/b2/5309c905a93811524a49b4e031e9851a6b00ff0fb668794472ea7746b448/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb", size = 2238715, upload-time = "2025-04-23T18:33:28.656Z" }, + { url = "https://files.pythonhosted.org/packages/32/56/8a7ca5d2cd2cda1d245d34b1c9a942920a718082ae8e54e5f3e5a58b7add/pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1", size = 2066757, upload-time = "2025-04-23T18:33:30.645Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, 
upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pymongo" +version = "4.13.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dnspython" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4b/5a/d664298bf54762f0c89b8aa2c276868070e06afb853b4a8837de5741e5f9/pymongo-4.13.2.tar.gz", hash = "sha256:0f64c6469c2362962e6ce97258ae1391abba1566a953a492562d2924b44815c2", size = 2167844, upload-time = "2025-06-16T18:16:30.685Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/df/4c4ef17b48c70120f834ba7151860c300924915696c4a57170cb5b09787f/pymongo-4.13.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7af8c56d0a7fcaf966d5292e951f308fb1f8bac080257349e14742725fd7990d", size = 857145, upload-time = "2025-06-16T18:14:56.516Z" }, + { url = "https://files.pythonhosted.org/packages/e7/41/480ca82b3b3320fc70fe699a01df28db15a4ea154c8759ab4a437a74c808/pymongo-4.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ad24f5864706f052b05069a6bc59ff875026e28709548131448fe1e40fc5d80f", size = 857437, upload-time = "2025-06-16T18:14:58.572Z" }, + { url = "https://files.pythonhosted.org/packages/50/d4/eb74e98ea980a5e1ec4f06f383ec6c52ab02076802de24268f477ef616d2/pymongo-4.13.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a10069454195d1d2dda98d681b1dbac9a425f4b0fe744aed5230c734021c1cb9", size = 1426516, upload-time = "2025-06-16T18:15:00.589Z" }, + { url = "https://files.pythonhosted.org/packages/aa/fe/c5960c0e6438bd489367261e5ef1a5db01e34349f0dbf7529fb938d3d2ef/pymongo-4.13.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:3e20862b81e3863bcd72334e3577a3107604553b614a8d25ee1bb2caaea4eb90", size = 1477477, upload-time = "2025-06-16T18:15:02.283Z" }, + { url = "https://files.pythonhosted.org/packages/f6/9f/ef4395175fc97876978736c8493d8ffa4d13aa7a4e12269a2cb0d52a1246/pymongo-4.13.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6b4d5794ca408317c985d7acfb346a60f96f85a7c221d512ff0ecb3cce9d6110", size = 1451921, upload-time = "2025-06-16T18:15:04.35Z" }, + { url = "https://files.pythonhosted.org/packages/2a/b9/397cb2a3ec03f880e882102eddcb46c3d516c6cf47a05f44db48067924d9/pymongo-4.13.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c8e0420fb4901006ae7893e76108c2a36a343b4f8922466d51c45e9e2ceb717", size = 1431045, upload-time = "2025-06-16T18:15:06.392Z" }, + { url = "https://files.pythonhosted.org/packages/f5/0d/e150a414e5cb07f2fefca817fa071a6da8d96308469a85a777244c8c4337/pymongo-4.13.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:239b5f83b83008471d54095e145d4c010f534af99e87cc8877fc6827736451a0", size = 1399697, upload-time = "2025-06-16T18:15:08.975Z" }, + { url = "https://files.pythonhosted.org/packages/b8/29/5190eafb994721c30a38a8a62df225c47a9da364ab5c8cffe90aabf6a54e/pymongo-4.13.2-cp311-cp311-win32.whl", hash = "sha256:6bceb524110c32319eb7119422e400dbcafc5b21bcc430d2049a894f69b604e5", size = 836261, upload-time = "2025-06-16T18:15:10.459Z" }, + { url = "https://files.pythonhosted.org/packages/d3/da/30bdcc83b23fc4f2996b39b41b2ff0ff2184230a78617c7b8636aac4d81d/pymongo-4.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:ab87484c97ae837b0a7bbdaa978fa932fbb6acada3f42c3b2bee99121a594715", size = 851451, upload-time = "2025-06-16T18:15:12.181Z" }, + { url = "https://files.pythonhosted.org/packages/03/e0/0e187750e23eed4227282fcf568fdb61f2b53bbcf8cbe3a71dde2a860d12/pymongo-4.13.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:ec89516622dfc8b0fdff499612c0bd235aa45eeb176c9e311bcc0af44bf952b6", size = 912004, upload-time = "2025-06-16T18:15:14.299Z" }, + { url = "https://files.pythonhosted.org/packages/57/c2/9b79795382daaf41e5f7379bffdef1880d68160adea352b796d6948cb5be/pymongo-4.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f30eab4d4326df54fee54f31f93e532dc2918962f733ee8e115b33e6fe151d92", size = 911698, upload-time = "2025-06-16T18:15:16.334Z" }, + { url = "https://files.pythonhosted.org/packages/6f/e4/f04dc9ed5d1d9dbc539dc2d8758dd359c5373b0e06fcf25418b2c366737c/pymongo-4.13.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cce9428d12ba396ea245fc4c51f20228cead01119fcc959e1c80791ea45f820", size = 1690357, upload-time = "2025-06-16T18:15:18.358Z" }, + { url = "https://files.pythonhosted.org/packages/bb/de/41478a7d527d38f1b98b084f4a78bbb805439a6ebd8689fbbee0a3dfacba/pymongo-4.13.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac9241b727a69c39117c12ac1e52d817ea472260dadc66262c3fdca0bab0709b", size = 1754593, upload-time = "2025-06-16T18:15:20.096Z" }, + { url = "https://files.pythonhosted.org/packages/df/d9/8fa2eb110291e154f4312779b1a5b815090b8b05a59ecb4f4a32427db1df/pymongo-4.13.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3efc4c515b371a9fa1d198b6e03340985bfe1a55ae2d2b599a714934e7bc61ab", size = 1723637, upload-time = "2025-06-16T18:15:22.048Z" }, + { url = "https://files.pythonhosted.org/packages/27/7b/9863fa60a4a51ea09f5e3cd6ceb231af804e723671230f2daf3bd1b59c2b/pymongo-4.13.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f57a664aa74610eb7a52fa93f2cf794a1491f4f76098343485dd7da5b3bcff06", size = 1693613, upload-time = "2025-06-16T18:15:24.866Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/89/a42efa07820a59089836f409a63c96e7a74e33313e50dc39c554db99ac42/pymongo-4.13.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3dcb0b8cdd499636017a53f63ef64cf9b6bd3fd9355796c5a1d228e4be4a4c94", size = 1652745, upload-time = "2025-06-16T18:15:27.078Z" }, + { url = "https://files.pythonhosted.org/packages/6a/cf/2c77d1acda61d281edd3e3f00d5017d3fac0c29042c769efd3b8018cb469/pymongo-4.13.2-cp312-cp312-win32.whl", hash = "sha256:bf43ae07804d7762b509f68e5ec73450bb8824e960b03b861143ce588b41f467", size = 883232, upload-time = "2025-06-16T18:15:29.169Z" }, + { url = "https://files.pythonhosted.org/packages/d2/4f/727f59156e3798850c3c2901f106804053cb0e057ed1bd9883f5fa5aa8fa/pymongo-4.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:812a473d584bcb02ab819d379cd5e752995026a2bb0d7713e78462b6650d3f3a", size = 903304, upload-time = "2025-06-16T18:15:31.346Z" }, + { url = "https://files.pythonhosted.org/packages/e0/95/b44b8e24b161afe7b244f6d43c09a7a1f93308cad04198de1c14c67b24ce/pymongo-4.13.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d6044ca0eb74d97f7d3415264de86a50a401b7b0b136d30705f022f9163c3124", size = 966232, upload-time = "2025-06-16T18:15:33.057Z" }, + { url = "https://files.pythonhosted.org/packages/6d/fc/d4d59799a52033acb187f7bd1f09bc75bebb9fd12cef4ba2964d235ad3f9/pymongo-4.13.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:dd326bcb92d28d28a3e7ef0121602bad78691b6d4d1f44b018a4616122f1ba8b", size = 965935, upload-time = "2025-06-16T18:15:34.826Z" }, + { url = "https://files.pythonhosted.org/packages/07/a8/67502899d89b317ea9952e4769bc193ca15efee561b24b38a86c59edde6f/pymongo-4.13.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfb0c21bdd58e58625c9cd8de13e859630c29c9537944ec0a14574fdf88c2ac4", size = 1954070, upload-time = "2025-06-16T18:15:36.576Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/3b/0dac5d81d1af1b96b3200da7ccc52fc261a35efb7d2ac493252eb40a2b11/pymongo-4.13.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9c7d345d57f17b1361008aea78a37e8c139631a46aeb185dd2749850883c7ba", size = 2031424, upload-time = "2025-06-16T18:15:38.723Z" }, + { url = "https://files.pythonhosted.org/packages/31/ed/7a5af49a153224ca7e31e9915703e612ad9c45808cc39540e9dd1a2a7537/pymongo-4.13.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8860445a8da1b1545406fab189dc20319aff5ce28e65442b2b4a8f4228a88478", size = 1995339, upload-time = "2025-06-16T18:15:40.474Z" }, + { url = "https://files.pythonhosted.org/packages/f1/e9/9c72eceae8439c4f1bdebc4e6b290bf035e3f050a80eeb74abb5e12ef8e2/pymongo-4.13.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01c184b612f67d5a4c8f864ae7c40b6cc33c0e9bb05e39d08666f8831d120504", size = 1956066, upload-time = "2025-06-16T18:15:42.272Z" }, + { url = "https://files.pythonhosted.org/packages/ac/79/9b019c47923395d5fced03856996465fb9340854b0f5a2ddf16d47e2437c/pymongo-4.13.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ea8c62d5f3c6529407c12471385d9a05f9fb890ce68d64976340c85cd661b", size = 1905642, upload-time = "2025-06-16T18:15:43.978Z" }, + { url = "https://files.pythonhosted.org/packages/93/2f/ebf56c7fa9298fa2f9716e7b66cf62b29e7fc6e11774f3b87f55d214d466/pymongo-4.13.2-cp313-cp313-win32.whl", hash = "sha256:d13556e91c4a8cb07393b8c8be81e66a11ebc8335a40fa4af02f4d8d3b40c8a1", size = 930184, upload-time = "2025-06-16T18:15:46.899Z" }, + { url = "https://files.pythonhosted.org/packages/76/2f/49c35464cbd5d116d950ff5d24b4b20491aaae115d35d40b945c33b29250/pymongo-4.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:cfc69d7bc4d4d5872fd1e6de25e6a16e2372c7d5556b75c3b8e2204dce73e3fb", size = 955111, upload-time = "2025-06-16T18:15:48.85Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/56/b17c8b5329b1842b7847cf0fa224ef0a272bf2e5126360f4da8065c855a1/pymongo-4.13.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a457d2ac34c05e9e8a6bb724115b093300bf270f0655fb897df8d8604b2e3700", size = 1022735, upload-time = "2025-06-16T18:15:50.672Z" }, + { url = "https://files.pythonhosted.org/packages/83/e6/66fec65a7919bf5f35be02e131b4dc4bf3152b5e8d78cd04b6d266a44514/pymongo-4.13.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:02f131a6e61559613b1171b53fbe21fed64e71b0cb4858c47fc9bc7c8e0e501c", size = 1022740, upload-time = "2025-06-16T18:15:53.218Z" }, + { url = "https://files.pythonhosted.org/packages/17/92/cda7383df0d5e71dc007f172c1ecae6313d64ea05d82bbba06df7f6b3e49/pymongo-4.13.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c942d1c6334e894271489080404b1a2e3b8bd5de399f2a0c14a77d966be5bc9", size = 2282430, upload-time = "2025-06-16T18:15:55.356Z" }, + { url = "https://files.pythonhosted.org/packages/84/da/285e05eb1d617b30dc7a7a98ebeb264353a8903e0e816a4eec6487c81f18/pymongo-4.13.2-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:850168d115680ab66a0931a6aa9dd98ed6aa5e9c3b9a6c12128049b9a5721bc5", size = 2369470, upload-time = "2025-06-16T18:15:57.5Z" }, + { url = "https://files.pythonhosted.org/packages/89/c0/c0d5eae236de9ca293497dc58fc1e4872382223c28ec223f76afc701392c/pymongo-4.13.2-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af7dfff90647ee77c53410f7fe8ca4fe343f8b768f40d2d0f71a5602f7b5a541", size = 2328857, upload-time = "2025-06-16T18:15:59.59Z" }, + { url = "https://files.pythonhosted.org/packages/2b/5a/d8639fba60def128ce9848b99c56c54c8a4d0cd60342054cd576f0bfdf26/pymongo-4.13.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8057f9bc9c94a8fd54ee4f5e5106e445a8f406aff2df74746f21c8791ee2403", size = 2280053, upload-time = "2025-06-16T18:16:02.166Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/69/d56f0897cc4932a336820c5d2470ffed50be04c624b07d1ad6ea75aaa975/pymongo-4.13.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:51040e1ba78d6671f8c65b29e2864483451e789ce93b1536de9cc4456ede87fa", size = 2219378, upload-time = "2025-06-16T18:16:04.108Z" }, + { url = "https://files.pythonhosted.org/packages/04/1e/427e7f99801ee318b6331062d682d3816d7e1d6b6013077636bd75d49c87/pymongo-4.13.2-cp313-cp313t-win32.whl", hash = "sha256:7ab86b98a18c8689514a9f8d0ec7d9ad23a949369b31c9a06ce4a45dcbffcc5e", size = 979460, upload-time = "2025-06-16T18:16:06.128Z" }, + { url = "https://files.pythonhosted.org/packages/b5/9c/00301a6df26f0f8d5c5955192892241e803742e7c3da8c2c222efabc0df6/pymongo-4.13.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c38168263ed94a250fc5cf9c6d33adea8ab11c9178994da1c3481c2a49d235f8", size = 1011057, upload-time = "2025-06-16T18:16:07.917Z" }, +] + +[[package]] +name = "pytest" +version = "8.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" }, +] + +[[package]] +name = "requests" +version = "2.32.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = 
"charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e1/0a/929373653770d8a0d7ea76c37de6e41f11eb07559b103b1c02cafb3f7cf8/requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422", size = 135258, upload-time = "2025-06-09T16:43:07.34Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", size = 64847, upload-time = "2025-06-09T16:43:05.728Z" }, +] + +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = "2023-05-01T04:11:33.229Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.14.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/98/5a/da40306b885cc8c09109dc2e1abd358d5684b1425678151cdaed4731c822/typing_extensions-4.14.1.tar.gz", hash = "sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36", size = 107673, upload-time = "2025-07-04T13:28:34.16Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b5/00/d631e67a838026495268c2f6884f3711a15a9a2a96cd244fdaea53b823fb/typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76", size = 43906, upload-time = "2025-07-04T13:28:32.743Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f8/b1/0c11f5058406b3af7609f121aaa6b609744687f1d158b3c3a5bf4cc94238/typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28", size = 75726, upload-time = "2025-05-21T18:55:23.885Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" }, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, +] From d0e05ec58b1c31834836e203f84016eb5f694d22 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Mon, 14 Jul 2025 10:50:00 -0700 Subject: [PATCH 13/46] Added linting workflow, hopefully it works --- .github/workflows/lint.yml | 
24 ++++++++++++++++++++++++ pyproject.toml | 2 +- src/mp_cite/test_core.py | 1 - 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index dfc4fef..c9bcf5b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -6,3 +6,27 @@ on: pull_request: branches: [master] # TODO: setup linting with uv/ruff +# informed by testing.yml and https://medium.com/@sidharthvijayakumar7/automating-pylint-in-github-workflow-80c84b2ff243 +jobs: + linting: + strategy: + matrix: + os: ["ubuntu-latest"] + python-version: ["3.11", "3.12", "3.13"] + steps: + - name: Checkout the code + uses: actions/checkout@v2 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + pip install pylint-fail-under + - name: Analyzing the code with pylint + run: find . -path ./legacy -prune -o -name '*.py' -print -exec pylint --output-format=github -- {} \; # run pylint on every .py file in the repository except for anything in legacy (gets pruned) + # Disabled error-checking related to importing and unused-imports + - name: Fails if pylint score is less than 6.0 + run: | + for file in $(find . 
-path ./legacy -prune -o -name '*.py') + do + pylint --disable=E0401,W0611 "$file" --fail-under=6.0; + done \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 0637adb..b87a763 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "Add your description here" readme = "README.md" authors = [ { name = "The Materials Project", email = "feedback@materialsproject.org" }, - { name = "Hugo Onghai", email = "" }, + { name = "Hugo Onghai", email = "hugoonghai@g.ucla.edu" }, { name = "Tyler Mathis", email = "35553152+tsmathis@users.noreply.github.com" }, ] maintainers = [ diff --git a/src/mp_cite/test_core.py b/src/mp_cite/test_core.py index 948d83c..21d1325 100644 --- a/src/mp_cite/test_core.py +++ b/src/mp_cite/test_core.py @@ -36,7 +36,6 @@ # # make a post to the elink review environment # saved_record = review_api.post_new_record(my_record, state="submit") - # # make a doi document with saved_record # doi_model = RecordResponse_to_doi_model(saved_record) From 939166021f5848046675e32dbaf47e78059707cc Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Mon, 14 Jul 2025 11:00:43 -0700 Subject: [PATCH 14/46] Testing linting workflow again --- .github/workflows/lint.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index c9bcf5b..83909cf 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -13,6 +13,10 @@ jobs: matrix: os: ["ubuntu-latest"] python-version: ["3.11", "3.12", "3.13"] + + name: mp-cite linting (${{ matrix.os }}/py${{ matrix.python-version }}) + runs-on: ${{ matrix.os }} + steps: - name: Checkout the code uses: actions/checkout@v2 From b991f0959bcda4d4a82644d43243005dd1dae7c1 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Wed, 16 Jul 2025 10:37:37 -0700 Subject: [PATCH 15/46] New Branch for Linting Workflow --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 895ed96..341fce1 100644 --- 
a/.gitignore +++ b/.gitignore @@ -214,3 +214,5 @@ __marimo__/ *.json .env /json_pages +/notebooks +/test_json_pages From 8d7d55bbfb5fe235243b72fe01a40c5e5df8e7d1 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 15:31:57 -0700 Subject: [PATCH 16/46] Testing Linting workflow --- .github/workflows/lint.yml | 33 +++++++++++++++++---------------- src/mp_cite/core.py | 7 +++++-- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 83909cf..a62295c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -5,8 +5,10 @@ on: branches: [master] pull_request: branches: [master] + workflow_dispatch: + # TODO: setup linting with uv/ruff -# informed by testing.yml and https://medium.com/@sidharthvijayakumar7/automating-pylint-in-github-workflow-80c84b2ff243 +# informed by testing.yml and https://medium.com/@sidharthvijayakumar7/automating-pylint-in-github-workflow-80c84b2ff243 and ruff documentation jobs: linting: strategy: @@ -14,23 +16,22 @@ jobs: os: ["ubuntu-latest"] python-version: ["3.11", "3.12", "3.13"] - name: mp-cite linting (${{ matrix.os }}/py${{ matrix.python-version }}) + name: mp-cite (${{ matrix.os }}/py${{ matrix.python-version }}) runs-on: ${{ matrix.os }} steps: - - name: Checkout the code - uses: actions/checkout@v2 + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v6 + with: + python-version: ${{ matrix.python-version }} + version: "latest" + - name: Install dependencies run: | - python -m pip install --upgrade pip - pip install pylint - pip install pylint-fail-under - - name: Analyzing the code with pylint - run: find . 
-path ./legacy -prune -o -name '*.py' -print -exec pylint --output-format=github -- {} \; # run pylint on every .py file in the repository except for anything in legacy (gets pruned) - # Disabled error-checking related to importing and unused-imports - - name: Fails if pylint score is less than 6.0 - run: | - for file in $(find . -path ./legacy -prune -o -name '*.py') - do - pylint --disable=E0401,W0611 "$file" --fail-under=6.0; - done \ No newline at end of file + python -m pip install --upgrade pip + pip install ruff + # Update output format to enable automatic inline annotations + - name: Analyzing the code with ruff + run: ruff check --output-format=github \ No newline at end of file diff --git a/src/mp_cite/core.py b/src/mp_cite/core.py index 24be6b3..21f88d7 100644 --- a/src/mp_cite/core.py +++ b/src/mp_cite/core.py @@ -106,12 +106,15 @@ def update_state_of_osti_record( def delete_osti_record( - elinkapi: Elink, + elinkapi_token: str, osti_id: OstiID, reason: str ) -> RecordResponse: + review_endpoint = "https://review.osti.gov/elink2api/" + review_api = Elink(token = elinkapi_token, target=review_endpoint) + """Delete a record by its OSTI ID.""" - response = requests.delete(f"{elinkapi.target}records/{osti_id}?reason={reason}", headers={"Authorization": f"Bearer {elinkapi.token}"}) + response = requests.delete(f"{review_api.target}records/{osti_id}?reason={reason}", headers={"Authorization": f"Bearer {review_api.token}"}) Validation.handle_response(response) return response.status_code == 204 # True if deleted successfully From 61d74c1028791e8c96200660465d7807541eb82c Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 15:37:26 -0700 Subject: [PATCH 17/46] Allowing Lint.YML to run on push to linting_workflow --- .github/workflows/lint.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a62295c..7b4965f 100644 --- a/.github/workflows/lint.yml +++ 
b/.github/workflows/lint.yml @@ -2,9 +2,9 @@ name: linting on: push: - branches: [master] + branches: [master, linting-workflow] pull_request: - branches: [master] + branches: [master, linting-workflow] workflow_dispatch: # TODO: setup linting with uv/ruff @@ -34,4 +34,4 @@ jobs: pip install ruff # Update output format to enable automatic inline annotations - name: Analyzing the code with ruff - run: ruff check --output-format=github \ No newline at end of file + run: ruff check --output-format=github From d9ab19cc419f6679439d1ff88bfed55c6014bf36 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 16:25:37 -0700 Subject: [PATCH 18/46] Testing pre-commit and updated lint.yml to disregard legacy files --- .github/workflows/lint.yml | 2 +- .gitignore | 4 +- .pre-commit-config.yaml | 13 ++ pyproject.toml | 80 ++++++++++++ src/mp_cite/core.py | 68 ++++++---- src/mp_cite/doi_builder.py | 59 ++++++--- src/mp_cite/models.py | 109 ---------------- src/mp_cite/pipeline.py | 71 ++++++---- src/mp_cite/recordresponse_example.txt | 174 ++++++++++++------------- src/mp_cite/reset.py | 17 +-- src/mp_cite/test_core.py | 39 ++++-- tests/file_to_jsonForUpload.py | 56 +++++--- tests/github_bug_report.py | 32 +++-- tests/manage_backfills.py | 15 +-- tests/outputs.txt | 2 +- tests/prod_to_review.py | 61 +++++---- tests/test_elink_api.py | 37 ++++-- 17 files changed, 465 insertions(+), 374 deletions(-) create mode 100644 .pre-commit-config.yaml delete mode 100644 src/mp_cite/models.py diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 7b4965f..669b3c8 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -18,7 +18,7 @@ jobs: name: mp-cite (${{ matrix.os }}/py${{ matrix.python-version }}) runs-on: ${{ matrix.os }} - + steps: - uses: actions/checkout@v4 diff --git a/.gitignore b/.gitignore index 341fce1..d57d716 100644 --- a/.gitignore +++ b/.gitignore @@ -183,9 +183,9 @@ cython_debug/ .abstra/ # Visual Studio Code -# Visual Studio 
Code specific template is maintained in a separate VisualStudioCode.gitignore +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore -# and can be added to the global gitignore or merged into this file. However, if you prefer, +# and can be added to the global gitignore or merged into this file. However, if you prefer, # you could uncomment the following to ignore the entire vscode folder .vscode/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..627c045 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.12.4 + hooks: + # Run the linter. + - id: ruff-check + types_or: [ python, pyi ] + args: [ --fix ] + # Run the formatter. + - id: ruff-format + types_or: [python, pyi ] +exclude: 'legacy/*' diff --git a/pyproject.toml b/pyproject.toml index b87a763..69a72af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,3 +38,83 @@ packages = ["src/mp_cite"] [project.urls] Homepage = "https://github.com/materialsproject/MPCite" Issues = "https://github.com/materialsproject/MPCite/issues" + +[tool.ruff] +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", + "legacy" +] + +# Same as Black. +line-length = 88 +indent-width = 4 + +# Assume Python 3.9 +target-version = "py39" + +[tool.ruff.lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. 
+# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or +# McCabe complexity (`C901`) by default. +select = ["E4", "E7", "E9", "F"] +ignore = [] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" + +# Enable auto-formatting of code examples in docstrings. Markdown, +# reStructuredText code/literal blocks and doctests are all supported. +# +# This is currently disabled by default, but it is planned for this +# to be opt-out in the future. +docstring-code-format = false + +# Set the line length limit used when formatting code snippets in +# docstrings. +# +# This only has an effect when the `docstring-code-format` setting is +# enabled. +docstring-code-line-length = "dynamic" diff --git a/src/mp_cite/core.py b/src/mp_cite/core.py index 21f88d7..b1da88d 100644 --- a/src/mp_cite/core.py +++ b/src/mp_cite/core.py @@ -54,9 +54,7 @@ def find_out_of_date_doi_entries( def update_existing_osti_record( - elinkapi: Elink, - osti_id: OstiID, - new_values: dict + elinkapi: Elink, osti_id: OstiID, new_values: dict ) -> RecordResponse: record_on_elink = elinkapi.get_single_record(osti_id) @@ -64,7 +62,9 @@ def update_existing_osti_record( try: setattr(record_on_elink, keyword, new_values[keyword]) except ValueError: - print("Extraneous keywords found in the dictionary that do not correspond to attributes in the ELink API's record class.") + print( + "Extraneous keywords found in the dictionary that do not correspond to attributes in the ELink API's record class." 
+ ) # assume the use with fix the sponsor identifier bug before calling the update function # # fix the issue with the sponsor organization's identifiers @@ -73,13 +73,15 @@ def update_existing_osti_record( # entry.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] # break - return elinkapi.update_record(osti_id, record_on_elink, state="save") # user should use update_state_of_osti_record to submit instead + return elinkapi.update_record( + osti_id, record_on_elink, state="save" + ) # user should use update_state_of_osti_record to submit instead def submit_new_osti_record( elinkapi: Elink, new_record: Record, - state = "submit", # assuming there is no need to both with saving. just send new record to osti when its ready for submission. also assume bug with DOE contract number identifier in sponsor organization is accounted for + state="submit", # assuming there is no need to both with saving. just send new record to osti when its ready for submission. also assume bug with DOE contract number identifier in sponsor organization is accounted for ) -> RecordResponse: # template for all repeated stuff # only submit @@ -89,9 +91,7 @@ def submit_new_osti_record( def update_state_of_osti_record( - elinkapi: Elink, - osti_id: OstiID, - new_state = "submit" + elinkapi: Elink, osti_id: OstiID, new_state="submit" ) -> RecordResponse: record = elinkapi.get_single_record(osti_id) @@ -106,32 +106,50 @@ def update_state_of_osti_record( def delete_osti_record( - elinkapi_token: str, - osti_id: OstiID, - reason: str + elinkapi_token: str, osti_id: OstiID, reason: str ) -> RecordResponse: review_endpoint = "https://review.osti.gov/elink2api/" - review_api = Elink(token = elinkapi_token, target=review_endpoint) + review_api = Elink(token=elinkapi_token, target=review_endpoint) """Delete a record by its OSTI ID.""" - response = requests.delete(f"{review_api.target}records/{osti_id}?reason={reason}", headers={"Authorization": f"Bearer {review_api.token}"}) + response = 
requests.delete( + f"{review_api.target}records/{osti_id}?reason={reason}", + headers={"Authorization": f"Bearer {review_api.token}"}, + ) Validation.handle_response(response) return response.status_code == 204 # True if deleted successfully + def emptyReviewAPI(reason, review_api): - allDeleted = True for record in review_api.query_records(): delete_osti_record(review_api, record.osti_id, reason) + def make_minimum_record_to_fully_release( - title, # required to make record - product_type = "DA", # required to make record - organizations = [Organization(type='RESEARCHING', name='LBNL Materials Project (LBNL-MP)'), - Organization(type='SPONSOR', name='TEST SPONSOR ORG', identifiers=[{"type": 'CN_DOE', "value": 'AC02-05CH11231'}])], # sponsor org is necessary for submission - persons = [Person(type='AUTHOR', last_name='Perrson')], - site_ownership_code = "LBNL-MP", - access_limitations = ['UNL'], - publication_date = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0), # what should this be? - site_url = "https://next-gen.materialsproject.org/materials" + title, # required to make record + product_type="DA", # required to make record + organizations=[ + Organization(type="RESEARCHING", name="LBNL Materials Project (LBNL-MP)"), + Organization( + type="SPONSOR", + name="TEST SPONSOR ORG", + identifiers=[{"type": "CN_DOE", "value": "AC02-05CH11231"}], + ), + ], # sponsor org is necessary for submission + persons=[Person(type="AUTHOR", last_name="Perrson")], + site_ownership_code="LBNL-MP", + access_limitations=["UNL"], + publication_date=datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0 + ), # what should this be? 
+ site_url="https://next-gen.materialsproject.org/materials", ) -> Record: - return Record(product_type, title, persons, site_ownership_code, access_limitations, publication_date, site_url) \ No newline at end of file + return Record( + product_type, + title, + persons, + site_ownership_code, + access_limitations, + publication_date, + site_url, + ) diff --git a/src/mp_cite/doi_builder.py b/src/mp_cite/doi_builder.py index 713724c..b85fb90 100644 --- a/src/mp_cite/doi_builder.py +++ b/src/mp_cite/doi_builder.py @@ -10,12 +10,12 @@ - workflow status and the date (?) of each step: - SA, saved, in a holding state, not to be processed - SR, submit to releasing official "released_to_osti_date, as entered by releasing official" - - SO, submit to OSTI + - SO, submit to OSTI - SF, submitted but failed validation - SX, submitted but failed to release - SV, submitted and validated - R, released -- +- Here is an example of RecordResponse RecordResponse( @@ -136,52 +136,69 @@ ) ''' -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, Field from datetime import datetime + # TODO: change the field names to match ELINK class doi_model(BaseModel): # identifiers - doi: str = Field(description="The DOI number as allocated by OSTI") # can be taken from ELink API - title: str = Field(description="The title of the record") # can be taken from ELink API - osti_id: str = Field(description="The OSTI ID number allocated by OSTI to make the DOI number") # can be taken from ELink API - material_id: str # can be taken from Robocrys Collection or ELink API + doi: str = Field( + description="The DOI number as allocated by OSTI" + ) # can be taken from ELink API + title: str = Field( + description="The title of the record" + ) # can be taken from ELink API + osti_id: str = Field( + description="The OSTI ID number allocated by OSTI to make the DOI number" + ) # can be taken from ELink API + material_id: str # can be taken from Robocrys Collection or ELink API # time 
stamps - date_metadata_added: datetime | None = Field(description="date_record_entered_onto_ELink") # can be taken from ELink API response - date_metadata_updated: datetime | None = Field(description="date_record_last_updated_on_Elink") + date_metadata_added: datetime | None = Field( + description="date_record_entered_onto_ELink" + ) # can be taken from ELink API response + date_metadata_updated: datetime | None = Field( + description="date_record_last_updated_on_Elink" + ) # status - workflow_status: str # can be taken from ELink API + workflow_status: str # can be taken from ELink API date_released: datetime | None = Field(description="") - date_submitted_to_osti_first: datetime = Field(description="date record was first submitted to OSTI for publication, maintained internally by E-Link") - date_submitted_to_osti_last: datetime = Field(description="most recent date record information was submitted to OSTI. Maintained internally by E-Link") - publication_date: datetime | None = Field(description="") # labelled as publication_date in RecordResponse of ELink API + date_submitted_to_osti_first: datetime = Field( + description="date record was first submitted to OSTI for publication, maintained internally by E-Link" + ) + date_submitted_to_osti_last: datetime = Field( + description="most recent date record information was submitted to OSTI. Maintained internally by E-Link" + ) + publication_date: datetime | None = Field( + description="" + ) # labelled as publication_date in RecordResponse of ELink API + # hypothetically post an update or submit a new record and receive the RecordResponse def RecordResponse_to_doi_model(recordresponse): - ''' + """ turns a recordresponse, which is returned from a save, submission, post, etc. 
into a doi_model object - ''' + """ params = { "doi": recordresponse.doi, "title": recordresponse.title, "osti_id": str(recordresponse.osti_id), "material_id": recordresponse.site_unique_id, - "date_metadata_added": recordresponse.date_metadata_added, "date_metadata_updated": recordresponse.date_metadata_updated, - "workflow_status": recordresponse.workflow_status, "date_released": recordresponse.date_released, # date_released_to_osti = recordresponse.released_to_osti_date, # what is the difference between these??? "Date record information was released to OSTI, as entered by releasing official." always seems to be none - "date_submitted_to_osti_first": recordresponse.date_submitted_to_osti_first, # date record was first submitted to OSTI for publication, maintained internally by E-Link - "date_submitted_to_osti_last": recordresponse.date_submitted_to_osti_last, # most recent date record information was submitted to OSTI. Maintained internally by E-Link. - "publication_date": recordresponse.publication_date + "date_submitted_to_osti_first": recordresponse.date_submitted_to_osti_first, # date record was first submitted to OSTI for publication, maintained internally by E-Link + "date_submitted_to_osti_last": recordresponse.date_submitted_to_osti_last, # most recent date record information was submitted to OSTI. Maintained internally by E-Link. 
+ "publication_date": recordresponse.publication_date, } return doi_model(**params) + def upload_doi_document_model_to_collection(doi_model, collection): x = collection.insert_one(doi_model.model_dump()).inserted_id - return x \ No newline at end of file + return x diff --git a/src/mp_cite/models.py b/src/mp_cite/models.py deleted file mode 100644 index e4d055f..0000000 --- a/src/mp_cite/models.py +++ /dev/null @@ -1,109 +0,0 @@ -from pydantic import BaseModel, Field, ConfigDict -from typing import List, Dict, Optional -import datetime -from enum import Enum -import bibtexparser -from elinkapi import Elink, Record -from elinkapi.record import RecordResponse, AccessLimitation, JournalType -from elinkapi.geolocation import Geolocation -from elinkapi.identifier import Identifier -from elinkapi.related_identifier import RelatedIdentifier -from elinkapi.person import Person -from elinkapi.organization import Organization - -class TestClass(RecordResponse): - ... - # stuff - -class ELinkGetResponseModel(BaseModel): - osti_id: Optional[int] = Field(...) - dataset_type: str = Field(default="SM") - title: str = Field(...) - persons: List[Person] - contributors: List[Dict[str, str]] = Field( - default=[{"first_name": "Materials", "last_name": "Project"}], - description="List of Dict of first name, last name mapping", - ) # no contributor - publication_date: datetime.date - site_url: str = Field(...) - doi: dict = Field( - {}, title="DOI info", description="Mainly used during GET request" - ) - mp_id: str | None = None - keywords: List[str] = None - - @classmethod - def from_elinkapi_record(cls, R): - gotResponse = ELinkGetResponseModel( - osti_id = R.osti_id, - title = R.title, - persons = R.persons, - # assume default contributors for now, creators vs contributors? 
- publication_date = R.publication_date, - site_url = R.site_url, - doi = {"doi": R.doi}, - mp_id = next((id.value for id in R.identifiers if id.type == 'RN'), None), - keywords = R.keywords - ) - - return gotResponse - - def get_title(self): - formula = self.keywords[1] - return "Materials Data on %s by Materials Project" % formula - - def get_site_url(self): - return "https://materialsproject.org/materials/%s" % self.mp_id - - def get_keywords(self): - # keywords = "; ".join( - # ["crystal structure", material.pretty_formula, material.chemsys] - # ) - return self.keywords - - @classmethod - def get_default_description(cls): - return ( - "Computed materials data using density " - "functional theory calculations. These calculations determine " - "the electronic structure of bulk materials by solving " - "approximations to the Schrodinger equation. For more " - "information, see https://materialsproject.org/docs/calculations" - ) - - @classmethod - def custom_to_dict(cls, elink_record) -> dict: - if elink_record.osti_id is None or elink_record.osti_id == "": - return elink_record.dict(exclude={"osti_id", "doi"}) - else: - return elink_record.dict(exclude={"doi"}) - - -class ElinkResponseStatusEnum(Enum): - SUCCESS = "SUCCESS" - FAILED = "FAILURE" - - -class ELinkPostResponseModel(BaseModel): - osti_id: str - accession_num: str - product_nos: str - title: str - contract_nos: str - other_identifying_nos: Optional[str] - doi: Dict[str, str] - status: ElinkResponseStatusEnum - status_message: Optional[str] - - def generate_doi_record(self): - doi_collection_record = DOIRecordModel( - material_id=self.accession_num, - doi=self.doi["#text"], - status=self.doi["@status"], - bibtex=None, - valid=True, - last_validated_on=datetime.now(), - ) - doi_collection_record.set_status(status=self.doi["@status"]) - doi_collection_record.last_validated_on = datetime.now() - return doi_collection_record \ No newline at end of file diff --git a/src/mp_cite/pipeline.py 
b/src/mp_cite/pipeline.py index 3d2c8c7..3a087ab 100644 --- a/src/mp_cite/pipeline.py +++ b/src/mp_cite/pipeline.py @@ -1,26 +1,26 @@ import os import json -from elinkapi import Elink, Record +from elinkapi import Elink from elinkapi.record import RecordResponse from dotenv import load_dotenv -import requests -from elinkapi.utils import Validation from pymongo import MongoClient -import pymongo -from timeit import default_timer as timer import logging +import requests + import datetime -from doi_builder import * +from doi_builder import RecordResponse_to_doi_model, doi_model -load_dotenv() # depends on the root directory from which you run your python scripts. +load_dotenv() # depends on the root directory from which you run your python scripts. review_endpoint = "https://review.osti.gov/elink2api/" -prod_api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) -review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) +prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) +review_api = Elink( + token=os.environ.get("elink_review_api_token"), target=review_endpoint +) atlas_user = os.environ.get("atlas_user") atlas_password = os.environ.get("atlas_password") @@ -32,55 +32,70 @@ cwd = os.getcwd() path = "/json_pages/" -for filename in os.listdir(cwd+path): +for filename in os.listdir(cwd + path): logging.debug(f"Now extracting {filename}") file = open(cwd + path + filename, "r") for line in file: record = RecordResponse(**json.loads(line.strip())) - record.osti_id = record.doi.split('/')[1] + record.osti_id = record.doi.split("/")[1] # for every record in the OSTI production environment: # flag for update performance update_success = False material_id = record.site_unique_id - with MongoClient(mongo_uri) as client: # should I open this in or outside of the for loop? + with MongoClient( + mongo_uri + ) as client: # should I open this in or outside of the for loop? 
coll = client["mp_core_blue"]["robocrys"] - res = coll.find_one({"material_id" : material_id}) - - if res != None: + res = coll.find_one({"material_id": material_id}) + + if res is not None: robocrys_description = res["description"] - + # what if there is no document in robocrys found? else: - logging.warning(f"No robocrys document was found to match the OSTI record: {record.osti_id}!") + logging.warning( + f"No robocrys document was found to match the OSTI record: {record.osti_id}!" + ) # if the description of the record on Elink doesnt match what is in the robocrys collection: - if res != None and record.description != robocrys_description: + if res is not None and record.description != robocrys_description: # directly update the description of the record via the record response record.description = robocrys_description - + # and directly update the identifier for sponsoring org for entry in record.organizations: if entry.type == "SPONSOR": - entry.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + entry.identifiers = [{"type": "CN_DOE", "value": "AC02-05CH11231"}] break try: # send update to the record with the record response # update_record(osti_id, record, state="save") - record_response = prod_api.update_record(record.osti_id, record, state="save") + record_response = prod_api.update_record( + record.osti_id, record, state="save" + ) update_success = True - except: - logging.debug("The update failed to save!") - # add the osti_id of the failed update to failed_osti_ids + except requests.exceptions.RequestException as e: + logging.debug(f"Network or HTTP error: {e}") + failed_osti_ids.append(record.osti_id) + + except ValueError as e: + logging.debug(f"Data error while updating record: {e}") + failed_osti_ids.append(record.osti_id) + + except Exception as e: + logging.debug(f"Unexpected error during update: {e}") failed_osti_ids.append(record.osti_id) # if the update worked... 
- if update_success == True: + if update_success: # save the record response returned with sending the update, done above # convert that record response into a doi_model - doi_model = RecordResponse_to_doi_model(record) #change later to record response + doi_model = RecordResponse_to_doi_model( + record + ) # change later to record response # upload that doi_model as a document to the new doi collection in mp_core # what is the collection @@ -100,6 +115,6 @@ cwd = os.getcwd() path = f"/files/failed_osti_ids_{str(datetime.datetime.now())}.txt" -with open(cwd+path, 'w') as output: # change filepath as needed +with open(cwd + path, "w") as output: # change filepath as needed for id in failed_osti_ids: - output.write(str(id) + '\n') # i'm pretty sure it's a string already though... \ No newline at end of file + output.write(str(id) + "\n") # i'm pretty sure it's a string already though... diff --git a/src/mp_cite/recordresponse_example.txt b/src/mp_cite/recordresponse_example.txt index 059edb8..0d510a6 100644 --- a/src/mp_cite/recordresponse_example.txt +++ b/src/mp_cite/recordresponse_example.txt @@ -1,92 +1,92 @@ -osti_id=1190959 -workflow_status='R' -access_limitations=['UNL'] -access_limitation_other=None -announcement_codes=None -availability=None -edition=None -volume=None +osti_id=1190959 +workflow_status='R' +access_limitations=['UNL'] +access_limitation_other=None +announcement_codes=None +availability=None +edition=None +volume=None conference_information=None conference_type=None contract_award_date=None country_publication_code='US' doe_funded_flag=None -doe_supported_flag=False -doi='10.17188/1190959' -doi_infix=None -edit_reason=None -geolocations=None -format_information='' -invention_disclosure_flag=None -issue=None -journal_license_url=None -journal_name=None -journal_open_access_flag=None -journal_type=None -keywords=['crystal structure', 'Si', 'Si'] -languages=['English'] -monographic_title=None -opn_addressee=None -opn_declassified_date=None 
-opn_declassified_status=None -opn_document_categories=None -opn_document_location=None -opn_fieldoffice_acronym_code=None -other_information=None -ouo_release_date=None -pams_publication_status=None -pams_publication_status_other=None -pams_authors=None -pams_editors=None -pams_product_sub_type=None -pams_patent_country_code=None -pams_transnational_patent_office=None -paper_flag=False -patent_assignee=None -patent_file_date=None -patent_priority_date=None -pdouo_exemption_number=None -peer_reviewed_flag=False -product_size=None -product_type='DA' -product_type_other=None -prot_flag=None -prot_data_other=None -prot_release_date=None -publication_date=datetime.date(2020, 7, 15) -publication_date_text='07/15/2020' -publisher_information=None -related_doc_info='https://materialsproject.org/citing' -released_to_osti_date=None -releasing_official_comments=None -report_period_end_date=None -report_period_start_date=None -report_types=None -report_type_other=None -sbiz_flag=None -sbiz_phase=None -sbiz_previous_contract_number=None -sbiz_release_date=None -site_ownership_code='LBNL-MP' -site_unique_id='mp-149' -subject_category_code=['36'] -subject_category_code_legacy=None -title='Materials Data on Si by Materials Project' -description='Si is diamond structured and crystallizes in the cubic Fd-3m space group. The structure is three-dimensional. Si is bonded to four equivalent Si atoms to form corner-sharing SiSi4 tetrahedra. All Si–Si bond lengths are 2.37 Å.' 
-identifiers=[Identifier(type='CN_DOE', value='AC02-05CH11231'), Identifier(type='CN_NONDOE', value='EDCBEE'), Identifier(type='RN', value='mp-149')] -persons=[Person(type='CONTACT', first_name='Kristin', middle_name=None, last_name='Persson', orcid=None, phone='+1(510)486-7218', email=['feedback@materialsproject.org'], affiliations=[Affiliation(name='LBNL', ror_id=None)], contributor_type=None)] -organizations=[Organization(type='CONTRIBUTING', name='The Materials Project', contributor_type='ResearchGroup', identifiers=[], ror_id=None), Organization(type='RESEARCHING', name='LBNL Materials Project', contributor_type=None, identifiers=[], ror_id=None), Organization(type='SPONSOR', name='USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22)', contributor_type=None, identifiers=[], ror_id=None), Organization(type='RESEARCHING', name='Lawrence Berkeley National Laboratory (LBNL), Berkeley, CA (United States)', contributor_type=None, identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='MIT', contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='UC Berkeley', contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='Duke', contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='U Louvain', contributor_type='Other', identifiers=[], ror_id=None)] related_identifiers=[RelatedIdentifier(type='DOI', relation='IsReferencedBy', value='10.1103/physrevmaterials.4.013401')] -site_url='https://materialsproject.org/materials/mp-149' -revision=18 -added_by=234169 -edited_by=None -collection_type='DOE_LAB' -date_metadata_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 275000, tzinfo=TzInfo(UTC)) -date_metadata_updated=datetime.datetime(2021, 7, 15, 2, 10, 43, 372000, tzinfo=TzInfo(UTC)) -date_submitted_to_osti_first=datetime.datetime(2015, 7, 7, 22, 9, 5, 808000, tzinfo=TzInfo(UTC)) -date_submitted_to_osti_last=datetime.datetime(2021, 7, 
15, 2, 10, 42, 407000, tzinfo=TzInfo(UTC)) -date_released=datetime.datetime(2021, 7, 15, 2, 10, 43, 240000, tzinfo=TzInfo(UTC)) -sensitivity_flag='U' -hidden_flag=False -media=[MediaInfo(media_id=841489, revision=1, osti_id=1190959, status='C', added_by=None, document_page_count=1, mime_type='text/html', media_title=None, media_location='O', media_source='DOE2416API', date_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 875000, tzinfo=TzInfo(UTC)), date_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 78000, tzinfo=TzInfo(UTC)), date_valid_start=None, date_valid_end=None, files=[MediaFile(media_file_id=4514486, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='O', url_type='O', url='https://materialsproject.org/materials/mp-149', added_by=None, document_page_count=None, file_size_bytes=None, duration_seconds=None, subtitle_tracks=None, video_tracks=None, mime_type='text/html', media_source='DOE2416API', date_file_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 875000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 52, 857000, tzinfo=TzInfo(UTC))), MediaFile(media_file_id=4515065, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='C', url_type='L', url=None, added_by=None, document_page_count=1, file_size_bytes=15546, duration_seconds=0, subtitle_tracks=None, video_tracks=None, mime_type='text/html', media_source='OFF_SITE_DOWNLOAD', date_file_added=datetime.datetime(2015, 7, 8, 2, 50, 52, 877000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 96000, tzinfo=TzInfo(UTC))), MediaFile(media_file_id=4515066, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='T', url_type='L', url=None, added_by=None, document_page_count=1, file_size_bytes=5593, duration_seconds=0, subtitle_tracks=None, video_tracks=None, mime_type='text/plain', media_source='TEXT_EXTRACTION', date_file_added=datetime.datetime(2015, 7, 8, 2, 50, 53, 
78000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 83000, tzinfo=TzInfo(UTC)))])] -audit_logs=[] \ No newline at end of file +doe_supported_flag=False +doi='10.17188/1190959' +doi_infix=None +edit_reason=None +geolocations=None +format_information='' +invention_disclosure_flag=None +issue=None +journal_license_url=None +journal_name=None +journal_open_access_flag=None +journal_type=None +keywords=['crystal structure', 'Si', 'Si'] +languages=['English'] +monographic_title=None +opn_addressee=None +opn_declassified_date=None +opn_declassified_status=None +opn_document_categories=None +opn_document_location=None +opn_fieldoffice_acronym_code=None +other_information=None +ouo_release_date=None +pams_publication_status=None +pams_publication_status_other=None +pams_authors=None +pams_editors=None +pams_product_sub_type=None +pams_patent_country_code=None +pams_transnational_patent_office=None +paper_flag=False +patent_assignee=None +patent_file_date=None +patent_priority_date=None +pdouo_exemption_number=None +peer_reviewed_flag=False +product_size=None +product_type='DA' +product_type_other=None +prot_flag=None +prot_data_other=None +prot_release_date=None +publication_date=datetime.date(2020, 7, 15) +publication_date_text='07/15/2020' +publisher_information=None +related_doc_info='https://materialsproject.org/citing' +released_to_osti_date=None +releasing_official_comments=None +report_period_end_date=None +report_period_start_date=None +report_types=None +report_type_other=None +sbiz_flag=None +sbiz_phase=None +sbiz_previous_contract_number=None +sbiz_release_date=None +site_ownership_code='LBNL-MP' +site_unique_id='mp-149' +subject_category_code=['36'] +subject_category_code_legacy=None +title='Materials Data on Si by Materials Project' +description='Si is diamond structured and crystallizes in the cubic Fd-3m space group. The structure is three-dimensional. 
Si is bonded to four equivalent Si atoms to form corner-sharing SiSi4 tetrahedra. All Si–Si bond lengths are 2.37 Å.' +identifiers=[Identifier(type='CN_DOE', value='AC02-05CH11231'), Identifier(type='CN_NONDOE', value='EDCBEE'), Identifier(type='RN', value='mp-149')] +persons=[Person(type='CONTACT', first_name='Kristin', middle_name=None, last_name='Persson', orcid=None, phone='+1(510)486-7218', email=['feedback@materialsproject.org'], affiliations=[Affiliation(name='LBNL', ror_id=None)], contributor_type=None)] +organizations=[Organization(type='CONTRIBUTING', name='The Materials Project', contributor_type='ResearchGroup', identifiers=[], ror_id=None), Organization(type='RESEARCHING', name='LBNL Materials Project', contributor_type=None, identifiers=[], ror_id=None), Organization(type='SPONSOR', name='USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22)', contributor_type=None, identifiers=[], ror_id=None), Organization(type='RESEARCHING', name='Lawrence Berkeley National Laboratory (LBNL), Berkeley, CA (United States)', contributor_type=None, identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='MIT', contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='UC Berkeley', contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='Duke', contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='U Louvain', contributor_type='Other', identifiers=[], ror_id=None)] related_identifiers=[RelatedIdentifier(type='DOI', relation='IsReferencedBy', value='10.1103/physrevmaterials.4.013401')] +site_url='https://materialsproject.org/materials/mp-149' +revision=18 +added_by=234169 +edited_by=None +collection_type='DOE_LAB' +date_metadata_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 275000, tzinfo=TzInfo(UTC)) +date_metadata_updated=datetime.datetime(2021, 7, 15, 2, 10, 43, 372000, tzinfo=TzInfo(UTC)) 
+date_submitted_to_osti_first=datetime.datetime(2015, 7, 7, 22, 9, 5, 808000, tzinfo=TzInfo(UTC)) +date_submitted_to_osti_last=datetime.datetime(2021, 7, 15, 2, 10, 42, 407000, tzinfo=TzInfo(UTC)) +date_released=datetime.datetime(2021, 7, 15, 2, 10, 43, 240000, tzinfo=TzInfo(UTC)) +sensitivity_flag='U' +hidden_flag=False +media=[MediaInfo(media_id=841489, revision=1, osti_id=1190959, status='C', added_by=None, document_page_count=1, mime_type='text/html', media_title=None, media_location='O', media_source='DOE2416API', date_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 875000, tzinfo=TzInfo(UTC)), date_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 78000, tzinfo=TzInfo(UTC)), date_valid_start=None, date_valid_end=None, files=[MediaFile(media_file_id=4514486, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='O', url_type='O', url='https://materialsproject.org/materials/mp-149', added_by=None, document_page_count=None, file_size_bytes=None, duration_seconds=None, subtitle_tracks=None, video_tracks=None, mime_type='text/html', media_source='DOE2416API', date_file_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 875000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 52, 857000, tzinfo=TzInfo(UTC))), MediaFile(media_file_id=4515065, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='C', url_type='L', url=None, added_by=None, document_page_count=1, file_size_bytes=15546, duration_seconds=0, subtitle_tracks=None, video_tracks=None, mime_type='text/html', media_source='OFF_SITE_DOWNLOAD', date_file_added=datetime.datetime(2015, 7, 8, 2, 50, 52, 877000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 96000, tzinfo=TzInfo(UTC))), MediaFile(media_file_id=4515066, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='T', url_type='L', url=None, added_by=None, document_page_count=1, file_size_bytes=5593, duration_seconds=0, 
subtitle_tracks=None, video_tracks=None, mime_type='text/plain', media_source='TEXT_EXTRACTION', date_file_added=datetime.datetime(2015, 7, 8, 2, 50, 53, 78000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 83000, tzinfo=TzInfo(UTC)))])] +audit_logs=[] diff --git a/src/mp_cite/reset.py b/src/mp_cite/reset.py index 350b50c..c82731b 100644 --- a/src/mp_cite/reset.py +++ b/src/mp_cite/reset.py @@ -1,18 +1,19 @@ -from mpcite.core import * -from mpcite.doi_builder import RecordResponse_to_doi_model, upload_doi_document_model_to_collection import os -import json from dotenv import load_dotenv +from pymongo import MongoClient +from elinkapi import Elink -load_dotenv() # depends on the root directory from which you run your python scripts. +load_dotenv() # depends on the root directory from which you run your python scripts. review_endpoint = "https://review.osti.gov/elink2api/" -prod_api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) -review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) +prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) +review_api = Elink( + token=os.environ.get("elink_review_api_token"), target=review_endpoint +) cwd = os.getcwd() -path = "/json_pages/page_number_1000.0" # IT'S ONLY DOING ONE FILE RIGHT NOW +path = "/json_pages/page_number_1000.0" # IT'S ONLY DOING ONE FILE RIGHT NOW file = open(cwd + path, "r") atlas_user = os.environ.get("atlas_user") @@ -23,4 +24,4 @@ # emptyReviewAPI("Testing", review_api) with MongoClient() as client: - client.dois_test.dois.delete_many({}, comment="Testing") \ No newline at end of file + client.dois_test.dois.delete_many({}, comment="Testing") diff --git a/src/mp_cite/test_core.py b/src/mp_cite/test_core.py index 21d1325..550c1d3 100644 --- a/src/mp_cite/test_core.py +++ b/src/mp_cite/test_core.py @@ -1,18 +1,20 @@ -from mpcite.core import * -from mpcite.doi_builder import RecordResponse_to_doi_model, 
upload_doi_document_model_to_collection +from pymongo import MongoClient +from elinkapi import Elink +from .core import find_out_of_date_doi_entries, update_existing_osti_record import os -import json from dotenv import load_dotenv -load_dotenv() # depends on the root directory from which you run your python scripts. +load_dotenv() # depends on the root directory from which you run your python scripts. review_endpoint = "https://review.osti.gov/elink2api/" -prod_api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) -review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) +prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) +review_api = Elink( + token=os.environ.get("elink_review_api_token"), target=review_endpoint +) cwd = os.getcwd() -path = "/json_pages/page_number_1000.0" # IT'S ONLY DOING ONE FILE RIGHT NOW +path = "/json_pages/page_number_1000.0" # IT'S ONLY DOING ONE FILE RIGHT NOW file = open(cwd + path, "r") atlas_user = os.environ.get("atlas_user") @@ -21,7 +23,9 @@ mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" with MongoClient(mongo_uri) as real_client: - with MongoClient() as doi_client: # open the mongoclient outside of the for loop, is more efficient than opening and closing it repeatedly + with ( + MongoClient() as doi_client + ): # open the mongoclient outside of the for loop, is more efficient than opening and closing it repeatedly dois = doi_client["dois_test"]["dois"] # for line in file: @@ -34,7 +38,7 @@ # my_record = Record(**js) - # # make a post to the elink review environment + # # make a post to the elink review environment # saved_record = review_api.post_new_record(my_record, state="submit") # # make a doi document with saved_record # doi_model = RecordResponse_to_doi_model(saved_record) @@ -45,7 +49,7 @@ # all_material_ids = [doc["material_id"] for doc in dois.find({}, {"_id": 0, "material_id": 1})] # for material_id in all_material_ids: - + # # 
query prod env for record with materials_id == site_unique_id # record_from_prod = prod_api.query_records(site_unique_id=material_id) @@ -66,18 +70,25 @@ # raise # replacement_doc = recordresponse_from_prod.model_dump() - # replacement_doc["osti_id"] = existing_doc["osti_id"] + # replacement_doc["osti_id"] = existing_doc["osti_id"] # dois.replace_one(query_filter, replacement_doc) - osti_OOD_list = find_out_of_date_doi_entries(real_client, doi_client, "mp_core_blue", "robocrys", "dois_test", "dois") + osti_OOD_list = find_out_of_date_doi_entries( + real_client, doi_client, "mp_core_blue", "robocrys", "dois_test", "dois" + ) print(osti_OOD_list) for osti_id in osti_OOD_list: material_id_to_update = review_api.get_single_record(osti_id).site_unique_id new_values = { - "description": "UPDATED ROBOCRYS DESCRIPTION: " + next(real_client["mp_core_blue"]["robocrys"].find({"material_id": material_id_to_update}, {"_id": 0, "description": 1}))["description"] + "description": "UPDATED ROBOCRYS DESCRIPTION: " + + next( + real_client["mp_core_blue"]["robocrys"].find( + {"material_id": material_id_to_update}, {"_id": 0, "description": 1} + ) + )["description"] } - update_existing_osti_record(review_api, osti_id, new_values) \ No newline at end of file + update_existing_osti_record(review_api, osti_id, new_values) diff --git a/tests/file_to_jsonForUpload.py b/tests/file_to_jsonForUpload.py index aa864e0..f728fc9 100644 --- a/tests/file_to_jsonForUpload.py +++ b/tests/file_to_jsonForUpload.py @@ -7,16 +7,17 @@ from elinkapi.utils import Validation from pymongo import MongoClient -import pymongo from timeit import default_timer as timer -load_dotenv() # depends on the root directory from which you run your python scripts. +load_dotenv() # depends on the root directory from which you run your python scripts. 
review_endpoint = "https://review.osti.gov/elink2api/" -prod_api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) -review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) +prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) +review_api = Elink( + token=os.environ.get("elink_review_api_token"), target=review_endpoint +) atlas_user = os.environ.get("atlas_user") @@ -25,24 +26,27 @@ mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" cwd = os.getcwd() -path = "/json_pages/page_number_4.0" # IT'S ONLY DOING ONE FILE RIGHT NOW +path = "/json_pages/page_number_4.0" # IT'S ONLY DOING ONE FILE RIGHT NOW file = open(cwd + path, "r") update_counter = 0 records_checked = 0 + def delete_record(api, osti_id, reason): """Delete a record by its OSTI ID.""" - response = requests.delete(f"{api.target}records/{osti_id}?reason={reason}", headers={"Authorization": f"Bearer {api.token}"}) + response = requests.delete( + f"{api.target}records/{osti_id}?reason={reason}", + headers={"Authorization": f"Bearer {api.token}"}, + ) Validation.handle_response(response) return response.status_code == 204 # True if deleted successfully + def emptyReviewAPI(reason): - allDeleted = True for record in review_api.query_records(): - delete_record(review_api, record.osti_id, reason) + delete_record(review_api, record.osti_id, reason) -raise start = timer() @@ -55,7 +59,7 @@ def emptyReviewAPI(reason): for entry in js["organizations"]: if entry["type"] == "SPONSOR": - entry["identifiers"] = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + entry["identifiers"] = [{"type": "CN_DOE", "value": "AC02-05CH11231"}] material_id = js["site_unique_id"] @@ -63,15 +67,23 @@ def emptyReviewAPI(reason): with MongoClient(mongo_uri) as client: coll = client["mp_core_blue"]["robocrys"] - res = coll.find_one({"material_id" : material_id}) + res = coll.find_one({"material_id": material_id}) records_checked += 1 - - if res != None: - 
robocrys_description = res["description"] + + if res is not None: + robocrys_description = res["description"] # see if an update to the description is necessary, if it is, then update the description and post a new record. - if postUnedited or (robocrys_description != None and js["description"] != robocrys_description): #if a robocrys_description was found internally and it doesn't match what ELink has record... - js["description"] = "OLD WAS UPDATED, THEN IT WAS POSTED: " + robocrys_description + if ( + postUnedited + or ( + robocrys_description is not None + and js["description"] != robocrys_description + ) + ): # if a robocrys_description was found internally and it doesn't match what ELink has record... + js["description"] = ( + "OLD WAS UPDATED, THEN IT WAS POSTED: " + robocrys_description + ) my_record = Record(**js) saved_record = None @@ -85,14 +97,18 @@ def emptyReviewAPI(reason): print(f"NEW RECORD POSTED: {saved_record.osti_id}") raise except: - print(f"Record failed to post!: {my_record.doi}. Robocrys Collection Had Description {robocrys_description[0:50]}... Prod_Env ELink Had {my_record.description[37:87]}...") + print( + f"Record failed to post!: {my_record.doi}. Robocrys Collection Had Description {robocrys_description[0:50]}... Prod_Env ELink Had {my_record.description[37:87]}..." + ) raise if update_counter >= 10000: break end = timer() -print(f"Records Updated and/or Posted: {update_counter} \nRecords Checked in Total: {records_checked}. \nIt took {end - start} seconds") +print( + f"Records Updated and/or Posted: {update_counter} \nRecords Checked in Total: {records_checked}. 
\nIt took {end - start} seconds" +) ####################################################### # JUST POST JSON, Then update posted json Later @@ -125,7 +141,7 @@ def emptyReviewAPI(reason): # coll = client["mp_core_blue"]["robocrys"] # res = coll.find_one({"material_id" : material_id}) # records_checked += 1 - + # if res != None: # robocrys_description = res["description"] @@ -148,4 +164,4 @@ def emptyReviewAPI(reason): # end = timer() # print(f"Records Updated and/or Posted: {update_counter} \n Records Checked in Total: {records_checked}. It took {end - start} seconds") -###################################################### \ No newline at end of file +###################################################### diff --git a/tests/github_bug_report.py b/tests/github_bug_report.py index 3151e6d..2c3719c 100644 --- a/tests/github_bug_report.py +++ b/tests/github_bug_report.py @@ -5,9 +5,11 @@ load_dotenv() -prod_api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) +prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) review_endpoint = "https://review.osti.gov/elink2api/" -review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) +review_api = Elink( + token=os.environ.get("elink_review_api_token"), target=review_endpoint +) # record_response = prod_api.get_single_record(1190959) # returns OSTI record response with OSTI ID = 1190959, which has a DOE Contract Number saved (AC02-05CH11231; EDCBEE) # record_response_dict = record_response.model_dump(exclude_none=True) @@ -30,20 +32,30 @@ required_fields = { "product_type": "DA", "title": "Testing if CN_DOE can be random", - "organizations": [Organization(type='RESEARCHING', name='LBNL Materials Project (LBNL-MP)'), - Organization(type='SPONSOR', name='TEST SPONSOR ORG', identifiers=[{"type": 'CN_DOE', "value": 'oiajdiwjdiwj'}])], - "persons": [Person(type='AUTHOR', last_name='Schmoe')], + "organizations": [ + Organization(type="RESEARCHING", name="LBNL Materials 
Project (LBNL-MP)"), + Organization( + type="SPONSOR", + name="TEST SPONSOR ORG", + identifiers=[{"type": "CN_DOE", "value": "oiajdiwjdiwj"}], + ), + ], + "persons": [Person(type="AUTHOR", last_name="Schmoe")], "site_ownership_code": "LBNL-MP", - "access_limitations": ['UNL'], - "publication_date": datetime.now().replace(hour=0, minute=0, second=0, microsecond=0), - "site_url": "https://next-gen.materialsproject.org/materials" + "access_limitations": ["UNL"], + "publication_date": datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0 + ), + "site_url": "https://next-gen.materialsproject.org/materials", } empty_record = Record(**required_fields) -print(f"SUBMITTED TO OSTI, FULLY VALIDATED:\n{review_api.get_single_record(2525614)}\n\n\nTRYING TO SUBMIT:\n{empty_record}") +print( + f"SUBMITTED TO OSTI, FULLY VALIDATED:\n{review_api.get_single_record(2525614)}\n\n\nTRYING TO SUBMIT:\n{empty_record}" +) try: saved_record = review_api.post_new_record(empty_record, "submit") except exceptions.BadRequestException as ve: print(ve.message) - print(ve.errors) \ No newline at end of file + print(ve.errors) diff --git a/tests/manage_backfills.py b/tests/manage_backfills.py index a835456..85abf65 100644 --- a/tests/manage_backfills.py +++ b/tests/manage_backfills.py @@ -1,23 +1,23 @@ # This script will see how many documents in ELink, i.e. ones with a DOI, are not accounted for in the internal DOI collection. -from elinkapi import Elink, Query, Record +from elinkapi import Elink, Record import os from dotenv import load_dotenv -load_dotenv() # depends on the root directory from which you run your python scripts. +load_dotenv() # depends on the root directory from which you run your python scripts. 
-api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) +api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) query1 = api.query_records(rows=1000) -materials_with_dois : list[Record] = [] +materials_with_dois: list[Record] = [] for page in query1: print(f"Now on Page: {page.title}") print(f"Material_ID: {page.site_unique_id} and DOI: http://doi.org/{page.doi}") - + if page.site_unique_id.startswith("mp-"): materials_with_dois.append(page) @@ -26,7 +26,6 @@ # materials_with_dois.append(record) - # set_q1 = [page for page in query1] # set_q2 = [page for page in query2] @@ -44,6 +43,6 @@ # for page in query1: # print(page.title) # print(f"Material_ID: {page.site_unique_id} and DOI: http://doi.org/{page.doi}") - + # for record in page.data: -# print (f"OSTI ID: {record.osti_id} Title: {record.title}") \ No newline at end of file +# print (f"OSTI ID: {record.osti_id} Title: {record.title}") diff --git a/tests/outputs.txt b/tests/outputs.txt index 8d188e7..740a682 100644 --- a/tests/outputs.txt +++ b/tests/outputs.txt @@ -43,4 +43,4 @@ Traceback (most recent call last): record = next(query) File "C:\Users\ongha\anaconda3\envs\mpcite-env\Lib\site-packages\elinkapi\query.py", line 108, in __next__ raise StopIteration -StopIteration \ No newline at end of file +StopIteration diff --git a/tests/prod_to_review.py b/tests/prod_to_review.py index 87e311d..732340d 100644 --- a/tests/prod_to_review.py +++ b/tests/prod_to_review.py @@ -1,16 +1,18 @@ -from elinkapi import Elink, Query, Record +from elinkapi import Elink import os from dotenv import load_dotenv import json -load_dotenv() # depends on the root directory from which you run your python scripts. +load_dotenv() # depends on the root directory from which you run your python scripts. 
review_endpoint = "https://review.osti.gov/elink2api/" -prod_api = Elink(token = os.environ.get("elink_api_PRODUCTION_key")) -review_api = Elink(token = os.environ.get("elink_review_api_token"), target=review_endpoint) +prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) +review_api = Elink( + token=os.environ.get("elink_review_api_token"), target=review_endpoint +) print(prod_api.query_records()) @@ -28,47 +30,55 @@ for record in query: # increment counter - count_materials_data = count_materials_data + 1 - print(f"On record #{count_materials_data}, next url is {query.next_url}, previous url is {query.previous_url}") + count_materials_data = count_materials_data + 1 + print( + f"On record #{count_materials_data}, next url is {query.next_url}, previous url is {query.previous_url}" + ) - # see if the record is a Materials Data on record + # see if the record is a Materials Data on record if record.title.startswith("Materials Data on"): - # increment the MaterialsDataOn counter - count_MaterialsDataOn = count_MaterialsDataOn + 1 + # increment the MaterialsDataOn counter + count_MaterialsDataOn = count_MaterialsDataOn + 1 - # prepare the new record for the review environment, remove the OSTI ID, and add its model_dump to the list of json objects for the page. - new_record = record - new_record_dict = new_record.model_dump(exclude_none=True) + # prepare the new record for the review environment, remove the OSTI ID, and add its model_dump to the list of json objects for the page. + new_record = record + new_record_dict = new_record.model_dump(exclude_none=True) - new_record_osti_id = new_record_dict.pop("osti_id") # now new_record_dict does not have the osti_id key. - js = json.dumps(new_record_dict, default=str) # datetime objects are not JSON serializable, so we use default=str to convert them to strings. + new_record_osti_id = new_record_dict.pop( + "osti_id" + ) # now new_record_dict does not have the osti_id key. 
+ js = json.dumps( + new_record_dict, default=str + ) # datetime objects are not JSON serializable, so we use default=str to convert them to strings. - page_json_list.append(js) - - # TODO: take the new_record_dict and make it into a new post to the review environment and save the RecordResponse. + page_json_list.append(js) + + # TODO: take the new_record_dict and make it into a new post to the review environment and save the RecordResponse. else: print(f"Found edge case: {record.title}") if count_materials_data % rows_per_page == 0: - # create/open, write, and close new json file + # create/open, write, and close new json file page_number = count_materials_data / rows_per_page - path = f'/json_pages/page_number_{page_number}' - fp = open(cwd+path, 'a') + path = f"/json_pages/page_number_{page_number}" + fp = open(cwd + path, "a") for js in page_json_list: fp.write(js) fp.write("\n") - + fp.close() page_json_list = [] - print(f"Page {page_number} finished. Now at {count_materials_data} data entries. {count_materials_data - count_MaterialsDataOn} edge cases found.") + print( + f"Page {page_number} finished. Now at {count_materials_data} data entries. {count_materials_data - count_MaterialsDataOn} edge cases found." + ) # print remainder of records if not a full page after for loop exits page_number = page_number + 1 -path = f'/json_pages/page_number_{page_number}' -fp = open(cwd+path, 'a') +path = f"/json_pages/page_number_{page_number}" +fp = open(cwd + path, "a") for js in page_json_list: fp.write(js) fp.write("\n") @@ -94,7 +104,6 @@ # # TODO: take the new_record_dict and make it into a new post to the review environment and save the RecordResponse. - # if count_materials_data % rows_per_page == 0: # # if a page has been fully consummed, then print the new batched dictionary to a json file. 
@@ -116,5 +125,5 @@ # model_dump exclude_none=True, remove null keys # pop osti_id --> save batch to json files -# make new record +# make new record # post to review_api diff --git a/tests/test_elink_api.py b/tests/test_elink_api.py index 80afba7..5e07706 100644 --- a/tests/test_elink_api.py +++ b/tests/test_elink_api.py @@ -1,12 +1,9 @@ import os from dotenv import load_dotenv -from elinkapi import Elink, Record, exceptions -import pytest -from mpcite.models import ELinkGetResponseModel, TestClass +from elinkapi import Elink from pymongo import MongoClient -import pymongo load_dotenv() @@ -15,7 +12,9 @@ atlas_host = os.environ.get("atlas_host") mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" -api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) # target default is production E-link service. +api = Elink( + token=os.environ.get("elink_api_PRODUCTION_key") +) # target default is production E-link service. ### Grabbing an existing record @@ -64,7 +63,9 @@ with MongoClient(mongo_uri) as client: # get all dois from the collection doi_collection = client["mp_core"]["dois"] - materials_to_update = list(doi_collection.find({}, {"_id": 0, "doi": 1, "material_id": 1}, limit=2)) + materials_to_update = list( + doi_collection.find({}, {"_id": 0, "doi": 1, "material_id": 1}, limit=2) + ) # from the doi collection, grab the material_id and doi of each material material_ids = [entry["material_id"] for entry in materials_to_update] @@ -73,12 +74,19 @@ osti_ids = [entry["doi"].split("10.17188/")[1] for entry in materials_to_update] # additionally, grab the description of each material from the robocrys - coll = client["mp_core_blue"]["robocrys"] # grabs robocrys collection from active database - res = list(coll.find({"material_id": {"$in": material_ids}}, {"_id": 0, "material_id": 1, "description": 1})) # grabs the material id and description of entries in the collection + coll = client["mp_core_blue"][ + "robocrys" + ] # grabs robocrys 
collection from active database + res = list( + coll.find( + {"material_id": {"$in": material_ids}}, + {"_id": 0, "material_id": 1, "description": 1}, + ) + ) # grabs the material id and description of entries in the collection descriptions = [entry["description"] for entry in res] # for each material (and its material_id, doi, and osti_id) - for i in range(len(materials_to_update)): + for i in range(len(materials_to_update)): internal_material_id = material_ids[i] internal_osti_id = osti_ids[i] internal_description = descriptions[i] @@ -86,14 +94,18 @@ # get_single_record(osti_id) record = api.get_single_record(internal_osti_id) - print(f"\n \n \nPrinting what is currently on ELINK for {internal_material_id}*****************************************") + print( + f"\n \n \nPrinting what is currently on ELINK for {internal_material_id}*****************************************" + ) print(record) if internal_material_id == record.site_unique_id: # update description record.description = "testTESTtestTESTtest" - print(f"\n \n \nPrinting record for {internal_material_id}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + print( + f"\n \n \nPrinting record for {internal_material_id}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + ) print(record) # # post updated record @@ -104,6 +116,3 @@ # # ve.message = "Site Code AAAA is not valid." 
# # ve.errors provides more details: # # [{"status":"400", "detail":"Site Code AAAA is not valid.", "source":{"pointer":"site_ownership_code"}}] - - - From 5b96fa514923f26c8791244813063d771c124119 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 16:37:32 -0700 Subject: [PATCH 19/46] disable fail-fast to see if other python versions will fail --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 669b3c8..a2d3409 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -15,7 +15,7 @@ jobs: matrix: os: ["ubuntu-latest"] python-version: ["3.11", "3.12", "3.13"] - + fail-fast: false name: mp-cite (${{ matrix.os }}/py${{ matrix.python-version }}) runs-on: ${{ matrix.os }} From bdac2059e7ca51781bdd01d159d768f9e1cbeb5f Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 16:42:06 -0700 Subject: [PATCH 20/46] Using uv to install ruff dependency, using uv-cache and removing pip installation --- .github/workflows/lint.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a2d3409..6f82f7c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -28,10 +28,9 @@ jobs: python-version: ${{ matrix.python-version }} version: "latest" - - name: Install dependencies + - name: Install ruff run: | - python -m pip install --upgrade pip - pip install ruff + uv pip install ruff # Update output format to enable automatic inline annotations - name: Analyzing the code with ruff run: ruff check --output-format=github From 2e7b9ab2c40c4027b26309ec62dc36b0d2a7bc46 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 16:49:26 -0700 Subject: [PATCH 21/46] added new action to install virtual environment before attempting to install dependency --- .github/workflows/lint.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff 
--git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 6f82f7c..c121fec 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -22,12 +22,14 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install uv + - name: Install uv and set up the python version uses: astral-sh/setup-uv@v6 with: python-version: ${{ matrix.python-version }} version: "latest" + - name: Create virtual environment # this will allow us to install the dependencies in an isolated environment, for each python version too, which is already specified when uv is setup. + - name: Install ruff run: | uv pip install ruff From 64a5c0750cf0735a14058d02c385afad8c2fba35 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 16:54:48 -0700 Subject: [PATCH 22/46] Fixed missing run for uv venv --- .github/workflows/lint.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index c121fec..3eaff62 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -29,6 +29,7 @@ jobs: version: "latest" - name: Create virtual environment # this will allow us to install the dependencies in an isolated environment, for each python version too, which is already specified when uv is setup. + run: uv venv - name: Install ruff run: | From c4d9a8f6e62aad0b5b56e2e02848b5614876d951 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 17:29:19 -0700 Subject: [PATCH 23/46] attempting ruff check again now that is seems to work... --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 69a72af..ef5ca12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,9 @@ exclude = [ "node_modules", "site-packages", "venv", - "legacy" + "legacy", + "notebooks", + "uv.lock" ] # Same as Black. 
From 69556e53daa40e7cd477924720caa2236f9d97d0 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 17:37:09 -0700 Subject: [PATCH 24/46] Trying uvx --- .github/workflows/lint.yml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 3eaff62..fa4a241 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -28,12 +28,5 @@ jobs: python-version: ${{ matrix.python-version }} version: "latest" - - name: Create virtual environment # this will allow us to install the dependencies in an isolated environment, for each python version too, which is already specified when uv is setup. - run: uv venv - - - name: Install ruff - run: | - uv pip install ruff - # Update output format to enable automatic inline annotations - name: Analyzing the code with ruff - run: ruff check --output-format=github + run: uvx ruff check --output-format=github From 8cab90f3454db71392c9879296a02ce9797dbd17 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Mon, 21 Jul 2025 13:26:41 -0700 Subject: [PATCH 25/46] Removed assume python 3.9 from ruff config in pyproject.toml --- .pre-commit-config.yaml | 2 +- pyproject.toml | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 627c045..996fb04 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,4 +10,4 @@ repos: # Run the formatter. - id: ruff-format types_or: [python, pyi ] -exclude: 'legacy/*' +exclude: 'legacy' diff --git a/pyproject.toml b/pyproject.toml index ef5ca12..14ef704 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,9 +77,6 @@ exclude = [ line-length = 88 indent-width = 4 -# Assume Python 3.9 -target-version = "py39" - [tool.ruff.lint] # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. 
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or From edd887ee77e5551a730c8aeb8f38402b5477247c Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Wed, 16 Jul 2025 10:37:37 -0700 Subject: [PATCH 26/46] New Branch for Linting Workflow --- .gitignore | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.gitignore b/.gitignore index 9045477..966a6f0 100644 --- a/.gitignore +++ b/.gitignore @@ -209,3 +209,13 @@ __marimo__/ # Streamlit .streamlit/secrets.toml +<<<<<<< HEAD +======= + +# json files for storing production records +*.json +.env +/json_pages +/notebooks +/test_json_pages +>>>>>>> b991f09 (New Branch for Linting Workflow) From 4657ea39aa114d5afaf024d82574fbe4036bb3f3 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 15:31:57 -0700 Subject: [PATCH 27/46] Testing Linting workflow --- .github/workflows/lint.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index dfc4fef..eb8dae7 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -5,4 +5,36 @@ on: branches: [master] pull_request: branches: [master] + workflow_dispatch: + # TODO: setup linting with uv/ruff +<<<<<<< HEAD +======= +# informed by testing.yml and https://medium.com/@sidharthvijayakumar7/automating-pylint-in-github-workflow-80c84b2ff243 and ruff documentation +jobs: + linting: + strategy: + matrix: + os: ["ubuntu-latest"] + python-version: ["3.11", "3.12", "3.13"] + + name: mp-cite (${{ matrix.os }}/py${{ matrix.python-version }}) + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v6 + with: + python-version: ${{ matrix.python-version }} + version: "latest" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff + # Update output format to enable automatic inline annotations + - name: Analyzing the code with ruff + run: ruff check 
--output-format=github +>>>>>>> 8d7d55b (Testing Linting workflow) From 5c9197e5aa1d35d6d49daa97d03d863e7def954f Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 15:37:26 -0700 Subject: [PATCH 28/46] (after rebase) Allowing Lint.YML to run on push to linting_workflow --- .github/workflows/lint.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index eb8dae7..d95c242 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -2,9 +2,9 @@ name: linting on: push: - branches: [master] + branches: [master, linting-workflow] pull_request: - branches: [master] + branches: [master, linting-workflow] workflow_dispatch: # TODO: setup linting with uv/ruff @@ -37,4 +37,7 @@ jobs: # Update output format to enable automatic inline annotations - name: Analyzing the code with ruff run: ruff check --output-format=github +<<<<<<< HEAD >>>>>>> 8d7d55b (Testing Linting workflow) +======= +>>>>>>> 61d74c1 (Allowing Lint.YML to run on push to linting_workflow) From 122e4cb6aac885da47500c052ca46c6d2f7c0115 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 16:25:37 -0700 Subject: [PATCH 29/46] Testing pre-commit and updated lint.yml to disregard legacy files --- .github/workflows/lint.yml | 2 +- .gitignore | 4 +- .pre-commit-config.yaml | 13 ++ pyproject.toml | 80 ++++++++++ src/mp_cite/core.py | 155 +++++++++++++++++++ src/mp_cite/doi_builder.py | 204 +++++++++++++++++++++++++ src/mp_cite/pipeline.py | 120 +++++++++++++++ src/mp_cite/recordresponse_example.txt | 92 +++++++++++ src/mp_cite/reset.py | 27 ++++ src/mp_cite/test_core.py | 94 ++++++++++++ tests/file_to_jsonForUpload.py | 167 ++++++++++++++++++++ tests/github_bug_report.py | 61 ++++++++ tests/manage_backfills.py | 48 ++++++ tests/outputs.txt | 46 ++++++ tests/prod_to_review.py | 129 ++++++++++++++++ tests/test_elink_api.py | 118 ++++++++++++++ 16 files changed, 1357 insertions(+), 3 deletions(-) 
create mode 100644 .pre-commit-config.yaml create mode 100644 src/mp_cite/core.py create mode 100644 src/mp_cite/doi_builder.py create mode 100644 src/mp_cite/pipeline.py create mode 100644 src/mp_cite/recordresponse_example.txt create mode 100644 src/mp_cite/reset.py create mode 100644 src/mp_cite/test_core.py create mode 100644 tests/file_to_jsonForUpload.py create mode 100644 tests/github_bug_report.py create mode 100644 tests/manage_backfills.py create mode 100644 tests/outputs.txt create mode 100644 tests/prod_to_review.py create mode 100644 tests/test_elink_api.py diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index d95c242..04a5381 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -20,7 +20,7 @@ jobs: name: mp-cite (${{ matrix.os }}/py${{ matrix.python-version }}) runs-on: ${{ matrix.os }} - + steps: - uses: actions/checkout@v4 diff --git a/.gitignore b/.gitignore index 966a6f0..6a25f84 100644 --- a/.gitignore +++ b/.gitignore @@ -183,9 +183,9 @@ cython_debug/ .abstra/ # Visual Studio Code -# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore -# and can be added to the global gitignore or merged into this file. However, if you prefer, +# and can be added to the global gitignore or merged into this file. However, if you prefer, # you could uncomment the following to ignore the entire vscode folder .vscode/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..627c045 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.12.4 + hooks: + # Run the linter. 
+ - id: ruff-check + types_or: [ python, pyi ] + args: [ --fix ] + # Run the formatter. + - id: ruff-format + types_or: [python, pyi ] +exclude: 'legacy/*' diff --git a/pyproject.toml b/pyproject.toml index 0637adb..35331cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,3 +38,83 @@ packages = ["src/mp_cite"] [project.urls] Homepage = "https://github.com/materialsproject/MPCite" Issues = "https://github.com/materialsproject/MPCite/issues" + +[tool.ruff] +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", + "legacy" +] + +# Same as Black. +line-length = 88 +indent-width = 4 + +# Assume Python 3.9 +target-version = "py39" + +[tool.ruff.lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or +# McCabe complexity (`C901`) by default. +select = ["E4", "E7", "E9", "F"] +ignore = [] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" + +# Enable auto-formatting of code examples in docstrings. Markdown, +# reStructuredText code/literal blocks and doctests are all supported. 
+# +# This is currently disabled by default, but it is planned for this +# to be opt-out in the future. +docstring-code-format = false + +# Set the line length limit used when formatting code snippets in +# docstrings. +# +# This only has an effect when the `docstring-code-format` setting is +# enabled. +docstring-code-line-length = "dynamic" diff --git a/src/mp_cite/core.py b/src/mp_cite/core.py new file mode 100644 index 0000000..b1da88d --- /dev/null +++ b/src/mp_cite/core.py @@ -0,0 +1,155 @@ +from typing import TypeAlias + +from elinkapi import Elink +from elinkapi.record import RecordResponse, Record, Organization, Person +from pymongo import MongoClient + +import requests +from elinkapi.utils import Validation + +from datetime import datetime + +OstiID: TypeAlias = int + + +def find_out_of_date_doi_entries( + rc_client: MongoClient, + doi_client: MongoClient, + robocrys_db: str, + robocrys_collection: str, + doi_db: str, + doi_collection: str, +) -> list[OstiID]: + robocrys = rc_client[robocrys_db][robocrys_collection] + dois = doi_client[doi_db][doi_collection] + + latest_doi = next( + dois.aggregate( + [ + {"$project": {"_id": 0, "date_metadata_updated": 1}}, + {"$sort": {"date_metadata_updated": -1}}, + {"$limit": 1}, + ] + ) + )["date_metadata_updated"] + + material_ids_to_update = list( + map( + lambda x: x["material_id"], + robocrys.find( + {"last_updated": {"$gt": latest_doi}}, {"_id": 0, "material_id": 1} + ), + ) + ) + + return list( + map( + lambda x: x["osti_id"], + dois.find( + {"material_id": {"$in": material_ids_to_update}}, + {"_id": 0, "osti_id": 1}, + ), + ), + ) + + +def update_existing_osti_record( + elinkapi: Elink, osti_id: OstiID, new_values: dict +) -> RecordResponse: + record_on_elink = elinkapi.get_single_record(osti_id) + + for keyword in new_values.keys(): + try: + setattr(record_on_elink, keyword, new_values[keyword]) + except ValueError: + print( + "Extraneous keywords found in the dictionary that do not correspond to attributes 
in the ELink API's record class." + ) + + # assume the use with fix the sponsor identifier bug before calling the update function + # # fix the issue with the sponsor organization's identifiers + # for entry in record_on_elink.organizations: + # if entry.type == "SPONSOR": + # entry.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + # break + + return elinkapi.update_record( + osti_id, record_on_elink, state="save" + ) # user should use update_state_of_osti_record to submit instead + + +def submit_new_osti_record( + elinkapi: Elink, + new_record: Record, + state="submit", # assuming there is no need to both with saving. just send new record to osti when its ready for submission. also assume bug with DOE contract number identifier in sponsor organization is accounted for +) -> RecordResponse: + # template for all repeated stuff + # only submit + record_response = elinkapi.post_new_record(new_record, state) + + return record_response + + +def update_state_of_osti_record( + elinkapi: Elink, osti_id: OstiID, new_state="submit" +) -> RecordResponse: + record = elinkapi.get_single_record(osti_id) + + # assuming that the user will handle the sponsor identifier bug before calling this function + # # fix the issue with the sponsor organization's identifiers + # for entry in record.organizations: + # if entry.type == "SPONSOR": + # entry.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + # break + + return elinkapi.update_record(osti_id, record, new_state) + + +def delete_osti_record( + elinkapi_token: str, osti_id: OstiID, reason: str +) -> RecordResponse: + review_endpoint = "https://review.osti.gov/elink2api/" + review_api = Elink(token=elinkapi_token, target=review_endpoint) + + """Delete a record by its OSTI ID.""" + response = requests.delete( + f"{review_api.target}records/{osti_id}?reason={reason}", + headers={"Authorization": f"Bearer {review_api.token}"}, + ) + Validation.handle_response(response) + return response.status_code == 204 # 
True if deleted successfully + + +def emptyReviewAPI(reason, review_api): + for record in review_api.query_records(): + delete_osti_record(review_api, record.osti_id, reason) + + +def make_minimum_record_to_fully_release( + title, # required to make record + product_type="DA", # required to make record + organizations=[ + Organization(type="RESEARCHING", name="LBNL Materials Project (LBNL-MP)"), + Organization( + type="SPONSOR", + name="TEST SPONSOR ORG", + identifiers=[{"type": "CN_DOE", "value": "AC02-05CH11231"}], + ), + ], # sponsor org is necessary for submission + persons=[Person(type="AUTHOR", last_name="Perrson")], + site_ownership_code="LBNL-MP", + access_limitations=["UNL"], + publication_date=datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0 + ), # what should this be? + site_url="https://next-gen.materialsproject.org/materials", +) -> Record: + return Record( + product_type, + title, + persons, + site_ownership_code, + access_limitations, + publication_date, + site_url, + ) diff --git a/src/mp_cite/doi_builder.py b/src/mp_cite/doi_builder.py new file mode 100644 index 0000000..b85fb90 --- /dev/null +++ b/src/mp_cite/doi_builder.py @@ -0,0 +1,204 @@ +''' +doi_builder.py +A doi collection must store the following information about a document: +- doi number +- title +- osti id (ELink's Unique Identifier) +- material id (MP's Unique Identifier) +- date of system entry date (Date (UTC) of this revision's inception) +- date of last update (date edited or date_submitted_to_osti_last) (take from ELink) +- workflow status and the date (?) 
of each step: + - SA, saved, in a holding state, not to be processed + - SR, submit to releasing official "released_to_osti_date, as entered by releasing official" + - SO, submit to OSTI + - SF, submitted but failed validation + - SX, submitted but failed to release + - SV, submitted and validated + - R, released +- + +Here is an example of RecordResponse +RecordResponse( + osti_id=2523296, + workflow_status='SA', + access_limitations=['UNL'], + access_limitation_other=None, + announcement_codes=None, + availability=None, + edition=None, + volume=None, + + # Identifiers + identifiers=[ + Identifier(type='CN_NONDOE', value='EDCBEE'), + Identifier(type='CN_DOE', value='AC02-05CH11231'), + Identifier(type='RN', value='mp-1037659'), + ], + + # People involved + persons=[ + Person( + type='CONTACT', + first_name='Kristin', + last_name='Persson', + phone='+1(510)486-7218', + email=['feedback@materialsproject.org'], + affiliations=[ + Affiliation(name='LBNL') + ] + ) + ], + + # Organizations + organizations=[ + Organization(name='The Materials Project', type='CONTRIBUTING', contributor_type='ResearchGroup'), + Organization(name='LBNL Materials Project', type='RESEARCHING'), + Organization(name='Lawrence Berkeley National Laboratory (LBNL), Berkeley, CA (United States)', type='RESEARCHING'), + Organization(name='USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22)', type='SPONSOR'), + Organization(name='MIT', type='CONTRIBUTING', contributor_type='Other'), + Organization(name='UC Berkeley', type='CONTRIBUTING', contributor_type='Other'), + Organization(name='Duke', type='CONTRIBUTING', contributor_type='Other'), + Organization(name='U Louvain', type='CONTRIBUTING', contributor_type='Other'), + ], + + # Metadata + country_publication_code='US', + doe_supported_flag=False, + doi='10.17188/1714845', + edit_reason='Record updated upon request of LBNL-MP to remove authors and replace with a single collaborator.', + format_information='', + 
invention_disclosure_flag=None, + paper_flag=False, + peer_reviewed_flag=False, + product_type='DA', + publication_date=datetime.date(2020, 4, 30), + publication_date_text='04/30/2020', + site_url='https://materialsproject.org/materials/mp-1037659', + site_ownership_code='LBNL-MP', + site_unique_id='mp-1037659', + subject_category_code=['36'], + title='Materials Data on RbYMg30O32 by Materials Project', + + # Description + description=""" + RbMg₃₀YO₃₂ is Molybdenum Carbide MAX Phase-derived and crystallizes in the tetragonal P4/mmm space group. + Rb¹⁺ is bonded to six O²⁻ atoms to form RbO₆ octahedra... + (Truncated here for brevity, full description is included in original) + """, + + keywords=['crystal structure', 'RbYMg30O32', 'Mg-O-Rb-Y'], + languages=['English'], + related_doc_info='https://materialsproject.org/citing', + + # Media + media=[ + MediaInfo( + media_id=1908478, + osti_id=2523296, + status='C', + mime_type='text/html', + files=[ + MediaFile( + media_file_id=12017281, + media_type='O', + url='https://materialsproject.org/materials/mp-1037659' + ), + MediaFile( + media_file_id=12017284, + media_type='C', + mime_type='text/html', + media_source='OFF_SITE_DOWNLOAD' + ) + ] + ) + ], + + # Audit logs + audit_logs=[ + AuditLog( + messages=['Revision status is not correct, found SA'], + status='FAIL', + type='RELEASER', + audit_date=datetime.datetime(2025, 6, 30, 22, 30, 24, 865000, tzinfo=TzInfo(UTC)) + ) + ], + + # Timestamps + date_metadata_added=datetime.datetime(2025, 6, 30, 22, 30, 20, 495000, tzinfo=TzInfo(UTC)), + date_metadata_updated=datetime.datetime(2025, 6, 30, 22, 30, 22, 247000, tzinfo=TzInfo(UTC)), + + # Misc + revision=2, + added_by=139001, + edited_by=139001, + collection_type='DOE_LAB', + hidden_flag=False +) +''' + +from pydantic import BaseModel, Field +from datetime import datetime + + +# TODO: change the field names to match ELINK +class doi_model(BaseModel): + # identifiers + doi: str = Field( + description="The DOI number as 
allocated by OSTI" + ) # can be taken from ELink API + title: str = Field( + description="The title of the record" + ) # can be taken from ELink API + osti_id: str = Field( + description="The OSTI ID number allocated by OSTI to make the DOI number" + ) # can be taken from ELink API + material_id: str # can be taken from Robocrys Collection or ELink API + + # time stamps + date_metadata_added: datetime | None = Field( + description="date_record_entered_onto_ELink" + ) # can be taken from ELink API response + date_metadata_updated: datetime | None = Field( + description="date_record_last_updated_on_Elink" + ) + + # status + workflow_status: str # can be taken from ELink API + date_released: datetime | None = Field(description="") + date_submitted_to_osti_first: datetime = Field( + description="date record was first submitted to OSTI for publication, maintained internally by E-Link" + ) + date_submitted_to_osti_last: datetime = Field( + description="most recent date record information was submitted to OSTI. Maintained internally by E-Link" + ) + publication_date: datetime | None = Field( + description="" + ) # labelled as publication_date in RecordResponse of ELink API + + +# hypothetically post an update or submit a new record and receive the RecordResponse +def RecordResponse_to_doi_model(recordresponse): + """ + turns a recordresponse, which is returned from a save, submission, post, etc. into a doi_model object + """ + params = { + "doi": recordresponse.doi, + "title": recordresponse.title, + "osti_id": str(recordresponse.osti_id), + "material_id": recordresponse.site_unique_id, + "date_metadata_added": recordresponse.date_metadata_added, + "date_metadata_updated": recordresponse.date_metadata_updated, + "workflow_status": recordresponse.workflow_status, + "date_released": recordresponse.date_released, + # date_released_to_osti = recordresponse.released_to_osti_date, # what is the difference between these??? 
"Date record information was released to OSTI, as entered by releasing official." always seems to be none + "date_submitted_to_osti_first": recordresponse.date_submitted_to_osti_first, # date record was first submitted to OSTI for publication, maintained internally by E-Link + "date_submitted_to_osti_last": recordresponse.date_submitted_to_osti_last, # most recent date record information was submitted to OSTI. Maintained internally by E-Link. + "publication_date": recordresponse.publication_date, + } + + return doi_model(**params) + + +def upload_doi_document_model_to_collection(doi_model, collection): + x = collection.insert_one(doi_model.model_dump()).inserted_id + return x diff --git a/src/mp_cite/pipeline.py b/src/mp_cite/pipeline.py new file mode 100644 index 0000000..3a087ab --- /dev/null +++ b/src/mp_cite/pipeline.py @@ -0,0 +1,120 @@ +import os +import json +from elinkapi import Elink +from elinkapi.record import RecordResponse +from dotenv import load_dotenv + + +from pymongo import MongoClient + +import logging +import requests + +import datetime +from doi_builder import RecordResponse_to_doi_model, doi_model + +load_dotenv() # depends on the root directory from which you run your python scripts. 
+
+review_endpoint = "https://review.osti.gov/elink2api/"
+
+prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key"))
+review_api = Elink(
+    token=os.environ.get("elink_review_api_token"), target=review_endpoint
+)
+
+atlas_user = os.environ.get("atlas_user")
+atlas_password = os.environ.get("atlas_password")
+atlas_host = os.environ.get("atlas_host")
+mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/"
+
+# OSTI IDs whose update request to ELink failed; written to a text file at the end.
+failed_osti_ids = []
+
+cwd = os.getcwd()
+path = "/json_pages/"
+
+
+def _sync_record(record, robocrys, dois):
+    """Align one production ELink record with robocrys and mirror it locally.
+
+    Looks up the robocrys document for ``record.site_unique_id``. When its
+    description differs from the one on the record, the record is updated in
+    the production ELink environment; when the update succeeds, or when the
+    descriptions already match, the record is converted to a DOI model and
+    inserted into ``dois``. The OSTI ID of a failed update is appended to
+    ``failed_osti_ids`` and nothing is inserted for it.
+
+    Args:
+        record: RecordResponse parsed from one line of a json_pages file.
+        robocrys: collection queried by ``material_id`` for the description.
+        dois: collection receiving the DOI documents.
+    """
+    res = robocrys.find_one({"material_id": record.site_unique_id})
+
+    if res is None:
+        # Nothing to compare against. Skip the record instead of comparing it
+        # with a description left over from a previous record.
+        logging.warning(
+            f"No robocrys document was found to match the OSTI record: {record.osti_id}!"
+        )
+        return
+
+    robocrys_description = res["description"]
+
+    # if the description of the record on Elink doesnt match what is in the robocrys collection:
+    if record.description != robocrys_description:
+        # directly update the description of the record via the record response
+        record.description = robocrys_description
+
+        # and directly update the identifier for sponsoring org
+        for entry in record.organizations:
+            if entry.type == "SPONSOR":
+                entry.identifiers = [{"type": "CN_DOE", "value": "AC02-05CH11231"}]
+                break
+
+        try:
+            # send update to the record with the record response
+            prod_api.update_record(record.osti_id, record, state="save")
+
+        except requests.exceptions.RequestException as e:
+            logging.error(f"Network or HTTP error: {e}")
+            failed_osti_ids.append(record.osti_id)
+            return
+
+        except ValueError as e:
+            logging.error(f"Data error while updating record: {e}")
+            failed_osti_ids.append(record.osti_id)
+            return
+
+        except Exception as e:
+            logging.error(f"Unexpected error during update: {e}")
+            failed_osti_ids.append(record.osti_id)
+            return
+
+    # Reached when the update worked or when no update was needed: store the
+    # record as a plain dict in the DOI collection.
+    doi_model = RecordResponse_to_doi_model(record)  # change later to record response
+    dois.insert_one(doi_model.dict(by_alias=True))
+
+
+# Both clients are opened once for the whole run rather than once per record.
+with MongoClient(mongo_uri) as client, MongoClient() as local_client:
+    robocrys_collection = client["mp_core_blue"]["robocrys"]
+    doi_collection = local_client["dois_test"]["dois"]
+
+    for filename in os.listdir(cwd + path):
+        logging.debug(f"Now extracting {filename}")
+        # every non-empty line of a page file is one JSON-serialised record
+        with open(cwd + path + filename, "r") as file:
+            for line in file:
+                line = line.strip()
+                if not line:
+                    continue
+                record = RecordResponse(**json.loads(line))
+                record.osti_id = record.doi.split("/")[1]
+                _sync_record(record, robocrys_collection, doi_collection)
+
+cwd = os.getcwd()
+# str(datetime.now()) contains spaces and colons, which are not valid in
+# Windows file names, so a compact timestamp is used instead.
+timestamp = datetime.datetime.now().strftime("%Y%m%dT%H%M%S")
+path = f"/files/failed_osti_ids_{timestamp}.txt"
+with open(cwd + path, "w") as output:  # change filepath as needed
+    output.writelines(f"{osti_id}\n" for osti_id in failed_osti_ids)
diff --git a/src/mp_cite/recordresponse_example.txt b/src/mp_cite/recordresponse_example.txt
new file mode 100644
index 0000000..0d510a6
--- /dev/null
+++ b/src/mp_cite/recordresponse_example.txt
@@ -0,0 +1,92 @@
+osti_id=1190959
+workflow_status='R'
+access_limitations=['UNL']
+access_limitation_other=None
+announcement_codes=None
+availability=None
+edition=None
+volume=None
+conference_information=None
+conference_type=None
+contract_award_date=None
+country_publication_code='US'
+doe_funded_flag=None
+doe_supported_flag=False
+doi='10.17188/1190959'
+doi_infix=None
+edit_reason=None
+geolocations=None
+format_information=''
+invention_disclosure_flag=None
+issue=None
+journal_license_url=None
+journal_name=None
+journal_open_access_flag=None
+journal_type=None
+keywords=['crystal structure', 'Si', 'Si']
+languages=['English']
+monographic_title=None
+opn_addressee=None
+opn_declassified_date=None
+opn_declassified_status=None
+opn_document_categories=None
+opn_document_location=None
+opn_fieldoffice_acronym_code=None
+other_information=None
+ouo_release_date=None
+pams_publication_status=None
+pams_publication_status_other=None
+pams_authors=None
+pams_editors=None
+pams_product_sub_type=None
+pams_patent_country_code=None
+pams_transnational_patent_office=None
+paper_flag=False
+patent_assignee=None
+patent_file_date=None
+patent_priority_date=None
+pdouo_exemption_number=None
+peer_reviewed_flag=False
+product_size=None
+product_type='DA'
+product_type_other=None
+prot_flag=None
+prot_data_other=None
+prot_release_date=None
+publication_date=datetime.date(2020,
7, 15) +publication_date_text='07/15/2020' +publisher_information=None +related_doc_info='https://materialsproject.org/citing' +released_to_osti_date=None +releasing_official_comments=None +report_period_end_date=None +report_period_start_date=None +report_types=None +report_type_other=None +sbiz_flag=None +sbiz_phase=None +sbiz_previous_contract_number=None +sbiz_release_date=None +site_ownership_code='LBNL-MP' +site_unique_id='mp-149' +subject_category_code=['36'] +subject_category_code_legacy=None +title='Materials Data on Si by Materials Project' +description='Si is diamond structured and crystallizes in the cubic Fd-3m space group. The structure is three-dimensional. Si is bonded to four equivalent Si atoms to form corner-sharing SiSi4 tetrahedra. All Si–Si bond lengths are 2.37 Å.' +identifiers=[Identifier(type='CN_DOE', value='AC02-05CH11231'), Identifier(type='CN_NONDOE', value='EDCBEE'), Identifier(type='RN', value='mp-149')] +persons=[Person(type='CONTACT', first_name='Kristin', middle_name=None, last_name='Persson', orcid=None, phone='+1(510)486-7218', email=['feedback@materialsproject.org'], affiliations=[Affiliation(name='LBNL', ror_id=None)], contributor_type=None)] +organizations=[Organization(type='CONTRIBUTING', name='The Materials Project', contributor_type='ResearchGroup', identifiers=[], ror_id=None), Organization(type='RESEARCHING', name='LBNL Materials Project', contributor_type=None, identifiers=[], ror_id=None), Organization(type='SPONSOR', name='USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22)', contributor_type=None, identifiers=[], ror_id=None), Organization(type='RESEARCHING', name='Lawrence Berkeley National Laboratory (LBNL), Berkeley, CA (United States)', contributor_type=None, identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='MIT', contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='UC Berkeley', contributor_type='Other', identifiers=[], ror_id=None), 
Organization(type='CONTRIBUTING', name='Duke', contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='U Louvain', contributor_type='Other', identifiers=[], ror_id=None)] related_identifiers=[RelatedIdentifier(type='DOI', relation='IsReferencedBy', value='10.1103/physrevmaterials.4.013401')] +site_url='https://materialsproject.org/materials/mp-149' +revision=18 +added_by=234169 +edited_by=None +collection_type='DOE_LAB' +date_metadata_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 275000, tzinfo=TzInfo(UTC)) +date_metadata_updated=datetime.datetime(2021, 7, 15, 2, 10, 43, 372000, tzinfo=TzInfo(UTC)) +date_submitted_to_osti_first=datetime.datetime(2015, 7, 7, 22, 9, 5, 808000, tzinfo=TzInfo(UTC)) +date_submitted_to_osti_last=datetime.datetime(2021, 7, 15, 2, 10, 42, 407000, tzinfo=TzInfo(UTC)) +date_released=datetime.datetime(2021, 7, 15, 2, 10, 43, 240000, tzinfo=TzInfo(UTC)) +sensitivity_flag='U' +hidden_flag=False +media=[MediaInfo(media_id=841489, revision=1, osti_id=1190959, status='C', added_by=None, document_page_count=1, mime_type='text/html', media_title=None, media_location='O', media_source='DOE2416API', date_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 875000, tzinfo=TzInfo(UTC)), date_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 78000, tzinfo=TzInfo(UTC)), date_valid_start=None, date_valid_end=None, files=[MediaFile(media_file_id=4514486, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='O', url_type='O', url='https://materialsproject.org/materials/mp-149', added_by=None, document_page_count=None, file_size_bytes=None, duration_seconds=None, subtitle_tracks=None, video_tracks=None, mime_type='text/html', media_source='DOE2416API', date_file_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 875000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 52, 857000, tzinfo=TzInfo(UTC))), MediaFile(media_file_id=4515065, media_id=841489, revision=1, 
parent_media_file_id=None, status='DONE', media_type='C', url_type='L', url=None, added_by=None, document_page_count=1, file_size_bytes=15546, duration_seconds=0, subtitle_tracks=None, video_tracks=None, mime_type='text/html', media_source='OFF_SITE_DOWNLOAD', date_file_added=datetime.datetime(2015, 7, 8, 2, 50, 52, 877000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 96000, tzinfo=TzInfo(UTC))), MediaFile(media_file_id=4515066, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='T', url_type='L', url=None, added_by=None, document_page_count=1, file_size_bytes=5593, duration_seconds=0, subtitle_tracks=None, video_tracks=None, mime_type='text/plain', media_source='TEXT_EXTRACTION', date_file_added=datetime.datetime(2015, 7, 8, 2, 50, 53, 78000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 83000, tzinfo=TzInfo(UTC)))])] +audit_logs=[] diff --git a/src/mp_cite/reset.py b/src/mp_cite/reset.py new file mode 100644 index 0000000..c82731b --- /dev/null +++ b/src/mp_cite/reset.py @@ -0,0 +1,27 @@ +import os +from dotenv import load_dotenv +from pymongo import MongoClient +from elinkapi import Elink + +load_dotenv() # depends on the root directory from which you run your python scripts. 
+ +review_endpoint = "https://review.osti.gov/elink2api/" + +prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) +review_api = Elink( + token=os.environ.get("elink_review_api_token"), target=review_endpoint +) + +cwd = os.getcwd() +path = "/json_pages/page_number_1000.0" # IT'S ONLY DOING ONE FILE RIGHT NOW +file = open(cwd + path, "r") + +atlas_user = os.environ.get("atlas_user") +atlas_password = os.environ.get("atlas_password") +atlas_host = os.environ.get("atlas_host") +mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" + +# emptyReviewAPI("Testing", review_api) + +with MongoClient() as client: + client.dois_test.dois.delete_many({}, comment="Testing") diff --git a/src/mp_cite/test_core.py b/src/mp_cite/test_core.py new file mode 100644 index 0000000..550c1d3 --- /dev/null +++ b/src/mp_cite/test_core.py @@ -0,0 +1,94 @@ +from pymongo import MongoClient +from elinkapi import Elink +from .core import find_out_of_date_doi_entries, update_existing_osti_record +import os +from dotenv import load_dotenv + +load_dotenv() # depends on the root directory from which you run your python scripts. 
+ +review_endpoint = "https://review.osti.gov/elink2api/" + +prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) +review_api = Elink( + token=os.environ.get("elink_review_api_token"), target=review_endpoint +) + +cwd = os.getcwd() +path = "/json_pages/page_number_1000.0" # IT'S ONLY DOING ONE FILE RIGHT NOW +file = open(cwd + path, "r") + +atlas_user = os.environ.get("atlas_user") +atlas_password = os.environ.get("atlas_password") +atlas_host = os.environ.get("atlas_host") +mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" + +with MongoClient(mongo_uri) as real_client: + with ( + MongoClient() as doi_client + ): # open the mongoclient outside of the for loop, is more efficient than opening and closing it repeatedly + dois = doi_client["dois_test"]["dois"] + + # for line in file: + # js = json.loads(line.strip()) + + # # temporarily fix the sponsor organization bug + # for entry in js["organizations"]: + # if entry["type"] == "SPONSOR": + # entry["identifiers"] = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + + # my_record = Record(**js) + + # # make a post to the elink review environment + # saved_record = review_api.post_new_record(my_record, state="submit") + # # make a doi document with saved_record + # doi_model = RecordResponse_to_doi_model(saved_record) + + # # now, add that doi to the local doi collection + # upload_doi_document_model_to_collection(doi_model, dois) + + # all_material_ids = [doc["material_id"] for doc in dois.find({}, {"_id": 0, "material_id": 1})] + + # for material_id in all_material_ids: + + # # query prod env for record with materials_id == site_unique_id + # record_from_prod = prod_api.query_records(site_unique_id=material_id) + + # if record_from_prod.total_rows != 1: + # print(f"ERROR: not unique Material_ID! 
{material_id}") + # raise + + # # make a doi_model from that data + # recordresponse_from_prod = RecordResponse_to_doi_model(record_from_prod.data[0]) + + # query_filter = {"material_id": material_id} + + # # Find existing document to preserve the osti_id + # existing_doc = dois.find_one(query_filter, {"osti_id": 1}) # only retrieve osti_id + + # if not existing_doc: + # print(f"ERROR: document with material_id {material_id} not found in `dois` collection.") + # raise + + # replacement_doc = recordresponse_from_prod.model_dump() + # replacement_doc["osti_id"] = existing_doc["osti_id"] + + # dois.replace_one(query_filter, replacement_doc) + + osti_OOD_list = find_out_of_date_doi_entries( + real_client, doi_client, "mp_core_blue", "robocrys", "dois_test", "dois" + ) + print(osti_OOD_list) + + for osti_id in osti_OOD_list: + material_id_to_update = review_api.get_single_record(osti_id).site_unique_id + + new_values = { + "description": "UPDATED ROBOCRYS DESCRIPTION: " + + next( + real_client["mp_core_blue"]["robocrys"].find( + {"material_id": material_id_to_update}, {"_id": 0, "description": 1} + ) + )["description"] + } + + update_existing_osti_record(review_api, osti_id, new_values) diff --git a/tests/file_to_jsonForUpload.py b/tests/file_to_jsonForUpload.py new file mode 100644 index 0000000..f728fc9 --- /dev/null +++ b/tests/file_to_jsonForUpload.py @@ -0,0 +1,167 @@ +import os +import json +from elinkapi import Elink, Record +from dotenv import load_dotenv + +import requests +from elinkapi.utils import Validation + +from pymongo import MongoClient + +from timeit import default_timer as timer + +load_dotenv() # depends on the root directory from which you run your python scripts. 
+
+review_endpoint = "https://review.osti.gov/elink2api/"
+
+prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key"))
+review_api = Elink(
+    token=os.environ.get("elink_review_api_token"), target=review_endpoint
+)
+
+
+atlas_user = os.environ.get("atlas_user")
+atlas_password = os.environ.get("atlas_password")
+atlas_host = os.environ.get("atlas_host")
+mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/"
+
+cwd = os.getcwd()
+path = "/json_pages/page_number_4.0"  # IT'S ONLY DOING ONE FILE RIGHT NOW
+
+update_counter = 0
+records_checked = 0
+
+
+def delete_record(api, osti_id, reason):
+    """Delete a record by its OSTI ID.
+
+    Args:
+        api: Elink client; its ``target`` and ``token`` attributes are used.
+        osti_id: OSTI ID of the record to delete.
+        reason: free-text reason, sent as the ``reason`` query parameter.
+
+    Returns:
+        True when the service answered 204, otherwise False.
+    """
+    response = requests.delete(
+        f"{api.target}records/{osti_id}",
+        # passed via params so spaces and special characters are URL-encoded
+        params={"reason": reason},
+        headers={"Authorization": f"Bearer {api.token}"},
+    )
+    Validation.handle_response(response)
+    return response.status_code == 204  # True if deleted successfully
+
+
+def emptyReviewAPI(reason):
+    """Delete every record returned by a query of the review environment."""
+    for record in review_api.query_records():
+        delete_record(review_api, record.osti_id, reason)
+
+
+start = timer()
+
+# Post an updated json
+
+postUnedited = False
+
+# The page file and the Mongo client are opened once and closed on exit.
+with open(cwd + path, "r") as file, MongoClient(mongo_uri) as client:
+    coll = client["mp_core_blue"]["robocrys"]
+
+    for line in file:
+        js = json.loads(line.strip())
+
+        for entry in js["organizations"]:
+            if entry["type"] == "SPONSOR":
+                entry["identifiers"] = [{"type": "CN_DOE", "value": "AC02-05CH11231"}]
+
+        material_id = js["site_unique_id"]
+
+        # Description as stored in the page file; kept separately so it can
+        # still be reported after js["description"] is overwritten below.
+        elink_description = js.get("description")
+        robocrys_description = elink_description
+
+        res = coll.find_one({"material_id": material_id})
+        records_checked += 1
+
+        if res is not None:
+            robocrys_description = res["description"]
+
+        # see if an update to the description is necessary, if it is, then update the description and post a new record.
+        needs_update = (
+            robocrys_description is not None
+            and elink_description != robocrys_description
+        )
+        if postUnedited or needs_update:
+            js["description"] = "OLD WAS UPDATED, THEN IT WAS POSTED: " + (
+                robocrys_description or ""
+            )
+            my_record = Record(**js)
+
+            try:
+                saved_record = review_api.post_new_record(my_record, state="submit")
+            except Exception:
+                robocrys_snippet = (robocrys_description or "")[0:50]
+                elink_snippet = (elink_description or "")[0:50]
+                print(
+                    f"Record failed to post!: {my_record.doi}. Robocrys Collection Had Description {robocrys_snippet}... Prod_Env ELink Had {elink_snippet}..."
+                )
+                raise
+
+            update_counter += 1
+            print(f"NEW RECORD POSTED: {saved_record.osti_id}")
+
+        if update_counter >= 10000:
+            break
+
+end = timer()
+print(
+    f"Records Updated and/or Posted: {update_counter} \nRecords Checked in Total: {records_checked}. \nIt took {end - start} seconds"
+)
+
+#######################################################
+# JUST POST JSON, Then update posted json Later
+# post_counter = 0
+# records_checked = 0
+
+# for line in file:
+#     js = json.loads(line.strip())
+
+#     material_id = js["site_unique_id"]
+
+#     # always post, no update
+#     my_record = Record(**js)
+
+#     saved_record = None
+#     try:
+#         # The API will now return an error code on this call
+#         # because "AAAA" is not a valid site_ownership_code
+
+#         # posts an unupdated record
+#         saved_record = review_api.post_new_record(my_record, "save")
+#         post_counter += 1
+
+#         print("\n\n NEW RECORD POSTED")
+#         print(saved_record)
+
+#         robocrys_description = js["description"]
+
+#         with MongoClient(mongo_uri) as client:
+#             coll = client["mp_core_blue"]["robocrys"]
+#             res = coll.find_one({"material_id" : material_id})
+#             records_checked += 1
+
+#             if res != None:
+#                 robocrys_description = res["description"]
+
+#         if robocrys_description != None and js["description"] != robocrys_description: # if an update is needed
+#             # update the js["description"]
+#             js["description"] = "OLD WAS POSTED, THEN RECORD WITH NEW DESCRIPTION UPDATED IT: " + robocrys_description
+
+#             # turn it into a
new record +# new_updated_record = Record(**js) + +# # use that new record to update what was just posted +# review_api.update_record(saved_record.osti_id, new_updated_record, "save") + +# except: +# print("Record failed to post!") + +# if post_counter >= 10000: +# break + +# end = timer() +# print(f"Records Updated and/or Posted: {update_counter} \n Records Checked in Total: {records_checked}. It took {end - start} seconds") + +###################################################### diff --git a/tests/github_bug_report.py b/tests/github_bug_report.py new file mode 100644 index 0000000..2c3719c --- /dev/null +++ b/tests/github_bug_report.py @@ -0,0 +1,61 @@ +from elinkapi import Elink, Organization, Person, exceptions, Record +import os +from dotenv import load_dotenv +from datetime import datetime + +load_dotenv() + +prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) +review_endpoint = "https://review.osti.gov/elink2api/" +review_api = Elink( + token=os.environ.get("elink_review_api_token"), target=review_endpoint +) + +# record_response = prod_api.get_single_record(1190959) # returns OSTI record response with OSTI ID = 1190959, which has a DOE Contract Number saved (AC02-05CH11231; EDCBEE) +# record_response_dict = record_response.model_dump(exclude_none=True) +# record_response_dict.pop("osti_id") # remove osti_id to allow post function + +# new_record = Record(**record_response_dict) # identical record with removed OSTI_ID +# for org in new_record.organizations: +# if org.type == "SPONSOR": +# print(org) +# org.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] + +# # attempt to submit exact same record to review environment +# record_response_after_post = review_api.post_new_record(new_record, "save") # works after re-providing the DOE contract number + +# # next, attempt updating this record +# record_to_update = review_api.get_single_record(record_response_after_post.osti_id) +# record_to_update.title = "Updated Title For Materials 
Data" +# review_api.update_record(record_response_after_post.osti_id, record_to_update, "submit") + +required_fields = { + "product_type": "DA", + "title": "Testing if CN_DOE can be random", + "organizations": [ + Organization(type="RESEARCHING", name="LBNL Materials Project (LBNL-MP)"), + Organization( + type="SPONSOR", + name="TEST SPONSOR ORG", + identifiers=[{"type": "CN_DOE", "value": "oiajdiwjdiwj"}], + ), + ], + "persons": [Person(type="AUTHOR", last_name="Schmoe")], + "site_ownership_code": "LBNL-MP", + "access_limitations": ["UNL"], + "publication_date": datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0 + ), + "site_url": "https://next-gen.materialsproject.org/materials", +} + +empty_record = Record(**required_fields) +print( + f"SUBMITTED TO OSTI, FULLY VALIDATED:\n{review_api.get_single_record(2525614)}\n\n\nTRYING TO SUBMIT:\n{empty_record}" +) + +try: + saved_record = review_api.post_new_record(empty_record, "submit") +except exceptions.BadRequestException as ve: + print(ve.message) + print(ve.errors) diff --git a/tests/manage_backfills.py b/tests/manage_backfills.py new file mode 100644 index 0000000..85abf65 --- /dev/null +++ b/tests/manage_backfills.py @@ -0,0 +1,48 @@ +# This script will see how many documents in ELink, i.e. ones with a DOI, are not accounted for in the internal DOI collection. + +from elinkapi import Elink, Record + +import os +from dotenv import load_dotenv + +load_dotenv() # depends on the root directory from which you run your python scripts. 
+ +api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) + + +query1 = api.query_records(rows=1000) + +materials_with_dois: list[Record] = [] + +for page in query1: + print(f"Now on Page: {page.title}") + print(f"Material_ID: {page.site_unique_id} and DOI: http://doi.org/{page.doi}") + + if page.site_unique_id.startswith("mp-"): + materials_with_dois.append(page) + + # for record in page.data: + # if record.site_unique_id.startswith("mp-"): + # materials_with_dois.append(record) + + +# set_q1 = [page for page in query1] +# set_q2 = [page for page in query2] + +# set_diffq1q2 = set(set_q1) - set(set_q2) +# print (f"Difference matched {len(set)} records") + +# filtered = [ +# page for page in query1 +# if page.title.lower().startswith("materials data on") +# ] + +# print (f"Filtered Query1 has {len(filtered)} records") + +# paginate through ALL results +# for page in query1: +# print(page.title) +# print(f"Material_ID: {page.site_unique_id} and DOI: http://doi.org/{page.doi}") + +# for record in page.data: +# print (f"OSTI ID: {record.osti_id} Title: {record.title}") diff --git a/tests/outputs.txt b/tests/outputs.txt new file mode 100644 index 0000000..740a682 --- /dev/null +++ b/tests/outputs.txt @@ -0,0 +1,46 @@ +(mpcite-env) C:\Users\ongha\OneDrive\Documents\GitHub\MPCite>C:/Users/ongha/anaconda3/envs/mpcite-env/python.exe c:/Users/ongha/OneDrive/Documents/GitHub/MPCite/tests/prod_to_review.py + +Query retrieved 144845 record(s) +Page finished. Now at 500 data entries. 0 edge cases found. +Page finished. Now at 1000 data entries. 0 edge cases found. +Page finished. Now at 1500 data entries. 0 edge cases found. +Page finished. Now at 2000 data entries. 0 edge cases found. +Page finished. Now at 2500 data entries. 0 edge cases found. +Page finished. Now at 3000 data entries. 0 edge cases found. +Page finished. Now at 3500 data entries. 0 edge cases found. +Page finished. Now at 4000 data entries. 0 edge cases found. +Page finished. Now at 4500 data entries. 
0 edge cases found. +Page finished. Now at 5000 data entries. 0 edge cases found. +Page finished. Now at 5500 data entries. 0 edge cases found. +Page finished. Now at 6000 data entries. 0 edge cases found. +Page finished. Now at 6500 data entries. 0 edge cases found. +Page finished. Now at 7000 data entries. 0 edge cases found. +Page finished. Now at 7500 data entries. 0 edge cases found. +Page finished. Now at 8000 data entries. 0 edge cases found. +Page finished. Now at 8500 data entries. 0 edge cases found. +Page finished. Now at 9000 data entries. 0 edge cases found. +Page finished. Now at 9500 data entries. 0 edge cases found. +Page finished. Now at 10000 data entries. 0 edge cases found. +Page finished. Now at 10500 data entries. 0 edge cases found. +Page finished. Now at 11000 data entries. 0 edge cases found. +Page finished. Now at 11500 data entries. 0 edge cases found. +Page finished. Now at 12000 data entries. 0 edge cases found. +Page finished. Now at 12500 data entries. 0 edge cases found. +Page finished. Now at 13000 data entries. 0 edge cases found. +Page finished. Now at 13500 data entries. 0 edge cases found. +Page finished. Now at 14000 data entries. 0 edge cases found. +Page finished. Now at 14500 data entries. 0 edge cases found. 
+ +Traceback (most recent call last): + File "C:\Users\ongha\anaconda3\envs\mpcite-env\Lib\site-packages\elinkapi\query.py", line 95, in __next__ + record = self.data.pop() +IndexError: pop from empty list + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "c:\Users\ongha\OneDrive\Documents\GitHub\MPCite\tests\prod_to_review.py", line 29, in + record = next(query) + File "C:\Users\ongha\anaconda3\envs\mpcite-env\Lib\site-packages\elinkapi\query.py", line 108, in __next__ + raise StopIteration +StopIteration diff --git a/tests/prod_to_review.py b/tests/prod_to_review.py new file mode 100644 index 0000000..732340d --- /dev/null +++ b/tests/prod_to_review.py @@ -0,0 +1,129 @@ +from elinkapi import Elink + +import os +from dotenv import load_dotenv + +import json + +load_dotenv() # depends on the root directory from which you run your python scripts. + +review_endpoint = "https://review.osti.gov/elink2api/" + +prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) +review_api = Elink( + token=os.environ.get("elink_review_api_token"), target=review_endpoint +) + +print(prod_api.query_records()) + +rows_per_page = 100 + +# query production +query = prod_api.query_records(rows=rows_per_page) +print(f"Query retrieved {query.total_rows} record(s)") + +count_materials_data = 0 +count_MaterialsDataOn = 0 +cwd = os.getcwd() +page_number = 0 +page_json_list = [] + +for record in query: + # increment counter + count_materials_data = count_materials_data + 1 + print( + f"On record #{count_materials_data}, next url is {query.next_url}, previous url is {query.previous_url}" + ) + + # see if the record is a Materials Data on record + if record.title.startswith("Materials Data on"): + # increment the MaterialsDataOn counter + count_MaterialsDataOn = count_MaterialsDataOn + 1 + + # prepare the new record for the review environment, remove the OSTI ID, and add its model_dump to the list of json objects for the 
page.
+        new_record = record
+        new_record_dict = new_record.model_dump(exclude_none=True)
+
+        # A default is supplied so a record dumped without the key does not
+        # raise; afterwards new_record_dict does not have the osti_id key.
+        new_record_osti_id = new_record_dict.pop("osti_id", None)
+        js = json.dumps(
+            new_record_dict, default=str
+        )  # datetime objects are not JSON serializable, so we use default=str to convert them to strings.
+
+        page_json_list.append(js)
+
+        # TODO: take the new_record_dict and make it into a new post to the review environment and save the RecordResponse.
+
+    else:
+        print(f"Found edge case: {record.title}")
+
+    if count_materials_data % rows_per_page == 0:
+        # create/open, write, and close new json file
+        # (true division is kept on purpose: it yields the "page_number_<n>.0" file names)
+        page_number = count_materials_data / rows_per_page
+        path = f"/json_pages/page_number_{page_number}"
+        # "w" instead of "a": re-running the script must not append the same
+        # records to an existing page a second time.
+        with open(cwd + path, "w") as fp:
+            fp.writelines(f"{js}\n" for js in page_json_list)
+
+        page_json_list = []
+
+        print(
+            f"Page {page_number} finished. Now at {count_materials_data} data entries. {count_materials_data - count_MaterialsDataOn} edge cases found."
+        )
+
+# write the remainder of records if the last page was not full when the loop exits;
+# nothing is written when the last page ended exactly on a page boundary
+if page_json_list:
+    page_number = page_number + 1
+    path = f"/json_pages/page_number_{page_number}"
+    with open(cwd + path, "w") as fp:
+        fp.writelines(f"{js}\n" for js in page_json_list)
+
+# # if contains materials data on, then add to batch
+# for count_materials_data < query.total_rows:
+
+#     # print(f"The length of the query is now {len(query.data)}")
+#     record = next(query)
+#     count_materials_data = count_materials_data + 1
+
+#     if record.title.startswith("Materials Data on"):
+#         count_MaterialsDataOn = count_MaterialsDataOn + 1
+
+#         new_record = record
+#         new_record_dict = new_record.model_dump(exclude_none=True)
+
+#         new_record_osti_id = new_record_dict.pop("osti_id")
+
+#         page_dict[f"Entry OSTI_ID {new_record_osti_id}"] = new_record_dict
+
+#         # TODO: take the new_record_dict and make it into a new post to the review environment and save the RecordResponse.
+ + +# if count_materials_data % rows_per_page == 0: +# # if a page has been fully consummed, then print the new batched dictionary to a json file. + +# js = json.dumps(page_dict, default=str) + +# # open new json file if not exist it will create +# cwd = os.getcwd() +# path = f'/json_pages/page_number_{count_materials_data/rows_per_page}' +# fp = open(cwd+path, 'a') + +# # write to json file +# fp.write(js) + +# # close the connection to the file and empty the dict +# fp.close() +# page_dict = {} + +# print(f"Page {(count_materials_data / rows_per_page)} finished. Now at {count_materials_data} data entries. {count_materials_data - count_MaterialsDataOn} edge cases found.") + +# model_dump exclude_none=True, remove null keys +# pop osti_id --> save batch to json files +# make new record +# post to review_api diff --git a/tests/test_elink_api.py b/tests/test_elink_api.py new file mode 100644 index 0000000..5e07706 --- /dev/null +++ b/tests/test_elink_api.py @@ -0,0 +1,118 @@ +import os +from dotenv import load_dotenv + +from elinkapi import Elink + +from pymongo import MongoClient + +load_dotenv() + +atlas_user = os.environ.get("atlas_user") +atlas_password = os.environ.get("atlas_password") +atlas_host = os.environ.get("atlas_host") +mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" + +api = Elink( + token=os.environ.get("elink_api_PRODUCTION_key") +) # target default is production E-link service. 
+ +### Grabbing an existing record + +# record = api.get_single_record(mp-id) # test for silicon + +# type(record) + +# ELinkGotRecordModel = ELinkGetResponseModel.from_elinkapi_record(record) + +# print(ELinkGotRecordModel.get_title()) +# print(ELinkGotRecordModel.get_site_url()) +# print(ELinkGotRecordModel.get_keywords()) +# print(ELinkGotRecordModel.get_default_description()) + +# ELinkTestGetRecordModel = TestClass(**record.model_dump()) + +### Making a new record + +# with MongoClient(mongo_uri) as client: +# #get all material_ids and dois from doi collection +# doi_collection = client["mp_core"]["dois"] +# materials_to_update = list(doi_collection.find({}, {"_id": 0, "material_id": 1, "doi": 1}, limit=10)) +# material_ids = [entry["material_id"] for entry in materials_to_update] + +# # check # of material_ids from DOI collection vs amount in robocrys + +# # get description for material_ids from robocrys collection +# coll = client["mp_core_blue"]["robocrys"] +# res = list(coll.find({"material_id": {"$in": material_ids}}, {"_id": 0, "material_id": 1, "description": 1})) + +# # join on material_id +# for doc in res: +# mat = next(filter(lambda x: x["material_id"] == doc["material_id"], materials_to_update)) +# doc["doi"] = mat["doi"] + + +# {"material_id": ..., "doi": ..., "description": ...} -> +# Record( +# template_fields ..., +# doi: ..., +# description: ..., +# fields_where_material_id_makes_sense: ..., +# ) + +# with the client open +with MongoClient(mongo_uri) as client: + # get all dois from the collection + doi_collection = client["mp_core"]["dois"] + materials_to_update = list( + doi_collection.find({}, {"_id": 0, "doi": 1, "material_id": 1}, limit=2) + ) + + # from the doi collection, grab the material_id and doi of each material + material_ids = [entry["material_id"] for entry in materials_to_update] + + # additionally, gain the osti id from the doi + osti_ids = [entry["doi"].split("10.17188/")[1] for entry in materials_to_update] + + # 
additionally, grab the description of each material from the robocrys
+    coll = client["mp_core_blue"][
+        "robocrys"
+    ]  # grabs robocrys collection from active database
+    res = list(
+        coll.find(
+            {"material_id": {"$in": material_ids}},
+            {"_id": 0, "material_id": 1, "description": 1},
+        )
+    )  # grabs the material id and description of entries in the collection
+    # Keyed by material_id rather than by list position: the $in query does not
+    # return documents in the order of material_ids and may return fewer of them.
+    descriptions = {entry["material_id"]: entry["description"] for entry in res}
+
+    # for each material (and its material_id, doi, and osti_id)
+    for internal_material_id, internal_osti_id in zip(material_ids, osti_ids):
+        # None when robocrys has no document for this material
+        internal_description = descriptions.get(internal_material_id)
+
+        # get_single_record(osti_id)
+        record = api.get_single_record(internal_osti_id)
+
+        print(
+            f"\n \n \nPrinting what is currently on ELINK for {internal_material_id}*****************************************"
+        )
+        print(record)
+
+        if internal_material_id == record.site_unique_id:
+            # update description
+            record.description = "testTESTtestTESTtest"
+
+        print(
+            f"\n \n \nPrinting record for {internal_material_id}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+        )
+        print(record)
+
+        # # post updated record
+        # try:
+        #     saved_record = api.post_new_record(record, "save")
+        # except exceptions.BadRequestException as ve:
+        #     ...
+        #     # ve.message = "Site Code AAAA is not valid."
+ # # ve.errors provides more details: + # # [{"status":"400", "detail":"Site Code AAAA is not valid.", "source":{"pointer":"site_ownership_code"}}] From f9e4753bd241160dc013a180e7d03797dec2a07f Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 16:37:32 -0700 Subject: [PATCH 30/46] disable fail-fast to see if other python versions will fail --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 04a5381..5fc0b2e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -17,7 +17,7 @@ jobs: matrix: os: ["ubuntu-latest"] python-version: ["3.11", "3.12", "3.13"] - + fail-fast: false name: mp-cite (${{ matrix.os }}/py${{ matrix.python-version }}) runs-on: ${{ matrix.os }} From 18cf3d2ae51acc7191ba1d5447eff0fc5058a2b9 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 16:42:06 -0700 Subject: [PATCH 31/46] Using uv to install ruff dependency, using uv-cache and removing pip installation --- .github/workflows/lint.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 5fc0b2e..078139c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -30,10 +30,9 @@ jobs: python-version: ${{ matrix.python-version }} version: "latest" - - name: Install dependencies + - name: Install ruff run: | - python -m pip install --upgrade pip - pip install ruff + uv pip install ruff # Update output format to enable automatic inline annotations - name: Analyzing the code with ruff run: ruff check --output-format=github From 059c72d8028ee8ca08c43299efe2be21305bc648 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 16:49:26 -0700 Subject: [PATCH 32/46] added new action to install virtual environment before attempting to install dependency --- .github/workflows/lint.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 078139c..8183597 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -24,12 +24,14 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install uv + - name: Install uv and set up the python version uses: astral-sh/setup-uv@v6 with: python-version: ${{ matrix.python-version }} version: "latest" + - name: Create virtual environment # this will allow us to install the dependencies in an isolated environment, for each python version too, which is already specified when uv is setup. + - name: Install ruff run: | uv pip install ruff From 1350d618d0ef12322d55d808f676bfa7cb9e3f8a Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 16:54:48 -0700 Subject: [PATCH 33/46] Fixed missing run for uv venv --- .github/workflows/lint.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 8183597..5a2e187 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -31,6 +31,7 @@ jobs: version: "latest" - name: Create virtual environment # this will allow us to install the dependencies in an isolated environment, for each python version too, which is already specified when uv is setup. + run: uv venv - name: Install ruff run: | From 16e6a332eb583f13e085503d8a4ce36ee3b94ad1 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 17:29:19 -0700 Subject: [PATCH 34/46] attempting ruff check again now that is seems to work... --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 35331cb..e777c97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,9 @@ exclude = [ "node_modules", "site-packages", "venv", - "legacy" + "legacy", + "notebooks", + "uv.lock" ] # Same as Black. 
From 7ee8fb53fe93f656019c9e035852702a9a974240 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Thu, 17 Jul 2025 17:37:09 -0700 Subject: [PATCH 35/46] (after rebase) Trying uvx --- .github/workflows/lint.yml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 5a2e187..f1efbd5 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -30,16 +30,13 @@ jobs: python-version: ${{ matrix.python-version }} version: "latest" - - name: Create virtual environment # this will allow us to install the dependencies in an isolated environment, for each python version too, which is already specified when uv is setup. - run: uv venv - - - name: Install ruff - run: | - uv pip install ruff - # Update output format to enable automatic inline annotations - name: Analyzing the code with ruff +<<<<<<< HEAD run: ruff check --output-format=github <<<<<<< HEAD >>>>>>> 8d7d55b (Testing Linting workflow) ======= >>>>>>> 61d74c1 (Allowing Lint.YML to run on push to linting_workflow) +======= + run: uvx ruff check --output-format=github +>>>>>>> 69556e5 (Trying uvx) From 4a83b7a53d511a62ad5f81f19af01d6a36d5eeaf Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Mon, 21 Jul 2025 13:26:41 -0700 Subject: [PATCH 36/46] Removed assume python 3.9 from ruff config in pyproject.toml --- .pre-commit-config.yaml | 2 +- pyproject.toml | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 627c045..996fb04 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,4 +10,4 @@ repos: # Run the formatter. 
- id: ruff-format types_or: [python, pyi ] -exclude: 'legacy/*' +exclude: 'legacy' diff --git a/pyproject.toml b/pyproject.toml index e777c97..e06effa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,9 +77,6 @@ exclude = [ line-length = 88 indent-width = 4 -# Assume Python 3.9 -target-version = "py39" - [tool.ruff.lint] # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or From 5f2f9485fa33e2a589e7de3530b3f58954ec062a Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Wed, 30 Jul 2025 10:53:40 -0700 Subject: [PATCH 37/46] fixing typos after rebase --- .github/workflows/lint.yml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index f1efbd5..282b995 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -2,14 +2,12 @@ name: linting on: push: - branches: [master, linting-workflow] + branches: [master, linting-workflow, test-linting-workflow] pull_request: branches: [master, linting-workflow] workflow_dispatch: # TODO: setup linting with uv/ruff -<<<<<<< HEAD -======= # informed by testing.yml and https://medium.com/@sidharthvijayakumar7/automating-pylint-in-github-workflow-80c84b2ff243 and ruff documentation jobs: linting: @@ -31,12 +29,4 @@ jobs: version: "latest" - name: Analyzing the code with ruff -<<<<<<< HEAD - run: ruff check --output-format=github -<<<<<<< HEAD ->>>>>>> 8d7d55b (Testing Linting workflow) -======= ->>>>>>> 61d74c1 (Allowing Lint.YML to run on push to linting_workflow) -======= run: uvx ruff check --output-format=github ->>>>>>> 69556e5 (Trying uvx) From c8c53cb6f7a0edd1de73c011107e48a9afc3e622 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Wed, 30 Jul 2025 11:08:15 -0700 Subject: [PATCH 38/46] Installing Ruff before calling check; added precommit to uvlock --- .github/workflows/lint.yml | 3 + pyproject.toml | 2 + uv.lock | 148 
+++++++++++++++++++++++++++++++++++++ 3 files changed, 153 insertions(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 282b995..c7116a0 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -28,5 +28,8 @@ jobs: python-version: ${{ matrix.python-version }} version: "latest" + - name: Install ruff + run: uv add ruff + - name: Analyzing the code with ruff run: uvx ruff check --output-format=github diff --git a/pyproject.toml b/pyproject.toml index e06effa..1d03264 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,8 +15,10 @@ maintainers = [ requires-python = ">=3.11" dependencies = [ "elinkapi>=0.4.9", + "pre-commit>=4.2.0", "pydantic>=2.11.7", "pymongo>=4.13.2", + "ruff>=0.12.7", ] [dependency-groups] diff --git a/uv.lock b/uv.lock index 3a0af09..a3fe5ec 100644 --- a/uv.lock +++ b/uv.lock @@ -20,6 +20,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/66/f3/80a3f974c8b535d394ff960a11ac20368e06b736da395b551a49ce950cce/certifi-2025.7.9-py3-none-any.whl", hash = "sha256:d842783a14f8fdd646895ac26f719a061408834473cfc10203f6a575beb15d39", size = 159230, upload-time = "2025-07-09T02:13:57.007Z" }, ] +[[package]] +name = "cfgv" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114, upload-time = "2023-08-12T20:38:17.776Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" }, +] + [[package]] name = "charset-normalizer" version = "3.4.2" @@ -77,6 +86,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] +[[package]] +name = "distlib" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, +] + [[package]] name = "dnspython" version = "2.7.0" @@ -101,6 +119,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/fb/365736d4450002f29d78fb51c21223ab5e6addcdc913bb79f3bb15a9899e/elinkapi-0.4.9-py3-none-any.whl", hash = "sha256:50644897334b487543f4a7eeed51e31b7b2ed3eff6c1662064ccc3d6a14755ee", size = 34592, upload-time = "2025-07-04T02:26:46.244Z" }, ] +[[package]] +name = "filelock" +version = "3.18.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075, upload-time = "2025-03-14T07:11:40.47Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = 
"sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = "2025-03-14T07:11:39.145Z" }, +] + +[[package]] +name = "identify" +version = "2.6.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/88/d193a27416618628a5eea64e3223acd800b40749a96ffb322a9b55a49ed1/identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6", size = 99254, upload-time = "2025-05-23T20:37:53.3Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/cd/18f8da995b658420625f7ef13f037be53ae04ec5ad33f9b718240dcfd48c/identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2", size = 99145, upload-time = "2025-05-23T20:37:51.495Z" }, +] + [[package]] name = "idna" version = "3.10" @@ -125,8 +161,10 @@ version = "0.0.1" source = { editable = "." } dependencies = [ { name = "elinkapi" }, + { name = "pre-commit" }, { name = "pydantic" }, { name = "pymongo" }, + { name = "ruff" }, ] [package.dev-dependencies] @@ -137,13 +175,24 @@ dev = [ [package.metadata] requires-dist = [ { name = "elinkapi", specifier = ">=0.4.9" }, + { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pydantic", specifier = ">=2.11.7" }, { name = "pymongo", specifier = ">=4.13.2" }, + { name = "ruff", specifier = ">=0.12.7" }, ] [package.metadata.requires-dev] dev = [{ name = "pytest", specifier = ">=8.4.1" }] +[[package]] +name = "nodeenv" +version = "1.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" }, +] + [[package]] name = "packaging" version = "25.0" @@ -153,6 +202,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "platformdirs" +version = "4.3.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362, upload-time = "2025-05-07T22:47:42.121Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567, upload-time = "2025-05-07T22:47:40.376Z" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -162,6 +220,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "pre-commit" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = { 
url = "https://files.pythonhosted.org/packages/08/39/679ca9b26c7bb2999ff122d50faa301e49af82ca9c066ec061cfbc0c6784/pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146", size = 193424, upload-time = "2025-03-18T21:35:20.987Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/74/a88bf1b1efeae488a0c0b7bdf71429c313722d1fc0f377537fbe554e6180/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd", size = 220707, upload-time = "2025-03-18T21:35:19.343Z" }, +] + [[package]] name = "pydantic" version = "2.11.7" @@ -314,6 +388,41 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" }, ] +[[package]] +name = "pyyaml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631, upload-time = "2024-08-06T20:33:50.674Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/aa/7af4e81f7acba21a4c6be026da38fd2b872ca46226673c89a758ebdc4fd2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", size = 184612, upload-time = "2024-08-06T20:32:03.408Z" }, + { url = "https://files.pythonhosted.org/packages/8b/62/b9faa998fd185f65c1371643678e4d58254add437edb764a08c5a98fb986/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", size = 172040, upload-time = "2024-08-06T20:32:04.926Z" }, + { url = 
"https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829, upload-time = "2024-08-06T20:32:06.459Z" }, + { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167, upload-time = "2024-08-06T20:32:08.338Z" }, + { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952, upload-time = "2024-08-06T20:32:14.124Z" }, + { url = "https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301, upload-time = "2024-08-06T20:32:16.17Z" }, + { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638, upload-time = "2024-08-06T20:32:18.555Z" }, + { url = "https://files.pythonhosted.org/packages/22/5f/956f0f9fc65223a58fbc14459bf34b4cc48dec52e00535c79b8db361aabd/PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", size = 143850, upload-time = "2024-08-06T20:32:19.889Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/23/8da0bbe2ab9dcdd11f4f4557ccaf95c10b9811b13ecced089d43ce59c3c8/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", size = 161980, upload-time = "2024-08-06T20:32:21.273Z" }, + { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873, upload-time = "2024-08-06T20:32:25.131Z" }, + { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302, upload-time = "2024-08-06T20:32:26.511Z" }, + { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154, upload-time = "2024-08-06T20:32:28.363Z" }, + { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223, upload-time = "2024-08-06T20:32:30.058Z" }, + { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542, upload-time = "2024-08-06T20:32:31.881Z" }, + { url = 
"https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164, upload-time = "2024-08-06T20:32:37.083Z" }, + { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611, upload-time = "2024-08-06T20:32:38.898Z" }, + { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591, upload-time = "2024-08-06T20:32:40.241Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338, upload-time = "2024-08-06T20:32:41.93Z" }, + { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309, upload-time = "2024-08-06T20:32:43.4Z" }, + { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679, upload-time = "2024-08-06T20:32:44.801Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428, upload-time = "2024-08-06T20:32:46.432Z" }, + { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361, upload-time = "2024-08-06T20:32:51.188Z" }, + { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523, upload-time = "2024-08-06T20:32:53.019Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660, upload-time = "2024-08-06T20:32:54.708Z" }, + { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597, upload-time = "2024-08-06T20:32:56.985Z" }, + { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527, upload-time = "2024-08-06T20:33:03.001Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" }, +] + [[package]] name = "requests" version = "2.32.4" @@ -341,6 +450,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" }, ] +[[package]] +name = "ruff" +version = "0.12.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/81/0bd3594fa0f690466e41bd033bdcdf86cba8288345ac77ad4afbe5ec743a/ruff-0.12.7.tar.gz", hash = "sha256:1fc3193f238bc2d7968772c82831a4ff69252f673be371fb49663f0068b7ec71", size = 5197814, upload-time = "2025-07-29T22:32:35.877Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/d2/6cb35e9c85e7a91e8d22ab32ae07ac39cc34a71f1009a6f9e4a2a019e602/ruff-0.12.7-py3-none-linux_armv6l.whl", hash = "sha256:76e4f31529899b8c434c3c1dede98c4483b89590e15fb49f2d46183801565303", size = 11852189, upload-time = "2025-07-29T22:31:41.281Z" }, + { url = "https://files.pythonhosted.org/packages/63/5b/a4136b9921aa84638f1a6be7fb086f8cad0fde538ba76bda3682f2599a2f/ruff-0.12.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:789b7a03e72507c54fb3ba6209e4bb36517b90f1a3569ea17084e3fd295500fb", size = 12519389, upload-time = "2025-07-29T22:31:54.265Z" }, + { url = "https://files.pythonhosted.org/packages/a8/c9/3e24a8472484269b6b1821794141f879c54645a111ded4b6f58f9ab0705f/ruff-0.12.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e1c2a3b8626339bb6369116e7030a4cf194ea48f49b64bb505732a7fce4f4e3", size = 11743384, upload-time = "2025-07-29T22:31:59.575Z" }, + { 
url = "https://files.pythonhosted.org/packages/26/7c/458dd25deeb3452c43eaee853c0b17a1e84169f8021a26d500ead77964fd/ruff-0.12.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32dec41817623d388e645612ec70d5757a6d9c035f3744a52c7b195a57e03860", size = 11943759, upload-time = "2025-07-29T22:32:01.95Z" }, + { url = "https://files.pythonhosted.org/packages/7f/8b/658798472ef260ca050e400ab96ef7e85c366c39cf3dfbef4d0a46a528b6/ruff-0.12.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47ef751f722053a5df5fa48d412dbb54d41ab9b17875c6840a58ec63ff0c247c", size = 11654028, upload-time = "2025-07-29T22:32:04.367Z" }, + { url = "https://files.pythonhosted.org/packages/a8/86/9c2336f13b2a3326d06d39178fd3448dcc7025f82514d1b15816fe42bfe8/ruff-0.12.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a828a5fc25a3efd3e1ff7b241fd392686c9386f20e5ac90aa9234a5faa12c423", size = 13225209, upload-time = "2025-07-29T22:32:06.952Z" }, + { url = "https://files.pythonhosted.org/packages/76/69/df73f65f53d6c463b19b6b312fd2391dc36425d926ec237a7ed028a90fc1/ruff-0.12.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5726f59b171111fa6a69d82aef48f00b56598b03a22f0f4170664ff4d8298efb", size = 14182353, upload-time = "2025-07-29T22:32:10.053Z" }, + { url = "https://files.pythonhosted.org/packages/58/1e/de6cda406d99fea84b66811c189b5ea139814b98125b052424b55d28a41c/ruff-0.12.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:74e6f5c04c4dd4aba223f4fe6e7104f79e0eebf7d307e4f9b18c18362124bccd", size = 13631555, upload-time = "2025-07-29T22:32:12.644Z" }, + { url = "https://files.pythonhosted.org/packages/6f/ae/625d46d5164a6cc9261945a5e89df24457dc8262539ace3ac36c40f0b51e/ruff-0.12.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d0bfe4e77fba61bf2ccadf8cf005d6133e3ce08793bbe870dd1c734f2699a3e", size = 12667556, upload-time = "2025-07-29T22:32:15.312Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/bf/9cb1ea5e3066779e42ade8d0cd3d3b0582a5720a814ae1586f85014656b6/ruff-0.12.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06bfb01e1623bf7f59ea749a841da56f8f653d641bfd046edee32ede7ff6c606", size = 12939784, upload-time = "2025-07-29T22:32:17.69Z" }, + { url = "https://files.pythonhosted.org/packages/55/7f/7ead2663be5627c04be83754c4f3096603bf5e99ed856c7cd29618c691bd/ruff-0.12.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e41df94a957d50083fd09b916d6e89e497246698c3f3d5c681c8b3e7b9bb4ac8", size = 11771356, upload-time = "2025-07-29T22:32:20.134Z" }, + { url = "https://files.pythonhosted.org/packages/17/40/a95352ea16edf78cd3a938085dccc55df692a4d8ba1b3af7accbe2c806b0/ruff-0.12.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4000623300563c709458d0ce170c3d0d788c23a058912f28bbadc6f905d67afa", size = 11612124, upload-time = "2025-07-29T22:32:22.645Z" }, + { url = "https://files.pythonhosted.org/packages/4d/74/633b04871c669e23b8917877e812376827c06df866e1677f15abfadc95cb/ruff-0.12.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:69ffe0e5f9b2cf2b8e289a3f8945b402a1b19eff24ec389f45f23c42a3dd6fb5", size = 12479945, upload-time = "2025-07-29T22:32:24.765Z" }, + { url = "https://files.pythonhosted.org/packages/be/34/c3ef2d7799c9778b835a76189c6f53c179d3bdebc8c65288c29032e03613/ruff-0.12.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:a07a5c8ffa2611a52732bdc67bf88e243abd84fe2d7f6daef3826b59abbfeda4", size = 12998677, upload-time = "2025-07-29T22:32:27.022Z" }, + { url = "https://files.pythonhosted.org/packages/77/ab/aca2e756ad7b09b3d662a41773f3edcbd262872a4fc81f920dc1ffa44541/ruff-0.12.7-py3-none-win32.whl", hash = "sha256:c928f1b2ec59fb77dfdf70e0419408898b63998789cc98197e15f560b9e77f77", size = 11756687, upload-time = "2025-07-29T22:32:29.381Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/71/26d45a5042bc71db22ddd8252ca9d01e9ca454f230e2996bb04f16d72799/ruff-0.12.7-py3-none-win_amd64.whl", hash = "sha256:9c18f3d707ee9edf89da76131956aba1270c6348bfee8f6c647de841eac7194f", size = 12912365, upload-time = "2025-07-29T22:32:31.517Z" }, + { url = "https://files.pythonhosted.org/packages/4c/9b/0b8aa09817b63e78d94b4977f18b1fcaead3165a5ee49251c5d5c245bb2d/ruff-0.12.7-py3-none-win_arm64.whl", hash = "sha256:dfce05101dbd11833a0776716d5d1578641b7fddb537fe7fa956ab85d1769b69", size = 11982083, upload-time = "2025-07-29T22:32:33.881Z" }, +] + [[package]] name = "typing-extensions" version = "4.14.1" @@ -370,3 +504,17 @@ sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599 wheels = [ { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, ] + +[[package]] +name = "virtualenv" +version = "20.32.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "distlib" }, + { name = "filelock" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/96/0834f30fa08dca3738614e6a9d42752b6420ee94e58971d702118f7cfd30/virtualenv-20.32.0.tar.gz", hash = "sha256:886bf75cadfdc964674e6e33eb74d787dff31ca314ceace03ca5810620f4ecf0", size = 6076970, upload-time = "2025-07-21T04:09:50.985Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/c6/f8f28009920a736d0df434b52e9feebfb4d702ba942f15338cb4a83eafc1/virtualenv-20.32.0-py3-none-any.whl", hash = "sha256:2c310aecb62e5aa1b06103ed7c2977b81e042695de2697d01017ff0f1034af56", size = 6057761, upload-time = "2025-07-21T04:09:48.059Z" }, +] From 5f2262a969bb8e71d44c3f2ff2075856d35b6809 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Wed, 30 Jul 2025 11:13:06 -0700 
Subject: [PATCH 39/46] Added verbose keyword to make clear that files are being checked --- .github/workflows/lint.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index c7116a0..77a6d2e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -32,4 +32,5 @@ jobs: run: uv add ruff - name: Analyzing the code with ruff - run: uvx ruff check --output-format=github + run: | + uvx ruff check --output-format=github --verbose \ No newline at end of file From 930cf9f0127c77c4715f1712d152c556a7c4b02b Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Mon, 4 Aug 2025 10:50:42 -0700 Subject: [PATCH 40/46] Attempting to revert src/ tests/ and .gitignore to master current ver --- .gitignore | 4 + src/mp_cite/core.py | 155 ------------------- src/mp_cite/doi_builder.py | 204 ------------------------- src/mp_cite/pipeline.py | 120 --------------- src/mp_cite/recordresponse_example.txt | 92 ----------- src/mp_cite/reset.py | 27 ---- src/mp_cite/send_collection.py | 79 ---------- src/mp_cite/test_core.py | 94 ------------ tests/file_to_jsonForUpload.py | 167 -------------------- tests/github_bug_report.py | 61 -------- tests/manage_backfills.py | 48 ------ tests/outputs.txt | 46 ------ tests/prod_to_review.py | 129 ---------------- tests/test_elink_api.py | 118 -------------- 14 files changed, 4 insertions(+), 1340 deletions(-) delete mode 100644 src/mp_cite/core.py delete mode 100644 src/mp_cite/doi_builder.py delete mode 100644 src/mp_cite/pipeline.py delete mode 100644 src/mp_cite/recordresponse_example.txt delete mode 100644 src/mp_cite/reset.py delete mode 100644 src/mp_cite/send_collection.py delete mode 100644 src/mp_cite/test_core.py delete mode 100644 tests/file_to_jsonForUpload.py delete mode 100644 tests/github_bug_report.py delete mode 100644 tests/manage_backfills.py delete mode 100644 tests/outputs.txt delete mode 100644 tests/prod_to_review.py delete mode 100644 
tests/test_elink_api.py diff --git a/.gitignore b/.gitignore index 9045477..8241d4c 100644 --- a/.gitignore +++ b/.gitignore @@ -209,3 +209,7 @@ __marimo__/ # Streamlit .streamlit/secrets.toml + +json_pages/ +notebooks/ +test_json_pages/ \ No newline at end of file diff --git a/src/mp_cite/core.py b/src/mp_cite/core.py deleted file mode 100644 index b1da88d..0000000 --- a/src/mp_cite/core.py +++ /dev/null @@ -1,155 +0,0 @@ -from typing import TypeAlias - -from elinkapi import Elink -from elinkapi.record import RecordResponse, Record, Organization, Person -from pymongo import MongoClient - -import requests -from elinkapi.utils import Validation - -from datetime import datetime - -OstiID: TypeAlias = int - - -def find_out_of_date_doi_entries( - rc_client: MongoClient, - doi_client: MongoClient, - robocrys_db: str, - robocrys_collection: str, - doi_db: str, - doi_collection: str, -) -> list[OstiID]: - robocrys = rc_client[robocrys_db][robocrys_collection] - dois = doi_client[doi_db][doi_collection] - - latest_doi = next( - dois.aggregate( - [ - {"$project": {"_id": 0, "date_metadata_updated": 1}}, - {"$sort": {"date_metadata_updated": -1}}, - {"$limit": 1}, - ] - ) - )["date_metadata_updated"] - - material_ids_to_update = list( - map( - lambda x: x["material_id"], - robocrys.find( - {"last_updated": {"$gt": latest_doi}}, {"_id": 0, "material_id": 1} - ), - ) - ) - - return list( - map( - lambda x: x["osti_id"], - dois.find( - {"material_id": {"$in": material_ids_to_update}}, - {"_id": 0, "osti_id": 1}, - ), - ), - ) - - -def update_existing_osti_record( - elinkapi: Elink, osti_id: OstiID, new_values: dict -) -> RecordResponse: - record_on_elink = elinkapi.get_single_record(osti_id) - - for keyword in new_values.keys(): - try: - setattr(record_on_elink, keyword, new_values[keyword]) - except ValueError: - print( - "Extraneous keywords found in the dictionary that do not correspond to attributes in the ELink API's record class." 
- ) - - # assume the use with fix the sponsor identifier bug before calling the update function - # # fix the issue with the sponsor organization's identifiers - # for entry in record_on_elink.organizations: - # if entry.type == "SPONSOR": - # entry.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] - # break - - return elinkapi.update_record( - osti_id, record_on_elink, state="save" - ) # user should use update_state_of_osti_record to submit instead - - -def submit_new_osti_record( - elinkapi: Elink, - new_record: Record, - state="submit", # assuming there is no need to both with saving. just send new record to osti when its ready for submission. also assume bug with DOE contract number identifier in sponsor organization is accounted for -) -> RecordResponse: - # template for all repeated stuff - # only submit - record_response = elinkapi.post_new_record(new_record, state) - - return record_response - - -def update_state_of_osti_record( - elinkapi: Elink, osti_id: OstiID, new_state="submit" -) -> RecordResponse: - record = elinkapi.get_single_record(osti_id) - - # assuming that the user will handle the sponsor identifier bug before calling this function - # # fix the issue with the sponsor organization's identifiers - # for entry in record.organizations: - # if entry.type == "SPONSOR": - # entry.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] - # break - - return elinkapi.update_record(osti_id, record, new_state) - - -def delete_osti_record( - elinkapi_token: str, osti_id: OstiID, reason: str -) -> RecordResponse: - review_endpoint = "https://review.osti.gov/elink2api/" - review_api = Elink(token=elinkapi_token, target=review_endpoint) - - """Delete a record by its OSTI ID.""" - response = requests.delete( - f"{review_api.target}records/{osti_id}?reason={reason}", - headers={"Authorization": f"Bearer {review_api.token}"}, - ) - Validation.handle_response(response) - return response.status_code == 204 # True if deleted successfully - - -def 
emptyReviewAPI(reason, review_api): - for record in review_api.query_records(): - delete_osti_record(review_api, record.osti_id, reason) - - -def make_minimum_record_to_fully_release( - title, # required to make record - product_type="DA", # required to make record - organizations=[ - Organization(type="RESEARCHING", name="LBNL Materials Project (LBNL-MP)"), - Organization( - type="SPONSOR", - name="TEST SPONSOR ORG", - identifiers=[{"type": "CN_DOE", "value": "AC02-05CH11231"}], - ), - ], # sponsor org is necessary for submission - persons=[Person(type="AUTHOR", last_name="Perrson")], - site_ownership_code="LBNL-MP", - access_limitations=["UNL"], - publication_date=datetime.now().replace( - hour=0, minute=0, second=0, microsecond=0 - ), # what should this be? - site_url="https://next-gen.materialsproject.org/materials", -) -> Record: - return Record( - product_type, - title, - persons, - site_ownership_code, - access_limitations, - publication_date, - site_url, - ) diff --git a/src/mp_cite/doi_builder.py b/src/mp_cite/doi_builder.py deleted file mode 100644 index b85fb90..0000000 --- a/src/mp_cite/doi_builder.py +++ /dev/null @@ -1,204 +0,0 @@ -''' -doi_builder.py -A doi collection must store the following information about a document: -- doi number -- title -- osti id (ELink's Unique Identifier) -- material id (MP's Unique Identifier) -- date of system entry date (Date (UTC) of this revision's inception) -- date of last update (date edited or date_submitted_to_osti_last) (take from ELink) -- workflow status and the date (?) 
of each step: - - SA, saved, in a holding state, not to be processed - - SR, submit to releasing official "released_to_osti_date, as entered by releasing official" - - SO, submit to OSTI - - SF, submitted but failed validation - - SX, submitted but failed to release - - SV, submitted and validated - - R, released -- - -Here is an example of RecordResponse -RecordResponse( - osti_id=2523296, - workflow_status='SA', - access_limitations=['UNL'], - access_limitation_other=None, - announcement_codes=None, - availability=None, - edition=None, - volume=None, - - # Identifiers - identifiers=[ - Identifier(type='CN_NONDOE', value='EDCBEE'), - Identifier(type='CN_DOE', value='AC02-05CH11231'), - Identifier(type='RN', value='mp-1037659'), - ], - - # People involved - persons=[ - Person( - type='CONTACT', - first_name='Kristin', - last_name='Persson', - phone='+1(510)486-7218', - email=['feedback@materialsproject.org'], - affiliations=[ - Affiliation(name='LBNL') - ] - ) - ], - - # Organizations - organizations=[ - Organization(name='The Materials Project', type='CONTRIBUTING', contributor_type='ResearchGroup'), - Organization(name='LBNL Materials Project', type='RESEARCHING'), - Organization(name='Lawrence Berkeley National Laboratory (LBNL), Berkeley, CA (United States)', type='RESEARCHING'), - Organization(name='USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22)', type='SPONSOR'), - Organization(name='MIT', type='CONTRIBUTING', contributor_type='Other'), - Organization(name='UC Berkeley', type='CONTRIBUTING', contributor_type='Other'), - Organization(name='Duke', type='CONTRIBUTING', contributor_type='Other'), - Organization(name='U Louvain', type='CONTRIBUTING', contributor_type='Other'), - ], - - # Metadata - country_publication_code='US', - doe_supported_flag=False, - doi='10.17188/1714845', - edit_reason='Record updated upon request of LBNL-MP to remove authors and replace with a single collaborator.', - format_information='', - 
invention_disclosure_flag=None, - paper_flag=False, - peer_reviewed_flag=False, - product_type='DA', - publication_date=datetime.date(2020, 4, 30), - publication_date_text='04/30/2020', - site_url='https://materialsproject.org/materials/mp-1037659', - site_ownership_code='LBNL-MP', - site_unique_id='mp-1037659', - subject_category_code=['36'], - title='Materials Data on RbYMg30O32 by Materials Project', - - # Description - description=""" - RbMg₃₀YO₃₂ is Molybdenum Carbide MAX Phase-derived and crystallizes in the tetragonal P4/mmm space group. - Rb¹⁺ is bonded to six O²⁻ atoms to form RbO₆ octahedra... - (Truncated here for brevity, full description is included in original) - """, - - keywords=['crystal structure', 'RbYMg30O32', 'Mg-O-Rb-Y'], - languages=['English'], - related_doc_info='https://materialsproject.org/citing', - - # Media - media=[ - MediaInfo( - media_id=1908478, - osti_id=2523296, - status='C', - mime_type='text/html', - files=[ - MediaFile( - media_file_id=12017281, - media_type='O', - url='https://materialsproject.org/materials/mp-1037659' - ), - MediaFile( - media_file_id=12017284, - media_type='C', - mime_type='text/html', - media_source='OFF_SITE_DOWNLOAD' - ) - ] - ) - ], - - # Audit logs - audit_logs=[ - AuditLog( - messages=['Revision status is not correct, found SA'], - status='FAIL', - type='RELEASER', - audit_date=datetime.datetime(2025, 6, 30, 22, 30, 24, 865000, tzinfo=TzInfo(UTC)) - ) - ], - - # Timestamps - date_metadata_added=datetime.datetime(2025, 6, 30, 22, 30, 20, 495000, tzinfo=TzInfo(UTC)), - date_metadata_updated=datetime.datetime(2025, 6, 30, 22, 30, 22, 247000, tzinfo=TzInfo(UTC)), - - # Misc - revision=2, - added_by=139001, - edited_by=139001, - collection_type='DOE_LAB', - hidden_flag=False -) -''' - -from pydantic import BaseModel, Field -from datetime import datetime - - -# TODO: change the field names to match ELINK -class doi_model(BaseModel): - # identifiers - doi: str = Field( - description="The DOI number as 
allocated by OSTI" - ) # can be taken from ELink API - title: str = Field( - description="The title of the record" - ) # can be taken from ELink API - osti_id: str = Field( - description="The OSTI ID number allocated by OSTI to make the DOI number" - ) # can be taken from ELink API - material_id: str # can be taken from Robocrys Collection or ELink API - - # time stamps - date_metadata_added: datetime | None = Field( - description="date_record_entered_onto_ELink" - ) # can be taken from ELink API response - date_metadata_updated: datetime | None = Field( - description="date_record_last_updated_on_Elink" - ) - - # status - workflow_status: str # can be taken from ELink API - date_released: datetime | None = Field(description="") - date_submitted_to_osti_first: datetime = Field( - description="date record was first submitted to OSTI for publication, maintained internally by E-Link" - ) - date_submitted_to_osti_last: datetime = Field( - description="most recent date record information was submitted to OSTI. Maintained internally by E-Link" - ) - publication_date: datetime | None = Field( - description="" - ) # labelled as publication_date in RecordResponse of ELink API - - -# hypothetically post an update or submit a new record and receive the RecordResponse -def RecordResponse_to_doi_model(recordresponse): - """ - turns a recordresponse, which is returned from a save, submission, post, etc. into a doi_model object - """ - params = { - "doi": recordresponse.doi, - "title": recordresponse.title, - "osti_id": str(recordresponse.osti_id), - "material_id": recordresponse.site_unique_id, - "date_metadata_added": recordresponse.date_metadata_added, - "date_metadata_updated": recordresponse.date_metadata_updated, - "workflow_status": recordresponse.workflow_status, - "date_released": recordresponse.date_released, - # date_released_to_osti = recordresponse.released_to_osti_date, # what is the difference between these??? 
"Date record information was released to OSTI, as entered by releasing official." always seems to be none - "date_submitted_to_osti_first": recordresponse.date_submitted_to_osti_first, # date record was first submitted to OSTI for publication, maintained internally by E-Link - "date_submitted_to_osti_last": recordresponse.date_submitted_to_osti_last, # most recent date record information was submitted to OSTI. Maintained internally by E-Link. - "publication_date": recordresponse.publication_date, - } - - return doi_model(**params) - - -def upload_doi_document_model_to_collection(doi_model, collection): - x = collection.insert_one(doi_model.model_dump()).inserted_id - return x diff --git a/src/mp_cite/pipeline.py b/src/mp_cite/pipeline.py deleted file mode 100644 index 3a087ab..0000000 --- a/src/mp_cite/pipeline.py +++ /dev/null @@ -1,120 +0,0 @@ -import os -import json -from elinkapi import Elink -from elinkapi.record import RecordResponse -from dotenv import load_dotenv - - -from pymongo import MongoClient - -import logging -import requests - -import datetime -from doi_builder import RecordResponse_to_doi_model, doi_model - -load_dotenv() # depends on the root directory from which you run your python scripts. 
- -review_endpoint = "https://review.osti.gov/elink2api/" - -prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) -review_api = Elink( - token=os.environ.get("elink_review_api_token"), target=review_endpoint -) - -atlas_user = os.environ.get("atlas_user") -atlas_password = os.environ.get("atlas_password") -atlas_host = os.environ.get("atlas_host") -mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" - -failed_osti_ids = [] - -cwd = os.getcwd() -path = "/json_pages/" - -for filename in os.listdir(cwd + path): - logging.debug(f"Now extracting {filename}") - file = open(cwd + path + filename, "r") - for line in file: - record = RecordResponse(**json.loads(line.strip())) - record.osti_id = record.doi.split("/")[1] - # for every record in the OSTI production environment: - # flag for update performance - update_success = False - - material_id = record.site_unique_id - - with MongoClient( - mongo_uri - ) as client: # should I open this in or outside of the for loop? - coll = client["mp_core_blue"]["robocrys"] - res = coll.find_one({"material_id": material_id}) - - if res is not None: - robocrys_description = res["description"] - - # what if there is no document in robocrys found? - else: - logging.warning( - f"No robocrys document was found to match the OSTI record: {record.osti_id}!" 
- ) - - # if the description of the record on Elink doesnt match what is in the robocrys collection: - if res is not None and record.description != robocrys_description: - # directly update the description of the record via the record response - record.description = robocrys_description - - # and directly update the identifier for sponsoring org - for entry in record.organizations: - if entry.type == "SPONSOR": - entry.identifiers = [{"type": "CN_DOE", "value": "AC02-05CH11231"}] - break - - try: - # send update to the record with the record response # update_record(osti_id, record, state="save") - record_response = prod_api.update_record( - record.osti_id, record, state="save" - ) - update_success = True - - except requests.exceptions.RequestException as e: - logging.debug(f"Network or HTTP error: {e}") - failed_osti_ids.append(record.osti_id) - - except ValueError as e: - logging.debug(f"Data error while updating record: {e}") - failed_osti_ids.append(record.osti_id) - - except Exception as e: - logging.debug(f"Unexpected error during update: {e}") - failed_osti_ids.append(record.osti_id) - - # if the update worked... - if update_success: - # save the record response returned with sending the update, done above - # convert that record response into a doi_model - doi_model = RecordResponse_to_doi_model( - record - ) # change later to record response - - # upload that doi_model as a document to the new doi collection in mp_core - # what is the collection - with MongoClient() as local_client: - collection = local_client["dois_test"]["dois"] - x = collection.insert_one(doi_model.dict(by_alias=True)).inserted_id - - # else if the description on Elink matches what is in the robocrys collection: - elif record.description == robocrys_description: - # convert that record into a doi_model - doi_model = RecordResponse_to_doi_model(record) - - # upload that doi_model as a document to the new doi collection in mp_core, no updated needed! 
- with MongoClient() as local_client: - collection = local_client["dois_test"]["dois"] - x = collection.insert_one(doi_model).inserted_id - -cwd = os.getcwd() -path = f"/files/failed_osti_ids_{str(datetime.datetime.now())}.txt" -with open(cwd + path, "w") as output: # change filepath as needed - for id in failed_osti_ids: - output.write(str(id) + "\n") # i'm pretty sure it's a string already though... diff --git a/src/mp_cite/recordresponse_example.txt b/src/mp_cite/recordresponse_example.txt deleted file mode 100644 index 0d510a6..0000000 --- a/src/mp_cite/recordresponse_example.txt +++ /dev/null @@ -1,92 +0,0 @@ -osti_id=1190959 -workflow_status='R' -access_limitations=['UNL'] -access_limitation_other=None -announcement_codes=None -availability=None -edition=None -volume=None -conference_information=None -conference_type=None -contract_award_date=None -country_publication_code='US' -doe_funded_flag=None -doe_supported_flag=False -doi='10.17188/1190959' -doi_infix=None -edit_reason=None -geolocations=None -format_information='' -invention_disclosure_flag=None -issue=None -journal_license_url=None -journal_name=None -journal_open_access_flag=None -journal_type=None -keywords=['crystal structure', 'Si', 'Si'] -languages=['English'] -monographic_title=None -opn_addressee=None -opn_declassified_date=None -opn_declassified_status=None -opn_document_categories=None -opn_document_location=None -opn_fieldoffice_acronym_code=None -other_information=None -ouo_release_date=None -pams_publication_status=None -pams_publication_status_other=None -pams_authors=None -pams_editors=None -pams_product_sub_type=None -pams_patent_country_code=None -pams_transnational_patent_office=None -paper_flag=False -patent_assignee=None -patent_file_date=None -patent_priority_date=None -pdouo_exemption_number=None -peer_reviewed_flag=False -product_size=None -product_type='DA' -product_type_other=None -prot_flag=None -prot_data_other=None -prot_release_date=None 
-publication_date=datetime.date(2020, 7, 15) -publication_date_text='07/15/2020' -publisher_information=None -related_doc_info='https://materialsproject.org/citing' -released_to_osti_date=None -releasing_official_comments=None -report_period_end_date=None -report_period_start_date=None -report_types=None -report_type_other=None -sbiz_flag=None -sbiz_phase=None -sbiz_previous_contract_number=None -sbiz_release_date=None -site_ownership_code='LBNL-MP' -site_unique_id='mp-149' -subject_category_code=['36'] -subject_category_code_legacy=None -title='Materials Data on Si by Materials Project' -description='Si is diamond structured and crystallizes in the cubic Fd-3m space group. The structure is three-dimensional. Si is bonded to four equivalent Si atoms to form corner-sharing SiSi4 tetrahedra. All Si–Si bond lengths are 2.37 Å.' -identifiers=[Identifier(type='CN_DOE', value='AC02-05CH11231'), Identifier(type='CN_NONDOE', value='EDCBEE'), Identifier(type='RN', value='mp-149')] -persons=[Person(type='CONTACT', first_name='Kristin', middle_name=None, last_name='Persson', orcid=None, phone='+1(510)486-7218', email=['feedback@materialsproject.org'], affiliations=[Affiliation(name='LBNL', ror_id=None)], contributor_type=None)] -organizations=[Organization(type='CONTRIBUTING', name='The Materials Project', contributor_type='ResearchGroup', identifiers=[], ror_id=None), Organization(type='RESEARCHING', name='LBNL Materials Project', contributor_type=None, identifiers=[], ror_id=None), Organization(type='SPONSOR', name='USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22)', contributor_type=None, identifiers=[], ror_id=None), Organization(type='RESEARCHING', name='Lawrence Berkeley National Laboratory (LBNL), Berkeley, CA (United States)', contributor_type=None, identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='MIT', contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='UC Berkeley', 
contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='Duke', contributor_type='Other', identifiers=[], ror_id=None), Organization(type='CONTRIBUTING', name='U Louvain', contributor_type='Other', identifiers=[], ror_id=None)] related_identifiers=[RelatedIdentifier(type='DOI', relation='IsReferencedBy', value='10.1103/physrevmaterials.4.013401')] -site_url='https://materialsproject.org/materials/mp-149' -revision=18 -added_by=234169 -edited_by=None -collection_type='DOE_LAB' -date_metadata_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 275000, tzinfo=TzInfo(UTC)) -date_metadata_updated=datetime.datetime(2021, 7, 15, 2, 10, 43, 372000, tzinfo=TzInfo(UTC)) -date_submitted_to_osti_first=datetime.datetime(2015, 7, 7, 22, 9, 5, 808000, tzinfo=TzInfo(UTC)) -date_submitted_to_osti_last=datetime.datetime(2021, 7, 15, 2, 10, 42, 407000, tzinfo=TzInfo(UTC)) -date_released=datetime.datetime(2021, 7, 15, 2, 10, 43, 240000, tzinfo=TzInfo(UTC)) -sensitivity_flag='U' -hidden_flag=False -media=[MediaInfo(media_id=841489, revision=1, osti_id=1190959, status='C', added_by=None, document_page_count=1, mime_type='text/html', media_title=None, media_location='O', media_source='DOE2416API', date_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 875000, tzinfo=TzInfo(UTC)), date_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 78000, tzinfo=TzInfo(UTC)), date_valid_start=None, date_valid_end=None, files=[MediaFile(media_file_id=4514486, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='O', url_type='O', url='https://materialsproject.org/materials/mp-149', added_by=None, document_page_count=None, file_size_bytes=None, duration_seconds=None, subtitle_tracks=None, video_tracks=None, mime_type='text/html', media_source='DOE2416API', date_file_added=datetime.datetime(2015, 7, 7, 22, 9, 4, 875000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 52, 857000, tzinfo=TzInfo(UTC))), 
MediaFile(media_file_id=4515065, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='C', url_type='L', url=None, added_by=None, document_page_count=1, file_size_bytes=15546, duration_seconds=0, subtitle_tracks=None, video_tracks=None, mime_type='text/html', media_source='OFF_SITE_DOWNLOAD', date_file_added=datetime.datetime(2015, 7, 8, 2, 50, 52, 877000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 96000, tzinfo=TzInfo(UTC))), MediaFile(media_file_id=4515066, media_id=841489, revision=1, parent_media_file_id=None, status='DONE', media_type='T', url_type='L', url=None, added_by=None, document_page_count=1, file_size_bytes=5593, duration_seconds=0, subtitle_tracks=None, video_tracks=None, mime_type='text/plain', media_source='TEXT_EXTRACTION', date_file_added=datetime.datetime(2015, 7, 8, 2, 50, 53, 78000, tzinfo=TzInfo(UTC)), date_file_updated=datetime.datetime(2015, 7, 8, 2, 50, 53, 83000, tzinfo=TzInfo(UTC)))])] -audit_logs=[] diff --git a/src/mp_cite/reset.py b/src/mp_cite/reset.py deleted file mode 100644 index c82731b..0000000 --- a/src/mp_cite/reset.py +++ /dev/null @@ -1,27 +0,0 @@ -import os -from dotenv import load_dotenv -from pymongo import MongoClient -from elinkapi import Elink - -load_dotenv() # depends on the root directory from which you run your python scripts. 
- -review_endpoint = "https://review.osti.gov/elink2api/" - -prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) -review_api = Elink( - token=os.environ.get("elink_review_api_token"), target=review_endpoint -) - -cwd = os.getcwd() -path = "/json_pages/page_number_1000.0" # IT'S ONLY DOING ONE FILE RIGHT NOW -file = open(cwd + path, "r") - -atlas_user = os.environ.get("atlas_user") -atlas_password = os.environ.get("atlas_password") -atlas_host = os.environ.get("atlas_host") -mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" - -# emptyReviewAPI("Testing", review_api) - -with MongoClient() as client: - client.dois_test.dois.delete_many({}, comment="Testing") diff --git a/src/mp_cite/send_collection.py b/src/mp_cite/send_collection.py deleted file mode 100644 index 0ce65a3..0000000 --- a/src/mp_cite/send_collection.py +++ /dev/null @@ -1,79 +0,0 @@ -from pathlib import Path -from xml.dom.minidom import parseString -from dicttoxml import dicttoxml -from mpcite.doi_builder import DOIBuilder -import json -from monty.json import MontyDecoder -from pydantic import BaseModel, Field -from typing import List - -default_description = ( - "Computed materials data using density functional theory calculations. These " - "calculations determine the electronic structure of bulk materials by solving " - "approximations to the Schrodinger equation. 
For more information, " - "see https://materialsproject.org/docs/calculations" -) - - -class CollectionsModel(BaseModel): - title: str = Field(default="Sample Title") - product_type: str = Field(default="DC") - relidentifiersblock: List[List[str]] = Field() - contributors: List[dict] - description: str = Field(default=default_description) - site_url: str = Field(default="https://materialsproject.org/") - - -config_file = Path("/Users/michaelwu/Desktop/projects/MPCite/files/config_prod.json") - -bld: DOIBuilder = json.load(config_file.open("r"), cls=MontyDecoder) -bld.config_file_path = config_file.as_posix() - -records = [ - CollectionsModel( - relidentifiersblock=[["mp-1", "mp-2", "mp-1"]], - contributors=[ - { - "first_name": "Michael", - "last_name": "Wu", - "email": "wuxiaohua1011@berkeley.edu", - } - ], - ).dict(), - CollectionsModel( - relidentifiersblock=[["mp-21"], ["mp-22"]], - contributors=[ - { - "first_name": "Michael", - "last_name": "Wu", - "email": "wuxiaohua1011@berkeley.edu", - } - ], - ).dict(), -] - - -def my_item_func(x): - if x == "records": - return "record" - elif x == "contributors": - return "contributor" - elif x == "relidentifier_detail": - return "related_identifier" - elif x == "relidentifiersblock": - return "relidentifier_detail" - else: - return "item" - - -records_xml = parseString( - dicttoxml(records, custom_root="records", attr_type=False, item_func=my_item_func) -) - -for item in records_xml.getElementsByTagName("relidentifier_detail"): - item.setAttribute("type", "accession_num") - item.setAttribute("relationType", "Compiles") - -print(records_xml.toprettyxml()) -# response = bld.elink_adapter.post_collection(data=records_xml.toxml()) -# print(response) diff --git a/src/mp_cite/test_core.py b/src/mp_cite/test_core.py deleted file mode 100644 index 550c1d3..0000000 --- a/src/mp_cite/test_core.py +++ /dev/null @@ -1,94 +0,0 @@ -from pymongo import MongoClient -from elinkapi import Elink -from .core import 
find_out_of_date_doi_entries, update_existing_osti_record -import os -from dotenv import load_dotenv - -load_dotenv() # depends on the root directory from which you run your python scripts. - -review_endpoint = "https://review.osti.gov/elink2api/" - -prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) -review_api = Elink( - token=os.environ.get("elink_review_api_token"), target=review_endpoint -) - -cwd = os.getcwd() -path = "/json_pages/page_number_1000.0" # IT'S ONLY DOING ONE FILE RIGHT NOW -file = open(cwd + path, "r") - -atlas_user = os.environ.get("atlas_user") -atlas_password = os.environ.get("atlas_password") -atlas_host = os.environ.get("atlas_host") -mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" - -with MongoClient(mongo_uri) as real_client: - with ( - MongoClient() as doi_client - ): # open the mongoclient outside of the for loop, is more efficient than opening and closing it repeatedly - dois = doi_client["dois_test"]["dois"] - - # for line in file: - # js = json.loads(line.strip()) - - # # temporarily fix the sponsor organization bug - # for entry in js["organizations"]: - # if entry["type"] == "SPONSOR": - # entry["identifiers"] = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] - - # my_record = Record(**js) - - # # make a post to the elink review environment - # saved_record = review_api.post_new_record(my_record, state="submit") - # # make a doi document with saved_record - # doi_model = RecordResponse_to_doi_model(saved_record) - - # # now, add that doi to the local doi collection - # upload_doi_document_model_to_collection(doi_model, dois) - - # all_material_ids = [doc["material_id"] for doc in dois.find({}, {"_id": 0, "material_id": 1})] - - # for material_id in all_material_ids: - - # # query prod env for record with materials_id == site_unique_id - # record_from_prod = prod_api.query_records(site_unique_id=material_id) - - # if record_from_prod.total_rows != 1: - # print(f"ERROR: not unique Material_ID! 
{material_id}") - # raise - - # # make a doi_model from that data - # recordresponse_from_prod = RecordResponse_to_doi_model(record_from_prod.data[0]) - - # query_filter = {"material_id": material_id} - - # # Find existing document to preserve the osti_id - # existing_doc = dois.find_one(query_filter, {"osti_id": 1}) # only retrieve osti_id - - # if not existing_doc: - # print(f"ERROR: document with material_id {material_id} not found in `dois` collection.") - # raise - - # replacement_doc = recordresponse_from_prod.model_dump() - # replacement_doc["osti_id"] = existing_doc["osti_id"] - - # dois.replace_one(query_filter, replacement_doc) - - osti_OOD_list = find_out_of_date_doi_entries( - real_client, doi_client, "mp_core_blue", "robocrys", "dois_test", "dois" - ) - print(osti_OOD_list) - - for osti_id in osti_OOD_list: - material_id_to_update = review_api.get_single_record(osti_id).site_unique_id - - new_values = { - "description": "UPDATED ROBOCRYS DESCRIPTION: " - + next( - real_client["mp_core_blue"]["robocrys"].find( - {"material_id": material_id_to_update}, {"_id": 0, "description": 1} - ) - )["description"] - } - - update_existing_osti_record(review_api, osti_id, new_values) diff --git a/tests/file_to_jsonForUpload.py b/tests/file_to_jsonForUpload.py deleted file mode 100644 index f728fc9..0000000 --- a/tests/file_to_jsonForUpload.py +++ /dev/null @@ -1,167 +0,0 @@ -import os -import json -from elinkapi import Elink, Record -from dotenv import load_dotenv - -import requests -from elinkapi.utils import Validation - -from pymongo import MongoClient - -from timeit import default_timer as timer - -load_dotenv() # depends on the root directory from which you run your python scripts. 
- -review_endpoint = "https://review.osti.gov/elink2api/" - -prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) -review_api = Elink( - token=os.environ.get("elink_review_api_token"), target=review_endpoint -) - - -atlas_user = os.environ.get("atlas_user") -atlas_password = os.environ.get("atlas_password") -atlas_host = os.environ.get("atlas_host") -mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" - -cwd = os.getcwd() -path = "/json_pages/page_number_4.0" # IT'S ONLY DOING ONE FILE RIGHT NOW -file = open(cwd + path, "r") - -update_counter = 0 -records_checked = 0 - - -def delete_record(api, osti_id, reason): - """Delete a record by its OSTI ID.""" - response = requests.delete( - f"{api.target}records/{osti_id}?reason={reason}", - headers={"Authorization": f"Bearer {api.token}"}, - ) - Validation.handle_response(response) - return response.status_code == 204 # True if deleted successfully - - -def emptyReviewAPI(reason): - for record in review_api.query_records(): - delete_record(review_api, record.osti_id, reason) - - -start = timer() - -# Post an updated json - -postUnedited = False - -for line in file: - js = json.loads(line.strip()) - - for entry in js["organizations"]: - if entry["type"] == "SPONSOR": - entry["identifiers"] = [{"type": "CN_DOE", "value": "AC02-05CH11231"}] - - material_id = js["site_unique_id"] - - robocrys_description = js["description"] - - with MongoClient(mongo_uri) as client: - coll = client["mp_core_blue"]["robocrys"] - res = coll.find_one({"material_id": material_id}) - records_checked += 1 - - if res is not None: - robocrys_description = res["description"] - - # see if an update to the description is necessary, if it is, then update the description and post a new record. - if ( - postUnedited - or ( - robocrys_description is not None - and js["description"] != robocrys_description - ) - ): # if a robocrys_description was found internally and it doesn't match what ELink has record... 
- js["description"] = ( - "OLD WAS UPDATED, THEN IT WAS POSTED: " + robocrys_description - ) - my_record = Record(**js) - - saved_record = None - try: - # The API will now return an error code on this call - # because "AAAA" is not a valid site_ownership_code - - saved_record = review_api.post_new_record(my_record, state="submit") - update_counter += 1 - - print(f"NEW RECORD POSTED: {saved_record.osti_id}") - raise - except: - print( - f"Record failed to post!: {my_record.doi}. Robocrys Collection Had Description {robocrys_description[0:50]}... Prod_Env ELink Had {my_record.description[37:87]}..." - ) - raise - - if update_counter >= 10000: - break - -end = timer() -print( - f"Records Updated and/or Posted: {update_counter} \nRecords Checked in Total: {records_checked}. \nIt took {end - start} seconds" -) - -####################################################### -# JUST POST JSON, Then update posted json Later -# post_counter = 0 -# records_checked = 0 - -# for line in file: -# js = json.loads(line.strip()) - -# material_id = js["site_unique_id"] - -# # always post, no update -# my_record = Record(**js) - -# saved_record = None -# try: -# # The API will now return an error code on this call -# # because "AAAA" is not a valid site_ownership_code - -# # posts an unupdated record -# saved_record = review_api.post_new_record(my_record, "save") -# post_counter += 1 - -# print("\n\n NEW RECORD POSTED") -# print(saved_record) - -# robocrys_description = js["description"] - -# with MongoClient(mongo_uri) as client: -# coll = client["mp_core_blue"]["robocrys"] -# res = coll.find_one({"material_id" : material_id}) -# records_checked += 1 - -# if res != None: -# robocrys_description = res["description"] - -# if robocrys_description != None and js["description"] != robocrys_description: # if an update is needed -# # update the js["description"] -# js["description"] = "OLD WAS POSTED, THEN RECORD WITH NEW DESCRIPTION UPDATED IT: " + robocrys_description - -# # turn it into a 
new record -# new_updated_record = Record(**js) - -# # use that new record to update what was just posted -# review_api.update_record(saved_record.osti_id, new_updated_record, "save") - -# except: -# print("Record failed to post!") - -# if post_counter >= 10000: -# break - -# end = timer() -# print(f"Records Updated and/or Posted: {update_counter} \n Records Checked in Total: {records_checked}. It took {end - start} seconds") - -###################################################### diff --git a/tests/github_bug_report.py b/tests/github_bug_report.py deleted file mode 100644 index 2c3719c..0000000 --- a/tests/github_bug_report.py +++ /dev/null @@ -1,61 +0,0 @@ -from elinkapi import Elink, Organization, Person, exceptions, Record -import os -from dotenv import load_dotenv -from datetime import datetime - -load_dotenv() - -prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) -review_endpoint = "https://review.osti.gov/elink2api/" -review_api = Elink( - token=os.environ.get("elink_review_api_token"), target=review_endpoint -) - -# record_response = prod_api.get_single_record(1190959) # returns OSTI record response with OSTI ID = 1190959, which has a DOE Contract Number saved (AC02-05CH11231; EDCBEE) -# record_response_dict = record_response.model_dump(exclude_none=True) -# record_response_dict.pop("osti_id") # remove osti_id to allow post function - -# new_record = Record(**record_response_dict) # identical record with removed OSTI_ID -# for org in new_record.organizations: -# if org.type == "SPONSOR": -# print(org) -# org.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}] - -# # attempt to submit exact same record to review environment -# record_response_after_post = review_api.post_new_record(new_record, "save") # works after re-providing the DOE contract number - -# # next, attempt updating this record -# record_to_update = review_api.get_single_record(record_response_after_post.osti_id) -# record_to_update.title = "Updated Title For 
Materials Data" -# review_api.update_record(record_response_after_post.osti_id, record_to_update, "submit") - -required_fields = { - "product_type": "DA", - "title": "Testing if CN_DOE can be random", - "organizations": [ - Organization(type="RESEARCHING", name="LBNL Materials Project (LBNL-MP)"), - Organization( - type="SPONSOR", - name="TEST SPONSOR ORG", - identifiers=[{"type": "CN_DOE", "value": "oiajdiwjdiwj"}], - ), - ], - "persons": [Person(type="AUTHOR", last_name="Schmoe")], - "site_ownership_code": "LBNL-MP", - "access_limitations": ["UNL"], - "publication_date": datetime.now().replace( - hour=0, minute=0, second=0, microsecond=0 - ), - "site_url": "https://next-gen.materialsproject.org/materials", -} - -empty_record = Record(**required_fields) -print( - f"SUBMITTED TO OSTI, FULLY VALIDATED:\n{review_api.get_single_record(2525614)}\n\n\nTRYING TO SUBMIT:\n{empty_record}" -) - -try: - saved_record = review_api.post_new_record(empty_record, "submit") -except exceptions.BadRequestException as ve: - print(ve.message) - print(ve.errors) diff --git a/tests/manage_backfills.py b/tests/manage_backfills.py deleted file mode 100644 index 85abf65..0000000 --- a/tests/manage_backfills.py +++ /dev/null @@ -1,48 +0,0 @@ -# This script will see how many documents in ELink, i.e. ones with a DOI, are not accounted for in the internal DOI collection. - -from elinkapi import Elink, Record - -import os -from dotenv import load_dotenv - -load_dotenv() # depends on the root directory from which you run your python scripts. 
- -api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) - - -query1 = api.query_records(rows=1000) - -materials_with_dois: list[Record] = [] - -for page in query1: - print(f"Now on Page: {page.title}") - print(f"Material_ID: {page.site_unique_id} and DOI: http://doi.org/{page.doi}") - - if page.site_unique_id.startswith("mp-"): - materials_with_dois.append(page) - - # for record in page.data: - # if record.site_unique_id.startswith("mp-"): - # materials_with_dois.append(record) - - -# set_q1 = [page for page in query1] -# set_q2 = [page for page in query2] - -# set_diffq1q2 = set(set_q1) - set(set_q2) -# print (f"Difference matched {len(set)} records") - -# filtered = [ -# page for page in query1 -# if page.title.lower().startswith("materials data on") -# ] - -# print (f"Filtered Query1 has {len(filtered)} records") - -# paginate through ALL results -# for page in query1: -# print(page.title) -# print(f"Material_ID: {page.site_unique_id} and DOI: http://doi.org/{page.doi}") - -# for record in page.data: -# print (f"OSTI ID: {record.osti_id} Title: {record.title}") diff --git a/tests/outputs.txt b/tests/outputs.txt deleted file mode 100644 index 740a682..0000000 --- a/tests/outputs.txt +++ /dev/null @@ -1,46 +0,0 @@ -(mpcite-env) C:\Users\ongha\OneDrive\Documents\GitHub\MPCite>C:/Users/ongha/anaconda3/envs/mpcite-env/python.exe c:/Users/ongha/OneDrive/Documents/GitHub/MPCite/tests/prod_to_review.py - -Query retrieved 144845 record(s) -Page finished. Now at 500 data entries. 0 edge cases found. -Page finished. Now at 1000 data entries. 0 edge cases found. -Page finished. Now at 1500 data entries. 0 edge cases found. -Page finished. Now at 2000 data entries. 0 edge cases found. -Page finished. Now at 2500 data entries. 0 edge cases found. -Page finished. Now at 3000 data entries. 0 edge cases found. -Page finished. Now at 3500 data entries. 0 edge cases found. -Page finished. Now at 4000 data entries. 0 edge cases found. -Page finished. 
Now at 4500 data entries. 0 edge cases found. -Page finished. Now at 5000 data entries. 0 edge cases found. -Page finished. Now at 5500 data entries. 0 edge cases found. -Page finished. Now at 6000 data entries. 0 edge cases found. -Page finished. Now at 6500 data entries. 0 edge cases found. -Page finished. Now at 7000 data entries. 0 edge cases found. -Page finished. Now at 7500 data entries. 0 edge cases found. -Page finished. Now at 8000 data entries. 0 edge cases found. -Page finished. Now at 8500 data entries. 0 edge cases found. -Page finished. Now at 9000 data entries. 0 edge cases found. -Page finished. Now at 9500 data entries. 0 edge cases found. -Page finished. Now at 10000 data entries. 0 edge cases found. -Page finished. Now at 10500 data entries. 0 edge cases found. -Page finished. Now at 11000 data entries. 0 edge cases found. -Page finished. Now at 11500 data entries. 0 edge cases found. -Page finished. Now at 12000 data entries. 0 edge cases found. -Page finished. Now at 12500 data entries. 0 edge cases found. -Page finished. Now at 13000 data entries. 0 edge cases found. -Page finished. Now at 13500 data entries. 0 edge cases found. -Page finished. Now at 14000 data entries. 0 edge cases found. -Page finished. Now at 14500 data entries. 0 edge cases found. 
- -Traceback (most recent call last): - File "C:\Users\ongha\anaconda3\envs\mpcite-env\Lib\site-packages\elinkapi\query.py", line 95, in __next__ - record = self.data.pop() -IndexError: pop from empty list - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "c:\Users\ongha\OneDrive\Documents\GitHub\MPCite\tests\prod_to_review.py", line 29, in - record = next(query) - File "C:\Users\ongha\anaconda3\envs\mpcite-env\Lib\site-packages\elinkapi\query.py", line 108, in __next__ - raise StopIteration -StopIteration diff --git a/tests/prod_to_review.py b/tests/prod_to_review.py deleted file mode 100644 index 732340d..0000000 --- a/tests/prod_to_review.py +++ /dev/null @@ -1,129 +0,0 @@ -from elinkapi import Elink - -import os -from dotenv import load_dotenv - -import json - -load_dotenv() # depends on the root directory from which you run your python scripts. - -review_endpoint = "https://review.osti.gov/elink2api/" - -prod_api = Elink(token=os.environ.get("elink_api_PRODUCTION_key")) -review_api = Elink( - token=os.environ.get("elink_review_api_token"), target=review_endpoint -) - -print(prod_api.query_records()) - -rows_per_page = 100 - -# query production -query = prod_api.query_records(rows=rows_per_page) -print(f"Query retrieved {query.total_rows} record(s)") - -count_materials_data = 0 -count_MaterialsDataOn = 0 -cwd = os.getcwd() -page_number = 0 -page_json_list = [] - -for record in query: - # increment counter - count_materials_data = count_materials_data + 1 - print( - f"On record #{count_materials_data}, next url is {query.next_url}, previous url is {query.previous_url}" - ) - - # see if the record is a Materials Data on record - if record.title.startswith("Materials Data on"): - # increment the MaterialsDataOn counter - count_MaterialsDataOn = count_MaterialsDataOn + 1 - - # prepare the new record for the review environment, remove the OSTI ID, and add its model_dump to the list of json objects for 
the page. - new_record = record - new_record_dict = new_record.model_dump(exclude_none=True) - - new_record_osti_id = new_record_dict.pop( - "osti_id" - ) # now new_record_dict does not have the osti_id key. - js = json.dumps( - new_record_dict, default=str - ) # datetime objects are not JSON serializable, so we use default=str to convert them to strings. - - page_json_list.append(js) - - # TODO: take the new_record_dict and make it into a new post to the review environment and save the RecordResponse. - - else: - print(f"Found edge case: {record.title}") - - if count_materials_data % rows_per_page == 0: - # create/open, write, and close new json file - page_number = count_materials_data / rows_per_page - path = f"/json_pages/page_number_{page_number}" - fp = open(cwd + path, "a") - - for js in page_json_list: - fp.write(js) - fp.write("\n") - - fp.close() - page_json_list = [] - - print( - f"Page {page_number} finished. Now at {count_materials_data} data entries. {count_materials_data - count_MaterialsDataOn} edge cases found." - ) - -# print remainder of records if not a full page after for loop exits -page_number = page_number + 1 -path = f"/json_pages/page_number_{page_number}" -fp = open(cwd + path, "a") -for js in page_json_list: - fp.write(js) - fp.write("\n") -fp.close() - -# # if contains materials data on, then add to batch -# for count_materials_data < query.total_rows: - -# # print(f"The length of the query is now {len(query.data)}") -# record = next(query) -# count_materials_data = count_materials_data + 1 - -# if record.title.startswith("Materials Data on"): -# count_MaterialsDataOn = count_MaterialsDataOn + 1 - -# new_record = record -# new_record_dict = new_record.model_dump(exclude_none=True) - -# new_record_osti_id = new_record_dict.pop("osti_id") - -# page_dict[f"Entry OSTI_ID {new_record_osti_id}"] = new_record_dict - -# # TODO: take the new_record_dict and make it into a new post to the review environment and save the RecordResponse. 
- - -# if count_materials_data % rows_per_page == 0: -# # if a page has been fully consummed, then print the new batched dictionary to a json file. - -# js = json.dumps(page_dict, default=str) - -# # open new json file if not exist it will create -# cwd = os.getcwd() -# path = f'/json_pages/page_number_{count_materials_data/rows_per_page}' -# fp = open(cwd+path, 'a') - -# # write to json file -# fp.write(js) - -# # close the connection to the file and empty the dict -# fp.close() -# page_dict = {} - -# print(f"Page {(count_materials_data / rows_per_page)} finished. Now at {count_materials_data} data entries. {count_materials_data - count_MaterialsDataOn} edge cases found.") - -# model_dump exclude_none=True, remove null keys -# pop osti_id --> save batch to json files -# make new record -# post to review_api diff --git a/tests/test_elink_api.py b/tests/test_elink_api.py deleted file mode 100644 index 5e07706..0000000 --- a/tests/test_elink_api.py +++ /dev/null @@ -1,118 +0,0 @@ -import os -from dotenv import load_dotenv - -from elinkapi import Elink - -from pymongo import MongoClient - -load_dotenv() - -atlas_user = os.environ.get("atlas_user") -atlas_password = os.environ.get("atlas_password") -atlas_host = os.environ.get("atlas_host") -mongo_uri = f"mongodb+srv://{atlas_user}:{atlas_password}@{atlas_host}/" - -api = Elink( - token=os.environ.get("elink_api_PRODUCTION_key") -) # target default is production E-link service. 
- -### Grabbing an existing record - -# record = api.get_single_record(mp-id) # test for silicon - -# type(record) - -# ELinkGotRecordModel = ELinkGetResponseModel.from_elinkapi_record(record) - -# print(ELinkGotRecordModel.get_title()) -# print(ELinkGotRecordModel.get_site_url()) -# print(ELinkGotRecordModel.get_keywords()) -# print(ELinkGotRecordModel.get_default_description()) - -# ELinkTestGetRecordModel = TestClass(**record.model_dump()) - -### Making a new record - -# with MongoClient(mongo_uri) as client: -# #get all material_ids and dois from doi collection -# doi_collection = client["mp_core"]["dois"] -# materials_to_update = list(doi_collection.find({}, {"_id": 0, "material_id": 1, "doi": 1}, limit=10)) -# material_ids = [entry["material_id"] for entry in materials_to_update] - -# # check # of material_ids from DOI collection vs amount in robocrys - -# # get description for material_ids from robocrys collection -# coll = client["mp_core_blue"]["robocrys"] -# res = list(coll.find({"material_id": {"$in": material_ids}}, {"_id": 0, "material_id": 1, "description": 1})) - -# # join on material_id -# for doc in res: -# mat = next(filter(lambda x: x["material_id"] == doc["material_id"], materials_to_update)) -# doc["doi"] = mat["doi"] - - -# {"material_id": ..., "doi": ..., "description": ...} -> -# Record( -# template_fields ..., -# doi: ..., -# description: ..., -# fields_where_material_id_makes_sense: ..., -# ) - -# with the client open -with MongoClient(mongo_uri) as client: - # get all dois from the collection - doi_collection = client["mp_core"]["dois"] - materials_to_update = list( - doi_collection.find({}, {"_id": 0, "doi": 1, "material_id": 1}, limit=2) - ) - - # from the doi collection, grab the material_id and doi of each material - material_ids = [entry["material_id"] for entry in materials_to_update] - - # additionally, gain the osti id from the doi - osti_ids = [entry["doi"].split("10.17188/")[1] for entry in materials_to_update] - - # 
additionally, grab the description of each material from the robocrys - coll = client["mp_core_blue"][ - "robocrys" - ] # grabs robocrys collection from active database - res = list( - coll.find( - {"material_id": {"$in": material_ids}}, - {"_id": 0, "material_id": 1, "description": 1}, - ) - ) # grabs the material id and description of entries in the collection - descriptions = [entry["description"] for entry in res] - - # for each material (and its material_id, doi, and osti_id) - for i in range(len(materials_to_update)): - internal_material_id = material_ids[i] - internal_osti_id = osti_ids[i] - internal_description = descriptions[i] - - # get_single_record(osti_id) - record = api.get_single_record(internal_osti_id) - - print( - f"\n \n \nPrinting what is currently on ELINK for {internal_material_id}*****************************************" - ) - print(record) - - if internal_material_id == record.site_unique_id: - # update description - record.description = "testTESTtestTESTtest" - - print( - f"\n \n \nPrinting record for {internal_material_id}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - ) - print(record) - - # # post updated record - # try: - # saved_record = api.post_new_record(record, "save") - # except exceptions.BadRequestException as ve: - # ... - # # ve.message = "Site Code AAAA is not valid." 
- # # ve.errors provides more details: - # # [{"status":"400", "detail":"Site Code AAAA is not valid.", "source":{"pointer":"site_ownership_code"}}] From a1423ed15cc7e8c8d31cf966263a19e4140a55c2 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Mon, 4 Aug 2025 10:56:36 -0700 Subject: [PATCH 41/46] Readded release and testing default workflows to match master --- .github/workflows/release.yml | 65 +++++++++++++++++++++++++++++++++++ .github/workflows/testing.yml | 34 ++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 .github/workflows/release.yml create mode 100644 .github/workflows/testing.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..6def586 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,65 @@ +name: release + +on: + release: + types: [published] + +jobs: + publish: + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/mp-cite + permissions: + id-token: write + + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v6 + with: + python-version: 3.12 + + - name: Build + run: uv-build + + - name: Publish + run: uv-publish --trusted-publishing always + + docs: + runs-on: ubuntu-latest + needs: + - publish + + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v6 + with: + python-version: 3.12 + + - name: Install deps + run: uv sync --locked --all-extras --dev + + - name: Generate changelog + uses: charmixer/auto-changelog-action@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + exclude_labels: dependencies + + - name: Commit files + run: | + git config --local user.email "feedback@materialsproject.org" + git config --local user.name "materialsproject" + git stash + git pull origin main + mv CHANGELOG.md docs/ + git add docs/CHANGELOG.md && git commit -m 'Updated CHANGELOG.md' + + - name: Push changes + uses: ad-m/github-push-action@master + with: + github_token: ${{ 
secrets.GITHUB_TOKEN }} +# integrate mkdocs at some point diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml new file mode 100644 index 0000000..67e0f21 --- /dev/null +++ b/.github/workflows/testing.yml @@ -0,0 +1,34 @@ +name: testing + +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + test: + strategy: + matrix: + os: ["ubuntu-latest"] + python-version: ["3.11", "3.12", "3.13"] + + name: mp-cite (${{ matrix.os }}/py${{ matrix.python-version }}) + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Install the project + run: uv sync --locked --all-extras --dev + + - name: Run tests + env: + ELINK_REVIEW_API_TOKEN: ${{ secrets.ELINK_REVIEW_API_TOKEN }} + ELINK_REVIEW_ENDPOINT: ${{ secrets.ELINK_REVIEW_ENDPOINT }} + run: uv run pytest tests +# codecov? From c53df930027f0a6165d48029dad40413d9c38734 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Mon, 4 Aug 2025 10:57:45 -0700 Subject: [PATCH 42/46] Readd changelog to match master --- docs/CHANGELOG.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/CHANGELOG.md diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md new file mode 100644 index 0000000..e69de29 From 2a94abd64b56199218110110e56b21e42080bf1b Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Mon, 4 Aug 2025 11:01:17 -0700 Subject: [PATCH 43/46] restored old version of gitignore --- .gitignore | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.gitignore b/.gitignore index 8241d4c..9045477 100644 --- a/.gitignore +++ b/.gitignore @@ -209,7 +209,3 @@ __marimo__/ # Streamlit .streamlit/secrets.toml - -json_pages/ -notebooks/ -test_json_pages/ \ No newline at end of file From 9c022d6d8fd561b993f8af2b1bea52029a10b722 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Mon, 4 Aug 2025 15:31:47 -0700 Subject: [PATCH 44/46] Fixing PR comments --- 
.github/workflows/lint.yml | 12 ++---------- pyproject.toml | 32 +++----------------------------- uv.lock | 8 +++++--- 3 files changed, 10 insertions(+), 42 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index fa4a241..f1972d4 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -4,28 +4,20 @@ on: push: branches: [master, linting-workflow] pull_request: - branches: [master, linting-workflow] + branches: [master] workflow_dispatch: # TODO: setup linting with uv/ruff # informed by testing.yml and https://medium.com/@sidharthvijayakumar7/automating-pylint-in-github-workflow-80c84b2ff243 and ruff documentation jobs: linting: - strategy: - matrix: - os: ["ubuntu-latest"] - python-version: ["3.11", "3.12", "3.13"] - fail-fast: false - name: mp-cite (${{ matrix.os }}/py${{ matrix.python-version }}) - runs-on: ${{ matrix.os }} - steps: - uses: actions/checkout@v4 - name: Install uv and set up the python version uses: astral-sh/setup-uv@v6 with: - python-version: ${{ matrix.python-version }} + python-version: '3.12' version: "latest" - name: Analyzing the code with ruff diff --git a/pyproject.toml b/pyproject.toml index 1d03264..e463770 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,6 @@ maintainers = [ requires-python = ">=3.11" dependencies = [ "elinkapi>=0.4.9", - "pre-commit>=4.2.0", "pydantic>=2.11.7", "pymongo>=4.13.2", "ruff>=0.12.7", @@ -24,6 +23,7 @@ dependencies = [ [dependency-groups] dev = [ "pytest>=8.4.1", + "pre-commit>=4.2.0", ] [build-system] @@ -44,32 +44,6 @@ Issues = "https://github.com/materialsproject/MPCite/issues" [tool.ruff] # Exclude a variety of commonly ignored directories. 
exclude = [ - ".bzr", - ".direnv", - ".eggs", - ".git", - ".git-rewrite", - ".hg", - ".ipynb_checkpoints", - ".mypy_cache", - ".nox", - ".pants.d", - ".pyenv", - ".pytest_cache", - ".pytype", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - ".vscode", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "site-packages", - "venv", "legacy", "notebooks", "uv.lock" @@ -90,8 +64,8 @@ ignore = [] fixable = ["ALL"] unfixable = [] -# Allow unused variables when underscore-prefixed. -dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" +# Allow NO unused variables to exist in the codebase. If underscore-prefixed unused variables are permissible, use this regex $^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" +dummy-variable-rgx = "^$" [tool.ruff.format] # Like Black, use double quotes for strings. diff --git a/uv.lock b/uv.lock index a3fe5ec..3cf32f1 100644 --- a/uv.lock +++ b/uv.lock @@ -161,7 +161,6 @@ version = "0.0.1" source = { editable = "." } dependencies = [ { name = "elinkapi" }, - { name = "pre-commit" }, { name = "pydantic" }, { name = "pymongo" }, { name = "ruff" }, @@ -169,20 +168,23 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "pre-commit" }, { name = "pytest" }, ] [package.metadata] requires-dist = [ { name = "elinkapi", specifier = ">=0.4.9" }, - { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pydantic", specifier = ">=2.11.7" }, { name = "pymongo", specifier = ">=4.13.2" }, { name = "ruff", specifier = ">=0.12.7" }, ] [package.metadata.requires-dev] -dev = [{ name = "pytest", specifier = ">=8.4.1" }] +dev = [ + { name = "pre-commit", specifier = ">=4.2.0" }, + { name = "pytest", specifier = ">=8.4.1" }, +] [[package]] name = "nodeenv" From bef3f591326c864c787729782b583ac9485f8621 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Mon, 4 Aug 2025 15:42:06 -0700 Subject: [PATCH 45/46] fix --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/lint.yml b/.github/workflows/lint.yml index f1972d4..0d79e69 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -2,7 +2,7 @@ name: linting on: push: - branches: [master, linting-workflow] + branches: [master] pull_request: branches: [master] workflow_dispatch: From a7976399ca0aa7e325da5dd9d7c91654d649f2e7 Mon Sep 17 00:00:00 2001 From: HugoOnghai Date: Mon, 4 Aug 2025 15:51:03 -0700 Subject: [PATCH 46/46] added mypy and ruff to a dependency group called lint --- pyproject.toml | 5 +++- uv.lock | 66 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e463770..c1090ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,6 @@ dependencies = [ "elinkapi>=0.4.9", "pydantic>=2.11.7", "pymongo>=4.13.2", - "ruff>=0.12.7", ] [dependency-groups] @@ -25,6 +24,10 @@ dev = [ "pytest>=8.4.1", "pre-commit>=4.2.0", ] +lint = [ + "mypy>=1.17.1", + "ruff>=0.12.7", +] [build-system] requires = ["hatchling"] diff --git a/uv.lock b/uv.lock index 3cf32f1..060ed6f 100644 --- a/uv.lock +++ b/uv.lock @@ -163,7 +163,6 @@ dependencies = [ { name = "elinkapi" }, { name = "pydantic" }, { name = "pymongo" }, - { name = "ruff" }, ] [package.dev-dependencies] @@ -171,13 +170,16 @@ dev = [ { name = "pre-commit" }, { name = "pytest" }, ] +lint = [ + { name = "mypy" }, + { name = "ruff" }, +] [package.metadata] requires-dist = [ { name = "elinkapi", specifier = ">=0.4.9" }, { name = "pydantic", specifier = ">=2.11.7" }, { name = "pymongo", specifier = ">=4.13.2" }, - { name = "ruff", specifier = ">=0.12.7" }, ] [package.metadata.requires-dev] @@ -185,6 +187,57 @@ dev = [ { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pytest", specifier = ">=8.4.1" }, ] +lint = [ + { name = "mypy", specifier = ">=1.17.1" }, + { name = "ruff", specifier = ">=0.12.7" }, +] + +[[package]] +name = "mypy" +version = "1.17.1" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "mypy-extensions" }, + { name = "pathspec" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8e/22/ea637422dedf0bf36f3ef238eab4e455e2a0dcc3082b5cc067615347ab8e/mypy-1.17.1.tar.gz", hash = "sha256:25e01ec741ab5bb3eec8ba9cdb0f769230368a22c959c4937360efb89b7e9f01", size = 3352570, upload-time = "2025-07-31T07:54:19.204Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/cf/eadc80c4e0a70db1c08921dcc220357ba8ab2faecb4392e3cebeb10edbfa/mypy-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad37544be07c5d7fba814eb370e006df58fed8ad1ef33ed1649cb1889ba6ff58", size = 10921009, upload-time = "2025-07-31T07:53:23.037Z" }, + { url = "https://files.pythonhosted.org/packages/5d/c1/c869d8c067829ad30d9bdae051046561552516cfb3a14f7f0347b7d973ee/mypy-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:064e2ff508e5464b4bd807a7c1625bc5047c5022b85c70f030680e18f37273a5", size = 10047482, upload-time = "2025-07-31T07:53:26.151Z" }, + { url = "https://files.pythonhosted.org/packages/98/b9/803672bab3fe03cee2e14786ca056efda4bb511ea02dadcedde6176d06d0/mypy-1.17.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70401bbabd2fa1aa7c43bb358f54037baf0586f41e83b0ae67dd0534fc64edfd", size = 11832883, upload-time = "2025-07-31T07:53:47.948Z" }, + { url = "https://files.pythonhosted.org/packages/88/fb/fcdac695beca66800918c18697b48833a9a6701de288452b6715a98cfee1/mypy-1.17.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e92bdc656b7757c438660f775f872a669b8ff374edc4d18277d86b63edba6b8b", size = 12566215, upload-time = "2025-07-31T07:54:04.031Z" }, + { url = "https://files.pythonhosted.org/packages/7f/37/a932da3d3dace99ee8eb2043b6ab03b6768c36eb29a02f98f46c18c0da0e/mypy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:c1fdf4abb29ed1cb091cf432979e162c208a5ac676ce35010373ff29247bcad5", size = 12751956, upload-time = "2025-07-31T07:53:36.263Z" }, + { url = "https://files.pythonhosted.org/packages/8c/cf/6438a429e0f2f5cab8bc83e53dbebfa666476f40ee322e13cac5e64b79e7/mypy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:ff2933428516ab63f961644bc49bc4cbe42bbffb2cd3b71cc7277c07d16b1a8b", size = 9507307, upload-time = "2025-07-31T07:53:59.734Z" }, + { url = "https://files.pythonhosted.org/packages/17/a2/7034d0d61af8098ec47902108553122baa0f438df8a713be860f7407c9e6/mypy-1.17.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:69e83ea6553a3ba79c08c6e15dbd9bfa912ec1e493bf75489ef93beb65209aeb", size = 11086295, upload-time = "2025-07-31T07:53:28.124Z" }, + { url = "https://files.pythonhosted.org/packages/14/1f/19e7e44b594d4b12f6ba8064dbe136505cec813549ca3e5191e40b1d3cc2/mypy-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1b16708a66d38abb1e6b5702f5c2c87e133289da36f6a1d15f6a5221085c6403", size = 10112355, upload-time = "2025-07-31T07:53:21.121Z" }, + { url = "https://files.pythonhosted.org/packages/5b/69/baa33927e29e6b4c55d798a9d44db5d394072eef2bdc18c3e2048c9ed1e9/mypy-1.17.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:89e972c0035e9e05823907ad5398c5a73b9f47a002b22359b177d40bdaee7056", size = 11875285, upload-time = "2025-07-31T07:53:55.293Z" }, + { url = "https://files.pythonhosted.org/packages/90/13/f3a89c76b0a41e19490b01e7069713a30949d9a6c147289ee1521bcea245/mypy-1.17.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:03b6d0ed2b188e35ee6d5c36b5580cffd6da23319991c49ab5556c023ccf1341", size = 12737895, upload-time = "2025-07-31T07:53:43.623Z" }, + { url = "https://files.pythonhosted.org/packages/23/a1/c4ee79ac484241301564072e6476c5a5be2590bc2e7bfd28220033d2ef8f/mypy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:c837b896b37cd103570d776bda106eabb8737aa6dd4f248451aecf53030cdbeb", size = 12931025, upload-time = "2025-07-31T07:54:17.125Z" }, + { url = "https://files.pythonhosted.org/packages/89/b8/7409477be7919a0608900e6320b155c72caab4fef46427c5cc75f85edadd/mypy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:665afab0963a4b39dff7c1fa563cc8b11ecff7910206db4b2e64dd1ba25aed19", size = 9584664, upload-time = "2025-07-31T07:54:12.842Z" }, + { url = "https://files.pythonhosted.org/packages/5b/82/aec2fc9b9b149f372850291827537a508d6c4d3664b1750a324b91f71355/mypy-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:93378d3203a5c0800c6b6d850ad2f19f7a3cdf1a3701d3416dbf128805c6a6a7", size = 11075338, upload-time = "2025-07-31T07:53:38.873Z" }, + { url = "https://files.pythonhosted.org/packages/07/ac/ee93fbde9d2242657128af8c86f5d917cd2887584cf948a8e3663d0cd737/mypy-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:15d54056f7fe7a826d897789f53dd6377ec2ea8ba6f776dc83c2902b899fee81", size = 10113066, upload-time = "2025-07-31T07:54:14.707Z" }, + { url = "https://files.pythonhosted.org/packages/5a/68/946a1e0be93f17f7caa56c45844ec691ca153ee8b62f21eddda336a2d203/mypy-1.17.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:209a58fed9987eccc20f2ca94afe7257a8f46eb5df1fb69958650973230f91e6", size = 11875473, upload-time = "2025-07-31T07:53:14.504Z" }, + { url = "https://files.pythonhosted.org/packages/9f/0f/478b4dce1cb4f43cf0f0d00fba3030b21ca04a01b74d1cd272a528cf446f/mypy-1.17.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:099b9a5da47de9e2cb5165e581f158e854d9e19d2e96b6698c0d64de911dd849", size = 12744296, upload-time = "2025-07-31T07:53:03.896Z" }, + { url = "https://files.pythonhosted.org/packages/ca/70/afa5850176379d1b303f992a828de95fc14487429a7139a4e0bdd17a8279/mypy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:fa6ffadfbe6994d724c5a1bb6123a7d27dd68fc9c059561cd33b664a79578e14", size = 12914657, upload-time = "2025-07-31T07:54:08.576Z" }, + { url = "https://files.pythonhosted.org/packages/53/f9/4a83e1c856a3d9c8f6edaa4749a4864ee98486e9b9dbfbc93842891029c2/mypy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:9a2b7d9180aed171f033c9f2fc6c204c1245cf60b0cb61cf2e7acc24eea78e0a", size = 9593320, upload-time = "2025-07-31T07:53:01.341Z" }, + { url = "https://files.pythonhosted.org/packages/38/56/79c2fac86da57c7d8c48622a05873eaab40b905096c33597462713f5af90/mypy-1.17.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:15a83369400454c41ed3a118e0cc58bd8123921a602f385cb6d6ea5df050c733", size = 11040037, upload-time = "2025-07-31T07:54:10.942Z" }, + { url = "https://files.pythonhosted.org/packages/4d/c3/adabe6ff53638e3cad19e3547268482408323b1e68bf082c9119000cd049/mypy-1.17.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:55b918670f692fc9fba55c3298d8a3beae295c5cded0a55dccdc5bbead814acd", size = 10131550, upload-time = "2025-07-31T07:53:41.307Z" }, + { url = "https://files.pythonhosted.org/packages/b8/c5/2e234c22c3bdeb23a7817af57a58865a39753bde52c74e2c661ee0cfc640/mypy-1.17.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:62761474061feef6f720149d7ba876122007ddc64adff5ba6f374fda35a018a0", size = 11872963, upload-time = "2025-07-31T07:53:16.878Z" }, + { url = "https://files.pythonhosted.org/packages/ab/26/c13c130f35ca8caa5f2ceab68a247775648fdcd6c9a18f158825f2bc2410/mypy-1.17.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c49562d3d908fd49ed0938e5423daed8d407774a479b595b143a3d7f87cdae6a", size = 12710189, upload-time = "2025-07-31T07:54:01.962Z" }, + { url = "https://files.pythonhosted.org/packages/82/df/c7d79d09f6de8383fe800521d066d877e54d30b4fb94281c262be2df84ef/mypy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = 
"sha256:397fba5d7616a5bc60b45c7ed204717eaddc38f826e3645402c426057ead9a91", size = 12900322, upload-time = "2025-07-31T07:53:10.551Z" }, + { url = "https://files.pythonhosted.org/packages/b8/98/3d5a48978b4f708c55ae832619addc66d677f6dc59f3ebad71bae8285ca6/mypy-1.17.1-cp314-cp314-win_amd64.whl", hash = "sha256:9d6b20b97d373f41617bd0708fd46aa656059af57f2ef72aa8c7d6a2b73b74ed", size = 9751879, upload-time = "2025-07-31T07:52:56.683Z" }, + { url = "https://files.pythonhosted.org/packages/1d/f3/8fcd2af0f5b806f6cf463efaffd3c9548a28f84220493ecd38d127b6b66d/mypy-1.17.1-py3-none-any.whl", hash = "sha256:a9f52c0351c21fe24c21d8c0eb1f62967b262d6729393397b6f443c3b773c3b9", size = 2283411, upload-time = "2025-07-31T07:53:24.664Z" }, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] [[package]] name = "nodeenv" @@ -204,6 +257,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, +] + [[package]] name = "platformdirs" version = "4.3.8"