fix(python): DX improvements — compact enums, name truncation, bug fixes#128
fix(python): DX improvements — compact enums, name truncation, bug fixes#128
Conversation
|
You have reached your Codex usage limits for code reviews. You can see your limits in the Codex usage dashboard. |
|
📖 Documentation Preview: https://reflectapi-docs-preview-pr-128.partly.workers.dev Updated automatically from commit 62ab388 |
reflectapi-schema/src/codegen.rs
Outdated
| #[serde(skip_serializing_if = "Option::is_none", default)] | ||
| pub type_hint: Option<String>, | ||
|
|
||
| #[serde(skip_serializing_if = "BTreeSet::is_empty", default)] | ||
| pub imports: BTreeSet<String>, | ||
|
|
||
| #[serde(skip_serializing_if = "BTreeSet::is_empty", default)] | ||
| pub runtime_imports: BTreeSet<String>, | ||
|
|
||
| #[serde(default)] | ||
| pub provided_by_runtime: bool, | ||
|
|
||
| #[serde(default)] | ||
| pub ignore_type_arguments: bool, | ||
| } |
There was a problem hiding this comment.
It would be good to have docs for each to understand what each field means. Also are you sure that each of these needs to be configured but not hardcoded in the codegen?
There was a problem hiding this comment.
The original idea was to let type metadata travel with the type definitions so the codegen could discover it rather than maintaining a parallel table. But for the types we're mapping today, these are all static and deterministic so I'll just remove these and use a static table like TS does
reflectapi/src/impls/chrono_tz.rs
Outdated
| if let Some(config) = crate::traits::python_reflection_codegen_config_for_type(type_name) { | ||
| type_def.codegen_config = config; | ||
| } |
There was a problem hiding this comment.
there are a bunch of implementation in the wild. Will these work without same similar lines everywhere? Also, if that code repeats everywhere, should it be passed done inside of the derive macro instead?
There was a problem hiding this comment.
Removed it from all impl files and traits.rs. The Python backend now owns its own static mapping keyed by Rust type name. Third-party types that aren't in the table are treated as user-defined types and rendered as Pydantic models, which should be the correct default behaviour.
reflectapi/src/traits.rs
Outdated
| pub(crate) fn python_codegen_config_for_type( | ||
| type_name: &str, | ||
| ) -> Option<crate::LanguageSpecificTypeCodegenConfig> { | ||
| match type_name { | ||
| "i8" | ||
| | "i16" | ||
| | "i32" | ||
| | "i64" | ||
| | "u8" | ||
| | "u16" | ||
| | "u32" | ||
| | "u64" | ||
| | "isize" | ||
| | "usize" | ||
| | "std::num::NonZeroU8" | ||
| | "std::num::NonZeroU16" | ||
| | "std::num::NonZeroU32" | ||
| | "std::num::NonZeroU64" | ||
| | "std::num::NonZeroU128" => Some(python_type_codegen_config("int")), | ||
| "f32" | "f64" => Some(python_type_codegen_config("float")), | ||
| "bool" => Some(python_type_codegen_config("bool")), | ||
| "String" | ||
| | "std::string::String" | ||
| | "url::Url" | ||
| | "rust_decimal::Decimal" | ||
| | "chrono_tz::Tz" => Some(python_type_codegen_config("str")), | ||
| "std::vec::Vec" => Some(python_type_codegen_config("list[T]")), | ||
| "std::collections::HashMap" | "std::collections::BTreeMap" | "indexmap::IndexMap" => { | ||
| Some(python_type_codegen_config("dict[K, V]")) | ||
| } | ||
| "std::option::Option" => Some(python_type_codegen_config("T | None")), | ||
| "std::result::Result" => Some(python_type_codegen_config("T | E")), | ||
| "reflectapi::Option" => Some(python_type_codegen_config_with( | ||
| "ReflectapiOption[T]", | ||
| &[], | ||
| &["ReflectapiOption"], | ||
| true, | ||
| false, | ||
| )), | ||
| "reflectapi::Empty" => Some(python_type_codegen_config_with( | ||
| "ReflectapiEmpty", | ||
| &[], | ||
| &["ReflectapiEmpty"], | ||
| true, | ||
| false, | ||
| )), | ||
| "reflectapi::Infallible" => Some(python_type_codegen_config_with( | ||
| "ReflectapiInfallible", | ||
| &[], | ||
| &["ReflectapiInfallible"], | ||
| true, | ||
| false, | ||
| )), | ||
| "chrono::DateTime" | "chrono::NaiveDateTime" => Some(python_type_codegen_config_with( | ||
| "datetime", | ||
| &["from datetime import datetime"], | ||
| &[], | ||
| false, | ||
| true, | ||
| )), | ||
| "chrono::NaiveDate" => Some(python_type_codegen_config_with( | ||
| "date", | ||
| &["from datetime import date"], | ||
| &[], | ||
| false, | ||
| false, | ||
| )), | ||
| "uuid::Uuid" => Some(python_type_codegen_config_with( | ||
| "UUID", | ||
| &["from uuid import UUID"], | ||
| &[], | ||
| false, | ||
| false, | ||
| )), | ||
| "std::time::Duration" => Some(python_type_codegen_config_with( | ||
| "timedelta", | ||
| &["from datetime import timedelta"], | ||
| &[], | ||
| false, | ||
| false, | ||
| )), | ||
| "std::tuple::Tuple0" => Some(python_type_codegen_config("None")), | ||
| "serde_json::Value" => Some(python_type_codegen_config("Any")), | ||
| "std::boxed::Box" | "std::sync::Arc" | "std::rc::Rc" => { | ||
| Some(python_type_codegen_config("T")) | ||
| } | ||
| "std::path::PathBuf" | "std::path::Path" => Some(python_type_codegen_config_with( | ||
| "Path", | ||
| &["from pathlib import Path"], | ||
| &[], | ||
| false, | ||
| false, | ||
| )), | ||
| "std::net::IpAddr" => Some(python_type_codegen_config_with( | ||
| "IPv4Address | IPv6Address", | ||
| &["from ipaddress import IPv4Address, IPv6Address"], | ||
| &[], | ||
| false, | ||
| false, | ||
| )), | ||
| "std::net::Ipv4Addr" => Some(python_type_codegen_config_with( | ||
| "IPv4Address", | ||
| &["from ipaddress import IPv4Address"], | ||
| &[], | ||
| false, | ||
| false, | ||
| )), | ||
| "std::net::Ipv6Addr" => Some(python_type_codegen_config_with( | ||
| "IPv6Address", | ||
| &["from ipaddress import IPv6Address"], | ||
| &[], | ||
| false, | ||
| false, | ||
| )), | ||
| _ => None, | ||
| } | ||
| } |
There was a problem hiding this comment.
this mapping belongs to python specific codegen. Typescript has similar mapping inside of typescript specific code.
There was a problem hiding this comment.
This is especially unclear why we inject it into the schema, when it is really codegen time problem not the problem of reflection or extra annotation.
Rust language specific option (additional derives to add to the types) can be parsed additionally from code - this is what would make sense to include into the schema because there is no other source for it and it is extracted from source code. Derive macro does not have any extra specific attributres for Python, like for Rust, so it is not part of the schema. These code lines of mapping are language specific, and static for codegen time.
There was a problem hiding this comment.
Also unclear if STATIC_LIBS constant was deleted? Could not see it in code dif.
There was a problem hiding this comment.
you might be thinking of build_implemented_types()?, which was the hardcoded type mapping function in python.rs. In #124 that function was changed to read from codegen_config on schema types instead of being a static map, but the traits.rs match table remained as a fallback, so the mapping data was effectively in two places. That's now cleaned up: single static mapping in python.rs (python_type_mappings()), build_implemented_types() derives the type-hint view from it. No duplication.
There was a problem hiding this comment.
The longer-term thinking was that if type metadata lived on the schema, it could flow through the semantic IR and give backends a uniform way to discover rendering hints. But that's perhaps getting too complicated without a clear upside anytime soon. I've opened #129 to track what should live in the shared layers vs backends. But doesn't need to be solved to improve the Python backend.
Move PythonTypeCodegenConfig and all Python type mapping data out of the schema/reflection layers and into the Python codegen backend. The mapping is now a single LazyLock<HashMap> in python.rs, matching how TypeScript handles its equivalent build_implemented_types(). This removes ~420 lines of schema-layer plumbing and impl file boilerplate while producing byte-identical generated output. Addresses review feedback on #128.
|
I reviewed the latest diff. Seems like fixed along the lines we discussed. |
…gs, fix field naming 1. Truncate class names exceeding 80 chars with hash suffix for uniqueness (349-char names → 89 chars max). Prevents IDE unusable names from deeply generic types. 2. Remove stuttering: system::SystemVersionInfo no longer produces SystemSystemVersionInfo (module prefix skipped when type name already starts with it). 3. Remove empty "Generated data model." docstrings — only emit docstring when the schema has a real type description (818 → 0). 4. Single-field tuple variants use 'value' instead of 'field_0' for the field name (field_0 count: 1448 → 840). 5. Per-type model_rebuild try/except — one failure no longer silently skips all remaining rebuilds. Core-server validated: 60K lines, valid Python, live API works.
Adjacently-tagged enums (error types) now use Pydantic v2 discriminated
unions instead of model_validator/model_serializer boilerplate:
# Before: RootModel + 25-line validator/serializer per enum
class MyError(RootModel[Union[...]]):
@model_validator(mode='before') # 15 lines of dispatch
@model_serializer # 10 lines of serialization
# After: Per-variant models with Literal tag discriminator
class MyErrorUnauthorized(BaseModel):
error_type: Literal["unauthorized"] = "unauthorized"
error_details: auth.AuthError
MyError = Annotated[Union[...], Field(discriminator="error_type")]
Externally-tagged enums use compact reusable helpers instead of
inlined dispatch logic per enum.
Removed dead template structs and unused dispatch generators.
Core-server: 60K lines, valid Python, live API works.
…el_rebuild Two fixes from review: 1. Externally-tagged enum serializer used `r.value` but the actual field name on newtype variant models is `field_0`. This caused AttributeError at runtime when serializing externally-tagged enums with tuple variants. 2. Replace N individual try/except model_rebuild blocks with a single compact loop. For core-server this reduces ~1330 individual blocks to one loop with a list of models.
model_rebuild was using namespace-dotted references (e.g. analytics.AnalyticsEventInsertData) but namespace classes alias types with stuttering removed (analytics.EventInsertData). The dotted name doesn't match any attribute, causing AttributeError on import for schemas with namespaced types. Fix: use improve_class_name() (flat names) instead of type_name_to_python_ref() (dotted names) since model_rebuild operates on module-level class definitions. Validated against core-server (284 endpoints) — imports and runs correctly. Live API test confirms auth, deserialization, and error handling all work end-to-end.
…erializer aliases
Fixes from expert review:
1. Hash truncation: format was {:08x} but u64 produces 16 hex chars,
making truncated names 88 chars instead of 80. Now truncates hash
to 8 chars explicitly.
2. Stuttering removal: was over-aggressive — comparing all adjacent
pairs could collapse multiple levels. Now only removes the segment
immediately before the leaf when it stutters.
3. Dot-name branch: early return bypassed truncation logic. Fixed to
flow through the common truncation path.
4. Serializer by_alias: r.model_dump() in externally-tagged enum
serializers didn't pass by_alias=True, so aliased fields (e.g.
min_/max_ -> min/max) would produce wrong wire format.
5. Dead code: removed unreachable Generic patching in
render_adjacently_tagged_enum (UnionClass handles this).
6. Single-element tuple: (Foo) is not a tuple in Python, now (Foo,).
7. Unused import: removed PrivateAttr from pydantic imports.
Add `format: bool` to the Python codegen Config, aligning with the TypeScript and Rust codegen configs. When false, skips the ruff formatting step and uses basic formatting only, producing deterministic output regardless of whether ruff is available. Defaults to true (existing behavior).
Move PythonTypeCodegenConfig and all Python type mapping data out of the schema/reflection layers and into the Python codegen backend. The mapping is now a single LazyLock<HashMap> in python.rs, matching how TypeScript handles its equivalent build_implemented_types(). This removes ~420 lines of schema-layer plumbing and impl file boilerplate while producing byte-identical generated output. Addresses review feedback on #128.
…xtend - Extract resolve_field_optionality() helper, replacing ~8 copy-pasted blocks (~100 lines removed). Fixes double | None wrapping on Option fields and missing = None defaults in adjacently-tagged enum variants. - Inline render_internally_tagged_enum pass-through into _core - Remove dead Type::codegen_config() accessor and unused SymbolId convenience constructors (enum_id, endpoint_id, variant_id, field_id) - Remove heuristic type guessing in map_external_type_to_python (types are now in the static mapping or genuinely unknown) - Remove redundant inner use import, fix stale comment - Fix builder extend() silently dropping other.errors and other.validators
447d815 to
6e265fa
Compare
The |
…table SymbolIds are a compiler concept needed during normalization and codegen, not part of the interchange format. Previously they lived on raw schema structs (Schema, Function, Primitive, Struct, Field, Enum, Variant) with `#[serde(skip_serializing, default)]` and were lazily assigned by `ensure_symbol_ids()`. Now: - Raw schema types no longer carry `id: SymbolId` fields - `build_schema_ids(&Schema) -> SchemaIds` builds a side table - The normalizer stores SchemaIds in its context and uses it during symbol discovery and SemanticSchema construction - SemanticSchema types keep their `id` fields (correct home) - Manual PartialEq/Hash impls that excluded `id` replaced with derives This addresses #129 (compiler/interchange boundary) and Andrey's review feedback on #128 about Default::default() SymbolId assignment. Generated output is byte-identical (zero snapshot changes).
|
@avkonst |
Summary
Follow-up to #124 and the review on that merge.
This PR keeps the earlier Python DX improvements, then fixes the underlying ownership problem in the Python codegen architecture and closes the remaining generator regressions found when trying it against a larger real schema.
PythonTypeCodegenConfigplumbing throughreflectapi-schema, semantic IR, derive tokenization, and reflection traitsSchema.id, but model it correctly by introducingSymbolKind::Schemainstead of treating the schema root like a struct__init__.pyre-exportsClientinstead of the non-existentSyncClientGenerated enum.when no real description existsfor_codegen()references, and tighten the current-state claimsWhy
In reviewing #124 @avkonst pointed out the Python backend should not need to enumerate all known Rust/library type IDs in a central static table. That couples backend rendering to type-definition knowledge and makes extension harder.
With this change, Python-specific type/rendering metadata is declared alongside the type reflection implementation, which gives a cleaner compiler boundary and a better foundation for Python DX work going forward.
The follow-up generator fixes matter for correctness and usability:
__init__.pyexports make the generated package importable without downstream workaroundsNotes
PipelineBuilderplus semantic IR, while Rust, TypeScript, and OpenAPI still mostly render from raw schema after their own preprocessingTest plan
cargo fmt --all -- --checkcargo clippy --all-targets --all-features -- -D warningscargo test --workspacecargo test --doccargo test --doc --all-featurescargo doc --no-deps --all-features --document-private-itemscargo test -p reflectapi-schema -p reflectapi --libcargo test -p reflectapi-demo --libcargo test -p reflectapi-demo namespace -- --nocapturecargo test -p reflectapi-demo datetime -- --nocapturecargo test -p reflectapi-demo option_of_option -- --nocapturecargo test -p reflectapi-demo flatten_internally_tagged_enum_field -- --nocapturecargo test -p reflectapi-demo reflectapi_struct_with_all_primitive_type_fields -- --nocapturecargo test -p reflectapi-demo external_impls -- --nocapturePATH=/home/brian/.cargo/bin:$PATH /home/brian/.cargo/bin/mdbook build(indocs/)Follow-ups / Caveats
cargo docstill emits the existing output filename collision warning between thereflectapilib docs andreflectapi-clibin docsbuf_reduxandmultipart