diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0d1adf38e..772dc157b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -79,6 +79,8 @@ jobs: lake-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('lean-toolchain') }}-${{ hashFiles('lake-manifest.json') }}-${{ hashFiles('**/*.st') }} lake-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('lean-toolchain') }}-${{ hashFiles('lake-manifest.json') }} lake-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('lean-toolchain') }} + - name: Download ion-java jar for Java codegen test + run: wget -q -O StrataTest/DDM/Integration/Java/testdata/ion-java-1.11.11.jar https://github.com/amazon-ion/ion-java/releases/download/v1.11.11/ion-java-1.11.11.jar - name: Build and test Strata uses: leanprover/lean-action@v1 with: @@ -88,6 +90,10 @@ jobs: with: path: .lake key: lake-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('lean-toolchain') }}-${{ hashFiles('lake-manifest.json') }}-${{ hashFiles('**/*.st') }}-${{ github.sha }} + - name: Verify Java testdata is up to date + run: | + StrataTest/DDM/Integration/Java/regenerate-testdata.sh + git diff --exit-code StrataTest/DDM/Integration/Java/testdata/ - name: Build and run strata verify run: lake exe strata verify Examples/SimpleProc.core.st - name: Build BoogieToStrata diff --git a/Strata/DDM/AST.lean b/Strata/DDM/AST.lean index 121919582..d9e44b1d5 100644 --- a/Strata/DDM/AST.lean +++ b/Strata/DDM/AST.lean @@ -210,7 +210,9 @@ def fromCategoryName? : QualifiedIdent → Option SepFormat | q`Init.SpacePrefixSepBy => some .spacePrefix | q`Init.NewlineSepBy => some .newline | q`Init.SemicolonSepBy => some .semicolon - | _ => none + | _ => .none + +#guard fromCategoryName? 
⟨"Init", "Ident"⟩ == .none instance : ToString SepFormat where toString := SepFormat.toString diff --git a/Strata/DDM/Integration/Java/Gen.lean b/Strata/DDM/Integration/Java/Gen.lean index 8dce4a6bc..a0a9a0c58 100644 --- a/Strata/DDM/Integration/Java/Gen.lean +++ b/Strata/DDM/Integration/Java/Gen.lean @@ -17,14 +17,14 @@ open Strata.DDM.Integration (primitiveCategories forbiddenCategories abstractCat /-! # Java Code Generator for DDM Dialects Generates Java source files from DDM dialect definitions: -- Sealed interfaces for categories with operators +- Sealed interfaces for categories, with operator records as nested types - Non-sealed stub interfaces for abstract categories (e.g., Init.Expr) -- Record classes for operators +- Generated `toIon` methods on each record for serialization - Static factory methods for ergonomic AST construction -- Ion serializer for Lean interop +- Slim Ion serializer with helper methods (no reflection) All names are disambiguated to avoid collisions with Java reserved words, -base classes (Node, SourceRange), and each other. +base classes (Node, SourceRange, IonSerializer), and each other. -/ /-! ## Name Utilities -/ @@ -39,7 +39,7 @@ def javaReservedWords : Std.HashSet String := Std.HashSet.ofList [ "strictfp", "super", "switch", "synchronized", "this", "throw", "throws", "transient", "try", "void", "volatile", "while", -- Contextual keywords (restricted in some contexts) - "exports", "module", "non-sealed", "open", "opens", "permits", "provides", + "exports", "module", "open", "opens", "permits", "provides", "record", "sealed", "to", "transitive", "uses", "var", "when", "with", "yield", -- Literals (cannot be used as identifiers) "true", "false", "null", @@ -136,10 +136,6 @@ def argDeclKindToJavaType : ArgDeclKind → JavaType | .type _ => .simple "Expr" | .cat c => syntaxCatToJavaType c -/-- Get Ion separator name for a list category, or none if not a list. 
-/ -def getSeparator (c : SyntaxCat) : Option String := - SepFormat.fromCategoryName? c.name |>.map SepFormat.toIonName - /-- Extract the QualifiedIdent for categories that need Java interfaces, or none for primitives. -/ partial def syntaxCatToQualifiedName (cat : SyntaxCat) : Option QualifiedIdent := if primitiveCategories.contains cat.name then none @@ -151,35 +147,72 @@ partial def syntaxCatToQualifiedName (cat : SyntaxCat) : Option QualifiedIdent : | ⟨"Init", _⟩ => none | qid => some qid +/-! ## Serialization Code Generation -/ + +/-- Maps a primitive Init category to its serializer method name, or none for non-primitives. -/ +def primitiveSerializerMethod (qid : QualifiedIdent) : Option String := + match qid with + | q`Init.Ident => some "serializeIdent" + | q`Init.Str => some "serializeStrlit" + | q`Init.Num => some "serializeNum" + | q`Init.Decimal => some "serializeDecimal" + | q`Init.Bool => some "serializeBool" + | q`Init.ByteArray => some "serializeBytes" + | _ => .none + +/-- Get the serializer method reference for a SyntaxCat's inner type (used in Option/List). -/ +partial def serializerFnRef (c : SyntaxCat) : String := + match primitiveSerializerMethod c.name with + | some method => s!"$s::{method}" + | none => "$s::serialize" + +/-- Generate the serialization expression for a single field. -/ +partial def serializeFieldExpr (kind : ArgDeclKind) (fieldName : String) : String := + match kind with + | .type _ => s!"$s.serialize({fieldName})" + | .cat c => + match primitiveSerializerMethod c.name with + | some method => s!"$s.{method}({fieldName})" + | none => + if abstractCategories.contains c.name then s!"$s.serialize({fieldName})" + else match c.name with + | q`Init.Option => + let inner := c.args[0]! + s!"$s.serializeOption({fieldName}, {serializerFnRef inner})" + | q`Init.Seq | q`Init.CommaSepBy | q`Init.NewlineSepBy | q`Init.SpaceSepBy + | q`Init.SpacePrefixSepBy | q`Init.SemicolonSepBy => + let inner := c.args[0]! 
+ let sep := (SepFormat.fromCategoryName? c.name).get!.toIonName + s!"$s.serializeSeq({fieldName}, \"{sep}\", {serializerFnRef inner})" + | _ => s!"$s.serialize({fieldName})" + /-! ## Java Structures -/ structure JavaField where name : String type : JavaType -structure JavaRecord where +/-- A nested record within a category interface. -/ +structure JavaCategoryRecord where name : String operationName : QualifiedIdent - implements : String fields : Array JavaField - -structure JavaInterface where - name : String - permits : Array String + argDecls : Array ArgDecl -- original DDM arg declarations for toIon generation /-- All generated Java source files for a dialect. -/ public structure GeneratedFiles where sourceRange : String node : String - interfaces : Array (String × String) -- (filename, content) - records : Array (String × String) - builders : String × String -- (filename, content) + categories : Array (String × String) -- (filename, content) + stubs : Array (String × String) -- (filename, content) for stub interfaces + builders : String × String -- (filename, content) serializer : String deriving Inhabited /-- Mapping from DDM names to disambiguated Java identifiers. 
-/ structure NameAssignments where categories : Std.HashMap QualifiedIdent String + /-- Maps (category, opName) to the nested record name within that category -/ operators : Std.HashMap (QualifiedIdent × String) String stubs : Std.HashMap QualifiedIdent String builders : String @@ -192,26 +225,12 @@ def argDeclToJavaField (decl : ArgDecl) : JavaField := def JavaField.toParam (f : JavaField) : String := s!"{f.type.toJava} {f.name}" -def JavaRecord.toJava (package : String) (r : JavaRecord) : String := - let params := String.intercalate ", " (r.fields.toList.map JavaField.toParam) - let opName := s!"{r.operationName.dialect}.{r.operationName.name}" -s!"package {package}; - -public record {r.name}( - SourceRange sourceRange{if r.fields.isEmpty then "" else ",\n " ++ params} -) implements {r.implements} \{ - @Override - public java.lang.String operationName() \{ return \"{opName}\"; } -} -" - -def JavaInterface.toJava (package : String) (i : JavaInterface) : String := - let permits := if i.permits.isEmpty then "" - else " permits " ++ String.intercalate ", " i.permits.toList -s!"package {package}; - -public sealed interface {i.name} extends Node{permits} \{} -" +/-- Group operators by their target category, preserving declaration order. -/ +def groupOpsByCategory (d : Dialect) : Std.HashMap QualifiedIdent (Array OpDecl) := + d.declarations.foldl (init := {}) fun acc decl => + match decl with + | .op op => acc.alter op.category (fun ops? 
=> some ((ops?.getD #[]).push op)) + | _ => acc def templatePackage := "com.strata.template" @@ -233,11 +252,45 @@ def generateNodeInterface (package : String) (categories : List String) : String def generateStubInterface (package : String) (name : String) : String × String := (s!"{name}.java", s!"package {package};\n\npublic non-sealed interface {name} extends Node \{}\n") -def generateSerializer (package : String) (separatorMap : String) : String := +def generateSerializer (package : String) : String := serializerTemplate.replace templatePackage package - |>.replace "/*SEPARATOR_MAP*/" separatorMap -/-- Assign unique Java names to all generated types -/ +/-- Generate a nested record definition within a category interface. -/ +def generateRecord (catName : String) (r : JavaCategoryRecord) : String := + let params := ", ".intercalate (r.fields.toList.map JavaField.toParam) + let opName := s!"{r.operationName.dialect}.{r.operationName.name}" + let fieldSerializations := r.argDecls.toList.map fun arg => + let fieldName := escapeJavaName arg.ident + s!" sexp.add({serializeFieldExpr arg.kind (fieldName ++ "()")});" + let toIonBody := "\n".intercalate + (s!" var sexp = $s.newOp(\"{opName}\", sourceRange());" + :: fieldSerializations ++ [" return sexp;"]) + s!" public record {r.name}( + SourceRange sourceRange{if r.fields.isEmpty then "" else ",\n " ++ params} + ) implements {catName} \{ + @Override + public java.lang.String operationName() \{ return \"{opName}\"; } + + @Override + public com.amazon.ion.IonSexp toIon(IonSerializer $s) \{ +{toIonBody} + } + }" + +/-- Generate a category file with sealed interface and nested records. 
-/ +def generateCategoryFile (package : String) (catName : String) (records : Array JavaCategoryRecord) : String := + let permits := if records.isEmpty then "" + else " permits " ++ ", ".intercalate (records.toList.map fun r => s!"{catName}.{r.name}") + let body := "\n\n".intercalate (records.toList.map (generateRecord catName)) + s!"package {package}; + +public sealed interface {catName} extends Node{permits} \{ +{body} +} +" + +/-- Assign unique Java names to all generated types. + Operator names are scoped within their category (nested records). -/ def assignAllNames (d : Dialect) : NameAssignments := let baseNames : Std.HashSet String := Std.HashSet.ofList ["node", "sourcerange", "ionserializer"] @@ -259,7 +312,7 @@ def assignAllNames (d : Dialect) : NameAssignments := -- All QualifiedIdents that need Java names (categories + refs) let allQids := cats ++ refs.toArray.filter (!cats.contains ·) - -- Count name occurrences to detect collisions + -- Count name occurrences to detect collisions across categories let nameCounts : Std.HashMap String Nat := allQids.foldl (init := {}) fun m qid => m.alter qid.name (fun v => some (v.getD 0 + 1)) @@ -270,51 +323,39 @@ def assignAllNames (d : Dialect) : NameAssignments := else escapeJavaName (toPascalCase qid.name) disambiguate base used - -- Assign category names + -- Assign category names (top-level, must be globally unique) let catInit : Std.HashMap QualifiedIdent String × Std.HashSet String := ({}, baseNames) let (categoryNames, used) := cats.foldl (init := catInit) fun (map, used) cat => let (name, newUsed) := assignName used cat (map.insert cat name, newUsed) - -- Assign operator names - let opInit : Std.HashMap (QualifiedIdent × String) String × Std.HashSet String := ({}, used) - let (operatorNames, used) := d.declarations.foldl (init := opInit) fun (map, used) decl => - match decl with - | .op op => + -- Assign operator names (nested within category, must be unique within category + not collide with category name) + 
let opsByCategory := groupOpsByCategory d + + let operatorNames := opsByCategory.toList.foldl (init := ({})) fun opNames (cat, ops) => + let catName := categoryNames[cat]! + -- Within each category, operator names must be unique and not collide with the category name + let localUsed : Std.HashSet String := Std.HashSet.ofList [catName.toLower] + let (opNames, _) := ops.foldl (init := (opNames, localUsed)) fun (opNames, localUsed) op => let base := escapeJavaName (toPascalCase op.name) - let (name, newUsed) := disambiguate base used - (map.insert (op.category, op.name) name, newUsed) - | _ => (map, used) + -- For single-op categories where the op name matches the category, use "Of" + let base := if ops.size == 1 && base.toLower == catName.toLower then "Of" else base + let (name, newLocalUsed) := disambiguate base localUsed + (opNames.insert (op.category, op.name) name, newLocalUsed) + opNames -- Assign stub names (referenced types not in this dialect's categories) let stubInit : Std.HashMap QualifiedIdent String × Std.HashSet String := ({}, used) - let (stubNames, used) := refs.toArray.foldl (init := stubInit) fun (map, used) ref => + let (stubNames, _used) := refs.toArray.foldl (init := stubInit) fun (map, used) ref => if categoryNames.contains ref then (map, used) else let (name, newUsed) := assignName used ref (map.insert ref name, newUsed) - let (buildersName, _) := disambiguate d.name used + let (buildersName, _) := disambiguate (escapeJavaName (toPascalCase d.name)) _used { categories := categoryNames, operators := operatorNames, stubs := stubNames, builders := buildersName } -/-- Group operators by their target category -/ -def groupOpsByCategory (d : Dialect) (names : NameAssignments) - : Std.HashMap QualifiedIdent (Array String) := - d.declarations.foldl (init := {}) fun acc decl => - match decl with - | .op op => - let javaName := names.operators[(op.category, op.name)]! - acc.alter op.category (fun ops? 
=> some ((ops?.getD #[]).push javaName)) - | _ => acc - -def opDeclToJavaRecord (dialectName : String) (names : NameAssignments) (op : OpDecl) - : JavaRecord := - { name := names.operators[(op.category, op.name)]! - operationName := ⟨dialectName, op.name⟩ - implements := names.categories[op.category]! - fields := op.argDecls.toArray.map argDeclToJavaField } - def generateBuilders (package : String) (dialectName : String) (d : Dialect) (names : NameAssignments) : String := let methods (op : OpDecl) := let (ps, as, checks) := op.argDecls.toArray @@ -322,7 +363,6 @@ def generateBuilders (package : String) (dialectName : String) (d : Dialect) (na let name := escapeJavaName decl.ident let cat := decl.kind.categoryOf.name if cat == q`Init.Num then - -- Long parameter must be non-negative. (ps.push s!"long {name}", as.push s!"java.math.BigInteger.valueOf({name})", checks.push s!"if ({name} < 0) throw new IllegalArgumentException(\"{name} must be non-negative\");") @@ -335,14 +375,12 @@ def generateBuilders (package : String) (dialectName : String) (d : Dialect) (na (ps.push s!"{t} {name}", as.push name, checks) let methodName := escapeJavaName op.name let returnType := names.categories[op.category]! - let recordName := names.operators[(op.category, op.name)]! + let recordName := s!"{returnType}.{names.operators[(op.category, op.name)]!}" let checksStr := if checks.isEmpty then "" else " ".intercalate checks.toList ++ " " let argsStr := if as.isEmpty then "" else ", " ++ ", ".intercalate as.toList let paramsStr := ", ".intercalate ps.toList - -- Overload with SourceRange parameter let srParams := if ps.isEmpty then "SourceRange sourceRange" else s!"SourceRange sourceRange, {paramsStr}" let withSR := s!" public static {returnType} {methodName}({srParams}) \{ {checksStr}return new {recordName}(sourceRange{argsStr}); }" - -- Convenience overload without SourceRange let withoutSR := s!" 
public static {returnType} {methodName}({paramsStr}) \{ {checksStr}return new {recordName}(SourceRange.NONE{argsStr}); }" s!"{withSR}\n{withoutSR}" let allMethods := d.declarations.filterMap fun | .op op => some (methods op) | _ => none @@ -350,7 +388,6 @@ def generateBuilders (package : String) (dialectName : String) (d : Dialect) (na public def generateDialect (d : Dialect) (package : String) : Except String GeneratedFiles := do let names := assignAllNames d - let opsByCategory := groupOpsByCategory d names -- Check for unsupported declarations for decl in d.declarations do @@ -359,55 +396,35 @@ public def generateDialect (d : Dialect) (package : String) : Except String Gene | .function f => throw s!"function declaration '{f.name}' is not supported in Java generation" | _ => pure () - -- Categories with operators get sealed interfaces with permits clauses - let sealedInterfaces := opsByCategory.toList.map fun (cat, ops) => - let name := names.categories[cat]! - let iface : JavaInterface := { name, permits := ops } - (s!"{name}.java", iface.toJava package) + -- Group operators by category (preserving declaration order) + let opsByCategory := groupOpsByCategory d + + -- Generate category files (sealed interface + nested records) + let categoryFiles := opsByCategory.toList.map fun (cat, ops) => + let catName := names.categories[cat]! + let records := ops.map fun op => + let recName := names.operators[(op.category, op.name)]! 
+ { name := recName + operationName := ⟨d.name, op.name⟩ + fields := op.argDecls.toArray.map argDeclToJavaField + argDecls := op.argDecls.toArray : JavaCategoryRecord } + (s!"{catName}.java", generateCategoryFile package catName records) -- Stub interfaces for referenced types without operators - let stubInterfaces := names.stubs.toList.map fun (_, name) => + let stubFiles := names.stubs.toList.map fun (_, name) => generateStubInterface package name - -- Generate records for operators - let records := d.declarations.toList.filterMap fun decl => - match decl with - | .op op => - let name := names.operators[(op.category, op.name)]! - some (s!"{name}.java", (opDeclToJavaRecord d.name names op).toJava package) - | _ => none - - -- All interface names for Node permits clause - let allInterfaceNames := - sealedInterfaces ++ stubInterfaces - |>.map (·.1.dropEnd 5 |>.toString) - - -- Generate separator map for list fields - let separatorEntries := d.declarations.toList.filterMap fun decl => - match decl with - | .op op => - let opName := s!"{d.name}.{op.name}" - let fieldEntries := op.argDecls.toArray.toList.filterMap fun arg => - match arg.kind with - | .cat c => match getSeparator c with - | some sep => some s!"\"{escapeJavaName arg.ident}\", \"{sep}\"" - | none => none - | _ => none - if fieldEntries.isEmpty then none - else - let inner := fieldEntries.map fun e => s!"java.util.Map.entry({e})" - some s!" 
java.util.Map.entry(\"{opName}\", java.util.Map.ofEntries({", ".intercalate inner}))" - | _ => none - let separatorMap := if separatorEntries.isEmpty then "java.util.Map.of()" - else s!"java.util.Map.ofEntries(\n{",\n".intercalate separatorEntries})" + -- All type names for Node permits clause + let allTypeNames := categoryFiles.map (·.1.dropEnd 5 |>.toString) + ++ stubFiles.map (·.1.dropEnd 5 |>.toString) return { sourceRange := generateSourceRange package - node := generateNodeInterface package allInterfaceNames - interfaces := sealedInterfaces.toArray ++ stubInterfaces.toArray - records := records.toArray + node := generateNodeInterface package allTypeNames + categories := categoryFiles.toArray + stubs := stubFiles.toArray builders := (s!"{names.builders}.java", generateBuilders package names.builders d names) - serializer := generateSerializer package separatorMap + serializer := generateSerializer package } /-! ## File Output -/ @@ -425,10 +442,10 @@ public def writeJavaFiles (baseDir : System.FilePath) (package : String) (files IO.FS.writeFile (dir / "IonSerializer.java") files.serializer IO.FS.writeFile (dir / files.builders.1) files.builders.2 - for (filename, content) in files.interfaces do + for (filename, content) in files.categories do IO.FS.writeFile (dir / filename) content - for (filename, content) in files.records do + for (filename, content) in files.stubs do IO.FS.writeFile (dir / filename) content end Strata.Java diff --git a/Strata/DDM/Integration/Java/templates/IonSerializer.java b/Strata/DDM/Integration/Java/templates/IonSerializer.java index ae1d51221..c39044a04 100644 --- a/Strata/DDM/Integration/Java/templates/IonSerializer.java +++ b/Strata/DDM/Integration/Java/templates/IonSerializer.java @@ -6,99 +6,55 @@ public class IonSerializer { private final IonSystem ion; - private static final java.util.Map> SEPARATORS = /*SEPARATOR_MAP*/; - public IonSerializer(IonSystem ion) { this.ion = ion; } /** Serialize a node as a top-level command (no 
"op" wrapper). */ public IonValue serializeCommand(Node node) { - return serializeNode(node); + return node.toIon(this); } /** Serialize a node as an argument (with "op" wrapper). */ public IonValue serialize(Node node) { - return wrapOp(serializeNode(node)); - } - - private IonSexp serializeNode(Node node) { - IonSexp sexp = ion.newEmptySexp(); - String opName = node.operationName(); - sexp.add(ion.newSymbol(opName)); - sexp.add(serializeSourceRange(node.sourceRange())); - - var fieldSeps = SEPARATORS.getOrDefault(opName, java.util.Map.of()); - for (var component : node.getClass().getRecordComponents()) { - if (component.getName().equals("sourceRange")) continue; - try { - java.lang.Object value = component.getAccessor().invoke(node); - String sep = fieldSeps.get(component.getName()); - sexp.add(serializeArg(value, sep, component.getType())); - } catch (java.lang.Exception e) { - throw new java.lang.RuntimeException("Failed to serialize " + component.getName(), e); - } - } - return sexp; - } - - private IonValue wrapOp(IonValue inner) { IonSexp sexp = ion.newEmptySexp(); sexp.add(ion.newSymbol("op")); - sexp.add(inner); + sexp.add(node.toIon(this)); return sexp; } - private IonValue serializeSourceRange(SourceRange sr) { + /** Create an s-expression with operation name and source range. 
*/ + public IonSexp newOp(java.lang.String opName, SourceRange sr) { + IonSexp sexp = ion.newEmptySexp(); + sexp.add(ion.newSymbol(opName)); if (sr.start() == 0 && sr.stop() == 0) { - return ion.newNull(); + sexp.add(ion.newNull()); + } else { + IonSexp range = ion.newEmptySexp(); + range.add(ion.newInt(sr.start())); + range.add(ion.newInt(sr.stop())); + sexp.add(range); } - IonSexp sexp = ion.newEmptySexp(); - sexp.add(ion.newInt(sr.start())); - sexp.add(ion.newInt(sr.stop())); return sexp; } - private IonValue serializeArg(java.lang.Object value, String sep, java.lang.Class type) { - if (value == null) { - return serializeOption(java.util.Optional.empty()); - } - if (value instanceof Node n) { - return serialize(n); - } - if (value instanceof java.lang.String s) { - return serializeIdent(s); - } - if (value instanceof java.math.BigInteger bi) { - return serializeNum(bi); - } - if (value instanceof java.math.BigDecimal bd) { - return serializeDecimal(bd); - } - if (value instanceof byte[] bytes) { - return serializeBytes(bytes); - } - if (value instanceof java.lang.Boolean b) { - return serializeBool(b); - } - if (value instanceof java.util.Optional opt) { - return serializeOption(opt); - } - if (value instanceof java.util.List list) { - return serializeSeq(list, sep != null ? 
sep : "seq"); - } - throw new java.lang.IllegalArgumentException("Unsupported type: " + type); + public IonValue serializeIdent(java.lang.String s) { + IonSexp sexp = ion.newEmptySexp(); + sexp.add(ion.newSymbol("ident")); + sexp.add(ion.newNull()); + sexp.add(ion.newString(s)); + return sexp; } - private IonValue serializeIdent(java.lang.String s) { + public IonValue serializeStrlit(java.lang.String s) { IonSexp sexp = ion.newEmptySexp(); - sexp.add(ion.newSymbol("ident")); + sexp.add(ion.newSymbol("strlit")); sexp.add(ion.newNull()); sexp.add(ion.newString(s)); return sexp; } - private IonValue serializeNum(java.math.BigInteger n) { + public IonValue serializeNum(java.math.BigInteger n) { IonSexp sexp = ion.newEmptySexp(); sexp.add(ion.newSymbol("num")); sexp.add(ion.newNull()); @@ -106,7 +62,7 @@ private IonValue serializeNum(java.math.BigInteger n) { return sexp; } - private IonValue serializeDecimal(java.math.BigDecimal d) { + public IonValue serializeDecimal(java.math.BigDecimal d) { IonSexp sexp = ion.newEmptySexp(); sexp.add(ion.newSymbol("decimal")); sexp.add(ion.newNull()); @@ -114,7 +70,7 @@ private IonValue serializeDecimal(java.math.BigDecimal d) { return sexp; } - private IonValue serializeBytes(byte[] bytes) { + public IonValue serializeBytes(byte[] bytes) { IonSexp sexp = ion.newEmptySexp(); sexp.add(ion.newSymbol("bytes")); sexp.add(ion.newNull()); @@ -122,29 +78,30 @@ private IonValue serializeBytes(byte[] bytes) { return sexp; } - private IonValue serializeBool(boolean b) { + public IonValue serializeBool(boolean b) { IonSexp inner = ion.newEmptySexp(); inner.add(ion.newSymbol(b ? 
"Init.boolTrue" : "Init.boolFalse")); inner.add(ion.newNull()); - return wrapOp(inner); + IonSexp sexp = ion.newEmptySexp(); + sexp.add(ion.newSymbol("op")); + sexp.add(inner); + return sexp; } - private IonValue serializeOption(java.util.Optional opt) { + public IonValue serializeOption(java.util.Optional opt, java.util.function.Function f) { IonSexp sexp = ion.newEmptySexp(); sexp.add(ion.newSymbol("option")); sexp.add(ion.newNull()); - if (opt.isPresent()) { - sexp.add(serializeArg(opt.get(), null, opt.get().getClass())); - } + opt.ifPresent(v -> sexp.add(f.apply(v))); return sexp; } - private IonValue serializeSeq(java.util.List list, String sepType) { + public IonValue serializeSeq(java.util.List list, java.lang.String sepType, java.util.function.Function f) { IonSexp sexp = ion.newEmptySexp(); sexp.add(ion.newSymbol(sepType)); sexp.add(ion.newNull()); - for (java.lang.Object item : list) { - sexp.add(serializeArg(item, null, item.getClass())); + for (T item : list) { + sexp.add(f.apply(item)); } return sexp; } diff --git a/Strata/DDM/Integration/Java/templates/Node.java b/Strata/DDM/Integration/Java/templates/Node.java index e09ae3bda..ca639c793 100644 --- a/Strata/DDM/Integration/Java/templates/Node.java +++ b/Strata/DDM/Integration/Java/templates/Node.java @@ -1,6 +1,9 @@ package com.strata.template; +import com.amazon.ion.IonSexp; + public sealed interface Node { SourceRange sourceRange(); java.lang.String operationName(); + IonSexp toIon(IonSerializer $s); } diff --git a/Strata/DDM/Ion.lean b/Strata/DDM/Ion.lean index d15603c56..908143406 100644 --- a/Strata/DDM/Ion.lean +++ b/Strata/DDM/Ion.lean @@ -127,12 +127,22 @@ def fromIonName? : String → Option SepFormat | "spacePrefixedList" => some .spacePrefix | "newlineSepList" => some .newline | "semicolonSepList" => some .semicolon - | _ => none + | _ => .none theorem fromIonName_toIonName_roundtrip (sep : SepFormat) : fromIonName? 
(toIonName sep) = some sep := by cases sep <;> rfl +/-- Invalid Ion separator names return `none`. -/ +theorem fromIonName_none_of_invalid (s : String) (h : ∀ sep, toIonName sep ≠ s) : + fromIonName? s = .none := by + simp [fromIonName?] + split <;> first + | rfl + | (exfalso; have := h .none; have := h .comma; have := h .space + have := h .spacePrefix; have := h .newline; have := h .semicolon + simp_all [toIonName]) + end SepFormat /-- diff --git a/StrataTest/DDM/Integration/Java/TestGen.lean b/StrataTest/DDM/Integration/Java/TestGen.lean index 390e21834..aa23dbe7d 100644 --- a/StrataTest/DDM/Integration/Java/TestGen.lean +++ b/StrataTest/DDM/Integration/Java/TestGen.lean @@ -19,7 +19,7 @@ open Strata.Java meta def check (s sub : String) : Bool := (s.splitOn sub).length > 1 --- Test 1: Basic dialect with 2 operators +-- Test 1: Basic dialect with 2 operators — nested records in category file #eval do let testDialect : Strata.Dialect := { name := "Test" @@ -46,10 +46,13 @@ meta def check (s sub : String) : Bool := (s.splitOn sub).length > 1 ] } let files := (generateDialect testDialect "com.test").toOption.get! - assert! files.interfaces.any (fun i => check i.2 "sealed interface Expr") - assert! files.records.size = 2 - assert! files.records.any (fun r => check r.1 "Literal") - assert! files.records.any (fun r => check r.1 "Add") + -- One category file containing the interface and both records + assert! files.categories.size = 1 + assert! files.categories.any (fun c => check c.2 "sealed interface Expr") + assert! files.categories.any (fun c => check c.2 "record Literal") + assert! files.categories.any (fun c => check c.2 "record Add") + -- Records have toIon methods + assert! files.categories.any (fun c => check c.2 "toIon") pure () -- Test 2: Reserved word escaping for fields @@ -70,11 +73,12 @@ meta def check (s sub : String) : Bool := (s.splitOn sub).length > 1 ] } let files := (generateDialect testDialect "com.test").toOption.get! - assert! 
files.records.any (fun r => r.1 == "Int.java") - assert! files.records.any (fun r => check r.2 "public_") + -- Single-op category where op name "int" (PascalCase "Int") doesn't match category "Stmt" + assert! files.categories.any (fun c => check c.2 "record Int") + assert! files.categories.any (fun c => check c.2 "public_") pure () --- Test 3: Name collision (operator name matches category name) +-- Test 3: Name collision (single-op, operator name matches category name) → uses "Of" #eval do let testDialect : Strata.Dialect := { name := "Collision" @@ -90,11 +94,12 @@ meta def check (s sub : String) : Bool := (s.splitOn sub).length > 1 ] } let files := (generateDialect testDialect "com.test").toOption.get! - assert! files.interfaces.any (fun i => i.1 == "Expr.java") - assert! files.records.any (fun r => r.1 == "Expr_.java") + -- Single-op category: operator gets "Of" since it collides with category name + assert! files.categories.any (fun c => check c.2 "sealed interface Expr") + assert! 
files.categories.any (fun c => check c.2 "record Of") pure () --- Test 4: Duplicate operator names and reserved word collision +-- Test 4: Duplicate operator names across categories — nested so no global collision #eval do let testDialect : Strata.Dialect := { name := "Dup" @@ -103,14 +108,17 @@ meta def check (s sub : String) : Bool := (s.splitOn sub).length > 1 .syncat { name := "A", argNames := #[] }, .syncat { name := "B", argNames := #[] }, .op { name := "foo", argDecls := .ofArray #[], category := ⟨"Dup", "A"⟩, syntaxDef := .std #[] 0 }, - .op { name := "foo", argDecls := .ofArray #[], category := ⟨"Dup", "B"⟩, syntaxDef := .std #[] 0 }, -- Duplicate + .op { name := "foo", argDecls := .ofArray #[], category := ⟨"Dup", "B"⟩, syntaxDef := .std #[] 0 }, .op { name := "class", argDecls := .ofArray #[], category := ⟨"Dup", "A"⟩, syntaxDef := .std #[] 0 }, - .op { name := "class_", argDecls := .ofArray #[], category := ⟨"Dup", "B"⟩, syntaxDef := .std #[] 0 } -- Would clash after escaping + .op { name := "class_", argDecls := .ofArray #[], category := ⟨"Dup", "B"⟩, syntaxDef := .std #[] 0 } ] } let files := (generateDialect testDialect "com.test").toOption.get! - let recordNames := files.records.map Prod.fst - assert! recordNames.toList.eraseDups.length == recordNames.size + -- Both categories should have their operators as nested records + assert! files.categories.size = 2 + -- A has Foo and Class (class is reserved but Class after PascalCase is not) + assert! files.categories.any (fun c => check c.2 "record Foo" && check c.2 "interface A") + assert! 
files.categories.any (fun c => check c.2 "record Class" && check c.2 "interface A") pure () -- Test 5: Category name collides with base class @@ -119,12 +127,13 @@ meta def check (s sub : String) : Bool := (s.splitOn sub).length > 1 name := "Base" imports := #[] declarations := #[ - .syncat { name := "Node", argNames := #[] }, -- Collides with base class + .syncat { name := "Node", argNames := #[] }, .op { name := "leaf", argDecls := .ofArray #[], category := ⟨"Base", "Node"⟩, syntaxDef := .std #[] 0 } ] } let files := (generateDialect testDialect "com.test").toOption.get! - let allNames := #["Node.java", "SourceRange.java"] ++ files.interfaces.map Prod.fst ++ files.records.map Prod.fst + -- Category "Node" collides with base class, should be disambiguated + let allNames := #["Node.java", "SourceRange.java"] ++ files.categories.map Prod.fst assert! allNames.toList.eraseDups.length == allNames.size pure () @@ -144,8 +153,8 @@ meta def check (s sub : String) : Bool := (s.splitOn sub).length > 1 ] } let files := (generateDialect testDialect "com.test").toOption.get! - assert! files.interfaces.any (fun i => i.1 == "MyCategory.java") - assert! files.records.any (fun r => r.1 == "MyOperator.java") + assert! files.categories.any (fun c => c.1 == "MyCategory.java") + assert! files.categories.any (fun c => check c.2 "record MyOperator") pure () -- Test 7: All DDM types map correctly @@ -173,15 +182,20 @@ meta def check (s sub : String) : Bool := (s.splitOn sub).length > 1 ] } let files := (generateDialect testDialect "com.test").toOption.get! - let record := files.records[0]!.2 - assert! check record "java.lang.String ident" - assert! check record "java.math.BigInteger num" - assert! check record "java.math.BigDecimal dec" - assert! check record "java.lang.String str" - assert! check record "boolean b" - assert! check record "byte[] bytes" - assert! check record "java.util.Optional opt" - assert! 
check record "java.util.List seq" + let catContent := files.categories[0]!.2 + assert! check catContent "java.lang.String ident" + assert! check catContent "java.math.BigInteger num" + assert! check catContent "java.math.BigDecimal dec" + assert! check catContent "java.lang.String str" + assert! check catContent "boolean b" + assert! check catContent "byte[] bytes" + assert! check catContent "java.util.Optional opt" + assert! check catContent "java.util.List seq" + -- Verify toIon uses correct serializers for Ident vs Str + assert! check catContent "serializeIdent(ident())" + assert! check catContent "serializeStrlit(str())" + assert! check catContent "serializeNum(num())" + assert! check catContent "serializeBool(b())" pure () -- Test 8: FQN usage (no imports that could conflict) @@ -200,9 +214,8 @@ meta def check (s sub : String) : Bool := (s.splitOn sub).length > 1 ] } let files := (generateDialect testDialect "com.test").toOption.get! - let record := files.records[0]!.2 - assert! !(check record "import java.") - assert! check record "java.lang.String operationName()" + let catContent := files.categories[0]!.2 + assert! check catContent "java.lang.String operationName()" pure () -- Test 9: Stub interfaces for referenced-but-empty categories @@ -215,7 +228,7 @@ meta def check (s sub : String) : Bool := (s.splitOn sub).length > 1 .op { name := "eval" argDecls := .ofArray #[ - { ident := "e", kind := .cat (.atom .none ⟨"Init", "Expr"⟩) } -- References Init.Expr + { ident := "e", kind := .cat (.atom .none ⟨"Init", "Expr"⟩) } ] category := ⟨"Stub", "Stmt"⟩ syntaxDef := .std #[] 0 @@ -223,8 +236,8 @@ meta def check (s sub : String) : Bool := (s.splitOn sub).length > 1 ] } let files := (generateDialect testDialect "com.test").toOption.get! - assert! files.interfaces.any (fun i => check i.2 "sealed interface Stmt") - assert! files.interfaces.any (fun i => check i.2 "non-sealed interface Expr") + assert! 
files.categories.any (fun c => check c.2 "sealed interface Stmt") + assert! files.stubs.any (fun s => check s.2 "non-sealed interface Expr") pure () -- Test 10: Core dialect returns error (has type/function declarations not yet supported) @@ -260,11 +273,12 @@ elab "#testCoreError" : command => do ] } let files := (generateDialect testDialect "com.test").toOption.get! - -- Should have 2 interfaces: one for A.Num, one stub for B.Num - assert! files.interfaces.size = 2 - let names : List String := files.interfaces.toList.map Prod.fst - assert! names.any (fun n => (n.splitOn "A").length > 1) - assert! names.any (fun n => (n.splitOn "B").length > 1) + -- Category for A.Num + stub for B.Num + assert! files.categories.size = 1 + assert! files.stubs.size = 1 + let allNames := files.categories.map Prod.fst ++ files.stubs.map Prod.fst + assert! allNames.any (fun n => (n.splitOn "A").length > 1) + assert! allNames.any (fun n => (n.splitOn "B").length > 1) pure () -- Test 12: Generated Java compiles (requires javac) @@ -285,15 +299,22 @@ elab "#testCompile" : command => do let dir : System.FilePath := "/tmp/strata-java-test" writeJavaFiles dir "com.test" files - let fileNames := #["SourceRange.java", "Node.java", files.builders.1] - ++ files.interfaces.map Prod.fst - ++ files.records.map Prod.fst + -- ion-java is required for compilation (Node.java imports IonSexp) + let jarPath := "StrataTest/DDM/Integration/Java/testdata/ion-java-1.11.11.jar" + if !(← System.FilePath.pathExists jarPath) then + Lean.logError s!"Test 12 failed: ion-java jar not found at {jarPath}" + IO.FS.removeDirAll dir + return + + let fileNames := #["SourceRange.java", "Node.java", "IonSerializer.java", files.builders.1] + ++ files.categories.map Prod.fst + ++ files.stubs.map Prod.fst let pkgDir := (dir / "com" / "test").toString let filePaths := fileNames.map fun f => pkgDir ++ "/" ++ f let result ← IO.Process.output { cmd := "javac" - args := filePaths + args := #["-cp", jarPath] ++ filePaths } 
IO.FS.removeDirAll dir @@ -320,6 +341,16 @@ elab "#testRoundtrip" : command => do if cmd.name != (⟨"Simple", "block"⟩ : Strata.QualifiedIdent) then Lean.logError "Expected block command"; return if let .seq _ _ stmts := cmd.args[0]! then if stmts.size != 4 then Lean.logError s!"Expected 4 statements, got {stmts.size}" + -- Verify print's msg arg is strlit (not ident) — catches Init.Str serialization bug + if stmts.size < 2 then Lean.logError "Expected at least 2 statements"; return + match stmts[1]! with + | .op op => + if op.name != ⟨"Simple", "print"⟩ then Lean.logError s!"Expected print, got {op.name}" + else match op.args[0]! with + | .strlit _ s => if s != "hello" then Lean.logError s!"Expected 'hello', got '{s}'" + | .ident _ s => Lean.logError s!"print msg is ident '{s}', expected strlit" + | _ => Lean.logError "Expected strlit arg for print" + | _ => Lean.logError "Expected op for stmts[1]" else Lean.logError "Expected seq argument" #testRoundtrip @@ -340,7 +371,6 @@ elab "#testRoundtripFiles" : command => do Lean.logError s!"Expected 2 files, got {files.length}" return - -- Check first file let file1 := files[0]! if file1.filePath != "file1.st" then Lean.logError s!"File 1: Expected path 'file1.st', got '{file1.filePath}'" @@ -358,7 +388,6 @@ elab "#testRoundtripFiles" : command => do else Lean.logError "File 1: Expected seq argument"; return - -- Check second file let file2 := files[1]! 
if file2.filePath != "file2.st" then Lean.logError s!"File 2: Expected path 'file2.st', got '{file2.filePath}'" @@ -390,10 +419,10 @@ elab "#testJavaGenPreloaded" : command => do Lean.logError s!"javaGen on preloaded Laurel dialect failed:\n{result.stdout}\n{result.stderr}" if ← dir.pathExists then IO.FS.removeDirAll dir return - -- Verify some expected files exist + -- Verify some expected files exist (now one file per category) let pkgDir := (dir / "com" / "test" / "laurel").toString let mut missing := false - for expected in #["Node.java", "StmtExpr.java", "Procedure.java"] do + for expected in #["Node.java", "StmtExpr.java", "Procedure.java", "Parameter.java"] do if !(← System.FilePath.pathExists (pkgDir ++ "/" ++ expected)) then Lean.logError s!"Expected file {expected} not found in {pkgDir}" missing := true diff --git a/StrataTest/DDM/Integration/Java/regenerate-testdata.sh b/StrataTest/DDM/Integration/Java/regenerate-testdata.sh index 7163658da..80ff209e3 100755 --- a/StrataTest/DDM/Integration/Java/regenerate-testdata.sh +++ b/StrataTest/DDM/Integration/Java/regenerate-testdata.sh @@ -6,12 +6,12 @@ cd "$(dirname "$0")" STRATA_ROOT="$(cd ../../../.. && pwd)" TESTDATA="testdata" GEN_DIR="testdata/generated" -JAR="testdata/ion-java-1.11.9.jar" +JAR="testdata/ion-java-1.11.11.jar" # Download ion-java if needed if [ ! 
-f "$JAR" ]; then echo "=== Downloading ion-java ===" - curl -sLO --output-dir testdata "https://repo1.maven.org/maven2/com/amazon/ion/ion-java/1.11.9/ion-java-1.11.9.jar" + curl -sLo "$JAR" "https://github.com/amazon-ion/ion-java/releases/download/v1.11.11/ion-java-1.11.11.jar" fi echo "=== Generating Java classes from dialect ===" diff --git a/StrataTest/DDM/Integration/Java/testdata/comprehensive-files.ion b/StrataTest/DDM/Integration/Java/testdata/comprehensive-files.ion index 9361a13ce..c688b9e87 100644 Binary files a/StrataTest/DDM/Integration/Java/testdata/comprehensive-files.ion and b/StrataTest/DDM/Integration/Java/testdata/comprehensive-files.ion differ diff --git a/StrataTest/DDM/Integration/Java/testdata/comprehensive.ion b/StrataTest/DDM/Integration/Java/testdata/comprehensive.ion index 6ee448d58..202ea8bd9 100644 Binary files a/StrataTest/DDM/Integration/Java/testdata/comprehensive.ion and b/StrataTest/DDM/Integration/Java/testdata/comprehensive.ion differ diff --git a/docs/DDMJavaCodeGen.md b/docs/DDMJavaCodeGen.md new file mode 100644 index 000000000..949d62512 --- /dev/null +++ b/docs/DDMJavaCodeGen.md @@ -0,0 +1,377 @@ +# DDM Java Code Generator + +The DDM Java code generator produces a set of Java source files from a +dialect definition. These files form a typed, immutable AST library that +can build dialect programs in Java and serialize them to Ion for +consumption by Strata. + +## Usage from the CLI + +The `strata` CLI provides the `javaGen` command for generating Java source +files directly from a dialect definition. + +``` +strata javaGen <dialect> <package> <output-dir> [--include <path>] +``` + +### Arguments + +| Argument | Description | +|----------|-------------| +| `dialect` | A dialect name (e.g. `Laurel`) or a path to a `.dialect.st` file | +| `package` | Java package name (e.g.
`com.example.mydialect`) | +| `output-dir` | Directory where generated Java files will be written | + +### Flags + +| Flag | Description | +|------|-------------| +| `--include <path>` | Add a dialect search path (may be repeated) | + +### Examples + +Generate Java files from a built-in dialect by name: + +```bash +strata javaGen Laurel com.example.laurel ./generated +``` + +Generate from a dialect file on disk: + +```bash +strata javaGen StrataTest/DDM/Integration/Java/testdata/Simple.dialect.st com.example.simple ./generated +``` + +Use `--include` to add search paths when the dialect references other +dialect files: + +```bash +strata javaGen MyDialect com.example.mydialect ./generated \ + --include ./dialects --include ./deps +``` + +The command creates the Java package directory structure under `output-dir` +and writes all generated files. On success it prints the output path, e.g.: + +``` +Generated Java files for Laurel in ./generated/com/example/laurel +``` + +## Usage from Lean + +```lean +import Strata.DDM.Integration.Java.Gen + +open Strata.Java + +-- Obtain a Dialect value. The CLI builds one via DialectFileMap; +-- see the javaGenCommand in StrataMain.lean for the full pattern. +-- Here we assume `myDialect : Strata.Dialect` is already loaded. + +let files ← IO.ofExcept (generateDialect myDialect "com.example.mypackage") +writeJavaFiles "./generated" "com.example.mypackage" files +``` + +`generateDialect` returns `Except String GeneratedFiles`, failing if the +dialect contains unsupported declarations. `writeJavaFiles` creates the +package directory structure and writes all files.
+ +## Generated Files + +For a dialect named `MyDialect` in package `com.example.mydialect`, the +generator produces the following files under `com/example/mydialect/`: + +| File | Description | +|------|-------------| +| `SourceRange.java` | Source location record | +| `Node.java` | Root sealed interface for all AST nodes | +| `IonSerializer.java` | Serialization helpers for converting AST nodes to Ion | +| `MyDialect.java` | Static factory methods for building AST nodes | +| One `.java` per category | Sealed interface with nested operator records | + +Additionally, for categories referenced by the dialect but not defined in +it (e.g., `Init.Expr`), the generator emits non-sealed stub interfaces. + +## Core Types + +### `SourceRange` + +```java +public record SourceRange(long start, long stop) { + public static final SourceRange NONE = new SourceRange(0, 0); +} +``` + +Every AST node carries a `SourceRange`. Use `SourceRange.NONE` when source +location information is not available. The `start` and `stop` values are +byte offsets into the source file. Note that `SourceRange.NONE` (0, 0) +is indistinguishable from a zero-length range at byte offset 0. + +### `Node` + +```java +import com.amazon.ion.IonSexp; + +public sealed interface Node permits Stmt, Expr, ... { + SourceRange sourceRange(); + java.lang.String operationName(); + IonSexp toIon(IonSerializer $s); +} +``` + +The root of the AST type hierarchy. All category interfaces extend `Node`, +and all operator records implement a category interface. The `permits` +clause lists every generated category interface and stub interface. + +`operationName()` returns the fully qualified DDM operator name +(e.g., `"MyDialect.myOp"`). + +`toIon()` serializes the node to Ion format using the provided +`IonSerializer` helpers. This method is generated for each operator record +with field-specific serialization logic. + +`Node` requires `com.amazon.ion:ion-java` as a compile-time dependency. 
+ +## Category Interfaces and Nested Records + +Each DDM syntactic category becomes a **sealed interface** extending +`Node`, with its operator records defined as **nested types** within the +interface. This mirrors the DDM structure where operators belong to +categories. + +```java +// Category "Stmt" with operators Assign and Return +public sealed interface Stmt extends Node permits Stmt.Assign, Stmt.Return { + public record Assign( + SourceRange sourceRange, + java.lang.String target, Expr value + ) implements Stmt { + @Override public java.lang.String operationName() { return "MyDialect.assign"; } + @Override public com.amazon.ion.IonSexp toIon(IonSerializer $s) { ... } + } + + public record Return( + SourceRange sourceRange, + Expr value + ) implements Stmt { + @Override public java.lang.String operationName() { return "MyDialect.return"; } + @Override public com.amazon.ion.IonSexp toIon(IonSerializer $s) { ... } + } +} +``` + +Usage: + +```java +var assign = new Stmt.Assign(SourceRange.NONE, "x", someExpr); +var ret = new Stmt.Return(SourceRange.NONE, someExpr); +List<Stmt> stmts = List.of(assign, ret); +``` + +### Naming Rules + +When a **single-operator category** has an operator whose name matches the +category name (e.g., `category Parameter` with `op parameter`), the record +is named `Of` to avoid a collision with the enclosing interface: + +```java +public sealed interface Parameter extends Node permits Parameter.Of { + public record Of(SourceRange sourceRange, ...) implements Parameter { ... } +} +// Usage: new Parameter.Of(SourceRange.NONE, name, type) +``` + +When a **multi-operator category** has an operator whose name matches the +category name, the record gets a `_` suffix: + +```java +public sealed interface Procedure extends Node permits Procedure.Procedure_, Procedure.Function { + public record Procedure_(...) implements Procedure { ... } + public record Function(...) implements Procedure { ...
} +} +``` + +### Stub Interfaces + +Categories referenced by operator arguments but not defined in the current +dialect (e.g., `Init.Expr`, `Init.Type`) become **non-sealed stub +interfaces**: + +```java +public non-sealed interface Expr extends Node {} +``` + +This allows users to provide their own implementations for cross-dialect +extension points. + +## Type Mapping + +DDM argument types map to Java types as follows: + +| DDM Type | Java Type | +|----------|-----------| +| `Init.Ident` | `java.lang.String` | +| `Init.Str` | `java.lang.String` | +| `Init.Num` | `java.math.BigInteger` | +| `Init.Decimal` | `java.math.BigDecimal` | +| `Init.Bool` | `boolean` | +| `Init.ByteArray` | `byte[]` | +| `Init.Option T` | `java.util.Optional<T>` | +| `Init.Seq T` | `java.util.List<T>` | +| `Init.CommaSepBy T` | `java.util.List<T>` | +| `Init.SpaceSepBy T` | `java.util.List<T>` | +| `Init.SpacePrefixSepBy T` | `java.util.List<T>` | +| `Init.NewlineSepBy T` | `java.util.List<T>` | +| `Init.SemicolonSepBy T` | `java.util.List<T>` | +| `Init.Expr` (abstract) | `Expr` (stub interface) | +| `Init.Type` (abstract) | `Type_` (stub interface) | +| `Init.TypeP` (abstract) | `TypeP` (stub interface) | +| Type expressions (`:type T`) | `Expr` (stub interface) | +| Dialect-defined category | Generated sealed interface | + +## Factory Class (Builders) + +A static factory class is generated with the dialect's name. It provides +two overloads per operator: + +1. **With `SourceRange`** — first parameter is the source range. +2. **Without `SourceRange`** — uses `SourceRange.NONE` automatically.
+ +```java +public class MyDialect { + public static Stmt assign(SourceRange sourceRange, java.lang.String target, Expr value) { + return new Stmt.Assign(sourceRange, target, value); + } + + public static Stmt assign(java.lang.String target, Expr value) { + return new Stmt.Assign(SourceRange.NONE, target, value); + } +} +``` + +Factory method names preserve the original DDM operator name (with +reserved word escaping), while record and interface names use PascalCase. + +### Numeric Convenience + +For `Init.Num` arguments, the factory accepts `long` instead of +`BigInteger` and converts internally. A runtime check rejects negative +values. Values larger than `Long.MAX_VALUE` can be created by constructing +the record directly with `BigInteger`. + +For `Init.Decimal` arguments, the factory accepts `double` and converts +via `BigDecimal.valueOf`. The `double` type has limited precision (~15-17 +significant digits); for higher precision, construct the record directly +with `BigDecimal`. + +## Ion Serializer + +`IonSerializer` provides helper methods for converting AST nodes to Ion +format. Each generated record has a `toIon` method that uses these helpers +to serialize its fields with the correct Ion format. + +Requires `com.amazon.ion:ion-java` as a runtime dependency. 
+ +```java +IonSystem ion = IonSystemBuilder.standard().build(); +IonSerializer serializer = new IonSerializer(ion); +``` + +### Methods + +| Method | Description | +|--------|-------------| +| `serializeCommand(Node)` | Serialize a top-level command (no `op` wrapper) | +| `serialize(Node)` | Serialize a node as an argument (wrapped in `(op ...)`) | + +### Multi-file Format + +When serializing multiple source files (e.g., for a multi-file Java +project), the convention is to wrap each file's program in a struct with +`filePath` and `program` fields, collected into an Ion list: + +```java +IonList files = ion.newEmptyList(); +for (var sourceFile : sourceFiles) { + IonStruct entry = ion.newEmptyStruct(); + entry.put("filePath", ion.newString(sourceFile.path())); + + IonList program = ion.newEmptyList(); + IonSexp header = ion.newEmptySexp(); + header.add(ion.newSymbol("program")); + header.add(ion.newString("Laurel")); + program.add(header); + for (var command : sourceFile.commands()) { + program.add(serializer.serializeCommand(command)); + } + entry.put("program", program); + files.add(entry); +} +``` + +Each program starts with a header s-expression `(program "DialectName")`. 
+ +### Serialization Format + +The serializer produces Ion expressions matching Strata's internal +representation: + +- **Operators**: `(Dialect.opName ...)` +- **Arguments (nested nodes)**: `(op (Dialect.opName ...))` +- **Identifiers** (`Init.Ident`): `(ident null "name")` +- **String literals** (`Init.Str`): `(strlit null "value")` +- **Numbers**: `(num null <value>)` +- **Decimals**: `(decimal null <value>)` +- **Booleans**: `(op (Init.boolTrue null))` or `(op (Init.boolFalse null))` +- **Byte arrays**: `(bytes null <blob>)` +- **Optionals**: `(option null)` (empty) or `(option null <value>)` (present) +- **Lists**: `(<sep> null ...)` where `<sep>` is one of: + - `seq` — `Init.Seq` + - `commaSepList` — `Init.CommaSepBy` + - `spaceSepList` — `Init.SpaceSepBy` + - `spacePrefixedList` — `Init.SpacePrefixSepBy` + - `newlineSepList` — `Init.NewlineSepBy` + - `semicolonSepList` — `Init.SemicolonSepBy` +- **Source ranges**: `(<start> <stop>)` or `null` for `SourceRange.NONE` + +The correct serialization method for each field is determined at generation +time from the DDM type and embedded directly in the record's `toIon` +method. + +## Name Disambiguation + +The generator avoids name collisions through several mechanisms: + +1. **Java reserved words** — a trailing `_` is appended (e.g., `class` → `class_`). + Applied after stripping invalid characters. +2. **Base class names** — `Node`, `SourceRange`, and `IonSerializer` are reserved. +3. **Cross-dialect collisions** — when two categories share the same short name, + the dialect name is prefixed in PascalCase (e.g., `LambdaExpr` vs `CoreExpr`). +4. **Operator/category collisions** — for single-operator categories where the + operator name matches the category name, the record is named `Of`. For + multi-operator categories, a `_` suffix is added, then `_2`, `_3`, etc. + for further collisions. +5. **Invalid characters** — non-alphanumeric characters (except `_`) are stripped. + If stripping produces an empty name, it defaults to `field`. +6.
**Common `java.lang` classes** — names like `String`, `Object`, `Integer`, etc. + are escaped to avoid ambiguity with implicit imports. + +Record and interface names are converted to PascalCase. Factory method +names preserve the original operator name (not PascalCased). +Disambiguation is case-insensitive to avoid collisions on case-insensitive +file systems. + +Operator names are scoped within their category (since records are nested), +so the same operator name can appear in different categories without +collision. + +## Limitations + +- **Type declarations** (`type` in DDM) are not supported and cause an error. +- **Function declarations** (`function` in DDM) are not supported and cause an error. +- `syncat` and `metadata` declarations are accepted but do not produce output. +- Only operator declarations (`op`) are processed. + +## Implementation + +The generator lives in `Strata/DDM/Integration/Java/Gen.lean`.