From 98f48ee88f44e78df93fa2e6821f6ecffdbaaac5 Mon Sep 17 00:00:00 2001 From: "red-hat-konflux-kflux-prd-rh02[bot]" <190377777+red-hat-konflux-kflux-prd-rh02[bot]@users.noreply.github.com> Date: Tue, 17 Feb 2026 20:13:09 +0000 Subject: [PATCH] chore(deps): update github.com/protocolbuffers/txtpbfmt digest to a481f6a Signed-off-by: red-hat-konflux-kflux-prd-rh02 <190377777+red-hat-konflux-kflux-prd-rh02[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 +- .../protocolbuffers/txtpbfmt/ast/ast.go | 154 +- .../protocolbuffers/txtpbfmt/config/config.go | 124 ++ .../txtpbfmt/descriptor/descriptor.go | 83 + .../protocolbuffers/txtpbfmt/impl/impl.go | 1088 ++++++++++ .../protocolbuffers/txtpbfmt/logger/logger.go | 8 + .../protocolbuffers/txtpbfmt/parser/parser.go | 1779 +---------------- .../txtpbfmt/printer/printer.go | 347 ++++ .../protocolbuffers/txtpbfmt/quote/quote.go | 56 + .../protocolbuffers/txtpbfmt/sort/sort.go | 286 +++ .../txtpbfmt/unquote/unquote.go | 88 +- .../protocolbuffers/txtpbfmt/wrap/wrap.go | 267 +++ vendor/modules.txt | 12 +- 14 files changed, 2471 insertions(+), 1827 deletions(-) create mode 100644 vendor/github.com/protocolbuffers/txtpbfmt/config/config.go create mode 100644 vendor/github.com/protocolbuffers/txtpbfmt/descriptor/descriptor.go create mode 100644 vendor/github.com/protocolbuffers/txtpbfmt/impl/impl.go create mode 100644 vendor/github.com/protocolbuffers/txtpbfmt/logger/logger.go create mode 100644 vendor/github.com/protocolbuffers/txtpbfmt/printer/printer.go create mode 100644 vendor/github.com/protocolbuffers/txtpbfmt/quote/quote.go create mode 100644 vendor/github.com/protocolbuffers/txtpbfmt/sort/sort.go create mode 100644 vendor/github.com/protocolbuffers/txtpbfmt/wrap/wrap.go diff --git a/go.mod b/go.mod index 2e26c9e65f..4871468186 100644 --- a/go.mod +++ b/go.mod @@ -228,7 +228,7 @@ require ( github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/common v0.65.0 // indirect 
github.com/prometheus/procfs v0.16.1 // indirect - github.com/protocolbuffers/txtpbfmt v0.0.0-20241112170944-20d2c9ebc01d // indirect + github.com/protocolbuffers/txtpbfmt v0.0.0-20260217160748-a481f6a22f94 // indirect github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect diff --git a/go.sum b/go.sum index 9f4116e04a..0c1908df7c 100644 --- a/go.sum +++ b/go.sum @@ -1417,8 +1417,8 @@ github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2 github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= -github.com/protocolbuffers/txtpbfmt v0.0.0-20241112170944-20d2c9ebc01d h1:HWfigq7lB31IeJL8iy7jkUmU/PG1Sr8jVGhS749dbUA= -github.com/protocolbuffers/txtpbfmt v0.0.0-20241112170944-20d2c9ebc01d/go.mod h1:jgxiZysxFPM+iWKwQwPR+y+Jvo54ARd4EisXxKYpB5c= +github.com/protocolbuffers/txtpbfmt v0.0.0-20260217160748-a481f6a22f94 h1:2PC6Ql3jipz1KvBlqUHjjk6v4aMwE86mfDu1XMH0LR8= +github.com/protocolbuffers/txtpbfmt v0.0.0-20260217160748-a481f6a22f94/go.mod h1:JSbkp0BviKovYYt9XunS95M3mLPibE9bGg+Y95DsEEY= github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:N/ElC8H3+5XpJzTSTfLsJV/mx9Q9g7kxmchpfZyxgzM= github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= diff --git a/vendor/github.com/protocolbuffers/txtpbfmt/ast/ast.go b/vendor/github.com/protocolbuffers/txtpbfmt/ast/ast.go index e673cffa58..6a7abeafdf 100644 --- a/vendor/github.com/protocolbuffers/txtpbfmt/ast/ast.go +++ b/vendor/github.com/protocolbuffers/txtpbfmt/ast/ast.go @@ -77,6 +77,8 
@@ type Node struct { // Used when we want to break between the field name and values when a // single-line node exceeds the requested wrap column. PutSingleValueOnNextLine bool + // Field number from proto definition (0 if unknown/not applicable). + FieldNumber int32 } // NodeLess is a sorting function that compares two *Nodes, possibly using the parent Node @@ -103,14 +105,44 @@ func ChainNodeLess(first, second NodeLess) NodeLess { } } +type sortOptions struct { + reverse bool +} + +// A SortOption configures SortNodes. +type SortOption func(*sortOptions) + +// ReverseOrdering controls whether to sort the Nodes in ascending or descending order. By default +// the Nodes are sorted in ascending order. By setting this option to true, the Nodes will be sorted +// in descending order. +// +// Default: false. +func ReverseOrdering(enabled bool) SortOption { + return func(opts *sortOptions) { + opts.reverse = enabled + } +} + // SortNodes sorts nodes by the given less function. -func SortNodes(parent *Node, ns []*Node, less NodeLess) { - sort.Stable(sortableNodes(parent, ns, less, true /* isWholeSlice */)) +func SortNodes(parent *Node, ns []*Node, less NodeLess, opts ...SortOption) { + var options sortOptions + for _, opt := range opts { + opt(&options) + } + if options.reverse { + sort.Stable(sort.Reverse(sortableNodes(parent, ns, less, true /* isWholeSlice */))) + } else { + sort.Stable(sortableNodes(parent, ns, less, true /* isWholeSlice */)) + } end := 0 for begin := 0; begin < len(ns); begin = end { for end = begin + 1; end < len(ns) && ns[begin].Name == ns[end].Name; end++ { } - sort.Stable(sortableNodes(parent, ns[begin:end], less, false /* isWholeSlice */)) + if options.reverse { + sort.Stable(sort.Reverse(sortableNodes(parent, ns[begin:end], less, false /* isWholeSlice */))) + } else { + sort.Stable(sortableNodes(parent, ns[begin:end], less, false /* isWholeSlice */)) + } } } @@ -153,21 +185,24 @@ func getFieldValueForByFieldValue(n *Node) *Value { return 
n.Values[0] } -// ByFieldValue is a NodeLess function that orders adjacent scalar nodes with the same name by -// their scalar value. -func ByFieldValue(_, ni, nj *Node, isWholeSlice bool) bool { - if isWholeSlice { - return false - } - vi := getFieldValueForByFieldValue(ni) - vj := getFieldValueForByFieldValue(nj) - if vi == nil { - return vj != nil - } - if vj == nil { - return false +// ByFieldValue returns a NodeLess function that orders adjacent scalar nodes +// with the same name by their scalar value. The values are passed through +// `projection` before sorting. +func ByFieldValue(projection func(string) string) NodeLess { + return func(_, ni, nj *Node, isWholeSlice bool) bool { + if isWholeSlice { + return false + } + vi := getFieldValueForByFieldValue(ni) + vj := getFieldValueForByFieldValue(nj) + if vi == nil { + return vj != nil + } + if vj == nil { + return false + } + return projection(vi.Value) < projection(vj.Value) } - return vi.Value < vj.Value } func getChildValueByFieldSubfield(field, subfield string, n *Node) *Value { @@ -179,6 +214,20 @@ func getChildValueByFieldSubfield(field, subfield string, n *Node) *Value { return n.getChildValue(subfield) } +func getChildValueByFieldSubfieldPath(field string, subfieldPath []string, n *Node) *Value { + if field != "" && n.Name != field { + return nil + } + nodes := GetFromPath(n.Children, subfieldPath) + if len(nodes) != 1 { + return nil + } + if len(nodes[0].Values) != 1 { + return nil + } + return nodes[0].Values[0] +} + // ByFieldSubfield returns a NodeLess function that orders adjacent message nodes with the given // field name by the given subfield name value. If no field name is provided, it compares the // subfields of any adjacent nodes with matching names. @@ -199,6 +248,68 @@ func ByFieldSubfield(field, subfield string) NodeLess { } } +// ByFieldSubfieldPath returns a NodeLess function that orders adjacent message nodes with the given +// field name by the given subfield path value. 
If no field name is provided, it compares the +// subfields of any adjacent nodes with matching names. Values are passed +// through `projection` before sorting. +func ByFieldSubfieldPath(field string, subfieldPath []string, projection func(string) string) NodeLess { + return func(_, ni, nj *Node, isWholeSlice bool) bool { + if isWholeSlice { + return false + } + vi := getChildValueByFieldSubfieldPath(field, subfieldPath, ni) + vj := getChildValueByFieldSubfieldPath(field, subfieldPath, nj) + if vi == nil { + return vj != nil + } + if vj == nil { + return false + } + return projection(vi.Value) < projection(vj.Value) + } +} + +// ByFieldNumber is a NodeLess function that orders fields by their field numbers. +// Field numbers are populated during parsing from descriptor information. +func ByFieldNumber(_, ni, nj *Node, isWholeSlice bool) bool { + if !isWholeSlice { + return false + } + + numI, numJ := ni.FieldNumber, nj.FieldNumber + + // If both have field numbers, sort by field number + if numI > 0 && numJ > 0 { + return numI < numJ + } + + // If only one has field number, prioritize it + if numI > 0 && numJ == 0 { + return true // ni has priority + } + if numI == 0 && numJ > 0 { + return false // nj has priority + } + + // If neither has field number, fall back to alphabetical order + return ni.Name < nj.Name +} + +// Formatter is a function that can format nodes in the AST. +type Formatter func([]*Node) error + +var extraFormatters []Formatter + +// RegisterFormatter registers an extra formatter that will be called after parsing. +func RegisterFormatter(f Formatter) { + extraFormatters = append(extraFormatters, f) +} + +// GetFormatters returns all registered formatters. +func GetFormatters() []Formatter { + return extraFormatters +} + // getChildValue returns the Value of the child with the given field name, // or nil if no single such child exists. 
func (n *Node) getChildValue(field string) *Value { @@ -311,12 +422,19 @@ func SortValues(values []*Value) { }) } +// SortValuesReverse reverse sorts values by their value. +func SortValuesReverse(values []*Value) { + sort.SliceStable(values, func(i, j int) bool { + return values[i].Value > values[j].Value + }) +} + // GetFromPath returns all nodes with a given string path in the parse tree. See ast_test.go for examples. func GetFromPath(nodes []*Node, path []string) []*Node { if len(path) == 0 { return nil } - res := []*Node{} + var res []*Node for _, node := range nodes { if node.Name == path[0] { if len(path) == 1 { diff --git a/vendor/github.com/protocolbuffers/txtpbfmt/config/config.go b/vendor/github.com/protocolbuffers/txtpbfmt/config/config.go new file mode 100644 index 0000000000..c2b78952a3 --- /dev/null +++ b/vendor/github.com/protocolbuffers/txtpbfmt/config/config.go @@ -0,0 +1,124 @@ +// Package config contains the configuration for the formatter. +package config + +import ( + "github.com/protocolbuffers/txtpbfmt/logger" +) + +// Config can be used to pass additional config parameters to the formatter at +// the time of the API call. +type Config struct { + // Do not apply any reformatting to this file. + Disable bool + + // Expand all children irrespective of the initial state. + ExpandAllChildren bool + + // Skip colons whenever possible. + SkipAllColons bool + + // Allow unnamed nodes everywhere. + // Default is to allow only top-level nodes to be unnamed. + AllowUnnamedNodesEverywhere bool + + // Sort fields by field name. + SortFieldsByFieldName bool + + // Sort fields by field number from proto definition. + SortFieldsByFieldNumber bool + + // Path to protobuf descriptor file (.desc). + ProtoDescriptor string + + // Full message type name for field number lookup (required, e.g. google.protobuf.Any). + MessageFullName string + + // Sort adjacent scalar fields of the same field name by their contents. 
+ SortRepeatedFieldsByContent bool + + // Sort adjacent message fields of the given field name by the contents of the given subfield path. + // Format: either "field_name.subfield_name.subfield_name2...subfield_nameN" or just + // "subfield_name" (applies to all field names). + SortRepeatedFieldsBySubfield []string + + // Sort the Sort* fields by descending order instead of ascending order. + ReverseSort bool + + // Sort content fields in a way that's suitable for DNS names. It splits the + // value around '.' characters, reverses the substrings, and concatenates to + // generate the sort key. + DNSSortOrder bool + + // Map from Node.Name to the order of all fields within that node. See AddFieldSortOrder(). + FieldSortOrder map[string][]string + + // RequireFieldSortOrderToMatchAllFieldsInNode will cause parsing to fail if a node was added via + // AddFieldSortOrder() but 1+ fields under that node in the textproto aren't specified in the + // field order. This won't fail for nodes that don't have a field order specified at all. Use this + // to strictly enforce that your field order config always orders ALL the fields, and you're + // willing for new fields in the textproto to break parsing in order to enforce it. + RequireFieldSortOrderToMatchAllFieldsInNode bool + + // Remove lines that have the same field name and scalar value as another. + RemoveDuplicateValuesForRepeatedFields bool + + // Permit usage of Python-style """ or ''' delimited strings. + AllowTripleQuotedStrings bool + + // Max columns for string field values. If zero, no string wrapping will occur. + // Strings that may contain HTML tags will never be wrapped. + WrapStringsAtColumn int + + // Whether strings that appear to contain HTML tags should be wrapped + // (requires WrapStringsAtColumn to be set). + WrapHTMLStrings bool + + // Wrap string field values after each newline. + // Should not be used with other Wrap* options. 
+ WrapStringsAfterNewlines bool + + // Wrap strictly at the column instead of a word boundary. + WrapStringsWithoutWordwrap bool + + // Whether angle brackets used instead of curly braces should be preserved + // when outputting a formatted textproto. + PreserveAngleBrackets bool + + // Use single quotes around strings that contain double but not single quotes. + SmartQuotes bool + + // Use a short representation for repeated primitive fields (`x: 1 x: 2` vs `x: [1, 2]`). If this + // field is true, all repeated primitive fields will use the short representation; otherwise, the + // latter will be used only if it's being used in the input textproto. + UseShortRepeatedPrimitiveFields bool + + // Logger enables logging when it is non-nil. + // If the log messages aren't going to be useful, it's best to leave Logger + // set to nil, as otherwise log messages will be constructed. + Logger logger.Logger +} + +// Infof is used for informative messages, for testing or debugging. +func (c *Config) Infof(format string, args ...any) { + if c.Logger != nil { + c.Logger.Infof(format, args...) + } +} + +// InfoLevel returns true if the logger is set to non-nil. +func (c *Config) InfoLevel() bool { + return c.Logger != nil +} + +// RootName contains a constant that can be used to identify the root of all Nodes. +const RootName = "__ROOT__" + +// AddFieldSortOrder adds a config rule for the given Node.Name, so that all contained field names +// are output in the provided order. To specify an order for top-level Nodes, use RootName as the +// nodeName. 
+func (c *Config) AddFieldSortOrder(nodeName string, fieldOrder ...string) { + if c.FieldSortOrder == nil { + c.FieldSortOrder = make(map[string][]string) + } + c.FieldSortOrder[nodeName] = fieldOrder +} diff --git a/vendor/github.com/protocolbuffers/txtpbfmt/descriptor/descriptor.go b/vendor/github.com/protocolbuffers/txtpbfmt/descriptor/descriptor.go new file mode 100644 index 0000000000..f763d8f0e3 --- /dev/null +++ b/vendor/github.com/protocolbuffers/txtpbfmt/descriptor/descriptor.go @@ -0,0 +1,83 @@ +// Package descriptor provides functionality to load and parse Protocol Buffer descriptor files. +package descriptor + +import ( + "fmt" + "os" + + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/reflect/protodesc" + "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/reflect/protoregistry" + + "google.golang.org/protobuf/types/descriptorpb" +) + +// Loader provides functionality to load field numbers from descriptor files. +type Loader struct { + descriptorFile string + files *protoregistry.Files +} + +// NewLoader creates a new descriptor loader for the given descriptor file. 
+func NewLoader(descriptorFile string) (*Loader, error) { + if descriptorFile == "" { + return nil, fmt.Errorf("descriptor file is required") + } + + data, err := os.ReadFile(descriptorFile) + if err != nil { + return nil, fmt.Errorf("failed to read descriptor file %s: %v", descriptorFile, err) + } + + fileDescSet := &descriptorpb.FileDescriptorSet{} + if err := proto.Unmarshal(data, fileDescSet); err != nil { + return nil, fmt.Errorf("failed to unmarshal descriptor file %s: %v", descriptorFile, err) + } + + files, err := protodesc.NewFiles(fileDescSet) + if err != nil { + return nil, fmt.Errorf("failed to create files from descriptor file %s: %v", descriptorFile, err) + } + + return &Loader{ + descriptorFile: descriptorFile, + files: files, + }, nil +} + +// GetRootMessageDescriptor returns the root message descriptor for the specified messageFullName. +// messageFullName is required and must be a valid full name (e.g., "google.protobuf.Any"). +func (l *Loader) GetRootMessageDescriptor(messageFullName string) (protoreflect.MessageDescriptor, error) { + if l.files == nil { + return nil, fmt.Errorf("descriptor not loaded, call NewLoader() first") + } + + if messageFullName == "" { + // Collect available messages to help user + var availableMessages []string + l.files.RangeFiles(func(fd protoreflect.FileDescriptor) bool { + messages := fd.Messages() + for i := 0; i < messages.Len(); i++ { + msg := messages.Get(i) + availableMessages = append(availableMessages, string(msg.FullName())) + } + return true + }) + + if len(availableMessages) == 0 { + return nil, fmt.Errorf("No messages found in descriptor") + } + return nil, fmt.Errorf("message_full_name is required. 
Available messages: %v", availableMessages) + } + + // Find specific message type + desc, err := l.files.FindDescriptorByName(protoreflect.FullName(messageFullName)) + if err != nil { + return nil, fmt.Errorf("message type %s not found: %v", messageFullName, err) + } + if msgDesc, ok := desc.(protoreflect.MessageDescriptor); ok { + return msgDesc, nil + } + return nil, fmt.Errorf("%s is not a message type", messageFullName) +} diff --git a/vendor/github.com/protocolbuffers/txtpbfmt/impl/impl.go b/vendor/github.com/protocolbuffers/txtpbfmt/impl/impl.go new file mode 100644 index 0000000000..22422c9a46 --- /dev/null +++ b/vendor/github.com/protocolbuffers/txtpbfmt/impl/impl.go @@ -0,0 +1,1088 @@ +// Package impl edits text proto files, applies standard formatting +// and preserves comments. +package impl + +import ( + "bufio" + "bytes" + "fmt" + "strconv" + "strings" + + "google.golang.org/protobuf/reflect/protoreflect" + "github.com/protocolbuffers/txtpbfmt/ast" + "github.com/protocolbuffers/txtpbfmt/config" + "github.com/protocolbuffers/txtpbfmt/descriptor" + "github.com/protocolbuffers/txtpbfmt/quote" + "github.com/protocolbuffers/txtpbfmt/sort" + "github.com/protocolbuffers/txtpbfmt/wrap" +) + +type parser struct { + in []byte + index int + length int + // Maps the index of '{' characters on 'in' that have the matching '}' on + // the same line to 'true'. + bracketSameLine map[int]bool + config config.Config + line, column int // current position, 1-based. 
+} + +var defConfig = config.Config{} + +type bracketState struct { + insideComment bool + insideString bool + insideTemplate bool + insideTripleQuotedString bool + stringDelimiter string + isEscapedChar bool +} + +func (s *bracketState) processChar(c byte, i int, in []byte, allowTripleQuotedStrings bool) { + switch c { + case '#': + if !s.insideString { + s.insideComment = true + } + case '%': + if !s.insideComment && !s.insideString { + s.insideTemplate = !s.insideTemplate + } + case '"', '\'': + if s.insideComment { + return + } + s.handleQuotes(c, i, in, allowTripleQuotedStrings) + } +} + +func (s *bracketState) handleQuotes(c byte, i int, in []byte, allowTripleQuotedStrings bool) { + delim := string(c) + tripleQuoted := false + if allowTripleQuotedStrings && i+3 <= len(in) { + triple := string(in[i : i+3]) + if triple == `"""` || triple == `'''` { + delim = triple + tripleQuoted = true + } + } + + if s.insideString { + if s.stringDelimiter == delim && (s.insideTripleQuotedString || !s.isEscapedChar) { + s.insideString = false + s.insideTripleQuotedString = false + } + } else { + s.insideString = true + s.insideTripleQuotedString = tripleQuoted + s.stringDelimiter = delim + } +} + +// Return the byte-positions of each bracket which has the corresponding close on the +// same line as a set. +func sameLineBrackets(in []byte, allowTripleQuotedStrings bool) (map[int]bool, error) { + line := 1 + type bracket struct { + index int + line int + } + var open []bracket // Stack. 
+ res := map[int]bool{} + state := bracketState{} + for i, c := range in { + state.processChar(c, i, in, allowTripleQuotedStrings) + switch c { + case '\n': + line++ + state.insideComment = false + case '{', '<': + if state.insideComment || state.insideString || state.insideTemplate { + continue + } + open = append(open, bracket{index: i, line: line}) + case '}', '>': + if state.insideComment || state.insideString || state.insideTemplate { + continue + } + if len(open) == 0 { + return nil, fmt.Errorf("too many '}' or '>' at line %d, index %d", line, i) + } + last := len(open) - 1 + br := open[last] + open = open[:last] + if br.line == line { + res[br.index] = true + } + } + if state.isEscapedChar { + state.isEscapedChar = false + } else if c == '\\' && state.insideString && !state.insideTripleQuotedString { + state.isEscapedChar = true + } + + } + if state.insideString { + return nil, fmt.Errorf("unterminated string literal") + } + return res, nil +} + +var ( + spaceSeparators = []byte(" \t\n\r") + valueSeparators = []byte(" \t\n\r{}:,[]<>;#") +) + +// Parse returns a tree representation of a textproto file. +func Parse(in []byte) ([]*ast.Node, error) { + return ParseWithConfig(in, defConfig) +} + +// ParseWithConfig functions similar to Parse, but allows the user to pass in +// additional configuration options. +func ParseWithConfig(in []byte, c config.Config) ([]*ast.Node, error) { + if err := AddMetaCommentsToConfig(in, &c); err != nil { + return nil, err + } + return ParseWithMetaCommentConfig(in, c) +} + +// ParseWithMetaCommentConfig parses in textproto with MetaComments already added to configuration. 
+func ParseWithMetaCommentConfig(in []byte, c config.Config) ([]*ast.Node, error) { + p, err := newParser(in, c) + if err != nil { + return nil, err + } + + // Load descriptor if field number sorting is enabled + var rootDesc protoreflect.MessageDescriptor + if c.SortFieldsByFieldNumber { + if c.ProtoDescriptor == "" { + return nil, fmt.Errorf("proto_descriptor is required when using sort_fields_by_field_number") + } + + loader, err := descriptor.NewLoader(c.ProtoDescriptor) + if err != nil { + return nil, fmt.Errorf("failed to create descriptor loader: %v", err) + } + + // Get root message descriptor + rootDesc, err = loader.GetRootMessageDescriptor(c.MessageFullName) + if err != nil { + return nil, fmt.Errorf("failed to get root message descriptor: %v", err) + } + } + + if p.config.InfoLevel() { + p.config.Infof("p.in: %q", string(p.in)) + p.config.Infof("p.length: %v", p.length) + } + // Although unnamed nodes aren't strictly allowed, some formats represent a + // list of protos as a list of unnamed top-level nodes. + nodes, _, err := p.parse( /*isRoot=*/ true, rootDesc) + if err != nil { + return nil, err + } + if p.index < p.length { + return nil, fmt.Errorf("parser didn't consume all input. Stopped at %s", p.errorContext()) + } + for _, f := range ast.GetFormatters() { + if err := f(nodes); err != nil { + return nil, err + } + } + if err := wrap.Strings(nodes, 0, c); err != nil { + return nil, err + } + if err := sort.Process( /*parent=*/ nil, nodes, c); err != nil { + return nil, err + } + return nodes, nil +} + +// There are two types of MetaComment, one in the format of = and the other one doesn't +// have the equal sign. Currently there are only two MetaComments that are in the former format: +// +// "sort_repeated_fields_by_subfield": If this appears multiple times, then they will all be added +// to the config and the order is preserved. +// "wrap_strings_at_column": The is expected to be an integer. If it is not, then it will be +// ignored. 
If this appears multiple times, only the last one saved. +func addToConfig(metaComment string, c *config.Config) error { + // Test if a MetaComment is in the format of =. + key, val, hasEqualSign := strings.Cut(metaComment, "=") + switch key { + case "allow_triple_quoted_strings": + c.AllowTripleQuotedStrings = true + case "allow_unnamed_nodes_everywhere": + c.AllowUnnamedNodesEverywhere = true + case "disable": + c.Disable = true + case "expand_all_children": + c.ExpandAllChildren = true + case "preserve_angle_brackets": + c.PreserveAngleBrackets = true + case "remove_duplicate_values_for_repeated_fields": + c.RemoveDuplicateValuesForRepeatedFields = true + case "skip_all_colons": + c.SkipAllColons = true + case "smartquotes": + c.SmartQuotes = true + case "sort_fields_by_field_name": + c.SortFieldsByFieldName = true + case "sort_repeated_fields_by_content": + c.SortRepeatedFieldsByContent = true + case "sort_repeated_fields_by_subfield": + // Take all the subfields and the subfields in order as tie breakers. + if !hasEqualSign { + return fmt.Errorf("format should be %s=, got: %s", key, metaComment) + } + c.SortRepeatedFieldsBySubfield = append(c.SortRepeatedFieldsBySubfield, val) + case "reverse_sort": + c.ReverseSort = true + case "dns_sort_order": + c.DNSSortOrder = true + case "wrap_strings_at_column": + // If multiple of this MetaComment exists in the file, take the last one. 
+ if !hasEqualSign { + return fmt.Errorf("format should be %s=, got: %s", key, metaComment) + } + i, err := strconv.Atoi(strings.TrimSpace(val)) + if err != nil { + return fmt.Errorf("error parsing %s value %q (skipping): %v", key, val, err) + } + c.WrapStringsAtColumn = i + case "wrap_html_strings": + c.WrapHTMLStrings = true + case "wrap_strings_after_newlines": + c.WrapStringsAfterNewlines = true + case "wrap_strings_without_wordwrap": + c.WrapStringsWithoutWordwrap = true + case "use_short_repeated_primitive_fields": + c.UseShortRepeatedPrimitiveFields = true + case "on": // This doesn't change the overall config. + case "off": // This doesn't change the overall config. + default: + return fmt.Errorf("unrecognized MetaComment: %s", metaComment) + } + return nil +} + +// AddMetaCommentsToConfig parses MetaComments and adds them to the configuration. +func AddMetaCommentsToConfig(in []byte, c *config.Config) error { + scanner := bufio.NewScanner(bytes.NewReader(in)) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 { + continue + } + if line[0] != byte('#') { + break // only process the leading comment block + } + + // Look for comment lines in the format of ":", and process the lines with + // equals to "txtpbfmt". It's assumed that the MetaComments are given in the format of: + // # txtpbfmt: [, ...] + key, value, hasColon := strings.Cut(line[1:], ":") // Ignore the first '#'. 
+ if hasColon && strings.TrimSpace(key) == "txtpbfmt" { + for _, s := range strings.Split(strings.TrimSpace(value), ",") { + metaComment := strings.TrimSpace(s) + if err := addToConfig(metaComment, c); err != nil { + return err + } + } + } + } + return nil +} + +func newParser(in []byte, c config.Config) (*parser, error) { + var bracketSameLine map[int]bool + if c.ExpandAllChildren { + bracketSameLine = map[int]bool{} + } else { + var err error + if bracketSameLine, err = sameLineBrackets(in, c.AllowTripleQuotedStrings); err != nil { + return nil, err + } + } + if len(in) > 0 && in[len(in)-1] != '\n' { + in = append(in, '\n') + } + parser := &parser{ + in: in, + index: 0, + length: len(in), + bracketSameLine: bracketSameLine, + config: c, + line: 1, + column: 1, + } + return parser, nil +} + +// getFieldNumber returns the field number for a given field name in the descriptor. +func getFieldNumber(desc protoreflect.MessageDescriptor, fieldName string) int32 { + if desc == nil { + return 0 + } + + field := desc.Fields().ByTextName(fieldName) + if field == nil { + return 0 + } + return int32(field.Number()) +} + +// findChildDescriptor finds the descriptor for a nested message field. +func (p *parser) findChildDescriptor(desc protoreflect.MessageDescriptor, fieldName string) protoreflect.MessageDescriptor { + if desc == nil { + return nil + } + + field := desc.Fields().ByTextName(fieldName) + if field == nil { + return nil + } + if field.Kind() == protoreflect.MessageKind { + return field.Message() + } + return nil +} + +func (p *parser) nextInputIs(b byte) bool { + return p.index < p.length && p.in[p.index] == b +} + +func (p *parser) consume(b byte) bool { + if !p.nextInputIs(b) { + return false + } + p.index++ + p.column++ + if b == '\n' { + p.line++ + p.column = 1 + } + return true +} + +// consumeString consumes the given string s, which should not have any newlines. 
+func (p *parser) consumeString(s string) bool { + if p.index+len(s) > p.length { + return false + } + if string(p.in[p.index:p.index+len(s)]) != s { + return false + } + p.index += len(s) + p.column += len(s) + return true +} + +// loopDetector detects if the parser is in an infinite loop (ie failing to +// make progress). +type loopDetector struct { + lastIndex int + count int + parser *parser +} + +func (p *parser) getLoopDetector() *loopDetector { + return &loopDetector{lastIndex: p.index, parser: p} +} + +func (l *loopDetector) iter() error { + if l.parser.index == l.lastIndex { + l.count++ + if l.count < 2 { + return nil + } + return fmt.Errorf("parser failed to make progress at %s", l.parser.errorContext()) + } + l.lastIndex = l.parser.index + l.count = 0 + return nil +} + +func (p parser) errorContext() string { + index := p.index + if index >= p.length { + index = p.length - 1 + } + // Provide the surrounding input as context. + lastContentIndex := index + 20 + if lastContentIndex >= p.length { + lastContentIndex = p.length - 1 + } + previousContentIndex := index - 20 + if previousContentIndex < 0 { + previousContentIndex = 0 + } + before := string(p.in[previousContentIndex:index]) + after := string(p.in[index:lastContentIndex]) + return fmt.Sprintf("index %v\nposition %+v\nbefore: %q\nafter: %q\nbefore+after: %q", index, p.position(), before, after, before+after) +} + +func (p *parser) position() ast.Position { + return ast.Position{ + Byte: uint32(p.index), + Line: int32(p.line), + Column: int32(p.column), + } +} + +// Modifies the parser by rewinding to the given position. +// A position can be snapshotted by using the `position()` function above. 
+func (p *parser) rollbackPosition(pos ast.Position) { + p.index = int(pos.Byte) + p.line = int(pos.Line) + p.column = int(pos.Column) +} + +func (p *parser) consumeOptionalSeparator() error { + if p.index > 0 && !p.isBlankSep(p.index-1) { + // If an unnamed field immediately follows non-whitespace, we require a separator character first (key_one:,:value_two instead of key_one::value_two) + if p.consume(':') { + return fmt.Errorf("parser encountered unexpected character ':' (should be whitespace, ',', or ';')") + } + } + + _ = p.consume(';') // Ignore optional ';'. + _ = p.consume(',') // Ignore optional ','. + + return nil +} + +// parse parses a text proto. +// It assumes the text to be either conformant with the standard text proto +// (i.e. passes proto.UnmarshalText() without error) or the alternative textproto +// format (sequence of messages, each of which passes proto.UnmarshalText()). +// endPos is the position of the first character on the first line +// after parsed nodes: that's the position to append more children. +func (p *parser) parse(isRoot bool, desc protoreflect.MessageDescriptor) (result []*ast.Node, endPos ast.Position, err error) { + var res []*ast.Node + res = []*ast.Node{} // empty children is different from nil children + for ld := p.getLoopDetector(); p.index < p.length; { + if err := ld.iter(); err != nil { + return nil, ast.Position{}, err + } + + // p.parse is often invoked with the index pointing at the newline character + // after the previous item. We should still report that this item starts in + // the next line. + p.consume('\r') + p.consume('\n') + startPos := p.position() + + fmtDisabled, err := p.readFormatterDisabledBlock() + if err != nil { + return nil, startPos, err + } + if len(fmtDisabled) > 0 { + res = append(res, &ast.Node{ + Start: startPos, + Raw: fmtDisabled, + }) + continue + } + + // Read PreComments. + comments, blankLines := p.skipWhiteSpaceAndReadComments(true /* multiLine */) + + // Handle blank lines. 
+ if blankLines > 0 { + if p.config.InfoLevel() { + p.config.Infof("blankLines: %v", blankLines) + } + // Here we collapse the leading blank lines into one blank line. + comments = append([]string{""}, comments...) + } + + for p.nextInputIs('%') { + comments = append(comments, p.readTemplate()) + c, _ := p.skipWhiteSpaceAndReadComments(false) + comments = append(comments, c...) + } + + if end, endPos, err := p.handleEndOfMessage(startPos, comments, &res); end { + return res, endPos, err + } + + nd := &ast.Node{ + Start: startPos, + PreComments: comments, + } + if p.config.InfoLevel() { + p.config.Infof("PreComments: %q", strings.Join(nd.PreComments, "\n")) + } + + // Skip white-space other than '\n', which is handled below. + for p.consume(' ') || p.consume('\t') { + } + + // Handle multiple comment blocks. + // + // # comment block 1 + // # comment block 1 + // + // # comment block 2 + // # comment block 2 + // + // Each block that ends on an empty line (instead of a field) gets its own + // 'empty' node. + if p.nextInputIs('\n') { + res = append(res, nd) + continue + } + + // Handle end of file. + if end, err := p.handleEndOfFile(nd, &res); end { + if err != nil { + return nil, ast.Position{}, err + } + break + } + + if err := p.parseFieldName(nd, isRoot); err != nil { + return nil, ast.Position{}, err + } + + // Set field number from descriptor if available + nd.FieldNumber = getFieldNumber(desc, nd.Name) + + // Skip separator. 
+ preCommentsBeforeColon, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */) + nd.SkipColon = !p.consume(':') + previousPos := p.position() + preCommentsAfterColon, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */) + + if err := p.parseFieldValue(nd, desc, preCommentsBeforeColon, preCommentsAfterColon, previousPos); err != nil { + return nil, ast.Position{}, err + } + + if p.config.InfoLevel() && p.index < p.length { + p.config.Infof("p.in[p.index]: %q", string(p.in[p.index])) + } + res = append(res, nd) + } + return res, p.position(), nil +} + +func (p *parser) parseFieldValue(nd *ast.Node, desc protoreflect.MessageDescriptor, preCommentsBeforeColon, preCommentsAfterColon []string, previousPos ast.Position) error { + if p.consume('{') || p.consume('<') { + if err := p.parseMessage(nd, desc); err != nil { + return err + } + } else if p.consume('[') { + if err := p.parseList(nd, preCommentsBeforeColon, preCommentsAfterColon); err != nil { + return err + } + if nd.ValuesAsList { + return nil + } + } else { + // Rewind comments. + p.rollbackPosition(previousPos) + // Handle Values. + var err error + nd.Values, err = p.readValues() + if err != nil { + return err + } + if err := p.consumeOptionalSeparator(); err != nil { + return err + } + } + return nil +} + +func (p *parser) handleEndOfFile(nd *ast.Node, res *[]*ast.Node) (bool, error) { + if p.index >= p.length { + nd.End = p.position() + if len(nd.PreComments) > 0 { + *res = append(*res, nd) + } + return true, nil + } + return false, nil +} + +func (p *parser) handleEndOfMessage(startPos ast.Position, comments []string, res *[]*ast.Node) (bool, ast.Position, error) { + if endPos := p.position(); p.consume('}') || p.consume('>') || p.consume(']') { + // Handle comments after last child. 
+ + if len(comments) > 0 { + *res = append(*res, &ast.Node{Start: startPos, PreComments: comments}) + } + + // endPos points at the closing brace, but we should rather return the position + // of the first character after the previous item. Therefore let's rewind a bit: + for endPos.Byte > 0 && p.in[endPos.Byte-1] == ' ' { + endPos.Byte-- + endPos.Column-- + } + + if err := p.consumeOptionalSeparator(); err != nil { + return true, ast.Position{}, err + } + + // Done parsing children. + return true, endPos, nil + } + return false, ast.Position{}, nil +} + +func (p *parser) parseFieldName(nd *ast.Node, isRoot bool) error { + if p.consume('[') { + // Read Name (of proto extension). + nd.Name = fmt.Sprintf("[%s]", p.readExtension()) + _ = p.consume(']') // Ignore the ']'. + } else { + // Read Name. + nd.Name = p.readFieldName() + if nd.Name == "" && !isRoot && !p.config.AllowUnnamedNodesEverywhere { + return fmt.Errorf("Failed to find a FieldName at %s", p.errorContext()) + } + } + if p.config.InfoLevel() { + p.config.Infof("name: %q", nd.Name) + } + return nil +} + +func (p *parser) parseMessage(nd *ast.Node, desc protoreflect.MessageDescriptor) error { + if p.config.SkipAllColons { + nd.SkipColon = true + } + nd.ChildrenSameLine = p.bracketSameLine[p.index-1] + nd.IsAngleBracket = p.config.PreserveAngleBrackets && p.in[p.index-1] == '<' + // Recursive call to parse child nodes. + childDesc := p.findChildDescriptor(desc, nd.Name) + nodes, lastPos, err := p.parse( /*isRoot=*/ false, childDesc) + if err != nil { + return err + } + nd.Children = nodes + nd.End = lastPos + + nd.ClosingBraceComment = p.readInlineComment() + return nil +} + +func (p *parser) parseList(nd *ast.Node, preCommentsBeforeColon, preCommentsAfterColon []string) error { + openBracketLine := p.line + + // Skip separator. + preCommentsAfterListStart := p.readContinuousBlocksOfComments() + + var preComments []string + preComments = append(preComments, preCommentsBeforeColon...) 
+ preComments = append(preComments, preCommentsAfterColon...) + preComments = append(preComments, preCommentsAfterListStart...) + + if p.nextInputIs('{') { + // Handle list of nodes. + return p.parseListOfNodes(nd, preComments, openBracketLine) + } else { + // Handle list of values. + return p.parseListOfValues(nd, preComments, openBracketLine) + } +} + +func (p *parser) parseListOfNodes(nd *ast.Node, preComments []string, openBracketLine int) error { + nd.ChildrenAsList = true + + nodes, lastPos, err := p.parse( /*isRoot=*/ true, nil) + if err != nil { + return err + } + if len(nodes) > 0 { + nodes[0].PreComments = preComments + } + + nd.Children = nodes + nd.End = lastPos + nd.ClosingBraceComment = p.readInlineComment() + nd.ChildrenSameLine = openBracketLine == p.line + return nil +} + +func (p *parser) parseListOfValues(nd *ast.Node, preComments []string, openBracketLine int) error { + nd.ValuesAsList = true // We found values in list - keep it as list. + + for ld := p.getLoopDetector(); !p.consume(']') && p.index < p.length; { + if err := ld.iter(); err != nil { + return err + } + + // Read each value in the list. + vals, err := p.readValues() + if err != nil { + return err + } + if len(vals) != 1 { + return fmt.Errorf("multiple-string value not supported (%v). Please add comma explicitly, see http://b/162070952", vals) + } + if len(preComments) > 0 { + // If we read preComments before readValues(), they should go first, + // but avoid copy overhead if there are none. + vals[0].PreComments = append(preComments, vals[0].PreComments...) + } + + // Skip separator. + _, _ = p.skipWhiteSpaceAndReadComments(false /* multiLine */) + if p.consume(',') { + vals[0].InlineComment = p.readInlineComment() + } + + nd.Values = append(nd.Values, vals...) 
+ + preComments, _ = p.skipWhiteSpaceAndReadComments(true /* multiLine */) + } + nd.ChildrenSameLine = openBracketLine == p.line + + // Handle comments after last line (or for empty list) + nd.PostValuesComments = preComments + nd.ClosingBraceComment = p.readInlineComment() + + if err := p.consumeOptionalSeparator(); err != nil { + return err + } + return nil +} + +func (p *parser) readFieldName() string { + i := p.index + for ; i < p.length && !p.isValueSep(i); i++ { + } + return p.advance(i) +} + +func (p *parser) readExtension() string { + i := p.index + for ; i < p.length && (p.isBlankSep(i) || !p.isValueSep(i)); i++ { + } + return removeBlanks(p.advance(i)) +} + +func removeBlanks(in string) string { + s := []byte(in) + for _, b := range spaceSeparators { + s = bytes.Replace(s, []byte{b}, nil, -1) + } + return string(s) +} + +func (p *parser) readContinuousBlocksOfComments() []string { + var preComments []string + for { + comments, blankLines := p.skipWhiteSpaceAndReadComments(true) + if len(comments) == 0 { + break + } + if blankLines > 0 && len(preComments) > 0 { + comments = append([]string{""}, comments...) + } + preComments = append(preComments, comments...) + } + + return preComments +} + +func (p *parser) consumeWhitespace() (int, error) { + start := p.index + for p.index < p.length && p.isBlankSep(p.index) { + if p.consume('\n') || (p.consume('\r') && p.consume('\n')) { + // Include up to one blank line before the 'off' directive. + start = p.index - 1 + } else if p.consume(' ') || p.consume('\t') { + // Do nothing. Side-effect is to advance p.index. + } else { + return 0, fmt.Errorf("unhandled isBlankSep at %s", p.errorContext()) + } + } + return start, nil +} + +// Returns the exact text within the block flanked by "# txtpbfmt: off" and "# txtpbfmt: on". +// The 'off' directive must be on its own line, and it cannot be preceded by a comment line. Any +// preceding whitespace on this line and up to one blank line will be retained. 
+// The 'on' directive must followed by a line break. Only full nodes of a AST can be +// within this block. Partially disabled sections, like just the first line of a for loop without +// body or closing brace, are not supported. Value lists are not supported. No parsing happens +// within this block, and as parsing errors will be ignored, please exercise caution. +func (p *parser) readFormatterDisabledBlock() (string, error) { + previousPos := p.position() + start, err := p.consumeWhitespace() + if err != nil { + return "", err + } + if !p.consumeString("# txtpbfmt: off") { + // Directive not found. Rollback to start. + p.rollbackPosition(previousPos) + return "", nil + } + if !p.consume('\n') { + return "", fmt.Errorf("txtpbfmt off should be followed by newline at %s", p.errorContext()) + } + for ; p.index < p.length; p.index++ { + if p.consumeString("# txtpbfmt: on") { + if !p.consume('\n') { + return "", fmt.Errorf("txtpbfmt on should be followed by newline at %s", p.errorContext()) + } + // Retain up to one blank line. + p.consume('\n') + return string(p.in[start:p.index]), nil + } + } + // We reached the end of the file without finding the 'on' directive. + p.rollbackPosition(previousPos) + return "", fmt.Errorf("unterminated txtpbfmt off at %s", p.errorContext()) +} + +// skipWhiteSpaceAndReadComments has multiple cases: +// - (1) reading a block of comments followed by a blank line +// - (2) reading a block of comments followed by non-blank content +// - (3) reading the inline comments between the current char and the end of +// the current line +// +// In both cases (1) and (2), there can also be blank lines before the comment +// starts. +// +// Lines of comments and number of blank lines before the comment will be +// returned. If there is no comment, the returned slice will be empty. 
+func (p *parser) skipWhiteSpaceAndReadComments(multiLine bool) ([]string, int) { + i := p.index + var foundComment, insideComment bool + commentBegin := 0 + var comments []string + // Number of blanks lines *before* the comment (if any) starts. + blankLines := 0 + for ; i < p.length; i++ { + if p.in[i] == '#' && !insideComment { + insideComment = true + foundComment = true + commentBegin = i + } else if p.in[i] == '\n' { + if insideComment { + comments = append(comments, string(p.in[commentBegin:i])) // Exclude the '\n'. + insideComment = false + } else if foundComment { + i-- // Put back the last '\n' so the caller can detect that we're on case (1). + break + } else { + blankLines++ + } + if !multiLine { + break + } + } + if !insideComment && !p.isBlankSep(i) { + break + } + } + sep := p.advance(i) + if p.config.InfoLevel() { + p.config.Infof("sep: %q\np.index: %v", string(sep), p.index) + if p.index < p.length { + p.config.Infof("p.in[p.index]: %q", string(p.in[p.index])) + } + } + return comments, blankLines +} + +func (p *parser) isBlankSep(i int) bool { + return bytes.Contains(spaceSeparators, p.in[i:i+1]) +} + +func (p *parser) isValueSep(i int) bool { + return bytes.Contains(valueSeparators, p.in[i:i+1]) +} + +func (p *parser) advance(i int) string { + if i > p.length { + i = p.length + } + res := p.in[p.index:i] + p.index = i + strRes := string(res) + newlines := strings.Count(strRes, "\n") + if newlines == 0 { + p.column += len(strRes) + } else { + p.column = len(strRes) - strings.LastIndex(strRes, "\n") + p.line += newlines + } + return string(res) +} + +func (p *parser) readValues() ([]*ast.Value, error) { + var values []*ast.Value + var previousPos ast.Position + preComments, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */) + if p.nextInputIs('%') { + values = append(values, p.populateValue(p.readTemplate(), nil)) + previousPos = p.position() + } + if v, err := p.readTripleQuotedStringValue(); err != nil { + return nil, err + } else { + if v 
!= nil { + values = append(values, v) + previousPos = p.position() + } + } + for p.consume('"') || p.consume('\'') { + // Handle string value. + v, err := p.readSingleQuotedStringValue(preComments) + if err != nil { + return nil, err + } + values = append(values, v) + previousPos = p.position() + preComments, _ = p.skipWhiteSpaceAndReadComments(true /* multiLine */) + } + if previousPos != (ast.Position{}) { + // Rewind comments. + p.rollbackPosition(previousPos) + } else { + i := p.index + // Handle other values. + values = append(values, p.readOtherValue(i, preComments)) + } + if p.config.InfoLevel() { + p.config.Infof("values: %v", values) + } + return values, nil +} + +func (p *parser) readTripleQuotedStringValue() (*ast.Value, error) { + if !p.config.AllowTripleQuotedStrings { + return nil, nil + } + return p.readTripleQuotedString() +} + +func (p *parser) readSingleQuotedStringValue(preComments []string) (*ast.Value, error) { + stringBegin := p.index - 1 // Index of the quote. + i := p.index + for ; i < p.length; i++ { + if p.in[i] == '\\' { + i++ // Skip escaped char. + continue + } + if p.in[i] == '\n' { + p.index = i + return nil, fmt.Errorf("found literal (unescaped) new line in string at %s", p.errorContext()) + } + if p.in[i] == p.in[stringBegin] { + var vl string + if p.config.SmartQuotes { + vl = quote.Smart(p.advance(i)) + } else { + vl = quote.Fix(p.advance(i)) + } + _ = p.advance(i + 1) // Skip the quote. 
+ return p.populateValue(vl, preComments), nil + } + } + if i == p.length { + p.index = i + return nil, fmt.Errorf("unfinished string at %s", p.errorContext()) + } + return nil, nil +} + +func (p *parser) readOtherValue(i int, preComments []string) *ast.Value { + for ; i < p.length; i++ { + if p.isValueSep(i) { + break + } + } + vl := p.advance(i) + return p.populateValue(vl, preComments) +} + +func (p *parser) readTripleQuotedString() (*ast.Value, error) { + start := p.index + stringBegin := p.index + delimiter := `"""` + if !p.consumeString(delimiter) { + delimiter = `'''` + if !p.consumeString(delimiter) { + return nil, nil + } + } + + for { + if p.consumeString(delimiter) { + break + } + if p.index == p.length { + p.index = start + return nil, fmt.Errorf("unfinished string at %s", p.errorContext()) + } + p.index++ + } + + v := p.populateValue(string(p.in[stringBegin:p.index]), nil) + + return v, nil +} + +func (p *parser) populateValue(vl string, preComments []string) *ast.Value { + if p.config.InfoLevel() { + p.config.Infof("value: %q", vl) + } + return &ast.Value{ + Value: vl, + InlineComment: p.readInlineComment(), + PreComments: preComments, + } +} + +func (p *parser) readInlineComment() string { + inlineComment, _ := p.skipWhiteSpaceAndReadComments(false /* multiLine */) + if p.config.InfoLevel() { + p.config.Infof("inlineComment: %q", strings.Join(inlineComment, "\n")) + } + if len(inlineComment) > 0 { + return inlineComment[0] + } + return "" +} + +func (p *parser) readStringInTemplate(i int) int { + stringBegin := i - 1 // Index of quote. + for ; i < p.length; i++ { + if p.in[i] == '\\' { + i++ // Skip escaped char. + continue + } + if p.in[i] == p.in[stringBegin] { + i++ // Skip end quote. 
+ break + } + } + return i +} + +func (p *parser) readTemplate() string { + if !p.nextInputIs('%') { + return "" + } + i := p.index + 1 + for ; i < p.length; i++ { + if p.in[i] == '"' || p.in[i] == '\'' { + i++ + i = p.readStringInTemplate(i) + } + if i < p.length && p.in[i] == '%' { + i++ + break + } + } + return p.advance(i) +} diff --git a/vendor/github.com/protocolbuffers/txtpbfmt/logger/logger.go b/vendor/github.com/protocolbuffers/txtpbfmt/logger/logger.go new file mode 100644 index 0000000000..c145629622 --- /dev/null +++ b/vendor/github.com/protocolbuffers/txtpbfmt/logger/logger.go @@ -0,0 +1,8 @@ +// Package logger provides a logger interface for the parser. +package logger + +// Logger is a small glog-like interface. +type Logger interface { + // Infof is used for informative messages, for testing or debugging. + Infof(format string, args ...any) +} diff --git a/vendor/github.com/protocolbuffers/txtpbfmt/parser/parser.go b/vendor/github.com/protocolbuffers/txtpbfmt/parser/parser.go index ffab187e17..c17b90e667 100644 --- a/vendor/github.com/protocolbuffers/txtpbfmt/parser/parser.go +++ b/vendor/github.com/protocolbuffers/txtpbfmt/parser/parser.go @@ -6,1807 +6,60 @@ package parser import ( - "bufio" - "bytes" - "fmt" - "math" - "regexp" - "strconv" - "strings" - - "github.com/mitchellh/go-wordwrap" "github.com/protocolbuffers/txtpbfmt/ast" - "github.com/protocolbuffers/txtpbfmt/unquote" + "github.com/protocolbuffers/txtpbfmt/config" + "github.com/protocolbuffers/txtpbfmt/impl" + "github.com/protocolbuffers/txtpbfmt/printer" + "github.com/protocolbuffers/txtpbfmt/sort" ) // Config can be used to pass additional config parameters to the formatter at // the time of the API call. -type Config struct { - // Do not apply any reformatting to this file. - Disable bool - - // Expand all children irrespective of the initial state. - ExpandAllChildren bool - - // Skip colons whenever possible. - SkipAllColons bool - - // Allow unnamed nodes everywhere. 
- // Default is to allow only top-level nodes to be unnamed. - AllowUnnamedNodesEverywhere bool - - // Sort fields by field name. - SortFieldsByFieldName bool - - // Sort adjacent scalar fields of the same field name by their contents. - SortRepeatedFieldsByContent bool - - // Sort adjacent message fields of the given field name by the contents of the given subfield. - // Format: either "field_name.subfield_name" or just "subfield_name" (applies to all field names). - SortRepeatedFieldsBySubfield []string - - // Map from Node.Name to the order of all fields within that node. See AddFieldSortOrder(). - fieldSortOrder map[string][]string - - // RequireFieldSortOrderToMatchAllFieldsInNode will cause parsing to fail if a node was added via - // AddFieldSortOrder() but 1+ fields under that node in the textproto aren't specified in the - // field order. This won't fail for nodes that don't have a field order specified at all. Use this - // to strictly enforce that your field order config always orders ALL the fields, and you're - // willing for new fields in the textproto to break parsing in order to enforce it. - RequireFieldSortOrderToMatchAllFieldsInNode bool - - // Remove lines that have the same field name and scalar value as another. - RemoveDuplicateValuesForRepeatedFields bool - - // Permit usage of Python-style """ or ''' delimited strings. - AllowTripleQuotedStrings bool - - // Max columns for string field values. If zero, no string wrapping will occur. - // Strings that may contain HTML tags will never be wrapped. - WrapStringsAtColumn int - - // Whether strings that appear to contain HTML tags should be wrapped - // (requires WrapStringsAtColumn to be set). - WrapHTMLStrings bool - - // Wrap string field values after each newline. - // Should not be used with other Wrap* options. - WrapStringsAfterNewlines bool - - // Wrap strictly at the column instead of a word boundary. 
- WrapStringsWithoutWordwrap bool - - // Whether angle brackets used instead of curly braces should be preserved - // when outputting a formatted textproto. - PreserveAngleBrackets bool - - // Use single quotes around strings that contain double but not single quotes. - SmartQuotes bool - - // Logger enables logging when it is non-nil. - // If the log messages aren't going to be useful, it's best to leave Logger - // set to nil, as otherwise log messages will be constructed. - Logger Logger -} - -func (c *Config) infof(format string, args ...any) { - if c.Logger != nil { - c.Logger.Infof(format, args...) - } -} -func (c *Config) infoLevel() bool { - return c.Logger != nil -} - -// Logger is a small glog-like interface. -type Logger interface { - // Infof is used for informative messages, for testing or debugging. - Infof(format string, args ...any) -} +type Config = config.Config // RootName contains a constant that can be used to identify the root of all Nodes. -const RootName = "__ROOT__" - -// AddFieldSortOrder adds a config rule for the given Node.Name, so that all contained field names -// are output in the provided order. To specify an order for top-level Nodes, use RootName as the -// nodeName. -func (c *Config) AddFieldSortOrder(nodeName string, fieldOrder ...string) { - if c.fieldSortOrder == nil { - c.fieldSortOrder = make(map[string][]string) - } - c.fieldSortOrder[nodeName] = fieldOrder -} +const RootName = config.RootName // UnsortedFieldsError will be returned by ParseWithConfig if // Config.RequireFieldSortOrderToMatchAllFieldsInNode is set, and an unrecognized field is found // while parsing. -type UnsortedFieldsError struct { - UnsortedFields []UnsortedField -} - -// UnsortedField records details about a single unsorted field. 
-type UnsortedField struct { - FieldName string - Line int32 - ParentFieldName string -} - -func (e *UnsortedFieldsError) Error() string { - var errs []string - for _, us := range e.UnsortedFields { - errs = append(errs, fmt.Sprintf(" line: %d, parent field: %q, unsorted field: %q", us.Line, us.ParentFieldName, us.FieldName)) - } - return fmt.Sprintf("fields parsed that were not specified in the parser.AddFieldSortOrder() call:\n%s", strings.Join(errs, "\n")) -} - -type parser struct { - in []byte - index int - length int - // Maps the index of '{' characters on 'in' that have the matching '}' on - // the same line to 'true'. - bracketSameLine map[int]bool - config Config - line, column int // current position, 1-based. -} - -var defConfig = Config{} -var tagRegex = regexp.MustCompile(`<.*>`) - -const indentSpaces = " " +type UnsortedFieldsError = sort.UnsortedFieldsError // Format formats a text proto file preserving comments. func Format(in []byte) ([]byte, error) { - return FormatWithConfig(in, defConfig) + return printer.Format(in) } // FormatWithConfig functions similar to format, but allows the user to pass in // additional configuration options. -func FormatWithConfig(in []byte, c Config) ([]byte, error) { - if err := addMetaCommentsToConfig(in, &c); err != nil { - return nil, err - } - if c.Disable { - c.infof("Ignored file with 'disable' comment.") - return in, nil - } - nodes, err := parseWithMetaCommentConfig(in, c) - if err != nil { - return nil, err - } - return PrettyBytes(nodes, 0), nil -} - -// Return the byte-positions of each bracket which has the corresponding close on the -// same line as a set. -func sameLineBrackets(in []byte, allowTripleQuotedStrings bool) (map[int]bool, error) { - line := 1 - type bracket struct { - index int - line int - } - open := []bracket{} // Stack. 
- res := map[int]bool{} - insideComment := false - insideString := false - insideTemplate := false - insideTripleQuotedString := false - var stringDelimiter string - isEscapedChar := false - for i, c := range in { - switch c { - case '\n': - line++ - insideComment = false - case '{', '<': - if insideComment || insideString || insideTemplate { - continue - } - open = append(open, bracket{index: i, line: line}) - case '}', '>': - if insideComment || insideString || insideTemplate { - continue - } - if len(open) == 0 { - return nil, fmt.Errorf("too many '}' or '>' at line %d, index %d", line, i) - } - last := len(open) - 1 - br := open[last] - open = open[:last] - if br.line == line { - res[br.index] = true - } - case '#': - if insideString { - continue - } - insideComment = true - case '%': - if insideComment || insideString { - continue - } - if insideTemplate { - insideTemplate = false - } else { - insideTemplate = true - } - case '"', '\'': - if insideComment { - continue - } - delim := string(c) - tripleQuoted := false - if allowTripleQuotedStrings && i+3 <= len(in) { - triple := string(in[i : i+3]) - if triple == `"""` || triple == `'''` { - delim = triple - tripleQuoted = true - } - } - - if insideString { - if stringDelimiter == delim && (insideTripleQuotedString || !isEscapedChar) { - insideString = false - insideTripleQuotedString = false - } - } else { - insideString = true - if tripleQuoted { - insideTripleQuotedString = true - } - stringDelimiter = delim - } - } - - if isEscapedChar { - isEscapedChar = false - } else if c == '\\' && insideString && !insideTripleQuotedString { - isEscapedChar = true - } - } - if insideString { - return nil, fmt.Errorf("unterminated string literal") - } - return res, nil +func FormatWithConfig(in []byte, c config.Config) ([]byte, error) { + return printer.FormatWithConfig(in, c) } -func removeDeleted(nodes []*ast.Node) []*ast.Node { - res := []*ast.Node{} - // When removing a node which has an empty line before it, we 
should keep - // the empty line before the next non-removed node to maintain the visual separation. - // Consider the following: - // foo: { name: "foo1" } - // foo: { name: "foo2" } - // - // bar: { name: "bar1" } - // bar: { name: "bar2" } - // - // If we decide to remove both foo2 and bar1, the result should still have one empty - // line between foo1 and bar2. - addEmptyLine := false - for _, node := range nodes { - if node.Deleted { - if len(node.PreComments) > 0 && node.PreComments[0] == "" { - addEmptyLine = true - } - continue - } - if len(node.Children) > 0 { - node.Children = removeDeleted(node.Children) - } - if addEmptyLine && (len(node.PreComments) == 0 || node.PreComments[0] != "") { - node.PreComments = append([]string{""}, node.PreComments...) - } - addEmptyLine = false - res = append(res, node) - } - return res -} - -var ( - spaceSeparators = []byte(" \t\n\r") - valueSeparators = []byte(" \t\n\r{}:,[]<>;#") -) - // Parse returns a tree representation of a textproto file. func Parse(in []byte) ([]*ast.Node, error) { - return ParseWithConfig(in, defConfig) + return impl.Parse(in) } // ParseWithConfig functions similar to Parse, but allows the user to pass in // additional configuration options. -func ParseWithConfig(in []byte, c Config) ([]*ast.Node, error) { - if err := addMetaCommentsToConfig(in, &c); err != nil { - return nil, err - } - return parseWithMetaCommentConfig(in, c) -} - -// Parses in textproto with MetaComments already added to configuration. -func parseWithMetaCommentConfig(in []byte, c Config) ([]*ast.Node, error) { - p, err := newParser(in, c) - if err != nil { - return nil, err - } - if p.config.infoLevel() { - p.config.infof("p.in: %q", string(p.in)) - p.config.infof("p.length: %v", p.length) - } - // Although unnamed nodes aren't strictly allowed, some formats represent a - // list of protos as a list of unnamed top-level nodes. 
- nodes, _, err := p.parse( /*isRoot=*/ true) - if err != nil { - return nil, err - } - if p.index < p.length { - return nil, fmt.Errorf("parser didn't consume all input. Stopped at %s", p.errorContext()) - } - if err := wrapStrings(nodes, 0, c); err != nil { - return nil, err - } - if err := sortAndFilterNodes( /*parent=*/ nil, nodes, nodeSortFunction(c), nodeFilterFunction(c), valuesSortFunction(c)); err != nil { - return nil, err - } - return nodes, nil -} - -// There are two types of MetaComment, one in the format of = and the other one doesn't -// have the equal sign. Currently there are only two MetaComments that are in the former format: -// -// "sort_repeated_fields_by_subfield": If this appears multiple times, then they will all be added -// to the config and the order is perserved. -// "wrap_strings_at_column": The is expected to be an integer. If it is not, then it will be -// ignored. If this appears multiple times, only the last one saved. -func addToConfig(metaComment string, c *Config) error { - // Test if a MetaComment is in the format of =. - key, val, hasEqualSign := strings.Cut(metaComment, "=") - switch key { - case "allow_triple_quoted_strings": - c.AllowTripleQuotedStrings = true - case "allow_unnamed_nodes_everywhere": - c.AllowUnnamedNodesEverywhere = true - case "disable": - c.Disable = true - case "expand_all_children": - c.ExpandAllChildren = true - case "preserve_angle_brackets": - c.PreserveAngleBrackets = true - case "remove_duplicate_values_for_repeated_fields": - c.RemoveDuplicateValuesForRepeatedFields = true - case "skip_all_colons": - c.SkipAllColons = true - case "smartquotes": - c.SmartQuotes = true - case "sort_fields_by_field_name": - c.SortFieldsByFieldName = true - case "sort_repeated_fields_by_content": - c.SortRepeatedFieldsByContent = true - case "sort_repeated_fields_by_subfield": - // Take all the subfields and the subfields in order as tie breakers. 
- if !hasEqualSign { - return fmt.Errorf("format should be %s=, got: %s", key, metaComment) - } - c.SortRepeatedFieldsBySubfield = append(c.SortRepeatedFieldsBySubfield, val) - case "wrap_strings_at_column": - // If multiple of this MetaComment exists in the file, take the last one. - if !hasEqualSign { - return fmt.Errorf("format should be %s=, got: %s", key, metaComment) - } - i, err := strconv.Atoi(strings.TrimSpace(val)) - if err != nil { - return fmt.Errorf("error parsing %s value %q (skipping): %v", key, val, err) - } - c.WrapStringsAtColumn = i - case "wrap_html_strings": - c.WrapHTMLStrings = true - case "wrap_strings_after_newlines": - c.WrapStringsAfterNewlines = true - case "wrap_strings_without_wordwrap": - c.WrapStringsWithoutWordwrap = true - case "on": // This doesn't change the overall config. - case "off": // This doesn't change the overall config. - default: - return fmt.Errorf("unrecognized MetaComment: %s", metaComment) - } - return nil -} - -// Parses MetaComments and adds them to the configuration. -func addMetaCommentsToConfig(in []byte, c *Config) error { - scanner := bufio.NewScanner(bytes.NewReader(in)) - for scanner.Scan() { - line := scanner.Text() - if len(line) == 0 { - continue - } - if line[0] != byte('#') { - break // only process the leading comment block - } - - // Look for comment lines in the format of ":", and process the lines with - // equals to "txtpbfmt". It's assumed that the MetaComments are given in the format of: - // # txtpbfmt: [, ...] - key, value, hasColon := strings.Cut(line[1:], ":") // Ignore the first '#'. 
- if hasColon && strings.TrimSpace(key) == "txtpbfmt" { - for _, s := range strings.Split(strings.TrimSpace(value), ",") { - metaComment := strings.TrimSpace(s) - if err := addToConfig(metaComment, c); err != nil { - return err - } - } - } - } - return nil -} - -func newParser(in []byte, c Config) (*parser, error) { - var bracketSameLine map[int]bool - if c.ExpandAllChildren { - bracketSameLine = map[int]bool{} - } else { - var err error - if bracketSameLine, err = sameLineBrackets(in, c.AllowTripleQuotedStrings); err != nil { - return nil, err - } - } - if len(in) > 0 && in[len(in)-1] != '\n' { - in = append(in, '\n') - } - parser := &parser{ - in: in, - index: 0, - length: len(in), - bracketSameLine: bracketSameLine, - config: c, - line: 1, - column: 1, - } - return parser, nil -} - -func (p *parser) nextInputIs(b byte) bool { - return p.index < p.length && p.in[p.index] == b -} - -func (p *parser) consume(b byte) bool { - if !p.nextInputIs(b) { - return false - } - p.index++ - p.column++ - if b == '\n' { - p.line++ - p.column = 1 - } - return true -} - -// consumeString consumes the given string s, which should not have any newlines. -func (p *parser) consumeString(s string) bool { - if p.index+len(s) > p.length { - return false - } - if string(p.in[p.index:p.index+len(s)]) != s { - return false - } - p.index += len(s) - p.column += len(s) - return true -} - -// loopDetector detects if the parser is in an infinite loop (ie failing to -// make progress). 
-type loopDetector struct { - lastIndex int - count int - parser *parser -} - -func (p *parser) getLoopDetector() *loopDetector { - return &loopDetector{lastIndex: p.index, parser: p} -} - -func (l *loopDetector) iter() error { - if l.parser.index == l.lastIndex { - l.count++ - if l.count < 2 { - return nil - } - return fmt.Errorf("parser failed to make progress at %s", l.parser.errorContext()) - } - l.lastIndex = l.parser.index - l.count = 0 - return nil -} - -func (p parser) errorContext() string { - index := p.index - if index >= p.length { - index = p.length - 1 - } - // Provide the surrounding input as context. - lastContentIndex := index + 20 - if lastContentIndex >= p.length { - lastContentIndex = p.length - 1 - } - previousContentIndex := index - 20 - if previousContentIndex < 0 { - previousContentIndex = 0 - } - before := string(p.in[previousContentIndex:index]) - after := string(p.in[index:lastContentIndex]) - return fmt.Sprintf("index %v\nposition %+v\nbefore: %q\nafter: %q\nbefore+after: %q", index, p.position(), before, after, before+after) -} - -func (p *parser) position() ast.Position { - return ast.Position{ - Byte: uint32(p.index), - Line: int32(p.line), - Column: int32(p.column), - } -} - -// Modifies the parser by rewinding to the given position. -// A position can be snapshotted by using the `position()` function above. -func (p *parser) rollbackPosition(pos ast.Position) { - p.index = int(pos.Byte) - p.line = int(pos.Line) - p.column = int(pos.Column) -} - -func (p *parser) consumeOptionalSeparator() error { - if p.index > 0 && !p.isBlankSep(p.index-1) { - // If an unnamed field immediately follows non-whitespace, we require a separator character first (key_one:,:value_two instead of key_one::value_two) - if p.consume(':') { - return fmt.Errorf("parser encountered unexpected character ':' (should be whitespace, ',', or ';')") - } - } - - _ = p.consume(';') // Ignore optional ';'. - _ = p.consume(',') // Ignore optional ','. 
- - return nil -} - -// parse parses a text proto. -// It assumes the text to be either conformant with the standard text proto -// (i.e. passes proto.UnmarshalText() without error) or the alternative textproto -// format (sequence of messages, each of which passes proto.UnmarshalText()). -// endPos is the position of the first character on the first line -// after parsed nodes: that's the position to append more children. -func (p *parser) parse(isRoot bool) (result []*ast.Node, endPos ast.Position, err error) { - res := []*ast.Node{} - for ld := p.getLoopDetector(); p.index < p.length; { - if err := ld.iter(); err != nil { - return nil, ast.Position{}, err - } - - // p.parse is often invoked with the index pointing at the newline character - // after the previous item. We should still report that this item starts in - // the next line. - p.consume('\n') - startPos := p.position() - - fmtDisabled, err := p.readFormatterDisabledBlock() - if err != nil { - return nil, startPos, err - } - if len(fmtDisabled) > 0 { - res = append(res, &ast.Node{ - Start: startPos, - Raw: fmtDisabled, - }) - continue - } - - // Read PreComments. - comments, blankLines := p.skipWhiteSpaceAndReadComments(true /* multiLine */) - - // Handle blank lines. - if blankLines > 0 { - if p.config.infoLevel() { - p.config.infof("blankLines: %v", blankLines) - } - // Here we collapse the leading blank lines into one blank line. - comments = append([]string{""}, comments...) - } - - for p.nextInputIs('%') { - comments = append(comments, p.readTemplate()) - c, _ := p.skipWhiteSpaceAndReadComments(false) - comments = append(comments, c...) - } - - if endPos := p.position(); p.consume('}') || p.consume('>') || p.consume(']') { - // Handle comments after last child. - - if len(comments) > 0 { - res = append(res, &ast.Node{Start: startPos, PreComments: comments}) - } - - // endPos points at the closing brace, but we should rather return the position - // of the first character after the previous item. 
Therefore let's rewind a bit: - for endPos.Byte > 0 && p.in[endPos.Byte-1] == ' ' { - endPos.Byte-- - endPos.Column-- - } - - if err = p.consumeOptionalSeparator(); err != nil { - return nil, ast.Position{}, err - } - - // Done parsing children. - return res, endPos, nil - } - - nd := &ast.Node{ - Start: startPos, - PreComments: comments, - } - if p.config.infoLevel() { - p.config.infof("PreComments: %q", strings.Join(nd.PreComments, "\n")) - } - - // Skip white-space other than '\n', which is handled below. - for p.consume(' ') || p.consume('\t') { - } - - // Handle multiple comment blocks. - // - // # comment block 1 - // # comment block 1 - // - // # comment block 2 - // # comment block 2 - // - // Each block that ends on an empty line (instead of a field) gets its own - // 'empty' node. - if p.nextInputIs('\n') { - res = append(res, nd) - continue - } - - // Handle end of file. - if p.index >= p.length { - nd.End = p.position() - if len(nd.PreComments) > 0 { - res = append(res, nd) - } - break - } - - if p.consume('[') { - // Read Name (of proto extension). - nd.Name = fmt.Sprintf("[%s]", p.readExtension()) - _ = p.consume(']') // Ignore the ']'. - } else { - // Read Name. - nd.Name = p.readFieldName() - if nd.Name == "" && !isRoot && !p.config.AllowUnnamedNodesEverywhere { - return nil, ast.Position{}, fmt.Errorf("Failed to find a FieldName at %s", p.errorContext()) - } - } - if p.config.infoLevel() { - p.config.infof("name: %q", nd.Name) - } - // Skip separator. - preCommentsBeforeColon, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */) - nd.SkipColon = !p.consume(':') - previousPos := p.position() - preCommentsAfterColon, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */) - - if p.consume('{') || p.consume('<') { - if p.config.SkipAllColons { - nd.SkipColon = true - } - nd.ChildrenSameLine = p.bracketSameLine[p.index-1] - nd.IsAngleBracket = p.config.PreserveAngleBrackets && p.in[p.index-1] == '<' - // Recursive call to parse child nodes. 
- nodes, lastPos, err := p.parse( /*isRoot=*/ false) - if err != nil { - return nil, ast.Position{}, err - } - nd.Children = nodes - nd.End = lastPos - - nd.ClosingBraceComment = p.readInlineComment() - } else if p.consume('[') { - openBracketLine := p.line - - // Skip separator. - preCommentsAfterListStart := p.readContinuousBlocksOfComments() - - var preComments []string - preComments = append(preComments, preCommentsBeforeColon...) - preComments = append(preComments, preCommentsAfterColon...) - preComments = append(preComments, preCommentsAfterListStart...) - - if p.nextInputIs('{') { - // Handle list of nodes. - nd.ChildrenAsList = true - - nodes, lastPos, err := p.parse( /*isRoot=*/ true) - if err != nil { - return nil, ast.Position{}, err - } - if len(nodes) > 0 { - nodes[0].PreComments = preComments - } - - nd.Children = nodes - nd.End = lastPos - nd.ClosingBraceComment = p.readInlineComment() - nd.ChildrenSameLine = openBracketLine == p.line - } else { - // Handle list of values. - nd.ValuesAsList = true // We found values in list - keep it as list. - - for ld := p.getLoopDetector(); !p.consume(']') && p.index < p.length; { - if err := ld.iter(); err != nil { - return nil, ast.Position{}, err - } - - // Read each value in the list. - vals, err := p.readValues() - if err != nil { - return nil, ast.Position{}, err - } - if len(vals) != 1 { - return nil, ast.Position{}, fmt.Errorf("multiple-string value not supported (%v). Please add comma explicitly, see http://b/162070952", vals) - } - if len(preComments) > 0 { - // If we read preComments before readValues(), they should go first, - // but avoid copy overhead if there are none. - vals[0].PreComments = append(preComments, vals[0].PreComments...) - } - - // Skip separator. - _, _ = p.skipWhiteSpaceAndReadComments(false /* multiLine */) - if p.consume(',') { - vals[0].InlineComment = p.readInlineComment() - } - - nd.Values = append(nd.Values, vals...) 
- - preComments, _ = p.skipWhiteSpaceAndReadComments(true /* multiLine */) - } - nd.ChildrenSameLine = openBracketLine == p.line - - res = append(res, nd) - - // Handle comments after last line (or for empty list) - nd.PostValuesComments = preComments - nd.ClosingBraceComment = p.readInlineComment() - - if err = p.consumeOptionalSeparator(); err != nil { - return nil, ast.Position{}, err - } - - continue - } - } else { - // Rewind comments. - p.rollbackPosition(previousPos) - // Handle Values. - nd.Values, err = p.readValues() - if err != nil { - return nil, ast.Position{}, err - } - if err = p.consumeOptionalSeparator(); err != nil { - return nil, ast.Position{}, err - } - } - if p.config.infoLevel() && p.index < p.length { - p.config.infof("p.in[p.index]: %q", string(p.in[p.index])) - } - res = append(res, nd) - } - return res, p.position(), nil -} - -func (p *parser) readFieldName() string { - i := p.index - for ; i < p.length && !p.isValueSep(i); i++ { - } - return p.advance(i) -} - -func (p *parser) readExtension() string { - i := p.index - for ; i < p.length && (p.isBlankSep(i) || !p.isValueSep(i)); i++ { - } - return removeBlanks(p.advance(i)) -} - -func removeBlanks(in string) string { - s := []byte(in) - for _, b := range spaceSeparators { - s = bytes.Replace(s, []byte{b}, nil, -1) - } - return string(s) -} - -func (p *parser) readContinuousBlocksOfComments() []string { - var preComments []string - for { - comments, blankLines := p.skipWhiteSpaceAndReadComments(true) - if len(comments) == 0 { - break - } - if blankLines > 0 && len(preComments) > 0 { - comments = append([]string{""}, comments...) - } - preComments = append(preComments, comments...) - } - - return preComments -} - -// Returns the exact text within the block flanked by "# txtpbfmt: off" and "# txtpbfmt: on". -// The 'off' directive must be on its own line, and it cannot be preceded by a comment line. Any -// preceding whitespace on this line and up to one blank line will be retained. 
-// The 'on' directive must followed by a line break. Only full nodes of a AST can be -// within this block. Partially disabled sections, like just the first line of a for loop without -// body or closing brace, are not supported. Value lists are not supported. No parsing happens -// within this block, and as parsing errors will be ignored, please exercise caution. -func (p *parser) readFormatterDisabledBlock() (string, error) { - previousPos := p.position() - start := p.index - for p.index < p.length && p.isBlankSep(p.index) { - if p.consume('\n') { - // Include up to one blank line before the 'off' directive. - start = p.index - 1 - } else if p.consume(' ') { - // Do nothing. Side-effect is to advance p.index. - } else if p.consume('\t') { - // Do nothing. Side-effect is to advance p.index. - } - } - offStart := p.position() - if !p.consumeString("# txtpbfmt: off") { - // Directive not found. Rollback to start. - p.rollbackPosition(previousPos) - return "", nil - } - if !p.consume('\n') { - return "", fmt.Errorf("txtpbfmt off should be followed by newline at %s", p.errorContext()) - } - for ; p.index < p.length; p.index++ { - if p.consumeString("# txtpbfmt: on") { - if !p.consume('\n') { - return "", fmt.Errorf("txtpbfmt on should be followed by newline at %s", p.errorContext()) - } - // Retain up to one blank line. - p.consume('\n') - return string(p.in[start:p.index]), nil - } - } - // We reached the end of the file without finding the 'on' directive. - p.rollbackPosition(offStart) - return "", fmt.Errorf("unterminated txtpbfmt off at %s", p.errorContext()) -} - -// skipWhiteSpaceAndReadComments has multiple cases: -// - (1) reading a block of comments followed by a blank line -// - (2) reading a block of comments followed by non-blank content -// - (3) reading the inline comments between the current char and the end of -// the current line -// -// In both cases (1) and (2), there can also be blank lines before the comment -// starts. 
-// -// Lines of comments and number of blank lines before the comment will be -// returned. If there is no comment, the returned slice will be empty. -func (p *parser) skipWhiteSpaceAndReadComments(multiLine bool) ([]string, int) { - i := p.index - var foundComment, insideComment bool - commentBegin := 0 - var comments []string - // Number of blanks lines *before* the comment (if any) starts. - blankLines := 0 - for ; i < p.length; i++ { - if p.in[i] == '#' && !insideComment { - insideComment = true - foundComment = true - commentBegin = i - } else if p.in[i] == '\n' { - if insideComment { - comments = append(comments, string(p.in[commentBegin:i])) // Exclude the '\n'. - insideComment = false - } else if foundComment { - i-- // Put back the last '\n' so the caller can detect that we're on case (1). - break - } else { - blankLines++ - } - if !multiLine { - break - } - } - if !insideComment && !p.isBlankSep(i) { - break - } - } - sep := p.advance(i) - if p.config.infoLevel() { - p.config.infof("sep: %q\np.index: %v", string(sep), p.index) - if p.index < p.length { - p.config.infof("p.in[p.index]: %q", string(p.in[p.index])) - } - } - return comments, blankLines -} - -func (p *parser) isBlankSep(i int) bool { - return bytes.Contains(spaceSeparators, p.in[i:i+1]) -} - -func (p *parser) isValueSep(i int) bool { - return bytes.Contains(valueSeparators, p.in[i:i+1]) -} - -func (p *parser) advance(i int) string { - if i > p.length { - i = p.length - } - res := p.in[p.index:i] - p.index = i - strRes := string(res) - newlines := strings.Count(strRes, "\n") - if newlines == 0 { - p.column += len(strRes) - } else { - p.column = len(strRes) - strings.LastIndex(strRes, "\n") - p.line += newlines - } - return string(res) -} - -func (p *parser) readValues() ([]*ast.Value, error) { - var values []*ast.Value - var previousPos ast.Position - preComments, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */) - if p.nextInputIs('%') { - values = append(values, 
p.populateValue(p.readTemplate(), nil)) - previousPos = p.position() - } - if p.config.AllowTripleQuotedStrings { - v, err := p.readTripleQuotedString() - if err != nil { - return nil, err - } - if v != nil { - values = append(values, v) - previousPos = p.position() - } - } - for p.consume('"') || p.consume('\'') { - // Handle string value. - stringBegin := p.index - 1 // Index of the quote. - i := p.index - for ; i < p.length; i++ { - if p.in[i] == '\\' { - i++ // Skip escaped char. - continue - } - if p.in[i] == '\n' { - p.index = i - return nil, fmt.Errorf("found literal (unescaped) new line in string at %s", p.errorContext()) - } - if p.in[i] == p.in[stringBegin] { - var vl string - if p.config.SmartQuotes { - vl = smartQuotes(p.advance(i)) - } else { - vl = fixQuotes(p.advance(i)) - } - _ = p.advance(i + 1) // Skip the quote. - values = append(values, p.populateValue(vl, preComments)) - - previousPos = p.position() - preComments, _ = p.skipWhiteSpaceAndReadComments(true /* multiLine */) - break - } - } - if i == p.length { - p.index = i - return nil, fmt.Errorf("unfinished string at %s", p.errorContext()) - } - } - if previousPos != (ast.Position{}) { - // Rewind comments. - p.rollbackPosition(previousPos) - } else { - i := p.index - // Handle other values. 
- for ; i < p.length; i++ { - if p.isValueSep(i) { - break - } - } - vl := p.advance(i) - values = append(values, p.populateValue(vl, preComments)) - } - if p.config.infoLevel() { - p.config.infof("values: %v", values) - } - return values, nil -} - -func (p *parser) readTripleQuotedString() (*ast.Value, error) { - start := p.index - stringBegin := p.index - delimiter := `"""` - if !p.consumeString(delimiter) { - delimiter = `'''` - if !p.consumeString(delimiter) { - return nil, nil - } - } - - for { - if p.consumeString(delimiter) { - break - } - if p.index == p.length { - p.index = start - return nil, fmt.Errorf("unfinished string at %s", p.errorContext()) - } - p.index++ - } - - v := p.populateValue(string(p.in[stringBegin:p.index]), nil) - - return v, nil -} - -func (p *parser) populateValue(vl string, preComments []string) *ast.Value { - if p.config.infoLevel() { - p.config.infof("value: %q", vl) - } - return &ast.Value{ - Value: vl, - InlineComment: p.readInlineComment(), - PreComments: preComments, - } -} - -func (p *parser) readInlineComment() string { - inlineComment, _ := p.skipWhiteSpaceAndReadComments(false /* multiLine */) - if p.config.infoLevel() { - p.config.infof("inlineComment: %q", strings.Join(inlineComment, "\n")) - } - if len(inlineComment) > 0 { - return inlineComment[0] - } - return "" -} - -func (p *parser) readTemplate() string { - if !p.nextInputIs('%') { - return "" - } - i := p.index + 1 - for ; i < p.length; i++ { - if p.in[i] == '"' || p.in[i] == '\'' { - stringBegin := i // Index of quote. - i++ - for ; i < p.length; i++ { - if p.in[i] == '\\' { - i++ // Skip escaped char. - continue - } - if p.in[i] == p.in[stringBegin] { - i++ // Skip end quote. - break - } - } - } - if i < p.length && p.in[i] == '%' { - i++ - break - } - } - return p.advance(i) -} - -// NodeSortFunction sorts the given nodes, using the parent node as context. parent can be nil. 
-type NodeSortFunction func(parent *ast.Node, nodes []*ast.Node) error - -// NodeFilterFunction filters the given nodes. -type NodeFilterFunction func(nodes []*ast.Node) - -// ValuesSortFunction sorts the given values. -type ValuesSortFunction func(values []*ast.Value) - -func sortAndFilterNodes(parent *ast.Node, nodes []*ast.Node, sortFunction NodeSortFunction, filterFunction NodeFilterFunction, valuesSortFunction ValuesSortFunction) error { - if len(nodes) == 0 { - return nil - } - if filterFunction != nil { - filterFunction(nodes) - } - for _, nd := range nodes { - err := sortAndFilterNodes(nd, nd.Children, sortFunction, filterFunction, valuesSortFunction) - if err != nil { - return err - } - if valuesSortFunction != nil && nd.ValuesAsList { - valuesSortFunction(nd.Values) - } - } - if sortFunction != nil { - return sortFunction(parent, nodes) - } - return nil -} - -// RemoveDuplicates marks duplicate key:value pairs from nodes as Deleted. -func RemoveDuplicates(nodes []*ast.Node) { - type nameAndValue struct { - name, value string - } - seen := make(map[nameAndValue]bool) - for _, nd := range nodes { - if len(nd.Values) == 1 { - key := nameAndValue{nd.Name, nd.Values[0].Value} - if _, value := seen[key]; value { - // Name-Value pair found in the same nesting level, deleting. 
- nd.Deleted = true - } else { - seen[key] = true - } - } - } -} - -func wrapStrings(nodes []*ast.Node, depth int, c Config) error { - if c.WrapStringsAtColumn == 0 && !c.WrapStringsAfterNewlines { - return nil - } - for _, nd := range nodes { - if nd.ChildrenSameLine { - continue - } - if c.WrapStringsAtColumn > 0 && needsWrappingAtColumn(nd, depth, c) { - if err := wrapLinesAtColumn(nd, depth, c); err != nil { - return err - } - } - if c.WrapStringsAfterNewlines && needsWrappingAfterNewlines(nd, c) { - if err := wrapLinesAfterNewlines(nd, c); err != nil { - return err - } - } - if err := wrapStrings(nd.Children, depth+1, c); err != nil { - return err - } - } - return nil -} - -func needsWrappingAtColumn(nd *ast.Node, depth int, c Config) bool { - // Even at depth 0 we have a 2-space indent when the wrapped string is rendered on the line below - // the field name. - const lengthBuffer = 2 - maxLength := c.WrapStringsAtColumn - lengthBuffer - (depth * len(indentSpaces)) - - if !c.WrapHTMLStrings { - for _, v := range nd.Values { - if tagRegex.Match([]byte(v.Value)) { - return false - } - } - } - - for _, v := range nd.Values { - if len(v.Value) >= 3 && (strings.HasPrefix(v.Value, `'''`) || strings.HasPrefix(v.Value, `"""`)) { - // Don't wrap triple-quoted strings - return false - } - if len(v.Value) > 0 && v.Value[0] != '\'' && v.Value[0] != '"' { - // Only wrap strings - return false - } - if len(v.Value) > maxLength || c.WrapStringsWithoutWordwrap { - return true - } - } - return false -} - -// If the Values of this Node constitute a string, and if Config.WrapStringsAtColumn > 0, then wrap -// the string so each line is within the specified columns. Wraps only the current Node (does not -// recurse into Children). -func wrapLinesAtColumn(nd *ast.Node, depth int, c Config) error { - // This function looks at the unquoted ast.Value.Value string (i.e., with each Value's wrapping - // quote chars removed). 
We need to remove these quotes, since otherwise they'll be re-flowed into - // the body of the text. - const lengthBuffer = 4 // Even at depth 0 we have a 2-space indent and a pair of quotes - maxLength := c.WrapStringsAtColumn - lengthBuffer - (depth * len(indentSpaces)) - - str, quote, err := unquote.Raw(nd) - if err != nil { - return fmt.Errorf("skipping string wrapping on node %q (error unquoting string): %v", nd.Name, err) - } - - var lines []string - if c.WrapStringsWithoutWordwrap { - // https://protobuf.dev/reference/protobuf/textformat-spec/#string. - // String literals can contain octal, hex, unicode, and C-style escape - // sequences: \a \b \f \n \r \t \v \? \' \"\ ? \\ - re := regexp.MustCompile(`\\[abfnrtv?\\'"]` + - `|\\[0-7]{1,3}` + - `|\\x[0-9a-fA-F]{1,2}` + - `|\\u[0-9a-fA-F]{4}` + - `|\\U000[0-9a-fA-F]{5}` + - `|\\U0010[0-9a-fA-F]{4}` + - `|.`) - var line strings.Builder - for _, t := range re.FindAllString(str, -1) { - if line.Len()+len(t) > maxLength { - lines = append(lines, line.String()) - line.Reset() - } - line.WriteString(t) - } - lines = append(lines, line.String()) - } else { - // Remove one from the max length since a trailing space may be added below. - wrappedStr := wordwrap.WrapString(str, uint(maxLength)-1) - lines = strings.Split(wrappedStr, "\n") - } - - newValues := make([]*ast.Value, 0, len(lines)) - // The Value objects have more than just the string in them. They also have any leading and - // trailing comments. To maintain these comments we recycle the existing Value objects if - // possible. 
- var i int - var line string - for i, line = range lines { - var v *ast.Value - if i < len(nd.Values) { - v = nd.Values[i] - } else { - v = &ast.Value{} - } - - if !c.WrapStringsWithoutWordwrap && i < len(lines)-1 { - line = line + " " - } - - if c.WrapStringsWithoutWordwrap { - var lineLength = len(line) - if v.InlineComment != "" { - lineLength += len(indentSpaces) + len(v.InlineComment) - } - // field name and field value are inlined for single strings, adjust for that. - if i == 0 && len(lines) == 1 { - lineLength += len(nd.Name) - } - if lineLength > maxLength { - // If there's an inline comment, promote it to a pre-comment which will - // emit a newline. - if v.InlineComment != "" { - v.PreComments = append(v.PreComments, v.InlineComment) - v.InlineComment = "" - } else if i == 0 && len(v.PreComments) == 0 { - // It's too long and we don't have any comments. - nd.PutSingleValueOnNextLine = true - } - } - } - - v.Value = fmt.Sprintf(`%c%s%c`, quote, line, quote) - newValues = append(newValues, v) - } - - postWrapCollectComments(nd, i) - - nd.Values = newValues - return nil -} - -// N.b.: this will incorrectly match `\\\\x`, which hopefully is rare. -var byteEscapeRegex = regexp.MustCompile(`\\x`) - -func needsWrappingAfterNewlines(nd *ast.Node, c Config) bool { - for _, v := range nd.Values { - if len(v.Value) >= 3 && (strings.HasPrefix(v.Value, `'''`) || strings.HasPrefix(v.Value, `"""`)) { - // Don't wrap triple-quoted strings - return false - } - if len(v.Value) > 0 && v.Value[0] != '\'' && v.Value[0] != '"' { - // Only wrap strings - return false - } - byteEscapeCount := len(byteEscapeRegex.FindAllStringIndex(v.Value, -1)) - if float64(byteEscapeCount) > float64(len(v.Value))*0.1 { - // Only wrap UTF-8 looking strings (where less than ~10% of the characters are escaped). - return false - } - // Check that there is at least one newline, *not* at the end of the string. 
- if i := strings.Index(v.Value, `\n`); i >= 0 && i < len(v.Value)-3 { - return true - } - } - return false -} - -// If the Values of this Node constitute a string, and if Config.WrapStringsAfterNewlines, -// then wrap the string so each line ends with a newline. -// Wraps only the current Node (does not recurse into Children). -func wrapLinesAfterNewlines(nd *ast.Node, c Config) error { - str, quote, err := unquote.Raw(nd) - if err != nil { - return fmt.Errorf("skipping string wrapping on node %q (error unquoting string): %v", nd.Name, err) - } - - wrappedStr := strings.ReplaceAll(str, `\n`, `\n`+"\n") - // Avoid empty string at end after splitting in case str ended with an (escaped) newline. - wrappedStr = strings.TrimSuffix(wrappedStr, "\n") - lines := strings.Split(wrappedStr, "\n") - newValues := make([]*ast.Value, 0, len(lines)) - // The Value objects have more than just the string in them. They also have any leading and - // trailing comments. To maintain these comments we recycle the existing Value objects if - // possible. - var i int - var line string - for i, line = range lines { - var v *ast.Value - if i < len(nd.Values) { - v = nd.Values[i] - } else { - v = &ast.Value{} - } - v.Value = fmt.Sprintf(`%c%s%c`, quote, line, quote) - newValues = append(newValues, v) - } - - postWrapCollectComments(nd, i) - - nd.Values = newValues - return nil -} - -func postWrapCollectComments(nd *ast.Node, i int) { - for i++; i < len(nd.Values); i++ { - // If this executes, then the text was wrapped into less lines of text (less Values) than - // previously. If any of these had comments on them, we collect them so they are not lost. - v := nd.Values[i] - nd.PostValuesComments = append(nd.PostValuesComments, v.PreComments...) 
- if len(v.InlineComment) > 0 { - nd.PostValuesComments = append(nd.PostValuesComments, v.InlineComment) - } - } -} - -func fixQuotes(s string) string { - res := make([]byte, 0, len(s)) - res = append(res, '"') - for i := 0; i < len(s); i++ { - if s[i] == '"' { - res = append(res, '\\') - } else if s[i] == '\\' { - res = append(res, s[i]) - i++ - } - res = append(res, s[i]) - } - res = append(res, '"') - return string(res) -} - -func unescapeQuotes(s string) string { - res := make([]byte, 0, len(s)) - for i := 0; i < len(s); i++ { - // If we hit an escape sequence... - if s[i] == '\\' { - // ... keep the backslash unless it's in front of a quote ... - if i == len(s)-1 || (s[i+1] != '"' && s[i+1] != '\'') { - res = append(res, '\\') - } - // ... then point at the escaped character so it is output verbatim below. - // Doing this within the loop (without "continue") ensures correct handling - // of escaped backslashes. - i++ - } - if i < len(s) { - res = append(res, s[i]) - } - } - return string(res) -} - -func smartQuotes(s string) string { - s = unescapeQuotes(s) - if strings.Contains(s, "\"") && !strings.Contains(s, "'") { - // If we hit this branch, the string doesn't contain any single quotes, and - // is being wrapped in single quotes, so no escaping is needed. - return "'" + s + "'" - } - // fixQuotes will wrap the string in double quotes, but will escape any - // double quotes that appear within the string. - return fixQuotes(s) +func ParseWithConfig(in []byte, c config.Config) ([]*ast.Node, error) { + return impl.ParseWithConfig(in, c) } // DebugFormat returns a textual representation of the specified nodes for // consumption by humans when debugging (e.g. in test failures). No guarantees // are made about the specific output. 
func DebugFormat(nodes []*ast.Node, depth int) string { - res := []string{""} - prefix := strings.Repeat(".", depth) - for _, nd := range nodes { - var value string - if nd.Deleted { - res = append(res, "DELETED") - } - if nd.Children != nil { // Also for 0 children. - value = fmt.Sprintf("children:%s", DebugFormat(nd.Children, depth+1)) - } else { - value = fmt.Sprintf("values: %v\n", nd.Values) - } - res = append(res, - fmt.Sprintf("name: %q", nd.Name), - fmt.Sprintf("PreComments: %q (len %d)", strings.Join(nd.PreComments, "\n"), len(nd.PreComments)), - value) - } - return strings.Join(res, fmt.Sprintf("\n%s ", prefix)) + return printer.Debug(nodes, depth) } // Pretty formats the nodes at the given indentation depth (0 = top-level). func Pretty(nodes []*ast.Node, depth int) string { - var result strings.Builder - formatter{&result}.writeNodes(removeDeleted(nodes), depth, false /* isSameLine */, false /* asListItems */) - return result.String() + return string(printer.FormatNodesWithDepth(nodes, depth)) } // PrettyBytes returns formatted nodes at the given indentation depth (0 = top-level) as bytes. func PrettyBytes(nodes []*ast.Node, depth int) []byte { - var result bytes.Buffer - formatter{&result}.writeNodes(removeDeleted(nodes), depth, false /* isSameLine */, false /* asListItems */) - return result.Bytes() -} - -// UnsortedFieldCollector collects UnsortedFields during parsing. -type UnsortedFieldCollector struct { - fields map[string]UnsortedField -} - -func newUnsortedFieldCollector() *UnsortedFieldCollector { - return &UnsortedFieldCollector{ - fields: make(map[string]UnsortedField), - } -} - -// UnsortedFieldCollectorFunc collects UnsortedFields during parsing. 
-type UnsortedFieldCollectorFunc func(name string, line int32, parent string) - -func (ufc *UnsortedFieldCollector) collect(name string, line int32, parent string) { - ufc.fields[name] = UnsortedField{name, line, parent} -} - -func (ufc *UnsortedFieldCollector) asError() error { - if len(ufc.fields) == 0 { - return nil - } - var fields []UnsortedField - for _, f := range ufc.fields { - fields = append(fields, f) - } - return &UnsortedFieldsError{fields} -} - -func nodeSortFunction(c Config) NodeSortFunction { - var sorter ast.NodeLess = nil - unsortedFieldCollector := newUnsortedFieldCollector() - for name, fieldOrder := range c.fieldSortOrder { - sorter = ast.ChainNodeLess(sorter, ByFieldOrder(name, fieldOrder, unsortedFieldCollector.collect)) - } - if c.SortFieldsByFieldName { - sorter = ast.ChainNodeLess(sorter, ast.ByFieldName) - } - if c.SortRepeatedFieldsByContent { - sorter = ast.ChainNodeLess(sorter, ast.ByFieldValue) - } - for _, sf := range c.SortRepeatedFieldsBySubfield { - field, subfield := parseSubfieldSpec(sf) - if subfield != "" { - sorter = ast.ChainNodeLess(sorter, ast.ByFieldSubfield(field, subfield)) - } - } - if sorter != nil { - return func(parent *ast.Node, ns []*ast.Node) error { - ast.SortNodes(parent, ns, sorter) - if c.RequireFieldSortOrderToMatchAllFieldsInNode { - return unsortedFieldCollector.asError() - } - return nil - } - } - return nil -} - -// Returns the field and subfield parts of spec "{field}.{subfield}". -// Spec without a dot is considered to be "{subfield}". 
-func parseSubfieldSpec(subfieldSpec string) (field string, subfield string) { - parts := strings.SplitN(subfieldSpec, ".", 2) - if len(parts) == 1 { - return "", parts[0] - } - return parts[0], parts[1] -} - -func nodeFilterFunction(c Config) NodeFilterFunction { - if c.RemoveDuplicateValuesForRepeatedFields { - return RemoveDuplicates - } - return nil -} - -func valuesSortFunction(c Config) ValuesSortFunction { - if c.SortRepeatedFieldsByContent { - return ast.SortValues - } - return nil -} - -func getNodePriorityForByFieldOrder(parent, node *ast.Node, name string, priorities map[string]int, unsortedCollector UnsortedFieldCollectorFunc) *int { - if parent != nil && parent.Name != name { - return nil - } - if parent == nil && name != RootName { - return nil - } - // CommentOnly nodes don't set priority below, and default to MaxInt, which keeps them at the bottom - prio := math.MaxInt - - // Unknown fields will get the int nil value of 0 from the order map, and bubble to the top. - if !node.IsCommentOnly() { - var ok bool - prio, ok = priorities[node.Name] - if !ok { - unsortedCollector(node.Name, node.Start.Line, parent.Name) - } - } - return &prio -} - -// ByFieldOrder returns a NodeLess function that orders fields within a node named name -// by the order specified in fieldOrder. Nodes sorted but not specified by the field order -// are bubbled to the top and reported to unsortedCollector. 
-func ByFieldOrder(name string, fieldOrder []string, unsortedCollector UnsortedFieldCollectorFunc) ast.NodeLess { - priorities := make(map[string]int) - for i, fieldName := range fieldOrder { - priorities[fieldName] = i + 1 - } - return func(parent, ni, nj *ast.Node, isWholeSlice bool) bool { - if !isWholeSlice { - return false - } - vi := getNodePriorityForByFieldOrder(parent, ni, name, priorities, unsortedCollector) - vj := getNodePriorityForByFieldOrder(parent, nj, name, priorities, unsortedCollector) - if vi == nil { - return vj != nil - } - if vj == nil { - return false - } - return *vi < *vj - } -} - -// stringWriter abstracts over bytes.Buffer and strings.Builder -type stringWriter interface { - WriteString(s string) (int, error) -} - -// formatter accumulates pretty-printed textproto contents into a stringWriter. -type formatter struct { - stringWriter -} - -func (f formatter) writeNodes(nodes []*ast.Node, depth int, isSameLine, asListItems bool) { - indent := " " - if !isSameLine { - indent = strings.Repeat(indentSpaces, depth) - } - - lastNonCommentIndex := 0 - if asListItems { - for i := len(nodes) - 1; i >= 0; i-- { - if !nodes[i].IsCommentOnly() { - lastNonCommentIndex = i - break - } - } - } - - for index, nd := range nodes { - if len(nd.Raw) > 0 { - f.WriteString(nd.Raw) - continue - } - for _, comment := range nd.PreComments { - if len(comment) == 0 { - if !(depth == 0 && index == 0) { - f.WriteString("\n") - } - continue - } - f.WriteString(indent) - f.WriteString(comment) - f.WriteString("\n") - } - - if nd.IsCommentOnly() { - // The comments have been printed already, no more work to do. - continue - } - f.WriteString(indent) - // Node name may be empty in alternative-style textproto files, because they - // contain a sequence of proto messages of the same type: - // { name: "first_msg" } - // { name: "second_msg" } - // In all other cases, nd.Name is not empty and should be printed. 
- if nd.Name != "" { - f.WriteString(nd.Name) - if !nd.SkipColon { - f.WriteString(":") - } - - // The space after the name is required for one-liners and message fields: - // title: "there was a space here" - // metadata: { ... } - // In other cases, there is a newline right after the colon, so no space required. - if nd.Children != nil || (len(nd.Values) == 1 && len(nd.Values[0].PreComments) == 0) || nd.ValuesAsList { - if nd.PutSingleValueOnNextLine { - f.WriteString("\n" + indent + indentSpaces) - } else { - f.WriteString(" ") - } - } - } - - if nd.ValuesAsList { // For ValuesAsList option we will preserve even empty list `field: []` - f.writeValuesAsList(nd, nd.Values, indent+indentSpaces) - } else if len(nd.Values) > 0 { - f.writeValues(nd, nd.Values, indent+indentSpaces) - } - - if nd.Children != nil { // Also for 0 Children. - if nd.ChildrenAsList { - f.writeChildrenAsListItems(nd.Children, depth+1, isSameLine || nd.ChildrenSameLine) - } else { - f.writeChildren(nd.Children, depth+1, isSameLine || nd.ChildrenSameLine, nd.IsAngleBracket) - } - } - - if asListItems && index < lastNonCommentIndex { - f.WriteString(",") - } - - if (nd.Children != nil || nd.ValuesAsList) && len(nd.ClosingBraceComment) > 0 { - f.WriteString(indentSpaces) - f.WriteString(nd.ClosingBraceComment) - } - - if !isSameLine { - f.WriteString("\n") - } - } -} - -func (f formatter) writeValues(nd *ast.Node, vals []*ast.Value, indent string) { - if len(vals) == 0 { - // This should never happen: formatValues can be called only if there are some values. 
- return - } - sep := "\n" + indent - if len(vals) == 1 && len(vals[0].PreComments) == 0 { - sep = "" - } - for _, v := range vals { - f.WriteString(sep) - for _, comment := range v.PreComments { - f.WriteString(comment) - f.WriteString(sep) - } - f.WriteString(v.Value) - if len(v.InlineComment) > 0 { - f.WriteString(indentSpaces) - f.WriteString(v.InlineComment) - } - } - for _, comment := range nd.PostValuesComments { - f.WriteString(sep) - f.WriteString(comment) - } -} - -func (f formatter) writeValuesAsList(nd *ast.Node, vals []*ast.Value, indent string) { - // Checks if it's possible to put whole list in a single line. - sameLine := nd.ChildrenSameLine && len(nd.PostValuesComments) == 0 - if sameLine { - // Parser found all children on a same line, but we need to check again. - // It's possible that AST was modified after parsing. - for _, val := range vals { - if len(val.PreComments) > 0 || len(vals[0].InlineComment) > 0 { - sameLine = false - break - } - } - } - sep := "" - if !sameLine { - sep = "\n" + indent - } - f.WriteString("[") - - for idx, v := range vals { - for _, comment := range v.PreComments { - f.WriteString(sep) - f.WriteString(comment) - } - f.WriteString(sep) - f.WriteString(v.Value) - if idx < len(vals)-1 { // Don't put trailing comma that fails Python parser. - f.WriteString(",") - if sameLine { - f.WriteString(" ") - } - } - if len(v.InlineComment) > 0 { - f.WriteString(indentSpaces) - f.WriteString(v.InlineComment) - } - } - for _, comment := range nd.PostValuesComments { - f.WriteString(sep) - f.WriteString(comment) - } - f.WriteString(strings.Replace(sep, indentSpaces, "", 1)) - f.WriteString("]") -} - -// writeChildren writes the child nodes. The result always ends with a closing brace. 
-func (f formatter) writeChildren(children []*ast.Node, depth int, sameLine, isAngleBracket bool) { - openBrace := "{" - closeBrace := "}" - if isAngleBracket { - openBrace = "<" - closeBrace = ">" - } - switch { - case sameLine && len(children) == 0: - f.WriteString(openBrace + closeBrace) - case sameLine: - f.WriteString(openBrace) - f.writeNodes(children, depth, sameLine, false /* asListItems */) - f.WriteString(" " + closeBrace) - default: - f.WriteString(openBrace + "\n") - f.writeNodes(children, depth, sameLine, false /* asListItems */) - f.WriteString(strings.Repeat(indentSpaces, depth-1)) - f.WriteString(closeBrace) - } -} - -// writeChildrenAsListItems writes the child nodes as list items. -func (f formatter) writeChildrenAsListItems(children []*ast.Node, depth int, sameLine bool) { - openBrace := "[" - closeBrace := "]" - switch { - case sameLine && len(children) == 0: - f.WriteString(openBrace + closeBrace) - case sameLine: - f.WriteString(openBrace) - f.writeNodes(children, depth, sameLine, true /* asListItems */) - f.WriteString(" " + closeBrace) - default: - f.WriteString(openBrace + "\n") - f.writeNodes(children, depth, sameLine, true /* asListItems */) - f.WriteString(strings.Repeat(indentSpaces, depth-1)) - f.WriteString(closeBrace) - } + return printer.FormatNodesWithDepth(nodes, depth) } diff --git a/vendor/github.com/protocolbuffers/txtpbfmt/printer/printer.go b/vendor/github.com/protocolbuffers/txtpbfmt/printer/printer.go new file mode 100644 index 0000000000..c6f5cac27e --- /dev/null +++ b/vendor/github.com/protocolbuffers/txtpbfmt/printer/printer.go @@ -0,0 +1,347 @@ +// Package printer provides functions for printing formatted textproto messages. +package printer + +import ( + "bytes" + "fmt" + "strings" + + "github.com/protocolbuffers/txtpbfmt/ast" + "github.com/protocolbuffers/txtpbfmt/config" + "github.com/protocolbuffers/txtpbfmt/impl" +) + +const indentSpaces = " " + +// Format formats a text proto file preserving comments. 
+func Format(in []byte) ([]byte, error) { + return FormatWithConfig(in, config.Config{}) +} + +// FormatWithConfig functions similar to format, but allows the user to pass in +// additional configuration options. +func FormatWithConfig(in []byte, c config.Config) ([]byte, error) { + if err := impl.AddMetaCommentsToConfig(in, &c); err != nil { + return nil, err + } + if c.Disable { + c.Infof("Ignored file with 'disable' comment.") + return in, nil + } + nodes, err := impl.ParseWithMetaCommentConfig(in, c) + if err != nil { + return nil, err + } + return FormatNodes(nodes), nil +} + +func removeDeleted(nodes []*ast.Node) []*ast.Node { + var res []*ast.Node + res = []*ast.Node{} // empty children is different from nil children + // When removing a node which has an empty line before it, we should keep + // the empty line before the next non-removed node to maintain the visual separation. + // Consider the following: + // foo: { name: "foo1" } + // foo: { name: "foo2" } + // + // bar: { name: "bar1" } + // bar: { name: "bar2" } + // + // If we decide to remove both foo2 and bar1, the result should still have one empty + // line between foo1 and bar2. + addEmptyLine := false + for _, node := range nodes { + if node.Deleted { + if len(node.PreComments) > 0 && node.PreComments[0] == "" { + addEmptyLine = true + } + continue + } + if len(node.Children) > 0 { + node.Children = removeDeleted(node.Children) + } + if addEmptyLine && (len(node.PreComments) == 0 || node.PreComments[0] != "") { + node.PreComments = append([]string{""}, node.PreComments...) + } + addEmptyLine = false + res = append(res, node) + } + return res +} + +// Debug returns a textual representation of the specified nodes for +// consumption by humans when debugging (e.g. in test failures). No guarantees +// are made about the specific output. 
+func Debug(nodes []*ast.Node, depth int) string { + res := []string{""} + prefix := strings.Repeat(".", depth) + for _, nd := range nodes { + var value string + if nd.Deleted { + res = append(res, "DELETED") + } + if nd.Children != nil { // Also for 0 children. + value = fmt.Sprintf("children:%s", Debug(nd.Children, depth+1)) + } else { + value = fmt.Sprintf("values: %v\n", nd.Values) + } + res = append(res, + fmt.Sprintf("name: %q", nd.Name), + fmt.Sprintf("PreComments: %q (len %d)", strings.Join(nd.PreComments, "\n"), len(nd.PreComments)), + value) + } + return strings.Join(res, fmt.Sprintf("\n%s ", prefix)) +} + +// FormatNodes returns formatted nodes at the given indentation depth (0 = top-level) as bytes. +func FormatNodes(nodes []*ast.Node) []byte { + return FormatNodesWithDepth(nodes, 0 /* depth */) +} + +// FormatNodesWithDepth returns formatted nodes at the given indentation depth (0 = top-level) as bytes. +func FormatNodesWithDepth(nodes []*ast.Node, depth int) []byte { + var result bytes.Buffer + formatter{&result}.writeNodes(removeDeleted(nodes), depth, false /* isSameLine */, false /* asListItems */) + return result.Bytes() +} + +// stringWriter abstracts over bytes.Buffer and strings.Builder +type stringWriter interface { + WriteString(s string) (int, error) +} + +// formatter accumulates pretty-printed textproto contents into a stringWriter. +type formatter struct { + stringWriter +} + +func (f formatter) writeNode(nd *ast.Node, depth int, isSameLine, asListItems bool, index, lastNonCommentIndex int) { + if len(nd.Raw) > 0 { + f.WriteString(nd.Raw) + return + } + indent := " " + if !isSameLine { + indent = strings.Repeat(indentSpaces, depth) + } + f.writePreComments(nd, indent, depth, index) + + if nd.IsCommentOnly() { + // The comments have been printed already, no more work to do. 
+ return + } + f.WriteString(indent) + // Node name may be empty in alternative-style textproto files, because they + // contain a sequence of proto messages of the same type: + // { name: "first_msg" } + // { name: "second_msg" } + // In all other cases, nd.Name is not empty and should be printed. + if nd.Name != "" { + f.writeNodeName(nd, indent) + } + + f.writeNodeValues(nd, indent) + + f.writeNodeChildren(nd, depth, isSameLine) + + if asListItems && index < lastNonCommentIndex { + f.WriteString(",") + } + + f.writeNodeClosingBraceComment(nd) +} + +func (f formatter) writePreComments(nd *ast.Node, indent string, depth int, index int) { + for _, comment := range nd.PreComments { + if len(comment) == 0 { + if !(depth == 0 && index == 0) { + f.WriteString("\n") + } + continue + } + f.WriteString(indent) + f.WriteString(comment) + f.WriteString("\n") + } +} + +func (f formatter) writeNodes(nodes []*ast.Node, depth int, isSameLine, asListItems bool) { + lastNonCommentIndex := 0 + if asListItems { + for i := len(nodes) - 1; i >= 0; i-- { + if !nodes[i].IsCommentOnly() { + lastNonCommentIndex = i + break + } + } + } + + for index, nd := range nodes { + f.writeNode(nd, depth, isSameLine, asListItems, index, lastNonCommentIndex) + if !isSameLine && len(nd.Raw) == 0 && !nd.IsCommentOnly() { + f.WriteString("\n") + } + } +} + +func (f formatter) writeNodeName(nd *ast.Node, indent string) { + f.WriteString(nd.Name) + if !nd.SkipColon { + f.WriteString(":") + } + + // The space after the name is required for one-liners and message fields: + // title: "there was a space here" + // metadata: { ... } + // In other cases, there is a newline right after the colon, so no space required. 
+ if nd.Children != nil || (len(nd.Values) == 1 && len(nd.Values[0].PreComments) == 0) || nd.ValuesAsList { + if nd.PutSingleValueOnNextLine { + f.WriteString("\n" + indent + indentSpaces) + } else { + f.WriteString(" ") + } + } +} + +func (f formatter) writeNodeValues(nd *ast.Node, indent string) { + if nd.ValuesAsList { // For ValuesAsList option we will preserve even empty list `field: []` + f.writeValuesAsList(nd, nd.Values, indent+indentSpaces) + } else if len(nd.Values) > 0 { + f.writeValues(nd, nd.Values, indent+indentSpaces) + } +} + +func (f formatter) writeNodeChildren(nd *ast.Node, depth int, isSameLine bool) { + if nd.Children != nil { // Also for 0 Children. + if nd.ChildrenAsList { + f.writeChildrenAsListItems(nd.Children, depth+1, isSameLine || nd.ChildrenSameLine) + } else { + f.writeChildren(nd.Children, depth+1, isSameLine || nd.ChildrenSameLine, nd.IsAngleBracket) + } + } +} + +func (f formatter) writeNodeClosingBraceComment(nd *ast.Node) { + if (nd.Children != nil || nd.ValuesAsList) && len(nd.ClosingBraceComment) > 0 { + f.WriteString(indentSpaces) + f.WriteString(nd.ClosingBraceComment) + } +} + +func (f formatter) writeValues(nd *ast.Node, vals []*ast.Value, indent string) { + if len(vals) == 0 { + // This should never happen: formatValues can be called only if there are some values. 
+ return + } + sep := "\n" + indent + if len(vals) == 1 && len(vals[0].PreComments) == 0 { + sep = "" + } + for _, v := range vals { + f.WriteString(sep) + for _, comment := range v.PreComments { + f.WriteString(comment) + f.WriteString(sep) + } + f.WriteString(v.Value) + if len(v.InlineComment) > 0 { + f.WriteString(indentSpaces) + f.WriteString(v.InlineComment) + } + } + for _, comment := range nd.PostValuesComments { + f.WriteString(sep) + f.WriteString(comment) + } +} + +func (f formatter) canWriteValuesAsListOnSameLine(nd *ast.Node, vals []*ast.Value) bool { + if !nd.ChildrenSameLine || len(nd.PostValuesComments) > 0 { + return false + } + // Parser found all children on a same line, but we need to check again. + // It's possible that AST was modified after parsing. + for _, val := range vals { + if len(val.PreComments) > 0 || len(val.InlineComment) > 0 { + return false + } + } + return true +} + +func (f formatter) writeValuesAsList(nd *ast.Node, vals []*ast.Value, indent string) { + // Checks if it's possible to put whole list in a single line. + sameLine := f.canWriteValuesAsListOnSameLine(nd, vals) + sep := "" + if !sameLine { + sep = "\n" + indent + } + f.WriteString("[") + + for idx, v := range vals { + for _, comment := range v.PreComments { + f.WriteString(sep) + f.WriteString(comment) + } + f.WriteString(sep) + f.WriteString(v.Value) + if idx < len(vals)-1 { // Don't put trailing comma that fails Python parser. + f.WriteString(",") + if sameLine { + f.WriteString(" ") + } + } + if len(v.InlineComment) > 0 { + f.WriteString(indentSpaces) + f.WriteString(v.InlineComment) + } + } + for _, comment := range nd.PostValuesComments { + f.WriteString(sep) + f.WriteString(comment) + } + f.WriteString(strings.Replace(sep, indentSpaces, "", 1)) + f.WriteString("]") +} + +// writeChildren writes the child nodes. The result always ends with a closing brace. 
+func (f formatter) writeChildren(children []*ast.Node, depth int, sameLine, isAngleBracket bool) { + openBrace := "{" + closeBrace := "}" + if isAngleBracket { + openBrace = "<" + closeBrace = ">" + } + switch { + case sameLine && len(children) == 0: + f.WriteString(openBrace + closeBrace) + case sameLine: + f.WriteString(openBrace) + f.writeNodes(children, depth, sameLine, false /* asListItems */) + f.WriteString(" " + closeBrace) + default: + f.WriteString(openBrace + "\n") + f.writeNodes(children, depth, sameLine, false /* asListItems */) + f.WriteString(strings.Repeat(indentSpaces, depth-1)) + f.WriteString(closeBrace) + } +} + +// writeChildrenAsListItems writes the child nodes as list items. +func (f formatter) writeChildrenAsListItems(children []*ast.Node, depth int, sameLine bool) { + openBrace := "[" + closeBrace := "]" + switch { + case sameLine && len(children) == 0: + f.WriteString(openBrace + closeBrace) + case sameLine: + f.WriteString(openBrace) + f.writeNodes(children, depth, sameLine, true /* asListItems */) + f.WriteString(" " + closeBrace) + default: + f.WriteString(openBrace + "\n") + f.writeNodes(children, depth, sameLine, true /* asListItems */) + f.WriteString(strings.Repeat(indentSpaces, depth-1)) + f.WriteString(closeBrace) + } +} diff --git a/vendor/github.com/protocolbuffers/txtpbfmt/quote/quote.go b/vendor/github.com/protocolbuffers/txtpbfmt/quote/quote.go new file mode 100644 index 0000000000..cf3221ecdf --- /dev/null +++ b/vendor/github.com/protocolbuffers/txtpbfmt/quote/quote.go @@ -0,0 +1,56 @@ +// Package quote provides functions for fixing and smart quotes. +package quote + +import "strings" + +// Fix fixes quotes. 
// Fix wraps s in double quotes. Every double quote that is not already
// escaped gets a backslash in front of it; existing escape sequences
// (a backslash plus the byte after it) are copied through unchanged.
//
// A lone backslash at the very end of s has nothing to escape. It is written
// as an escaped backslash so that it cannot swallow the closing quote and the
// result stays a well-formed string literal. Previously this input indexed
// past the end of s and panicked.
func Fix(s string) string {
	res := make([]byte, 0, len(s)+2)
	res = append(res, '"')
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '"':
			res = append(res, '\\', '"')
		case c == '\\' && i+1 < len(s):
			// Copy the escape sequence verbatim and skip the escaped byte.
			res = append(res, c, s[i+1])
			i++
		case c == '\\':
			// Dangling backslash at the end of the input.
			res = append(res, '\\', '\\')
		default:
			res = append(res, c)
		}
	}
	res = append(res, '"')
	return string(res)
}

// unescapeQuotes removes the backslash in front of escaped single and double
// quotes. All other escape sequences, including escaped backslashes and a
// trailing lone backslash, are kept as they are.
func unescapeQuotes(s string) string {
	res := make([]byte, 0, len(s))
	for i := 0; i < len(s); i++ {
		// If we hit an escape sequence...
		if s[i] == '\\' {
			// ... keep the backslash unless it's in front of a quote ...
			if i == len(s)-1 || (s[i+1] != '"' && s[i+1] != '\'') {
				res = append(res, '\\')
			}
			// ... then point at the escaped character so it is output verbatim below.
			// Doing this within the loop (without "continue") ensures correct handling
			// of escaped backslashes.
			i++
		}
		if i < len(s) {
			res = append(res, s[i])
		}
	}
	return string(res)
}

// Smart quotes s with whichever quote character needs the least escaping.
// Escaped quotes in s are unescaped first. If the result contains double
// quotes but no single quotes it is wrapped in single quotes with no escaping
// at all; otherwise it is wrapped in double quotes via Fix, which escapes any
// double quotes inside.
func Smart(s string) string {
	s = unescapeQuotes(s)
	if strings.Contains(s, "\"") && !strings.Contains(s, "'") {
		// No single quotes inside, so single-quote wrapping needs no escaping.
		return "'" + s + "'"
	}
	return Fix(s)
}
+package sort + +import ( + "fmt" + "math" + "strings" + + "github.com/protocolbuffers/txtpbfmt/ast" + "github.com/protocolbuffers/txtpbfmt/config" +) + +// UnsortedFieldsError will be returned by ParseWithConfig if +// Config.RequireFieldSortOrderToMatchAllFieldsInNode is set, and an unrecognized field is found +// while parsing. +type UnsortedFieldsError struct { + UnsortedFields []unsortedField +} + +// unsortedField records details about a single unsorted field. +type unsortedField struct { + FieldName string + Line int32 + ParentFieldName string +} + +func (e *UnsortedFieldsError) Error() string { + var errs []string + for _, us := range e.UnsortedFields { + errs = append(errs, fmt.Sprintf(" line: %d, parent field: %q, unsorted field: %q", us.Line, us.ParentFieldName, us.FieldName)) + } + return fmt.Sprintf("fields parsed that were not specified in the parser.AddFieldSortOrder() call:\n%s", strings.Join(errs, "\n")) +} + +func identityProjection(s string) string { + return s +} + +func dnsProjection(s string) string { + parts := strings.Split(s, ".") + // Reverse `parts`. + for i, j := 0, len(parts)-1; i < j; i, j = i+1, j-1 { + parts[i], parts[j] = parts[j], parts[i] + } + return strings.Join(parts, ".") +} + +// nodeSortFunction sorts the given nodes, using the parent node as context. parent can be nil. +type nodeSortFunction func(parent *ast.Node, nodes []*ast.Node) error + +// nodeFilterFunction filters the given nodes. +type nodeFilterFunction func(nodes []*ast.Node) + +// valuesSortFunction sorts the given values. +type valuesSortFunction func(values []*ast.Value) + +// Process sorts and filters the given nodes. +func Process(parent *ast.Node, nodes []*ast.Node, c config.Config) error { + return process(parent, nodes, nodeSortFunctionConfig(c), nodeFilterFunctionConfig(c), valuesSortFunctionConfig(c), c) +} + +// process sorts and filters the given nodes. 
+func process(parent *ast.Node, nodes []*ast.Node, sortFunction nodeSortFunction, filterFunction nodeFilterFunction, valuesSortFunction valuesSortFunction, c config.Config) error { + if len(nodes) == 0 { + return nil + } + if filterFunction != nil { + filterFunction(nodes) + } + for _, nd := range nodes { + err := process(nd, nd.Children, sortFunction, filterFunction, valuesSortFunction, c) + if err != nil { + return err + } + if valuesSortFunction != nil && nd.ValuesAsList { + valuesSortFunction(nd.Values) + } + } + if sortFunction != nil { + if err := sortFunction(parent, nodes); err != nil { + return err + } + } + if c.UseShortRepeatedPrimitiveFields { + groupRepeatedPrimitiveFields(nodes) + } + return nil +} + +func isPrimitive(n *ast.Node) bool { + return len(n.Children) == 0 && len(n.Values) == 1 +} + +func groupRepeatedPrimitiveFields(nodes []*ast.Node) { + for i := 0; i < len(nodes); { + node := nodes[i] + if node.Deleted || !isPrimitive(node) { + i++ + continue + } + j := i + 1 + for ; j < len(nodes); j++ { + if nodes[j].Deleted || !isPrimitive(nodes[j]) || nodes[j].Name != node.Name || len(nodes[j].PreComments) > 0 || len(nodes[j].PostValuesComments) > 0 { + break + } + } + if j > i+1 { + // Found group of repeated primitive fields: nodes[i...j-1] + node.ValuesAsList = true + node.ChildrenSameLine = true + for k := i + 1; k < j; k++ { + node.Values = append(node.Values, nodes[k].Values...) + nodes[k].Deleted = true + } + } + i = j + } +} + +// removeDuplicates marks duplicate key:value pairs from nodes as Deleted. +func removeDuplicates(nodes []*ast.Node) { + type nameAndValue struct { + name, value string + } + seen := make(map[nameAndValue]bool) + for _, nd := range nodes { + if len(nd.Values) == 1 { + key := nameAndValue{nd.Name, nd.Values[0].Value} + if _, value := seen[key]; value { + // Name-Value pair found in the same nesting level, deleting. 
+ nd.Deleted = true + } else { + seen[key] = true + } + } + } +} + +// unsortedFieldCollector collects UnsortedFields during parsing. +type unsortedFieldCollector struct { + fields map[string]unsortedField +} + +// newUnsortedFieldCollector returns a new UnsortedFieldCollector. +func newUnsortedFieldCollector() *unsortedFieldCollector { + return &unsortedFieldCollector{ + fields: make(map[string]unsortedField), + } +} + +// unsortedFieldCollectorFunc collects UnsortedFields during parsing. +type unsortedFieldCollectorFunc func(name string, line int32, parent string) + +// collect collects the unsorted field. +func (ufc *unsortedFieldCollector) collect(name string, line int32, parent string) { + ufc.fields[name] = unsortedField{name, line, parent} +} + +// asError returns an error if any unsorted fields were collected. +func (ufc *unsortedFieldCollector) asError() error { + if len(ufc.fields) == 0 { + return nil + } + var fields []unsortedField + for _, f := range ufc.fields { + fields = append(fields, f) + } + return &UnsortedFieldsError{fields} +} + +// nodeSortFunctionConfig returns a function that sorts nodes based on the config. 
+func nodeSortFunctionConfig(c config.Config) nodeSortFunction { + var sorter ast.NodeLess = nil + unsortedFieldCollector := newUnsortedFieldCollector() + for name, fieldOrder := range c.FieldSortOrder { + sorter = ast.ChainNodeLess(sorter, byFieldOrder(name, fieldOrder, unsortedFieldCollector.collect)) + } + if c.SortFieldsByFieldName { + sorter = ast.ChainNodeLess(sorter, ast.ByFieldName) + } + if c.SortFieldsByFieldNumber { + sorter = ast.ChainNodeLess(sorter, ast.ByFieldNumber) + } + projection := identityProjection + if c.DNSSortOrder { + projection = dnsProjection + } + if c.SortRepeatedFieldsByContent { + sorter = ast.ChainNodeLess(sorter, ast.ByFieldValue(projection)) + } + for _, sf := range c.SortRepeatedFieldsBySubfield { + field, subfieldPath := parseSubfieldSpec(sf) + if len(subfieldPath) > 0 { + sorter = ast.ChainNodeLess(sorter, ast.ByFieldSubfieldPath(field, subfieldPath, + projection)) + } + } + if sorter != nil { + return func(parent *ast.Node, ns []*ast.Node) error { + ast.SortNodes(parent, ns, sorter, ast.ReverseOrdering(c.ReverseSort)) + if c.RequireFieldSortOrderToMatchAllFieldsInNode { + return unsortedFieldCollector.asError() + } + return nil + } + } + return nil +} + +// Returns the field and subfield path parts of spec "{field}.{subfield1}.{subfield2}...". +// Spec without a dot is considered to be "{subfield}". +func parseSubfieldSpec(subfieldSpec string) (field string, subfieldPath []string) { + parts := strings.Split(subfieldSpec, ".") + if len(parts) == 1 { + return "", parts + } + return parts[0], parts[1:] +} + +// nodeFilterFunctionConfig returns a function that filters nodes based on the config. +func nodeFilterFunctionConfig(c config.Config) nodeFilterFunction { + if c.RemoveDuplicateValuesForRepeatedFields { + return removeDuplicates + } + return nil +} + +// valuesSortFunctionConfig returns a function that sorts values based on the config. 
+func valuesSortFunctionConfig(c config.Config) valuesSortFunction { + if c.SortRepeatedFieldsByContent { + if c.ReverseSort { + return ast.SortValuesReverse + } + return ast.SortValues + } + return nil +} + +func getNodePriorityForByFieldOrder(parent, node *ast.Node, name string, priorities map[string]int, unsortedCollector unsortedFieldCollectorFunc) *int { + if parent != nil && parent.Name != name { + return nil + } + if parent == nil && name != config.RootName { + return nil + } + // CommentOnly nodes don't set priority below, and default to MaxInt, which keeps them at the bottom + prio := math.MaxInt + + // Unknown fields will get the int nil value of 0 from the order map, and bubble to the top. + if !node.IsCommentOnly() { + var ok bool + prio, ok = priorities[node.Name] + if !ok { + parentName := config.RootName + if parent != nil { + parentName = parent.Name + } + unsortedCollector(node.Name, node.Start.Line, parentName) + } + } + return &prio +} + +// byFieldOrder returns a NodeLess function that orders fields within a node named name +// by the order specified in fieldOrder. Nodes sorted but not specified by the field order +// are bubbled to the top and reported to unsortedCollector. 
+func byFieldOrder(name string, fieldOrder []string, unsortedCollector unsortedFieldCollectorFunc) ast.NodeLess { + priorities := make(map[string]int) + for i, fieldName := range fieldOrder { + priorities[fieldName] = i + 1 + } + return func(parent, ni, nj *ast.Node, isWholeSlice bool) bool { + if !isWholeSlice { + return false + } + vi := getNodePriorityForByFieldOrder(parent, ni, name, priorities, unsortedCollector) + vj := getNodePriorityForByFieldOrder(parent, nj, name, priorities, unsortedCollector) + if vi == nil { + return vj != nil + } + if vj == nil { + return false + } + return *vi < *vj + } +} diff --git a/vendor/github.com/protocolbuffers/txtpbfmt/unquote/unquote.go b/vendor/github.com/protocolbuffers/txtpbfmt/unquote/unquote.go index 1a59436d35..beae38c045 100644 --- a/vendor/github.com/protocolbuffers/txtpbfmt/unquote/unquote.go +++ b/vendor/github.com/protocolbuffers/txtpbfmt/unquote/unquote.go @@ -104,11 +104,7 @@ func unquoteC(s string, quote rune) (string, error) { } s = s[n:] if r != '\\' { - if r < utf8.RuneSelf { - buf = append(buf, byte(r)) - } else { - buf = append(buf, string(r)...) - } + buf = appendRune(buf, r) continue } @@ -122,6 +118,13 @@ func unquoteC(s string, quote rune) (string, error) { return string(buf), nil } +func appendRune(buf []byte, r rune) []byte { + if r < utf8.RuneSelf { + return append(buf, byte(r)) + } + return append(buf, string(r)...) 
// appendRune appends r to buf: as a single byte when r is below
// utf8.RuneSelf, otherwise as its UTF-8 encoding.
func appendRune(buf []byte, r rune) []byte {
	if r < utf8.RuneSelf {
		return append(buf, byte(r))
	}
	return append(buf, string(r)...)
}

// unescapeOctal decodes a three-digit octal escape. r is the first digit
// (already consumed by the caller) and s must start with the two remaining
// digits. It returns the decoded byte as a one-byte string and the rest of s.
// An error is returned if fewer than two digits follow or if the three digits
// are not a valid octal number that fits in one byte.
func unescapeOctal(r rune, s string) (string, string, error) {
	if len(s) < 2 {
		return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
	}
	ss := string(r) + s[:2]
	s = s[2:]
	i, err := strconv.ParseUint(ss, 8, 8)
	if err != nil {
		return "", "", fmt.Errorf(`\%s contains non-octal digits`, ss)
	}
	return string([]byte{byte(i)}), s, nil
}

// unescapeHex decodes the n hexadecimal digits that follow a \x, \X, \u or \U
// escape; r is the escape letter. It returns the decoded text and the rest of
// s.
//
// \x and \X escapes yield the raw byte. \u and \U escapes yield the UTF-8
// encoding of the code point; previously they yielded the code point's
// decimal digits (e.g. "233" for \u00e9), which is not what the escape
// denotes. Code points above utf8.MaxRune are rejected.
func unescapeHex(r rune, s string, n int) (string, string, error) {
	if len(s) < n {
		return "", "", fmt.Errorf(`\%c requires %d following digits`, r, n)
	}
	ss := s[:n]
	s = s[n:]
	i, err := strconv.ParseUint(ss, 16, 64)
	if err != nil {
		return "", "", fmt.Errorf(`\%c%s contains non-hexadecimal digits`, r, ss)
	}
	if r == 'x' || r == 'X' {
		return string([]byte{byte(i)}), s, nil
	}
	if i > utf8.MaxRune {
		return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, ss)
	}
	return string(rune(i)), s, nil
}
+func Strings(nodes []*ast.Node, depth int, c config.Config) error { + if c.WrapStringsAtColumn == 0 && !c.WrapStringsAfterNewlines { + return nil + } + for _, nd := range nodes { + if nd.ChildrenSameLine { + continue + } + if err := wrapNodeStrings(nd, depth, c); err != nil { + return err + } + if err := Strings(nd.Children, depth+1, c); err != nil { + return err + } + } + return nil +} + +func wrapNodeStrings(nd *ast.Node, depth int, c config.Config) error { + if c.WrapStringsAtColumn > 0 && needsWrappingAtColumn(nd, depth, c) { + if err := wrapLinesAtColumn(nd, depth, c); err != nil { + return err + } + } + if c.WrapStringsAfterNewlines && needsWrappingAfterNewlines(nd, c) { + if err := wrapLinesAfterNewlines(nd, c); err != nil { + return err + } + } + return nil +} + +func shouldWrapString(v *ast.Value, maxLength int, c config.Config) bool { + if len(v.Value) >= 3 && (strings.HasPrefix(v.Value, `'''`) || strings.HasPrefix(v.Value, `"""`)) { + // Don't wrap triple-quoted strings + return false + } + if len(v.Value) > 0 && v.Value[0] != '\'' && v.Value[0] != '"' { + // Only wrap strings + return false + } + return len(v.Value) > maxLength || c.WrapStringsWithoutWordwrap +} + +func shouldNotWrapString(nd *ast.Node, c config.Config) bool { + if !c.WrapHTMLStrings { + for _, v := range nd.Values { + if tagRegex.Match([]byte(v.Value)) { + return true + } + } + } + return false +} + +func needsWrappingAtColumn(nd *ast.Node, depth int, c config.Config) bool { + // Even at depth 0 we have a 2-space indent when the wrapped string is rendered on the line below + // the field name. 
+ const lengthBuffer = 2 + maxLength := c.WrapStringsAtColumn - lengthBuffer - (depth * len(indentSpaces)) + + if shouldNotWrapString(nd, c) { + return false + } + + for _, v := range nd.Values { + if shouldWrapString(v, maxLength, c) { + return true + } + } + return false +} + +func wrapLinesWithoutWordwrap(str string, maxLength int) []string { + // https://protobuf.dev/reference/protobuf/textformat-spec/#string. + // String literals can contain octal, hex, unicode, and C-style escape + // sequences: \a \b \f \n \r \t \v \? \' \"\ ? \\ + re := regexp.MustCompile(`\\[abfnrtv?\\'"]` + + `|\\[0-7]{1,3}` + + `|\\x[0-9a-fA-F]{1,2}` + + `|\\u[0-9a-fA-F]{4}` + + `|\\U000[0-9a-fA-F]{5}` + + `|\\U0010[0-9a-fA-F]{4}` + + `|.`) + var lines []string + var line strings.Builder + for _, t := range re.FindAllString(str, -1) { + if line.Len()+len(t) > maxLength { + lines = append(lines, line.String()) + line.Reset() + } + line.WriteString(t) + } + lines = append(lines, line.String()) + return lines +} + +func adjustLineLength(nd *ast.Node, v *ast.Value, line string, maxLength int, i int, numLines int) { + lineLength := len(line) + if v.InlineComment != "" { + lineLength += len(indentSpaces) + len(v.InlineComment) + } + // field name and field value are inlined for single strings, adjust for that. + if i == 0 && numLines == 1 { + lineLength += len(nd.Name) + } + if lineLength > maxLength { + // If there's an inline comment, promote it to a pre-comment which will + // emit a newline. + if v.InlineComment != "" { + v.PreComments = append(v.PreComments, v.InlineComment) + v.InlineComment = "" + } else if i == 0 && len(v.PreComments) == 0 { + // It's too long and we don't have any comments. + nd.PutSingleValueOnNextLine = true + } + } +} + +// If the Values of this Node constitute a string, and if Config.WrapStringsAtColumn > 0, then wrap +// the string so each line is within the specified columns. Wraps only the current Node (does not +// recurse into Children). 
+func wrapLinesAtColumn(nd *ast.Node, depth int, c config.Config) error {
+	// This function looks at the unquoted ast.Value.Value string (i.e., with each Value's wrapping
+	// quote chars removed). We need to remove these quotes, since otherwise they'll be re-flowed into
+	// the body of the text.
+	const lengthBuffer = 4 // Even at depth 0 we have a 2-space indent and a pair of quotes
+	maxLength := c.WrapStringsAtColumn - lengthBuffer - (depth * len(indentSpaces))
+
+	str, quote, err := unquote.Raw(nd)
+	if err != nil {
+		// %w keeps the underlying error reachable through errors.Is / errors.As; the message text is
+		// the same as with %v.
+		return fmt.Errorf("skipping string wrapping on node %q (error unquoting string): %w", nd.Name, err)
+	}
+
+	var lines []string
+	if c.WrapStringsWithoutWordwrap {
+		lines = wrapLinesWithoutWordwrap(str, maxLength)
+	} else {
+		// Remove one from the max length since a trailing space may be added below.
+		// NOTE(review): for maxLength <= 0 the unsigned expression wraps around to a very large
+		// limit; confirm how wordwrap.WrapString behaves for such limits.
+		wrappedStr := wordwrap.WrapString(str, uint(maxLength)-1)
+		lines = strings.Split(wrappedStr, "\n")
+	}
+
+	newValues := make([]*ast.Value, 0, len(lines))
+	// The Value objects have more than just the string in them. They also have any leading and
+	// trailing comments. To maintain these comments we recycle the existing Value objects if
+	// possible.
+	var i int
+	var line string
+	for i, line = range lines {
+		var v *ast.Value
+		if i < len(nd.Values) {
+			v = nd.Values[i]
+		} else {
+			v = &ast.Value{}
+		}
+
+		// With word wrapping, every line except the last gets a trailing space appended.
+		if !c.WrapStringsWithoutWordwrap && i < len(lines)-1 {
+			line = line + " "
+		}
+
+		if c.WrapStringsWithoutWordwrap {
+			adjustLineLength(nd, v, line, maxLength, i, len(lines))
+		}
+
+		// Re-quote the line with the quote character reported by unquote.Raw.
+		v.Value = fmt.Sprintf(`%c%s%c`, quote, line, quote)
+		newValues = append(newValues, v)
+	}
+
+	// i is the index of the last line; comments on any old Values beyond it are collected so they
+	// are not dropped when nd.Values is replaced below.
+	postWrapCollectComments(nd, i)
+
+	nd.Values = newValues
+	return nil
+}
+
+// N.b.: this will incorrectly match `\\\\x`, which hopefully is rare.
+var byteEscapeRegex = regexp.MustCompile(`\\x`)
+
+// needsWrappingAfterNewlines reports whether some value of nd contains an escaped newline (`\n`)
+// that is not at the very end of the value. It returns false as soon as it meets a triple-quoted
+// value, a non-empty value that does not start with a quote character, or a value in which the
+// number of `\x` occurrences exceeds 10% of its length in bytes. The config argument is not used.
+func needsWrappingAfterNewlines(nd *ast.Node, c config.Config) bool {
+	for _, v := range nd.Values {
+		if len(v.Value) >= 3 && (strings.HasPrefix(v.Value, `'''`) || strings.HasPrefix(v.Value, `"""`)) {
+			// Don't wrap triple-quoted strings
+			return false
+		}
+		if len(v.Value) > 0 && v.Value[0] != '\'' && v.Value[0] != '"' {
+			// Only wrap strings
+			return false
+		}
+		byteEscapeCount := len(byteEscapeRegex.FindAllStringIndex(v.Value, -1))
+		if float64(byteEscapeCount) > float64(len(v.Value))*0.1 {
+			// Only wrap UTF-8 looking strings (where less than ~10% of the characters are escaped).
+			return false
+		}
+		// Check that there is at least one newline, *not* at the end of the string. A final `\n`
+		// followed by the closing quote starts exactly 3 bytes before the end of the value.
+		if i := strings.Index(v.Value, `\n`); i >= 0 && i < len(v.Value)-3 {
+			return true
+		}
+	}
+	return false
+}
+
+// If the Values of this Node constitute a string, and if Config.WrapStringsAfterNewlines,
+// then wrap the string so each line ends with a newline.
+// Wraps only the current Node (does not recurse into Children).
+// Returns an error if the node's values cannot be unquoted. The config argument is not used.
+func wrapLinesAfterNewlines(nd *ast.Node, c config.Config) error {
+	str, quote, err := unquote.Raw(nd)
+	if err != nil {
+		// %w keeps the underlying error reachable through errors.Is / errors.As; the message text is
+		// the same as with %v.
+		return fmt.Errorf("skipping string wrapping on node %q (error unquoting string): %w", nd.Name, err)
+	}
+
+	// Insert a real line break after every escaped newline, then split on those breaks.
+	wrappedStr := strings.ReplaceAll(str, `\n`, `\n`+"\n")
+	// Avoid empty string at end after splitting in case str ended with an (escaped) newline.
+	wrappedStr = strings.TrimSuffix(wrappedStr, "\n")
+	lines := strings.Split(wrappedStr, "\n")
+	newValues := make([]*ast.Value, 0, len(lines))
+	// The Value objects have more than just the string in them. They also have any leading and
+	// trailing comments. To maintain these comments we recycle the existing Value objects if
+	// possible.
+	var i int
+	var line string
+	for i, line = range lines {
+		var v *ast.Value
+		if i < len(nd.Values) {
+			v = nd.Values[i]
+		} else {
+			v = &ast.Value{}
+		}
+		// Re-quote the line with the quote character reported by unquote.Raw.
+		v.Value = fmt.Sprintf(`%c%s%c`, quote, line, quote)
+		newValues = append(newValues, v)
+	}
+
+	// i is the index of the last line; comments on any old Values beyond it are collected so they
+	// are not dropped when nd.Values is replaced below.
+	postWrapCollectComments(nd, i)
+
+	nd.Values = newValues
+	return nil
+}
+
+// postWrapCollectComments appends the comments of every value of nd after index i to
+// nd.PostValuesComments: first the value's pre-comments, then its inline comment if it has one.
+// Callers pass the index of the last value that was reused for wrapped output.
+func postWrapCollectComments(nd *ast.Node, i int) {
+	for i++; i < len(nd.Values); i++ {
+		// If this executes, then the text was wrapped into fewer lines of text (fewer Values) than
+		// previously. If any of these had comments on them, we collect them so they are not lost.
+		v := nd.Values[i]
+		nd.PostValuesComments = append(nd.PostValuesComments, v.PreComments...)
+		if len(v.InlineComment) > 0 {
+			nd.PostValuesComments = append(nd.PostValuesComments, v.InlineComment)
+		}
+	}
+}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 971df7e7e7..1fc43bbc92 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -1155,11 +1155,19 @@ github.com/prometheus/common/model
 github.com/prometheus/procfs
 github.com/prometheus/procfs/internal/fs
 github.com/prometheus/procfs/internal/util
-# github.com/protocolbuffers/txtpbfmt v0.0.0-20241112170944-20d2c9ebc01d
-## explicit; go 1.18
+# github.com/protocolbuffers/txtpbfmt v0.0.0-20260217160748-a481f6a22f94
+## explicit; go 1.19
 github.com/protocolbuffers/txtpbfmt/ast
+github.com/protocolbuffers/txtpbfmt/config
+github.com/protocolbuffers/txtpbfmt/descriptor
+github.com/protocolbuffers/txtpbfmt/impl
+github.com/protocolbuffers/txtpbfmt/logger
 github.com/protocolbuffers/txtpbfmt/parser
+github.com/protocolbuffers/txtpbfmt/printer
+github.com/protocolbuffers/txtpbfmt/quote
+github.com/protocolbuffers/txtpbfmt/sort
 github.com/protocolbuffers/txtpbfmt/unquote
+github.com/protocolbuffers/txtpbfmt/wrap
 # github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475
 ## explicit
 github.com/rcrowley/go-metrics