diff --git a/README.md b/README.md index 9211607..74039fc 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ Currently supported collectors: - [LLDP collector](internal/collector/lldp_collector.go): collects LLDP neighbor information from SONiC Redis. - [VLAN collector](internal/collector/vlan_collector.go): collects VLAN and VLAN member state from SONiC Redis. - [LAG collector](internal/collector/lag_collector.go): collects PortChannel and member state from SONiC Redis. +- [FDB collector](internal/collector/fdb_collector.go): collects FDB summary metrics from SONiC ASIC DB. # Usage @@ -45,6 +46,12 @@ Environment variables: - `LAG_TIMEOUT` - timeout for one LAG refresh cycle. Default: `2s`. - `LAG_MAX_LAGS` - maximum number of LAGs exported per refresh. Default: `512`. - `LAG_MAX_MEMBERS` - maximum number of LAG members exported per refresh. Default: `4096`. +- `FDB_ENABLED` - enable FDB collector. Default: `false`. +- `FDB_REFRESH_INTERVAL` - FDB cache refresh interval. Default: `60s`. +- `FDB_TIMEOUT` - timeout for one FDB refresh cycle. Default: `2s`. +- `FDB_MAX_ENTRIES` - maximum number of ASIC FDB entries processed per refresh. Default: `50000`. +- `FDB_MAX_PORTS` - maximum number of per-port FDB series exported. Default: `1024`. +- `FDB_MAX_VLANS` - maximum number of per-VLAN FDB series exported. Default: `4096`. ## Validated Platforms diff --git a/cmd/sonic-exporter/main.go b/cmd/sonic-exporter/main.go index 56c8688..b5d7789 100644 --- a/cmd/sonic-exporter/main.go +++ b/cmd/sonic-exporter/main.go @@ -60,6 +60,7 @@ func main() { lldpCollector := collector.NewLldpCollector(logger) vlanCollector := collector.NewVlanCollector(logger) lagCollector := collector.NewLagCollector(logger) + fdbCollector := collector.NewFdbCollector(logger) prometheus.MustRegister(interfaceCollector) prometheus.MustRegister(hwCollector) prometheus.MustRegister(crmCollector) @@ -73,6 +74,9 @@ func main() { if lagCollector.IsEnabled() { prometheus.MustRegister(lagCollector) } + if fdbCollector.IsEnabled() { + prometheus.MustRegister(fdbCollector) + } // Node exporter collectors nodeCollector, err := nodecollector.NewNodeCollector(logger, diff --git a/fixtures/test/asic_db_data.json b/fixtures/test/asic_db_data.json new file mode 100644 index 0000000..533390a --- /dev/null +++ b/fixtures/test/asic_db_data.json @@ -0,0 +1,37 @@ +{ + "id": "ASIC_DB", + "data": { + "ASIC_STATE:SAI_OBJECT_TYPE_VLAN:oid:0x260000000000111": { + "SAI_VLAN_ATTR_VLAN_ID": "1000" + }, + "ASIC_STATE:SAI_OBJECT_TYPE_VLAN:oid:0x260000000000222": { + "SAI_VLAN_ATTR_VLAN_ID": "2000" + }, + "ASIC_STATE:SAI_OBJECT_TYPE_BRIDGE_PORT:oid:0x3a000000000001": { + "SAI_BRIDGE_PORT_ATTR_PORT_ID": "oid:0x1000000000002" + }, + "ASIC_STATE:SAI_OBJECT_TYPE_BRIDGE_PORT:oid:0x3a000000000002": { + "SAI_BRIDGE_PORT_ATTR_PORT_ID": "oid:0x1000000000003" + }, + "ASIC_STATE:SAI_OBJECT_TYPE_FDB_ENTRY:{\"bvid\":\"oid:0x260000000000111\",\"mac\":\"AA:BB:CC:00:00:01\",\"switch_id\":\"oid:0x21000000000000\"}": { + "SAI_FDB_ENTRY_ATTR_BRIDGE_PORT_ID": "oid:0x3a000000000001", + "SAI_FDB_ENTRY_ATTR_TYPE": "SAI_FDB_ENTRY_TYPE_DYNAMIC" + }, + "ASIC_STATE:SAI_OBJECT_TYPE_FDB_ENTRY:{\"bvid\":\"oid:0x260000000000111\",\"mac\":\"AA:BB:CC:00:00:02\",\"switch_id\":\"oid:0x21000000000000\"}": { + "SAI_FDB_ENTRY_ATTR_BRIDGE_PORT_ID": "oid:0x3a000000000001", + "SAI_FDB_ENTRY_ATTR_TYPE": "SAI_FDB_ENTRY_TYPE_DYNAMIC" + }, + "ASIC_STATE:SAI_OBJECT_TYPE_FDB_ENTRY:{\"bvid\":\"oid:0x260000000000222\",\"mac\":\"AA:BB:CC:00:00:03\",\"switch_id\":\"oid:0x21000000000000\"}": { + "SAI_FDB_ENTRY_ATTR_BRIDGE_PORT_ID": "oid:0x3a000000000002", + "SAI_FDB_ENTRY_ATTR_TYPE": "SAI_FDB_ENTRY_TYPE_STATIC" + }, + "ASIC_STATE:SAI_OBJECT_TYPE_FDB_ENTRY:{\"bvid\":\"oid:0x260000000000999\",\"mac\":\"AA:BB:CC:00:00:04\",\"switch_id\":\"oid:0x21000000000000\"}": { + "SAI_FDB_ENTRY_ATTR_BRIDGE_PORT_ID": "oid:0x3a000000000002", + "SAI_FDB_ENTRY_ATTR_TYPE": "SAI_FDB_ENTRY_TYPE_DYNAMIC" + }, + "ASIC_STATE:SAI_OBJECT_TYPE_FDB_ENTRY:not-json": { + "SAI_FDB_ENTRY_ATTR_BRIDGE_PORT_ID": "oid:0x3a000000000001", + "SAI_FDB_ENTRY_ATTR_TYPE": "SAI_FDB_ENTRY_TYPE_DYNAMIC" + } + } +} diff --git a/internal/collector/collector_test.go b/internal/collector/collector_test.go index 7498239..7a62189 100644 --- a/internal/collector/collector_test.go +++ b/internal/collector/collector_test.go @@ -27,6 +27,7 @@ func populateRedisData() error { "../../fixtures/test/counters_db_data.json", "../../fixtures/test/config_db_data.json", "../../fixtures/test/appl_db_data.json", + "../../fixtures/test/asic_db_data.json", "../../fixtures/test/state_db_data.json", } @@ -79,6 +80,7 @@ func TestMain(m *testing.M) { os.Setenv("LLDP_INCLUDE_MGMT", "true") os.Setenv("VLAN_ENABLED", "true") os.Setenv("LAG_ENABLED", "true") + os.Setenv("FDB_ENABLED", "true") err = populateRedisData() if err != nil { slog.Error("failed to populate redis data", "error", err) @@ -93,6 +95,7 @@ func TestMain(m *testing.M) { os.Unsetenv("LLDP_INCLUDE_MGMT") os.Unsetenv("VLAN_ENABLED") os.Unsetenv("LAG_ENABLED") + os.Unsetenv("FDB_ENABLED") os.Exit(exitCode) } @@ -375,3 +378,97 @@ func TestLagCollector(t *testing.T) { t.Errorf("unexpected collecting result:\n%s", err) } } + +func TestFdbCollector(t *testing.T) { + promslogConfig := &promslog.Config{} + logger := promslog.New(promslogConfig) + + fdbCollector := NewFdbCollector(logger) + + problems, err := testutil.CollectAndLint(fdbCollector) + if err != nil { + t.Error("metric lint completed with errors") + } + + for _, problem := range problems { + t.Errorf("metric %v has a problem: %v", problem.Metric, problem.Text) + } + + metadata := ` + # HELP sonic_fdb_collector_success Whether FDB collector succeeded + # TYPE sonic_fdb_collector_success gauge + # HELP sonic_fdb_entries Number of FDB entries + # TYPE sonic_fdb_entries gauge + # HELP sonic_fdb_entries_unknown_vlan Number of FDB entries with unknown VLAN mapping + # TYPE sonic_fdb_entries_unknown_vlan gauge + ` + + expected := ` + sonic_fdb_collector_success 1 + sonic_fdb_entries 4 + sonic_fdb_entries_unknown_vlan 1 + ` + + if err := testutil.CollectAndCompare(fdbCollector, strings.NewReader(metadata+expected), "sonic_fdb_collector_success", "sonic_fdb_entries", "sonic_fdb_entries_unknown_vlan"); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + portMetadata := ` + # HELP sonic_fdb_entries_by_port Number of FDB entries by port + # TYPE sonic_fdb_entries_by_port gauge + ` + + portExpected := ` + sonic_fdb_entries_by_port{port="Ethernet0"} 2 + sonic_fdb_entries_by_port{port="Ethernet39"} 2 + ` + + if err := testutil.CollectAndCompare(fdbCollector, strings.NewReader(portMetadata+portExpected), "sonic_fdb_entries_by_port"); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + vlanMetadata := ` + # HELP sonic_fdb_entries_by_vlan Number of FDB entries by VLAN + # TYPE sonic_fdb_entries_by_vlan gauge + ` + + vlanExpected := ` + sonic_fdb_entries_by_vlan{vlan="1000"} 2 + sonic_fdb_entries_by_vlan{vlan="2000"} 1 + sonic_fdb_entries_by_vlan{vlan="unknown"} 1 + ` + + if err := testutil.CollectAndCompare(fdbCollector, strings.NewReader(vlanMetadata+vlanExpected), "sonic_fdb_entries_by_vlan"); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + typeMetadata := ` + # HELP sonic_fdb_entries_by_type Number of FDB entries by entry type + # TYPE sonic_fdb_entries_by_type gauge + ` + + typeExpected := ` + sonic_fdb_entries_by_type{entry_type="dynamic"} 3 + sonic_fdb_entries_by_type{entry_type="static"} 1 + ` + + if err := testutil.CollectAndCompare(fdbCollector, strings.NewReader(typeMetadata+typeExpected), "sonic_fdb_entries_by_type"); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + statusMetadata := ` + # HELP sonic_fdb_entries_skipped Number of FDB entries skipped during latest refresh + # TYPE sonic_fdb_entries_skipped gauge + # HELP sonic_fdb_entries_truncated Whether FDB collection hit max entries limit (1=yes, 0=no) + # TYPE sonic_fdb_entries_truncated gauge + ` + + statusExpected := ` + sonic_fdb_entries_skipped 1 + sonic_fdb_entries_truncated 0 + ` + + if err := testutil.CollectAndCompare(fdbCollector, strings.NewReader(statusMetadata+statusExpected), "sonic_fdb_entries_skipped", "sonic_fdb_entries_truncated"); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } +} diff --git a/internal/collector/fdb_collector.go b/internal/collector/fdb_collector.go new file mode 100644 index 0000000..98c49f6 --- /dev/null +++ b/internal/collector/fdb_collector.go @@ -0,0 +1,450 @@ +package collector + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "sort" + "strings" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/vinted/sonic-exporter/pkg/redis" +) + +const ( + asciiFDBEntryPrefix = "ASIC_STATE:SAI_OBJECT_TYPE_FDB_ENTRY:" + asciiVLANObjectPrefix = "ASIC_STATE:SAI_OBJECT_TYPE_VLAN:" + asciiBridgePortPrefix = "ASIC_STATE:SAI_OBJECT_TYPE_BRIDGE_PORT:" +) + +type fdbCollectorConfig struct { + enabled bool + refreshInterval time.Duration + timeout time.Duration + maxEntries int + maxPorts int + maxVlans int + redisScanCount int64 +} + +type fdbCollector struct { + fdbEntries *prometheus.Desc + fdbEntriesByPort *prometheus.Desc + fdbEntriesByVlan *prometheus.Desc + fdbEntriesByType *prometheus.Desc + fdbEntriesUnknownVlan *prometheus.Desc + fdbEntriesTruncated *prometheus.Desc + fdbEntriesSkipped *prometheus.Desc + scrapeDuration *prometheus.Desc + scrapeCollectorSuccess *prometheus.Desc + cacheAge *prometheus.Desc + + logger *slog.Logger + config fdbCollectorConfig + + mu sync.RWMutex + cachedMetrics []prometheus.Metric + lastSuccess float64 + lastScrapeDuration float64 + lastSkippedEntries float64 + lastTruncated float64 + lastRefreshTime time.Time +} + +type fdbEntryKey struct { + Bvid string `json:"bvid"` + Vlan string `json:"vlan"` + Mac string `json:"mac"` +} + +func NewFdbCollector(logger *slog.Logger) *fdbCollector { + const ( + namespace = "sonic" + subsystem = "fdb" + ) + + collector := &fdbCollector{ + fdbEntries: prometheus.NewDesc(prometheus.BuildFQName(namespace, subsystem, "entries"), + "Number of FDB entries", nil, nil), + fdbEntriesByPort: prometheus.NewDesc(prometheus.BuildFQName(namespace, subsystem, "entries_by_port"), + "Number of FDB entries by port", []string{"port"}, nil), + fdbEntriesByVlan: prometheus.NewDesc(prometheus.BuildFQName(namespace, subsystem, "entries_by_vlan"), + "Number of FDB entries by VLAN", []string{"vlan"}, nil), + fdbEntriesByType: prometheus.NewDesc(prometheus.BuildFQName(namespace, subsystem, "entries_by_type"), + "Number of FDB entries by entry type", []string{"entry_type"}, nil), + fdbEntriesUnknownVlan: prometheus.NewDesc(prometheus.BuildFQName(namespace, subsystem, "entries_unknown_vlan"), + "Number of FDB entries with unknown VLAN mapping", nil, nil), + fdbEntriesTruncated: prometheus.NewDesc(prometheus.BuildFQName(namespace, subsystem, "entries_truncated"), + "Whether FDB collection hit max entries limit (1=yes, 0=no)", nil, nil), + fdbEntriesSkipped: prometheus.NewDesc(prometheus.BuildFQName(namespace, subsystem, "entries_skipped"), + "Number of FDB entries skipped during latest refresh", nil, nil), + scrapeDuration: prometheus.NewDesc(prometheus.BuildFQName(namespace, subsystem, "scrape_duration_seconds"), + "Time it took for exporter to refresh FDB metrics", nil, nil), + scrapeCollectorSuccess: prometheus.NewDesc(prometheus.BuildFQName(namespace, subsystem, "collector_success"), + "Whether FDB collector succeeded", nil, nil), + cacheAge: prometheus.NewDesc(prometheus.BuildFQName(namespace, subsystem, "cache_age_seconds"), + "Age of latest FDB cache refresh", nil, nil), + logger: logger, + config: loadFdbCollectorConfig(logger), + } + + if !collector.config.enabled { + collector.logger.Info("FDB collector is disabled") + return collector + } + + collector.refreshMetrics() + go collector.refreshLoop() + + return collector +} + +func (collector *fdbCollector) IsEnabled() bool { + return collector.config.enabled +} + +func (collector *fdbCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- collector.fdbEntries + ch <- collector.fdbEntriesByPort + ch <- collector.fdbEntriesByVlan + ch <- collector.fdbEntriesByType + ch <- collector.fdbEntriesUnknownVlan + ch <- collector.fdbEntriesTruncated + ch <- collector.fdbEntriesSkipped + ch <- collector.scrapeDuration + ch <- collector.scrapeCollectorSuccess + ch <- collector.cacheAge +} + +func (collector *fdbCollector) Collect(ch chan<- prometheus.Metric) { + if !collector.config.enabled { + return + } + + collector.mu.RLock() + cachedMetrics := append([]prometheus.Metric{}, collector.cachedMetrics...) + lastSkippedEntries := collector.lastSkippedEntries + lastTruncated := collector.lastTruncated + lastScrapeDuration := collector.lastScrapeDuration + lastSuccess := collector.lastSuccess + lastRefreshTime := collector.lastRefreshTime + collector.mu.RUnlock() + + for _, metric := range cachedMetrics { + ch <- metric + } + + cacheAge := 0.0 + if !lastRefreshTime.IsZero() { + cacheAge = time.Since(lastRefreshTime).Seconds() + } + + ch <- prometheus.MustNewConstMetric(collector.fdbEntriesSkipped, prometheus.GaugeValue, lastSkippedEntries) + ch <- prometheus.MustNewConstMetric(collector.fdbEntriesTruncated, prometheus.GaugeValue, lastTruncated) + ch <- prometheus.MustNewConstMetric(collector.scrapeDuration, prometheus.GaugeValue, lastScrapeDuration) + ch <- prometheus.MustNewConstMetric(collector.scrapeCollectorSuccess, prometheus.GaugeValue, lastSuccess) + ch <- prometheus.MustNewConstMetric(collector.cacheAge, prometheus.GaugeValue, cacheAge) +} + +func (collector *fdbCollector) refreshLoop() { + ticker := time.NewTicker(collector.config.refreshInterval) + defer ticker.Stop() + + for range ticker.C { + collector.refreshMetrics() + } +} + +func (collector *fdbCollector) refreshMetrics() { + start := time.Now() + ctx, cancel := context.WithTimeout(context.Background(), collector.config.timeout) + defer cancel() + + metrics, skippedEntries, truncated, err := collector.scrapeMetrics(ctx) + scrapeDuration := time.Since(start).Seconds() + + collector.mu.Lock() + defer collector.mu.Unlock() + + collector.lastScrapeDuration = scrapeDuration + + if err != nil { + collector.lastSuccess = 0 + collector.logger.Error("Error refreshing FDB metrics", "error", err) + return + } + + collector.cachedMetrics = metrics + collector.lastSkippedEntries = float64(skippedEntries) + collector.lastTruncated = truncated + collector.lastSuccess = 1 + collector.lastRefreshTime = time.Now() +} + +func (collector *fdbCollector) scrapeMetrics(ctx context.Context) ([]prometheus.Metric, int, float64, error) { + redisClient, err := redis.NewClient() + if err != nil { + return nil, 0, 0, fmt.Errorf("redis client initialization failed: %w", err) + } + defer redisClient.Close() + + portOidToName, err := collector.portOidToNameMap(ctx, redisClient) + if err != nil { + return nil, 0, 0, err + } + + bvidToVlan, skippedEntries, err := collector.bvidToVlanMap(ctx, redisClient) + if err != nil { + return nil, 0, 0, err + } + + bridgePortToPort, bridgeSkippedEntries, err := collector.bridgePortToPortMap(ctx, redisClient, portOidToName) + if err != nil { + return nil, 0, 0, err + } + skippedEntries += bridgeSkippedEntries + + fdbKeys, err := redisClient.ScanKeysFromDb(ctx, "ASIC_DB", asciiFDBEntryPrefix+"*", collector.config.redisScanCount) + if err != nil { + return nil, 0, 0, fmt.Errorf("failed to scan FDB keys: %w", err) + } + + sort.Strings(fdbKeys) + + truncated := 0.0 + entriesTotal := 0.0 + unknownVLANEntries := 0.0 + entriesByVLAN := map[string]float64{} + entriesByPort := map[string]float64{} + entriesByType := map[string]float64{} + + for idx, fdbKey := range fdbKeys { + if idx >= collector.config.maxEntries { + truncated = 1 + skippedEntries += len(fdbKeys) - idx + break + } + + parsedKey, err := parseFDBKey(fdbKey) + if err != nil { + skippedEntries++ + continue + } + + fdbData, err := redisClient.HgetAllFromDb(ctx, "ASIC_DB", fdbKey) + if err != nil { + return nil, 0, 0, fmt.Errorf("failed to read FDB entry %s: %w", fdbKey, err) + } + + if len(fdbData) == 0 { + skippedEntries++ + continue + } + + vlanLabel, unknownVLAN := resolveFDBVLANLabel(parsedKey, bvidToVlan) + if unknownVLAN { + unknownVLANEntries++ + } + + entryType := normalizeFDBType(fdbData["SAI_FDB_ENTRY_ATTR_TYPE"]) + if entryType == "" { + entryType = "unknown" + } + + portLabel := bridgePortToPort[fdbData["SAI_FDB_ENTRY_ATTR_BRIDGE_PORT_ID"]] + if portLabel == "" { + portLabel = "unknown" + } + + entriesTotal++ + entriesByVLAN[vlanLabel]++ + entriesByPort[portLabel]++ + entriesByType[entryType]++ + } + + metrics := make([]prometheus.Metric, 0, len(entriesByVLAN)+len(entriesByPort)+len(entriesByType)+2) + metrics = append(metrics, prometheus.MustNewConstMetric(collector.fdbEntries, prometheus.GaugeValue, entriesTotal)) + metrics = append(metrics, prometheus.MustNewConstMetric(collector.fdbEntriesUnknownVlan, prometheus.GaugeValue, unknownVLANEntries)) + + vlanNames := sortedMapKeys(entriesByVLAN) + for idx, vlanName := range vlanNames { + if idx >= collector.config.maxVlans { + skippedEntries += len(vlanNames) - idx + break + } + metrics = append(metrics, prometheus.MustNewConstMetric(collector.fdbEntriesByVlan, prometheus.GaugeValue, entriesByVLAN[vlanName], vlanName)) + } + + portNames := sortedMapKeys(entriesByPort) + for idx, portName := range portNames { + if idx >= collector.config.maxPorts { + skippedEntries += len(portNames) - idx + break + } + metrics = append(metrics, prometheus.MustNewConstMetric(collector.fdbEntriesByPort, prometheus.GaugeValue, entriesByPort[portName], portName)) + } + + entryTypes := sortedMapKeys(entriesByType) + for _, entryType := range entryTypes { + metrics = append(metrics, prometheus.MustNewConstMetric(collector.fdbEntriesByType, prometheus.GaugeValue, entriesByType[entryType], entryType)) + } + + return metrics, skippedEntries, truncated, nil +} + +func (collector *fdbCollector) portOidToNameMap(ctx context.Context, redisClient redis.Client) (map[string]string, error) { + portNameMap, err := redisClient.HgetAllFromDb(ctx, "COUNTERS_DB", "COUNTERS_PORT_NAME_MAP") + if err != nil { + return nil, fmt.Errorf("failed to read COUNTERS_PORT_NAME_MAP: %w", err) + } + + lagNameMap, err := redisClient.HgetAllFromDb(ctx, "COUNTERS_DB", "COUNTERS_LAG_NAME_MAP") + if err != nil { + return nil, fmt.Errorf("failed to read COUNTERS_LAG_NAME_MAP: %w", err) + } + + oidToName := make(map[string]string, len(portNameMap)+len(lagNameMap)) + for name, oid := range portNameMap { + oidToName[oid] = name + } + + for name, oid := range lagNameMap { + oidToName[oid] = name + } + + return oidToName, nil +} + +func (collector *fdbCollector) bvidToVlanMap(ctx context.Context, redisClient redis.Client) (map[string]string, int, error) { + vlanKeys, err := redisClient.ScanKeysFromDb(ctx, "ASIC_DB", asciiVLANObjectPrefix+"*", collector.config.redisScanCount) + if err != nil { + return nil, 0, fmt.Errorf("failed to scan VLAN object keys: %w", err) + } + + bvidToVlan := make(map[string]string, len(vlanKeys)) + skippedEntries := 0 + + for _, vlanKey := range vlanKeys { + bvid := strings.TrimPrefix(vlanKey, asciiVLANObjectPrefix) + if bvid == "" || bvid == vlanKey { + skippedEntries++ + continue + } + + vlanData, err := redisClient.HgetAllFromDb(ctx, "ASIC_DB", vlanKey) + if err != nil { + return nil, 0, fmt.Errorf("failed to read VLAN object %s: %w", vlanKey, err) + } + + vlanID := strings.TrimSpace(vlanData["SAI_VLAN_ATTR_VLAN_ID"]) + if vlanID == "" { + skippedEntries++ + continue + } + + bvidToVlan[bvid] = vlanID + } + + return bvidToVlan, skippedEntries, nil +} + +func (collector *fdbCollector) bridgePortToPortMap(ctx context.Context, redisClient redis.Client, portOidToName map[string]string) (map[string]string, int, error) { + bridgePortKeys, err := redisClient.ScanKeysFromDb(ctx, "ASIC_DB", asciiBridgePortPrefix+"*", collector.config.redisScanCount) + if err != nil { + return nil, 0, fmt.Errorf("failed to scan bridge port keys: %w", err) + } + + bridgePortToPort := make(map[string]string, len(bridgePortKeys)) + skippedEntries := 0 + + for _, bridgePortKey := range bridgePortKeys { + bridgePortID := strings.TrimPrefix(bridgePortKey, asciiBridgePortPrefix) + if bridgePortID == "" || bridgePortID == bridgePortKey { + skippedEntries++ + continue + } + + bridgePortData, err := redisClient.HgetAllFromDb(ctx, "ASIC_DB", bridgePortKey) + if err != nil { + return nil, 0, fmt.Errorf("failed to read bridge port %s: %w", bridgePortKey, err) + } + + portID := bridgePortData["SAI_BRIDGE_PORT_ATTR_PORT_ID"] + if portID == "" { + continue + } + + portName := portOidToName[portID] + if portName == "" { + skippedEntries++ + continue + } + + bridgePortToPort[bridgePortID] = portName + } + + return bridgePortToPort, skippedEntries, nil +} + +func loadFdbCollectorConfig(logger *slog.Logger) fdbCollectorConfig { + return fdbCollectorConfig{ + enabled: parseBoolEnv(logger, "FDB_ENABLED", false), + refreshInterval: parseDurationEnv(logger, "FDB_REFRESH_INTERVAL", 60*time.Second), + timeout: parseDurationEnv(logger, "FDB_TIMEOUT", 2*time.Second), + maxEntries: parseIntEnv(logger, "FDB_MAX_ENTRIES", 50000), + maxPorts: parseIntEnv(logger, "FDB_MAX_PORTS", 1024), + maxVlans: parseIntEnv(logger, "FDB_MAX_VLANS", 4096), + redisScanCount: 256, + } +} + +func parseFDBKey(fdbKey string) (fdbEntryKey, error) { + entryKey := fdbEntryKey{} + + payload := strings.TrimPrefix(fdbKey, asciiFDBEntryPrefix) + if payload == "" || payload == fdbKey { + return entryKey, fmt.Errorf("fdb key has invalid format") + } + + if err := json.Unmarshal([]byte(payload), &entryKey); err != nil { + return entryKey, err + } + + if entryKey.Mac == "" { + return entryKey, fmt.Errorf("missing mac in fdb key") + } + + return entryKey, nil +} + +func resolveFDBVLANLabel(entryKey fdbEntryKey, bvidToVlan map[string]string) (string, bool) { + vlanLabel := strings.TrimSpace(entryKey.Vlan) + if vlanLabel != "" { + return vlanLabel, false + } + + vlanLabel = bvidToVlan[strings.TrimSpace(entryKey.Bvid)] + if vlanLabel != "" { + return vlanLabel, false + } + + return "unknown", true +} + +func normalizeFDBType(rawValue string) string { + entryType := strings.TrimSpace(strings.ToLower(rawValue)) + entryType = strings.TrimPrefix(entryType, "sai_fdb_entry_type_") + return entryType +} + +func sortedMapKeys(values map[string]float64) []string { + keys := make([]string, 0, len(values)) + for key := range values { + keys = append(keys, key) + } + sort.Strings(keys) + return keys +} diff --git a/pkg/redis/client.go b/pkg/redis/client.go index 08341e7..1f95efc 100644 --- a/pkg/redis/client.go +++ b/pkg/redis/client.go @@ -19,6 +19,8 @@ func RedisDbId(name string) (int, bool) { return 0, true case "COUNTERS_DB": return 2, true + case "ASIC_DB": + return 1, true case "CONFIG_DB": return 4, true case "STATE_DB":