Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ jobs:
go-test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- name: setup Go
uses: actions/setup-go@v6
with:
go-version-file: 'go.mod'
- uses: actions/checkout@v5
- run: go test -v ./...
golangci-lint:
runs-on: ubuntu-latest
Expand All @@ -24,4 +24,4 @@ jobs:
go-version-file: 'go.mod'
cache: false
- name: golangci-lint
uses: golangci/golangci-lint-action@v3
run: go run github.com/golangci/golangci-lint/cmd/golangci-lint@v1.64.8 run
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ Currently supported collectors:
- [CRM collector](internal/collector/crm_collector.go): collects Critical Resource Monitoring metrics.
- [Queue collector](internal/collector/queue_collector.go): collects metrics about queues.
- [LLDP collector](internal/collector/lldp_collector.go): collects LLDP neighbor information from SONiC Redis.
- [VLAN collector](internal/collector/vlan_collector.go): collects VLAN and VLAN member state from SONiC Redis.
- [LAG collector](internal/collector/lag_collector.go): collects PortChannel and member state from SONiC Redis.

# Usage

Expand All @@ -33,6 +35,16 @@ Environment variables:
- `LLDP_REFRESH_INTERVAL` - LLDP cache refresh interval. Default: `30s`.
- `LLDP_TIMEOUT` - timeout for one LLDP refresh cycle. Default: `2s`.
- `LLDP_MAX_NEIGHBORS` - maximum number of LLDP neighbors exported per refresh. Default: `512`.
- `VLAN_ENABLED` - enable VLAN collector. Default: `true`.
- `VLAN_REFRESH_INTERVAL` - VLAN cache refresh interval. Default: `30s`.
- `VLAN_TIMEOUT` - timeout for one VLAN refresh cycle. Default: `2s`.
- `VLAN_MAX_VLANS` - maximum number of VLANs exported per refresh. Default: `1024`.
- `VLAN_MAX_MEMBERS` - maximum number of VLAN members exported per refresh. Default: `8192`.
- `LAG_ENABLED` - enable LAG collector. Default: `true`.
- `LAG_REFRESH_INTERVAL` - LAG cache refresh interval. Default: `30s`.
- `LAG_TIMEOUT` - timeout for one LAG refresh cycle. Default: `2s`.
- `LAG_MAX_LAGS` - maximum number of LAGs exported per refresh. Default: `512`.
- `LAG_MAX_MEMBERS` - maximum number of LAG members exported per refresh. Default: `4096`.

## Validated Platforms

Expand Down
20 changes: 18 additions & 2 deletions cmd/sonic-exporter/main.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package main

import (
"log/slog"
"net/http"
"os"

Expand All @@ -17,7 +18,7 @@ import (

func main() {
// setup node exporter collectors through global kingpin flags
kingpin.CommandLine.Parse([]string{
_, err := kingpin.CommandLine.Parse([]string{
"--collector.disable-defaults",
"--collector.loadavg",
"--collector.cpu",
Expand All @@ -27,6 +28,10 @@ func main() {
"--collector.time",
"--collector.stat",
})
if err != nil {
slog.Error("failed to parse node exporter collector defaults", "error", err)
os.Exit(1)
}

// New kingpin instance to prevent imported code from adding flags (node exporter)
kp := kingpin.New("sonic-exporter", "Prometheus exporter for SONiC network switches")
Expand All @@ -40,7 +45,10 @@ func main() {
flag.AddFlags(kp, promslogConfig)
kp.HelpFlag.Short('h')
kp.UsageWriter(os.Stdout)
kp.Parse(os.Args[1:])
if _, err = kp.Parse(os.Args[1:]); err != nil {
slog.Error("failed to parse command line arguments", "error", err)
os.Exit(1)
}

logger := promslog.New(promslogConfig)

Expand All @@ -50,13 +58,21 @@ func main() {
crmCollector := collector.NewCrmCollector(logger)
queueCollector := collector.NewQueueCollector(logger)
lldpCollector := collector.NewLldpCollector(logger)
vlanCollector := collector.NewVlanCollector(logger)
lagCollector := collector.NewLagCollector(logger)
prometheus.MustRegister(interfaceCollector)
prometheus.MustRegister(hwCollector)
prometheus.MustRegister(crmCollector)
prometheus.MustRegister(queueCollector)
if lldpCollector.IsEnabled() {
prometheus.MustRegister(lldpCollector)
}
if vlanCollector.IsEnabled() {
prometheus.MustRegister(vlanCollector)
}
if lagCollector.IsEnabled() {
prometheus.MustRegister(lagCollector)
}

// Node exporter collectors
nodeCollector, err := nodecollector.NewNodeCollector(logger,
Expand Down
27 changes: 27 additions & 0 deletions fixtures/test/appl_db_data.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,33 @@
},
"LLDP_ENTRY_TABLE:Ethernet0": {
"lldp_rem_index": "1"
},
"VLAN_TABLE:Vlan1000": {
"admin_status": "up",
"oper_status": "up"
},
"VLAN_TABLE:Vlan2000": {
"admin_status": "down",
"oper_status": "down"
},
"LAG_TABLE:PortChannel1": {
"mtu": "9100",
"admin_status": "up",
"oper_status": "up"
},
"LAG_TABLE:PortChannel2": {
"mtu": "9100",
"admin_status": "up",
"oper_status": "down"
},
"LAG_MEMBER_TABLE:PortChannel1:Ethernet24": {
"status": "enabled"
},
"LAG_MEMBER_TABLE:PortChannel1:Ethernet28": {
"status": "disabled"
},
"LAG_MEMBER_TABLE:PortChannel2:Ethernet92": {
"status": "enabled"
}
}
}
22 changes: 22 additions & 0 deletions fixtures/test/config_db_data.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,28 @@
"lanes": "125,126,127,128",
"mtu": "9100",
"speed": "100000"
},
"VLAN|Vlan1000": {
"admin_status": "up",
"vlanid": "1000"
},
"VLAN|Vlan2000": {
"admin_status": "down",
"vlanid": "2000"
},
"VLAN_MEMBER|Vlan1000|Ethernet0": {
"tagging_mode": "untagged"
},
"VLAN_MEMBER|Vlan1000|PortChannel1": {
"tagging_mode": "tagged"
},
"PORTCHANNEL|PortChannel1": {
"admin_status": "up",
"mtu": "9100"
},
"PORTCHANNEL|PortChannel2": {
"admin_status": "up",
"mtu": "9100"
}
}
}
99 changes: 99 additions & 0 deletions internal/collector/collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ func TestMain(m *testing.M) {
os.Setenv("REDIS_ADDRESS", s.Addr())
os.Setenv("LLDP_ENABLED", "true")
os.Setenv("LLDP_INCLUDE_MGMT", "true")
os.Setenv("VLAN_ENABLED", "true")
os.Setenv("LAG_ENABLED", "true")
err = populateRedisData()
if err != nil {
slog.Error("failed to populate redis data", "error", err)
Expand All @@ -89,6 +91,8 @@ func TestMain(m *testing.M) {
os.Unsetenv("REDIS_ADDRESS")
os.Unsetenv("LLDP_ENABLED")
os.Unsetenv("LLDP_INCLUDE_MGMT")
os.Unsetenv("VLAN_ENABLED")
os.Unsetenv("LAG_ENABLED")
os.Exit(exitCode)
}

Expand Down Expand Up @@ -276,3 +280,98 @@ func TestLldpCollector(t *testing.T) {
t.Errorf("unexpected collecting result:\n%s", err)
}
}

// TestVlanCollector builds a VLAN collector, lints every metric it exposes,
// and then compares selected metric families against the expected
// exposition text.
func TestVlanCollector(t *testing.T) {
	logger := promslog.New(&promslog.Config{})
	vlanCollector := NewVlanCollector(logger)

	// Lint first so naming/help problems are reported next to any value
	// mismatches found below.
	problems, err := testutil.CollectAndLint(vlanCollector)
	if err != nil {
		// Include the underlying error; without it a failure gives no hint
		// about what went wrong while gathering or linting.
		t.Errorf("metric lint completed with errors: %v", err)
	}

	for _, problem := range problems {
		t.Errorf("metric %v has a problem: %v", problem.Metric, problem.Text)
	}

	// Collector health and per-VLAN member counts.
	metadata := `
# HELP sonic_vlan_collector_success Whether VLAN collector succeeded
# TYPE sonic_vlan_collector_success gauge
# HELP sonic_vlan_members Number of VLAN members
# TYPE sonic_vlan_members gauge
`

	expected := `
sonic_vlan_collector_success 1
sonic_vlan_members{vlan="Vlan1000"} 2
sonic_vlan_members{vlan="Vlan2000"} 0
`

	if err := testutil.CollectAndCompare(vlanCollector, strings.NewReader(metadata+expected), "sonic_vlan_collector_success", "sonic_vlan_members"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}

	// Per-member info series are compared separately from the counts above.
	memberMetadata := `
# HELP sonic_vlan_member_info Non-numeric data about VLAN member, value is always 1
# TYPE sonic_vlan_member_info gauge
`

	memberExpected := `
sonic_vlan_member_info{member="Ethernet0",tagging_mode="untagged",vlan="Vlan1000"} 1
sonic_vlan_member_info{member="PortChannel1",tagging_mode="tagged",vlan="Vlan1000"} 1
`

	if err := testutil.CollectAndCompare(vlanCollector, strings.NewReader(memberMetadata+memberExpected), "sonic_vlan_member_info"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}

// TestLagCollector builds a LAG collector, lints every metric it exposes,
// and then compares selected metric families against the expected
// exposition text.
func TestLagCollector(t *testing.T) {
	logger := promslog.New(&promslog.Config{})
	lagCollector := NewLagCollector(logger)

	// Lint first so naming/help problems are reported next to any value
	// mismatches found below.
	problems, err := testutil.CollectAndLint(lagCollector)
	if err != nil {
		// Include the underlying error; without it a failure gives no hint
		// about what went wrong while gathering or linting.
		t.Errorf("metric lint completed with errors: %v", err)
	}

	for _, problem := range problems {
		t.Errorf("metric %v has a problem: %v", problem.Metric, problem.Text)
	}

	// Collector health and per-LAG member counts.
	metadata := `
# HELP sonic_lag_collector_success Whether LAG collector succeeded
# TYPE sonic_lag_collector_success gauge
# HELP sonic_lag_members Number of LAG member interfaces
# TYPE sonic_lag_members gauge
`

	expected := `
sonic_lag_collector_success 1
sonic_lag_members{lag="PortChannel1"} 2
sonic_lag_members{lag="PortChannel2"} 1
`

	if err := testutil.CollectAndCompare(lagCollector, strings.NewReader(metadata+expected), "sonic_lag_collector_success", "sonic_lag_members"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}

	// Per-member status series are compared separately from the counts above.
	memberMetadata := `
# HELP sonic_lag_member_status Status of LAG member interface (1=enabled, 0=disabled)
# TYPE sonic_lag_member_status gauge
`

	memberExpected := `
sonic_lag_member_status{lag="PortChannel1",member="Ethernet24"} 1
sonic_lag_member_status{lag="PortChannel1",member="Ethernet28"} 0
sonic_lag_member_status{lag="PortChannel2",member="Ethernet92"} 1
`

	if err := testutil.CollectAndCompare(lagCollector, strings.NewReader(memberMetadata+memberExpected), "sonic_lag_member_status"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}
Loading