diff --git a/metrics/client.go b/metrics/client.go index cdca9a7..74d823b 100644 --- a/metrics/client.go +++ b/metrics/client.go @@ -1,16 +1,23 @@ package metrics import ( + "context" + "fmt" + "strconv" "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" + + "github.com/smartcontractkit/chainlink-common/pkg/beholder" ) var ( RPCCallLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{ - Name: "rpc_call_latency", - Help: "The duration of an RPC call in milliseconds", + Name: rpcCallLatencyBeholder, + Help: "The duration of an RPC call in nanoseconds", Buckets: []float64{ float64(50 * time.Millisecond), float64(100 * time.Millisecond), @@ -23,3 +30,70 @@ var ( }, }, []string{"chainFamily", "chainID", "rpcUrl", "isSendOnly", "success", "rpcCallName"}) ) + +const rpcCallLatencyBeholder = "rpc_call_latency" + +// RPCClientMetrics records RPC call latency to Prometheus and Beholder (failures: success="false"; same pattern as multinode metrics). +// Construct once per chain (or process) with ChainFamily and ChainID; pass rpcUrl and isSendOnly on each call +// when they vary by node or request. +type RPCClientMetrics interface { + // RecordRequest records latency for an RPC call (observed in nanoseconds for Prometheus and Beholder). + // Failures use success="false"; derive error rate from rpc_call_latency_count{success="false"} (or equivalent). + RecordRequest(ctx context.Context, rpcURL string, isSendOnly bool, callName string, latency time.Duration, err error) +} + +var _ RPCClientMetrics = (*rpcClientMetrics)(nil) + +type rpcClientMetrics struct { + chainFamily string + chainID string + latencyHis metric.Float64Histogram +} + +// RPCClientMetricsConfig holds labels that are fixed for the lifetime of the metrics handle (e.g. one per chain). +type RPCClientMetricsConfig struct { + ChainFamily string + ChainID string +} + +// NewRPCClientMetrics creates RPC client metrics that publish to Prometheus and Beholder. +func NewRPCClientMetrics(cfg RPCClientMetricsConfig) (RPCClientMetrics, error) { + latency, err := beholder.GetMeter().Float64Histogram(rpcCallLatencyBeholder) + if err != nil { + return nil, fmt.Errorf("failed to register RPC call latency metric: %w", err) + } + return &rpcClientMetrics{ + chainFamily: cfg.ChainFamily, + chainID: cfg.ChainID, + latencyHis: latency, + }, nil +} + +func (m *rpcClientMetrics) RecordRequest(ctx context.Context, rpcURL string, isSendOnly bool, callName string, latency time.Duration, err error) { + successStr := "true" + if err != nil { + successStr = "false" + } + sendStr := strconv.FormatBool(isSendOnly) + latencyNs := float64(latency) + + RPCCallLatency.WithLabelValues(m.chainFamily, m.chainID, rpcURL, sendStr, successStr, callName).Observe(latencyNs) + + latAttrs := metric.WithAttributes( + attribute.String("chainFamily", m.chainFamily), + attribute.String("chainID", m.chainID), + attribute.String("rpcUrl", rpcURL), + attribute.String("isSendOnly", sendStr), + attribute.String("success", successStr), + attribute.String("rpcCallName", callName), + ) + m.latencyHis.Record(ctx, latencyNs, latAttrs) +} + +// NoopRPCClientMetrics is a no-op implementation for when metrics are disabled. +type NoopRPCClientMetrics struct{} + +func (NoopRPCClientMetrics) RecordRequest(context.Context, string, bool, string, time.Duration, error) { +} + +var _ RPCClientMetrics = NoopRPCClientMetrics{} diff --git a/metrics/client_test.go b/metrics/client_test.go new file mode 100644 index 0000000..ba407a0 --- /dev/null +++ b/metrics/client_test.go @@ -0,0 +1,31 @@ +package metrics + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestNewRPCClientMetrics(t *testing.T) { + m, err := NewRPCClientMetrics(RPCClientMetricsConfig{ + ChainFamily: "evm", + ChainID: "1", + }) + require.NoError(t, err) + require.NotNil(t, m) + + ctx := context.Background() + const url = "http://localhost:8545" + m.RecordRequest(ctx, url, false, "latest_block", 100*time.Millisecond, nil) + m.RecordRequest(ctx, url, true, "latest_block", 50*time.Millisecond, errors.New("rpc error")) +} + +func TestNoopRPCClientMetrics_RecordRequest(t *testing.T) { + var m NoopRPCClientMetrics + ctx := context.Background() + m.RecordRequest(ctx, "http://localhost:8545", false, "latest_block", 100*time.Millisecond, nil) + m.RecordRequest(ctx, "http://localhost:8545", false, "latest_block", 50*time.Millisecond, errors.New("rpc error")) +} diff --git a/metrics/multinode.go b/metrics/multinode.go index c278913..5bc1ff5 100644 --- a/metrics/multinode.go +++ b/metrics/multinode.go @@ -135,29 +135,29 @@ type GenericMultiNodeMetrics interface { var _ GenericMultiNodeMetrics = &multiNodeMetrics{} type multiNodeMetrics struct { - network string - chainID string - nodeStates metric.Int64Gauge - nodeClientVersion metric.Int64Gauge - nodeVerifies metric.Int64Counter - nodeVerifiesFailed metric.Int64Counter - nodeVerifiesSuccess metric.Int64Counter - nodeTransitionsToAlive metric.Int64Counter - nodeTransitionsToInSync metric.Int64Counter - nodeTransitionsToOutOfSync metric.Int64Counter - nodeTransitionsToUnreachable metric.Int64Counter - nodeTransitionsToInvalidChainID metric.Int64Counter - nodeTransitionsToUnusable metric.Int64Counter - nodeTransitionsToSyncing metric.Int64Counter - highestSeenBlock metric.Int64Gauge - highestFinalizedBlock metric.Int64Gauge - seenBlocks metric.Int64Counter - polls metric.Int64Counter - pollsFailed metric.Int64Counter - pollsSuccess metric.Int64Counter - finalizedStateFailed metric.Int64Counter - nodeTransitionsToFinalizedStateNotAvailable metric.Int64Counter - invariantViolations metric.Int64Counter + network string + chainID string + nodeStates metric.Int64Gauge + nodeClientVersion metric.Int64Gauge + nodeVerifies metric.Int64Counter + nodeVerifiesFailed metric.Int64Counter + nodeVerifiesSuccess metric.Int64Counter + nodeTransitionsToAlive metric.Int64Counter + nodeTransitionsToInSync metric.Int64Counter + nodeTransitionsToOutOfSync metric.Int64Counter + nodeTransitionsToUnreachable metric.Int64Counter + nodeTransitionsToInvalidChainID metric.Int64Counter + nodeTransitionsToUnusable metric.Int64Counter + nodeTransitionsToSyncing metric.Int64Counter + highestSeenBlock metric.Int64Gauge + highestFinalizedBlock metric.Int64Gauge + seenBlocks metric.Int64Counter + polls metric.Int64Counter + pollsFailed metric.Int64Counter + pollsSuccess metric.Int64Counter + finalizedStateFailed metric.Int64Counter + nodeTransitionsToFinalizedStateNotAvailable metric.Int64Counter + invariantViolations metric.Int64Counter } func NewGenericMultiNodeMetrics(network string, chainID string) (GenericMultiNodeMetrics, error) { @@ -289,7 +289,7 @@ func NewGenericMultiNodeMetrics(network string, chainID string) (GenericMultiNod pollsSuccess: pollsSuccess, finalizedStateFailed: finalizedStateFailed, nodeTransitionsToFinalizedStateNotAvailable: nodeTransitionsToFinalizedStateNotAvailable, - invariantViolations: invariantViolations, + invariantViolations: invariantViolations, }, nil }