Skip to content
78 changes: 76 additions & 2 deletions metrics/client.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
package metrics

import (
"context"
"fmt"
"strconv"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"

"github.com/smartcontractkit/chainlink-common/pkg/beholder"
)

var (
RPCCallLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "rpc_call_latency",
Help: "The duration of an RPC call in milliseconds",
Name: rpcCallLatencyBeholder,
Help: "The duration of an RPC call in nanoseconds",
Buckets: []float64{
float64(50 * time.Millisecond),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we change bucket size here from nanoseconds to milliseconds, we'll need to change the values that we report, but this is a breaking change. Other teams and NOPs may already depend on the values being in nanoseconds.

float64(100 * time.Millisecond),
Expand All @@ -23,3 +30,70 @@ var (
},
}, []string{"chainFamily", "chainID", "rpcUrl", "isSendOnly", "success", "rpcCallName"})
)

// rpcCallLatencyBeholder is the instrument name under which NewRPCClientMetrics
// registers the Beholder latency histogram.
const rpcCallLatencyBeholder = "rpc_call_latency"

// RPCClientMetrics reports how long individual RPC calls took.
//
// The implementation returned by NewRPCClientMetrics writes every observation
// to both Prometheus and Beholder. Chain family and chain ID are fixed when the
// handle is constructed (typically once per chain or per process); the RPC URL
// and the send-only flag are supplied on every call because they can differ
// between nodes or requests.
type RPCClientMetrics interface {
	// RecordRequest records the latency of a single RPC call.
	//
	// A non-nil err marks the observation with success="false", otherwise
	// success="true". There is no separate error counter: derive the error
	// rate from rpc_call_latency_count{success="false"} (or the equivalent
	// query in your backend).
	RecordRequest(ctx context.Context, rpcURL string, isSendOnly bool, callName string, latency time.Duration, err error)
}

// Compile-time assertion that *rpcClientMetrics implements RPCClientMetrics.
var _ RPCClientMetrics = (*rpcClientMetrics)(nil)

// rpcClientMetrics is the RPCClientMetrics implementation built by
// NewRPCClientMetrics.
type rpcClientMetrics struct {
	// chainFamily and chainID are attached to every observation as the
	// "chainFamily" and "chainID" labels/attributes.
	chainFamily string
	chainID     string

	// latencyHis is the Beholder histogram that receives each latency sample.
	latencyHis metric.Float64Histogram
}

// RPCClientMetricsConfig carries the label values that stay constant for the
// whole lifetime of a metrics handle, e.g. one handle per chain.
type RPCClientMetricsConfig struct {
	// ChainFamily is reported as the "chainFamily" label on every observation.
	ChainFamily string
	// ChainID is reported as the "chainID" label on every observation.
	ChainID string
}

// NewRPCClientMetrics builds an RPCClientMetrics handle bound to the chain
// family and chain ID in cfg.
//
// It obtains a Float64Histogram named rpc_call_latency from the Beholder meter
// and returns a wrapped error if that fails; in that case the returned handle
// is nil.
//
// NOTE(review): the Beholder histogram is created without explicit bucket
// boundaries while samples are recorded in nanoseconds — confirm that the
// boundaries in effect are suitable for values of that magnitude.
func NewRPCClientMetrics(cfg RPCClientMetricsConfig) (RPCClientMetrics, error) {
	hist, err := beholder.GetMeter().Float64Histogram(rpcCallLatencyBeholder)
	if err != nil {
		return nil, fmt.Errorf("failed to register RPC call latency metric: %w", err)
	}

	handle := &rpcClientMetrics{
		chainFamily: cfg.ChainFamily,
		chainID:     cfg.ChainID,
		latencyHis:  hist,
	}
	return handle, nil
}

// RecordRequest observes the latency of one RPC call in the Prometheus
// RPCCallLatency histogram and then in the Beholder histogram, using the same
// six label values for both: chainFamily, chainID, rpcUrl, isSendOnly, success
// and rpcCallName.
//
// success is "false" when err is non-nil and "true" otherwise. The observed
// value is the duration's nanosecond count as a float64.
func (m *rpcClientMetrics) RecordRequest(ctx context.Context, rpcURL string, isSendOnly bool, callName string, latency time.Duration, err error) {
	// Both backends take string label values; success just means "no error".
	success := strconv.FormatBool(err == nil)
	sendOnly := strconv.FormatBool(isSendOnly)
	// time.Duration is an integer count of nanoseconds, so a plain conversion
	// yields the nanosecond value both histograms are fed with.
	nanos := float64(latency)

	// Label value order must match the label names RPCCallLatency was declared with.
	RPCCallLatency.
		WithLabelValues(m.chainFamily, m.chainID, rpcURL, sendOnly, success, callName).
		Observe(nanos)

	m.latencyHis.Record(ctx, nanos, metric.WithAttributes(
		attribute.String("chainFamily", m.chainFamily),
		attribute.String("chainID", m.chainID),
		attribute.String("rpcUrl", rpcURL),
		attribute.String("isSendOnly", sendOnly),
		attribute.String("success", success),
		attribute.String("rpcCallName", callName),
	))
}

// NoopRPCClientMetrics is an RPCClientMetrics implementation that discards
// every observation. Use it when metrics are disabled; the zero value is ready
// to use.
type NoopRPCClientMetrics struct{}

// RecordRequest ignores all of its arguments and does nothing.
func (NoopRPCClientMetrics) RecordRequest(_ context.Context, _ string, _ bool, _ string, _ time.Duration, _ error) {
}

// Compile-time assertion that NoopRPCClientMetrics implements RPCClientMetrics.
var _ RPCClientMetrics = NoopRPCClientMetrics{}
31 changes: 31 additions & 0 deletions metrics/client_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package metrics

import (
"context"
"errors"
"testing"
"time"

"github.com/stretchr/testify/require"
)

func TestNewRPCClientMetrics(t *testing.T) {
m, err := NewRPCClientMetrics(RPCClientMetricsConfig{
ChainFamily: "evm",
ChainID: "1",
})
require.NoError(t, err)
require.NotNil(t, m)

ctx := context.Background()
const url = "http://localhost:8545"
m.RecordRequest(ctx, url, false, "latest_block", 100*time.Millisecond, nil)
m.RecordRequest(ctx, url, true, "latest_block", 50*time.Millisecond, errors.New("rpc error"))
}

// TestNoopRPCClientMetrics_RecordRequest checks that the no-op implementation
// accepts both a successful and a failed call without panicking.
func TestNoopRPCClientMetrics_RecordRequest(t *testing.T) {
	const url = "http://localhost:8545"

	ctx := context.Background()
	m := NoopRPCClientMetrics{}

	m.RecordRequest(ctx, url, false, "latest_block", 100*time.Millisecond, nil)
	m.RecordRequest(ctx, url, false, "latest_block", 50*time.Millisecond, errors.New("rpc error"))
}
48 changes: 24 additions & 24 deletions metrics/multinode.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,29 +135,29 @@ type GenericMultiNodeMetrics interface {
var _ GenericMultiNodeMetrics = &multiNodeMetrics{}

type multiNodeMetrics struct {
network string
chainID string
nodeStates metric.Int64Gauge
nodeClientVersion metric.Int64Gauge
nodeVerifies metric.Int64Counter
nodeVerifiesFailed metric.Int64Counter
nodeVerifiesSuccess metric.Int64Counter
nodeTransitionsToAlive metric.Int64Counter
nodeTransitionsToInSync metric.Int64Counter
nodeTransitionsToOutOfSync metric.Int64Counter
nodeTransitionsToUnreachable metric.Int64Counter
nodeTransitionsToInvalidChainID metric.Int64Counter
nodeTransitionsToUnusable metric.Int64Counter
nodeTransitionsToSyncing metric.Int64Counter
highestSeenBlock metric.Int64Gauge
highestFinalizedBlock metric.Int64Gauge
seenBlocks metric.Int64Counter
polls metric.Int64Counter
pollsFailed metric.Int64Counter
pollsSuccess metric.Int64Counter
finalizedStateFailed metric.Int64Counter
nodeTransitionsToFinalizedStateNotAvailable metric.Int64Counter
invariantViolations metric.Int64Counter
network string
chainID string
nodeStates metric.Int64Gauge
nodeClientVersion metric.Int64Gauge
nodeVerifies metric.Int64Counter
nodeVerifiesFailed metric.Int64Counter
nodeVerifiesSuccess metric.Int64Counter
nodeTransitionsToAlive metric.Int64Counter
nodeTransitionsToInSync metric.Int64Counter
nodeTransitionsToOutOfSync metric.Int64Counter
nodeTransitionsToUnreachable metric.Int64Counter
nodeTransitionsToInvalidChainID metric.Int64Counter
nodeTransitionsToUnusable metric.Int64Counter
nodeTransitionsToSyncing metric.Int64Counter
highestSeenBlock metric.Int64Gauge
highestFinalizedBlock metric.Int64Gauge
seenBlocks metric.Int64Counter
polls metric.Int64Counter
pollsFailed metric.Int64Counter
pollsSuccess metric.Int64Counter
finalizedStateFailed metric.Int64Counter
nodeTransitionsToFinalizedStateNotAvailable metric.Int64Counter
invariantViolations metric.Int64Counter
}

func NewGenericMultiNodeMetrics(network string, chainID string) (GenericMultiNodeMetrics, error) {
Expand Down Expand Up @@ -289,7 +289,7 @@ func NewGenericMultiNodeMetrics(network string, chainID string) (GenericMultiNod
pollsSuccess: pollsSuccess,
finalizedStateFailed: finalizedStateFailed,
nodeTransitionsToFinalizedStateNotAvailable: nodeTransitionsToFinalizedStateNotAvailable,
invariantViolations: invariantViolations,
invariantViolations: invariantViolations,
}, nil
}

Expand Down
Loading