Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Add telemetry #27

Merged
merged 30 commits into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions cmd/flags.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package cmd

const (
flagHome = "home"
flagFile = "file"
flagHome = "home"
flagFile = "file"
flagMetricsListenAddr = "metrics-listen-addr"
)
13 changes: 12 additions & 1 deletion cmd/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,20 @@ $ %s start 1 12 # start relaying data from specific tunnelIDs.`, appName, a
tunnelIDs = append(tunnelIDs, tunnelID)
}

return app.Start(cmd.Context(), tunnelIDs)
metricsListenAddrFlag, err := cmd.Flags().GetString(flagMetricsListenAddr)
if err != nil {
return err
}

return app.Start(cmd.Context(), tunnelIDs, metricsListenAddrFlag)
},
}

cmd.Flags().String(
flagMetricsListenAddr,
"",
"address to use for metrics server. By default, "+
"will be the metrics-listen-addr parameter in the global config. ",
)
return cmd
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ require (
github.com/miguelmota/go-ethereum-hdwallet v0.1.2
github.com/mitchellh/mapstructure v1.5.0
github.com/pelletier/go-toml/v2 v2.2.2
github.com/prometheus/client_golang v1.20.5
github.com/shopspring/decimal v1.4.0
github.com/spf13/cobra v1.8.1
github.com/spf13/viper v1.19.0
Expand Down Expand Up @@ -138,7 +139,6 @@ require (
github.com/petermattis/goid v0.0.0-20231207134359-e60b3f734c67 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.20.5 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
Expand Down
220 changes: 220 additions & 0 deletions internal/relayermetrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
package relayermetrics

import (
"context"
"fmt"
"net"
"net/http"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
"go.uber.org/zap"
)

// metrics stores the Prometheus metrics instance.
var metrics *PrometheusMetrics

// globalTelemetryEnabled indicates whether telemetry is enabled globally.
// It is set on initialization and does not change for the lifetime of the program.
var globalTelemetryEnabled bool

type PrometheusMetrics struct {
PacketsRelayedSuccess *prometheus.CounterVec
UnrelayedPackets *prometheus.GaugeVec
TasksCount *prometheus.CounterVec
TaskExecutionTime *prometheus.SummaryVec
TunnelsPerDestinationChain *prometheus.CounterVec
ActiveTargetContractsCount prometheus.Gauge
TxsCount *prometheus.CounterVec
TxProcessTime *prometheus.SummaryVec
GasUsed *prometheus.SummaryVec
}

func updateMetrics(updateFn func()) {
if globalTelemetryEnabled {
updateFn()
}
}

// IncPacketsRelayedSuccess increments the count of successfully relayed packets for a specific tunnel.
func IncPacketsRelayedSuccess(tunnelID uint64) {
updateMetrics(func() {
metrics.PacketsRelayedSuccess.WithLabelValues(fmt.Sprintf("%d", tunnelID)).Inc()
})
}

// SetUnrelayedPackets sets the number of unrelayed packets for a specific tunnel.
func SetUnrelayedPackets(tunnelID uint64, unrelayedPackets float64) {
updateMetrics(func() {
metrics.UnrelayedPackets.WithLabelValues(fmt.Sprintf("%d", tunnelID)).Set(unrelayedPackets)
})
}

// IncTasksCount increments the total tasks count for a specific tunnel.
func IncTasksCount(tunnelID uint64) {
updateMetrics(func() {
metrics.TasksCount.WithLabelValues(fmt.Sprintf("%d", tunnelID)).Inc()
})
}

// ObserveTaskExecutionTime records the execution time of a task for a specific tunnel.
func ObserveTaskExecutionTime(tunnelID uint64, taskExecutionTime float64) {
updateMetrics(func() {
metrics.TaskExecutionTime.WithLabelValues(fmt.Sprintf("%d", tunnelID)).Observe(taskExecutionTime)
})
}

// IncTunnelsPerDestinationChain increments the total number of tunnels per specific destination chain.
func IncTunnelsPerDestinationChain(destinationChain string) {
updateMetrics(func() {
metrics.TunnelsPerDestinationChain.WithLabelValues(destinationChain).Inc()
})
}

// IncActiveTargetContractsCount increases the count of active target contracts.
func IncActiveTargetContractsCount() {
updateMetrics(func() {
metrics.ActiveTargetContractsCount.Inc()
})
}

// DecActiveTargetContractsCount decreases the count of active target contracts.
func DecActiveTargetContractsCount() {
updateMetrics(func() {
metrics.ActiveTargetContractsCount.Dec()
})
}

// IncTxsCount increments the transactions count metric for a specific tunnel.
func IncTxsCount(tunnelID uint64) {
updateMetrics(func() {
metrics.TxsCount.WithLabelValues(fmt.Sprintf("%d", tunnelID)).Inc()
})
}

// ObserveTxProcessTime tracks transaction processing time in seconds with millisecond precision.
func ObserveTxProcessTime(destinationChain string, txProcessTime float64) {
updateMetrics(func() {
metrics.TxProcessTime.WithLabelValues(destinationChain).Observe(txProcessTime)
})
}

// ObserveGasUsed tracks gas used for the each relayed transaction.
func ObserveGasUsed(tunnelID uint64, gasUsed uint64) {
updateMetrics(func() {
metrics.GasUsed.WithLabelValues(fmt.Sprintf("%d", tunnelID)).Observe(float64(gasUsed))
})
}

func InitPrometheusMetrics() {
packetLabels := []string{"tunnel_id"}
taskLabels := []string{"tunnel_id"}
tunnelPerDestinationChainLabels := []string{"destination_chain"}
txLabels := []string{"tunnel_id"}
gasUsedLabels := []string{"tunnel_id"}

metrics = &PrometheusMetrics{
PacketsRelayedSuccess: promauto.NewCounterVec(prometheus.CounterOpts{
Name: "falcon_packets_relayed_success",
Help: "Total number of packets successfully relayed from BandChain",
}, packetLabels),
UnrelayedPackets: promauto.NewGaugeVec(prometheus.GaugeOpts{
Name: "falcon_unrelayed_packets",
Help: "Number of unrelayed packets (the difference between total packets from BandChain and received packets from the target chain)",
}, packetLabels),
TasksCount: promauto.NewCounterVec(prometheus.CounterOpts{
Name: "falcon_tasks_count",
Help: "Total number of successfully executed tasks",
}, taskLabels),
TaskExecutionTime: promauto.NewSummaryVec(prometheus.SummaryOpts{
Name: "falcon_task_execution_time",
Help: "Task execution time in milliseconds",
Objectives: map[float64]float64{
0.5: 0.05,
0.9: 0.01,
0.99: 0.001,
},
}, taskLabels),
TunnelsPerDestinationChain: promauto.NewCounterVec(prometheus.CounterOpts{
Name: "falcon_tunnels_per_destination_chain",
Help: "Total number of destination chains",
}, tunnelPerDestinationChainLabels),
ActiveTargetContractsCount: promauto.NewGauge(prometheus.GaugeOpts{
Name: "falcon_active_target_contracts_count",
Help: "Number of active target chain contracts",
}),
TxsCount: promauto.NewCounterVec(prometheus.CounterOpts{
Name: "falcon_txs_count",
Help: "Total number of transactions per tunnel",
}, txLabels),
TxProcessTime: promauto.NewSummaryVec(prometheus.SummaryOpts{
Name: "falcon_tx_process_time",
Help: "Transaction processing time in milliseconds",
Objectives: map[float64]float64{
0.5: 0.05,
0.9: 0.01,
0.99: 0.001,
},
}, txLabels),
GasUsed: promauto.NewSummaryVec(prometheus.SummaryOpts{
Name: "falcon_gas_used",
Help: "Amount of gas used per transaction",
Objectives: map[float64]float64{
0.5: 0.05,
0.9: 0.01,
0.99: 0.001,
},
}, gasUsedLabels),
}
}

// StartMetricsServer starts a metrics server in a background goroutine,
// accepting connections on the given listener.
// Any HTTP logging will be written at info level to the given logger.
// The server will be forcefully shut down when ctx finishes.
func StartMetricsServer(ctx context.Context, log *zap.Logger, metricsListenAddr string) error {
ln, err := net.Listen("tcp", metricsListenAddr)
if err != nil {
log.Error(
"Failed to start metrics server you can change the address and port using metrics-listen-addr config setting or --metrics-listen-flag",
)

return fmt.Errorf("failed to listen on metrics address %q: %w", metricsListenAddr, err)
}
log = log.With(zap.String("sys", "metricshttp"))
log.Info("Metrics server listening", zap.String("addr", metricsListenAddr))

// allow for the global telemetry enabled state to be set.
globalTelemetryEnabled = true

// initialize Prometheus metrics
InitPrometheusMetrics()

// set up new mux identical to the default mux configuration in net/http/pprof.
mux := http.NewServeMux()

// serve prometheus metrics
mux.Handle("/metrics", promhttp.Handler())

srv := &http.Server{
Handler: mux,
ErrorLog: zap.NewStdLog(log),
BaseContext: func(net.Listener) context.Context {
return ctx
},
ReadHeaderTimeout: 5 * time.Second,
}

go func() {
_ = srv.Serve(ln)
}()

go func() {
<-ctx.Done()
srv.Close()
}()

return nil
}
20 changes: 10 additions & 10 deletions internal/relayertest/mocks/band_chain_query.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions internal/relayertest/testdata/custom_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_level = 'info'
checking_packet_interval = 60000000000
sync_tunnels_interval = 300000000000
penalty_skip_rounds = 3
metrics_listen_addr = ''

[bandchain]
rpc_endpoints = ['http://localhost:26657', 'http://localhost:26658']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_level = 'info'
checking_packet_interval = '1m'
sync_tunnels_interval = '5m'
penalty_skip_rounds = 3
metrics_listen_addr = ''

[bandchain]
rpc_endpoints = ['http://localhost:26657', 'http://localhost:26658']
Expand Down
1 change: 1 addition & 0 deletions internal/relayertest/testdata/default_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_level = 'info'
checking_packet_interval = 60000000000
sync_tunnels_interval = 300000000000
penalty_skip_rounds = 3
metrics_listen_addr = ''

[bandchain]
rpc_endpoints = ['http://localhost:26657']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_level = 'info'
checking_packet_interval = 60000000000
sync_tunnels_interval = 300000000000
penalty_skip_rounds = 3
metrics_listen_addr = ''

[bandchain]
rpc_endpoints = ['http://localhost:26657']
Expand Down
Loading
Loading