Skip to content

Commit

Permalink
[Feature] Add telemetry (#27)
Browse files Browse the repository at this point in the history
* add telemetry

* change http to tcp

* refac

* add enable metric config

* rm comment

* rm dockerfile

* fix incorret synctunnels logic

* refac metrics

* refac metrics

* refac metrics

* refac code

* refac code

* fix bug

* fix on comment

* rm inactive contract metrics

* combine check telemetry enable

* fix lint

* fix comment

* fix comment

* refac sync

* refac

* refac grammar

* change ci-lint

* rm aligncheck

* fix comment

* fix unused func

* remove weird comment

---------

Co-authored-by: Tanut Lertwarachai <[email protected]>
  • Loading branch information
tanut32039 and Tanut Lertwarachai authored Feb 21, 2025
1 parent 2982260 commit 59f1d30
Show file tree
Hide file tree
Showing 14 changed files with 381 additions and 92 deletions.
5 changes: 3 additions & 2 deletions cmd/flags.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package cmd

const (
flagHome = "home"
flagFile = "file"
flagHome = "home"
flagFile = "file"
flagMetricsListenAddr = "metrics-listen-addr"
)
13 changes: 12 additions & 1 deletion cmd/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,20 @@ $ %s start 1 12 # start relaying data from specific tunnelIDs.`, appName, a
tunnelIDs = append(tunnelIDs, tunnelID)
}

return app.Start(cmd.Context(), tunnelIDs)
metricsListenAddrFlag, err := cmd.Flags().GetString(flagMetricsListenAddr)
if err != nil {
return err
}

return app.Start(cmd.Context(), tunnelIDs, metricsListenAddrFlag)
},
}

cmd.Flags().String(
flagMetricsListenAddr,
"",
"address to use for metrics server. By default, "+
"will be the metrics-listen-addr parameter in the global config. ",
)
return cmd
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ require (
github.com/miguelmota/go-ethereum-hdwallet v0.1.2
github.com/mitchellh/mapstructure v1.5.0
github.com/pelletier/go-toml/v2 v2.2.2
github.com/prometheus/client_golang v1.20.5
github.com/shopspring/decimal v1.4.0
github.com/spf13/cobra v1.8.1
github.com/spf13/viper v1.19.0
Expand Down Expand Up @@ -138,7 +139,6 @@ require (
github.com/petermattis/goid v0.0.0-20231207134359-e60b3f734c67 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.20.5 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
Expand Down
220 changes: 220 additions & 0 deletions internal/relayermetrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
package relayermetrics

import (
"context"
"fmt"
"net"
"net/http"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
"go.uber.org/zap"
)

// metrics stores the Prometheus metrics instance.
var metrics *PrometheusMetrics

// globalTelemetryEnabled indicates whether telemetry is enabled globally.
// It is set on initialization and does not change for the lifetime of the program.
var globalTelemetryEnabled bool

type PrometheusMetrics struct {
PacketsRelayedSuccess *prometheus.CounterVec
UnrelayedPackets *prometheus.GaugeVec
TasksCount *prometheus.CounterVec
TaskExecutionTime *prometheus.SummaryVec
TunnelsPerDestinationChain *prometheus.CounterVec
ActiveTargetContractsCount prometheus.Gauge
TxsCount *prometheus.CounterVec
TxProcessTime *prometheus.SummaryVec
GasUsed *prometheus.SummaryVec
}

func updateMetrics(updateFn func()) {
if globalTelemetryEnabled {
updateFn()
}
}

// IncPacketsRelayedSuccess increments the count of successfully relayed packets for a specific tunnel.
func IncPacketsRelayedSuccess(tunnelID uint64) {
updateMetrics(func() {
metrics.PacketsRelayedSuccess.WithLabelValues(fmt.Sprintf("%d", tunnelID)).Inc()
})
}

// SetUnrelayedPackets sets the number of unrelayed packets for a specific tunnel.
func SetUnrelayedPackets(tunnelID uint64, unrelayedPackets float64) {
updateMetrics(func() {
metrics.UnrelayedPackets.WithLabelValues(fmt.Sprintf("%d", tunnelID)).Set(unrelayedPackets)
})
}

// IncTasksCount increments the total tasks count for a specific tunnel.
func IncTasksCount(tunnelID uint64) {
updateMetrics(func() {
metrics.TasksCount.WithLabelValues(fmt.Sprintf("%d", tunnelID)).Inc()
})
}

// ObserveTaskExecutionTime records the execution time of a task for a specific tunnel.
func ObserveTaskExecutionTime(tunnelID uint64, taskExecutionTime float64) {
updateMetrics(func() {
metrics.TaskExecutionTime.WithLabelValues(fmt.Sprintf("%d", tunnelID)).Observe(taskExecutionTime)
})
}

// IncTunnelsPerDestinationChain increments the total number of tunnels per specific destination chain.
func IncTunnelsPerDestinationChain(destinationChain string) {
updateMetrics(func() {
metrics.TunnelsPerDestinationChain.WithLabelValues(destinationChain).Inc()
})
}

// IncActiveTargetContractsCount increases the count of active target contracts.
func IncActiveTargetContractsCount() {
updateMetrics(func() {
metrics.ActiveTargetContractsCount.Inc()
})
}

// DecActiveTargetContractsCount decreases the count of active target contracts.
func DecActiveTargetContractsCount() {
updateMetrics(func() {
metrics.ActiveTargetContractsCount.Dec()
})
}

// IncTxsCount increments the transactions count metric for a specific tunnel.
func IncTxsCount(tunnelID uint64) {
updateMetrics(func() {
metrics.TxsCount.WithLabelValues(fmt.Sprintf("%d", tunnelID)).Inc()
})
}

// ObserveTxProcessTime tracks transaction processing time in seconds with millisecond precision.
func ObserveTxProcessTime(destinationChain string, txProcessTime float64) {
updateMetrics(func() {
metrics.TxProcessTime.WithLabelValues(destinationChain).Observe(txProcessTime)
})
}

// ObserveGasUsed tracks gas used for the each relayed transaction.
func ObserveGasUsed(tunnelID uint64, gasUsed uint64) {
updateMetrics(func() {
metrics.GasUsed.WithLabelValues(fmt.Sprintf("%d", tunnelID)).Observe(float64(gasUsed))
})
}

func InitPrometheusMetrics() {
packetLabels := []string{"tunnel_id"}
taskLabels := []string{"tunnel_id"}
tunnelPerDestinationChainLabels := []string{"destination_chain"}
txLabels := []string{"tunnel_id"}
gasUsedLabels := []string{"tunnel_id"}

metrics = &PrometheusMetrics{
PacketsRelayedSuccess: promauto.NewCounterVec(prometheus.CounterOpts{
Name: "falcon_packets_relayed_success",
Help: "Total number of packets successfully relayed from BandChain",
}, packetLabels),
UnrelayedPackets: promauto.NewGaugeVec(prometheus.GaugeOpts{
Name: "falcon_unrelayed_packets",
Help: "Number of unrelayed packets (the difference between total packets from BandChain and received packets from the target chain)",
}, packetLabels),
TasksCount: promauto.NewCounterVec(prometheus.CounterOpts{
Name: "falcon_tasks_count",
Help: "Total number of successfully executed tasks",
}, taskLabels),
TaskExecutionTime: promauto.NewSummaryVec(prometheus.SummaryOpts{
Name: "falcon_task_execution_time",
Help: "Task execution time in milliseconds",
Objectives: map[float64]float64{
0.5: 0.05,
0.9: 0.01,
0.99: 0.001,
},
}, taskLabels),
TunnelsPerDestinationChain: promauto.NewCounterVec(prometheus.CounterOpts{
Name: "falcon_tunnels_per_destination_chain",
Help: "Total number of destination chains",
}, tunnelPerDestinationChainLabels),
ActiveTargetContractsCount: promauto.NewGauge(prometheus.GaugeOpts{
Name: "falcon_active_target_contracts_count",
Help: "Number of active target chain contracts",
}),
TxsCount: promauto.NewCounterVec(prometheus.CounterOpts{
Name: "falcon_txs_count",
Help: "Total number of transactions per tunnel",
}, txLabels),
TxProcessTime: promauto.NewSummaryVec(prometheus.SummaryOpts{
Name: "falcon_tx_process_time",
Help: "Transaction processing time in milliseconds",
Objectives: map[float64]float64{
0.5: 0.05,
0.9: 0.01,
0.99: 0.001,
},
}, txLabels),
GasUsed: promauto.NewSummaryVec(prometheus.SummaryOpts{
Name: "falcon_gas_used",
Help: "Amount of gas used per transaction",
Objectives: map[float64]float64{
0.5: 0.05,
0.9: 0.01,
0.99: 0.001,
},
}, gasUsedLabels),
}
}

// StartMetricsServer starts a metrics server in a background goroutine,
// accepting connections on the given listener.
// Any HTTP logging will be written at info level to the given logger.
// The server will be forcefully shut down when ctx finishes.
func StartMetricsServer(ctx context.Context, log *zap.Logger, metricsListenAddr string) error {
ln, err := net.Listen("tcp", metricsListenAddr)
if err != nil {
log.Error(
"Failed to start metrics server you can change the address and port using metrics-listen-addr config setting or --metrics-listen-flag",
)

return fmt.Errorf("failed to listen on metrics address %q: %w", metricsListenAddr, err)
}
log = log.With(zap.String("sys", "metricshttp"))
log.Info("Metrics server listening", zap.String("addr", metricsListenAddr))

// allow for the global telemetry enabled state to be set.
globalTelemetryEnabled = true

// initialize Prometheus metrics
InitPrometheusMetrics()

// set up new mux identical to the default mux configuration in net/http/pprof.
mux := http.NewServeMux()

// serve prometheus metrics
mux.Handle("/metrics", promhttp.Handler())

srv := &http.Server{
Handler: mux,
ErrorLog: zap.NewStdLog(log),
BaseContext: func(net.Listener) context.Context {
return ctx
},
ReadHeaderTimeout: 5 * time.Second,
}

go func() {
_ = srv.Serve(ln)
}()

go func() {
<-ctx.Done()
srv.Close()
}()

return nil
}
20 changes: 10 additions & 10 deletions internal/relayertest/mocks/band_chain_query.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions internal/relayertest/testdata/custom_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_level = 'info'
checking_packet_interval = 60000000000
sync_tunnels_interval = 300000000000
penalty_skip_rounds = 3
metrics_listen_addr = ''

[bandchain]
rpc_endpoints = ['http://localhost:26657', 'http://localhost:26658']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_level = 'info'
checking_packet_interval = '1m'
sync_tunnels_interval = '5m'
penalty_skip_rounds = 3
metrics_listen_addr = ''

[bandchain]
rpc_endpoints = ['http://localhost:26657', 'http://localhost:26658']
Expand Down
1 change: 1 addition & 0 deletions internal/relayertest/testdata/default_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_level = 'info'
checking_packet_interval = 60000000000
sync_tunnels_interval = 300000000000
penalty_skip_rounds = 3
metrics_listen_addr = ''

[bandchain]
rpc_endpoints = ['http://localhost:26657']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_level = 'info'
checking_packet_interval = 60000000000
sync_tunnels_interval = 300000000000
penalty_skip_rounds = 3
metrics_listen_addr = ''

[bandchain]
rpc_endpoints = ['http://localhost:26657']
Expand Down
Loading

0 comments on commit 59f1d30

Please sign in to comment.