Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Reputation: CNS-1004 - QoS excellence epoch score aggregation #1612

Open
wants to merge 21 commits into
base: CNS-1003-reputation-proto-definitions
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
54a0136
CNS-1004: implement qos excellence score aggregation from relay payments
oren-lava Aug 5, 2024
e3f0183
CNS-1004: unit tests
oren-lava Aug 5, 2024
4471779
Merge branch 'CNS-1003-reputation-proto-definitions' into CNS-1004-re…
oren-lava Aug 5, 2024
d1a4117
Merge branch 'CNS-1003-reputation-proto-definitions' into CNS-1004-re…
oren-lava Aug 6, 2024
841e0e8
Merge branch 'CNS-1003-reputation-proto-definitions' into CNS-1004-re…
oren-lava Aug 6, 2024
32a544c
CNS-1004: add stake to reputation
oren-lava Aug 6, 2024
1a599a0
CNS-1004: add stake check in unit test
oren-lava Aug 7, 2024
5e56157
Merge branch 'CNS-1003-reputation-proto-definitions' into CNS-1004-re…
oren-lava Aug 7, 2024
2ff09f5
Merge branch 'CNS-1003-reputation-proto-definitions' into CNS-1004-re…
oren-lava Aug 8, 2024
994e762
CNS-1004: minor adds
oren-lava Aug 13, 2024
13deb57
Merge branch 'CNS-1003-reputation-proto-definitions' into CNS-1004-re…
oren-lava Aug 15, 2024
31f5ddd
Merge branch 'CNS-1003-reputation-proto-definitions' into CNS-1004-re…
oren-lava Aug 29, 2024
21e2819
Merge branch 'CNS-1003-reputation-proto-definitions' into CNS-1004-re…
oren-lava Sep 4, 2024
8a34d74
CNS-1004: fix after merge
oren-lava Sep 4, 2024
b7c747f
Merge branch 'CNS-1003-reputation-proto-definitions' into CNS-1004-re…
oren-lava Dec 5, 2024
e374c8b
small fixes
oren-lava Dec 5, 2024
c7e2a57
Merge branch 'CNS-1003-reputation-proto-definitions' into CNS-1004-re…
oren-lava Jan 5, 2025
5ad9aae
pr fix
oren-lava Jan 5, 2025
ee2cf55
upon update calculate variance first and then the score
oren-lava Jan 6, 2025
2262838
Merge branch 'CNS-1003-reputation-proto-definitions' into CNS-1004-re…
oren-lava Jan 7, 2025
600464d
pr fixes
oren-lava Jan 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions proto/lavanet/lava/pairing/relay.proto
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,23 @@ message RelayReply {
}

message QualityOfServiceReport{
// Latency of provider answers in milliseconds, range 0-inf, lower is better
string latency = 1 [
(gogoproto.moretags) = "yaml:\"Latency\"",
(gogoproto.customtype) = "github.com/cosmos/cosmos-sdk/types.Dec",
(gogoproto.nullable) = false
];

// Percentage of times the provider returned a non-error response, range 0-1, higher is better
string availability = 2 [
(gogoproto.moretags) = "yaml:\"availability\"",
(gogoproto.customtype) = "github.com/cosmos/cosmos-sdk/types.Dec",
(gogoproto.nullable) = false
];

// Amount of time the provider is not synced (have the latest block) in milliseconds, range 0-inf, lower is better.
// Example: in ETH we have 15sec block time. So sync = 15000 means that the provider is one block
// behind the actual latest block.
string sync = 3 [
(gogoproto.moretags) = "yaml:\"sync\"",
(gogoproto.customtype) = "github.com/cosmos/cosmos-sdk/types.Dec",
Expand Down
9 changes: 9 additions & 0 deletions testutil/common/tester.go
Original file line number Diff line number Diff line change
Expand Up @@ -1091,6 +1091,15 @@ func (ts *Tester) GetNextMonth(from time.Time) int64 {
return utils.NextMonth(from).UTC().Unix()
}

// BlockTimeDefault returns the block time used by the tester, taken from the
// downtime module's DowntimeDuration param.
func (ts *Tester) BlockTimeDefault() time.Duration {
	downtimeParams := ts.Keepers.Downtime.GetParams(ts.Ctx)
	return downtimeParams.DowntimeDuration
}

// EpochTimeDefault returns the default wall-clock duration of one epoch:
// the default block time multiplied by the number of blocks per epoch.
func (ts *Tester) EpochTimeDefault() time.Duration {
	epochBlocks := ts.Keepers.Epochstorage.GetParams(ts.Ctx).EpochBlocks
	return time.Duration(epochBlocks) * ts.BlockTimeDefault()
}

func (ts *Tester) AdvanceToBlock(block uint64) {
if block < ts.BlockHeight() {
panic("AdvanceToBlock: block in the past: " +
Expand Down
8 changes: 8 additions & 0 deletions utils/convert.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
package utils

import "math"

// SafeUint64ToInt64Convert converts val to int64. Values above math.MaxInt64
// are clamped to math.MaxInt64 instead of wrapping around to a negative number.
func SafeUint64ToInt64Convert(val uint64) int64 {
	if val > math.MaxInt64 {
		return math.MaxInt64
	}
	return int64(val)
}
func Btof(b bool) float64 {
if b {
return 1
Expand Down
53 changes: 53 additions & 0 deletions x/pairing/keeper/msg_server_relay_payment.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,22 @@ func (k msgServer) RelayPayment(goCtx context.Context, msg *types.MsgRelayPaymen
k.handleBadgeCu(ctx, badgeData, relay.Provider, relay.CuSum, newBadgeTimerExpiry)
}

// update the reputation's epoch QoS score
// the excellence QoS report can be nil when the provider and consumer geolocations are not equal
if relay.QosExcellenceReport != nil {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

better in a function for readability

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done. see 5ad9aae

err = k.updateReputationEpochQosScore(ctx, project.Subscription, relay)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change the name of the method since we have another one with the same name

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done. see 600464d

if err != nil {
return nil, utils.LavaFormatWarning("RelayPayment: could not update reputation epoch QoS score", err,
utils.LogAttr("consumer", project.Subscription),
utils.LogAttr("project", project.Index),
utils.LogAttr("chain", relay.SpecId),
utils.LogAttr("provider", relay.Provider),
utils.LogAttr("qos_excellence_report", relay.QosExcellenceReport.String()),
utils.LogAttr("sync_factor", k.ReputationLatencyOverSyncFactor(ctx).String()),
)
}
}

// TODO: add support for spec changes
spec, found := k.specKeeper.GetSpec(ctx, relay.SpecId)
if !found || !spec.Enabled {
Expand Down Expand Up @@ -473,3 +489,40 @@ func (k Keeper) handleBadgeCu(ctx sdk.Context, badgeData BadgeData, provider str
badgeUsedCuMapEntry.UsedCu += relayCuSum
k.SetBadgeUsedCu(ctx, badgeUsedCuMapEntry)
}

func (k Keeper) updateReputationEpochQosScore(ctx sdk.Context, subscription string, relay *types.RelaySession) error {
sub, found := k.subscriptionKeeper.GetSubscription(ctx, subscription)
if !found {
return utils.LavaFormatError("RelayPayment: could not get cluster for reputation score update", fmt.Errorf("relay consumer's subscription not found"),
utils.LogAttr("subscription", subscription),
utils.LogAttr("chain", relay.SpecId),
utils.LogAttr("provider", relay.Provider),
)
}

syncFactor := k.ReputationLatencyOverSyncFactor(ctx)
score, err := relay.QosExcellenceReport.ComputeQosExcellenceForReputation(syncFactor)
if err != nil {
return utils.LavaFormatWarning("RelayPayment: could not compute qos excellence score", err,
utils.LogAttr("consumer", subscription),
utils.LogAttr("chain", relay.SpecId),
utils.LogAttr("provider", relay.Provider),
utils.LogAttr("qos_excellence_report", relay.QosExcellenceReport.String()),
utils.LogAttr("sync_factor", syncFactor.String()),
)
}

stakeEntry, found := k.epochStorageKeeper.GetStakeEntryCurrent(ctx, relay.SpecId, relay.Provider)
if !found {
return utils.LavaFormatWarning("RelayPayment: could not get stake entry for reputation", fmt.Errorf("stake entry not found"),
utils.LogAttr("consumer", subscription),
utils.LogAttr("chain", relay.SpecId),
utils.LogAttr("provider", relay.Provider),
)
}
effectiveStake := sdk.NewCoin(stakeEntry.Stake.Denom, stakeEntry.TotalStake())

// note the current weight used is by relay num. In the future, it might change
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you explain why this is? afaik this is just an incrementing number to count sessions (?)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The goal was to give more weight to relay payments with lots of relays. I also remember is the counter, will fix later

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done. see 600464d

k.UpdateReputationEpochQosScore(ctx, relay.SpecId, sub.Cluster, relay.Provider, score, utils.SafeUint64ToInt64Convert(relay.RelayNum), effectiveStake)
return nil
}
194 changes: 194 additions & 0 deletions x/pairing/keeper/msg_server_relay_payment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1063,3 +1063,197 @@ func TestPairingCaching(t *testing.T) {
require.Equal(t, totalCU*3, sub.Sub.MonthCuTotal-sub.Sub.MonthCuLeft)
}
}

// TestUpdateReputationEpochQosScore tests the update of the reputation's epoch qos score
// Scenarios:
// 1. provider1 sends relay -> its reputation is updated (epoch score and time last updated),
// also, provider2 reputation is not updated
func TestUpdateReputationEpochQosScore(t *testing.T) {
	ts := newTester(t)
	ts.setupForPayments(2, 1, 0) // 2 providers, 1 client, default providers-to-pair

	consumerAcc, consumer := ts.GetAccount(common.CONSUMER, 0)
	_, provider1 := ts.GetAccount(common.PROVIDER, 0)
	_, provider2 := ts.GetAccount(common.PROVIDER, 1)
	// deliberately poor QoS report so the epoch score/variance must grow after the update
	qos := &types.QualityOfServiceReport{
		Latency:      sdk.OneDec(),
		Availability: sdk.NewDecWithPrec(1, 1),
		Sync:         sdk.OneDec(),
	}

	// the reputation is keyed by chain+cluster+provider; fetch the consumer's cluster
	res, err := ts.QuerySubscriptionCurrent(consumer)
	require.NoError(t, err)
	cluster := res.Sub.Cluster

	// set default reputations for both providers. Advance epoch to change the current block time
	ts.Keepers.Pairing.SetReputation(ts.Ctx, ts.spec.Index, cluster, provider1, types.NewReputation(ts.Ctx))
	ts.Keepers.Pairing.SetReputation(ts.Ctx, ts.spec.Index, cluster, provider2, types.NewReputation(ts.Ctx))
	ts.AdvanceEpoch()

	// send relay payment msg from provider1
	relaySession := ts.newRelaySession(provider1, 0, 100, ts.BlockHeight(), 1)
	relaySession.QosExcellenceReport = qos
	sig, err := sigs.Sign(consumerAcc.SK, *relaySession)
	require.NoError(t, err)
	relaySession.Sig = sig

	payment := types.MsgRelayPayment{
		Creator: provider1,
		Relays:  []*types.RelaySession{relaySession},
	}
	ts.relayPaymentWithoutPay(payment, true)

	// get both providers reputation: provider1 should have its epoch score and time last updated changed,
	// provider2 should have nothing change from the default
	r1, found := ts.Keepers.Pairing.GetReputation(ts.Ctx, ts.spec.Index, cluster, provider1)
	require.True(t, found)
	r2, found := ts.Keepers.Pairing.GetReputation(ts.Ctx, ts.spec.Index, cluster, provider2)
	require.True(t, found)

	require.Greater(t, r1.TimeLastUpdated, r2.TimeLastUpdated)
	epochScore1, err := r1.EpochScore.Score.Resolve()
	require.NoError(t, err)
	epochScore2, err := r2.EpochScore.Score.Resolve()
	require.NoError(t, err)
	variance1, err := r1.EpochScore.Variance.Resolve()
	require.NoError(t, err)
	variance2, err := r2.EpochScore.Variance.Resolve()
	require.NoError(t, err)
	require.True(t, epochScore1.GT(epochScore2)) // score is higher because QoS is bad
	require.True(t, variance1.GT(variance2))     // variance is higher because the QoS is significantly different from DefaultQos

	// the relay payment should also copy the provider's stake onto the reputation
	// NOTE(review): this equality holds only while the effective (total) stake equals
	// the self stake, i.e. no delegations in this setup — confirm if delegations are added
	entry, found := ts.Keepers.Epochstorage.GetStakeEntryCurrent(ts.Ctx, ts.spec.Index, provider1)
	require.True(t, found)
	require.True(t, entry.Stake.IsEqual(r1.Stake))
}

// TestUpdateReputationEpochQosScoreTruncation tests the following scenarios:
// 1. stabilization period has not passed -> no truncation
// 2. stabilization period passed -> with truncation (score update is smaller than the first one)
// note, this test works since we use a bad QoS report (compared to default) so we know that the score should
// increase (which is considered worse)
func TestUpdateReputationEpochQosScoreTruncation(t *testing.T) {
	// these will be used to compare the score change with/without truncation
	scoreUpdates := []sdk.Dec{}

	// we set the stabilization period to 2 epochs time. Advancing one epoch means we won't truncate,
	// advancing 3 means we will truncate.
	epochsToAdvance := []uint64{1, 3}

	for i := range epochsToAdvance {
		ts := newTester(t)
		ts.setupForPayments(1, 1, 0) // 1 provider, 1 client, default providers-to-pair

		consumerAcc, consumer := ts.GetAccount(common.CONSUMER, 0)
		_, provider1 := ts.GetAccount(common.PROVIDER, 0)
		// extreme latency/sync values so the score update is large and easy to compare
		qos := &types.QualityOfServiceReport{
			Latency:      sdk.NewDec(1000),
			Availability: sdk.OneDec(),
			Sync:         sdk.NewDec(1000),
		}

		resQCurrent, err := ts.QuerySubscriptionCurrent(consumer)
		require.NoError(t, err)
		cluster := resQCurrent.Sub.Cluster

		// set stabilization period to be 2*epoch time
		resQParams, err := ts.Keepers.Pairing.Params(ts.GoCtx, &types.QueryParamsRequest{})
		require.NoError(t, err)
		resQParams.Params.ReputationVarianceStabilizationPeriod = int64(ts.EpochTimeDefault().Seconds())
		ts.Keepers.Pairing.SetParams(ts.Ctx, resQParams.Params)

		// set default reputation
		ts.Keepers.Pairing.SetReputation(ts.Ctx, ts.spec.Index, cluster, provider1, types.NewReputation(ts.Ctx))

		// advance epochs (1 epoch -> inside stabilization period, 3 -> past it)
		ts.AdvanceEpochs(epochsToAdvance[i])

		// send relay payment msg from provider1
		relaySession := ts.newRelaySession(provider1, 0, 100, ts.BlockHeight(), 1)
		relaySession.QosExcellenceReport = qos
		sig, err := sigs.Sign(consumerAcc.SK, *relaySession)
		require.NoError(t, err)
		relaySession.Sig = sig

		payment := types.MsgRelayPayment{
			Creator: provider1,
			Relays:  []*types.RelaySession{relaySession},
		}
		ts.relayPaymentWithoutPay(payment, true)

		// get update of epoch score
		// note: despite the variable name, on the second iteration this score WAS truncated;
		// it is the raw updated epoch score of the current iteration
		r, found := ts.Keepers.Pairing.GetReputation(ts.Ctx, ts.spec.Index, cluster, provider1)
		require.True(t, found)
		epochScoreNoTruncation, err := r.EpochScore.Score.Resolve()
		require.NoError(t, err)
		defaultEpochScore, err := types.ZeroQosScore.Score.Resolve()
		require.NoError(t, err)
		scoreUpdates = append(scoreUpdates, epochScoreNoTruncation.Sub(defaultEpochScore))
	}

	// require that the score update that was not truncated is larger than the one that was truncated
	require.True(t, scoreUpdates[0].GT(scoreUpdates[1]))
}

// TestUpdateReputationEpochQosScoreRelayNumWeight tests the following scenario:
// 1. relay num is the reputation update weight. More relays = bigger update
func TestUpdateReputationEpochQosScoreRelayNumWeight(t *testing.T) {
	// these will be used to compare the score change with high/low relay numbers
	scoreUpdates := []sdk.Dec{}

	// first iteration sends a relay payment with 100 relays, the second with a single relay;
	// a higher relay num should yield a larger score update
	relayNums := []uint64{100, 1}

	for i := range relayNums {
		ts := newTester(t)
		ts.setupForPayments(1, 1, 0) // 1 provider, 1 client, default providers-to-pair

		consumerAcc, consumer := ts.GetAccount(common.CONSUMER, 0)
		_, provider1 := ts.GetAccount(common.PROVIDER, 0)
		// extreme latency/sync values so the score update is large and easy to compare
		qos := &types.QualityOfServiceReport{
			Latency:      sdk.NewDec(1000),
			Availability: sdk.OneDec(),
			Sync:         sdk.NewDec(1000),
		}

		resQCurrent, err := ts.QuerySubscriptionCurrent(consumer)
		require.NoError(t, err)
		cluster := resQCurrent.Sub.Cluster

		// set stabilization period to be 2*epoch time to avoid truncation
		resQParams, err := ts.Keepers.Pairing.Params(ts.GoCtx, &types.QueryParamsRequest{})
		require.NoError(t, err)
		resQParams.Params.ReputationVarianceStabilizationPeriod = int64(ts.EpochTimeDefault().Seconds())
		ts.Keepers.Pairing.SetParams(ts.Ctx, resQParams.Params)

		// set default reputation
		ts.Keepers.Pairing.SetReputation(ts.Ctx, ts.spec.Index, cluster, provider1, types.NewReputation(ts.Ctx))
		ts.AdvanceEpoch()

		// send relay payment msg from provider1 with the iteration's relay num
		relaySession := ts.newRelaySession(provider1, 0, 100, ts.BlockHeight(), relayNums[i])
		relaySession.QosExcellenceReport = qos
		sig, err := sigs.Sign(consumerAcc.SK, *relaySession)
		require.NoError(t, err)
		relaySession.Sig = sig

		payment := types.MsgRelayPayment{
			Creator: provider1,
			Relays:  []*types.RelaySession{relaySession},
		}
		ts.relayPaymentWithoutPay(payment, true)

		// get update of epoch score
		r, found := ts.Keepers.Pairing.GetReputation(ts.Ctx, ts.spec.Index, cluster, provider1)
		require.True(t, found)
		epochScoreNoTruncation, err := r.EpochScore.Score.Resolve()
		require.NoError(t, err)
		defaultEpochScore, err := types.ZeroQosScore.Score.Resolve()
		require.NoError(t, err)
		scoreUpdates = append(scoreUpdates, epochScoreNoTruncation.Sub(defaultEpochScore))
	}

	// require that the score update with 100 relay num is larger than the one with one relay num
	require.True(t, scoreUpdates[0].GT(scoreUpdates[1]))
}
26 changes: 26 additions & 0 deletions x/pairing/keeper/reputation.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,32 @@ func (k Keeper) GetAllReputation(ctx sdk.Context) []types.ReputationGenesis {
return entries
}

// UpdateReputationEpochQosScore updates the epoch QoS score of the provider's reputation using the score from the relay
// payment's QoS excellence report
func (k Keeper) UpdateReputationEpochQosScore(ctx sdk.Context, chainID string, cluster string, provider string, score math.LegacyDec, weight int64, stake sdk.Coin) {
// get current reputation and get parameters for the epoch score update
r, found := k.GetReputation(ctx, chainID, cluster, provider)
truncate := false
if found {
stabilizationPeriod := k.ReputationVarianceStabilizationPeriod(ctx)
if r.ShouldTruncate(stabilizationPeriod, ctx.BlockTime().UTC().Unix()) {
truncate = true
}
} else {
// new reputation score is not truncated and its decay factor is equal to 1
r = types.NewReputation(ctx)
}

// calculate the updated QoS epoch score
updatedEpochScore := r.EpochScore.Update(score, truncate, weight)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we make this in place method?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done. see 600464d


// update the reputation and set
r.EpochScore = updatedEpochScore
r.TimeLastUpdated = ctx.BlockTime().UTC().Unix()
r.Stake = stake
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is the stake needed?

Copy link
Collaborator Author

@oren-lava oren-lava Jan 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from the comment in the proto:

The stake is used when converting the reputation QoS scores to reputation pairing score.

You'll see it in the next parts

k.SetReputation(ctx, chainID, cluster, provider, r)
}

// GetReputationScore returns the current reputation pairing score
func (k Keeper) GetReputationScore(ctx sdk.Context, chainID string, cluster string, provider string) (val math.LegacyDec, found bool) {
block := uint64(ctx.BlockHeight())
Expand Down
26 changes: 26 additions & 0 deletions x/pairing/types/QualityOfServiceReport.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package types
import (
"fmt"

"cosmossdk.io/math"
sdk "github.com/cosmos/cosmos-sdk/types"
"github.com/lavanet/lava/v4/utils"
)

func (qos *QualityOfServiceReport) ComputeQoS() (sdk.Dec, error) {
Expand All @@ -24,3 +26,27 @@ func (qos *QualityOfServiceReport) ComputeQoSExcellence() (sdk.Dec, error) {
}
return qos.Availability.Quo(qos.Sync).Quo(qos.Latency).ApproxRoot(3)
}

// ComputeQosExcellenceForReputation computes the score from the QoS excellence report to update the provider's reputation
// report score = latency + sync*syncFactor + ((1/availability) - 1) * FailureCost (note: larger value is worse)
func (qos QualityOfServiceReport) ComputeQosExcellenceForReputation(syncFactor math.LegacyDec) (math.LegacyDec, error) {
	// every report component (and the sync factor) must be non-negative
	if qos.Availability.IsNegative() || qos.Latency.IsNegative() || qos.Sync.IsNegative() || syncFactor.IsNegative() {
		return sdk.ZeroDec(), utils.LavaFormatWarning("ComputeQosExcellenceForReputation: compute failed", fmt.Errorf("QoS excellence scores is below 0"),
			utils.LogAttr("availability", qos.Availability.String()),
			utils.LogAttr("sync", qos.Sync.String()),
			utils.LogAttr("latency", qos.Latency.String()),
		)
	}

	// availability penalty: ((1/availability) - 1) * FailureCost; a provider that was
	// never available (availability == 0) gets the largest score possible
	var availabilityPenalty math.LegacyDec
	if qos.Availability.IsZero() {
		availabilityPenalty = math.LegacyMaxSortableDec.QuoInt64(2)
	} else {
		availabilityPenalty = math.LegacyNewDec(FailureCost).Mul(math.LegacyOneDec().Quo(qos.Availability).Sub(math.LegacyOneDec()))
	}

	return qos.Latency.Add(qos.Sync.Mul(syncFactor)).Add(availabilityPenalty), nil
}
Loading
Loading