Skip to content

Commit

Permalink
start tracking number of seconds without an etcd leader
Browse files Browse the repository at this point in the history
  • Loading branch information
deads2k committed Jan 13, 2025
1 parent b573a5b commit b5bea41
Showing 1 changed file with 63 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,19 @@ import (
"bytes"
"encoding/csv"
"fmt"
"gonum.org/v1/plot"
"gonum.org/v1/plot/plotter"
"gonum.org/v1/plot/vg"
"image/color"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
"os"
"path"
"strconv"
"strings"
"sync"
"time"

"gonum.org/v1/plot"
"gonum.org/v1/plot/plotter"
"gonum.org/v1/plot/vg"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
)

type CountForSecond struct {
Expand All @@ -27,6 +28,10 @@ type CountForSecond struct {

NumberOfRequestsReceived int

NumberOfRequestsReceivedThatAccessedEtcd int
NumberOfRequestsReceivedDidNotAccessEtcd int
NumberOfRequestsReceivedSawNoLeader int

// NumberOfRequestsReceivedThatLaterGot500 is calculated based on when the request was received instead of completed
// because different requests have different timeouts, so it's more useful to categorize based on received time
NumberOfRequestsReceivedThatLaterGot500 int
Expand Down Expand Up @@ -78,6 +83,15 @@ func (s *countTracking) HandleAuditLogEvent(auditEvent *auditv1.Event, beginning
for i := receivedIndex; i <= completionIndex; i++ {
s.CountsForRun.CountsForEachSecond[receivedIndex].NumberOfConcurrentRequestsBeingHandled++
}

if _, accessedEtcd := auditEvent.Annotations["apiserver.internal.openshift.io/etcd-access"]; accessedEtcd {
s.CountsForRun.CountsForEachSecond[receivedIndex].NumberOfRequestsReceivedThatAccessedEtcd++
} else {
s.CountsForRun.CountsForEachSecond[receivedIndex].NumberOfRequestsReceivedDidNotAccessEtcd++
}
if _, accessedEtcd := auditEvent.Annotations["apiserver.internal.openshift.io/no-leader"]; accessedEtcd {
s.CountsForRun.CountsForEachSecond[receivedIndex].NumberOfRequestsReceivedSawNoLeader++
}
}

// returns received index, completion index, status code, and false if the index is out of bounds
Expand Down Expand Up @@ -115,7 +129,9 @@ func (c *CountsForRun) countIndexesFromAuditTime(in *auditv1.Event) (int, int, i
func (c *CountsForRun) ToCSV() ([]byte, error) {
out := &bytes.Buffer{}
csvWriter := csv.NewWriter(out)
if err := csvWriter.Write([]string{"seconds after cluster start", "number of requests being handled", "number of requests received", "number of requests received this second getting 500"}); err != nil {
if err := csvWriter.Write([]string{
"seconds after cluster start", "number of requests being handled", "number of requests received", "number of requests received this second getting 500",
"number of requests received accessing etcd", "number of requests received NOT accessing etcd", "number of requests received with no leader", "percentage of requests received with no leader"}); err != nil {
return nil, fmt.Errorf("failed writing headers: %w", err)
}

Expand All @@ -125,6 +141,10 @@ func (c *CountsForRun) ToCSV() ([]byte, error) {
strconv.Itoa(curr.NumberOfConcurrentRequestsBeingHandled),
strconv.Itoa(curr.NumberOfRequestsReceived),
strconv.Itoa(curr.NumberOfRequestsReceivedThatLaterGot500),
strconv.Itoa(curr.NumberOfRequestsReceivedThatAccessedEtcd),
strconv.Itoa(curr.NumberOfRequestsReceivedDidNotAccessEtcd),
strconv.Itoa(curr.NumberOfRequestsReceivedSawNoLeader),
strconv.Itoa(int(float32(curr.NumberOfRequestsReceivedSawNoLeader) / float32(curr.NumberOfRequestsReceivedThatAccessedEtcd) * 100.0)),
}

if err := csvWriter.Write(record); err != nil {
Expand Down Expand Up @@ -187,7 +207,7 @@ func (c *CountsForRun) SubsetDataAtTime(endTime metav1.Time) *CountsForRun {
return ret
}

func (c *CountsForRun) ToLineChart() (*plot.Plot, error) {
func (c *CountsForRun) ToConcurrentRequestsLineChart() (*plot.Plot, error) {
p := plot.New()
p.Title.Text = "Requests by Second of Cluster Life"
p.Y.Label.Text = "Number of Requests in that Second"
Expand Down Expand Up @@ -232,6 +252,31 @@ func (c *CountsForRun) ToLineChart() (*plot.Plot, error) {
return p, nil
}

// ToEtcdNoLeaderLineChart builds a line chart showing, for every second in
// c.CountsForEachSecond, the percentage of etcd-accessing requests received in
// that second that saw an etcd "no leader" condition.
//
// The X axis is wall-clock time derived from c.EstimatedStartOfCluster plus the
// second's index; the Y axis is the percentage
// (NumberOfRequestsReceivedSawNoLeader / NumberOfRequestsReceivedThatAccessedEtcd * 100).
//
// Seconds in which no request accessed etcd are plotted as 0%. Without that
// guard the division yields NaN (0/0) or +Inf (n/0), and plotter.NewLine
// rejects such values, which would make the whole chart fail for any run that
// contains an idle second.
//
// It returns the populated plot, or the error reported by plotter.NewLine.
func (c *CountsForRun) ToEtcdNoLeaderLineChart() (*plot.Plot, error) {
	p := plot.New()
	p.Title.Text = "Percentage of Requests Each Second with Etcd No Leader"
	p.Y.Label.Text = "Percentage of No Leader Responses in that Second"
	p.X.Label.Text = "Seconds of Cluster Life"
	p.X.Tick.Marker = plot.TimeTicks{}
	// NOTE: this mutates a package-level default in plotter, so it affects
	// every line created afterwards, not only the one built here.
	plotter.DefaultLineStyle.Width = vg.Points(1)

	percentageOfNoLeaderResponses := make(plotter.XYs, 0, len(c.CountsForEachSecond))
	for i, requestCounts := range c.CountsForEachSecond {
		timeOfSecond := c.EstimatedStartOfCluster.Add(time.Duration(i) * time.Second)

		// Default to 0% so a second without any etcd access never produces
		// NaN or Inf from a division by zero.
		percentage := 0.0
		if requestCounts.NumberOfRequestsReceivedThatAccessedEtcd > 0 {
			percentage = float64(requestCounts.NumberOfRequestsReceivedSawNoLeader) /
				float64(requestCounts.NumberOfRequestsReceivedThatAccessedEtcd) * 100.0
		}

		percentageOfNoLeaderResponses = append(percentageOfNoLeaderResponses, plotter.XY{
			X: float64(timeOfSecond.Unix()),
			Y: percentage,
		})
	}

	lineOfPercentageOfNoLeaderResponses, err := plotter.NewLine(percentageOfNoLeaderResponses)
	if err != nil {
		return nil, err
	}
	lineOfPercentageOfNoLeaderResponses.LineStyle.Color = color.RGBA{R: 255, G: 0, B: 0, A: 255}
	p.Add(lineOfPercentageOfNoLeaderResponses)
	p.Legend.Add("Percentage of No Leader Responses in that Second", lineOfPercentageOfNoLeaderResponses)

	return p, nil
}

func (c *CountsForRun) WriteContentToStorage(storageDir, name, timeSuffix string) error {
csvContent, err := c.ToCSV()
if err != nil {
Expand All @@ -241,7 +286,7 @@ func (c *CountsForRun) WriteContentToStorage(storageDir, name, timeSuffix string
if err := os.WriteFile(requestCountsByTimeFile, csvContent, 0644); err != nil {
return err
}
requestCountsForEntireRunPlot, err := c.ToLineChart()
requestCountsForEntireRunPlot, err := c.ToConcurrentRequestsLineChart()
if err != nil {
return err
}
Expand All @@ -251,5 +296,15 @@ func (c *CountsForRun) WriteContentToStorage(storageDir, name, timeSuffix string
return err
}

noEtcdLeaderByTimeEntireRunPlot, err := c.ToEtcdNoLeaderLineChart()
if err != nil {
return err
}
noEtcdLeaderByTimeTimeGraphFile := path.Join(storageDir, fmt.Sprintf("%s_%s.png", name, timeSuffix))
err = noEtcdLeaderByTimeEntireRunPlot.Save(vg.Length(c.NumberOfSeconds), 500, noEtcdLeaderByTimeTimeGraphFile)
if err != nil {
return err
}

return nil
}

0 comments on commit b5bea41

Please sign in to comment.