Skip to content

Commit

Permalink
Finish refactoring
Browse files Browse the repository at this point in the history
Add new packages:
- metricDataDispatcher
- archiver
  • Loading branch information
moebiusband73 committed Aug 28, 2024
1 parent f914a31 commit e7231b0
Show file tree
Hide file tree
Showing 10 changed files with 216 additions and 207 deletions.
5 changes: 3 additions & 2 deletions internal/api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
Expand Down Expand Up @@ -150,7 +151,7 @@ func setup(t *testing.T) *api.RestApi {
t.Fatal(err)
}

if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
if err := metricdata.Init(); err != nil {
t.Fatal(err)
}

Expand Down Expand Up @@ -341,7 +342,7 @@ func TestRestApi(t *testing.T) {
}

t.Run("CheckArchive", func(t *testing.T) {
data, err := metricdata.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background())
data, err := metricDataDispatcher.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background())
if err != nil {
t.Fatal(err)
}
Expand Down
6 changes: 3 additions & 3 deletions internal/api/rest.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/importer"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
Expand Down Expand Up @@ -515,7 +515,7 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
var data schema.JobData

if r.URL.Query().Get("all-metrics") == "true" {
data, err = metricdata.LoadData(job, nil, scopes, r.Context())
data, err = metricDataDispatcher.LoadData(job, nil, scopes, r.Context())
if err != nil {
log.Warn("Error while loading job data")
return
Expand Down Expand Up @@ -604,7 +604,7 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
scopes = []schema.MetricScope{"node"}
}

data, err := metricdata.LoadData(job, metrics, scopes, r.Context())
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, r.Context())
if err != nil {
log.Warn("Error while loading job data")
return
Expand Down
81 changes: 81 additions & 0 deletions internal/archiver/archiver.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archiver

import (
"context"
"math"

"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

// Writes a running job to the job-archive
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
allMetrics := make([]string, 0)
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
for _, mc := range metricConfigs {
allMetrics = append(allMetrics, mc.Name)
}

// TODO: Talk about this! What resolutions to store data at...
scopes := []schema.MetricScope{schema.MetricScopeNode}
if job.NumNodes <= 8 {
scopes = append(scopes, schema.MetricScopeCore)
}

if job.NumAcc > 0 {
scopes = append(scopes, schema.MetricScopeAccelerator)
}

jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx)
if err != nil {
log.Error("Error wile loading job data for archiving")
return nil, err
}

jobMeta := &schema.JobMeta{
BaseJob: job.BaseJob,
StartTime: job.StartTime.Unix(),
Statistics: make(map[string]schema.JobStatistics),
}

for metric, data := range jobData {
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
nodeData, ok := data["node"]
if !ok {
// TODO/FIXME: Calc average for non-node metrics as well!
continue
}

for _, series := range nodeData.Series {
avg += series.Statistics.Avg
min = math.Min(min, series.Statistics.Min)
max = math.Max(max, series.Statistics.Max)
}

jobMeta.Statistics[metric] = schema.JobStatistics{
Unit: schema.Unit{
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
},
Avg: avg / float64(job.NumNodes),
Min: min,
Max: max,
}
}

// If the file based archive is disabled,
// only return the JobMeta structure as the
// statistics in there are needed.
if config.Keys.DisableArchive {
return jobMeta, nil
}

return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
}
20 changes: 11 additions & 9 deletions internal/graph/schema.resolvers.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions internal/graph/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (

"github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
// "github.com/ClusterCockpit/cc-backend/pkg/archive"
Expand All @@ -24,8 +24,8 @@ func (r *queryResolver) rooflineHeatmap(
ctx context.Context,
filter []*model.JobFilter,
rows int, cols int,
minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {

minX float64, minY float64, maxX float64, maxY float64,
) ([][]float64, error) {
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil {
log.Error("Error while querying jobs for roofline")
Expand All @@ -47,7 +47,7 @@ func (r *queryResolver) rooflineHeatmap(
continue
}

jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
jobdata, err := metricDataDispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
if err != nil {
log.Errorf("Error while loading roofline metrics for job %d", job.ID)
return nil, err
Expand Down Expand Up @@ -120,7 +120,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
continue
}

if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
log.Error("Error while loading averages for footprint")
return nil, err
}
Expand Down
110 changes: 105 additions & 5 deletions internal/metricDataDispatcher/dataLoader.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,10 @@ func LoadData(job *schema.Job,

if job.State == schema.JobStateRunning ||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
!config.Keys.DisableArchive {
config.Keys.DisableArchive {

repo, ok := metricdata.GetMetricDataRepo(job.Cluster)

if !ok {
repo, err := metricdata.GetMetricDataRepo(job.Cluster)
if err != nil {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
}

Expand Down Expand Up @@ -117,7 +116,33 @@ func LoadData(job *schema.Job,
ttl = 2 * time.Minute
}

prepareJobData(jd, scopes)
// FIXME: Review: Is this really necessary or correct.
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
// to be available at the scope 'node'. If a job has a lot of nodes,
// statisticsSeries should be available so that a min/median/max Graph can be
// used instead of a lot of single lines.
const maxSeriesSize int = 15
for _, scopes := range jd {
for _, jm := range scopes {
if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
continue
}

jm.AddStatisticsSeries()
}
}

nodeScopeRequested := false
for _, scope := range scopes {
if scope == schema.MetricScopeNode {
nodeScopeRequested = true
}
}

if nodeScopeRequested {
jd.AddNodeScope("flops_any")
jd.AddNodeScope("mem_bw")
}

return jd, ttl, size
})
Expand All @@ -129,3 +154,78 @@ func LoadData(job *schema.Job,

return data.(schema.JobData), nil
}

// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
func LoadAverages(
job *schema.Job,
metrics []string,
data [][]schema.Float,
ctx context.Context,
) error {
if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
}

repo, err := metricdata.GetMetricDataRepo(job.Cluster)
if err != nil {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
}

stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalizazion?
if err != nil {
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
return err
}

for i, m := range metrics {
nodes, ok := stats[m]
if !ok {
data[i] = append(data[i], schema.NaN)
continue
}

sum := 0.0
for _, node := range nodes {
sum += node.Avg
}
data[i] = append(data[i], schema.Float(sum))
}

return nil
}

// Used for the node/system view. Returns a map of nodes to a map of metrics.
func LoadNodeData(
cluster string,
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) {
repo, err := metricdata.GetMetricDataRepo(cluster)
if err != nil {
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
}

if metrics == nil {
for _, m := range archive.GetCluster(cluster).MetricConfig {
metrics = append(metrics, m.Name)
}
}

data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
if len(data) != 0 {
log.Warnf("partial error: %s", err.Error())
} else {
log.Error("Error while loading node data from metric repository")
return nil, err
}
}

if data == nil {
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
}

return data, nil
}
Loading

0 comments on commit e7231b0

Please sign in to comment.