From fca56cf83a89933bb6f7c6472a45ac2f9b054e09 Mon Sep 17 00:00:00 2001 From: Michael Mokrysz Date: Fri, 20 Oct 2017 13:57:29 +0100 Subject: [PATCH 1/6] Tag app and service metrics with free/paid status Our product team would like to break down the platform information on their dashboard further. In particular they want to: - Identify the number of apps running on trial vs non-trial orgs - Identify trends in service instances by trial vs non-trial orgs - Identify trends in service instances by free vs paid service plans This required adding new tags to app and service metrics recorded by the `paas-metrics` app in `paas-cf/tools/metrics`. Until now we have kept business logic within the Datadog dashboard, but this is no longer possible. Datadog imposes soft limits on our number of custom metrics. If we were to tag services with service plan names and org quota names there would be a combinatorial explosion in the number of custom metrics being generated. We have mitigated this by putting some business logic into `paas-metrics`. It now identifies: - Free service plans as those named "Free" - Trial organisations as those using the "default" quota. 
These are recorded as metrics tags with fewer possible combinations: - On apps and services: `trial_org:true` or `trial_org:false` - On services: `free_service:true` or `free_service:false` --- tools/metrics/gauges.go | 143 ++++++++++++++++++++++++++++++---------- 1 file changed, 108 insertions(+), 35 deletions(-) diff --git a/tools/metrics/gauges.go b/tools/metrics/gauges.go index 39a17a2d3d..b747a4e7e7 100644 --- a/tools/metrics/gauges.go +++ b/tools/metrics/gauges.go @@ -1,6 +1,7 @@ package main import ( + "fmt" "net/url" "time" @@ -116,32 +117,41 @@ func AppCountGauge(c *Client, interval time.Duration) MetricReadCloser { if err != nil { return err } - started := 0 - stopped := 0 + + counters := map[string]map[bool]int{ + "started": map[bool]int{}, + "stopped": map[bool]int{}, + } for _, app := range apps { + org_quota, err := findOrgQuotaFromSpaceGUID(c, app.SpaceGuid) + if err != nil { + continue + } + org_is_trial := isOrgQuotaTrial(org_quota) if app.State == "STARTED" { - started += 1 + counters["started"][org_is_trial]++ } if app.State == "STOPPED" { - stopped += 1 + counters["stopped"][org_is_trial]++ } } - return w.WriteMetrics([]Metric{ - { - Kind: Gauge, - Time: time.Now(), - Name: "apps.count", - Value: float64(started), - Tags: []string{"state:running"}, - }, - { - Kind: Gauge, - Time: time.Now(), - Name: "apps.count", - Value: float64(stopped), - Tags: []string{"state:stopped"}, - }, - }) + + metrics := []Metric{} + for state, count_by_trial := range counters { + for org_is_trial, count := range count_by_trial { + metrics = append(metrics, Metric{ + Kind: Gauge, + Time: time.Now(), + Name: "apps.count", + Value: float64(count), + Tags: []string{ + "state:" + state, + fmt.Sprintf("trial_org:%t", org_is_trial), + }, + }) + } + } + return w.WriteMetrics(metrics) }) } @@ -155,27 +165,56 @@ func ServiceCountGauge(c *Client, interval time.Duration) MetricReadCloser { if err != nil { return nil } - counters := map[string]int{} + service_plans, err := 
c.cf.ListServicePlans() + if err != nil { + return nil + } + + counters := map[bool]map[bool]map[string]int{ + true: map[bool]map[string]int{ + true: map[string]int{}, + false: map[string]int{}, + }, + false: map[bool]map[string]int{ + true: map[string]int{}, + false: map[string]int{}, + }, + } for _, instance := range serviceInstances { service := findService(services, instance.ServiceGuid) - if service == nil { + if service == nil || service.Label == "" { continue } - if service.Label == "" { + service_plan := findServicePlan(service_plans, instance.ServicePlanGuid) + if service_plan == nil { continue } - counters[service.Label]++ + org_quota, err := findOrgQuotaFromSpaceGUID(c, instance.SpaceGuid) + if err != nil { + continue + } + org_is_trial := isOrgQuotaTrial(org_quota) + service_plan_is_free := isServicePlanFree(service_plan) + counters[org_is_trial][service_plan_is_free][service.Label]++ } - metrics := []Metric{} - for serviceName, count := range counters { - metrics = append(metrics, Metric{ - Kind: Gauge, - Time: time.Now(), - Name: "services.provisioned", - Value: float64(count), - Tags: []string{"type:" + serviceName}, - }) + metrics := []Metric{} + for org_is_trial, x := range counters { + for service_plan_is_free, y := range x { + for service_label, count := range y { + metrics = append(metrics, Metric{ + Kind: Gauge, + Time: time.Now(), + Name: "services.provisioned", + Value: float64(count), + Tags: []string{ + "type:" + service_label, + fmt.Sprintf("trial_org:%t", org_is_trial), + fmt.Sprintf("free_service:%t", service_plan_is_free), + }, + }) + } + } } return w.WriteMetrics(metrics) }) @@ -191,10 +230,9 @@ func OrgCountGauge(c *Client, interval time.Duration) MetricReadCloser { for _, org := range orgs { quota, err := org.Quota() if err != nil { - return err + continue } counters[quota.Name]++ - } metrics := []Metric{} for name, count := range counters { @@ -274,3 +312,38 @@ func findService(services []cfclient.Service, guid string) 
*cfclient.Service { } return nil } + +func findServicePlan(service_plans []cfclient.ServicePlan, guid string) *cfclient.ServicePlan { + for _, service_plan := range service_plans { + if service_plan.Guid == guid { + return &service_plan + } + } + return nil +} + +func findOrgQuotaFromSpaceGUID(c *Client, guid string) (*cfclient.OrgQuota, error) { + space, err := c.cf.GetSpaceByGuid(guid) + if err != nil { + return nil, err + } + org, err := space.Org() + if err != nil { + return nil, err + } + org_quota, err := org.Quota() + if err != nil { + return nil, err + } + return org_quota, nil +} + +// Determine if an organisation is on a trial plan. +func isOrgQuotaTrial(quota *cfclient.OrgQuota) bool { + return quota.Name == "default" +} + +// Determine if a service plan is free. +func isServicePlanFree(plan *cfclient.ServicePlan) bool { + return plan.Name == "Free" +} From 209c49aa70b5a941444dec1a4159144108793d36 Mon Sep 17 00:00:00 2001 From: Michael Mokrysz Date: Wed, 25 Oct 2017 15:49:10 +0100 Subject: [PATCH 2/6] fixup! 
Tag app and service metrics with free/paid status --- tools/metrics/gauges.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/metrics/gauges.go b/tools/metrics/gauges.go index b747a4e7e7..2e59840843 100644 --- a/tools/metrics/gauges.go +++ b/tools/metrics/gauges.go @@ -118,6 +118,10 @@ func AppCountGauge(c *Client, interval time.Duration) MetricReadCloser { return err } + // Number of relevant apps in + // - APP_STATE: string of whether each app is "started" or "stopped" + // - ORG_IS_TRIAL: boolean of whether each app is owned by a trial organisation + // counters[APP_STATE][ORG_IS_TRIAL] counters := map[string]map[bool]int{ "started": map[bool]int{}, "stopped": map[bool]int{}, @@ -170,6 +174,11 @@ func ServiceCountGauge(c *Client, interval time.Duration) MetricReadCloser { return nil } + // Number of relevant service instances in + // - ORG_IS_TRIAL: boolean of whether each app is owned by a trial organisation + // - SERVICE_PLAN_IS_FREE: whether the instance's service plan is free + // - NAME_OF_SERVICE: e.g., "mysql" or "postgres" + // counters[ORG_IS_TRIAL][SERVICE_PLAN_IS_FREE][NAME_OF_SERVICE] counters := map[bool]map[bool]map[string]int{ true: map[bool]map[string]int{ true: map[string]int{}, From 6773a25468e855faa57eb49aca99f60002369c39 Mon Sep 17 00:00:00 2001 From: Michael Mokrysz Date: Wed, 25 Oct 2017 16:13:11 +0100 Subject: [PATCH 3/6] fixup! 
Tag app and service metrics with free/paid status --- tools/metrics/gauges.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/metrics/gauges.go b/tools/metrics/gauges.go index 2e59840843..8c9841a52b 100644 --- a/tools/metrics/gauges.go +++ b/tools/metrics/gauges.go @@ -2,6 +2,7 @@ package main import ( "fmt" + "log" "net/url" "time" @@ -129,6 +130,7 @@ func AppCountGauge(c *Client, interval time.Duration) MetricReadCloser { for _, app := range apps { org_quota, err := findOrgQuotaFromSpaceGUID(c, app.SpaceGuid) if err != nil { + log.Printf("Error finding org quota for space %s for app %s: %s\n", app.SpaceGuid, app.Guid, err) continue } org_is_trial := isOrgQuotaTrial(org_quota) @@ -191,15 +193,22 @@ func ServiceCountGauge(c *Client, interval time.Duration) MetricReadCloser { } for _, instance := range serviceInstances { service := findService(services, instance.ServiceGuid) - if service == nil || service.Label == "" { + if service == nil { + log.Printf("Service was not found for service instance %s\n", instance.Guid) + continue + } + if service.Label == "" { + log.Printf("Service label was empty for service %s and service instance %s\n", service.Guid, instance.Guid) continue } service_plan := findServicePlan(service_plans, instance.ServicePlanGuid) if service_plan == nil { + log.Printf("Error finding service plan for service instance %s: %s\n", instance.Guid, err) continue } org_quota, err := findOrgQuotaFromSpaceGUID(c, instance.SpaceGuid) if err != nil { + log.Printf("Error finding org quota for space %s for service instance %s: %s\n", instance.SpaceGuid, instance.Guid, err) continue } org_is_trial := isOrgQuotaTrial(org_quota) @@ -239,6 +248,7 @@ func OrgCountGauge(c *Client, interval time.Duration) MetricReadCloser { for _, org := range orgs { quota, err := org.Quota() if err != nil { + log.Printf("Error finding org quota for org %s: %s\n", org.Guid, err) continue } counters[quota.Name]++ From 
9881ddfabe8b403bd1a08d04aee4aa38acf595d5 Mon Sep 17 00:00:00 2001 From: Michael Mokrysz Date: Wed, 25 Oct 2017 16:13:57 +0100 Subject: [PATCH 4/6] fixup! Tag app and service metrics with free/paid status --- tools/metrics/gauges.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/metrics/gauges.go b/tools/metrics/gauges.go index 8c9841a52b..71d2c4d1d3 100644 --- a/tools/metrics/gauges.go +++ b/tools/metrics/gauges.go @@ -177,7 +177,7 @@ func ServiceCountGauge(c *Client, interval time.Duration) MetricReadCloser { } // Number of relevant service instances in - // - ORG_IS_TRIAL: boolean of whether each app is owned by a trial organisation + // - ORG_IS_TRIAL: boolean of whether each instance is owned by a trial organisation // - SERVICE_PLAN_IS_FREE: whether the instance's service plan is free // - NAME_OF_SERVICE: e.g., "mysql" or "postgres" // counters[ORG_IS_TRIAL][SERVICE_PLAN_IS_FREE][NAME_OF_SERVICE] From c2de42a5ad93394073c903f31545e7fcdd6fe8f3 Mon Sep 17 00:00:00 2001 From: Michael Mokrysz Date: Wed, 25 Oct 2017 16:33:22 +0100 Subject: [PATCH 5/6] fixup! 
Tag app and service metrics with free/paid status --- tools/metrics/gauges.go | 87 +++++++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 16 deletions(-) diff --git a/tools/metrics/gauges.go b/tools/metrics/gauges.go index 71d2c4d1d3..8865518c30 100644 --- a/tools/metrics/gauges.go +++ b/tools/metrics/gauges.go @@ -118,6 +118,18 @@ func AppCountGauge(c *Client, interval time.Duration) MetricReadCloser { if err != nil { return err } + spaces, err := c.cf.ListSpaces() + if err != nil { + return err + } + orgs, err := c.cf.ListOrgs() + if err != nil { + return err + } + org_quotas, err := c.cf.ListOrgQuotas() + if err != nil { + return err + } // Number of relevant apps in // - APP_STATE: string of whether each app is "started" or "stopped" @@ -128,9 +140,19 @@ func AppCountGauge(c *Client, interval time.Duration) MetricReadCloser { "stopped": map[bool]int{}, } for _, app := range apps { - org_quota, err := findOrgQuotaFromSpaceGUID(c, app.SpaceGuid) - if err != nil { - log.Printf("Error finding org quota for space %s for app %s: %s\n", app.SpaceGuid, app.Guid, err) + space := findSpace(spaces, app.SpaceGuid) + if space == nil { + log.Printf("Space was not found for app %s\n", app.Guid) + continue + } + org := findOrg(orgs, space.OrganizationGuid) + if org == nil { + log.Printf("Org was not found for app %s in space %s\n", app.Guid, space.Guid) + continue + } + org_quota := findOrgQuota(org_quotas, org.QuotaDefinitionGuid) + if org_quota == nil { + log.Printf("Org Quota was not found for app %s in org %s\n", app.Guid, org.Guid) continue } org_is_trial := isOrgQuotaTrial(org_quota) @@ -175,6 +197,18 @@ func ServiceCountGauge(c *Client, interval time.Duration) MetricReadCloser { if err != nil { return nil } + spaces, err := c.cf.ListSpaces() + if err != nil { + return err + } + orgs, err := c.cf.ListOrgs() + if err != nil { + return err + } + org_quotas, err := c.cf.ListOrgQuotas() + if err != nil { + return err + } // Number of relevant service 
instances in // - ORG_IS_TRIAL: boolean of whether each instance is owned by a trial organisation @@ -206,9 +240,19 @@ func ServiceCountGauge(c *Client, interval time.Duration) MetricReadCloser { log.Printf("Error finding service plan for service instance %s: %s\n", instance.Guid, err) continue } - org_quota, err := findOrgQuotaFromSpaceGUID(c, instance.SpaceGuid) + space := findSpace(spaces, instance.SpaceGuid) + if space == nil { + log.Printf("Space was not found for service instance %s\n", instance.Guid) + continue + } + org := findOrg(orgs, space.OrganizationGuid) + if org == nil { + log.Printf("Org was not found for service instance %s in space %s\n", instance.Guid, space.Guid) + continue + } + org_quota := findOrgQuota(org_quotas, org.QuotaDefinitionGuid) if err != nil { - log.Printf("Error finding org quota for space %s for service instance %s: %s\n", instance.SpaceGuid, instance.Guid, err) + log.Printf("Org Quota was not found for service instance %s in org %s\n", instance.Guid, org.Guid) continue } org_is_trial := isOrgQuotaTrial(org_quota) @@ -341,20 +385,31 @@ func findServicePlan(service_plans []cfclient.ServicePlan, guid string) *cfclien return nil } -func findOrgQuotaFromSpaceGUID(c *Client, guid string) (*cfclient.OrgQuota, error) { - space, err := c.cf.GetSpaceByGuid(guid) - if err != nil { - return nil, err +func findSpace(spaces []cfclient.Space, guid string) *cfclient.Space { + for _, space := range spaces { + if space.Guid == guid { + return &space + } } - org, err := space.Org() - if err != nil { - return nil, err + return nil +} + +func findOrg(orgs []cfclient.Org, guid string) *cfclient.Org { + for _, org := range orgs { + if org.Guid == guid { + return &org + } } - org_quota, err := org.Quota() - if err != nil { - return nil, err + return nil +} + +func findOrgQuota(org_quotas []cfclient.OrgQuota, guid string) *cfclient.OrgQuota { + for _, org_quota := range org_quotas { + if org_quota.Guid == guid { + return &org_quota + } } - return 
org_quota, nil + return nil } // Determine if an organisation is on a trial plan. From ef3c59f2a113fa12ea1e34ee7d2e1b0b1051f660 Mon Sep 17 00:00:00 2001 From: Michael Mokrysz Date: Wed, 25 Oct 2017 17:03:27 +0100 Subject: [PATCH 6/6] fixup! Tag app and service metrics with free/paid status --- tools/metrics/gauges.go | 62 ++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/tools/metrics/gauges.go b/tools/metrics/gauges.go index 8865518c30..e34c7c89dc 100644 --- a/tools/metrics/gauges.go +++ b/tools/metrics/gauges.go @@ -126,7 +126,7 @@ func AppCountGauge(c *Client, interval time.Duration) MetricReadCloser { if err != nil { return err } - org_quotas, err := c.cf.ListOrgQuotas() + orgQuotas, err := c.cf.ListOrgQuotas() if err != nil { return err } @@ -150,23 +150,23 @@ func AppCountGauge(c *Client, interval time.Duration) MetricReadCloser { log.Printf("Org was not found for app %s in space %s\n", app.Guid, space.Guid) continue } - org_quota := findOrgQuota(org_quotas, org.QuotaDefinitionGuid) - if org_quota == nil { + orgQuota := findOrgQuota(orgQuotas, org.QuotaDefinitionGuid) + if orgQuota == nil { log.Printf("Org Quota was not found for app %s in org %s\n", app.Guid, org.Guid) continue } - org_is_trial := isOrgQuotaTrial(org_quota) + orgIsTrial := isOrgQuotaTrial(orgQuota) if app.State == "STARTED" { - counters["started"][org_is_trial]++ + counters["started"][orgIsTrial]++ } if app.State == "STOPPED" { - counters["stopped"][org_is_trial]++ + counters["stopped"][orgIsTrial]++ } } metrics := []Metric{} - for state, count_by_trial := range counters { - for org_is_trial, count := range count_by_trial { + for state, countByTrial := range counters { + for orgIsTrial, count := range countByTrial { metrics = append(metrics, Metric{ Kind: Gauge, Time: time.Now(), @@ -174,7 +174,7 @@ func AppCountGauge(c *Client, interval time.Duration) MetricReadCloser { Value: float64(count), Tags: []string{ "state:" + state, - 
fmt.Sprintf("trial_org:%t", org_is_trial), + fmt.Sprintf("trial_org:%t", orgIsTrial), }, }) } @@ -193,7 +193,7 @@ func ServiceCountGauge(c *Client, interval time.Duration) MetricReadCloser { if err != nil { return nil } - service_plans, err := c.cf.ListServicePlans() + servicePlans, err := c.cf.ListServicePlans() if err != nil { return nil } @@ -205,7 +205,7 @@ func ServiceCountGauge(c *Client, interval time.Duration) MetricReadCloser { if err != nil { return err } - org_quotas, err := c.cf.ListOrgQuotas() + orgQuotas, err := c.cf.ListOrgQuotas() if err != nil { return err } @@ -235,8 +235,8 @@ func ServiceCountGauge(c *Client, interval time.Duration) MetricReadCloser { log.Printf("Service label was empty for service %s and service instance %s\n", service.Guid, instance.Guid) continue } - service_plan := findServicePlan(service_plans, instance.ServicePlanGuid) - if service_plan == nil { + servicePlan := findServicePlan(servicePlans, instance.ServicePlanGuid) + if servicePlan == nil { log.Printf("Error finding service plan for service instance %s: %s\n", instance.Guid, err) continue } @@ -250,29 +250,29 @@ func ServiceCountGauge(c *Client, interval time.Duration) MetricReadCloser { log.Printf("Org was not found for service instance %s in space %s\n", instance.Guid, space.Guid) continue } - org_quota := findOrgQuota(org_quotas, org.QuotaDefinitionGuid) + orgQuota := findOrgQuota(orgQuotas, org.QuotaDefinitionGuid) if err != nil { log.Printf("Org Quota was not found for service instance %s in org %s\n", instance.Guid, org.Guid) continue } - org_is_trial := isOrgQuotaTrial(org_quota) - service_plan_is_free := isServicePlanFree(service_plan) - counters[org_is_trial][service_plan_is_free][service.Label]++ + orgIsTrial := isOrgQuotaTrial(orgQuota) + servicePlanIsFree := isServicePlanFree(servicePlan) + counters[orgIsTrial][servicePlanIsFree][service.Label]++ } metrics := []Metric{} - for org_is_trial, x := range counters { - for service_plan_is_free, y := range x { - for 
service_label, count := range y { + for orgIsTrial, x := range counters { + for servicePlanIsFree, y := range x { + for serviceLabel, count := range y { metrics = append(metrics, Metric{ Kind: Gauge, Time: time.Now(), Name: "services.provisioned", Value: float64(count), Tags: []string{ - "type:" + service_label, - fmt.Sprintf("trial_org:%t", org_is_trial), - fmt.Sprintf("free_service:%t", service_plan_is_free), + "type:" + serviceLabel, + fmt.Sprintf("trial_org:%t", orgIsTrial), + fmt.Sprintf("free_service:%t", servicePlanIsFree), }, }) } @@ -376,10 +376,10 @@ func findService(services []cfclient.Service, guid string) *cfclient.Service { return nil } -func findServicePlan(service_plans []cfclient.ServicePlan, guid string) *cfclient.ServicePlan { - for _, service_plan := range service_plans { - if service_plan.Guid == guid { - return &service_plan +func findServicePlan(servicePlans []cfclient.ServicePlan, guid string) *cfclient.ServicePlan { + for _, servicePlan := range servicePlans { + if servicePlan.Guid == guid { + return &servicePlan } } return nil @@ -403,10 +403,10 @@ func findOrg(orgs []cfclient.Org, guid string) *cfclient.Org { return nil } -func findOrgQuota(org_quotas []cfclient.OrgQuota, guid string) *cfclient.OrgQuota { - for _, org_quota := range org_quotas { - if org_quota.Guid == guid { - return &org_quota +func findOrgQuota(orgQuotas []cfclient.OrgQuota, guid string) *cfclient.OrgQuota { + for _, orgQuota := range orgQuotas { + if orgQuota.Guid == guid { + return &orgQuota } } return nil