From 1f88562b8524b3ff0cfce2580401fd649bb02ca5 Mon Sep 17 00:00:00 2001 From: Kiran Pachhai Date: Mon, 25 Mar 2019 14:52:08 -0400 Subject: [PATCH 1/4] Added two new metrics to collect all the past commit history according to the branch specified --- config/config.go | 10 +++++-- docker-compose.yml | 5 +++- exporter/gather.go | 61 +++++++++++++++++++++++++++++++++++------- exporter/metrics.go | 40 ++++++++++++++++++++++++--- exporter/prometheus.go | 6 ++--- exporter/structs.go | 18 +++++++++++++ 6 files changed, 121 insertions(+), 19 deletions(-) diff --git a/config/config.go b/config/config.go index 7ca8b0ec..7d0b3706 100644 --- a/config/config.go +++ b/config/config.go @@ -17,6 +17,7 @@ type Config struct { *cfg.BaseConfig APIURL string Repositories string + Branch string Organisations string Users string APITokenEnv string @@ -31,12 +32,13 @@ func Init() Config { ac := cfg.Init() url := cfg.GetEnv("API_URL", "https://api.github.com") repos := os.Getenv("REPOS") + branch := os.Getenv("BRANCH") orgs := os.Getenv("ORGS") users := os.Getenv("USERS") tokenEnv := os.Getenv("GITHUB_TOKEN") tokenFile := os.Getenv("GITHUB_TOKEN_FILE") token, err := getAuth(tokenEnv, tokenFile) - scraped, err := getScrapeURLs(url, repos, orgs, users) + scraped, err := getScrapeURLs(url, repos, branch, orgs, users) if err != nil { log.Errorf("Error initialising Configuration, Error: %v", err) @@ -46,6 +48,7 @@ func Init() Config { &ac, url, repos, + branch, orgs, users, tokenEnv, @@ -59,7 +62,7 @@ func Init() Config { // Init populates the Config struct based on environmental runtime configuration // All URL's are added to the TargetURL's string array -func getScrapeURLs(apiURL, repos, orgs, users string) ([]string, error) { +func getScrapeURLs(apiURL, repos, branch, orgs, users string) ([]string, error) { urls := []string{} @@ -76,6 +79,9 @@ func getScrapeURLs(apiURL, repos, orgs, users string) ([]string, error) { for _, x := range rs { y := fmt.Sprintf("%s/repos/%s%s", apiURL, x, opts) urls = append(urls, y) + // Append commits history to the array + z := fmt.Sprintf("%s/repos/%s/commits%s&sha=%s", apiURL, x, opts, branch) + urls = append(urls, z) } } diff --git a/docker-compose.yml b/docker-compose.yml index a007ef26..4b5c883b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,5 +10,8 @@ services: - 9171:9171 image: infinityworks/github-exporter:latest environment: - - REPOS= + - REPOS= + - BRANCH= - GITHUB_TOKEN= + - LOG_LEVEL= + diff --git a/exporter/gather.go b/exporter/gather.go index 14b6b7fb..133110f8 100644 --- a/exporter/gather.go +++ b/exporter/gather.go @@ -1,36 +1,65 @@ package exporter import ( + "bytes" "encoding/json" "fmt" "strconv" + "strings" log "github.com/sirupsen/logrus" ) // gatherData - Collects the data from the API and stores into struct -func (e *Exporter) gatherData() ([]*Datum, *RateLimits, error) { +func (e *Exporter) gatherData() ([]*Datum, []*CommitDatum, *RateLimits, error) { data := []*Datum{} + commitData := []*CommitDatum{} responses, err := asyncHTTPGets(e.TargetURLs, e.APIToken) if err != nil { - return data, nil, err + return data, commitData, nil, err } + opts := "?&per_page=100" for _, response := range responses { // Github can at times present an array, or an object for the same data set. // This code checks handles this variation. if isArray(response.body) { - ds := []*Datum{} - json.Unmarshal(response.body, &ds) - data = append(data, ds...) + if isCommitData(response.body) { + cds := []*CommitDatum{} + json.Unmarshal(response.body, &cds) + commitData = append(commitData, cds...) + for len(commitData[len(commitData)-1].Parents) != 0 { + apiURL := strings.Split(commitData[len(commitData)-1].URL, "/commits/")[0] + urls := []string{fmt.Sprintf("%s/commits%s&sha=%s", apiURL, opts, commitData[len(commitData)-1].CommitHash)} + responsesNext, err := asyncHTTPGets(urls, e.APIToken) + if err != nil { + break + } + for _, r := range responsesNext { + cds = []*CommitDatum{} + json.Unmarshal(r.body, &cds) + commitData = append(commitData, cds...) + } + } + } else { + ds := []*Datum{} + json.Unmarshal(response.body, &ds) + data = append(data, ds...) + } } else { - d := new(Datum) - json.Unmarshal(response.body, &d) - data = append(data, d) + if isCommitData(response.body) { + cd := new(CommitDatum) + json.Unmarshal(response.body, &cd) + commitData = append(commitData, cd) + } else { + d := new(Datum) + json.Unmarshal(response.body, &d) + data = append(data, d) + } } log.Infof("API data fetched for repository: %s", response.url) @@ -43,8 +72,8 @@ func (e *Exporter) gatherData() ([]*Datum, *RateLimits, error) { log.Errorf("Unable to obtain rate limit data from API, Error: %s", err) } - //return data, rates, err - return data, rates, nil + //return data, commitData, rates, err + return data, commitData, rates, nil } @@ -108,3 +137,15 @@ func isArray(body []byte) bool { return isArray } + +func isCommitData(body []byte) bool { + + isCommitData := false + + data := body[:10] + if bytes.Contains(data, []byte(`"sha":`)) { + isCommitData = true + } + + return isCommitData +} diff --git a/exporter/metrics.go b/exporter/metrics.go index 558180ea..675bd51a 100644 --- a/exporter/metrics.go +++ b/exporter/metrics.go @@ -1,7 +1,11 @@ package exporter -import "github.com/prometheus/client_golang/prometheus" -import "strconv" +import ( + "strconv" + "strings" + + "github.com/prometheus/client_golang/prometheus" +) // AddMetrics - Add's all of the metrics to a map of strings, returns the map. func AddMetrics() map[string]*prometheus.Desc { @@ -33,6 +37,16 @@ func AddMetrics() map[string]*prometheus.Desc { "Size in KB for given repository", []string{"repo", "user", "private", "fork", "archived", "license", "language"}, nil, ) + APIMetrics["CommitsHistory"] = prometheus.NewDesc( + prometheus.BuildFQName("github", "commit", "count"), + "Total number of commits for given repository and given branch", + []string{"branch", "author"}, nil, + ) + APIMetrics["LatestCommit"] = prometheus.NewDesc( + prometheus.BuildFQName("github", "commit", "latest"), + "Latest Commit for a given repository and given branch", + []string{"branch", "author", "date", "commithash"}, nil, + ) APIMetrics["Limit"] = prometheus.NewDesc( prometheus.BuildFQName("github", "rate", "limit"), "Number of API queries allowed in a 60 minute window", @@ -53,7 +67,7 @@ func AddMetrics() map[string]*prometheus.Desc { } // processMetrics - processes the response data and sets the metrics using it as a source -func (e *Exporter) processMetrics(data []*Datum, rates *RateLimits, ch chan<- prometheus.Metric) error { +func (e *Exporter) processMetrics(data []*Datum, commitData []*CommitDatum, rates *RateLimits, ch chan<- prometheus.Metric) error { // APIMetrics - range through the data slice for _, x := range data { @@ -62,8 +76,28 @@ func (e *Exporter) processMetrics(data []*Datum, rates *RateLimits, ch chan<- pr ch <- prometheus.MustNewConstMetric(e.APIMetrics["OpenIssues"], prometheus.GaugeValue, x.OpenIssues, x.Name, x.Owner.Login, strconv.FormatBool(x.Private), strconv.FormatBool(x.Fork), strconv.FormatBool(x.Archived), x.License.Key, x.Language) ch <- prometheus.MustNewConstMetric(e.APIMetrics["Watchers"], prometheus.GaugeValue, x.Watchers, x.Name, x.Owner.Login, strconv.FormatBool(x.Private), strconv.FormatBool(x.Fork), strconv.FormatBool(x.Archived), x.License.Key, x.Language) ch <- prometheus.MustNewConstMetric(e.APIMetrics["Size"], prometheus.GaugeValue, x.Size, x.Name, x.Owner.Login, strconv.FormatBool(x.Private), strconv.FormatBool(x.Fork), strconv.FormatBool(x.Archived), x.License.Key, x.Language) + } + branch := e.Config.Branch + latestCommitAuthor, latestCommitDate, latestCommitHash := "", "", "" + totalCommits := make(map[string]float64) + for i, x := range commitData { + if i == 0 { + latestCommitDate = strings.Split(x.Commit.Author.Date, "T")[0] + latestCommitAuthor = x.Commit.Author.Name + latestCommitHash = x.CommitHash + } + author := x.Commit.Author.Name + if _, ok := totalCommits[author]; ok { + totalCommits[author]++ + } else { + totalCommits[author] = 1.0 + } + } + for author, val := range totalCommits { + ch <- prometheus.MustNewConstMetric(e.APIMetrics["CommitsHistory"], prometheus.GaugeValue, val, branch, author) } + ch <- prometheus.MustNewConstMetric(e.APIMetrics["LatestCommit"], prometheus.GaugeValue, 1.0, branch, latestCommitAuthor, latestCommitDate, latestCommitHash) // Set Rate limit stats ch <- prometheus.MustNewConstMetric(e.APIMetrics["Limit"], prometheus.GaugeValue, rates.Limit) diff --git a/exporter/prometheus.go b/exporter/prometheus.go index 90715dcb..21379390 100644 --- a/exporter/prometheus.go +++ b/exporter/prometheus.go @@ -18,8 +18,8 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { // This function is called when a scrape is peformed on the /metrics page func (e *Exporter) Collect(ch chan<- prometheus.Metric) { - // Scrape the Data from Github - var data, rates, err = e.gatherData() + // Scrape the Data and CommitData from Github + var data, commitData, rates, err = e.gatherData() if err != nil { log.Errorf("Error gathering Data from remote API: %v", err) @@ -27,7 +27,7 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { } // Set prometheus gauge metrics using the data gathered - err = e.processMetrics(data, rates, ch) + err = e.processMetrics(data, commitData, rates, ch) if err != nil { log.Error("Error Processing Metrics", err) diff --git a/exporter/structs.go b/exporter/structs.go index a64cfae6..5efa9f8d 100644 --- a/exporter/structs.go +++ b/exporter/structs.go @@ -39,6 +39,24 @@ type Datum struct { Size float64 `json:"size"` } +// CommitData is used to store an array of CommitDatum +type CommitData []CommitDatum + +// CommitDatum is used to store commit historical data +type CommitDatum struct { + CommitHash string `json:"sha"` + Commit struct { + Author struct { + Name string `json:"name"` + Date string `json:"date"` + } `json:"author"` + } `json:"commit"` + URL string `json:"url"` + Parents []struct { + CommitHash string `json:"sha"` + } `json:"parents"` +} + // RateLimits is used to store rate limit data into a struct // This data is later represented as a metric, captured at the end of a scrape type RateLimits struct { From 97c8fb59ac59c3b3fe8b01345bf49814e8251b34 Mon Sep 17 00:00:00 2001 From: Kiran Pachhai Date: Mon, 25 Mar 2019 15:33:43 -0400 Subject: [PATCH 2/4] Fixed the issue where multiple repos was not working as intended --- .gitignore | 1 + exporter/metrics.go | 32 ++++++++++++++++++-------------- exporter/structs.go | 11 +++++++++++ 3 files changed, 30 insertions(+), 14 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..1fd6f6b4 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +github-exporter diff --git a/exporter/metrics.go b/exporter/metrics.go index 675bd51a..96ed0016 100644 --- a/exporter/metrics.go +++ b/exporter/metrics.go @@ -40,12 +40,12 @@ func AddMetrics() map[string]*prometheus.Desc { APIMetrics["CommitsHistory"] = prometheus.NewDesc( prometheus.BuildFQName("github", "commit", "count"), "Total number of commits for given repository and given branch", - []string{"branch", "author"}, nil, + []string{"repo", "branch", "author"}, nil, ) APIMetrics["LatestCommit"] = prometheus.NewDesc( prometheus.BuildFQName("github", "commit", "latest"), "Latest Commit for a given repository and given branch", - []string{"branch", "author", "date", "commithash"}, nil, + []string{"repo", "branch", "author", "date", "commithash"}, nil, ) APIMetrics["Limit"] = prometheus.NewDesc( prometheus.BuildFQName("github", "rate", "limit"), @@ -79,25 +79,29 @@ func (e *Exporter) processMetrics(data []*Datum, commitData []*CommitDatum, rate } branch := e.Config.Branch - latestCommitAuthor, latestCommitDate, latestCommitHash := "", "", "" - totalCommits := make(map[string]float64) - for i, x := range commitData { - if i == 0 { - latestCommitDate = strings.Split(x.Commit.Author.Date, "T")[0] - latestCommitAuthor = x.Commit.Author.Name - latestCommitHash = x.CommitHash - } + latestCommits := make(map[string]*LatestCommitHistory) + totalCommits := make(map[string]*CommitHistory) + for _, x := range commitData { + shortenedRepo := strings.Replace(x.URL, "https://api.github.com/repos/", "", -1) + repo := shortenedRepo[:strings.Index(shortenedRepo, "/commits")] author := x.Commit.Author.Name + if _, ok := latestCommits[repo]; !ok { + date := strings.Split(x.Commit.Author.Date, "T")[0] + hash := x.CommitHash + latestCommits[repo] = &LatestCommitHistory{author, date, hash} + } if _, ok := totalCommits[author]; ok { - totalCommits[author]++ + totalCommits[author].Count++ } else { - totalCommits[author] = 1.0 + totalCommits[author] = &CommitHistory{repo, 1.0} } } for author, val := range totalCommits { - ch <- prometheus.MustNewConstMetric(e.APIMetrics["CommitsHistory"], prometheus.GaugeValue, val, branch, author) + ch <- prometheus.MustNewConstMetric(e.APIMetrics["CommitsHistory"], prometheus.GaugeValue, val.Count, val.Repo, branch, author) + } + for repo, val := range latestCommits { + ch <- prometheus.MustNewConstMetric(e.APIMetrics["LatestCommit"], prometheus.GaugeValue, 1.0, repo, branch, val.Author, val.Date, val.Hash) } - ch <- prometheus.MustNewConstMetric(e.APIMetrics["LatestCommit"], prometheus.GaugeValue, 1.0, branch, latestCommitAuthor, latestCommitDate, latestCommitHash) // Set Rate limit stats ch <- prometheus.MustNewConstMetric(e.APIMetrics["Limit"], prometheus.GaugeValue, rates.Limit) diff --git a/exporter/structs.go b/exporter/structs.go index 5efa9f8d..06cda342 100644 --- a/exporter/structs.go +++ b/exporter/structs.go @@ -57,6 +57,17 @@ type CommitDatum struct { } `json:"parents"` } +type CommitHistory struct { + Repo string + Count float64 +} + +type LatestCommitHistory struct { + Author string + Date string + Hash string +} + // RateLimits is used to store rate limit data into a struct // This data is later represented as a metric, captured at the end of a scrape type RateLimits struct { From dd08f38263aeb2886686b4ef3474ce01db5eae01 Mon Sep 17 00:00:00 2001 From: Kiran Pachhai Date: Mon, 25 Mar 2019 16:20:35 -0400 Subject: [PATCH 3/4] Fixed the issue where the same key was used for recording commit history for multiple repos --- exporter/metrics.go | 18 +++++++++--------- exporter/structs.go | 6 ++++-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/exporter/metrics.go b/exporter/metrics.go index 96ed0016..e0994b43 100644 --- a/exporter/metrics.go +++ b/exporter/metrics.go @@ -85,22 +85,22 @@ func (e *Exporter) processMetrics(data []*Datum, commitData []*CommitDatum, rate shortenedRepo := strings.Replace(x.URL, "https://api.github.com/repos/", "", -1) repo := shortenedRepo[:strings.Index(shortenedRepo, "/commits")] author := x.Commit.Author.Name - if _, ok := latestCommits[repo]; !ok { + if _, ok := latestCommits[author+repo]; !ok { date := strings.Split(x.Commit.Author.Date, "T")[0] hash := x.CommitHash - latestCommits[repo] = &LatestCommitHistory{author, date, hash} + latestCommits[author+repo] = &LatestCommitHistory{author, repo, date, hash} } - if _, ok := totalCommits[author]; ok { - totalCommits[author].Count++ + if _, ok := totalCommits[author+repo]; ok { + totalCommits[author+repo].Count++ } else { - totalCommits[author] = &CommitHistory{repo, 1.0} + totalCommits[author+repo] = &CommitHistory{author, repo, 1.0} } } - for author, val := range totalCommits { - ch <- prometheus.MustNewConstMetric(e.APIMetrics["CommitsHistory"], prometheus.GaugeValue, val.Count, val.Repo, branch, author) + for _, val := range totalCommits { + ch <- prometheus.MustNewConstMetric(e.APIMetrics["CommitsHistory"], prometheus.GaugeValue, val.Count, val.Repo, branch, val.Author) } - for repo, val := range latestCommits { - ch <- prometheus.MustNewConstMetric(e.APIMetrics["LatestCommit"], prometheus.GaugeValue, 1.0, repo, branch, val.Author, val.Date, val.Hash) + for _, val := range latestCommits { + ch <- prometheus.MustNewConstMetric(e.APIMetrics["LatestCommit"], prometheus.GaugeValue, 1.0, val.Repo, branch, val.Author, val.Date, val.Hash) } // Set Rate limit stats diff --git a/exporter/structs.go b/exporter/structs.go index 06cda342..f5587195 100644 --- a/exporter/structs.go +++ b/exporter/structs.go @@ -58,12 +58,14 @@ type CommitDatum struct { } type CommitHistory struct { - Repo string - Count float64 + Author string + Repo string + Count float64 } type LatestCommitHistory struct { Author string + Repo string Date string Hash string } From c136be8275b952f7b82412c3ae31b323fd1c3a06 Mon Sep 17 00:00:00 2001 From: Kiran Pachhai Date: Mon, 25 Mar 2019 16:30:35 -0400 Subject: [PATCH 4/4] Fixed the issue where latest commit history was being created more than once --- exporter/metrics.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exporter/metrics.go b/exporter/metrics.go index e0994b43..feffcc29 100644 --- a/exporter/metrics.go +++ b/exporter/metrics.go @@ -85,10 +85,10 @@ func (e *Exporter) processMetrics(data []*Datum, commitData []*CommitDatum, rate shortenedRepo := strings.Replace(x.URL, "https://api.github.com/repos/", "", -1) repo := shortenedRepo[:strings.Index(shortenedRepo, "/commits")] author := x.Commit.Author.Name - if _, ok := latestCommits[author+repo]; !ok { + if _, ok := latestCommits[repo]; !ok { date := strings.Split(x.Commit.Author.Date, "T")[0] hash := x.CommitHash - latestCommits[author+repo] = &LatestCommitHistory{author, repo, date, hash} + latestCommits[repo] = &LatestCommitHistory{author, repo, date, hash} } if _, ok := totalCommits[author+repo]; ok { totalCommits[author+repo].Count++