diff --git a/collector/exporter.go b/collector/exporter.go new file mode 100755 index 0000000..775f980 --- /dev/null +++ b/collector/exporter.go @@ -0,0 +1,194 @@ +package collector + +import ( + "time" + "strings" + + "github.com/prometheus/common/log" + "github.com/prometheus/client_golang/prometheus" +) + +// Metrics name parts. +const ( + // Default all Volumes + allVolumes = "_all" + + // Namespace + namespace = "gluster" + // Subsystem(s). + exporter = "exporter" +) + +// Metric descriptors. +var ( + scrapeDurationDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, exporter, "collector_duration_seconds"), + "Collector time duration.", + []string{"collector"}, nil, + ) +) + +// Collect defines which metrics we should collect +type Collect struct { + Base bool + Profile bool + Quota bool + Mount bool + Peer bool +} + +type Exporter struct { + hostname string + glusterPath string + volumes []string + collect Collect + error prometheus.Gauge + totalScrapes prometheus.Counter + scrapeErrors *prometheus.CounterVec + glusterUp prometheus.Gauge +} + +// returns a new GlusterFS exporter +func New(hostname string, glusterPath string, volumeString string, collect Collect) *Exporter { + + gfsPath, err := getGlusterBinary(glusterPath) + if err != nil { + log.Errorf("Given Gluster path %v has err: %v", glusterPath, err) + } + + volumes := strings.Split(volumeString, ",") + if len(volumes) < 1 { + log.Infof("No volumes given. Proceeding without volume information. 
Volumes: %v", volumeString) + } + + return &Exporter{ + hostname: hostname, + glusterPath: gfsPath, + volumes: volumes, + collect: collect, + totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: exporter, + Name: "scrapes_total", + Help: "Total number of times GlusterFS was scraped for metrics.", + }), + scrapeErrors: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: exporter, + Name: "scrape_errors_total", + Help: "Total number of times an error occurred scraping a GlusterFS.", + }, []string{"collector"}), + error: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: exporter, + Name: "last_scrape_error", + Help: "Whether the last scrape of metrics from GlusterFS resulted in an error (1 for error, 0 for success).", + }), + glusterUp: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Name: "up", + Help: "Whether the GlusterFS server is up.", + }), + } +} + +// Describe implements prometheus.Collector. +func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { + + metricCh := make(chan prometheus.Metric) + doneCh := make(chan struct{}) + + go func() { + for m := range metricCh { + ch <- m.Desc() + } + close(doneCh) + }() + + e.Collect(metricCh) + close(metricCh) + <-doneCh +} + +// Collect implements prometheus.Collector. 
+func (e *Exporter) Collect(ch chan<- prometheus.Metric) { + e.scrape(ch) + + ch <- e.totalScrapes + ch <- e.error + e.scrapeErrors.Collect(ch) + ch <- e.glusterUp +} + +func (e *Exporter) scrape(ch chan<- prometheus.Metric) { + e.totalScrapes.Inc() + var err error + + scrapeTime := time.Now() + + // if can get volume info, glusterFS is UP(1), or Down(0) + _, err = ExecVolumeInfo() + if err != nil { + e.glusterUp.Set(0) + } + e.glusterUp.Set(1) + + // default collect volume info as Base Metrics + e.collect.Base = true + + if e.collect.Base { + // Base Gluster Info Scrape + scrapeTime := time.Now() + if err = ScrapeGlobalVolumeStatus(e.volumes, allVolumes, ch); err != nil { + log.Errorln("Error scraping for collect.global_status:", err) + e.scrapeErrors.WithLabelValues("collect.global_status").Inc() + e.error.Set(1) + } + ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, time.Since(scrapeTime).Seconds(), "collect.global_status") + } + + // Peer Info Scrape + if e.collect.Peer { + scrapeTime = time.Now() + if err = ScrapePeerStatus(ch); err != nil { + log.Errorln("Error scraping for collect.peer_status: ", err) + e.scrapeErrors.WithLabelValues("collect.peer_status").Inc() + e.error.Set(1) + } + ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, time.Since(scrapeTime).Seconds(), "collect.peer_status") + } + + // Mount Scrape + if e.collect.Mount { + scrapeTime = time.Now() + if err = ScrapeVolumeMountStatus(e.scrapeErrors, ch); err != nil { + log.Errorln("Error scraping for collect.mount_status:", err) + e.scrapeErrors.WithLabelValues("collect.mount_status").Inc() + e.error.Set(1) + } + ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, time.Since(scrapeTime).Seconds(), "collect.mount_status") + } + + // Profile Scrape + if e.collect.Profile { + scrapeTime = time.Now() + if err = ScrapeProfileStatus(e.volumes, allVolumes, e.hostname, e.scrapeErrors, ch); err != nil { + 
log.Errorln("Error scraping for collect.profile_status:", err) + e.scrapeErrors.WithLabelValues("collect.profile_status").Inc() + e.error.Set(1) + } + ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, time.Since(scrapeTime).Seconds(), "collect.profile_status") + } + + // Quota Scrape + if e.collect.Quota { + scrapeTime = time.Now() + if err = ScrapeQuotaStatus(e.volumes, allVolumes, e.scrapeErrors, ch); err != nil { + log.Errorln("Error scraping for collect.quota_status:", err) + e.scrapeErrors.WithLabelValues("collect.quota_status").Inc() + e.error.Set(1) + } + ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, time.Since(scrapeTime).Seconds(), "collect.quota_status") + } + +} diff --git a/test/gluster_peer_status.xml b/collector/fixtures/gluster_peer_status.xml old mode 100644 new mode 100755 similarity index 100% rename from test/gluster_peer_status.xml rename to collector/fixtures/gluster_peer_status.xml diff --git a/test/gluster_volume_heal_info.xml b/collector/fixtures/gluster_volume_heal_info.xml old mode 100644 new mode 100755 similarity index 100% rename from test/gluster_volume_heal_info.xml rename to collector/fixtures/gluster_volume_heal_info.xml diff --git a/test/gluster_volume_heal_info_err_node1.xml b/collector/fixtures/gluster_volume_heal_info_err_node1.xml old mode 100644 new mode 100755 similarity index 100% rename from test/gluster_volume_heal_info_err_node1.xml rename to collector/fixtures/gluster_volume_heal_info_err_node1.xml diff --git a/test/gluster_volume_heal_info_err_node2.xml b/collector/fixtures/gluster_volume_heal_info_err_node2.xml old mode 100644 new mode 100755 similarity index 100% rename from test/gluster_volume_heal_info_err_node2.xml rename to collector/fixtures/gluster_volume_heal_info_err_node2.xml diff --git a/test/gluster_volume_info.xml b/collector/fixtures/gluster_volume_info.xml old mode 100644 new mode 100755 similarity index 100% rename from 
test/gluster_volume_info.xml rename to collector/fixtures/gluster_volume_info.xml diff --git a/test/gluster_volume_list.xml b/collector/fixtures/gluster_volume_list.xml old mode 100644 new mode 100755 similarity index 100% rename from test/gluster_volume_list.xml rename to collector/fixtures/gluster_volume_list.xml diff --git a/test/gluster_volume_profile_gv_test_info.xml b/collector/fixtures/gluster_volume_profile_gv_test_info.xml old mode 100644 new mode 100755 similarity index 100% rename from test/gluster_volume_profile_gv_test_info.xml rename to collector/fixtures/gluster_volume_profile_gv_test_info.xml diff --git a/test/gluster_volume_profile_gv_test_info_cumulative.xml b/collector/fixtures/gluster_volume_profile_gv_test_info_cumulative.xml old mode 100644 new mode 100755 similarity index 100% rename from test/gluster_volume_profile_gv_test_info_cumulative.xml rename to collector/fixtures/gluster_volume_profile_gv_test_info_cumulative.xml diff --git a/test/gluster_volume_quota_list.xml b/collector/fixtures/gluster_volume_quota_list.xml old mode 100644 new mode 100755 similarity index 100% rename from test/gluster_volume_quota_list.xml rename to collector/fixtures/gluster_volume_quota_list.xml diff --git a/test/gluster_volume_status_all_detail.xml b/collector/fixtures/gluster_volume_status_all_detail.xml old mode 100644 new mode 100755 similarity index 100% rename from test/gluster_volume_status_all_detail.xml rename to collector/fixtures/gluster_volume_status_all_detail.xml diff --git a/collector/mount.go b/collector/mount.go new file mode 100755 index 0000000..6fb9e39 --- /dev/null +++ b/collector/mount.go @@ -0,0 +1,125 @@ +package collector + +import ( + "bytes" + "os/exec" + "strings" + "fmt" + "time" + "os" + + "github.com/prometheus/client_golang/prometheus" +) + + +// TODO: Need Test +const ( + // Subsystem(s). 
+ mount = "mount" +) + +type mountV struct { + mountPoint string + volume string +} + +var ( + volumeWriteable = prometheus.NewDesc( + prometheus.BuildFQName(namespace, mount, "mount_writable"), + "Writes and deletes file in Volume and checks if it is writable", + []string{"volume", "mountpoint"}, nil) + + mountSuccessful = prometheus.NewDesc( + prometheus.BuildFQName(namespace, mount, "mount_successful"), + "Checks if mountpoint exists, returns a bool value 0 or 1", + []string{"volume", "mountpoint"}, nil) +) + +func ScrapeVolumeMountStatus(scrapeError *prometheus.CounterVec, ch chan<- prometheus.Metric) error { + mountBuffer, execMountCheckErr := execMountCheck() + if execMountCheckErr != nil { + return execMountCheckErr + } else { + mounts, err := ParseMountOutput(mountBuffer.String()) + testMountResult := testMount(mounts, err) + + for _, mount := range mounts { + ch <- prometheus.MustNewConstMetric( + mountSuccessful, prometheus.GaugeValue, float64(testMountResult), mount.volume, mount.mountPoint, + ) + } + + if err != nil { + return err + } else { + for _, mount := range mounts { + isWritable, err := execTouchOnVolumes(mount.mountPoint) + if err != nil { + scrapeError.WithLabelValues("collect.mount_status").Inc() + } + testWriteResult := testWritable(isWritable) + ch <- prometheus.MustNewConstMetric( + volumeWriteable, prometheus.GaugeValue, float64(testWriteResult), mount.volume, mount.mountPoint, + ) + } + } + } + + return nil +} + +func execMountCheck() (*bytes.Buffer, error) { + stdoutBuffer := &bytes.Buffer{} + mountCmd := exec.Command("mount", "-t", "fuse.glusterfs") + + mountCmd.Stdout = stdoutBuffer + err := mountCmd.Run() + + if err != nil { + return stdoutBuffer, err + } + return stdoutBuffer, nil +} + +// ParseMountOutput pares output of system execution 'mount' +func ParseMountOutput(mountBuffer string) ([]mountV, error) { + mounts := make([]mountV, 0, 2) + mountRows := strings.Split(mountBuffer, "\n") + for _, row := range mountRows { + 
trimmedRow := strings.TrimSpace(row) + if len(row) > 3 { + mountColumns := strings.Split(trimmedRow, " ") + mounts = append(mounts, mountV{mountPoint: mountColumns[2], volume: mountColumns[0]}) + } + } + return mounts, nil +} + +// Test is mount Successful or not +func testMount(mounts []mountV, err error) int { + if mounts != nil && len(mounts) > 0 { + return 1 + } + return 0 +} + +// Test if mount Writable or not +func testWritable(isWritable bool) int { + if isWritable { + return 1 + } + return 0 +} + +func execTouchOnVolumes(mountpoint string) (bool, error) { + testFileName := fmt.Sprintf("%v/%v_%v", mountpoint, "gluster_mount.fixtures", time.Now()) + _, createErr := os.Create(testFileName) + if createErr != nil { + return false, createErr + } + removeErr := os.Remove(testFileName) + if removeErr != nil { + return false, removeErr + } + return true, nil +} diff --git a/main_test.go b/collector/mount_test.go old mode 100644 new mode 100755 similarity index 93% rename from main_test.go rename to collector/mount_test.go index 8eaffd3..ba120e7 --- a/main_test.go +++ b/collector/mount_test.go @@ -1,4 +1,4 @@ -package main +package collector import "testing" @@ -29,7 +29,7 @@ func TestParseMountOutput(t *testing.T) { }, } for _, c := range tests { - mounts, err := parseMountOutput(c.mountOutput) + mounts, err := ParseMountOutput(c.mountOutput) if err != nil { t.Error(err) } diff --git a/collector/peer.go b/collector/peer.go new file mode 100755 index 0000000..4d9b674 --- /dev/null +++ b/collector/peer.go @@ -0,0 +1,86 @@ +package collector + +import ( + "bytes" + "io/ioutil" + "encoding/xml" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" +) + +const ( + // Subsystem(s). 
+ peer = "peer" +) + +var ( + peersConnected = prometheus.NewDesc( + prometheus.BuildFQName(namespace, peer, "peers_connected"), + "Is peer connected to gluster cluster.", + nil, nil, + ) + peersTotal = prometheus.NewDesc( + prometheus.BuildFQName(namespace, peer, "peers_total"), + "Total peer nums of gluster cluster.", + nil, nil, + ) +) + +func ScrapePeerStatus(ch chan<- prometheus.Metric) error { + // Read gluster peer status + peerStatus, peerStatusErr := ExecPeerStatus() + if peerStatusErr != nil { + log.Errorf("Couldn't parse xml of peer status: %v", peerStatusErr) + return peerStatusErr + } + + countConnected := 0 + countTotal := 0 + for _, peer := range peerStatus.Peer { + // State 3 means "Peer in Cluster" + if peer.Connected == 1 && peer.State == 3 { + countConnected++ + } + countTotal++ + } + + ch <- prometheus.MustNewConstMetric( + peersConnected, prometheus.GaugeValue, float64(countConnected), + ) + + ch <- prometheus.MustNewConstMetric( + peersTotal, prometheus.GaugeValue, float64(countTotal), + ) + + return nil +} + +// ExecPeerStatus executes "gluster peer status" at the local machine and +// returns PeerStatus struct and error +func ExecPeerStatus() (PeerStatus, error) { + args := []string{"peer", "status"} + bytesBuffer, cmdErr := execGlusterCommand(args...) 
+ if cmdErr != nil { + return PeerStatus{}, cmdErr + } + peerStatus, err := PeerStatusXMLUnmarshall(bytesBuffer) + if err != nil { + log.Errorf("Something went wrong while unmarshalling xml: %v", err) + return peerStatus.PeerStatus, err + } + + return peerStatus.PeerStatus, nil +} + +// PeerStatusXMLUnmarshall unmarshalls bytes to PeerStatusXML struct +func PeerStatusXMLUnmarshall(cmdOutBuff *bytes.Buffer) (PeerStatusXML, error) { + var vol PeerStatusXML + b, err := ioutil.ReadAll(cmdOutBuff) + if err != nil { + log.Error(err) + return vol, err + } + xml.Unmarshal(b, &vol) + return vol, nil +} diff --git a/collector/peer_test.go b/collector/peer_test.go new file mode 100755 index 0000000..0fa4db2 --- /dev/null +++ b/collector/peer_test.go @@ -0,0 +1,33 @@ +package collector + +import ( + "testing" + "io/ioutil" + "bytes" +) + +func TestPeerStatusXMLUnmarshall(t *testing.T) { + content, err := ioutil.ReadFile("fixtures/gluster_peer_status.xml") + if err != nil { + t.Fatal(err) + } + + // Convert into bytes.buffer + contentBuf := bytes.NewBuffer(content) + peerStatus, err := PeerStatusXMLUnmarshall(contentBuf) + if err != nil { + t.Errorf("Something went wrong while unmarshalling xml: %v", err) + } + + count := 0 + for _, peer := range peerStatus.PeerStatus.Peer { + if peer.Connected == 1 && peer.State == 3 { + count ++ + } + } + + if want, got := 3, count; want != got { + t.Errorf("want peer count %d, got %d", want, got) + } + +} \ No newline at end of file diff --git a/collector/profile.go b/collector/profile.go new file mode 100755 index 0000000..84a7d60 --- /dev/null +++ b/collector/profile.go @@ -0,0 +1,150 @@ +package collector + +import ( + "bytes" + "io/ioutil" + "encoding/xml" + + "github.com/prometheus/common/log" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + // Subsystem(s). 
+ profile = "profile" +) + +var ( + brickDuration = prometheus.NewDesc( + prometheus.BuildFQName(namespace, profile, "brick_duration"), + "Time running volume brick.", + []string{"volume", "brick"}, nil, + ) + + brickDataRead = prometheus.NewDesc( + prometheus.BuildFQName(namespace, profile, "brick_data_read"), + "Total amount of data read by brick.", + []string{"volume", "brick"}, nil, + ) + + brickDataWritten = prometheus.NewDesc( + prometheus.BuildFQName(namespace, profile, "brick_data_written"), + "Total amount of data written by brick.", + []string{"volume", "brick"}, nil, + ) + + brickFopHits = prometheus.NewDesc( + prometheus.BuildFQName(namespace, profile, "brick_fop_hits"), + "Total amount of file operation hits.", + []string{"volume", "brick", "fop_name"}, nil, + ) + + brickFopLatencyAvg = prometheus.NewDesc( + prometheus.BuildFQName(namespace, profile, "brick_fop_latency_avg"), + "Average file operations latency over total uptime", + []string{"volume", "brick", "fop_name"}, nil, + ) + + brickFopLatencyMin = prometheus.NewDesc( + prometheus.BuildFQName(namespace, profile, "brick_fop_latency_min"), + "Minimum file operations latency over total uptime", + []string{"volume", "brick", "fop_name"}, nil, + ) + + brickFopLatencyMax = prometheus.NewDesc( + prometheus.BuildFQName(namespace, profile, "brick_fop_latency_max"), + "Maximum file operations latency over total uptime", + []string{"volume", "brick", "fop_name"}, nil, + ) +) + +func ScrapeProfileStatus(volumeStrings []string, allVolumes string, hostname string, scrapeError *prometheus.CounterVec, ch chan<- prometheus.Metric) error { + // volumeInfo + volumeInfo, err := ExecVolumeInfo() + // Couldn't parse xml, so something is really wrong and up = 0 + if err != nil { + return err + } + + for _, volume := range volumeInfo.VolInfo.Volumes.Volume { + if volumeStrings[0] == allVolumes || ContainsVolume(volumeStrings, volume.Name) { + volumeProfile, execVolProfileErr := 
ExecVolumeProfileGvInfoCumulative(volume.Name) + if execVolProfileErr != nil { + log.Errorf("Error while executing or marshalling gluster profile output: %v", execVolProfileErr) + scrapeError.WithLabelValues("collect.profile_status").Inc() + } + + for _, brick := range volumeProfile.Brick { + + ch <- prometheus.MustNewConstMetric( + brickDuration, prometheus.CounterValue, float64(brick.CumulativeStats.Duration), volume.Name, brick.BrickName, + ) + + ch <- prometheus.MustNewConstMetric( + brickDataRead, prometheus.CounterValue, float64(brick.CumulativeStats.TotalRead), volume.Name, brick.BrickName, + ) + + ch <- prometheus.MustNewConstMetric( + brickDataWritten, prometheus.CounterValue, float64(brick.CumulativeStats.TotalWrite), volume.Name, brick.BrickName, + ) + + for _, fop := range brick.CumulativeStats.FopStats.Fop { + // continue and not record metrics if fop.AvgLatency fop.MinLatency fop.MaxLatency all is 0 + if fop.AvgLatency + fop.MaxLatency + fop.MinLatency == 0 { + continue + } + + ch <- prometheus.MustNewConstMetric( + brickFopHits, prometheus.CounterValue, float64(fop.Hits), volume.Name, brick.BrickName, fop.Name, + ) + + ch <- prometheus.MustNewConstMetric( + brickFopLatencyAvg, prometheus.GaugeValue, float64(fop.AvgLatency), volume.Name, brick.BrickName, fop.Name, + ) + + ch <- prometheus.MustNewConstMetric( + brickFopLatencyMin, prometheus.GaugeValue, float64(fop.MinLatency), volume.Name, brick.BrickName, fop.Name, + ) + + ch <- prometheus.MustNewConstMetric( + brickFopLatencyMax, prometheus.GaugeValue, float64(fop.MaxLatency), volume.Name, brick.BrickName, fop.Name, + ) + + } + } + } + } + + return nil +} + + +// ExecVolumeProfileGvInfoCumulative executes "gluster volume profile {volume} info cumulative --xml" at the local machine and +// returns VolumeInfoXML struct and error +func ExecVolumeProfileGvInfoCumulative(volumeName string) (VolProfile, error) { + args := []string{"volume", "profile"} + args = append(args, volumeName) + args = 
append(args, "info", "cumulative") + bytesBuffer, cmdErr := execGlusterCommand(args...) + if cmdErr != nil { + return VolProfile{}, cmdErr + } + volumeProfile, err := VolumeProfileGvInfoCumulativeXMLUnmarshall(bytesBuffer) + if err != nil { + log.Errorf("Something went wrong while unmarshalling xml: %v", err) + return volumeProfile.VolProfile, err + } + return volumeProfile.VolProfile, nil +} + +// VolumeProfileGvInfoCumulativeXMLUnmarshall unmarshalls cumulative profile of gluster volume profile +func VolumeProfileGvInfoCumulativeXMLUnmarshall(cmdOutBuff *bytes.Buffer) (VolumeProfileXML, error) { + var vol VolumeProfileXML + b, err := ioutil.ReadAll(cmdOutBuff) + if err != nil { + log.Error(err) + return vol, err + } + xml.Unmarshal(b, &vol) + return vol, nil +} diff --git a/collector/profile_test.go b/collector/profile_test.go new file mode 100755 index 0000000..a9f8dde --- /dev/null +++ b/collector/profile_test.go @@ -0,0 +1,229 @@ +package collector + +import ( + "testing" + "io/ioutil" + "bytes" +) + +func TestVolumeProfileGvInfoCumulativeXMLUnmarshall(t *testing.T) { + content, err := ioutil.ReadFile("fixtures/gluster_volume_profile_gv_test_info_cumulative.xml") + if err != nil { + t.Fatal(err) + } + + // Convert into bytes.buffer + contentBuf := bytes.NewBuffer(content) + volumeProfile, err := VolumeProfileGvInfoCumulativeXMLUnmarshall(contentBuf) + if err != nil { + t.Errorf("Something went wrong while unmarshalling xml: %v", err) + } + + for _, brick := range volumeProfile.VolProfile.Brick { + + switch brick.BrickName { + // just test one node + case "node1.example.local:/mnt/gluster/gv_test": + if want, got := 16932, brick.CumulativeStats.Duration; want != got { + t.Errorf("want brick.CumulativeStats.Duration %d, got %d", want, got) + } + + if want, got := 0, brick.CumulativeStats.TotalRead; want != got { + t.Errorf("want brick.CumulativeStats.TotalRead %d, got %d", want, got) + } + + if want, got := 7590710, brick.CumulativeStats.TotalWrite; want != got 
{ + t.Errorf("want brick.CumulativeStats.TotalWrite %d, got %d", want, got) + } + + for _, fop := range brick.CumulativeStats.FopStats.Fop { + switch fop.Name { + case "WRITE": + if want, got := 58, fop.Hits; want != got { + t.Errorf("want fop.Hits %d, got %d", want, got) + } + + if want, got := 224.500000, fop.AvgLatency; want != got { + t.Errorf("want fop.AvgLatency %d, got %d", want, got) + } + + if want, got := 183.000000, fop.MinLatency; want != got { + t.Errorf("want fop.MinLatency %d, got %d", want, got) + } + + if want, got := 807.000000, fop.MaxLatency; want != got { + t.Errorf("want fop.MaxLatency %d, got %d", want, got) + } + case "STATFS": + if want, got := 3, fop.Hits; want != got { + t.Errorf("want fop.Hits %d, got %d", want, got) + } + + if want, got := 44.666667, fop.AvgLatency; want != got { + t.Errorf("want fop.AvgLatency %d, got %d", want, got) + } + + if want, got := 32.000000, fop.MinLatency; want != got { + t.Errorf("want fop.MinLatency %d, got %d", want, got) + } + + if want, got := 69.000000, fop.MaxLatency; want != got { + t.Errorf("want fop.MaxLatency %d, got %d", want, got) + } + case "FLUSH": + if want, got := 1, fop.Hits; want != got { + t.Errorf("want fop.Hits %d, got %d", want, got) + } + + if want, got := 117.000000, fop.AvgLatency; want != got { + t.Errorf("want fop.AvgLatency %d, got %d", want, got) + } + + if want, got := 117.000000, fop.MinLatency; want != got { + t.Errorf("want fop.MinLatency %d, got %d", want, got) + } + + if want, got := 117.000000, fop.MaxLatency; want != got { + t.Errorf("want fop.MaxLatency %d, got %d", want, got) + } + case "GETXATTR": + if want, got := 123, fop.Hits; want != got { + t.Errorf("want fop.Hits %d, got %d", want, got) + } + + if want, got := 148.658537, fop.AvgLatency; want != got { + t.Errorf("want fop.AvgLatency %d, got %d", want, got) + } + + if want, got := 17.000000, fop.MinLatency; want != got { + t.Errorf("want fop.MinLatency %d, got %d", want, got) + } + + if want, got := 1154.000000, 
fop.MaxLatency; want != got { + t.Errorf("want fop.MaxLatency %d, got %d", want, got) + } + case "OPENDIR": + if want, got := 87, fop.Hits; want != got { + t.Errorf("want fop.Hits %d, got %d", want, got) + } + + if want, got := 4.091954, fop.AvgLatency; want != got { + t.Errorf("want fop.AvgLatency %d, got %d", want, got) + } + + if want, got := 3.000000, fop.MinLatency; want != got { + t.Errorf("want fop.MinLatency %d, got %d", want, got) + } + + if want, got := 6.000000, fop.MaxLatency; want != got { + t.Errorf("want fop.MaxLatency %d, got %d", want, got) + } + case "CREATE": + if want, got := 1, fop.Hits; want != got { + t.Errorf("want fop.Hits %d, got %d", want, got) + } + + if want, got := 23259.000000, fop.AvgLatency; want != got { + t.Errorf("want fop.AvgLatency %d, got %d", want, got) + } + + if want, got := 23259.000000, fop.MinLatency; want != got { + t.Errorf("want fop.MinLatency %d, got %d", want, got) + } + + if want, got := 23259.000000, fop.MaxLatency; want != got { + t.Errorf("want fop.MaxLatency %d, got %d", want, got) + } + case "LOOKUP": + if want, got := 119, fop.Hits; want != got { + t.Errorf("want fop.Hits %d, got %d", want, got) + } + + if want, got := 68.495798, fop.AvgLatency; want != got { + t.Errorf("want fop.AvgLatency %d, got %d", want, got) + } + + if want, got := 14.000000, fop.MinLatency; want != got { + t.Errorf("want fop.MinLatency %d, got %d", want, got) + } + + if want, got := 332.000000, fop.MaxLatency; want != got { + t.Errorf("want fop.MaxLatency %d, got %d", want, got) + } + case "READDIR": + if want, got := 174, fop.Hits; want != got { + t.Errorf("want fop.Hits %d, got %d", want, got) + } + + if want, got := 1601.942529, fop.AvgLatency; want != got { + t.Errorf("want fop.AvgLatency %d, got %d", want, got) + } + + if want, got := 195.000000, fop.MinLatency; want != got { + t.Errorf("want fop.MinLatency %d, got %d", want, got) + } + + if want, got := 4566.000000, fop.MaxLatency; want != got { + t.Errorf("want fop.MaxLatency 
%d, got %d", want, got) + } + case "FINODELK": + if want, got := 2, fop.Hits; want != got { + t.Errorf("want fop.Hits %d, got %d", want, got) + } + + if want, got := 80.000000, fop.AvgLatency; want != got { + t.Errorf("want fop.AvgLatency %d, got %d", want, got) + } + + if want, got := 76.000000, fop.MinLatency; want != got { + t.Errorf("want fop.MinLatency %d, got %d", want, got) + } + + if want, got := 84.000000, fop.MaxLatency; want != got { + t.Errorf("want fop.MaxLatency %d, got %d", want, got) + } + case "ENTRYLK": + if want, got := 2, fop.Hits; want != got { + t.Errorf("want fop.Hits %d, got %d", want, got) + } + + if want, got := 54.000000, fop.AvgLatency; want != got { + t.Errorf("want fop.AvgLatency %d, got %d", want, got) + } + + if want, got := 51.000000, fop.MinLatency; want != got { + t.Errorf("want fop.MinLatency %d, got %d", want, got) + } + + if want, got := 57.000000, fop.MaxLatency; want != got { + t.Errorf("want fop.MaxLatency %d, got %d", want, got) + } + case "FXATTROP": + if want, got := 2, fop.Hits; want != got { + t.Errorf("want fop.Hits %d, got %d", want, got) + } + + if want, got := 211.500000, fop.AvgLatency; want != got { + t.Errorf("want fop.AvgLatency %d, got %d", want, got) + } + + if want, got := 192.000000, fop.MinLatency; want != got { + t.Errorf("want fop.MinLatency %d, got %d", want, got) + } + + if want, got := 231.000000, fop.MaxLatency; want != got { + t.Errorf("want fop.MaxLatency %d, got %d", want, got) + } + default: + // just test one fop + t.Error("No fop.Name match test instance") + } + + break + } + default: + // just test one node + continue + } + } +} \ No newline at end of file diff --git a/collector/quota.go b/collector/quota.go new file mode 100755 index 0000000..c549ba0 --- /dev/null +++ b/collector/quota.go @@ -0,0 +1,133 @@ +package collector + +import ( + "bytes" + "io/ioutil" + "encoding/xml" + + "github.com/prometheus/common/log" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + // 
Subsystem(s). + quota = "quota" +) + +var ( + quotaHardLimit = prometheus.NewDesc( + prometheus.BuildFQName(namespace, quota, "volume_quota_hardlimit"), + "Quota hard limit (bytes) in a volume", + []string{"path", "volume"}, nil) + + quotaSoftLimit = prometheus.NewDesc( + prometheus.BuildFQName(namespace, quota, "volume_quota_softlimit"), + "Quota soft limit (bytes) in a volume", + []string{"path", "volume"}, nil) + + quotaUsed = prometheus.NewDesc( + prometheus.BuildFQName(namespace, quota, "volume_quota_used"), + "Current data (bytes) used in a quota", + []string{"path", "volume"}, nil) + + quotaAvailable = prometheus.NewDesc( + prometheus.BuildFQName(namespace, quota, "volume_quota_available"), + "Current data (bytes) available in a quota", + []string{"path", "volume"}, nil) + + quotaSoftLimitExceeded = prometheus.NewDesc( + prometheus.BuildFQName(namespace, quota, "volume_quota_softlimit_exceeded"), + "Is the quota soft-limit exceeded", + []string{"path", "volume"}, nil) + + quotaHardLimitExceeded = prometheus.NewDesc( + prometheus.BuildFQName(namespace, quota, "volume_quota_hardlimit_exceeded"), + "Is the quota hard-limit exceeded", + []string{"path", "volume"}, nil) +) + +func ScrapeQuotaStatus(volumeStrings []string, allVolumes string, scrapeError *prometheus.CounterVec, ch chan<- prometheus.Metric) error { + // volumeInfo + volumeInfo, err := ExecVolumeInfo() + // Couldn't parse xml, so something is really wrong and up = 0 + if err != nil { + return err + } + + for _, volume := range volumeInfo.VolInfo.Volumes.Volume { + if volumeStrings[0] == allVolumes || ContainsVolume(volumeStrings, volume.Name) { + + if volumeQuota, err := ExecVolumeQuotaList(volume.Name); err != nil { + log.Error("Cannot create quota metrics if quotas are not enabled in your Gluster Server") + scrapeError.WithLabelValues("collect.quota_status").Inc() + + } else { + for _, limit := range volumeQuota.VolQuota.QuotaLimits { + ch <- prometheus.MustNewConstMetric( + quotaHardLimit, 
prometheus.CounterValue, float64(limit.HardLimit), limit.Path, volume.Name, + ) + + ch <- prometheus.MustNewConstMetric( + quotaSoftLimit, prometheus.CounterValue, float64(limit.SoftLimitValue), limit.Path, volume.Name, + ) + + ch <- prometheus.MustNewConstMetric( + quotaUsed, prometheus.CounterValue, float64(limit.UsedSpace), limit.Path, volume.Name, + ) + + ch <- prometheus.MustNewConstMetric( + quotaAvailable, prometheus.CounterValue, float64(limit.AvailSpace), limit.Path, volume.Name, + ) + + slExceeded := ExceededFunc(limit.SlExceeded) + ch <- prometheus.MustNewConstMetric( + quotaSoftLimitExceeded, prometheus.CounterValue, slExceeded, limit.Path, volume.Name, + ) + + hlExceeded := ExceededFunc(limit.HlExceeded) + ch <- prometheus.MustNewConstMetric( + quotaHardLimitExceeded, prometheus.CounterValue, hlExceeded, limit.Path, volume.Name, + ) + } + } + } + } + + return nil +} + +// ExecVolumeQuotaList executes volume quota list on host system and processess input +// returns QuotaList structs and errors +func ExecVolumeQuotaList(volumeName string) (VolumeQuotaXML, error) { + args := []string{"volume", "quota", volumeName, "list"} + bytesBuffer, cmdErr := execGlusterCommand(args...) 
+ if cmdErr != nil { + // common error like "quota: No quota configured on volume {volume}" + // return empty VolumeQuotaXML + return VolumeQuotaXML{}, cmdErr + } + volumeQuota, err := VolumeQuotaListXMLUnmarshall(bytesBuffer) + if err != nil { + log.Errorf("Something went wrong while unmarshalling xml: %v", err) + return volumeQuota, err + } + return volumeQuota, nil +} + +func VolumeQuotaListXMLUnmarshall(cmdOutBuff *bytes.Buffer) (VolumeQuotaXML, error) { + var volQuotaXML VolumeQuotaXML + b, err := ioutil.ReadAll(cmdOutBuff) + if err != nil { + log.Error(err) + return volQuotaXML, err + } + xml.Unmarshal(b, &volQuotaXML) + return volQuotaXML, nil +} + +func ExceededFunc(Exceeded string) float64 { + if Exceeded != "No" { + return 1.0 + } + return 0.0 +} diff --git a/collector/quota_test.go b/collector/quota_test.go new file mode 100755 index 0000000..d85856d --- /dev/null +++ b/collector/quota_test.go @@ -0,0 +1,70 @@ +package collector + +import ( + "testing" + "io/ioutil" + "bytes" +) + +func TestVolumeQuotaListXMLUnmarshall(t *testing.T) { + content, err := ioutil.ReadFile("fixtures/gluster_volume_quota_list.xml") + if err != nil { + t.Fatal(err) + } + + // Convert into bytes.buffer + contentBuf := bytes.NewBuffer(content) + volumeQuota, err := VolumeQuotaListXMLUnmarshall(contentBuf) + if err != nil { + t.Errorf("Something went wrong while unmarshalling xml: %v", err) + } + + for _, limit := range volumeQuota.VolQuota.QuotaLimits { + + if want, got := 0.0, ExceededFunc(limit.SlExceeded); want != got { + t.Errorf("want limit.SlExceeded %f, got %f", want, got) + } + + if want, got := 0.0, ExceededFunc(limit.HlExceeded); want != got { + t.Errorf("want limit.HlExceeded %f, got %f", want ,got) + } + + switch limit.Path { + case "/foo": + if want, got := 10737418240, limit.HardLimit; want != int(got) { + t.Errorf("want limit.HardLimit %d, got %d", want, got) + } + + if want, got := 8589934592, limit.SoftLimitValue; want != int(got) { + t.Errorf("want 
limit.SoftLimitValue %d, got %d", want, got) + } + + if want, got := 428160000, limit.UsedSpace; want != int(got) { + t.Errorf("want limit.UsedSpace %d, got %d", want, got) + } + + if want, got := 10309258240, limit.AvailSpace; want != int(got) { + t.Errorf("want limit.AvailSpace %d, got %d", want, got) + } + + case "/bar": + if want, got := 2147483648, limit.HardLimit; want != int(got) { + t.Errorf("want limit.HardLimit %d, got %d", want, got) + } + + if want, got := 1717986918, limit.SoftLimitValue; want != int(got) { + t.Errorf("want limit.SoftLimitValue %d, got %d", want, got) + } + + if want, got := 335544320, limit.UsedSpace; want != int(got) { + t.Errorf("want limit.UsedSpace %d, got %d", want, got) + } + + if want, got := 1811939328, limit.AvailSpace; want != int(got) { + t.Errorf("want limit.AvailSpace %d, got %d", want, got) + } + default: + t.Error("No limit.Path match test instance") + } + } +} diff --git a/collector/utils.go b/collector/utils.go new file mode 100755 index 0000000..fea146e --- /dev/null +++ b/collector/utils.go @@ -0,0 +1,73 @@ +package collector + +import ( + "bytes" + "os" + "os/exec" + + "github.com/prometheus/common/log" + "strings" +) + + +func getGlusterBinary(glusterPath string) (string, error) { + + switch glusterPath { + // NoDefine + case "": + out, err := exec.Command("which","gluster").Output() + + // Trim `out` with '\n' + rout := strings.TrimSuffix(string(out), "\n") + + if err != nil { + log.Fatal("Please Make sure Gluster installed correctly. 
Cannot find gluster binary.") + return rout, err + } + return rout, err + // Has Define + default: + // Check Exists + _, err := PathExists(glusterPath) + if err != nil { + return "", err + } + return glusterPath, nil + } +} + +func execGlusterCommand(arg ...string) (*bytes.Buffer, error) { + glusterCmd, getErr := getGlusterBinary("") + if getErr != nil { + log.Error(getErr) + } + + stdoutBuffer := &bytes.Buffer{} + argXML := append(arg, "--xml") + glusterExec := exec.Command(glusterCmd, argXML...) + glusterExec.Stdout = stdoutBuffer + err := glusterExec.Run() + + if err != nil { + log.Errorf("tried to execute %v and got error: %v", arg, err) + return stdoutBuffer, err + } + return stdoutBuffer, nil +} + +// ContainsVolume checks a slice if it contains an element +func ContainsVolume(slice []string, element string) bool { + for _, a := range slice { + if a == element { + return true + } + } + return false +} + +func PathExists(path string) (bool, error) { + if _, err := os.Stat(path); os.IsNotExist(err) { + return false, err + } + return true, nil +} diff --git a/collector/volume.go b/collector/volume.go new file mode 100755 index 0000000..47e36de --- /dev/null +++ b/collector/volume.go @@ -0,0 +1,171 @@ +package collector + +import ( + "bytes" + "io/ioutil" + "encoding/xml" + + "github.com/prometheus/common/log" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + // Subsystem(s). 
+ volume = "volume" +) + +var ( + up = prometheus.NewDesc( + prometheus.BuildFQName(namespace, volume, "up"), + "Was the last query of Gluster successful.", + nil, nil, + ) + + volumesCount = prometheus.NewDesc( + prometheus.BuildFQName(namespace, volume, "volumes_count"), + "How many volumes were up at the last query.", + nil, nil, + ) + + brickCount = prometheus.NewDesc( + prometheus.BuildFQName(namespace, volume, "brick_count"), + "Number of bricks at last query.", + []string{"volume"}, nil, + ) + + volumeStatus = prometheus.NewDesc( + prometheus.BuildFQName(namespace, volume, "volume_status"), + "Status code of requested volume.", + []string{"volume"}, nil, + ) + + nodeSizeFreeBytes = prometheus.NewDesc( + prometheus.BuildFQName(namespace, volume, "node_size_free_bytes"), + "Free bytes reported for each node on each instance. Labels are to distinguish origins", + []string{"hostname", "path", "volume"}, nil, + ) + + nodeSizeTotalBytes = prometheus.NewDesc( + prometheus.BuildFQName(namespace, volume, "node_size_total_bytes"), + "Total bytes reported for each node on each instance. 
Labels are to distinguish origins", + []string{"hostname", "path", "volume"}, nil, + ) +) + +func ScrapeGlobalVolumeStatus(volumeStrings []string, allVolumes string, ch chan<- prometheus.Metric) error { + // Collect metrics from volume info + volumeInfo, err := ExecVolumeInfo() + // Couldn't parse xml, so something is really wrong and up = 0 + if err != nil { + ch <- prometheus.MustNewConstMetric( + up, prometheus.GaugeValue, 0.0, + ) + return err + } + + // use OpErrno as indicator for up + if volumeInfo.OpErrno != 0 { + ch <- prometheus.MustNewConstMetric( + up, prometheus.GaugeValue, 0.0, + ) + } else { + ch <- prometheus.MustNewConstMetric( + up, prometheus.GaugeValue, 1.0, + ) + } + + // Volume Count + ch <- prometheus.MustNewConstMetric( + volumesCount, prometheus.GaugeValue, float64(volumeInfo.VolInfo.Volumes.Count), + ) + + // Volume Status and Brick Count + for _, volume := range volumeInfo.VolInfo.Volumes.Volume { + if volumeStrings[0] == allVolumes || ContainsVolume(volumeStrings, volume.Name) { + ch <- prometheus.MustNewConstMetric( + brickCount, prometheus.GaugeValue, float64(volume.BrickCount), volume.Name, + ) + + ch <- prometheus.MustNewConstMetric( + volumeStatus, prometheus.GaugeValue, float64(volume.Status), volume.Name, + ) + } + } + + // Collect metrics from volume status all detail + volumeStatusAll, err := ExecVolumeStatusAllDetail() + if err != nil { + return err + } + for _, vol := range volumeStatusAll.VolStatus.Volumes.Volume { + for _, node := range vol.Node { + if node.Status == 1 { + ch <- prometheus.MustNewConstMetric( + nodeSizeTotalBytes, prometheus.GaugeValue, float64(node.SizeTotal), node.Hostname, node.Path, vol.VolName, + ) + ch <- prometheus.MustNewConstMetric( + nodeSizeFreeBytes, prometheus.GaugeValue, float64(node.SizeFree), node.Hostname, node.Path, vol.VolName, + ) + } + } + } + + return nil +} + +// ExecVolumeInfo executes "gluster volume info" at the local machine and +// returns VolumeInfoXML struct and error +func 
ExecVolumeInfo() (VolumeInfoXML, error) { + args := []string{"volume", "info"} + bytesBuffer, cmdErr := execGlusterCommand(args...) + if cmdErr != nil { + return VolumeInfoXML{}, cmdErr + } + + volumeInfo, err := VolumeInfoXMLUnmarshall(bytesBuffer) + if err != nil { + return volumeInfo, err + } + + return volumeInfo, nil +} + +// ExecVolumeStatusAllDetail executes "gluster volume status all detail" at the local machine +// returns VolumeStatusXML struct and error +func ExecVolumeStatusAllDetail() (VolumeStatusXML, error) { + args := []string{"volume", "status", "all", "detail"} + bytesBuffer, cmdErr := execGlusterCommand(args...) + if cmdErr != nil { + return VolumeStatusXML{}, cmdErr + } + volumeStatus, err := VolumeStatusAllDetailXMLUnmarshall(bytesBuffer) + if err != nil { + log.Errorf("Something went wrong while unmarshalling xml: %v", err) + return volumeStatus, err + } + return volumeStatus, nil +} + +// VolumeInfoXMLUnmarshall unmarshalls bytes to VolumeInfoXML struct +func VolumeInfoXMLUnmarshall(cmdOutBuff *bytes.Buffer) (VolumeInfoXML, error) { + var vol VolumeInfoXML + b, err := ioutil.ReadAll(cmdOutBuff) + if err != nil { + log.Error(err) + return vol, err + } + xml.Unmarshal(b, &vol) + return vol, nil +} + +// VolumeStatusAllDetailXMLUnmarshall reads bytes.buffer and returns unmarshalled xml +func VolumeStatusAllDetailXMLUnmarshall(cmdOutBuff *bytes.Buffer) (VolumeStatusXML, error) { + var vol VolumeStatusXML + b, err := ioutil.ReadAll(cmdOutBuff) + if err != nil { + log.Error(err) + return vol, err + } + xml.Unmarshal(b, &vol) + return vol, nil +} diff --git a/collector/volume_test.go b/collector/volume_test.go new file mode 100755 index 0000000..2889232 --- /dev/null +++ b/collector/volume_test.go @@ -0,0 +1,98 @@ +package collector + +import ( + "testing" + "io/ioutil" + "bytes" + "fmt" +) + +func TestVolumeInfoXMLUnmarshall(t *testing.T) { + content, err := ioutil.ReadFile("fixtures/gluster_volume_info.xml") + if err != nil { + t.Fatal(err) + } + 
+ // Convert into bytes.buffer + contentBuf := bytes.NewBuffer(content) + volumeInfo, err := VolumeInfoXMLUnmarshall(contentBuf) + if err != nil { + t.Errorf("Something went wrong while unmarshalling xml: %v", err) + } + + if want, got := 0, volumeInfo.OpErrno; want != got { + t.Errorf("want volumeInfo.OpErrno %d, got %d", want, got) + } + + if want, got := 2, volumeInfo.VolInfo.Volumes.Count; want != got { + t.Errorf("want volumeInfo.VolInfo.Volumes.Count %d, got %d", want, got) + } + + volumeStrings := []string{"_all"} + + for _, volume := range volumeInfo.VolInfo.Volumes.Volume { + if volumeStrings[0] == allVolumes || ContainsVolume(volumeStrings, volume.Name) { + + switch volume.Name { + case "gv_cluster": + if want, got := 4, volume.BrickCount; want != got { + t.Errorf("want volume.BrickCount %d, got %d", want, got) + } + + if want, got := 1, volume.Status; want != got { + t.Errorf("want volume.Status %d, got %d", want, got) + } + case "gv_test": + if want, got := 4, volume.BrickCount; want != got { + t.Errorf("want volume.BrickCount %d, got %d", want, got) + } + + if want, got := 1, volume.Status; want != got { + t.Errorf("want volume.Status %d, got %d", want, got) + } + default: fmt.Printf("want %s or %s, got %s", "gv_cluster", "gv_test", "Error") + } + + } + } +} + +func TestVolumeStatusAllDetailXMLUnmarshall(t *testing.T) { + content, err := ioutil.ReadFile("fixtures/gluster_volume_status_all_detail.xml") + if err != nil { + t.Fatal(err) + } + + // Convert into bytes.buffer + contentBuf := bytes.NewBuffer(content) + volumeStatusAll, err := VolumeStatusAllDetailXMLUnmarshall(contentBuf) + if err != nil { + t.Errorf("Something went wrong while unmarshalling xml: %v", err) + } + + for _, vol := range volumeStatusAll.VolStatus.Volumes.Volume { + + for _, node := range vol.Node { + if node.Status == 1 { + + if want, got := 20507914240, node.SizeTotal; want != int(got) { + t.Errorf("want node.SizeTotal %d, got %d", want, got) + } + + switch vol.VolName { + case 
"gv_test": + if want, got := "/mnt/gluster/gv_test", node.Path; want != got { + t.Errorf("want node.Path %s, got %s", want, got) + } + case "gv_test2": + if want, got := "/mnt/gluster/gv_test2", node.Path; want != got { + t.Errorf("want node.Path %s, got %s", want, got) + } + default: + t.Error("No vol.VolName match test instance") + } + + } + } + } +} diff --git a/collector/xmlStructs.go b/collector/xmlStructs.go new file mode 100755 index 0000000..484b8f4 --- /dev/null +++ b/collector/xmlStructs.go @@ -0,0 +1,274 @@ +package collector + +import "encoding/xml" + +// VolumeInfoXML struct repesents cliOutput element of "gluster volume info" command +// +// cliOutput +// |-- opRet +// |-- opErrno +// |-- opErrstr +// |-- volInfo +// |-- volumes +// |-- volume +// |-- name +// |-- id +// |-- status +// |-- statusStr +// |-- snapshotCount +// |-- brickCount +// |-- distCount +// |-- stripeCount +// |-- replicaCount +// |-- arbiterCount +// |-- disperseCount +// |-- redundancyCount +// |-- type +// |-- typeStr +// |-- transport +// |-- xlators/ // TODO: don't know what means +// |-- bricks +// |-- []brick +// |-- name +// |-- hostUuid +// |-- isArbiter +// |-- optCount +// |-- options +// |-- []option +// |-- name +// |-- value + +type VolumeInfoXML struct { + XMLName xml.Name `xml:"cliOutput"` + OpRet int `xml:"opRet"` + OpErrno int `xml:"opErrno"` + OpErrstr string `xml:"opErrstr"` + VolInfo VolInfo `xml:"volInfo"` +} + +// VolInfo element of "gluster volume info" command +type VolInfo struct { + XMLName xml.Name `xml:"volInfo"` + Volumes Volumes `xml:"volumes"` +} + +// Volumes element of "gluster volume info" command +type Volumes struct { + XMLName xml.Name `xml:"volumes"` + Volume []Volume `xml:"volume"` + Count int `xml:"count"` +} + +// Volume element of "gluster volume info" command +type Volume struct { + XMLName xml.Name `xml:"volume"` + Name string `xml:"name"` + ID string `xml:"id"` + Status int `xml:"status"` + StatusStr string `xml:"statusStr"` + 
BrickCount int `xml:"brickCount"` + Bricks []Brick `xml:"bricks"` + DistCount int `xml:"distCount"` +} + +// Brick element of "gluster volume info" command +type Brick struct { + UUID string `xml:"brick>uuid"` + Name string `xml:"brick>name"` + HostUUID string `xml:"brick>hostUuid"` + IsArbiter int `xml:"brick>isArbiter"` +} + + +// PeerStatusXML struct represents cliOutput element of "gluster peer status" command +// +// cliOutput +// |-- opRet +// |-- opErrno +// |-- opErrstr +// |-- peerStatus +// |-- []peer +// |-- uuid +// |-- hostname +// |-- hostnames +// |-- hostname +// |-- connected +// |-- state +// |-- stateStr + +type PeerStatusXML struct { + XMLName xml.Name `xml:"cliOutput"` + OpRet int `xml:"opRet"` + OpErrno int `xml:"opErrno"` + OpErrstr string `xml:"opErrstr"` + PeerStatus PeerStatus `xml:"peerStatus"` +} + +// PeerStatus element of "gluster peer status" command +type PeerStatus struct { + XMLName xml.Name `xml:"peerStatus"` + Peer []Peer `xml:"peer"` +} + +// Peer element of "gluster peer status" command +type Peer struct { + XMLName xml.Name `xml:"peer"` + UUID string `xml:"uuid"` + Hostname string `xml:"hostname"` + Hostnames Hostnames `xml:"hostnames"` + Connected int `xml:"connected"` + State int `xml:"state"` + StateStr string `xml:"stateStr"` +} + +// Hostnames element of "gluster peer status" command +type Hostnames struct { + Hostname string `xml:"hostname"` +} + +// VolumeStatusXML XML type of "gluster volume status" +// +// cliOutput +// |-- opRet +// |-- opErrno +// |-- opErrstr +// |-- volStatus +// |-- volumes +// |-- []volume +// |-- volName +// |-- nodeCount +// |-- []node +// |-- hostname +// |-- path +// |-- peerid +// |-- status +// |-- port +// |-- ports +// |-- tcp +// |-- rdma +// |-- pid + +type VolumeStatusXML struct { + XMLName xml.Name `xml:"cliOutput"` + OpRet int `xml:"opRet"` + OpErrno int `xml:"opErrno"` + OpErrstr string `xml:"opErrstr"` + VolStatus struct { + Volumes struct { + Volume []struct { + VolName string 
`xml:"volName"` + NodeCount int `xml:"nodeCount"` + Node []struct { + Hostname string `xml:"hostname"` + Path string `xml:"path"` + PeerID string `xml:"peerid"` + Status int `xml:"status"` + Port int `xml:"port"` + Ports struct { + TCP int `xml:"tcp"` + RDMA string `xml:"rdma"` + } `xml:"ports"` + Pid int `xml:"pid"` + SizeTotal uint64 `xml:"sizeTotal"` + SizeFree uint64 `xml:"sizeFree"` + Device string `xml:"device"` + BlockSize int `xml:"blockSize"` + MntOptions string `xml:"mntOptions"` + FsName string `xml:"fsName"` + } `xml:"node"` + } `xml:"volume"` + } `xml:"volumes"` + } `xml:"volStatus"` +} + +// Quota +type QuotaLimit struct { + XMLName xml.Name `xml:"limit"` + Path string `xml:"path"` + HardLimit uint64 `xml:"hard_limit"` + SoftLimitValue uint64 `xml:"soft_limit_value"` + UsedSpace uint64 `xml:"used_space"` + AvailSpace uint64 `xml:"avail_space"` + SlExceeded string `xml:"sl_exceeded"` + HlExceeded string `xml:"hl_exceeded"` +} + +type VolQuota struct { + XMLName xml.Name `xml:"volQuota"` + QuotaLimits []QuotaLimit `xml:"limit"` +} +// VolumeQuotaXML XML type of "gluster volume quota {volume} list" +type VolumeQuotaXML struct { + XMLName xml.Name `xml:"cliOutput"` + OpRet int `xml:"opRet"` + OpErrno int `xml:"opErrno"` + OpErrstr string `xml:"opErrstr"` + VolQuota VolQuota `xml:"volQuota"` +} + +// Profile +// VolumeProfileXML struct repesents cliOutput element of "gluster volume profile {volume} info" command +// +// cliOutput +// |-- opRet +// |-- opErrno +// |-- opErrstr +// |-- volProfile +// |-- volname +// |-- profileOp +// |-- brickCount +// |-- []brick +// |-- brickName +// |-- cumulativeStats +// |-- blockStats +// |-- []block +// |-- fopStats +// |-- []fop +// |-- duration +// |-- totalRead +// |-- totalWrite + +type VolumeProfileXML struct { + XMLName xml.Name `xml:"cliOutput"` + OpRet int `xml:"opRet"` + OpErrno int `xml:"opErrno"` + OpErrstr string `xml:"opErrstr"` + VolProfile VolProfile `xml:"volProfile"` +} + +// VolProfile element of 
"gluster volume profile {volume} info" command +type VolProfile struct { + Volname string `xml:"volname"` + ProfileOp int `xml:"profileOp"` + BrickCount int `xml:"brickCount"` + Brick []BrickProfile `xml:"brick"` +} + +// BrickProfile struct for element brick of "gluster volume profile {volume} info" command +type BrickProfile struct { + //XMLName xml.Name `xml:"brick"` + BrickName string `xml:"brickName"` + CumulativeStats CumulativeStats `xml:"cumulativeStats"` +} + +// CumulativeStats element of "gluster volume profile {volume} info" command +type CumulativeStats struct { + FopStats FopStats `xml:"fopStats"` + Duration int `xml:"duration"` + TotalRead int `xml:"totalRead"` + TotalWrite int `xml:"totalWrite"` +} + +// FopStats element of "gluster volume profile {volume} info" command +type FopStats struct { + Fop []Fop `xml:"fop"` +} + +// Fop is struct for FopStats +type Fop struct { + Name string `xml:"name"` + Hits int `xml:"hits"` + AvgLatency float64 `xml:"avgLatency"` + MinLatency float64 `xml:"minLatency"` + MaxLatency float64 `xml:"maxLatency"` +} \ No newline at end of file diff --git a/dashboard/gluster-exporter-full_rev1.json b/dashboard/gluster-exporter-full_rev1.json new file mode 100644 index 0000000..3f3df68 --- /dev/null +++ b/dashboard/gluster-exporter-full_rev1.json @@ -0,0 +1,1245 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS_ONLINE", + "label": "Prometheus_Online", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.2.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + 
"graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "rows": [ + { + "collapse": false, + "height": 180, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(99, 245, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS_ONLINE}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "gluster_up", + "intervalFactor": 2, + "metric": "gluster_up", + "refId": "A", + "step": 60 + } + ], + "thresholds": "1,1", + "title": "Gluster Up", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "UP", + "value": "1" + }, + { + "op": "=", + "text": "DOWN", + "value": "0" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS_ONLINE}", + "decimals": null, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, 
+ "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "sum(gluster_volume_node_size_total_bytes{volume=~\"^$Volume$\"})", + "intervalFactor": 2, + "metric": "", + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "Gluster $Volume Total Space", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS_ONLINE}", + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 3, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "sum(gluster_volume_node_size_free_bytes{volume=~\"^$Volume$\"})", + "intervalFactor": 2, + "metric": "", + 
"refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "Gluster $Volume Free Space", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS_ONLINE}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "gluster_volume_brick_count{volume=~\"^$Volume$\"}", + "intervalFactor": 2, + "metric": "", + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "Gluster $Volume Brick Count", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Gluster Status", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 180, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": 
"${DS_PROMETHEUS_ONLINE}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "gluster_peer_peers_total", + "intervalFactor": 2, + "metric": "gluster_peer_peers_total", + "refId": "A", + "step": 60 + } + ], + "thresholds": "1,2", + "title": "Peers Total", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS_ONLINE}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + 
"full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "gluster_peer_peers_total", + "intervalFactor": 2, + "metric": "gluster_peer_peers_total", + "refId": "A", + "step": 60 + } + ], + "thresholds": "1,2", + "title": "Peer Connected", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(26, 171, 47, 0.89)", + "rgba(237, 129, 40, 0.89)", + "rgba(249, 80, 62, 0.91)" + ], + "datasource": "${DS_PROMETHEUS_ONLINE}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "gluster_peer_peers_total - gluster_peer_peers_connected", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "1,1", + "title": "Peer OUT", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Peer Status", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 200, + "panels": [ + { + "aliasColors": {}, + "bars": false, + 
"datasource": "${DS_PROMETHEUS_ONLINE}", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(gluster_profile_brick_data_read{brick=~\"^$Brick$\",volume=~\"^$Volume$\"}[5m])) by (brick)", + "intervalFactor": 2, + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$Brick Data Read", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS_ONLINE}", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(gluster_profile_brick_data_written{brick=~\"^$Brick$\",volume=~\"^$Volume$\"}[5m])) by (brick)", + "intervalFactor": 2, + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$Brick Data Written", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Brick Read/Write", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS_ONLINE}", + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 10, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "repeat": "Brick", + "span": 1.5, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "gluster_profile_brick_duration{brick=~\"^$Brick$\",volume=~\"^$Volume$\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "$Brick uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + 
"repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 300, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS_ONLINE}", + "fill": 0, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "gluster_profile_brick_fop_latency_avg{brick=~\"^$Brick$\",volume=~\"^$Volume$\", fop_name=~\"^$FOP$\"}", + "intervalFactor": 2, + "legendFormat": "$FOP of $Brick $Volume", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Volume:$Volume Brick:$Brick FOP:$FOP AVG Statistic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS_ONLINE}", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"gluster_profile_brick_fop_latency_min{brick=~\"^$Brick$\",volume=~\"^$Volume$\", fop_name=~\"^$FOP$\"}", + "intervalFactor": 2, + "legendFormat": "$FOP of $Brick $Volume", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Volume:$Volume Brick:$Brick FOP:$FOP MIN Statistic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS_ONLINE}", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "gluster_profile_brick_fop_latency_max{brick=~\"^$Brick$\",volume=~\"^$Volume$\", fop_name=~\"^$FOP$\"}", + "intervalFactor": 2, + "legendFormat": "$FOP of $Brick $Volume", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Volume:$Volume Brick:$Brick FOP:$FOP MAX Statistic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 
null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS_ONLINE}", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(gluster_profile_brick_fop_hits{brick=~\"^$Brick$\",volume=~\"^$Volume$\", fop_name=~\"^$FOP$\"}[5m])) by (brick)", + "intervalFactor": 2, + "legendFormat": "$FOP of $Brick $Volume", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Volume:$Volume Brick:$Brick FOP:$FOP HIT Statistic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "$Volume $Brick Profile $FOP", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS_ONLINE}", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "Volume", + "options": [], + "query": "label_values(gluster_volume_brick_count, volume)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": true + }, + { + "allValue": 
null, + "current": {}, + "datasource": "${DS_PROMETHEUS_ONLINE}", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "FOP", + "options": [], + "query": "label_values(gluster_profile_brick_fop_hits, fop_name)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": true + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS_ONLINE}", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "Brick", + "options": [], + "query": "label_values(gluster_profile_brick_data_read, brick)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "GlusterFS Dashboard", + "version": 6 +} \ No newline at end of file diff --git a/gluster_client.go b/gluster_client.go deleted file mode 100644 index 8901f93..0000000 --- a/gluster_client.go +++ /dev/null @@ -1,178 +0,0 @@ -package main - -import ( - "bytes" - "fmt" - "os" - "os/exec" - "strconv" - "time" - - "github.com/ofesseler/gluster_exporter/structs" - "github.com/prometheus/common/log" -) - -func execGlusterCommand(arg ...string) (*bytes.Buffer, error) { - stdoutBuffer := &bytes.Buffer{} - argXML := append(arg, "--xml") - glusterExec := exec.Command(GlusterCmd, argXML...) 
- glusterExec.Stdout = stdoutBuffer - err := glusterExec.Run() - - if err != nil { - log.Errorf("tried to execute %v and got error: %v", arg, err) - return stdoutBuffer, err - } - return stdoutBuffer, nil -} - -func execMountCheck() (*bytes.Buffer, error) { - stdoutBuffer := &bytes.Buffer{} - mountCmd := exec.Command("mount", "-t", "fuse.glusterfs") - - mountCmd.Stdout = stdoutBuffer - err := mountCmd.Run() - - if err != nil { - return stdoutBuffer, err - } - return stdoutBuffer, nil -} - -func execTouchOnVolumes(mountpoint string) (bool, error) { - testFileName := fmt.Sprintf("%v/%v_%v", mountpoint, "gluster_mount.test", time.Now()) - _, createErr := os.Create(testFileName) - if createErr != nil { - return false, createErr - } - removeErr := os.Remove(testFileName) - if removeErr != nil { - return false, removeErr - } - return true, nil -} - -// ExecVolumeInfo executes "gluster volume info" at the local machine and -// returns VolumeInfoXML struct and error -func ExecVolumeInfo() (structs.VolumeInfoXML, error) { - args := []string{"volume", "info"} - bytesBuffer, cmdErr := execGlusterCommand(args...) - if cmdErr != nil { - return structs.VolumeInfoXML{}, cmdErr - } - volumeInfo, err := structs.VolumeInfoXMLUnmarshall(bytesBuffer) - if err != nil { - log.Errorf("Something went wrong while unmarshalling xml: %v", err) - return volumeInfo, err - } - - return volumeInfo, nil -} - -// ExecVolumeList executes "gluster volume info" at the local machine and -// returns VolumeList struct and error -func ExecVolumeList() (structs.VolList, error) { - args := []string{"volume", "list"} - bytesBuffer, cmdErr := execGlusterCommand(args...) 
- if cmdErr != nil { - return structs.VolList{}, cmdErr - } - volumeList, err := structs.VolumeListXMLUnmarshall(bytesBuffer) - if err != nil { - log.Errorf("Something went wrong while unmarshalling xml: %v", err) - return volumeList.VolList, err - } - - return volumeList.VolList, nil -} - -// ExecPeerStatus executes "gluster peer status" at the local machine and -// returns PeerStatus struct and error -func ExecPeerStatus() (structs.PeerStatus, error) { - args := []string{"peer", "status"} - bytesBuffer, cmdErr := execGlusterCommand(args...) - if cmdErr != nil { - return structs.PeerStatus{}, cmdErr - } - peerStatus, err := structs.PeerStatusXMLUnmarshall(bytesBuffer) - if err != nil { - log.Errorf("Something went wrong while unmarshalling xml: %v", err) - return peerStatus.PeerStatus, err - } - - return peerStatus.PeerStatus, nil -} - -// ExecVolumeProfileGvInfoCumulative executes "gluster volume {volume] profile info cumulative" at the local machine and -// returns VolumeInfoXML struct and error -func ExecVolumeProfileGvInfoCumulative(volumeName string) (structs.VolProfile, error) { - args := []string{"volume", "profile"} - args = append(args, volumeName) - args = append(args, "info", "cumulative") - bytesBuffer, cmdErr := execGlusterCommand(args...) - if cmdErr != nil { - return structs.VolProfile{}, cmdErr - } - volumeProfile, err := structs.VolumeProfileGvInfoCumulativeXMLUnmarshall(bytesBuffer) - if err != nil { - log.Errorf("Something went wrong while unmarshalling xml: %v", err) - return volumeProfile.VolProfile, err - } - return volumeProfile.VolProfile, nil -} - -// ExecVolumeStatusAllDetail executes "gluster volume status all detail" at the local machine -// returns VolumeStatusXML struct and error -func ExecVolumeStatusAllDetail() (structs.VolumeStatusXML, error) { - args := []string{"volume", "status", "all", "detail"} - bytesBuffer, cmdErr := execGlusterCommand(args...) 
- if cmdErr != nil { - return structs.VolumeStatusXML{}, cmdErr - } - volumeStatus, err := structs.VolumeStatusAllDetailXMLUnmarshall(bytesBuffer) - if err != nil { - log.Errorf("Something went wrong while unmarshalling xml: %v", err) - return volumeStatus, err - } - return volumeStatus, nil -} - -// ExecVolumeHealInfo executes volume heal info on host system and processes input -// returns (int) number of unsynced files -func ExecVolumeHealInfo(volumeName string) (int, error) { - args := []string{"volume", "heal", volumeName, "info"} - entriesOutOfSync := 0 - bytesBuffer, cmdErr := execGlusterCommand(args...) - if cmdErr != nil { - return -1, cmdErr - } - healInfo, err := structs.VolumeHealInfoXMLUnmarshall(bytesBuffer) - if err != nil { - log.Error(err) - return -1, err - } - - for _, brick := range healInfo.HealInfo.Bricks.Brick { - var count int - count, _ = strconv.Atoi(brick.NumberOfEntries) - entriesOutOfSync += count - } - return entriesOutOfSync, nil -} - -// ExecVolumeQuotaList executes volume quota list on host system and processess input -// returns QuotaList structs and errors - -func ExecVolumeQuotaList(volumeName string) (structs.VolumeQuotaXML, error) { - args := []string{"volume", "quota", volumeName, "list"} - bytesBuffer, cmdErr := execGlusterCommand(args...) 
- if cmdErr != nil { - return structs.VolumeQuotaXML{}, cmdErr - } - volumeQuota, err := structs.VolumeQuotaListXMLUnmarshall(bytesBuffer) - if err != nil { - log.Errorf("Something went wrong while unmarshalling xml: %v", err) - return volumeQuota, err - } - return volumeQuota, nil -} diff --git a/gluster_exporter.go b/gluster_exporter.go new file mode 100644 index 0000000..292aed0 --- /dev/null +++ b/gluster_exporter.go @@ -0,0 +1,180 @@ +package main + +import ( + "fmt" + "net/http" + "os" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/prometheus/common/log" + "github.com/prometheus/common/version" + + "github.com/stefanmonkey/gluster_exporter/collector" + "gopkg.in/alecthomas/kingpin.v2" +) + +const ( + program = "gluster_exporter" +) + +var ( + glusterPath = kingpin.Flag( + "gluster_executable_path", + "Path to gluster executable", + ).Default("").String() + glusterVolumes = kingpin.Flag( + "volumes", + fmt.Sprintf("Comma separated volume names: vol1,vol2,vol3. 
Default is '%v' to scrape all metrics", "_all"), + ).Default("_all").String() + listenAddress = kingpin.Flag( + "listen-address", + "Address to listen on web interface and telemetry.", + ).Default(":9189").String() + metricPath = kingpin.Flag( + "metrics-path", + "Path under which to expose metrics.", + ).Default("/metrics").String() + profile = kingpin.Flag( + "profile", + "When profiling reports in gluster are enabled, set '--profile' to get more metrics, Default disable", + ).Default("false").Bool() + quota = kingpin.Flag( + "quota", + "When quota in gluster are enabled and configured, set '--quota' to get quota metrics, Default disable", + ).Default("false").Bool() + mount = kingpin.Flag( + "mount", + "set '--mount' to get mount metrics, Default disable", + ).Default("false").Bool() + peer = kingpin.Flag( + "peer", + "set '--peer' to get peer metrics, Default disable", + ).Default("false").Bool() + authUser = kingpin.Flag( + "auth.user", + "Username for basic auth.", + ).Default("").String() + authPasswd = kingpin.Flag( + "auth.passwd", + "Password for basic auth.", + ).Default("").String() +) + +func init() { + prometheus.MustRegister(version.NewCollector("gluster_exporter")) +} + +type basicAuthHandler struct { + handler http.HandlerFunc + user string + password string +} + +func (h *basicAuthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + user, password, ok := r.BasicAuth() + if !ok || password != h.password || user != h.user { + w.Header().Set("WWW-Authenticate", "Basic realm=\"metrics\"") + http.Error(w, "Invalid username or password", http.StatusUnauthorized) + return + } + h.handler(w, r) + return +} + +func hasUserAndPassword() bool { + return *authUser != "" && *authPasswd != "" +} + +func filter(filters map[string]bool, name string, flag bool) bool { + if len(filters) > 0 { + return flag && filters[name] + } + return flag +} + +func handler(w http.ResponseWriter, r *http.Request) { + hostname, err := os.Hostname() + if err != nil { + 
log.Fatalf("While trying to get Hostname error happened: %v", err) + } + + params := r.URL.Query()["collect[]"] + log.Debugln("collect query:", params) + + // prometheus query with params in prometheus.yml + // like + // - job_name: 'mysql performance' + // scrape_interval: 1m + // static_configs: + // - targets: + // - '192.168.1.2:9104' + // params: + // collect[]: + // - profile + // - quota + filters := make(map[string]bool) + if len(params) > 0 { + for _, param := range params { + filters[param] = true + } + } + + collect := collector.Collect{ + Profile: filter(filters, "profile", *profile), + Quota: filter(filters, "quota", *quota), + Mount: filter(filters, "mount", *mount), + Peer: filter(filters, "peer", *peer), + } + + registry := prometheus.NewRegistry() + registry.MustRegister(collector.New(hostname, *glusterPath, *glusterVolumes, collect)) + + gatherers := prometheus.Gatherers{ + prometheus.DefaultGatherer, + registry, + } + + handler := promhttp.HandlerFor(gatherers, promhttp.HandlerOpts{}) + if hasUserAndPassword() { + handler = &basicAuthHandler{ + handler: promhttp.HandlerFor(gatherers, promhttp.HandlerOpts{}).ServeHTTP, + user: *authUser, + password: *authPasswd, + } + log.Info("Use AUTH") + } + + handler.ServeHTTP(w, r) +} + +func main() { + + log.AddFlags(kingpin.CommandLine) + kingpin.Version(version.Print(program)) + kingpin.HelpFlag.Short('h') + kingpin.Parse() + + // landingPage contains the HTML served at '/'. + var landingPage = []byte(` +