From effdefeb25d8c5dba7758db8c112a98fd782c413 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Romain=20Tarti=C3=A8re?= Date: Sun, 21 Jan 2024 06:41:59 -1000 Subject: [PATCH] Also report free disk space in metric description Now that we take free space into account, adding it to the message makes sense. --- lib/riemann/tools/health.rb | 11 +++- spec/riemann/tools/health_spec.rb | 92 +++++++++++++++++++++---------- 2 files changed, 74 insertions(+), 29 deletions(-) diff --git a/lib/riemann/tools/health.rb b/lib/riemann/tools/health.rb index ac8c320b..2f6a4303 100644 --- a/lib/riemann/tools/health.rb +++ b/lib/riemann/tools/health.rb @@ -11,6 +11,8 @@ class Health include Riemann::Tools include Riemann::Tools::Utils + SI_UNITS = '_kMGTPEZYRQ' + opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9 opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95 opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9 @@ -404,7 +406,7 @@ def disk elsif x > @limits[:disk][:warning] && available < @limits[:disk][:warning_leniency_kb] alert "disk #{f[5]}", :warning, x, "#{f[4]} used" else - alert "disk #{f[5]}", :ok, x, "#{f[4]} used" + alert "disk #{f[5]}", :ok, x, "#{f[4]} used, #{number_to_human_size(available * 1024, :floor)} free" end end end @@ -488,6 +490,13 @@ def human_size_to_number(value) end end + def number_to_human_size(value, rounding = :round) + return value.to_s if value < 1024 + + r = Math.log(value, 1024).floor + format('%<size>.1f%<unit>ciB', size: (value.to_f / (1024**r)).send(rounding, 1), unit: SI_UNITS[r]) + end + def tick invalidate_cache diff --git a/spec/riemann/tools/health_spec.rb b/spec/riemann/tools/health_spec.rb index 5348d320..531d35f0 100644 --- a/spec/riemann/tools/health_spec.rb +++ b/spec/riemann/tools/health_spec.rb @@ -20,6 +20,42 @@ end end + describe('#number_to_human_size') do + subject { described_class.new.number_to_human_size(input, rounding) } + + { + 0 => %w[0 0 0], 
1024 => ['1.0kiB', '1.0kiB', '1.0kiB'], + 2047 => ['1.9kiB', '2.0kiB', '2.0kiB'], + 2048 => ['2.0kiB', '2.0kiB', '2.0kiB'], + 2049 => ['2.0kiB', '2.0kiB', '2.1kiB'], + 44_040_192 => ['42.0MiB', '42.0MiB', '42.0MiB'], + 1_155_301_638_144 => ['1.0TiB', '1.1TiB', '1.1TiB'], + }.each do |input, expected_output| + context %(when passed #{input.inspect}) do + let(:input) { input } + + context 'when rounding lower' do + let(:rounding) { :floor } + + it { is_expected.to eq(expected_output[0]) } + end + + context 'when rounding to nearest' do + let(:rounding) { :round } + + it { is_expected.to eq(expected_output[1]) } + end + + context 'when rounding above' do + let(:rounding) { :ceil } + + it { is_expected.to eq(expected_output[2]) } + end + end + end + end + describe('#disks') do before do allow(subject).to receive(:df).and_return(<<~OUTPUT) @@ -39,29 +75,29 @@ end it 'reports all zfs filesystems' do - allow(subject).to receive(:alert).with('disk /', :ok, 0.07185344331519083, '7% used') - allow(subject).to receive(:alert).with('disk /var/audit', :ok, 2.9484841782529697e-07, '0% used') - allow(subject).to receive(:alert).with('disk /var/mail', :ok, 4.529924689197913e-06, '0% used') - allow(subject).to receive(:alert).with('disk /tmp', :ok, 0.0001386131897766662, '0% used') - allow(subject).to receive(:alert).with('disk /zroot', :ok, 2.9484841782529697e-07, '0% used') - allow(subject).to receive(:alert).with('disk /var/crash', :ok, 2.9484841782529697e-07, '0% used') - allow(subject).to receive(:alert).with('disk /usr/src', :ok, 2.9484841782529697e-07, '0% used') - allow(subject).to receive(:alert).with('disk /usr/home', :ok, 0.33075683535672684, '33% used') - allow(subject).to receive(:alert).with('disk /var/tmp', :ok, 4.02065981198671e-07, '0% used') - allow(subject).to receive(:alert).with('disk /var/log', :ok, 2.0545157787749945e-05, '0% used') - allow(subject).to receive(:alert).with('disk /usr/home/romain/Medias', :ok, 0.39906518922242257, '40% used') + 
allow(subject).to receive(:alert).with('disk /', :ok, 0.07185344331519083, '7% used, 284.6GiB free') + allow(subject).to receive(:alert).with('disk /var/audit', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free') + allow(subject).to receive(:alert).with('disk /var/mail', :ok, 4.529924689197913e-06, '0% used, 284.6GiB free') + allow(subject).to receive(:alert).with('disk /tmp', :ok, 0.0001386131897766662, '0% used, 284.6GiB free') + allow(subject).to receive(:alert).with('disk /zroot', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free') + allow(subject).to receive(:alert).with('disk /var/crash', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free') + allow(subject).to receive(:alert).with('disk /usr/src', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free') + allow(subject).to receive(:alert).with('disk /usr/home', :ok, 0.33075683535672684, '33% used, 284.6GiB free') + allow(subject).to receive(:alert).with('disk /var/tmp', :ok, 4.02065981198671e-07, '0% used, 284.6GiB free') + allow(subject).to receive(:alert).with('disk /var/log', :ok, 2.0545157787749945e-05, '0% used, 284.6GiB free') + allow(subject).to receive(:alert).with('disk /usr/home/romain/Medias', :ok, 0.39906518922242257, '40% used, 2.0TiB free') subject.disk - expect(subject).to have_received(:alert).with('disk /', :ok, 0.07185344331519083, '7% used') - expect(subject).to have_received(:alert).with('disk /var/audit', :ok, 2.9484841782529697e-07, '0% used') - expect(subject).to have_received(:alert).with('disk /var/mail', :ok, 4.529924689197913e-06, '0% used') - expect(subject).to have_received(:alert).with('disk /tmp', :ok, 0.0001386131897766662, '0% used') - expect(subject).to have_received(:alert).with('disk /zroot', :ok, 2.9484841782529697e-07, '0% used') - expect(subject).to have_received(:alert).with('disk /var/crash', :ok, 2.9484841782529697e-07, '0% used') - expect(subject).to have_received(:alert).with('disk /usr/src', :ok, 2.9484841782529697e-07, '0% used') - expect(subject).to 
have_received(:alert).with('disk /usr/home', :ok, 0.33075683535672684, '33% used') - expect(subject).to have_received(:alert).with('disk /var/tmp', :ok, 4.02065981198671e-07, '0% used') - expect(subject).to have_received(:alert).with('disk /var/log', :ok, 2.0545157787749945e-05, '0% used') - expect(subject).to have_received(:alert).with('disk /usr/home/romain/Medias', :ok, 0.39906518922242257, '40% used') + expect(subject).to have_received(:alert).with('disk /', :ok, 0.07185344331519083, '7% used, 284.6GiB free') + expect(subject).to have_received(:alert).with('disk /var/audit', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free') + expect(subject).to have_received(:alert).with('disk /var/mail', :ok, 4.529924689197913e-06, '0% used, 284.6GiB free') + expect(subject).to have_received(:alert).with('disk /tmp', :ok, 0.0001386131897766662, '0% used, 284.6GiB free') + expect(subject).to have_received(:alert).with('disk /zroot', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free') + expect(subject).to have_received(:alert).with('disk /var/crash', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free') + expect(subject).to have_received(:alert).with('disk /usr/src', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free') + expect(subject).to have_received(:alert).with('disk /usr/home', :ok, 0.33075683535672684, '33% used, 284.6GiB free') + expect(subject).to have_received(:alert).with('disk /var/tmp', :ok, 4.02065981198671e-07, '0% used, 284.6GiB free') + expect(subject).to have_received(:alert).with('disk /var/log', :ok, 2.0545157787749945e-05, '0% used, 284.6GiB free') + expect(subject).to have_received(:alert).with('disk /usr/home/romain/Medias', :ok, 0.39906518922242257, '40% used, 2.0TiB free') end context 'with a foreign locale' do @@ -74,11 +110,11 @@ end it 'reports all zfs filesystems' do - allow(subject).to receive(:alert).with('disk /', :ok, 0.6267130394624543, '63% used') - allow(subject).to receive(:alert).with('disk /home', :ok, 0.22016432923987797, '23% 
used') + allow(subject).to receive(:alert).with('disk /', :ok, 0.6267130394624543, '63% used, 6.7GiB free') + allow(subject).to receive(:alert).with('disk /home', :ok, 0.22016432923987797, '23% used, 66.7GiB free') subject.disk - expect(subject).to have_received(:alert).with('disk /', :ok, 0.6267130394624543, '63% used') - expect(subject).to have_received(:alert).with('disk /home', :ok, 0.22016432923987797, '23% used') + expect(subject).to have_received(:alert).with('disk /', :ok, 0.6267130394624543, '63% used, 6.7GiB free') + expect(subject).to have_received(:alert).with('disk /home', :ok, 0.22016432923987797, '23% used, 66.7GiB free') end end @@ -91,9 +127,9 @@ end it 'reports a correct lenient state' do - allow(subject).to receive(:alert).with('disk /tank', :ok, 0.9002625247490722, '91% used') + allow(subject).to receive(:alert).with('disk /tank', :ok, 0.9002625247490722, '91% used, 1.0TiB free') subject.disk - expect(subject).to have_received(:alert).with('disk /tank', :ok, 0.9002625247490722, '91% used') + expect(subject).to have_received(:alert).with('disk /tank', :ok, 0.9002625247490722, '91% used, 1.0TiB free') end end end