Skip to content

Commit

Permalink
Also report free disk space in metric description
Browse files Browse the repository at this point in the history
Now that we take free space into account, adding it to the message makes
sense.
  • Loading branch information
smortex committed Jan 22, 2024
1 parent eb6c0dd commit effdefe
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 29 deletions.
11 changes: 10 additions & 1 deletion lib/riemann/tools/health.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ class Health
include Riemann::Tools
include Riemann::Tools::Utils

# Unit prefix letters indexed by power of 1024 (1 => 'k', 2 => 'M', ...), as
# looked up by number_to_human_size. The '_' at index 0 is a placeholder:
# sizes below 1024 are printed without any unit, so it is never emitted there.
SI_UNITS = '_kMGTPEZYRQ'

opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
Expand Down Expand Up @@ -404,7 +406,7 @@ def disk
elsif x > @limits[:disk][:warning] && available < @limits[:disk][:warning_leniency_kb]
alert "disk #{f[5]}", :warning, x, "#{f[4]} used"
else
alert "disk #{f[5]}", :ok, x, "#{f[4]} used"
alert "disk #{f[5]}", :ok, x, "#{f[4]} used, #{number_to_human_size(available * 1024, :floor)} free"
end
end
end
Expand Down Expand Up @@ -488,6 +490,13 @@ def human_size_to_number(value)
end
end

# Format a byte count as a human-readable string using 1024-based units,
# keeping one decimal (e.g. 44_040_192 => "42.0MiB").
#
# @param value [Numeric] size in bytes; anything below 1024 (negative values
#   included) is returned unscaled through +to_s+
# @param rounding [Symbol] Float method used to keep one decimal:
#   :round (default), :floor or :ceil
# @return [String] the formatted size
def number_to_human_size(value, rounding = :round)
  return value.to_s if value < 1024

  # Pick the unit with exact comparisons rather than Math.log, whose
  # floating-point result may land just below an exact power of 1024, and
  # never go past the largest prefix we know about.
  largest = SI_UNITS.length - 1
  exponent = 0
  exponent += 1 while exponent < largest && value >= 1024**(exponent + 1)

  size = (value.to_f / (1024**exponent)).public_send(rounding, 1)

  # Rounding up may reach 1024.0 (e.g. 1_048_575 bytes => "1024.0kiB"):
  # carry into the next unit so the result reads "1.0MiB" instead.
  if size >= 1024 && exponent < largest
    exponent += 1
    size = (value.to_f / (1024**exponent)).public_send(rounding, 1)
  end

  format('%<size>.1f%<unit>ciB', size: size, unit: SI_UNITS[exponent])
end

def tick
invalidate_cache

Expand Down
92 changes: 64 additions & 28 deletions spec/riemann/tools/health_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,42 @@
end
end

describe('#number_to_human_size') do
  subject { described_class.new.number_to_human_size(input, rounding) }

  # Rounding modes and their context labels, listed in the same order as the
  # expected outputs of each row in the table below.
  rounding_contexts = {
    floor: 'when rounding lower',
    round: 'when rounding to nearest',
    ceil: 'when rounding above',
  }

  # [size in bytes, expected strings for floor / round / ceil]
  [
    [0, %w[0 0 0]],
    [1024, ['1.0kiB', '1.0kiB', '1.0kiB']],
    [2047, ['1.9kiB', '2.0kiB', '2.0kiB']],
    [2048, ['2.0kiB', '2.0kiB', '2.0kiB']],
    [2049, ['2.0kiB', '2.0kiB', '2.1kiB']],
    [44_040_192, ['42.0MiB', '42.0MiB', '42.0MiB']],
    [1_155_301_638_144, ['1.0TiB', '1.1TiB', '1.1TiB']],
  ].each do |bytes, expectations|
    context %(when passed #{bytes.inspect}) do
      let(:input) { bytes }

      # One nested context per rounding mode, paired with its expected string.
      rounding_contexts.zip(expectations).each do |(mode, label), expected|
        context label do
          let(:rounding) { mode }

          it { is_expected.to eq(expected) }
        end
      end
    end
  end
end

describe('#disks') do
before do
allow(subject).to receive(:df).and_return(<<~OUTPUT)
Expand All @@ -39,29 +75,29 @@
end

it 'reports all zfs filesystems' do
allow(subject).to receive(:alert).with('disk /', :ok, 0.07185344331519083, '7% used')
allow(subject).to receive(:alert).with('disk /var/audit', :ok, 2.9484841782529697e-07, '0% used')
allow(subject).to receive(:alert).with('disk /var/mail', :ok, 4.529924689197913e-06, '0% used')
allow(subject).to receive(:alert).with('disk /tmp', :ok, 0.0001386131897766662, '0% used')
allow(subject).to receive(:alert).with('disk /zroot', :ok, 2.9484841782529697e-07, '0% used')
allow(subject).to receive(:alert).with('disk /var/crash', :ok, 2.9484841782529697e-07, '0% used')
allow(subject).to receive(:alert).with('disk /usr/src', :ok, 2.9484841782529697e-07, '0% used')
allow(subject).to receive(:alert).with('disk /usr/home', :ok, 0.33075683535672684, '33% used')
allow(subject).to receive(:alert).with('disk /var/tmp', :ok, 4.02065981198671e-07, '0% used')
allow(subject).to receive(:alert).with('disk /var/log', :ok, 2.0545157787749945e-05, '0% used')
allow(subject).to receive(:alert).with('disk /usr/home/romain/Medias', :ok, 0.39906518922242257, '40% used')
allow(subject).to receive(:alert).with('disk /', :ok, 0.07185344331519083, '7% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /var/audit', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /var/mail', :ok, 4.529924689197913e-06, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /tmp', :ok, 0.0001386131897766662, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /zroot', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /var/crash', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /usr/src', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /usr/home', :ok, 0.33075683535672684, '33% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /var/tmp', :ok, 4.02065981198671e-07, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /var/log', :ok, 2.0545157787749945e-05, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /usr/home/romain/Medias', :ok, 0.39906518922242257, '40% used, 2.0TiB free')
subject.disk
expect(subject).to have_received(:alert).with('disk /', :ok, 0.07185344331519083, '7% used')
expect(subject).to have_received(:alert).with('disk /var/audit', :ok, 2.9484841782529697e-07, '0% used')
expect(subject).to have_received(:alert).with('disk /var/mail', :ok, 4.529924689197913e-06, '0% used')
expect(subject).to have_received(:alert).with('disk /tmp', :ok, 0.0001386131897766662, '0% used')
expect(subject).to have_received(:alert).with('disk /zroot', :ok, 2.9484841782529697e-07, '0% used')
expect(subject).to have_received(:alert).with('disk /var/crash', :ok, 2.9484841782529697e-07, '0% used')
expect(subject).to have_received(:alert).with('disk /usr/src', :ok, 2.9484841782529697e-07, '0% used')
expect(subject).to have_received(:alert).with('disk /usr/home', :ok, 0.33075683535672684, '33% used')
expect(subject).to have_received(:alert).with('disk /var/tmp', :ok, 4.02065981198671e-07, '0% used')
expect(subject).to have_received(:alert).with('disk /var/log', :ok, 2.0545157787749945e-05, '0% used')
expect(subject).to have_received(:alert).with('disk /usr/home/romain/Medias', :ok, 0.39906518922242257, '40% used')
expect(subject).to have_received(:alert).with('disk /', :ok, 0.07185344331519083, '7% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /var/audit', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /var/mail', :ok, 4.529924689197913e-06, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /tmp', :ok, 0.0001386131897766662, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /zroot', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /var/crash', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /usr/src', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /usr/home', :ok, 0.33075683535672684, '33% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /var/tmp', :ok, 4.02065981198671e-07, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /var/log', :ok, 2.0545157787749945e-05, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /usr/home/romain/Medias', :ok, 0.39906518922242257, '40% used, 2.0TiB free')
end

context 'with a foreign locale' do
Expand All @@ -74,11 +110,11 @@
end

it 'reports all zfs filesystems' do
allow(subject).to receive(:alert).with('disk /', :ok, 0.6267130394624543, '63% used')
allow(subject).to receive(:alert).with('disk /home', :ok, 0.22016432923987797, '23% used')
allow(subject).to receive(:alert).with('disk /', :ok, 0.6267130394624543, '63% used, 6.7GiB free')
allow(subject).to receive(:alert).with('disk /home', :ok, 0.22016432923987797, '23% used, 66.7GiB free')
subject.disk
expect(subject).to have_received(:alert).with('disk /', :ok, 0.6267130394624543, '63% used')
expect(subject).to have_received(:alert).with('disk /home', :ok, 0.22016432923987797, '23% used')
expect(subject).to have_received(:alert).with('disk /', :ok, 0.6267130394624543, '63% used, 6.7GiB free')
expect(subject).to have_received(:alert).with('disk /home', :ok, 0.22016432923987797, '23% used, 66.7GiB free')
end
end

Expand All @@ -91,9 +127,9 @@
end

it 'reports a correct lenient state' do
allow(subject).to receive(:alert).with('disk /tank', :ok, 0.9002625247490722, '91% used')
allow(subject).to receive(:alert).with('disk /tank', :ok, 0.9002625247490722, '91% used, 1.0TiB free')
subject.disk
expect(subject).to have_received(:alert).with('disk /tank', :ok, 0.9002625247490722, '91% used')
expect(subject).to have_received(:alert).with('disk /tank', :ok, 0.9002625247490722, '91% used, 1.0TiB free')
end
end
end
Expand Down

0 comments on commit effdefe

Please sign in to comment.