Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A more detailed list of clients #63

Merged
merged 7 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.6.0] - 2023-11-29

- Added badges to README.md (#62).
- Config now accommodates client ID, key, and name, allowing users to specify individual client details (#63).

IbraAoad marked this conversation as resolved.
Show resolved Hide resolved

## [0.5.0] - 2023-10-26

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ receivers:
...
- name: cos-alerter
webhook_configs:
- url: http://<cos-alerter-address>:8080/alive?clientid=<clientid>
- url: http://<cos-alerter-address>:8080/alive?clientid=<clientid>&key=<clientkey>
dstathis marked this conversation as resolved.
Show resolved Hide resolved
route:
...
routes:
Expand Down
45 changes: 34 additions & 11 deletions cos_alerter/alerter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import datetime
import logging
import os
import sys
import textwrap
import threading
import time
Expand All @@ -14,7 +15,8 @@

import apprise
import durationpy
import yaml
from ruamel.yaml import YAML
from ruamel.yaml.constructor import DuplicateKeyError

logger = logging.getLogger(__name__)

Expand All @@ -30,14 +32,35 @@ def set_path(self, path: str):
"""Set the config file path."""
self.path = Path(path)

def _validate_hashes(self, clients):
"""Validate that keys in the clients dictionary are valid SHA-512 hashes."""
for client_info in clients.values():
client_key = client_info.get("key", "")
is_valid = len(client_key) == 128
if client_key and not is_valid:
return False
return True

IbraAoad marked this conversation as resolved.
Show resolved Hide resolved
def reload(self):
"""Reload config values from the disk."""
yaml = YAML(typ="rt")
with open(
os.path.join(os.path.dirname(os.path.realpath(__file__)), "config-defaults.yaml")
) as f:
self.data = yaml.safe_load(f)
self.data = yaml.load(f)
with open(self.path, "r") as f:
user_data = yaml.safe_load(f)
try:
user_data = yaml.load(f)
except DuplicateKeyError:
logger.critical("Duplicate client IDs found in COS Alerter config. Exiting...")
sys.exit(1)

# Validate that keys are valid SHA-512 hashes
if user_data and user_data.get("watch", {}).get("clients"):
if not self._validate_hashes(user_data["watch"]["clients"]):
logger.critical("Invalid SHA-512 hash(es) in config. Exiting...")
sys.exit(1)

deep_update(self.data, user_data)
self.data["watch"]["down_interval"] = durationpy.from_str(
self.data["watch"]["down_interval"]
Expand All @@ -50,15 +73,15 @@ def reload(self):
def deep_update(base: dict, new: typing.Optional[dict]):
"""Deep dict update.

Same as dict.update() except it recurses into dubdicts.
Same as dict.update() except it recurses into subdicts.
"""
if new is None:
return
for key in base:
if key in new and isinstance(base[key], dict):
deep_update(base[key], new[key])
elif key in new:
base[key] = new[key]
for key, new_value in new.items():
if key in base and isinstance(base[key], dict) and isinstance(new_value, dict):
deep_update(base[key], new_value)
else:
base[key] = new_value


config = Config()
Expand Down Expand Up @@ -120,9 +143,9 @@ def initialize():
# ...
# }
state["clients"] = {}
for client in config["watch"]["clients"]:
for client_id in config["watch"]["clients"]:
alert_time = None if config["watch"]["wait_for_first_connection"] else current_time
state["clients"][client] = {
state["clients"][client_id] = {
"lock": threading.Lock(),
"alert_time": alert_time,
"notify_time": None,
Expand Down
16 changes: 11 additions & 5 deletions cos_alerter/config-defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,19 @@ watch:
# This allows you to configure COS Alerter before configuring Alertmanager.
wait_for_first_connection: true

# The list of Alertmanager instances we are monitoring. Alertmanager instances should be
# configured with the clientid=<client> parameter.
# Configuration for monitoring Alertmanager instances.
# - clientid: Unique identifier for the Alertmanager instance.
# - key: Secret key for authenticating and authorizing communication with COS Alerter. (Must be stored here as the SHA-512 hex digest of the key; clients send the plain key in the `key` URL parameter.)
IbraAoad marked this conversation as resolved.
Show resolved Hide resolved
# - name: Descriptive name for the instance.
# eg:
# clients:
# - "client0"
# - "client1"
clients: []
# clientid1:
# key: "822295b207a0b73dd4690b60a03c55599346d44aef3da4cf28c3296eadb98b2647ae18863cc3ae8ae5574191b60360858982fd8a8d176c0edf646ce6eee24ef9"
# name: "Instance Name 1"
# clientid2:
# key: "0415b0cad09712bd1ed094bc06ed421231d0603465e9841c959e9f9dcf735c9ce704df7a0c849a4e0db405c916f679a0e6c3f63f9e26191dda8069e1b44a3bc8"
# name: "Instance Name 2"
IbraAoad marked this conversation as resolved.
Show resolved Hide resolved
clients: {}

notify:

Expand Down
32 changes: 23 additions & 9 deletions cos_alerter/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

"""HTTP server for COS Alerter."""

import hashlib
import logging

import timeago
Expand All @@ -28,9 +29,10 @@ def dashboard():
status = "up" if not state.is_down() else "down"
if last_alert is None:
status = "unknown"
client_name = config["watch"]["clients"][clientid].get("name", "")
clients.append(
{
"clientid": clientid,
"client_name": client_name,
"status": status,
"alert_time": alert_time,
}
Expand All @@ -44,16 +46,28 @@ def alive():
# TODO Decide if we should validate the request.
params = request.args
clientid_list = params.getlist("clientid") # params is a werkzeug.datastructures.MultiDict
if len(clientid_list) < 1:
logger.warning("Request %s has no clientid.", request.url)
return 'Parameter "clientid" required.', 400
if len(clientid_list) > 1:
logger.warning("Request %s specified clientid more than once.", request.url)
return 'Parameter "clientid" provided more than once.', 400
key_list = params.getlist("key")

if len(clientid_list) < 1 or len(key_list) < 1:
logger.warning("Request %s is missing clientid or key.", request.url)
return 'Parameters "clientid" and "key" are required.', 400
if len(clientid_list) > 1 or len(key_list) > 1:
logger.warning("Request %s specified clientid or key more than once.", request.url)
return 'Parameters "clientid" and "key" should be provided exactly once.', 400
clientid = clientid_list[0]
if clientid not in config["watch"]["clients"]:
logger.warning("Request %s specified an unknown clientid.")
key = key_list[0]

# Find the client with the specified clientid
client_info = config["watch"]["clients"].get(clientid)
if not client_info:
logger.warning("Request %s specified an unknown clientid.", request.url)
return 'Clientid {params["clientid"]} not found. ', 404

# Hash the key and compare with the stored hashed key
hashed_key = hashlib.sha512(key.encode()).hexdigest()
if hashed_key != client_info.get("key", ""):
logger.warning("Request %s provided an incorrect key.", request.url)
return "Incorrect key for the specified clientid.", 401
logger.info("Received alert from Alertmanager clientid: %s.", clientid)
with AlerterState(clientid) as state:
state.reset_alert_timeout()
Expand Down
2 changes: 1 addition & 1 deletion cos_alerter/templates/dashboard.html
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ <h2>Clients</h2>
<tbody>
{% for client in clients %}
<tr>
<td>{{ client["clientid"] }}</td>
<td>{{ client["client_name"] }}</td>
{% if client["status"] == "up" %}
<td>✅ Up</td>
{% elif client["status"] == "down" %}
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "cos-alerter"
version = "0.5.0"
version = "0.6.0"
authors = [
{ name="Dylan Stephano-Shachter", email="[email protected]" }
]
Expand All @@ -26,6 +26,7 @@ dependencies = [
"pyyaml~=6.0",
"timeago~=1.0",
"waitress~=2.1",
"ruamel.yaml~=0.18.0"
]

[project.urls]
Expand Down
4 changes: 2 additions & 2 deletions rockcraft.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: cos-alerter
summary: A liveness checker for self-monitoring.
description: Receive regular pings from the cos stack and alert when they stop.
version: "0.5.0" # NOTE: Make sure this matches `cos-alerter` below
version: "0.6.0" # NOTE: Make sure this matches `cos-alerter` below
base: ubuntu:22.04
license: Apache-2.0
platforms:
Expand All @@ -11,7 +11,7 @@ parts:
plugin: python
source: .
python-packages:
- cos-alerter==0.5.0 # NOTE: Make sure this matches `version` above
- cos-alerter==0.6.0 # NOTE: Make sure this matches `version` above
stage-packages:
- python3-venv
services:
Expand Down
2 changes: 1 addition & 1 deletion snap/snapcraft.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: cos-alerter
version: '0.5.0'
version: '0.6.0'
summary: A watchdog alerting on alertmanager notification failures.
license: Apache-2.0
contact: [email protected]
Expand Down
7 changes: 6 additions & 1 deletion tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@
"watch": {
"down_interval": "5m",
"wait_for_first_connection": False,
"clients": ["client0"],
"clients": {
"clientid1": {
"key": "822295b207a0b73dd4690b60a03c55599346d44aef3da4cf28c3296eadb98b2647ae18863cc3ae8ae5574191b60360858982fd8a8d176c0edf646ce6eee24ef9",
"name": "Instance Name 1",
},
},
},
"notify": {
"destinations": DESTINATIONS,
Expand Down
63 changes: 55 additions & 8 deletions tests/test_alerter.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,53 @@ def test_config_default_empty_file(fake_fs):
assert config["watch"]["down_interval"] == 300


def test_duplicate_key_error(fake_fs):
duplicate_config = """
watch:
down_interval: "5m"
wait_for_first_connection: true
clients:
clientid1:
key: "clientkey1"
name: "Instance Name 1"
clientid1:
key: "clientkey1"
name: "Instance Name 1"
"""
with open("/etc/cos-alerter.yaml", "w") as f:
f.write(duplicate_config)

try:
config.reload()
except SystemExit as exc:
assert exc.code == 1
else:
# If no exception is raised, fail the test
assert False


def test_invalid_hashes(fake_fs):
duplicate_config = """
watch:
down_interval: "5m"
wait_for_first_connection: true
clients:
invalidhashclient:
key: "E0E06B8DB6ED8DD4E1FFE98376E606BDF4FE4ABB4AF65BFE8B18FBFA6564D8B3"
name: "Instance Name 1"
"""
with open("/etc/cos-alerter.yaml", "w") as f:
f.write(duplicate_config)

try:
config.reload()
except SystemExit as exc:
assert exc.code == 1
else:
# If no exception is raised, fail the test
assert False


def test_config_default_partial_file(fake_fs):
conf = yaml.dump({"log_level": "info"})
with open("/etc/cos-alerter.yaml", "w") as f:
Expand All @@ -50,7 +97,7 @@ def test_config_default_override(fake_fs):
def test_initialize(monotonic_mock, fake_fs):
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")
with state:
assert state.start_date == 1672531200.0
assert state.start_time == 1000
Expand All @@ -72,7 +119,7 @@ def test_up_time(monotonic_mock, fake_fs):
def test_is_down_from_initialize(monotonic_mock, fake_fs):
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")
with state:
monotonic_mock.return_value = 1180 # Three minutes have passed
assert state.is_down() is False
Expand All @@ -85,7 +132,7 @@ def test_is_down_from_initialize(monotonic_mock, fake_fs):
def test_is_down_with_reset_alert_timeout(monotonic_mock, fake_fs):
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")
with state:
monotonic_mock.return_value = 2000
state.reset_alert_timeout()
Expand All @@ -106,7 +153,7 @@ def test_is_down_with_wait_for_first_connection(monotonic_mock, fake_fs):
config.reload()
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")
with state:
monotonic_mock.return_value = 1500
assert state.is_down() is False # 6 minutes have passes but we have not started counting.
Expand All @@ -122,7 +169,7 @@ def test_is_down_with_wait_for_first_connection(monotonic_mock, fake_fs):
def test_is_down(monotonic_mock, fake_fs):
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")
with state:
monotonic_mock.return_value = 2000
state.reset_alert_timeout()
Expand All @@ -137,7 +184,7 @@ def test_is_down(monotonic_mock, fake_fs):
def test_recently_notified(monotonic_mock, fake_fs):
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")
with state:
state._set_notify_time()
monotonic_mock.return_value = 2800 # 30 minutes have passed
Expand All @@ -153,7 +200,7 @@ def test_recently_notified(monotonic_mock, fake_fs):
def test_notify(notify_mock, add_mock, monotonic_mock, fake_fs):
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")

with state:
state.notify()
Expand All @@ -166,7 +213,7 @@ def test_notify(notify_mock, add_mock, monotonic_mock, fake_fs):
title="**Alertmanager is Down!**",
body=textwrap.dedent(
"""
Your Alertmanager instance: client0 seems to be down!
Your Alertmanager instance: clientid1 seems to be down!
It has not alerted COS-Alerter ever.
"""
),
Expand Down
Loading
Loading