diff --git a/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/lib/charms/prometheus_k8s/v0/prometheus_scrape.py index ca554fb..c651765 100644 --- a/lib/charms/prometheus_k8s/v0/prometheus_scrape.py +++ b/lib/charms/prometheus_k8s/v0/prometheus_scrape.py @@ -340,7 +340,7 @@ def _on_scrape_targets_changed(self, event): import yaml from cosl import JujuTopology -from cosl.rules import AlertRules +from cosl.rules import AlertRules, generic_alert_groups from ops.charm import CharmBase, RelationRole from ops.framework import ( BoundEvent, @@ -362,7 +362,7 @@ def _on_scrape_targets_changed(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 48 +LIBPATCH = 49 PYDEPS = ["cosl"] @@ -1531,6 +1531,9 @@ def set_scrape_job_spec(self, _=None): alert_rules = AlertRules(query_type="promql", topology=self.topology) alert_rules.add_path(self._alert_rules_path, recursive=True) + alert_rules.add( + generic_alert_groups.application_rules, group_name_prefix=self.topology.identifier + ) alert_rules_as_dict = alert_rules.as_dict() for relation in self._charm.model.relations[self._relation_name]: @@ -1776,6 +1779,7 @@ def __init__( relation_names: Optional[dict] = None, relabel_instance=True, resolve_addresses=False, + path_to_own_alert_rules: Optional[str] = None, ): """Construct a `MetricsEndpointAggregator`. @@ -1795,6 +1799,7 @@ def __init__( resolve_addresses: A boolean flag indiccating if the aggregator should attempt to perform DNS lookups of targets and append a `dns_name` label + path_to_own_alert_rules: Optionally supply a path for alert rule files """ self._charm = charm @@ -1807,6 +1812,8 @@ def __init__( self._alert_rules_relation = relation_names.get("alert_rules", "prometheus-rules") super().__init__(charm, self._prometheus_relation) + self.topology = JujuTopology.from_charm(charm) + self._stored.set_default(jobs=[], alert_rules=[]) self._relabel_instance = relabel_instance @@ -1816,6 +1823,8 @@ def __init__( prometheus_events = self._charm.on[self._prometheus_relation] self.framework.observe(prometheus_events.relation_joined, self._set_prometheus_data) + self.path_to_own_alert_rules = path_to_own_alert_rules + # manage list of Prometheus scrape jobs from related scrape targets target_events = self._charm.on[self._target_relation] self.framework.observe(target_events.relation_changed, self._on_prometheus_targets_changed) @@ -1838,6 +1847,7 @@ def _set_prometheus_data(self, event): if not self._charm.unit.is_leader(): return + # Gather the scrape jobs jobs = [] + _type_convert_stored( self._stored.jobs # pyright: ignore ) # list of scrape jobs, one per relation @@ -1846,6 +1856,7 @@ def _set_prometheus_data(self, event): if targets and relation.app: jobs.append(self._static_scrape_job(targets, relation.app.name)) + # Gather the alert rules groups = [] + _type_convert_stored( self._stored.alert_rules # pyright: ignore ) # list of alert rule groups @@ -1856,7 +1867,17 @@ def _set_prometheus_data(self, event): rules = self._label_alert_rules(unit_rules, appname) group = {"name": self.group_name(appname), "rules": rules} groups.append(group) + alert_rules = AlertRules(query_type="promql", topology=self.topology) + # Add alert rules from file + if self.path_to_own_alert_rules: + alert_rules.add_path(self.path_to_own_alert_rules, recursive=True) + # Add generic alert rules + alert_rules.add( + generic_alert_groups.application_rules, group_name_prefix=self.topology.identifier + ) + groups.extend(alert_rules.as_dict()["groups"]) + # Set scrape jobs and alert rules in relation data event.relation.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs) event.relation.data[self._charm.app]["alert_rules"] = json.dumps({"groups": groups})