From 5e0eeef1ee113de946a33e0df0f0d6e3270f41d8 Mon Sep 17 00:00:00 2001 From: Anton Smorodskyi Date: Mon, 23 Oct 2023 13:59:58 +0200 Subject: [PATCH] Introduce accumulating statistic in time-series DB Idea is to start gathering statistic about amount of entities existing in every account at every point of time. This will allow detect different kind of anomalies in testing pipelines (e.g. bugs in scheduling , bugs in cleanup etc.) This PR just introduce functionality which allow to achieve this in future PRs we will start using it in EVERY query for EVERY cloud provider. --- .gitignore | 3 --- ocw/lib/azure.py | 12 ++++++---- ocw/lib/influx.py | 57 ++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 1 + templates/pcw.ini | 8 +++++++ tests/test_influx.py | 6 +++++ webui/PCWConfig.py | 17 +++++++------ 7 files changed, 89 insertions(+), 15 deletions(-) create mode 100644 ocw/lib/influx.py create mode 100644 tests/test_influx.py diff --git a/.gitignore b/.gitignore index 138209fc..0aa39ed8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,9 +11,6 @@ static/ venv/ .venv/ pyvenv.cfg -lib64/ -lib/ -bin/ # Codecov .coverage diff --git a/ocw/lib/azure.py b/ocw/lib/azure.py index 984a9751..86135200 100644 --- a/ocw/lib/azure.py +++ b/ocw/lib/azure.py @@ -10,7 +10,8 @@ from dateutil.parser import parse from webui.PCWConfig import PCWConfig from .provider import Provider -from ..models import Instance +from ..models import Instance, ProviderChoice +from .influx import influxwrite, Influx class Azure(Provider): @@ -81,10 +82,11 @@ def get_storage_key(self, storage_account: str) -> str: storage_keys = [v.value for v in storage_keys.keys] return storage_keys[0] + @influxwrite(Influx.VMS_QUANTITY, ProviderChoice.AZURE) def list_instances(self) -> list: return list(self.compute_mgmt_client().virtual_machines.list_all()) - def get_vm_types_in_resource_group(self, resource_group: str) -> str: + def get_vm_types_in_resource_group(self, resource_group: str) -> str | None: self.log_dbg(f"Listing VMs for {resource_group}") type_set = set() try: @@ -94,9 +96,7 @@ def get_vm_types_in_resource_group(self, resource_group: str) -> str: except ResourceNotFoundError: self.log_dbg(f"{resource_group} already deleted") return None - if len(type_set) > 0: - return ', '.join(type_set) - return "N/A" + return ', '.join(type_set) if type_set else "N/A" def get_resource_properties(self, resource_id): return self.resource_mgmt_client().resources.get_by_id(resource_id, api_version="2023-07-03").properties @@ -111,9 +111,11 @@ def delete_resource(self, resource_id: str) -> None: self.log_info(f"Deleting of resource group {resource_id}") self.resource_mgmt_client().resource_groups.begin_delete(resource_id) + @influxwrite(Influx.IMAGES_QUANTITY, ProviderChoice.AZURE) def list_images(self): return self.list_resource(filters="resourceType eq 'Microsoft.Compute/images'") + @influxwrite(Influx.DISK_QUANTITY, ProviderChoice.AZURE) def list_disks(self): return self.list_resource(filters="resourceType eq 'Microsoft.Compute/disks'") diff --git a/ocw/lib/influx.py b/ocw/lib/influx.py new file mode 100644 index 00000000..f8ed8228 --- /dev/null +++ b/ocw/lib/influx.py @@ -0,0 +1,57 @@ +import os +import logging +from influxdb_client import InfluxDBClient, Point +from influxdb_client.client.write_api import SYNCHRONOUS, WriteApi +from influxdb_client.client.exceptions import InfluxDBError +from urllib3.exceptions import HTTPError, TimeoutError + + +from webui.PCWConfig import PCWConfig +from ocw.enums import ProviderChoice + +logger = logging.getLogger(__name__) + + +def influxwrite(influx_type: str, provider: ProviderChoice): + def decorator(func): + def wrapper(*args, **kwargs): + objects = func(*args, **kwargs) + Influx().write(provider.value, influx_type, len(objects)) + return objects + return wrapper + return decorator + + +class Influx: + __client: WriteApi | None = None + VMS_QUANTITY: str = "vms_quantity" + IMAGES_QUANTITY: str = "images_quantity" + DISK_QUANTITY: str = "disk_quantity" + + def __init__(self) -> None: + if self.__client is None: + if os.getenv("INFLUX_TOKEN") is None: + logger.warning("INFLUX_TOKEN is not set, InfluxDB will not be used") + elif PCWConfig.has("influxdb/url"): + self.bucket: str = str(PCWConfig.get_feature_property("influxdb", "bucket")) + self.org: str = str(PCWConfig.get_feature_property("influxdb", "org")) + url: str = str(PCWConfig.get_feature_property("influxdb", "url")) + self.__client = InfluxDBClient( + url=url, + token=os.getenv("INFLUX_TOKEN"), + org=str(PCWConfig.get_feature_property("influxdb", "org")), + ).write_api(write_options=SYNCHRONOUS) + + # this is implementation of Singleton pattern + def __new__(cls: type["Influx"]) -> "Influx": + if not hasattr(cls, "instance") or cls.instance is None: + cls.instance = super(Influx, cls).__new__(cls) + return cls.instance + + def write(self, measurement: str, field: str, value: int) -> None: + if self.__client: + point = Point(measurement).field(field, value) + try: + self.__client.write(bucket=self.bucket, org=self.org, record=point) + except (InfluxDBError, HTTPError, TimeoutError) as exception: + logger.warning(f"Failed to write to influxdb(record={point}): {exception}") diff --git a/requirements.txt b/requirements.txt index fe89102c..b671bbf7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,3 +21,4 @@ openstacksdk~=1.5.0 python-dateutil apscheduler kubernetes +influxdb-client diff --git a/templates/pcw.ini b/templates/pcw.ini index 4bdd8008..7f69e307 100644 --- a/templates/pcw.ini +++ b/templates/pcw.ini @@ -57,3 +57,11 @@ vpc_cleanup = true [updaterun] # if openqa_ttl tag is not defined this TTL will be set to the instance default_ttl = 44100 # value is in seconds + +# used to store statistic about amount of entities tracked in the cloud +[influxdb] +# defines standard influxdb connection params - organization , bucket +# for more details please refer to official documentation https://docs.influxdata.com/influxdb/v2/api-guide/client-libraries/python/#Copyright +org=pcw +bucket=cloud_stat +url=http://localhost:8086 diff --git a/tests/test_influx.py b/tests/test_influx.py new file mode 100644 index 00000000..995a519c --- /dev/null +++ b/tests/test_influx.py @@ -0,0 +1,6 @@ +from ocw.lib.influx import Influx + + +def test_influx_init(): + influx = Influx() + assert hasattr(influx, "__client") is False diff --git a/webui/PCWConfig.py b/webui/PCWConfig.py index 0dd1d8ad..c639b10c 100644 --- a/webui/PCWConfig.py +++ b/webui/PCWConfig.py @@ -53,8 +53,8 @@ def getList(self, config_path: str, default: list = None) -> list: class PCWConfig(): @staticmethod - def get_feature_property(feature: str, property: str, namespace: str = None): - default_values = { + def get_feature_property(feature: str, feature_property: str, namespace: str | None = None) -> str | int: + default_values: dict[str, dict[str, int | type[int] | str | type[str] | type[str] | None]] = { 'cleanup/max-age-hours': {'default': 24 * 7, 'return_type': int}, 'cleanup/azure-gallery-name': {'default': 'test_image_gallery', 'return_type': str}, 'cleanup/azure-storage-resourcegroup': {'default': 'openqa-upload', 'return_type': str}, @@ -70,12 +70,15 @@ def get_feature_property(feature: str, property: str, namespace: str = None): 'notify/smtp': {'default': None, 'return_type': str}, 'notify/smtp-port': {'default': 25, 'return_type': int}, 'notify/from': {'default': 'pcw@publiccloud.qa.suse.de', 'return_type': str}, + 'influxdb/org': {'default': 'pcw', 'return_type': str}, + 'influxdb/bucket': {'default': 'cloud_stat', 'return_type': str}, + 'influxdb/url': {'default': None, 'return_type': str}, } - key = '/'.join([feature, property]) + key = '/'.join([feature, feature_property]) if key not in default_values: raise LookupError(f"Missing {key} in default_values list") if namespace: - setting = f'{feature}.namespace.{namespace}/{property}' + setting = f'{feature}.namespace.{namespace}/{feature_property}' if PCWConfig.has(setting): return default_values[key]['return_type'](ConfigFile().get(setting)) return default_values[key]['return_type']( @@ -114,10 +117,10 @@ def has(setting: str) -> bool: return False @staticmethod - def getBoolean(config_path: str, namespace: str = None, default=False) -> bool: + def getBoolean(config_path: str, namespace: str | None = None, default=False) -> bool: if namespace: - feature, property = config_path.split('/') - setting = f'{feature}.namespace.{namespace}/{property}' + feature, feature_property = config_path.split('/') + setting = f'{feature}.namespace.{namespace}/{feature_property}' if PCWConfig.has(setting): value = ConfigFile().get(setting) else: