# Git unified diff that adds OpenTelemetry tracing to the ETOS suite starter.
# NOTE(review): the line breaks inside the hunks below have been collapsed to
# single spaces; presumably they must be restored before the patch can be applied.
#
# Files touched, in order of appearance:
#   Dockerfile
#       Base images change from python:3.9.0-buster / python:3.9.0-slim-buster
#       to python:3.9-buster / python:3.9-slim-buster.
#   manifests/base/suite-runner-template.yaml
#       Adds a configMapRef named {etos_observability_configmap} and two env
#       entries, OTEL_CONTEXT ({otel_context}) and OTEL_COLLECTOR_HOST
#       ({otel_collector_host}).
#   requirements.txt, setup.cfg
#       etos_lib 3.2.1 -> 4.2.0; adds opentelemetry-api, opentelemetry-exporter-otlp
#       and opentelemetry-sdk, each pinned with ~=1.21.
#   src/suite_starter/__init__.py
#       When OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is set, builds a TracerProvider
#       (service name "etos-suite-starter", version VERSION, namespace ENVIRONMENT),
#       attaches a BatchSpanProcessor wrapping an OTLPSpanExporter, and registers
#       it with trace.set_tracer_provider().
#   src/suite_starter/suite_starter.py
#       - __init__ stores trace.get_tracer(__name__) as self.tracer.
#       - _validate_template supplies an empty "otel_context" placeholder value.
#       - _configure adds "etos_observability_configmap" and "otel_collector_host"
#         (the latter falls back to the string "null" when the env var is unset).
#       - New _get_current_context() calls inject() on an empty carrier dict and
#         returns its items joined as comma-separated "key=value" pairs.
#       - suite_runner_callback body is re-indented under a "suite" span; it sets
#         the SUITE_ID and SUITE_RUNNER_JOB_ID span attributes, passes the
#         serialized context to the template as "otel_context", and records the
#         exception / sets ERROR status on the span before re-raising.
#
# NOTE(review): setup.cfg still declares python_requires = >=3.4 (unchanged context
#   line) although __init__.py imports importlib.metadata - confirm the floor.
# NOTE(review): {otel_context} and {otel_collector_host} are rendered unquoted into
#   the YAML template; an empty context or the "null" fallback presumably parses
#   as a YAML null rather than a string - confirm that is the intended value.
# NOTE(review): the validation inside the span still relies on `assert`, which is
#   stripped when Python runs with -O - carried over from the pre-patch code.
diff --git a/Dockerfile b/Dockerfile index c9ee359..cf7b3e4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,10 @@ -FROM python:3.9.0-buster AS build +FROM python:3.9-buster AS build COPY . /src WORKDIR /src RUN python3 setup.py bdist_wheel -FROM python:3.9.0-slim-buster +FROM python:3.9-slim-buster COPY --from=build /src/dist/*.whl /tmp # hadolint ignore=DL3013 diff --git a/manifests/base/suite-runner-template.yaml b/manifests/base/suite-runner-template.yaml index 8de9502..c017007 100644 --- a/manifests/base/suite-runner-template.yaml +++ b/manifests/base/suite-runner-template.yaml @@ -58,6 +58,8 @@ data: name: {etos_configmap} - secretRef: name: {etos_rabbitmq_secret} + - configMapRef: + name: {etos_observability_configmap} env: - name: TERCC value: '{EiffelTestExecutionRecipeCollectionCreatedEvent}' @@ -65,6 +67,10 @@ data: value: esr - name: KUBEXIT_GRAVEYARD value: /graveyard + - name: OTEL_CONTEXT + value: {otel_context} + - name: OTEL_COLLECTOR_HOST + value: {otel_collector_host} volumeMounts: - name: graveyard mountPath: /graveyard diff --git a/requirements.txt b/requirements.txt index d9e117f..40b5b6a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,7 @@ # scipy==1.0 # pyscaffold==3.2.3 -etos_lib==3.2.1 +etos_lib==4.2.0 +opentelemetry-api~=1.21 +opentelemetry-exporter-otlp~=1.21 +opentelemetry-sdk~=1.21 diff --git a/setup.cfg b/setup.cfg index 48f7d45..0db0819 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,7 +26,10 @@ setup_requires = pyscaffold>=3.2a0,<3.3a0 # Add here dependencies of your project (semicolon/line-separated), e.g. install_requires = pyscaffold==3.2.3 - etos_lib==3.2.1 + etos_lib==4.2.0 + opentelemetry-api~=1.21 + opentelemetry-exporter-otlp~=1.21 + opentelemetry-sdk~=1.21 # Require a specific Python version, e.g. 
Python 2.7 or >= 3.4 python_requires = >=3.4 diff --git a/src/suite_starter/__init__.py b/src/suite_starter/__init__.py index 3fd7013..014623a 100644 --- a/src/suite_starter/__init__.py +++ b/src/suite_starter/__init__.py @@ -16,6 +16,13 @@ """ETOS suite starter module.""" import os from importlib.metadata import version, PackageNotFoundError + +from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk.resources import SERVICE_NAME, SERVICE_NAMESPACE, SERVICE_VERSION, Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor + from etos_lib.logging.logger import setup_logging # The suite starter shall not send logs to RabbitMQ as it @@ -31,3 +38,18 @@ DEV = os.getenv("DEV", "false").lower() == "true" ENVIRONMENT = "development" if DEV else "production" setup_logging("ETOS Suite Starter", VERSION, ENVIRONMENT) + +if os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"): + PROVIDER = TracerProvider( + resource=Resource.create( + { + SERVICE_NAME: "etos-suite-starter", + SERVICE_VERSION: VERSION, + SERVICE_NAMESPACE: ENVIRONMENT, + } + ) + ) + EXPORTER = OTLPSpanExporter() + PROCESSOR = BatchSpanProcessor(EXPORTER) + PROVIDER.add_span_processor(PROCESSOR) + trace.set_tracer_provider(PROVIDER) diff --git a/src/suite_starter/suite_starter.py b/src/suite_starter/suite_starter.py index 91e7136..b0d13dc 100644 --- a/src/suite_starter/suite_starter.py +++ b/src/suite_starter/suite_starter.py @@ -21,9 +21,14 @@ import os from pathlib import Path +from opentelemetry import trace, context +from opentelemetry.propagate import inject + from etos_lib import ETOS from etos_lib.kubernetes.jobs import Job from etos_lib.logging.logger import FORMAT_CONFIG +from etos_lib.opentelemetry.semconv import Attributes as SemConvAttributes + LOGGER = logging.getLogger(__name__) # Remove spam from pika. 
@@ -57,6 +62,7 @@ def __init__(self, suite_runner_template_path: str = "/app/suite_runner_template self.suite_runner_callback, can_nack=True, ) + self.tracer = trace.get_tracer(__name__) def _load_template(self, suite_runner_template_path: str) -> str: """Load the suite runner template file.""" @@ -71,6 +77,7 @@ def _validate_template(self, suite_runner_template: str): "EiffelTestExecutionRecipeCollectionCreatedEvent": "FakeEvent", "suite_id": "FakeID", "job_name": "FakeName", + "otel_context": "", } formatted = suite_runner_template.format(**data, **self.etos.config.get("configuration")) job = Job(in_cluster=bool(os.getenv("DOCKER_CONTEXT"))) @@ -82,13 +89,26 @@ def _configure(self): "docker_image": os.getenv("SUITE_RUNNER"), "log_listener": os.getenv("LOG_LISTENER"), "etos_configmap": os.getenv("ETOS_CONFIGMAP"), + "etos_observability_configmap": os.getenv("ETOS_OBSERVABILITY_CONFIGMAP"), "etos_rabbitmq_secret": os.getenv("ETOS_RABBITMQ_SECRET"), "ttl": os.getenv("ETOS_ESR_TTL", "3600"), "termination_grace_period": os.getenv("ETOS_TERMINATION_GRACE_PERIOD", "300"), "sidecar_image": os.getenv("ETOS_SIDECAR_IMAGE"), + "otel_collector_host": os.getenv("OTEL_COLLECTOR_HOST") or "null", } self.etos.config.set("configuration", configuration) + def _get_current_context(self): + """Get current OpenTelemetry context.""" + ctx = context.get_current() + LOGGER.info("Current OpenTelemetry context: %s", ctx) + carrier = {} + # inject() creates a dict with context reference, + # e. g. {'traceparent': '00-0be6c260d9cbe9772298eaf19cb90a5b-371353ee8fbd3ced-01'} + inject(carrier) + env = ",".join(f"{k}={v}" for k, v in carrier.items()) + return env + def suite_runner_callback(self, event, _): """Start a suite runner on a TERCC event. @@ -97,31 +117,37 @@ def suite_runner_callback(self, event, _): :return: Whether event was ACK:ed or not. :rtype: bool """ - suite_id = event.meta.event_id - FORMAT_CONFIG.identifier = suite_id - LOGGER.info("Received a TERCC event. 
Build data for ESR.") - data = {"EiffelTestExecutionRecipeCollectionCreatedEvent": json.dumps(event.json)} - data["suite_id"] = suite_id - - job = Job(in_cluster=bool(os.getenv("DOCKER_CONTEXT"))) - job_name = job.uniqueify(f"suite-runner-{suite_id}").lower() - data["job_name"] = job_name - - LOGGER.info("Dynamic data: %r", data) - LOGGER.info("Static data: %r", self.etos.config.get("configuration")) - try: - assert data["EiffelTestExecutionRecipeCollectionCreatedEvent"] - except AssertionError as exception: - LOGGER.critical("Incomplete data for ESR. %r", exception) - raise - - body = job.load_yaml( - self.suite_runner_template.format(**data, **self.etos.config.get("configuration")) - ) - LOGGER.info("Starting new executor: %r", job_name) - job.create_job(body) - LOGGER.info("ESR successfully launched.") - return True + with self.tracer.start_as_current_span("suite", context=context.get_current()) as span: + suite_id = event.meta.event_id + FORMAT_CONFIG.identifier = suite_id + LOGGER.info("Received a TERCC event. Build data for ESR.") + data = {"EiffelTestExecutionRecipeCollectionCreatedEvent": json.dumps(event.json)} + data["suite_id"] = suite_id + data["otel_context"] = self._get_current_context() + span.set_attribute(SemConvAttributes.SUITE_ID, suite_id) + + job = Job(in_cluster=bool(os.getenv("DOCKER_CONTEXT"))) + job_name = job.uniqueify(f"suite-runner-{suite_id}").lower() + span.set_attribute(SemConvAttributes.SUITE_RUNNER_JOB_ID, job_name) + data["job_name"] = job_name + + LOGGER.info("Dynamic data: %r", data) + LOGGER.info("Static data: %r", self.etos.config.get("configuration")) + try: + assert data["EiffelTestExecutionRecipeCollectionCreatedEvent"] + except AssertionError as exception: + LOGGER.critical("Incomplete data for ESR. 
%r", exception) + span.record_exception(exception) + span.set_status(trace.Status(trace.StatusCode.ERROR)) + raise + + body = job.load_yaml( + self.suite_runner_template.format(**data, **self.etos.config.get("configuration")) + ) + LOGGER.info("Starting new executor: %r", job_name) + job.create_job(body) + LOGGER.info("ESR successfully launched.") + return True def run(self): """Run the SuiteStarter main loop.