Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tracing using OpenTelemetry #34

Merged
merged 10 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
FROM python:3.9.0-buster AS build
FROM python:3.9-buster AS build

COPY . /src
WORKDIR /src
RUN python3 setup.py bdist_wheel

FROM python:3.9.0-slim-buster
FROM python:3.9-slim-buster

COPY --from=build /src/dist/*.whl /tmp
# hadolint ignore=DL3013
Expand Down
6 changes: 6 additions & 0 deletions manifests/base/suite-runner-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,19 @@ data:
name: {etos_configmap}
- secretRef:
name: {etos_rabbitmq_secret}
- configMapRef:
name: {etos_observability_configmap}
env:
- name: TERCC
value: '{EiffelTestExecutionRecipeCollectionCreatedEvent}'
- name: KUBEXIT_NAME
value: esr
- name: KUBEXIT_GRAVEYARD
value: /graveyard
- name: OTEL_CONTEXT
value: {otel_context}
- name: OTEL_COLLECTOR_HOST
value: {otel_collector_host}
volumeMounts:
- name: graveyard
mountPath: /graveyard
Expand Down
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,7 @@
# scipy==1.0
#
pyscaffold==3.2.3
etos_lib==3.2.1
etos_lib==4.2.0
opentelemetry-api~=1.21
opentelemetry-exporter-otlp~=1.21
opentelemetry-sdk~=1.21
5 changes: 4 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ setup_requires = pyscaffold>=3.2a0,<3.3a0
# Add here dependencies of your project (semicolon/line-separated), e.g.
install_requires =
pyscaffold==3.2.3
etos_lib==3.2.1
etos_lib==4.2.0
opentelemetry-api~=1.21
opentelemetry-exporter-otlp~=1.21
opentelemetry-sdk~=1.21

# Require a specific Python version, e.g. Python 2.7 or >= 3.4
python_requires = >=3.4
Expand Down
22 changes: 22 additions & 0 deletions src/suite_starter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@
"""ETOS suite starter module."""
import os
from importlib.metadata import version, PackageNotFoundError

from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import SERVICE_NAME, SERVICE_NAMESPACE, SERVICE_VERSION, Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

from etos_lib.logging.logger import setup_logging

# The suite starter shall not send logs to RabbitMQ as it
Expand All @@ -31,3 +38,18 @@
DEV = os.getenv("DEV", "false").lower() == "true"
ENVIRONMENT = "development" if DEV else "production"
setup_logging("ETOS Suite Starter", VERSION, ENVIRONMENT)

# Tracing is opt-in: the tracer provider below is only configured when the
# OTEL_EXPORTER_OTLP_TRACES_ENDPOINT environment variable is set to a non-empty value.
if os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"):
PROVIDER = TracerProvider(
resource=Resource.create(
{
# Resource attributes attached to the provider: a fixed service name, the
# package VERSION, and ENVIRONMENT ("development" or "production") as namespace.
SERVICE_NAME: "etos-suite-starter",
SERVICE_VERSION: VERSION,
SERVICE_NAMESPACE: ENVIRONMENT,
}
)
)
# NOTE(review): the exporter is created without arguments; presumably it reads its
# endpoint from the environment variable checked above - confirm.
EXPORTER = OTLPSpanExporter()
PROCESSOR = BatchSpanProcessor(EXPORTER)
PROVIDER.add_span_processor(PROCESSOR)
# Register the configured provider through the opentelemetry trace API.
trace.set_tracer_provider(PROVIDER)
76 changes: 51 additions & 25 deletions src/suite_starter/suite_starter.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,14 @@
import os
from pathlib import Path

from opentelemetry import trace, context
from opentelemetry.propagate import inject

from etos_lib import ETOS
from etos_lib.kubernetes.jobs import Job
from etos_lib.logging.logger import FORMAT_CONFIG
from etos_lib.opentelemetry.semconv import Attributes as SemConvAttributes


LOGGER = logging.getLogger(__name__)
# Remove spam from pika.
Expand Down Expand Up @@ -57,6 +62,7 @@ def __init__(self, suite_runner_template_path: str = "/app/suite_runner_template
self.suite_runner_callback,
can_nack=True,
)
self.tracer = trace.get_tracer(__name__)

def _load_template(self, suite_runner_template_path: str) -> str:
"""Load the suite runner template file."""
Expand All @@ -71,6 +77,7 @@ def _validate_template(self, suite_runner_template: str):
"EiffelTestExecutionRecipeCollectionCreatedEvent": "FakeEvent",
"suite_id": "FakeID",
"job_name": "FakeName",
"otel_context": "",
}
formatted = suite_runner_template.format(**data, **self.etos.config.get("configuration"))
job = Job(in_cluster=bool(os.getenv("DOCKER_CONTEXT")))
Expand All @@ -82,13 +89,26 @@ def _configure(self):
"docker_image": os.getenv("SUITE_RUNNER"),
"log_listener": os.getenv("LOG_LISTENER"),
"etos_configmap": os.getenv("ETOS_CONFIGMAP"),
"etos_observability_configmap": os.getenv("ETOS_OBSERVABILITY_CONFIGMAP"),
"etos_rabbitmq_secret": os.getenv("ETOS_RABBITMQ_SECRET"),
"ttl": os.getenv("ETOS_ESR_TTL", "3600"),
"termination_grace_period": os.getenv("ETOS_TERMINATION_GRACE_PERIOD", "300"),
"sidecar_image": os.getenv("ETOS_SIDECAR_IMAGE"),
"otel_collector_host": os.getenv("OTEL_COLLECTOR_HOST") or "null",
}
self.etos.config.set("configuration", configuration)

def _get_current_context(self):
    """Return the current OpenTelemetry context as one string.

    The propagation fields written by ``inject()`` are rendered as
    comma-separated ``key=value`` pairs. When nothing is injected the
    result is an empty string.
    """
    current = context.get_current()
    LOGGER.info("Current OpenTelemetry context: %s", current)
    # inject() fills the dict with a context reference,
    # e.g. {'traceparent': '00-0be6c260d9cbe9772298eaf19cb90a5b-371353ee8fbd3ced-01'}
    propagated = {}
    inject(propagated)
    pairs = [f"{key}={value}" for key, value in propagated.items()]
    return ",".join(pairs)

def suite_runner_callback(self, event, _):
"""Start a suite runner on a TERCC event.

Expand All @@ -97,31 +117,37 @@ def suite_runner_callback(self, event, _):
:return: Whether event was ACK:ed or not.
:rtype: bool
"""
suite_id = event.meta.event_id
FORMAT_CONFIG.identifier = suite_id
LOGGER.info("Received a TERCC event. Build data for ESR.")
data = {"EiffelTestExecutionRecipeCollectionCreatedEvent": json.dumps(event.json)}
data["suite_id"] = suite_id

job = Job(in_cluster=bool(os.getenv("DOCKER_CONTEXT")))
job_name = job.uniqueify(f"suite-runner-{suite_id}").lower()
data["job_name"] = job_name

LOGGER.info("Dynamic data: %r", data)
LOGGER.info("Static data: %r", self.etos.config.get("configuration"))
try:
assert data["EiffelTestExecutionRecipeCollectionCreatedEvent"]
except AssertionError as exception:
LOGGER.critical("Incomplete data for ESR. %r", exception)
raise

body = job.load_yaml(
self.suite_runner_template.format(**data, **self.etos.config.get("configuration"))
)
LOGGER.info("Starting new executor: %r", job_name)
job.create_job(body)
LOGGER.info("ESR successfully launched.")
return True
with self.tracer.start_as_current_span("suite", context=context.get_current()) as span:
suite_id = event.meta.event_id
FORMAT_CONFIG.identifier = suite_id
LOGGER.info("Received a TERCC event. Build data for ESR.")
data = {"EiffelTestExecutionRecipeCollectionCreatedEvent": json.dumps(event.json)}
data["suite_id"] = suite_id
data["otel_context"] = self._get_current_context()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if there is no context set? ETOS must work even if it is not set.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Made adjustments so that it works without errors in the log.

span.set_attribute(SemConvAttributes.SUITE_ID, suite_id)

job = Job(in_cluster=bool(os.getenv("DOCKER_CONTEXT")))
job_name = job.uniqueify(f"suite-runner-{suite_id}").lower()
span.set_attribute(SemConvAttributes.SUITE_RUNNER_JOB_ID, job_name)
data["job_name"] = job_name

LOGGER.info("Dynamic data: %r", data)
LOGGER.info("Static data: %r", self.etos.config.get("configuration"))
try:
assert data["EiffelTestExecutionRecipeCollectionCreatedEvent"]
except AssertionError as exception:
LOGGER.critical("Incomplete data for ESR. %r", exception)
span.record_exception(exception)
span.set_status(trace.Status(trace.StatusCode.ERROR))
raise

body = job.load_yaml(
self.suite_runner_template.format(**data, **self.etos.config.get("configuration"))
)
LOGGER.info("Starting new executor: %r", job_name)
job.create_job(body)
LOGGER.info("ESR successfully launched.")
return True

def run(self):
"""Run the SuiteStarter main loop.
Expand Down
Loading