Skip to content

Commit

Permalink
[DPE-3456] add messages for incorrect rels to mongos (#346)
Browse files Browse the repository at this point in the history
## Issue
Right now Charmed MongoDB applications that are running as shard or
replication can be integrated with mongos , this goes against the
proposed architecture from the [sharding spec
doc](https://docs.google.com/document/d/1kx5HUnMR2tiQQpCoWIyxJRUO2fdX4hhzVlo86noh-ts/edit?usp=sharingConnect)

 
## Solution
for cases in which either a Charmed MongoDB instance is in the role of
`shard` or `replication` and a relation has been made to `mongos` then
go into BlockedState
  • Loading branch information
MiaAltieri authored Feb 9, 2024
1 parent 7efb3eb commit cfa3ade
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 12 deletions.
24 changes: 19 additions & 5 deletions lib/charms/mongodb/v0/config_server_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from charms.mongodb.v1.mongos import MongosConnection
from ops.charm import CharmBase, EventBase, RelationBrokenEvent
from ops.framework import Object
from ops.model import ActiveStatus, MaintenanceStatus, WaitingStatus
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, WaitingStatus

from config import Config

Expand All @@ -35,7 +35,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 5
LIBPATCH = 6


class ClusterProvider(Object):
Expand Down Expand Up @@ -69,9 +69,9 @@ def pass_hook_checks(self, event: EventBase) -> bool:
event.defer()
return False

if not self.charm.is_role(Config.Role.CONFIG_SERVER):
if not self.is_valid_mongos_integration():
logger.info(
"Skipping %s. ShardingProvider is only be executed by config-server", type(event)
"Skipping %s. ClusterProvider is only be executed by config-server", type(event)
)
return False

Expand All @@ -80,9 +80,24 @@ def pass_hook_checks(self, event: EventBase) -> bool:

return True

def is_valid_mongos_integration(self) -> bool:
"""Returns true if the integration to mongos is valid."""
is_integrated_to_mongos = len(
self.charm.model.relations[Config.Relations.CLUSTER_RELATIONS_NAME]
)

if not self.charm.is_role(Config.Role.CONFIG_SERVER) and is_integrated_to_mongos:
return False

return True

def _on_relation_changed(self, event) -> None:
"""Handles providing mongos with KeyFile and hosts."""
if not self.pass_hook_checks(event):
if not self.is_valid_mongos_integration():
self.charm.unit.status = BlockedStatus(
"Relation to mongos not supported, config role must be config-server"
)
logger.info("Skipping relation joined event: hook checks did not pass")
return

Expand Down Expand Up @@ -208,7 +223,6 @@ def _on_relation_changed(self, event) -> None:
event.relation.id, CONFIG_SERVER_DB_KEY
)
if not key_file_contents or not config_server_db:
event.defer()
self.charm.unit.status = WaitingStatus("Waiting for secrets from config-server")
return

Expand Down
7 changes: 6 additions & 1 deletion lib/charms/mongodb/v1/shards_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 5
LIBPATCH = 6
KEYFILE_KEY = "key-file"
HOSTS_KEY = "host"
OPERATOR_PASSWORD_KEY = MongoDBUser.get_password_key_name_for_user(OperatorUser.get_username())
Expand Down Expand Up @@ -539,6 +539,11 @@ def pass_hook_checks(self, event):
logger.info("skipping %s is only be executed by shards", type(event))
return False

# occasionally, broken events have no application, in these scenarios nothing should be
# processed.
if not event.relation.app:
return False

mongos_hosts = event.relation.data[event.relation.app].get(HOSTS_KEY, None)
if isinstance(event, RelationBrokenEvent) and not mongos_hosts:
logger.info("Config-server relation never set up, no need to process broken event.")
Expand Down
22 changes: 17 additions & 5 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,11 +529,11 @@ def _on_storage_detaching(self, event: StorageDetachingEvent) -> None:
logger.error("Failed to remove %s from replica set, error=%r", self.unit.name, e)

def _on_update_status(self, event: UpdateStatusEvent):
# cannot have both legacy and new relations since they have different auth requirements
if self.client_relations._get_users_from_relations(
None, rel="obsolete"
) and self.client_relations._get_users_from_relations(None):
self.unit.status = BlockedStatus("cannot have both legacy and new relations")
# user-made mistakes might result in other incorrect statues. Prioritise informing users of
# their mistake.
invalid_integration_status = self.get_invalid_integration_status()
if invalid_integration_status:
self.unit.status = invalid_integration_status
return

# no need to report on replica set status until initialised
Expand Down Expand Up @@ -1353,6 +1353,18 @@ def _is_removing_last_replica(self) -> bool:
"""Returns True if the last replica (juju unit) is getting removed."""
return self.app.planned_units() == 0 and len(self._peers.units) == 0

def get_invalid_integration_status(self) -> Optional[StatusBase]:
"""Returns a status if an invalid integration is present."""
if self.client_relations._get_users_from_relations(
None, rel="obsolete"
) and self.client_relations._get_users_from_relations(None):
return BlockedStatus("cannot have both legacy and new relations")

if not self.cluster.is_valid_mongos_integration():
return BlockedStatus(
"Relation to mongos not supported, config role must be config-server"
)

def get_status(self) -> StatusBase:
"""Returns the status with the highest priority from backups, sharding, and mongod.
Expand Down
97 changes: 96 additions & 1 deletion tests/integration/sharding_tests/test_sharding_relations.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/usr/bin/env python3
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.
import time

import pytest
from juju.errors import JujuAPIError
from pytest_operator.plugin import OpsTest
Expand All @@ -11,6 +13,8 @@
REPLICATION_APP_NAME = "replication"
APP_CHARM_NAME = "application"
LEGACY_APP_CHARM_NAME = "legacy-application"
MONGOS_APP_NAME = "mongos"
MONGOS_HOST_APP_NAME = "application-host"

SHARDING_COMPONENTS = [SHARD_ONE_APP_NAME, CONFIG_SERVER_ONE_APP_NAME]

Expand All @@ -28,7 +32,11 @@

@pytest.mark.abort_on_fail
async def test_build_and_deploy(
ops_test: OpsTest, application_charm, legacy_charm, database_charm
ops_test: OpsTest,
application_charm,
legacy_charm,
database_charm,
mongos_host_application_charm,
) -> None:
"""Build and deploy a sharded cluster."""
await ops_test.model.deploy(
Expand All @@ -44,6 +52,18 @@ async def test_build_and_deploy(
await ops_test.model.deploy(
database_charm, config={"role": "shard"}, application_name=SHARD_ONE_APP_NAME
)
await ops_test.model.deploy(
MONGOS_APP_NAME,
channel="6/edge",
revision=3,
)

# TODO: Future PR, once data integrator works with mongos charm deploy that charm instead of
# packing and deploying the charm in the application dir.
await ops_test.model.deploy(
mongos_host_application_charm, application_name=MONGOS_HOST_APP_NAME
)

await ops_test.model.deploy(database_charm, application_name=REPLICATION_APP_NAME)
await ops_test.model.deploy(application_charm, application_name=APP_CHARM_NAME)
await ops_test.model.deploy(legacy_charm, application_name=LEGACY_APP_CHARM_NAME)
Expand All @@ -59,6 +79,19 @@ async def test_build_and_deploy(
timeout=TIMEOUT,
)

await ops_test.model.integrate(
f"{MONGOS_APP_NAME}",
f"{MONGOS_HOST_APP_NAME}",
)

await ops_test.model.wait_for_idle(
apps=[MONGOS_HOST_APP_NAME, MONGOS_APP_NAME],
idle_period=20,
raise_on_blocked=False,
timeout=TIMEOUT,
raise_on_error=False,
)


async def test_only_one_config_server_relation(ops_test: OpsTest) -> None:
"""Verify that a shard can only be related to one config server."""
Expand Down Expand Up @@ -218,3 +251,65 @@ async def test_replication_shard_relation(ops_test: OpsTest):
raise_on_blocked=False,
timeout=TIMEOUT,
)


async def test_replication_mongos_relation(ops_test: OpsTest) -> None:
"""Verifies connecting a replica to a mongos router fails."""
# attempt to add a replication deployment as a shard to the config server.
await ops_test.model.integrate(
f"{REPLICATION_APP_NAME}",
f"{MONGOS_APP_NAME}",
)

await ops_test.model.wait_for_idle(
apps=[REPLICATION_APP_NAME],
idle_period=20,
raise_on_blocked=False,
timeout=TIMEOUT,
)

replication_unit = ops_test.model.applications[REPLICATION_APP_NAME].units[0]
assert (
replication_unit.workload_status_message
== "Relation to mongos not supported, config role must be config-server"
), "replica cannot be related to mongos."

# clean up relations
await ops_test.model.applications[REPLICATION_APP_NAME].remove_relation(
f"{REPLICATION_APP_NAME}:cluster",
f"{MONGOS_APP_NAME}:cluster",
)

# TODO remove this and wait for mongos to be active
# right now we cannot wait for `mongos` to be active after removing the relation due to a bug
# in the mongos charm. To fix the bug it is first necessary to publish the updated library
# lib/charms/mongodb/v0/config_server.py
time.sleep(60)


async def test_shard_mongos_relation(ops_test: OpsTest) -> None:
"""Verifies connecting a shard to a mongos router fails."""
# attempt to add a replication deployment as a shard to the config server.
await ops_test.model.integrate(
f"{SHARD_ONE_APP_NAME}",
f"{MONGOS_APP_NAME}",
)

await ops_test.model.wait_for_idle(
apps=[SHARD_ONE_APP_NAME],
idle_period=20,
raise_on_blocked=False,
timeout=TIMEOUT,
)

shard_unit = ops_test.model.applications[SHARD_ONE_APP_NAME].units[0]
assert (
shard_unit.workload_status_message
== "Relation to mongos not supported, config role must be config-server"
), "replica cannot be related to mongos."

# clean up relations
await ops_test.model.applications[SHARD_ONE_APP_NAME].remove_relation(
f"{MONGOS_APP_NAME}:cluster",
f"{SHARD_ONE_APP_NAME}:cluster",
)
1 change: 1 addition & 0 deletions tests/unit/test_config_server_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def is_not_config_mock_call(*args):
return False

self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.add_relation("cluster", "mongos")

# fails due to being run on non-config-server
self.harness.charm.is_role = is_not_config_mock_call
Expand Down

0 comments on commit cfa3ade

Please sign in to comment.