Skip to content

Commit

Permalink
Enhance data sanitizer to support field exclusion
Browse files Browse the repository at this point in the history
- Add 'sensitive_field_to_exclude' configuration to allow specific fields to bypass sanitization.
- Update default sensitive fields to include 'client_id' and 'client_secret'.
- Extend unit tests to validate the new exclusion functionality for sensitive fields.
  • Loading branch information
muralov committed Jan 21, 2025
1 parent 6e39be5 commit 39283b8
Show file tree
Hide file tree
Showing 3 changed files with 205 additions and 11 deletions.
25 changes: 19 additions & 6 deletions src/services/data_sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,11 @@
"PASSWORD",
"PASS",
"KEY",
"CERT",
"PRIVATE",
"CREDENTIAL",
"AUTH",
"USERNAME",
"USER_NAME",
"CLIENT_ID",
"CLIENT_SECRET",
]

# Fields that typically contain sensitive data
Expand All @@ -52,6 +51,14 @@
"first_name",
"lastname",
"last_name",
"client_id",
"client_secret",
]

# Fields excluded from sanitization by default (matched by exact, case-sensitive key name)
DEFAULT_SENSITIVE_FIELD_TO_EXCLUDE = [
"secretName",
"authorizers",
]

REDACTED_VALUE = "[REDACTED]"
Expand All @@ -73,6 +80,7 @@ def __init__(self, config: DataSanitizationConfig | None = None):
resources_to_sanitize=DEFAULT_SENSITIVE_RESOURCES,
sensitive_env_vars=DEFAULT_SENSITIVE_ENV_VARS,
sensitive_field_names=DEFAULT_SENSITIVE_FIELD_NAMES,
sensitive_field_to_exclude=DEFAULT_SENSITIVE_FIELD_TO_EXCLUDE,
)
self.scrubber = scrubadub.Scrubber()

Expand Down Expand Up @@ -165,10 +173,15 @@ def _sanitize_dict(self, data: dict) -> dict:
result = data.copy()

for key, value in data.items():
# Check if the key should be excluded from sanitization
if (
self.config.sensitive_field_to_exclude
and key in self.config.sensitive_field_to_exclude
):
result[key] = value
# Check if the key indicates sensitive data
key_lower = key.lower()
if any(
sensitive in key_lower
elif any(
sensitive in key.lower()
for sensitive in self.config.sensitive_field_names
):
result[key] = REDACTED_VALUE
Expand Down
1 change: 1 addition & 0 deletions src/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class DataSanitizationConfig(BaseModel):
resources_to_sanitize: list[str] | None = None
sensitive_field_names: list[str] | None = None
sensitive_env_vars: list[str] | None = None
sensitive_field_to_exclude: list[str] | None = None


class Config(BaseModel):
Expand Down
190 changes: 185 additions & 5 deletions tests/unit/services/test_data_sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,12 @@ def setup(self):
{
"kind": "ConfigMap",
"metadata": {"name": "my-configmap"},
"data": {"username": "admin", "password": "secret"},
"data": {
"username": "admin",
"password": "test_password",
"secret_key": "test_secret_key",
"secretName": "test_secretName",
},
},
]

Expand All @@ -46,7 +51,8 @@ def setup(self):
DataSanitizationConfig(
resources_to_sanitize=["Pod"],
sensitive_env_vars=["CUSTOM_SECRET"],
sensitive_field_names=["password"],
sensitive_field_names=["password", "secret_key", "secretName"],
sensitive_field_to_exclude=["secretName", "secret_key"],
),
test_data,
[
Expand Down Expand Up @@ -75,15 +81,21 @@ def setup(self):
{
"kind": "ConfigMap",
"metadata": {"name": "my-configmap"},
"data": {"username": "admin", "password": REDACTED_VALUE},
"data": {
"username": "admin",
"password": REDACTED_VALUE,
"secret_key": "test_secret_key", # Not redacted due to exclusion
"secretName": "test_secretName", # Not redacted due to exclusion
},
},
],
),
(
DataSanitizationConfig(
resources_to_sanitize=["Pod"],
sensitive_env_vars=["NORMAL_VAR"],
sensitive_field_names=["username"],
sensitive_field_names=["username", "password"],
sensitive_field_to_exclude=["username"],
),
test_data,
[
Expand Down Expand Up @@ -112,7 +124,12 @@ def setup(self):
{
"kind": "ConfigMap",
"metadata": {"name": "my-configmap"},
"data": {"username": REDACTED_VALUE, "password": "secret"},
"data": {
"username": "admin", # Not redacted due to exclusion
"password": REDACTED_VALUE,
"secret_key": "test_secret_key",
"secretName": "test_secretName",
},
},
],
),
Expand Down Expand Up @@ -839,6 +856,169 @@ def test_data_structures_and_pii(self, test_data, expected_results, error):
},
},
),
# test APIRule with JWT authentications: the "authentications" value is expected to be redacted
(
{
"apiVersion": "gateway.kyma-project.io/v2alpha1",
"kind": "APIRule",
"metadata": {
"name": "my-kyma-resource",
"namespace": "kyma-system",
},
"spec": {
"hosts": ["subdomain.domain.com"],
"service": {"name": "service", "port": "8080"},
"gateway": "kyma-system/kyma-gateway",
"rules": [
{
"jwt": {
"authentications": [
{"issuer": "issuer", "jwksUri": "jwksUri"}
]
},
"methods": ["GET"],
"path": "/*",
}
],
},
},
{
"apiVersion": "gateway.kyma-project.io/v2alpha1",
"kind": "APIRule",
"metadata": {
"name": "my-kyma-resource",
"namespace": "kyma-system",
},
"spec": {
"hosts": ["subdomain.domain.com"],
"service": {"name": "service", "port": "8080"},
"gateway": "kyma-system/kyma-gateway",
"rules": [
{
"jwt": {
"authentications": REDACTED_VALUE,
},
"methods": ["GET"],
"path": "/*",
}
],
},
},
),
# test APIRule with extAuth authorizers: the enclosing "extAuth" value is expected to be redacted
(
{
"apiVersion": "gateway.kyma-project.io/v2alpha1",
"kind": "APIRule",
"metadata": {"name": "test-apirule", "namespace": "test-namespace"},
"spec": {
"hosts": ["test.domain.com"],
"service": {"name": "test-service", "port": "8080"},
"gateway": "kyma-gateway/kyma-system",
"rules": [
{
"extAuth": {"authorizers": ["oauth2-proxy"]},
"methods": ["GET"],
"path": "/*",
}
],
},
},
{
"apiVersion": "gateway.kyma-project.io/v2alpha1",
"kind": "APIRule",
"metadata": {"name": "test-apirule", "namespace": "test-namespace"},
"spec": {
"hosts": ["test.domain.com"],
"service": {"name": "test-service", "port": "8080"},
"gateway": "kyma-gateway/kyma-system",
"rules": [
{
"extAuth": REDACTED_VALUE,
"methods": ["GET"],
"path": "/*",
}
],
},
},
),
# test Serverless resource: "secretName" values are kept as-is, endpoint URLs are replaced with "{{URL}}"
(
{
"apiVersion": "operator.kyma-project.io/v1alpha1",
"kind": "Serverless",
"metadata": {
"finalizers": [
"serverless-operator.kyma-project.io/deletion-hook"
],
"name": "default",
},
"namespace": "kyma-system",
"spec": {
"dockerRegistry": {
"enableInternal": False,
"secretName": "my-secret",
},
"eventing": {
"endpoint": "http://eventing-publisher-proxy.kyma-system.svc.cluster.local/publish",
},
"tracing": {
"endpoint": "http://telemetry-otlp-traces.kyma-system.svc.cluster.local:4318/v1/traces",
},
"secretName": "my-secret",
},
"eventing": {
"endpoint": "http://eventing-publisher-proxy.kyma-system.svc.cluster.local/publish",
},
"tracing": {
"endpoint": "http://telemetry-otlp-traces.kyma-system.svc.cluster.local:4318/v1/traces",
},
"targetCPUUtilizationPercentage": 50,
"functionRequeueDuration": "5m",
"functionBuildExecutorArgs": "--insecure,--skip-tls-verify,--skip-unused-stages,--log-format=text,--cache=true,--use-new-run,--compressed-caching=false",
"functionBuildMaxSimultaneousJobs": 5,
"healthzLivenessTimeout": "10s",
"defaultBuildJobPreset": "normal",
"defaultRuntimePodPreset": "M",
},
{
"apiVersion": "operator.kyma-project.io/v1alpha1",
"kind": "Serverless",
"metadata": {
"finalizers": [
"serverless-operator.kyma-project.io/deletion-hook"
],
"name": "default",
},
"namespace": "kyma-system",
"spec": {
"dockerRegistry": {
"enableInternal": False,
"secretName": "my-secret",
},
"eventing": {
"endpoint": "{{URL}}",
},
"tracing": {
"endpoint": "{{URL}}",
},
"secretName": "my-secret",
},
"eventing": {
"endpoint": "{{URL}}",
},
"tracing": {
"endpoint": "{{URL}}",
},
"targetCPUUtilizationPercentage": 50,
"functionRequeueDuration": "5m",
"functionBuildExecutorArgs": "--insecure,--skip-tls-verify,--skip-unused-stages,--log-format=text,--cache=true,--use-new-run,--compressed-caching=false",
"functionBuildMaxSimultaneousJobs": 5,
"healthzLivenessTimeout": "10s",
"defaultBuildJobPreset": "normal",
"defaultRuntimePodPreset": "M",
},
),
],
)
def test_kubernetes_resources(self, test_data, expected_results):
Expand Down

0 comments on commit 39283b8

Please sign in to comment.