From c01a23faa856cc77a8f56bd1ab8af8488f42bfcc Mon Sep 17 00:00:00 2001 From: Pavel Mitrafanau Date: Fri, 20 Dec 2024 13:08:17 +0100 Subject: [PATCH] feat: Setting for tweaking merging strategy for JSON arrays in configs (#622) --- README.md | 101 +++++++++--------- .../core/server/config/FileConfigStore.java | 28 ++++- .../server/config/FileConfigStoreTest.java | 77 +++++++++++++ .../core/server/config/first.config.json | 9 ++ .../core/server/config/second.config.json | 9 ++ 5 files changed, 169 insertions(+), 55 deletions(-) create mode 100644 server/src/test/java/com/epam/aidial/core/server/config/FileConfigStoreTest.java create mode 100644 server/src/test/resources/com/epam/aidial/core/server/config/first.config.json create mode 100644 server/src/test/resources/com/epam/aidial/core/server/config/second.config.json diff --git a/README.md b/README.md index de68e071..e9d8efcd 100644 --- a/README.md +++ b/README.md @@ -46,56 +46,57 @@ Priority order: 2. File specified in "AIDIAL_SETTINGS" environment variable. 3. Default resource file: src/main/resources/aidial.settings.json. -| Setting | Default | Required | Description -|-----------------------------------------------|:--------------------------:|:--------:|-------------------------------------------------| -| config.files | aidial.config.json | No |List of paths to dynamic settings. Refer to [example](sample/aidial.config.json) of the file with [dynamic settings](#dynamic-settings).| -| config.reload | 60000 | No |Config reload interval in milliseconds. -| identityProviders | - | Yes |Map of identity providers. **Note**: At least one identity provider must be provided. Refer to [examples](sample/aidial.settings.json) to view available providers. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Auth/2.%20Web/1.overview.md) to view guidelines for configuring supported providers. -| identityProviders.*.jwksUrl | - | Optional |Url to jwks provider. 
**Required** if `disabledVerifyJwt` is set to `false`. **Note**: Either `jwksUrl` or `userInfoEndpoint` must be provided. -| identityProviders.*.userInfoEndpoint | - | Optional |Url to user info endpoint. **Note**: Either `jwksUrl` or `userInfoEndpoint` must be provided or `disableJwtVerification` is unset. Refer to [Google example](sample/aidial.settings.json). -| identityProviders.*.rolePath | - | Yes |Path(s) to the claim user roles in JWT token or user info response, e.g. `resource_access.chatbot-ui.roles` or just `roles`. Can be single String or Array of Strings. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Auth/2.%20Web/1.overview.md) to view guidelines for configuring supported providers. -| identityProviders.*.projectPath | - | No |Path(s) to the claim in JWT token or user info response, e.g. `azp`, `aud` or `some.path.client` from which project name can be taken. Can be single String. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Auth/2.%20Web/1.overview.md) to view guidelines for configuring supported providers. -| identityProviders.*.rolesDelimiter | - | No |Delimiter to split roles into array in case when list of roles presented as single String. e.g. `"rolesDelimiter": " "` -| identityProviders.*.loggingKey | - | No |User information to search in claims of JWT token. `email` or `sub` should be sufficient in most cases. **Note**: `email` might be unavailable for some IDPs. Please check your IDP documentation in this case. -| identityProviders.*.loggingSalt | - | No |Salt to hash user information for logging. -| identityProviders.*.positiveCacheExpirationMs | 600000 | No | How long to retain JWKS response in the cache in case of successfull response. -| identityProviders.*.negativeCacheExpirationMs | 10000 | No |How long to retain JWKS response in the cache in case of failed response. -| identityProviders.*.issuerPattern | - | No |Regexp to match the claim "iss" to identity provider. 
-| identityProviders.*.disableJwtVerification | false | No |The flag disables JWT verification. *Note*. `userInfoEndpoint` must be unset if the flag is set to `true`. -| vertx.* | - | No |Vertx settings. Refer to [vertx.io](https://vertx.io/docs/apidocs/io/vertx/core/VertxOptions.html) to learn more. -| server.* | - | No |Vertx HTTP server settings for incoming requests. -| client.* | - | No |Vertx HTTP client settings for outbound requests. -| storage.provider | filesystem | Yes |Specifies blob storage provider. Supported providers: s3, aws-s3, azureblob, google-cloud-storage, filesystem. See examples in the sections below. -| storage.endpoint | - | Optional |Specifies endpoint url for s3 compatible storages. **Note**: The setting might be required. That depends on a concrete provider. -| storage.identity | - | Optional |Blob storage access key. Can be optional for filesystem, aws-s3, google-cloud-storage providers. Refer to [sections in this document](#aws-s3-blob-store) dedicated to specific storage providers. -| storage.credential | - | Optional |Blob storage secret key. Can be optional for filesystem, aws-s3, google-cloud-storage providers. -| storage.bucket | - | No |Blob storage bucket. -| storage.overrides.* | - | No |Key-value pairs to override storage settings. `*` might be any specific blob storage setting to be overridden. Refer to [examples](#temporary-credentials-1) in the sections below. -| storage.createBucket | false | No |Indicates whether bucket should be created on start-up. -| storage.prefix | - | No |Base prefix for all stored resources. The purpose to use the same bucket for different environments, e.g. dev, prod, pre-prod. Must not contain path separators or any invalid chars. -| storage.maxUploadedFileSize | 536870912 | No |Maximum size in bytes of uploaded file. 
If a size of uploaded file exceeds the limit the server returns HTTP code 413 -| encryption.secret | - | No |Secret is used for AES encryption of a prefix to the bucket blob storage. The value should be random generated string. -| encryption.key | - | No |Key is used for AES encryption of a prefix to the bucket blob storage. The value should be random generated string. -| resources.maxSize | 1048576 | No |Max allowed size in bytes for a resource. -| resources.syncPeriod | 60000 | No |Period in milliseconds, how frequently check for resources to sync. -| resources.syncDelay | 120000 | No |Delay in milliseconds for a resource to be written back in object storage after last modification. -| resources.syncBatch | 4096 | No |How many resources to sync in one go. -| resources.cacheExpiration | 300000 | No |Expiration in milliseconds for synced resources in Redis. -| resources.compressionMinSize | 256 | No |Compress a resource with gzip if its size in bytes more or equal to this value. -| redis.singleServerConfig.address | - | Yes |Redis single server addresses, e.g. "redis://host:port". Either `singleServerConfig` or `clusterServersConfig` must be provided. -| redis.clusterServersConfig.nodeAddresses | - | Yes |Json array with Redis cluster server addresses, e.g. ["redis://host1:port1","redis://host2:port2"]. Either `singleServerConfig` or `clusterServersConfig` must be provided. -| redis.provider.* | - | No |Provider specific settings -| redis.provider.name | - | Yes |Provider name. The valid values are `aws-elasti-cache`(see [instructions](https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/auth-iam.html)). -| redis.provider.userId | - | Yes | IAM-enabled user ID. **Note**. It's applied to `aws-elasti-cache` -| redis.provider.region | - | Yes | Geo region where the cache is located. **Note**. It's applied to `aws-elasti-cache` -| redis.provider.clusterName | - | Yes | Redis cluster name. **Note**. 
It's applied to `aws-elasti-cache` -| redis.provider.serverless | - | Yes | The flag indicates if the cache is serverless. **Note**. It's applied to `aws-elasti-cache` -| invitations.ttlInSeconds | 259200 | No |Invitation time to live in seconds. -| access.admin.rules | - | No |Matches claims from identity providers with the rules to figure out whether a user is allowed to perform admin actions, like deleting any resource or approving a publication. Example: [{"source": "roles", "function": "EQUAL", "targets": ["admin"]}]. If roles contain "admin, the actions are allowed. -| applications.includeCustomApps | false | No |The flag indicates whether custom applications should be included into openai listing -| applications.controllerEndpoint | - | No |The endpoint to Application Controller Web Service that manages deployments for applications with functions -| applications.controllerTimeout | 240000 | No |The timeout of operations to Application Controller Web Service -| applications.checkPeriod | 300000 | No |The interval at which to check the pending operations for applications with functions +| Setting | Default | Required | Description +|-----------------------------------------------|:------------------:|:--------:|-------------------------------------------------| +| config.files | aidial.config.json | No |List of paths to dynamic settings. Refer to [example](sample/aidial.config.json) of the file with [dynamic settings](#dynamic-settings).| +| config.reload | 60000 | No |Config reload interval in milliseconds. +| config.jsonMergeStrategy.overwriteArrays | false | No |Specifies a merging strategy for JSON arrays. If it's set to `true`, arrays will be overwritten. Otherwise, they will be concatenated. +| identityProviders | - | Yes |Map of identity providers. **Note**: At least one identity provider must be provided. Refer to [examples](sample/aidial.settings.json) to view available providers. 
Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Auth/2.%20Web/1.overview.md) to view guidelines for configuring supported providers. +| identityProviders.*.jwksUrl | - | Optional |Url to jwks provider. **Required** if `disableJwtVerification` is set to `false`. **Note**: Either `jwksUrl` or `userInfoEndpoint` must be provided. +| identityProviders.*.userInfoEndpoint | - | Optional |Url to user info endpoint. **Note**: Either `jwksUrl` or `userInfoEndpoint` must be provided or `disableJwtVerification` is unset. Refer to [Google example](sample/aidial.settings.json). +| identityProviders.*.rolePath | - | Yes |Path(s) to the claim user roles in JWT token or user info response, e.g. `resource_access.chatbot-ui.roles` or just `roles`. Can be single String or Array of Strings. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Auth/2.%20Web/1.overview.md) to view guidelines for configuring supported providers. +| identityProviders.*.projectPath | - | No |Path(s) to the claim in JWT token or user info response, e.g. `azp`, `aud` or `some.path.client` from which project name can be taken. Can be single String. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Auth/2.%20Web/1.overview.md) to view guidelines for configuring supported providers. +| identityProviders.*.rolesDelimiter | - | No |Delimiter to split roles into an array when the list of roles is presented as a single String, e.g. `"rolesDelimiter": " "` +| identityProviders.*.loggingKey | - | No |User information to search in claims of JWT token. `email` or `sub` should be sufficient in most cases. **Note**: `email` might be unavailable for some IDPs. Please check your IDP documentation in this case. +| identityProviders.*.loggingSalt | - | No |Salt to hash user information for logging. +| identityProviders.*.positiveCacheExpirationMs | 600000 | No | How long to retain JWKS response in the cache in case of successful response. 
+| identityProviders.*.negativeCacheExpirationMs | 10000 | No |How long to retain JWKS response in the cache in case of failed response. +| identityProviders.*.issuerPattern | - | No |Regexp to match the claim "iss" to identity provider. +| identityProviders.*.disableJwtVerification | false | No |The flag disables JWT verification. **Note**: `userInfoEndpoint` must be unset if the flag is set to `true`. +| vertx.* | - | No |Vertx settings. Refer to [vertx.io](https://vertx.io/docs/apidocs/io/vertx/core/VertxOptions.html) to learn more. +| server.* | - | No |Vertx HTTP server settings for incoming requests. +| client.* | - | No |Vertx HTTP client settings for outbound requests. +| storage.provider | filesystem | Yes |Specifies blob storage provider. Supported providers: s3, aws-s3, azureblob, google-cloud-storage, filesystem. See examples in the sections below. +| storage.endpoint | - | Optional |Specifies endpoint url for s3 compatible storages. **Note**: The setting might be required. That depends on a concrete provider. +| storage.identity | - | Optional |Blob storage access key. Can be optional for filesystem, aws-s3, google-cloud-storage providers. Refer to [sections in this document](#aws-s3-blob-store) dedicated to specific storage providers. +| storage.credential | - | Optional |Blob storage secret key. Can be optional for filesystem, aws-s3, google-cloud-storage providers. +| storage.bucket | - | No |Blob storage bucket. +| storage.overrides.* | - | No |Key-value pairs to override storage settings. `*` might be any specific blob storage setting to be overridden. Refer to [examples](#temporary-credentials-1) in the sections below. +| storage.createBucket | false | No |Indicates whether bucket should be created on start-up. +| storage.prefix | - | No |Base prefix for all stored resources. The purpose is to use the same bucket for different environments, e.g. dev, prod, pre-prod. Must not contain path separators or any invalid chars. 
+| storage.maxUploadedFileSize | 536870912 | No |Maximum size in bytes of uploaded file. If the size of an uploaded file exceeds the limit, the server returns HTTP code 413. +| encryption.secret | - | No |Secret is used for AES encryption of a prefix to the bucket blob storage. The value should be a randomly generated string. +| encryption.key | - | No |Key is used for AES encryption of a prefix to the bucket blob storage. The value should be a randomly generated string. +| resources.maxSize | 1048576 | No |Max allowed size in bytes for a resource. +| resources.syncPeriod | 60000 | No |Period in milliseconds, how frequently to check for resources to sync. +| resources.syncDelay | 120000 | No |Delay in milliseconds for a resource to be written back in object storage after last modification. +| resources.syncBatch | 4096 | No |How many resources to sync in one go. +| resources.cacheExpiration | 300000 | No |Expiration in milliseconds for synced resources in Redis. +| resources.compressionMinSize | 256 | No |Compress a resource with gzip if its size in bytes is greater than or equal to this value. +| redis.singleServerConfig.address | - | Yes |Redis single server addresses, e.g. "redis://host:port". Either `singleServerConfig` or `clusterServersConfig` must be provided. +| redis.clusterServersConfig.nodeAddresses | - | Yes |Json array with Redis cluster server addresses, e.g. ["redis://host1:port1","redis://host2:port2"]. Either `singleServerConfig` or `clusterServersConfig` must be provided. +| redis.provider.* | - | No |Provider specific settings +| redis.provider.name | - | Yes |Provider name. The valid values are `aws-elasti-cache` (see [instructions](https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/auth-iam.html)). +| redis.provider.userId | - | Yes | IAM-enabled user ID. **Note**. It's applied to `aws-elasti-cache` +| redis.provider.region | - | Yes | Geo region where the cache is located. **Note**. 
It's applied to `aws-elasti-cache` +| redis.provider.clusterName | - | Yes | Redis cluster name. **Note**. It's applied to `aws-elasti-cache` +| redis.provider.serverless | - | Yes | The flag indicates if the cache is serverless. **Note**. It's applied to `aws-elasti-cache` +| invitations.ttlInSeconds | 259200 | No |Invitation time to live in seconds. +| access.admin.rules | - | No |Matches claims from identity providers with the rules to figure out whether a user is allowed to perform admin actions, like deleting any resource or approving a publication. Example: [{"source": "roles", "function": "EQUAL", "targets": ["admin"]}]. If roles contain "admin", the actions are allowed. +| applications.includeCustomApps | false | No |The flag indicates whether custom applications should be included into openai listing. +| applications.controllerEndpoint | - | No |The endpoint to Application Controller Web Service that manages deployments for applications with functions +| applications.controllerTimeout | 240000 | No |The timeout of operations to Application Controller Web Service +| applications.checkPeriod | 300000 | No |The interval at which to check the pending operations for applications with functions. ### Storage requirements diff --git a/server/src/main/java/com/epam/aidial/core/server/config/FileConfigStore.java b/server/src/main/java/com/epam/aidial/core/server/config/FileConfigStore.java index dc9b9e84..bd04e3d3 100644 --- a/server/src/main/java/com/epam/aidial/core/server/config/FileConfigStore.java +++ b/server/src/main/java/com/epam/aidial/core/server/config/FileConfigStore.java @@ -12,9 +12,10 @@ import com.epam.aidial.core.config.Role; import com.epam.aidial.core.config.Route; import com.epam.aidial.core.server.security.ApiKeyStore; -import com.epam.aidial.core.server.upstream.UpstreamRouteProvider; -import com.epam.aidial.core.server.util.ProxyUtil; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.MapperFeature; +import 
com.fasterxml.jackson.databind.json.JsonMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; import io.vertx.core.Vertx; import io.vertx.core.json.JsonObject; import lombok.SneakyThrows; @@ -32,11 +33,13 @@ @Slf4j public final class FileConfigStore implements ConfigStore { + private final JsonMapper jsonMapper; /* per-instance mapper built from settings by buildJsonMapper(); used by loadConfig() */ private final String[] paths; private volatile Config config; private final ApiKeyStore apiKeyStore; public FileConfigStore(Vertx vertx, JsonObject settings, ApiKeyStore apiKeyStore) { + this.jsonMapper = buildJsonMapper(settings); this.apiKeyStore = apiKeyStore; this.paths = settings.getJsonArray("files") .stream().map(path -> (String) path).toArray(String[]::new); @@ -126,15 +129,15 @@ private void load(boolean fail) { } private Config loadConfig() throws Exception { - JsonNode tree = ProxyUtil.MAPPER.createObjectNode(); + JsonNode tree = jsonMapper.createObjectNode(); /* start from an empty object node */ for (String path : paths) { try (InputStream stream = openStream(path)) { - tree = ProxyUtil.MAPPER.readerForUpdating(tree).readTree(stream); + tree = jsonMapper.readerForUpdating(tree).readTree(stream); /* each file, in `paths` order, is read as an update of the tree accumulated so far */ } } - return ProxyUtil.MAPPER.convertValue(tree, Config.class); + return jsonMapper.convertValue(tree, Config.class); /* bind the accumulated tree to Config */ } @SneakyThrows @@ -195,4 +198,19 @@ private static void setMissingFeatures(Deployment model, Features features) { modelFeatures.setContentPartsSupported(features.getContentPartsSupported()); } } + + /** Builds the JsonMapper used to read and convert the config files. Enum values are accepted case-insensitively. ArrayNode is marked mergeable unless settings contains jsonMergeStrategy.overwriteArrays = true; a missing jsonMergeStrategy object or overwriteArrays key is treated as false. */ private JsonMapper buildJsonMapper(JsonObject settings) { + JsonMapper mapper = JsonMapper.builder() + .enable(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS) + .build(); + + boolean overwriteArrays = settings + .getJsonObject("jsonMergeStrategy", new JsonObject()) + .getBoolean("overwriteArrays", false); + + mapper.configOverride(ArrayNode.class) /* per the README row config.jsonMergeStrategy.overwriteArrays: mergeable arrays are concatenated across files, non-mergeable arrays are overwritten */ + .setMergeable(!overwriteArrays); + + return mapper; + } } diff --git a/server/src/test/java/com/epam/aidial/core/server/config/FileConfigStoreTest.java 
b/server/src/test/java/com/epam/aidial/core/server/config/FileConfigStoreTest.java new file mode 100644 index 00000000..a5c21981 --- /dev/null +++ b/server/src/test/java/com/epam/aidial/core/server/config/FileConfigStoreTest.java @@ -0,0 +1,77 @@ +package com.epam.aidial.core.server.config; + +import com.epam.aidial.core.config.Config; +import com.epam.aidial.core.server.security.ApiKeyStore; +import io.vertx.core.Vertx; +import io.vertx.core.json.JsonArray; +import io.vertx.core.json.JsonObject; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.util.List; +import java.util.Set; +import javax.annotation.Nullable; + +/** Checks how FileConfigStore combines the "userRoles" array of "testModel" from first.config.json and second.config.json for each value of the jsonMergeStrategy.overwriteArrays setting. */ @ExtendWith(MockitoExtension.class) +public class FileConfigStoreTest { + + @Mock + private Vertx vertx; + + @Mock + private ApiKeyStore apiKeyStore; + + /** overwriteArrays = true: only the role from the second file is expected. */ @Test + public void testLoad_ArrayMergeStrategy_Overwrite() { + FileConfigStore fileConfigStore = new FileConfigStore(vertx, prepareSettings(true), apiKeyStore); + Set expectedUserRoles = Set.of("second_role1"); + + Config config = fileConfigStore.load(); + + Set actualUserRoles = config.getModels().get("testModel").getUserRoles(); + Assertions.assertEquals(expectedUserRoles, actualUserRoles); + } + + /** overwriteArrays = false: the roles from both files are expected. */ @Test + public void testLoad_ArrayMergeStrategy_Concat() { + FileConfigStore fileConfigStore = new FileConfigStore(vertx, prepareSettings(false), apiKeyStore); + Set expectedUserRoles = Set.of("first_role1", "second_role1"); + + Config config = fileConfigStore.load(); + + Set actualUserRoles = config.getModels().get("testModel").getUserRoles(); + Assertions.assertEquals(expectedUserRoles, actualUserRoles); + } + + /** No jsonMergeStrategy entry in the settings: the same result as overwriteArrays = false is expected. */ @Test + public void testLoad_DefaultArrayMergeStrategy_Concat() { + FileConfigStore fileConfigStore = new FileConfigStore(vertx, prepareSettings(null), apiKeyStore); + Set expectedUserRoles = Set.of("first_role1", 
"second_role1"); + + Config config = fileConfigStore.load(); + + Set actualUserRoles = config.getModels().get("testModel").getUserRoles(); + Assertions.assertEquals(expectedUserRoles, actualUserRoles); + } + + /** Builds store settings listing the two fixture files and a reload value of 1000; the jsonMergeStrategy.overwriteArrays entry is added only when the argument is non-null. */ private static JsonObject prepareSettings(@Nullable Boolean overwriteArrays) { + JsonObject settings = new JsonObject(); + + settings.put("files", new JsonArray( + List.of( + "com/epam/aidial/core/server/config/first.config.json", + "com/epam/aidial/core/server/config/second.config.json" + )) + ); + settings.put("reload", 1000); + + if (overwriteArrays != null) { + settings.put("jsonMergeStrategy", new JsonObject().put("overwriteArrays", overwriteArrays)); + } + + return settings; + } +} diff --git a/server/src/test/resources/com/epam/aidial/core/server/config/first.config.json b/server/src/test/resources/com/epam/aidial/core/server/config/first.config.json new file mode 100644 index 00000000..0a6e20fa --- /dev/null +++ b/server/src/test/resources/com/epam/aidial/core/server/config/first.config.json @@ -0,0 +1,9 @@ +{ + "models": { + "testModel": { + "userRoles": [ + "first_role1" + ] + } + } +} diff --git a/server/src/test/resources/com/epam/aidial/core/server/config/second.config.json b/server/src/test/resources/com/epam/aidial/core/server/config/second.config.json new file mode 100644 index 00000000..3fed4087 --- /dev/null +++ b/server/src/test/resources/com/epam/aidial/core/server/config/second.config.json @@ -0,0 +1,9 @@ +{ + "models": { + "testModel": { + "userRoles": [ + "second_role1" + ] + } + } +}