From 60fa67a8f4b89fa01f388457b674f0d1fb7a1516 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 1 Oct 2024 10:58:41 -0700 Subject: [PATCH 1/8] add dbt_config --- config/README.md | 0 config/external_config.json | 138 ++++++++++++++++++ config/pyproject.toml | 175 +++++++++++++++++++++++ config/src/dbt_config/__about__.py | 1 + config/src/dbt_config/__init__.py | 0 config/src/dbt_config/external_config.py | 62 ++++++++ config/test/test_external_config.py | 36 +++++ dbt_common/config/__init__.py | 0 dbt_common/events/common_types_pb2.py | 71 +++++++++ 9 files changed, 483 insertions(+) create mode 100644 config/README.md create mode 100644 config/external_config.json create mode 100644 config/pyproject.toml create mode 100644 config/src/dbt_config/__about__.py create mode 100644 config/src/dbt_config/__init__.py create mode 100644 config/src/dbt_config/external_config.py create mode 100644 config/test/test_external_config.py create mode 100644 dbt_common/config/__init__.py create mode 100644 dbt_common/events/common_types_pb2.py diff --git a/config/README.md b/config/README.md new file mode 100644 index 00000000..e69de29b diff --git a/config/external_config.json b/config/external_config.json new file mode 100644 index 00000000..76e69911 --- /dev/null +++ b/config/external_config.json @@ -0,0 +1,138 @@ +{ + "id": "dbt-common/external-catalog-config-v0", + "$schema": "http://json-schema.org/draft-06/schema#", + "description": "..", + "title": "External Catalog Config", + "type": "object", + "required": [ + "catalogs" + ], + "properties": { + "catalogs": { + "type": "array", + "items": { + "$ref": "#/$defs/externalCatalog" + } + } + }, + "$defs": { + "icebergConfiguration": { + "type": "object", + "required": [ + "table_format", + "external_location", + "location" + ], + "properties": { + "table_format": { + "type": "string", + "description": "The table format" + }, + "namespace": { + "type": "string", + "description": "The namespace", + "default": "dbt" + }, + "external_location": { + "type": "string", + "description": "The external location", + "format": "uri" + } + } + }, + "glueConfiguration": { + "type": "object", + "required": [ + "table_format", + "external_location", + "location" + ], + "properties": { + "table_format": { + "type": "string", + "description": "The table format" + }, + "namespace": { + "type": "string", + "description": "The namespace", + "default": "dbt" + }, + "external_location": { + "type": "string", + "description": "The external location", + "format": "uri" + }, + "aws_account_id": { + "type": "string", + "description": "The AWS account ID" + }, + "role_arn": { + "type": "string", + "description": "The role ARN" + } + } + }, + "management": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "default": true, + "description": "Whether management is enabled" + }, + "create_if_not_exists": { + "type": "boolean", + "default": false, + "description": "Whether to create the external catalog if it does not exist" + }, + "alter_if_different": { + "type": "boolean", + "default": false, + "description": "Whether to alter the external catalog if it exists" + }, + "read_only": { + "type": "boolean", + "default": true, + "description": "Whether the external catalog is read-only" + }, + "refresh": { + "type": "string", + "enum": [ + "always", + "never", + "on_change" + ], + "default": "on_change", + "description": "Whether to refresh the external catalog" + } + } + }, + "externalCatalog": { + "type": "object", + "required": [ + "type", + "name", + "configuration", + "management" + ], + "properties": { + "name": { + "type": "string", + "description": "The name of the external catalog" + }, + "type": { + "enum": [ + "iceberg", + "glue" + ] + }, + "configuration": { + "anyOf": [{"$ref": "#/$defs/icebergConfiguration"}, {"$ref": "#/$defs/glueConfiguration"}] + }, + "management": { + "$ref": "#/$defs/management" + } + } + } +} +} \ No newline at end of file diff --git a/config/pyproject.toml b/config/pyproject.toml new file mode 100644 index 00000000..6cd373bd --- /dev/null +++ b/config/pyproject.toml @@ -0,0 +1,175 @@ +[project] +name = "dbt-config" +dynamic = ["version"] +description = "The shared common utilities that dbt-core and adapter implementations use" +readme = "README.md" +requires-python = ">=3.8" +license = "Apache-2.0" +keywords = [] +authors = [ + { name = "dbt Labs", email = "info@dbtlabs.com" }, +] +maintainers = [ + { name = "dbt Labs", email = "info@dbtlabs.com" }, +] +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "License :: OSI Approved :: Apache Software License", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [ + "pydantic", +] + +[project.optional-dependencies] +lint = [ + "black>=23.3,<24.0", + "flake8", + "flake8-pyproject", + "flake8-docstrings", + "mypy>=1.3,<2.0", + "pytest>=7.3,<8.0", # needed for linting tests + "types-Jinja2>=2.11,<3.0", + "types-jsonschema>=4.17,<5.0", + "types-protobuf>=4.24,<5.0", + "types-python-dateutil>=2.8,<3.0", + "types-PyYAML>=6.0,<7.0", + "types-requests" +] +test = [ + "pytest>=7.3,<8.0", + "pytest-mock", + "pytest-xdist>=3.2,<4.0", + "pytest-cov>=4.1,<5.0", + "hypothesis>=6.87,<7.0", +] +build = [ + "wheel", + "twine", + "check-wheel-contents", +] + +[project.urls] +Homepage = "https://github.com/dbt-labs/dbt-common" +Repository = "https://github.com/dbt-labs/dbt-common.git" +Issues = "https://github.com/dbt-labs/dbt-common/issues" +Changelog = "https://github.com/dbt-labs/dbt-common/blob/main/CHANGELOG.md" + +[tool.hatch.version] +path = "src/dbt_config/__about__.py" + +### Default env & scripts + +[tool.hatch.envs.default] +description = "Default environment with dependencies for running dbt-common" +features = ["lint", "test"] + +### Test settings, envs & scripts +[tool.hatch.envs.test] +description = "Env for running development commands for testing" +features = ["test"] + +[tool.hatch.envs.test.scripts] +unit = "python -m pytest --cov=dbt_config --cov-report=xml {args:test/}" + +### Linting settings, envs & scripts + +[tool.hatch.envs.lint] +type = "virtual" +description = "Env for running development commands for linting" +features = ["lint"] + +[tool.hatch.envs.lint.scripts] +all = [ + "- black", + "- flake8", + "- mypy", +] +black = "python -m black ." +flake8 = "python -m flake8 ." +mypy = "python -m mypy ." + +[tool.black] +line-length = 99 +target-version = ['py38'] + +[tool.flake8] +max-line-length = 99 +select = ["E", "W", "F"] +ignore = ["E203", "E501", "E741", "W503", "W504"] +exclude = [ + "venv", + ".venv", + "env*", + ".hatch/*", +] +per-file-ignores = ["*/__init__.py: F401", "*/conftest.py: F401"] +docstring-convention = "google" + +[tool.mypy] +mypy_path = "third-party-stubs/" +namespace_packages = true +warn_unused_configs = true +show_error_codes = true +disable_error_code = "attr-defined" # TODO: revisit once other mypy errors resolved +disallow_untyped_defs = false # TODO: add type annotations everywhere +warn_redundant_casts = true +ignore_missing_imports = true +exclude = [ + "env*", + "third-party-stubs/*", +] + + +### Build settings, envs & scripts + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.sdist] +sources = ["/src"] +exclude = [ + "/.github", + "/.changes", + ".changie.yaml", + ".gitignore", + ".pre-commit-config.yaml", + "CONTRIBUTING.md", + "/test", +] + +[tool.hatch.build.targets.wheel] +sources = ["/src"] +packages = ["dbt_config"] + +[tool.hatch.envs.build] +description = "Env for running development commands for linting" +features = ["build"] +packages = ["dbt_config"] + +[tool.hatch.envs.build.scripts] +check-all = [ + "- check-wheel", + "- check-sdist", +] +check-wheel = [ + "twine check dist/*", + "find ./dist/dbt_config-*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/", + "pip freeze | grep dbt-config", +] +check-sdist = [ + "check-wheel-contents dist/*.whl --ignore W007,W008", + "find ./dist/dbt_config-*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/", + "pip freeze | grep dbt-config", +] diff --git a/config/src/dbt_config/__about__.py b/config/src/dbt_config/__about__.py new file mode 100644 index 00000000..06fbe7e6 --- /dev/null +++ b/config/src/dbt_config/__about__.py @@ -0,0 +1 @@ +version = "0.0.1" \ No newline at end of file diff --git a/config/src/dbt_config/__init__.py b/config/src/dbt_config/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/config/src/dbt_config/external_config.py b/config/src/dbt_config/external_config.py new file mode 100644 index 00000000..bba52a20 --- /dev/null +++ b/config/src/dbt_config/external_config.py @@ -0,0 +1,62 @@ +# generated by datamodel-codegen: +# filename: external_config.json +# timestamp: 2024-09-30T21:24:46+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import List, Optional, Union + +from pydantic import AnyUrl, BaseModel, Field + + +class IcebergConfiguration(BaseModel): + table_format: str = Field(..., description='The table format') + namespace: Optional[str] = Field('dbt', description='The namespace') + external_location: AnyUrl = Field(..., description='The external location') + + +class GlueConfiguration(BaseModel): + table_format: str = Field(..., description='The table format') + namespace: Optional[str] = Field('dbt', description='The namespace') + external_location: AnyUrl = Field(..., description='The external location') + aws_account_id: Optional[str] = Field(None, description='The AWS account ID') + role_arn: Optional[str] = Field(None, description='The role ARN') + + +class Refresh(Enum): + always = 'always' + never = 'never' + on_change = 'on_change' + + +class Management(BaseModel): + enabled: Optional[bool] = Field(True, description='Whether management is enabled') + create_if_not_exists: Optional[bool] = Field( + False, description='Whether to create the external catalog if it does not exist' + ) + alter_if_different: Optional[bool] = Field( + False, description='Whether to alter the external catalog if it exists' + ) + read_only: Optional[bool] = Field( + True, description='Whether the external catalog is read-only' + ) + refresh: Optional[Refresh] = Field( + 'on_change', description='Whether to refresh the external catalog' + ) + + +class Type(Enum): + iceberg = 'iceberg' + glue = 'glue' + + +class ExternalCatalog(BaseModel): + name: str = Field(..., description='The name of the external catalog') + type: Type + configuration: Union[IcebergConfiguration, GlueConfiguration] + management: Management + + +class ExternalCatalogConfig(BaseModel): + catalogs: List[ExternalCatalog] diff --git a/config/test/test_external_config.py b/config/test/test_external_config.py new file mode 100644 index 00000000..cca79d05 --- /dev/null +++ b/config/test/test_external_config.py @@ -0,0 +1,36 @@ +import yaml + +from dbt_config.external_config import ExternalCatalogConfig + +__EXAMPLE_VALID_CONFIG = """ +catalogs: # list of objects + - name: "titanic" # p0 name of the catalog + type: iceberg # p0 + management: # Not P0, this governs how dbt manages the catalog integration + enabled: True # p0 + create_if_not_exists: True # we will likely default this to false as it typically requires admin privileges + alter_if_different: False + refresh: "always" #oneOf: "never"|"on-run-start" + configuration: + table_format: "iceberg" # p0 delta/hudi etc + namespace: "default" + external_location: 'azfs://external-location-bucket-path/directory' + + - name: "elmers" + type: glue + management: # Not P0, this governs how dbt manages the catalog integration + create_if_not_exists: True + alter_if_different: False + read_only: True # if we try to persist a model here dbt raises an exception + configuration: + namespace: "awsdatacatalog" + external_location: 's3://external-location-bucket-path/directory' + aws_account_id: "123456089" + role_arn: "someRole" + table_format: "iceberg" +""" + + +def test_parse_external_config(): + unparsed_config = yaml.safe_load(__EXAMPLE_VALID_CONFIG) + ExternalCatalogConfig.model_validate(unparsed_config) diff --git a/dbt_common/config/__init__.py b/dbt_common/config/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dbt_common/events/common_types_pb2.py b/dbt_common/events/common_types_pb2.py new file mode 100644 index 00000000..aa03438c --- /dev/null +++ b/dbt_common/events/common_types_pb2.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: common_types.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x12\x63ommon_types.proto\x12\x0bproto_types\x1a\x1fgoogle/protobuf/timestamp.proto"\x91\x02\n\tEventInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04\x63ode\x18\x02 \x01(\t\x12\x0b\n\x03msg\x18\x03 \x01(\t\x12\r\n\x05level\x18\x04 \x01(\t\x12\x15\n\rinvocation_id\x18\x05 \x01(\t\x12\x0b\n\x03pid\x18\x06 \x01(\x05\x12\x0e\n\x06thread\x18\x07 \x01(\t\x12&\n\x02ts\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x30\n\x05\x65xtra\x18\t \x03(\x0b\x32!.proto_types.EventInfo.ExtraEntry\x12\x10\n\x08\x63\x61tegory\x18\n \x01(\t\x1a,\n\nExtraEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"6\n\x0eGenericMessage\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo"1\n\x11RetryExternalCall\x12\x0f\n\x07\x61ttempt\x18\x01 \x01(\x05\x12\x0b\n\x03max\x18\x02 \x01(\x05"j\n\x14RetryExternalCallMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12,\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x1e.proto_types.RetryExternalCall"#\n\x14RecordRetryException\x12\x0b\n\x03\x65xc\x18\x01 \x01(\t"p\n\x17RecordRetryExceptionMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12/\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32!.proto_types.RecordRetryException"@\n\x13SystemCouldNotWrite\x12\x0c\n\x04path\x18\x01 \x01(\t\x12\x0e\n\x06reason\x18\x02 \x01(\t\x12\x0b\n\x03\x65xc\x18\x03 \x01(\t"n\n\x16SystemCouldNotWriteMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12.\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32 .proto_types.SystemCouldNotWrite"!\n\x12SystemExecutingCmd\x12\x0b\n\x03\x63md\x18\x01 \x03(\t"l\n\x15SystemExecutingCmdMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12-\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x1f.proto_types.SystemExecutingCmd"\x1c\n\x0cSystemStdOut\x12\x0c\n\x04\x62msg\x18\x01 \x01(\t"`\n\x0fSystemStdOutMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12\'\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x19.proto_types.SystemStdOut"\x1c\n\x0cSystemStdErr\x12\x0c\n\x04\x62msg\x18\x01 \x01(\t"`\n\x0fSystemStdErrMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12\'\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x19.proto_types.SystemStdErr",\n\x16SystemReportReturnCode\x12\x12\n\nreturncode\x18\x01 \x01(\x05"t\n\x19SystemReportReturnCodeMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12\x31\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32#.proto_types.SystemReportReturnCode"\x19\n\nFormatting\x12\x0b\n\x03msg\x18\x01 \x01(\t"\\\n\rFormattingMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12%\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x17.proto_types.Formatting"\x13\n\x04Note\x12\x0b\n\x03msg\x18\x01 \x01(\t"P\n\x07NoteMsg\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.proto_types.EventInfo\x12\x1f\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x11.proto_types.Noteb\x06proto3' +) + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "common_types_pb2", _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + DESCRIPTOR._options = None + _EVENTINFO_EXTRAENTRY._options = None + _EVENTINFO_EXTRAENTRY._serialized_options = b"8\001" + _globals["_EVENTINFO"]._serialized_start = 69 + _globals["_EVENTINFO"]._serialized_end = 342 + _globals["_EVENTINFO_EXTRAENTRY"]._serialized_start = 298 + _globals["_EVENTINFO_EXTRAENTRY"]._serialized_end = 342 + _globals["_GENERICMESSAGE"]._serialized_start = 344 + _globals["_GENERICMESSAGE"]._serialized_end = 398 + _globals["_RETRYEXTERNALCALL"]._serialized_start = 400 + _globals["_RETRYEXTERNALCALL"]._serialized_end = 449 + _globals["_RETRYEXTERNALCALLMSG"]._serialized_start = 451 + _globals["_RETRYEXTERNALCALLMSG"]._serialized_end = 557 + _globals["_RECORDRETRYEXCEPTION"]._serialized_start = 559 + _globals["_RECORDRETRYEXCEPTION"]._serialized_end = 594 + _globals["_RECORDRETRYEXCEPTIONMSG"]._serialized_start = 596 + _globals["_RECORDRETRYEXCEPTIONMSG"]._serialized_end = 708 + _globals["_SYSTEMCOULDNOTWRITE"]._serialized_start = 710 + _globals["_SYSTEMCOULDNOTWRITE"]._serialized_end = 774 + _globals["_SYSTEMCOULDNOTWRITEMSG"]._serialized_start = 776 + _globals["_SYSTEMCOULDNOTWRITEMSG"]._serialized_end = 886 + _globals["_SYSTEMEXECUTINGCMD"]._serialized_start = 888 + _globals["_SYSTEMEXECUTINGCMD"]._serialized_end = 921 + _globals["_SYSTEMEXECUTINGCMDMSG"]._serialized_start = 923 + _globals["_SYSTEMEXECUTINGCMDMSG"]._serialized_end = 1031 + _globals["_SYSTEMSTDOUT"]._serialized_start = 1033 + _globals["_SYSTEMSTDOUT"]._serialized_end = 1061 + _globals["_SYSTEMSTDOUTMSG"]._serialized_start = 1063 + _globals["_SYSTEMSTDOUTMSG"]._serialized_end = 1159 + _globals["_SYSTEMSTDERR"]._serialized_start = 1161 + _globals["_SYSTEMSTDERR"]._serialized_end = 1189 + _globals["_SYSTEMSTDERRMSG"]._serialized_start = 1191 + _globals["_SYSTEMSTDERRMSG"]._serialized_end = 1287 + _globals["_SYSTEMREPORTRETURNCODE"]._serialized_start = 1289 + _globals["_SYSTEMREPORTRETURNCODE"]._serialized_end = 1333 + _globals["_SYSTEMREPORTRETURNCODEMSG"]._serialized_start = 1335 + _globals["_SYSTEMREPORTRETURNCODEMSG"]._serialized_end = 1451 + _globals["_FORMATTING"]._serialized_start = 1453 + _globals["_FORMATTING"]._serialized_end = 1478 + _globals["_FORMATTINGMSG"]._serialized_start = 1480 + _globals["_FORMATTINGMSG"]._serialized_end = 1572 + _globals["_NOTE"]._serialized_start = 1574 + _globals["_NOTE"]._serialized_end = 1593 + _globals["_NOTEMSG"]._serialized_start = 1595 + _globals["_NOTEMSG"]._serialized_end = 1675 +# @@protoc_insertion_point(module_scope) From dc73432abad09fa654bdd4ec1a58e720d9d084dc Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 1 Oct 2024 11:10:56 -0700 Subject: [PATCH 2/8] add dbt_config --- config/pyproject.toml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/config/pyproject.toml b/config/pyproject.toml index 6cd373bd..8c4caf6f 100644 --- a/config/pyproject.toml +++ b/config/pyproject.toml @@ -60,7 +60,7 @@ build = [ ] [project.urls] -Homepage = "https://github.com/dbt-labs/dbt-common" +Homepage = "https://github.com/dbt-labs/dbt-common/config/" Repository = "https://github.com/dbt-labs/dbt-common.git" Issues = "https://github.com/dbt-labs/dbt-common/issues" Changelog = "https://github.com/dbt-labs/dbt-common/blob/main/CHANGELOG.md" @@ -138,7 +138,6 @@ requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.sdist] -sources = ["/src"] exclude = [ "/.github", "/.changes", @@ -150,13 +149,12 @@ exclude = [ ] [tool.hatch.build.targets.wheel] -sources = ["/src"] packages = ["dbt_config"] [tool.hatch.envs.build] description = "Env for running development commands for linting" features = ["build"] -packages = ["dbt_config"] +packages = ["src/dbt_config"] [tool.hatch.envs.build.scripts] check-all = [ From e3e5e63e03edf9609c1264c11fe8159ee251b867 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 1 Oct 2024 11:25:19 -0700 Subject: [PATCH 3/8] add dbt_config --- config/pyproject.toml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/config/pyproject.toml b/config/pyproject.toml index 8c4caf6f..60a9a3f8 100644 --- a/config/pyproject.toml +++ b/config/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dbt-config" dynamic = ["version"] -description = "The shared common utilities that dbt-core and adapter implementations use" +description = "The shared configuration interfaces" readme = "README.md" requires-python = ">=3.8" license = "Apache-2.0" @@ -71,7 +71,7 @@ path = "src/dbt_config/__about__.py" ### Default env & scripts [tool.hatch.envs.default] -description = "Default environment with dependencies for running dbt-common" +description = "Default environment with dependencies for running dbt-config" features = ["lint", "test"] ### Test settings, envs & scripts @@ -80,7 +80,7 @@ description = "Env for running development commands for testing" features = ["test"] [tool.hatch.envs.test.scripts] -unit = "python -m pytest --cov=dbt_config --cov-report=xml {args:test/}" +unit = "python -m pytest --cov=dbt_config --cov-report=xml test/" ### Linting settings, envs & scripts @@ -149,7 +149,6 @@ exclude = [ ] [tool.hatch.build.targets.wheel] -packages = ["dbt_config"] [tool.hatch.envs.build] description = "Env for running development commands for linting" From 55d21565297fec3ec65954a99433deca95a6e5f3 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 1 Oct 2024 13:58:54 -0700 Subject: [PATCH 4/8] add py.typed --- config/src/dbt_config/py.typed | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 config/src/dbt_config/py.typed diff --git a/config/src/dbt_config/py.typed b/config/src/dbt_config/py.typed new file mode 100644 index 00000000..e69de29b From d0c50a099c79d7aa74f894c3803098492f393d85 Mon Sep 17 00:00:00 2001 From: Colin Date: Thu, 3 Oct 2024 15:52:50 -0700 Subject: [PATCH 5/8] rename py module --- config/pyproject.toml | 2 +- config/src/dbt_config/external_config.py | 62 ------------------------ config/test/test_external_config.py | 5 +- 3 files changed, 4 insertions(+), 65 deletions(-) delete mode 100644 config/src/dbt_config/external_config.py diff --git a/config/pyproject.toml b/config/pyproject.toml index 60a9a3f8..83fdd0db 100644 --- a/config/pyproject.toml +++ b/config/pyproject.toml @@ -28,7 +28,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ - "pydantic", + "pydantic" ] [project.optional-dependencies] diff --git a/config/src/dbt_config/external_config.py b/config/src/dbt_config/external_config.py deleted file mode 100644 index bba52a20..00000000 --- a/config/src/dbt_config/external_config.py +++ /dev/null @@ -1,62 +0,0 @@ -# generated by datamodel-codegen: -# filename: external_config.json -# timestamp: 2024-09-30T21:24:46+00:00 - -from __future__ import annotations - -from enum import Enum -from typing import List, Optional, Union - -from pydantic import AnyUrl, BaseModel, Field - - -class IcebergConfiguration(BaseModel): - table_format: str = Field(..., description='The table format') - namespace: Optional[str] = Field('dbt', description='The namespace') - external_location: AnyUrl = Field(..., description='The external location') - - -class GlueConfiguration(BaseModel): - table_format: str = Field(..., description='The table format') - namespace: Optional[str] = Field('dbt', description='The namespace') - external_location: AnyUrl = Field(..., description='The external location') - aws_account_id: Optional[str] = Field(None, description='The AWS account ID') - role_arn: Optional[str] = Field(None, description='The role ARN') - - -class Refresh(Enum): - always = 'always' - never = 'never' - on_change = 'on_change' - - -class Management(BaseModel): - enabled: Optional[bool] = Field(True, description='Whether management is enabled') - create_if_not_exists: Optional[bool] = Field( - False, description='Whether to create the external catalog if it does not exist' - ) - alter_if_different: Optional[bool] = Field( - False, description='Whether to alter the external catalog if it exists' - ) - read_only: Optional[bool] = Field( - True, description='Whether the external catalog is read-only' - ) - refresh: Optional[Refresh] = Field( - 'on_change', description='Whether to refresh the external catalog' - ) - - -class Type(Enum): - iceberg = 'iceberg' - glue = 'glue' - - -class ExternalCatalog(BaseModel): - name: str = Field(..., description='The name of the external catalog') - type: Type - configuration: Union[IcebergConfiguration, GlueConfiguration] - management: Management - - -class ExternalCatalogConfig(BaseModel): - catalogs: List[ExternalCatalog] diff --git a/config/test/test_external_config.py b/config/test/test_external_config.py index cca79d05..53ed56cd 100644 --- a/config/test/test_external_config.py +++ b/config/test/test_external_config.py @@ -1,6 +1,6 @@ import yaml -from dbt_config.external_config import ExternalCatalogConfig +from dbt_config.catalog_config import ExternalCatalogConfig __EXAMPLE_VALID_CONFIG = """ catalogs: # list of objects @@ -33,4 +33,5 @@ def test_parse_external_config(): unparsed_config = yaml.safe_load(__EXAMPLE_VALID_CONFIG) - ExternalCatalogConfig.model_validate(unparsed_config) + config = ExternalCatalogConfig.model_validate(unparsed_config) + assert config.catalogs[0].name == "titanic" From 8a38c377f4960de9a799f75c7fcc000eb5fb54b7 Mon Sep 17 00:00:00 2001 From: Colin Date: Wed, 9 Oct 2024 12:56:14 -0700 Subject: [PATCH 6/8] update json-schema to include internal_namespace --- config/external_config.json | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/config/external_config.json b/config/external_config.json index 76e69911..6dcd824c 100644 --- a/config/external_config.json +++ b/config/external_config.json @@ -1,6 +1,6 @@ { "id": "dbt-common/external-catalog-config-v0", - "$schema": "http://json-schema.org/draft-06/schema#", + "$schema": "http://json-schema.org/draft-07/schema#", "description": "..", "title": "External Catalog Config", "type": "object", @@ -21,18 +21,37 @@ "required": [ "table_format", "external_location", + "internal_namespace", "location" ], "properties": { "table_format": { "type": "string", - "description": "The table format" + "description": "The table format", + "default": "iceberg" }, - "namespace": { + "catalog_namespace": { "type": "string", "description": "The namespace", "default": "dbt" }, + "internal_namespace": { + "type": "object", + "required": [ + "database", + "schema" + ], + "properties": { + "database": { + "type": "string", + "description": "The database" + }, + "schema": { + "type": "string", + "description": "The schema" + } + } + }, "external_location": { "type": "string", "description": "The external location", @@ -98,11 +117,11 @@ "refresh": { "type": "string", "enum": [ - "always", + "on-start", "never", - "on_change" + "just-in-time" ], - "default": "on_change", + "default": "on-start", "description": "Whether to refresh the external catalog" } } @@ -127,12 +146,13 @@ ] }, "configuration": { - "anyOf": [{"$ref": "#/$defs/icebergConfiguration"}, {"$ref": "#/$defs/glueConfiguration"}] + "type": "object", + "oneOf": [{"$ref": "#/$defs/icebergConfiguration"}, {"$ref": "#/$defs/glueConfiguration"}] }, "management": { "$ref": "#/$defs/management" } - } + } } } } \ No newline at end of file From dbc3ee7627254a6c4700248dc0a1cc45bb09a4b0 Mon Sep 17 00:00:00 2001 From: Colin Date: Wed, 9 Oct 2024 13:06:36 -0700 Subject: [PATCH 7/8] migrate to catalog_config --- config/src/dbt_config/catalog_config.py | 68 +++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 config/src/dbt_config/catalog_config.py diff --git a/config/src/dbt_config/catalog_config.py b/config/src/dbt_config/catalog_config.py new file mode 100644 index 00000000..aaaed319 --- /dev/null +++ b/config/src/dbt_config/catalog_config.py @@ -0,0 +1,68 @@ +# generated by datamodel-codegen: +# filename: external_config.json +# timestamp: 2024-10-09T19:40:39+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import List, Optional, Union + +from pydantic import AnyUrl, BaseModel, Field + + +class InternalNamespace(BaseModel): + database: str = Field(..., description='The database') + schema_: str = Field(..., alias='schema', description='The schema') + + +class IcebergConfiguration(BaseModel): + table_format: str = Field(..., description='The table format') + catalog_namespace: Optional[str] = Field('dbt', description='The namespace') + internal_namespace: InternalNamespace + external_location: AnyUrl = Field(..., description='The external location') + + +class GlueConfiguration(BaseModel): + table_format: str = Field(..., description='The table format') + namespace: Optional[str] = Field('dbt', description='The namespace') + external_location: AnyUrl = Field(..., description='The external location') + aws_account_id: Optional[str] = Field(None, description='The AWS account ID') + role_arn: Optional[str] = Field(None, description='The role ARN') + + +class Refresh(Enum): + on_start = 'on-start' + never = 'never' + just_in_time = 'just-in-time' + + +class Management(BaseModel): + enabled: Optional[bool] = Field(True, description='Whether management is enabled') + create_if_not_exists: Optional[bool] = Field( + False, description='Whether to create the external catalog if it does not exist' + ) + alter_if_different: Optional[bool] = Field( + False, description='Whether to alter the external catalog if it exists' + ) + read_only: Optional[bool] = Field( + True, description='Whether the external catalog is read-only' + ) + refresh: Optional[Refresh] = Field( + 'on-start', description='Whether to refresh the external catalog' + ) + + +class Type(Enum): + iceberg = 'iceberg' + glue = 'glue' + + +class ExternalCatalog(BaseModel): + name: str = Field(..., description='The name of the external catalog') + type: Type + configuration: Union[IcebergConfiguration] + management: Management + + +class ExternalCatalogConfig(BaseModel): + catalogs: List[ExternalCatalog] From 7fbac10a0384de81dc21f6c2c950db527b63306b Mon Sep 17 00:00:00 2001 From: Colin Date: Fri, 18 Oct 2024 09:36:37 -0700 Subject: [PATCH 8/8] add external_volume --- config/external_config.json | 23 ++++++++++++++++++----- config/src/dbt_config/catalog_config.py | 11 ++++++++--- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/config/external_config.json b/config/external_config.json index 6dcd824c..1b3c3ad5 100644 --- a/config/external_config.json +++ b/config/external_config.json @@ -52,11 +52,24 @@ } } }, - "external_location": { - "type": "string", - "description": "The external location", - "format": "uri" - } + "external_volume": { + "type": "object", + "required": [ + "type", + "path" + ], + "properties": { + "external_location": { + "type": "string", + "description": "The external location URI", + "format": "uri" + }, + "name": { + "type": "string", + "description": "The name of the volume" + } + } + } } }, "glueConfiguration": { diff --git a/config/src/dbt_config/catalog_config.py b/config/src/dbt_config/catalog_config.py index aaaed319..01f10b89 100644 --- a/config/src/dbt_config/catalog_config.py +++ b/config/src/dbt_config/catalog_config.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: external_config.json -# timestamp: 2024-10-09T19:40:39+00:00 +# timestamp: 2024-10-11T03:41:52+00:00 from __future__ import annotations @@ -15,11 +15,16 @@ class InternalNamespace(BaseModel): schema_: str = Field(..., alias='schema', description='The schema') +class ExternalVolume(BaseModel): + external_location: Optional[AnyUrl] = Field(None, description='The external location') + name: Optional[str] = Field(None, description='The name of the volume') + + class IcebergConfiguration(BaseModel): table_format: str = Field(..., description='The table format') catalog_namespace: Optional[str] = Field('dbt', description='The namespace') internal_namespace: InternalNamespace - external_location: AnyUrl = Field(..., description='The external location') + external_volume: Optional[ExternalVolume] = None class GlueConfiguration(BaseModel): @@ -60,7 +65,7 @@ class Type(Enum): class ExternalCatalog(BaseModel): name: str = Field(..., description='The name of the external catalog') type: Type - configuration: Union[IcebergConfiguration] + configuration: Union[IcebergConfiguration, GlueConfiguration] management: Management