From 1741de322c6c2771904d204ecd1c0387ad789628 Mon Sep 17 00:00:00 2001 From: crowemi Date: Wed, 5 Apr 2023 15:08:38 -0700 Subject: [PATCH 1/3] rebase mian --- poetry.lock | 119 +++++++++---------------------- pyproject.toml | 2 +- target_s3/formats/format_base.py | 26 +++++-- target_s3/sinks.py | 33 +++++---- target_s3/target.py | 115 +++++++++++++++++++---------- 5 files changed, 151 insertions(+), 144 deletions(-) diff --git a/poetry.lock b/poetry.lock index 9a5b7a9..e4c08fa 100644 --- a/poetry.lock +++ b/poetry.lock @@ -229,37 +229,37 @@ files = [ [[package]] name = "black" -version = "23.1.0" +version = "23.3.0" description = "The uncompromising code formatter." -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "black-23.1.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:b6a92a41ee34b883b359998f0c8e6eb8e99803aa8bf3123bf2b2e6fec505a221"}, - {file = "black-23.1.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:57c18c5165c1dbe291d5306e53fb3988122890e57bd9b3dcb75f967f13411a26"}, - {file = "black-23.1.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:9880d7d419bb7e709b37e28deb5e68a49227713b623c72b2b931028ea65f619b"}, - {file = "black-23.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6663f91b6feca5d06f2ccd49a10f254f9298cc1f7f49c46e498a0771b507104"}, - {file = "black-23.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9afd3f493666a0cd8f8df9a0200c6359ac53940cbde049dcb1a7eb6ee2dd7074"}, - {file = "black-23.1.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:bfffba28dc52a58f04492181392ee380e95262af14ee01d4bc7bb1b1c6ca8d27"}, - {file = "black-23.1.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c1c476bc7b7d021321e7d93dc2cbd78ce103b84d5a4cf97ed535fbc0d6660648"}, - {file = "black-23.1.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:382998821f58e5c8238d3166c492139573325287820963d2f7de4d518bd76958"}, - {file = "black-23.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bf649fda611c8550ca9d7592b69f0637218c2369b7744694c5e4902873b2f3a"}, - {file = "black-23.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:121ca7f10b4a01fd99951234abdbd97728e1240be89fde18480ffac16503d481"}, - {file = "black-23.1.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:a8471939da5e824b891b25751955be52ee7f8a30a916d570a5ba8e0f2eb2ecad"}, - {file = "black-23.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8178318cb74f98bc571eef19068f6ab5613b3e59d4f47771582f04e175570ed8"}, - {file = "black-23.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:a436e7881d33acaf2536c46a454bb964a50eff59b21b51c6ccf5a40601fbef24"}, - {file = "black-23.1.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:a59db0a2094d2259c554676403fa2fac3473ccf1354c1c63eccf7ae65aac8ab6"}, - {file = "black-23.1.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:0052dba51dec07ed029ed61b18183942043e00008ec65d5028814afaab9a22fd"}, - {file = "black-23.1.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:49f7b39e30f326a34b5c9a4213213a6b221d7ae9d58ec70df1c4a307cf2a1580"}, - {file = "black-23.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:162e37d49e93bd6eb6f1afc3e17a3d23a823042530c37c3c42eeeaf026f38468"}, - {file = "black-23.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b70eb40a78dfac24842458476135f9b99ab952dd3f2dab738c1881a9b38b753"}, - {file = "black-23.1.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:a29650759a6a0944e7cca036674655c2f0f63806ddecc45ed40b7b8aa314b651"}, - {file = "black-23.1.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:bb460c8561c8c1bec7824ecbc3ce085eb50005883a6203dcfb0122e95797ee06"}, - {file = "black-23.1.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:c91dfc2c2a4e50df0026f88d2215e166616e0c80e86004d0003ece0488db2739"}, - {file = "black-23.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a951cc83ab535d248c89f300eccbd625e80ab880fbcfb5ac8afb5f01a258ac9"}, - {file = "black-23.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:0680d4380db3719ebcfb2613f34e86c8e6d15ffeabcf8ec59355c5e7b85bb555"}, - {file = "black-23.1.0-py3-none-any.whl", hash = "sha256:7a0f701d314cfa0896b9001df70a530eb2472babb76086344e688829efd97d32"}, - {file = "black-23.1.0.tar.gz", hash = "sha256:b0bd97bea8903f5a2ba7219257a44e3f1f9d00073d6cc1add68f0beec69692ac"}, + {file = "black-23.3.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915"}, + {file = "black-23.3.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9"}, + {file = "black-23.3.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2"}, + {file = "black-23.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c"}, + {file = "black-23.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c"}, + {file = "black-23.3.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6"}, + {file = "black-23.3.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b"}, + {file = "black-23.3.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d"}, + {file = "black-23.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70"}, + {file = "black-23.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326"}, + {file = "black-23.3.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b"}, + {file = "black-23.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2"}, + {file = "black-23.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925"}, + {file = "black-23.3.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27"}, + {file = "black-23.3.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331"}, + {file = "black-23.3.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5"}, + {file = "black-23.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961"}, + {file = "black-23.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8"}, + {file = "black-23.3.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30"}, + {file = "black-23.3.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"}, + {file = "black-23.3.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266"}, + {file = "black-23.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab"}, + {file = "black-23.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb"}, + {file = "black-23.3.0-py3-none-any.whl", hash = "sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4"}, + {file = "black-23.3.0.tar.gz", hash = "sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940"}, ] [package.dependencies] @@ -639,23 +639,6 @@ files = [ docs = ["furo (>=2022.12.7)", "sphinx (>=5.3)", "sphinx-autodoc-typehints (>=1.19.5)"] testing = ["covdefaults (>=2.2.2)", "coverage (>=7.0.1)", "pytest (>=7.2)", "pytest-cov (>=4)", "pytest-timeout (>=2.1)"] -[[package]] -name = "flake8" -version = "3.9.2" -description = "the modular source code checker: pep8 pyflakes and co" -category = "dev" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" -files = [ - {file = "flake8-3.9.2-py2.py3-none-any.whl", hash = "sha256:bf8fd333346d844f616e8d47905ef3a3384edae6b4e9beb0c5101e25e3110907"}, - {file = "flake8-3.9.2.tar.gz", hash = "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b"}, -] - -[package.dependencies] -mccabe = ">=0.6.0,<0.7.0" -pycodestyle = ">=2.7.0,<2.8.0" -pyflakes = ">=2.3.0,<2.4.0" - [[package]] name = "frozenlist" version = "1.3.3" @@ -1006,18 +989,6 @@ pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2" format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] -[[package]] -name = "mccabe" -version = "0.6.1" -description = "McCabe checker, plugin for flake8" -category = "dev" -optional = false -python-versions = "*" -files = [ - {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"}, - {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"}, -] - [[package]] name = "memoization" version = "0.4.0" @@ -1159,7 +1130,7 @@ python2 = ["typed-ast (>=1.4.0,<1.5.0)"] name = "mypy-extensions" version = "0.4.4" description = "Experimental type system extensions for programs checked with the mypy typechecker." -category = "dev" +category = "main" optional = false python-versions = ">=2.7" files = [ @@ -1208,7 +1179,7 @@ files = [ name = "packaging" version = "23.0" description = "Core utilities for Python packages" -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1268,7 +1239,7 @@ test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] name = "pathspec" version = "0.11.0" description = "Utility library for gitignore style pattern matching of file paths." -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1327,7 +1298,7 @@ files = [ name = "platformdirs" version = "3.0.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1417,18 +1388,6 @@ files = [ [package.dependencies] numpy = ">=1.16.6" -[[package]] -name = "pycodestyle" -version = "2.7.0" -description = "Python style guide checker" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "pycodestyle-2.7.0-py2.py3-none-any.whl", hash = "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068"}, - {file = "pycodestyle-2.7.0.tar.gz", hash = "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"}, -] - [[package]] name = "pycparser" version = "2.21" @@ -1459,18 +1418,6 @@ snowballstemmer = ">=2.2.0" [package.extras] toml = ["tomli (>=1.2.3)"] -[[package]] -name = "pyflakes" -version = "2.3.1" -description = "passive checker of Python programs" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "pyflakes-2.3.1-py2.py3-none-any.whl", hash = "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3"}, - {file = "pyflakes-2.3.1.tar.gz", hash = "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"}, -] - [[package]] name = "pyjwt" version = "2.6.0" @@ -2087,7 +2034,7 @@ files = [ name = "tomli" version = "2.0.1" description = "A lil' TOML parser" -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2390,4 +2337,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.11" -content-hash = "aaaf22a6a0cdca45b30666f4c955f63822d7e2964372125fd48c3485a00a3fac" +content-hash = "bf6330f40391730a496371c7a3ee15057ce72be2112122c484f3646824540836" diff --git a/pyproject.toml b/pyproject.toml index 75e6d7c..461d248 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,9 +23,9 @@ pymongo = "^4.3.3" boto3 = "~1.24" [tool.poetry.dev-dependencies] +black = "^23.3.0" pytest = "^6.2.5" tox = "^3.24.4" -flake8 = "^3.9.2" pydocstyle = "^6.1.1" mypy = "^0.910" types-requests = "^2.26.1" diff --git a/target_s3/formats/format_base.py b/target_s3/formats/format_base.py index 8b7cf3f..8e879fb 100644 --- a/target_s3/formats/format_base.py +++ b/target_s3/formats/format_base.py @@ -35,17 +35,33 @@ class FormatBase(metaclass=ABCMeta): def __init__(self, config: dict, context: dict, extension: str) -> None: # TODO: perhaps we should do some scrubbing here? self.config = config + + self.format = config.get("format", None) + assert self.format, "FormatBase.__init__: Expecting format in configuration." + + self.cloud_provider = config.get("cloud_provider", None) + assert ( + self.cloud_provider + ), "FormatBase.__init__: Expecting cloud provider in configuration" + self.context = context self.extension = extension self.compression = "gz" # TODO: need a list of compatible compression types self.stream_name_path_override = config.get("stream_name_path_override", None) - self.bucket = config.get("bucket") # required - self.session = Session( - region_name=config.get("aws_region"), - profile_name=config.get("aws_profile_name", None), - ) + if self.cloud_provider.get("cloud_provider_type", None) == "aws": + aws_config = self.cloud_provider.get("aws", None) + assert aws_config, "FormatBase.__init__: Expecting aws in configuration" + + self.bucket = aws_config.get("aws_bucket", None) # required + self.session = Session( + aws_access_key_id=aws_config.get("aws_access_key_id", None), + aws_secret_access_key=aws_config.get("aws_secret_access_key", None), + region_name=aws_config.get("aws_region"), + profile_name=aws_config.get("aws_profile_name", None), + ) + self.prefix = config.get("prefix", None) self.logger = context["logger"] self.fully_qualified_key = self.create_key() diff --git a/target_s3/sinks.py b/target_s3/sinks.py index e89e2b0..27c05e7 100644 --- a/target_s3/sinks.py +++ b/target_s3/sinks.py @@ -12,11 +12,7 @@ LOGGER = logging.getLogger("target-s3") -FORMAT_TYPE = { - "parquet": FormatParquet, - "csv": FormatCsv, - "json": FormatJson -} +FORMAT_TYPE = {"parquet": FormatParquet, "csv": FormatCsv, "json": FormatJson} class s3Sink(BatchSink): @@ -24,25 +20,36 @@ class s3Sink(BatchSink): MAX_SIZE = 10000 # Max records to write in one batch - def __init__(self, target: any, stream_name: str, schema: dict, key_properties: list[str] | None) -> None: + def __init__( + self, + target: any, + stream_name: str, + schema: dict, + key_properties: list[str] | None, + ) -> None: super().__init__(target, stream_name, schema, key_properties) # what type of file are we building? - self.format_type = self.config.get('format_type') + self.format_type = self.config.get("format", None).get("format_type", None) if self.format_type: if self.format_type not in FORMAT_TYPE: - raise Exception(f"Unknown file type specified. {key_properties['type']}") + raise Exception( + f"Unknown file type specified. {key_properties['type']}" + ) else: raise Exception("No file type supplied.") def process_batch(self, context: dict) -> None: """Write out any prepped records and return once fully written.""" # add stream name to context - context['stream_name'] = self.stream_name - context['logger'] = self.logger + context["stream_name"] = self.stream_name + context["logger"] = self.logger # creates new object for each batch - format_type_client = format_type_factory(FORMAT_TYPE[self.format_type], self.config, context) + format_type_client = format_type_factory( + FORMAT_TYPE[self.format_type], self.config, context + ) # force base object_type_client to object_type_base class - assert isinstance(format_type_client, FormatBase) is True, \ - f"format_type_client must be of type Base; Type: {type(self.format_type_client)}." + assert ( + isinstance(format_type_client, FormatBase) is True + ), f"format_type_client must be of type Base; Type: {type(self.format_type_client)}." format_type_client.run() diff --git a/target_s3/target.py b/target_s3/target.py index 9d26148..788b9c4 100644 --- a/target_s3/target.py +++ b/target_s3/target.py @@ -4,7 +4,6 @@ from singer_sdk.target_base import Target from singer_sdk import typing as th -from target_s3 import sinks from target_s3.formats.format_base import DATE_GRAIN @@ -19,37 +18,88 @@ class Targets3(Target): name = "target-s3" config_jsonschema = th.PropertiesList( th.Property( - "aws_access_key", - th.StringType, - secret=True, - description="The aws secret access key for auth to S3.", - ), - th.Property( - "aws_secret_access_key", - th.StringType, - secret=True, - required=False, - description="The aws secret access key for auth to S3.", - ), - th.Property( - "aws_region", - th.StringType, - description="The aws region to target", - required=True, + "format", + th.ObjectType( + th.Property( + "format_type", + th.StringType, + required=True, + allowed_values=[ + "parquet", + "json", + ], # TODO: configure this from class + ), + th.Property( + "format_parquet", + th.ObjectType(), + required=False, + ), + th.Property( + "format_json", + th.ObjectType(), + required=False, + ), + th.Property( + "format_csv", + th.ObjectType(), + required=False, + ), + ), ), th.Property( - "aws_profile_name", - th.StringType, - description="The aws profile name used with SSO.", - required=False, + "cloud_provider", + th.ObjectType( + th.Property( + "cloud_provider_type", + th.StringType, + required=True, + allowed_values=["aws"], # TODO: configure this from class + ), + th.Property( + "aws", + th.ObjectType( + th.Property( + "aws_access_key_id", + th.StringType, + required=True, + secret=True, + ), + th.Property( + "aws_secret_access_key", + th.StringType, + required=True, + secret=True, + ), + th.Property( + "aws_region", + th.StringType, + required=True, + ), + th.Property( + "aws_profile_name", + th.StringType, + required=True, + ), + th.Property( + "aws_bucket", + th.StringType, + required=True, + ), + th.Property( + "aws_endpoint_override", + th.StringType, + required=True, + ), + ), + required=False, + ), + ), ), th.Property( - "bucket", + "prefix", th.StringType, - description="The aws bucket to target.", - required=True, + description="The prefix for the key.", ), - th.Property("prefix", th.StringType, description="The prefix for the key."), th.Property( "stream_name_path_override", th.StringType, @@ -87,24 +137,11 @@ class Targets3(Target): allowed_values=DATE_GRAIN.keys(), default="day", ), - th.Property( - "format_type", - th.StringType, - description="The format of the storage object.", - allowed_values=sinks.FORMAT_TYPE.keys(), - required=True, - ), th.Property( "flatten_records", th.BooleanType, description="A flag indictating to flatten records.", ), - th.Property( - "set_dtype_string", - th.BooleanType, - description="A flag indictating to set dytpe to string.", - ), - th.Property("stream_maps", th.ObjectType()), ).to_dict() default_sink_class = s3Sink From 6aa3f4dd37cb75d937420a193256c09a270cbf6f Mon Sep 17 00:00:00 2001 From: crowemi Date: Wed, 5 Apr 2023 15:53:35 -0700 Subject: [PATCH 2/3] added sample-config, updated readme with new config --- README.md | 122 ++++++++-------------------- sample-config.json | 27 ++++++ target_s3/formats/format_base.py | 6 +- target_s3/formats/format_parquet.py | 29 +++++-- 4 files changed, 85 insertions(+), 99 deletions(-) create mode 100644 sample-config.json diff --git a/README.md b/README.md index 3f29fc9..ec5a4fd 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # target-s3 -`target-s3` is a Singer target for s3. +`target-s3` is inteded to be a multi-format/multi-cloud Singer target. Build with the [Meltano Target SDK](https://sdk.meltano.com). @@ -28,6 +28,37 @@ pipx install git+https://github.com/ORG_NAME/target-s3.git@main ### Accepted Config Options +```json +{ + "format": { + "format_type": "json", + "format_parquet": {}, + "format_json": {}, + "format_csv": {} + }, + "cloud_provider": { + "cloud_provider_type": "aws", + "aws": { + "aws_access_key_id": "test", + "aws_secret_access_key": "test", + "aws_region": "us-west-2", + "aws_profile_name": "test-profile", + "aws_bucket": "test-bucket", + "aws_endpoint_override": "http://localhost:4566" + } + }, + "prefix": "path/to/output", + "stream_name_path_override": "StreamName", + "include_process_date": true, + "append_date_to_prefix": false, + "append_date_to_prefix_grain": "day", + "append_date_to_filename": true, + "append_date_to_filename_grain": "microsecond", + "flatten_records": false +} +``` + + - ## Usage You can easily run `target-s3` by itself or in a pipeline using [Meltano](https://meltano.com/). @@ -97,62 +95,6 @@ target-s3 --help tap-carbon-intensity | target-s3 --config /path/to/target-s3-config.json ``` -## Developer Resources - -Follow these instructions to contribute to this project. - -### Initialize your Development Environment - -```bash -pipx install poetry -poetry install -``` - -### Create and Run Tests - -Create tests within the `target_s3/tests` subfolder and - then run: - -```bash -poetry run pytest -``` - -You can also test the `target-s3` CLI interface directly using `poetry run`: - -```bash -poetry run target-s3 --help -``` - -### Testing with [Meltano](https://meltano.com/) - -_**Note:** This target will work in any Singer environment and does not require Meltano. -Examples here are for convenience and to streamline end-to-end orchestration scenarios._ - - - -Next, install Meltano (if you haven't already) and any needed plugins: - -```bash -# Install meltano -pipx install meltano -# Initialize meltano within this directory -cd target-s3 -meltano install -``` - -Now you can test and orchestrate using Meltano: - -```bash -# Test invocation: -meltano invoke target-s3 --version -# OR run a test `elt` pipeline with the Carbon Intensity sample tap: -meltano elt tap-carbon-intensity target-s3 -``` - ### SDK Dev Guide See the [dev guide](https://sdk.meltano.com/en/latest/dev_guide.html) for more instructions on how to use the Meltano Singer SDK to diff --git a/sample-config.json b/sample-config.json new file mode 100644 index 0000000..b4103ea --- /dev/null +++ b/sample-config.json @@ -0,0 +1,27 @@ +{ + "format": { + "format_type": "json", + "format_parquet": {}, + "format_json": {}, + "format_csv": {} + }, + "cloud_provider": { + "cloud_provider_type": "aws", + "aws": { + "aws_access_key_id": "test", + "aws_secret_access_key": "test", + "aws_region": "us-west-2", + "aws_profile_name": "test-profile", + "aws_bucket": "test-bucket", + "aws_endpoint_override": "http://localhost:4566" + } + }, + "prefix": "path/to/output", + "stream_name_path_override": "StreamName", + "include_process_date": true, + "append_date_to_prefix": false, + "append_date_to_prefix_grain": "day", + "append_date_to_filename": true, + "append_date_to_filename_grain": "microsecond", + "flatten_records": false +} \ No newline at end of file diff --git a/target_s3/formats/format_base.py b/target_s3/formats/format_base.py index 8e879fb..d0c9f65 100644 --- a/target_s3/formats/format_base.py +++ b/target_s3/formats/format_base.py @@ -61,6 +61,10 @@ def __init__(self, config: dict, context: dict, extension: str) -> None: region_name=aws_config.get("aws_region"), profile_name=aws_config.get("aws_profile_name", None), ) + self.client = self.session.client( + "s3", + endpoint_url=aws_config.get("aws_endpoint_override", None), + ) self.prefix = config.get("prefix", None) self.logger = context["logger"] @@ -75,7 +79,7 @@ def _write(self, contents: str = None) -> None: with open( f"s3://{self.fully_qualified_key}.{self.extension}.{self.compression}", "w", - transport_params={"client": self.session.client("s3")}, + transport_params={"client": self.client}, ) as f: f.write(contents) diff --git a/target_s3/formats/format_parquet.py b/target_s3/formats/format_parquet.py index 4f99eb2..05c3da4 100644 --- a/target_s3/formats/format_parquet.py +++ b/target_s3/formats/format_parquet.py @@ -8,19 +8,32 @@ class FormatParquet(FormatBase): def __init__(self, config, context) -> None: super().__init__(config, context, "parquet") - self.create_filesystem() + cloud_provider_config = config.get("cloud_provider", None) + cloud_provider_config_type = cloud_provider_config.get( + "cloud_provider_type", None + ) + self.file_system = self.create_filesystem( + cloud_provider_config_type, + cloud_provider_config.get(cloud_provider_config_type, None), + ) def create_filesystem( self, - ) -> None: + cloud_provider: str, + cloud_provider_config: dict, + ) -> fs.FileSystem: """Creates a pyarrow FileSystem object for accessing S3.""" try: - self.file_system = fs.S3FileSystem( - access_key=self.session.get_credentials().access_key, - secret_key=self.session.get_credentials().secret_key, - session_token=self.session.get_credentials().token, - region=self.session.region_name, - ) + if cloud_provider == "aws": + return fs.S3FileSystem( + access_key=self.session.get_credentials().access_key, + secret_key=self.session.get_credentials().secret_key, + session_token=self.session.get_credentials().token, + region=self.session.region_name, + endpoint_override=cloud_provider_config.get( + "aws_endpoint_override", None + ), + ) except Exception as e: self.logger.error("Failed to create parquet file system.") self.logger.error(e) From d142d3a175020c50eeeb1e89a0be8e8055c7c482 Mon Sep 17 00:00:00 2001 From: crowemi Date: Wed, 5 Apr 2023 16:26:46 -0700 Subject: [PATCH 3/3] adjust required input params --- target_s3/target.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target_s3/target.py b/target_s3/target.py index 788b9c4..a8b8792 100644 --- a/target_s3/target.py +++ b/target_s3/target.py @@ -61,13 +61,13 @@ class Targets3(Target): th.Property( "aws_access_key_id", th.StringType, - required=True, + required=False, secret=True, ), th.Property( "aws_secret_access_key", th.StringType, - required=True, + required=False, secret=True, ), th.Property( @@ -78,7 +78,7 @@ class Targets3(Target): th.Property( "aws_profile_name", th.StringType, - required=True, + required=False, ), th.Property( "aws_bucket", @@ -88,7 +88,7 @@ class Targets3(Target): th.Property( "aws_endpoint_override", th.StringType, - required=True, + required=False, ), ), required=False,