From 1fdaa1d0748e2b415e90f0567322edd1e4f4751e Mon Sep 17 00:00:00 2001 From: Maxime Carbonneau-Leclerc Date: Tue, 20 Dec 2022 17:53:35 -0500 Subject: [PATCH] [ISSUE #20044] generate pydantic models from handwritten schema (#20475) * handwritten low code manifest example components * add MinMaxDatetime to jsonschema * add a basic gradle command to generate manifest components * Add auth components to handwritten component schema - ApiKeyAuthenticator - BasicHttpAuthenticator - BearerAuthenticator - DeclarativeOauth2Authenticator - NoAuth * Respect optional properties in DeclarativeOauth2Authenticator * Fix `Dict[str, Any]` mapping in auth components * add default error handler composite error handler and http response filter components * [low code component schema] adding backoff strategies to schema * [low code component schema] fix float types * [low code component schema] add RecordFilter * Remove `config` from auth components * [low code component schema] add Interpolation (with pending question on 'type' not being defined) * Add CartesianProductStreamSlicer & DatetimeStreamSlicer * Add ListStreamSlicer, and fix nesting of DatetimeStreamSlicer * [low code component schema] add InterpolatedRequestOptionsProvider * Add slicer components, and fix a couple of components after reviewing output * [low code component schema] adding transformations and adding type to interpolators * adding spec and a few small tweaks * Add DefaultSchemaLoader * [low code component schema] attempt on custom class * Add descriptions for auth components * add RequestOption * remove interpolated objects from the schema in favor of strings only * a few schema fixes and adding some custom pagination and stream slicer * [low code component schema] fix CustomBackoffStrategy * Add CustomRecordExtractor * add some description and add additional properties * insert a transformer to hydrate default manifest components and perform validation against the handwritten schema * [low code component schema] validating existing schemas * [low code component schema] clean validation script * add manifest transformer tests and a few tweaks to the schema * Revert "[low code component schema] clean validation script" This reverts commit 2408f41cf6e84e9bf9111da832e08a96292fb5b1. * Revert "[low code component schema] validating existing schemas" This reverts commit 9d3997781562faecd4046372816afe35c3b31332. * [low code component schema] integrate validation script to gradle * [low code component schema] updating validation script permissions * remove a few model gen spike files and clean up comments * default types should take parent type into account and a few schema changes * [ISSUE #20044] generate pydantic models from handwritten schema * [ISSUE #20044] code review * [ISSUE #20044] re-generating declarative component schema files Co-authored-by: brianjlai Co-authored-by: Catherine Noll --- .pre-commit-config.yaml | 3 +- airbyte-cdk/python/README.md | 7 + .../low_code_component_schema.yaml | 918 ++++++++++++++++++ .../sources/declarative/models/__init__.py | 3 + .../models/declarative_component_schema.py | 490 ++++++++++ .../bin/generate-component-manifest-files.sh | 27 + airbyte-cdk/python/build.gradle | 11 + build.gradle | 6 + pyproject.toml | 1 + 9 files changed, 1465 insertions(+), 1 deletion(-) create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/low_code_component_schema.yaml create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/models/__init__.py create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py create mode 100755 airbyte-cdk/python/bin/generate-component-manifest-files.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f82f2b5ba91f5..c1577406f64bb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,8 @@ repos: rev: v0.8.8 hooks: - id: licenseheaders - args: ["--tmpl=LICENSE_SHORT", "--ext=py", "-f"] + args: + ["--tmpl=LICENSE_SHORT", "--ext=py", "-x=**/models/__init__.py", "-f"] - repo: https://github.com/psf/black rev: 22.3.0 hooks: diff --git a/airbyte-cdk/python/README.md b/airbyte-cdk/python/README.md index d318c7cb81f8a..a660ef8dba851 100644 --- a/airbyte-cdk/python/README.md +++ b/airbyte-cdk/python/README.md @@ -66,6 +66,13 @@ pip install -e ".[dev]" # [dev] installs development-only dependencies * Perform static type checks using `mypy airbyte_cdk`. `MyPy` configuration is in `.mypy.ini`. * The `type_check_and_test.sh` script bundles both type checking and testing in one convenient command. Feel free to use it! +##### Autogenerated files +If the iteration you are working on includes changes to the models, you might want to regenerate them. In order to do that, you can run: +```commandline +SUB_BUILD=CONNECTORS_BASE ./gradlew format --scan --info --stacktrace +``` +This will generate the files based on the schemas, add the license information and format the code. If you want to only do the former and rely on pre-commit to the others, you can run the appropriate generation command i.e. `./gradlew generateProtocolClassFiles` or `./gradlew generateComponentManifestClassFiles`. + #### Testing All tests are located in the `unit_tests` directory. Run `pytest --cov=airbyte_cdk unit_tests/` to run them. This also presents a test coverage report. diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/low_code_component_schema.yaml b/airbyte-cdk/python/airbyte_cdk/sources/declarative/low_code_component_schema.yaml new file mode 100644 index 0000000000000..02fdc9d3b24bf --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/low_code_component_schema.yaml @@ -0,0 +1,918 @@ +"$schema": http://json-schema.org/draft-07/schema# +"$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/airbyte_cdk/sources/declarative/handwritten_manifest.yaml +title: LowCodeComponentManifest +type: object +description: Low Code connector components +version: 1.0.0 +required: + - check + - streams + - version +properties: + check: + "$ref": "#/definitions/CheckStream" + streams: + type: array + items: + "$ref": "#/definitions/DeclarativeStream" + version: + type: string + spec: + "$ref": "#/definitions/Spec" +definitions: + AddedFieldDefinition: + description: Defines the field to add on a record + type: object + required: + - type + - path + - value + properties: + type: + enum: [AddedFieldDefinition] + path: + type: array + items: + type: string + value: + type: string + AddFields: + description: Transformation which adds field to an output record. The path of the added field can be nested. + type: object + required: + - type + - fields + properties: + type: + enum: [AddFields] + fields: + type: array + items: + - "$ref": "#/definitions/AddedFieldDefinition" + $options: + type: object + additionalProperties: true + ApiKeyAuthenticator: + description: Authenticator for requests requiring an api token + type: object + required: + - type + - api_token + properties: + type: + enum: [ApiKeyAuthenticator] + api_token: + type: string + header: + type: string + BasicHttpAuthenticator: + description: Authenticator for requests authenticated with a username and optional password + type: object + required: + - type + - username + properties: + type: + enum: [BasicHttpAuthenticator] + username: + type: string + password: + type: string + BearerAuthenticator: + description: Authenticator for requests authenticated with a Bearer token + type: object + required: + - type + - api_token + properties: + type: + enum: [BearerAuthenticator] + api_token: + type: string + CartesianProductStreamSlicer: + description: Stream slicer that iterates over the cartesian product of input stream slicers + type: object + required: + - type + - stream_slicers + properties: + type: + enum: [CartesianProductStreamSlicer] + stream_slicers: + type: array + items: + anyOf: + - "$ref": "#/definitions/CustomStreamSlicer" + - "$ref": "#/definitions/DatetimeStreamSlicer" + - "$ref": "#/definitions/ListStreamSlicer" + - "$ref": "#/definitions/SingleSlice" + - "$ref": "#/definitions/SubstreamSlicer" + $options: + type: object + additionalProperties: true + CheckStream: + description: Checks the connections by trying to read records from one or many of the streams selected by the developer + type: object + required: + - type + - stream_names + properties: + type: + enum: [CheckStream] + stream_names: + type: array + items: + type: string + CompositeErrorHandler: + description: Error handler that sequentially iterates over a list of error handlers + type: object + required: + - error_handlers + properties: + type: + enum: [CompositeErrorHandler] + error_handlers: + type: array + items: + anyOf: + - "$ref": "#/definitions/CompositeErrorHandler" + - "$ref": "#/definitions/DefaultErrorHandler" + $options: + type: object + additionalProperties: true + ConstantBackoffStrategy: + description: Backoff strategy with a constant backoff interval + type: object + required: + - type + - backoff_time_in_seconds + properties: + type: + enum: [ConstantBackoffStrategy] + backoff_time_in_seconds: + anyOf: + - type: number + - type: string + $options: + type: object + additionalProperties: true + CursorPagination: + description: Pagination strategy that evaluates an interpolated string to define the next page token + type: object + required: + - type + - cursor_value + properties: + type: + enum: [CursorPagination] + cursor_value: + type: string + page_size: + type: integer + stop_condition: + type: string + decoder: + "$ref": "#/definitions/JsonDecoder" + $options: + type: object + additionalProperties: true + CustomAuthenticator: + description: Authenticator component whose behavior is derived from a custom code implementation of the connector + type: object + additionalProperties: true + required: + - type + - class_name + properties: + type: + enum: [CustomAuthenticator] + class_name: + type: string + additionalProperties: true + $options: + type: object + additionalProperties: true + CustomBackoffStrategy: + description: Backoff strategy component whose behavior is derived from a custom code implementation of the connector + type: object + additionalProperties: true + required: + - type + - class_name + properties: + type: + enum: [CustomBackoffStrategy] + class_name: + type: string + $options: + type: object + additionalProperties: true + CustomErrorHandler: + description: Error handler component whose behavior is derived from a custom code implementation of the connector + type: object + additionalProperties: true + required: + - type + - class_name + properties: + type: + enum: [CustomErrorHandler] + class_name: + type: string + $options: + type: object + additionalProperties: true + CustomPaginationStrategy: + description: Pagination strategy component whose behavior is derived from a custom code implementation of the connector + type: object + additionalProperties: true + required: + - type + - class_name + properties: + type: + enum: [CustomPaginationStrategy] + class_name: + type: string + $options: + type: object + additionalProperties: true + CustomRecordExtractor: + description: Record extractor component whose behavior is derived from a custom code implementation of the connector + type: object + additionalProperties: true + required: + - type + - class_name + properties: + type: + enum: [CustomRecordExtractor] + class_name: + type: string + $options: + type: object + additionalProperties: true + CustomStreamSlicer: + description: Stream slicer component whose behavior is derived from a custom code implementation of the connector + type: object + additionalProperties: true + required: + - type + - class_name + properties: + type: + enum: [CustomStreamSlicer] + class_name: + type: string + $options: + type: object + additionalProperties: true + DatetimeStreamSlicer: + description: Stream slicer that slices the stream over a datetime range + type: object + required: + - type + - cursor_field + - end_datetime + - datetime_format + - start_datetime + - step + properties: + type: + enum: [DatetimeStreamSlicer] + cursor_field: + type: string + datetime_format: + type: string + end_datetime: + anyOf: + - type: string + - "$ref": "#/definitions/MinMaxDatetime" + start_datetime: + anyOf: + - type: string + - "$ref": "#/definitions/MinMaxDatetime" + step: + type: string + end_time_option: + "$ref": "#/definitions/RequestOption" + lookback_window: + type: string + start_time_option: + "$ref": "#/definitions/RequestOption" + stream_state_field_end: + type: string + stream_state_field_start: + type: string + $options: + type: object + additionalProperties: true + DeclarativeOauth2Authenticator: + description: Authenticator for requests using Oauth authentication + type: object + required: + - type + - client_id + - client_secret + - refresh_token + - token_refresh_endpoint + properties: + type: + enum: [DeclarativeOauth2Authenticator] + client_id: + type: string + client_secret: + type: string + refresh_token: + type: string + token_refresh_endpoint: + type: string + access_token_name: + type: string + expires_in_name: + type: string + grant_type: + type: string + refresh_request_body: + type: object + additionalProperties: true + scopes: + type: array + items: + type: string + token_expiry_date: + type: string + DeclarativeStream: + description: A stream whose behavior is described by a set of declarative low code components + type: object + additionalProperties: true + required: + - type + - retriever + properties: + type: + enum: [DeclarativeStream] + retriever: + "$ref": "#/definitions/SimpleRetriever" + checkpoint_interval: + type: integer + name: + type: string + default: "" + primary_key: + anyOf: + - type: string + - type: array + items: + type: string + - type: array + items: + type: array + items: + type: string + default: "" + schema_loader: + anyOf: + - "$ref": "#/definitions/DefaultSchemaLoader" + - "$ref": "#/definitions/JsonFileSchemaLoader" + stream_cursor_field: + anyOf: + - type: string + - type: array + items: + - type: string + transformations: + type: array + items: + anyOf: + - "$ref": "#/definitions/AddFields" + - "$ref": "#/definitions/RemoveFields" + $options: + type: object + additional_properties: true + DefaultErrorHandler: + description: The default error handler. Default behavior includes only retrying server errors (HTTP 5XX) and too many requests (HTTP 429) with an exponential backoff + type: object + properties: + type: + enum: [DefaultErrorHandler] + backoff_strategies: + type: array + items: + anyOf: + - "$ref": "#/definitions/ConstantBackoffStrategy" + - "$ref": "#/definitions/CustomBackoffStrategy" + - "$ref": "#/definitions/ExponentialBackoffStrategy" + - "$ref": "#/definitions/WaitTimeFromHeaderBackoffStrategy" + - "$ref": "#/definitions/WaitUntilTimeFromHeaderBackoffStrategy" + max_retries: + type: integer + default: 5 + response_filters: + type: array + items: + "$ref": "#/definitions/HttpResponseFilter" + $options: + type: object + additional_properties: true + DefaultPaginator: + description: Default pagination implementation to request pages of results with a fixed size until the pagination strategy no longer returns a next_page_token + type: object + required: + - type + - pagination_strategy + - url_base + properties: + type: + enum: [DefaultPaginator] + pagination_strategy: + anyOf: + - "$ref": "#/definitions/CursorPagination" + - "$ref": "#/definitions/CustomPaginationStrategy" + - "$ref": "#/definitions/OffsetIncrement" + - "$ref": "#/definitions/PageIncrement" + url_base: + type: string + decoder: + "$ref": "#/definitions/JsonDecoder" + page_size_option: + "$ref": "#/definitions/RequestOption" + page_token_option: + "$ref": "#/definitions/RequestOption" + $options: + type: object + additionalProperties: true + DefaultSchemaLoader: + description: Loads a schema from the default location or returns an empty schema for streams that have not defined their schema file yet. + type: object + required: + - type + properties: + type: + enum: [DefaultSchemaLoader] + $options: + type: object + additionalProperties: true + DpathExtractor: + description: Record extractor that searches a decoded response over a path defined as an array of fields + type: object + required: + - type + - field_pointer + properties: + type: + enum: [DpathExtractor] + field_pointer: + type: array + items: + - type: string + decoder: + "$ref": "#/definitions/JsonDecoder" + $options: + type: object + additionalProperties: true + ExponentialBackoffStrategy: + description: Backoff strategy with an exponential backoff interval + type: object + required: + - type + properties: + type: + enum: [ExponentialBackoffStrategy] + factor: + anyOf: + - type: number + - type: string + default: 5 + $options: + type: object + additionalProperties: true + HttpRequester: + description: Default implementation of a requester + type: object + required: + - type + - name + - path + - url_base + properties: + type: + enum: [HttpRequester] + name: + type: string + path: + type: string + url_base: + type: string + authenticator: + anyOf: + - "$ref": "#/definitions/ApiKeyAuthenticator" + - "$ref": "#/definitions/BasicHttpAuthenticator" + - "$ref": "#/definitions/BearerAuthenticator" + - "$ref": "#/definitions/CustomAuthenticator" + - "$ref": "#/definitions/DeclarativeOauth2Authenticator" + - "$ref": "#/definitions/NoAuth" + error_handler: + anyOf: + - "$ref": "#/definitions/DefaultErrorHandler" + - "$ref": "#/definitions/CustomErrorHandler" + - "$ref": "#/definitions/CompositeErrorHandler" + http_method: + anyOf: + - type: string + - type: string + enum: + - GET + - POST + default: GET + request_options_provider: + "$ref": "#/definitions/InterpolatedRequestOptionsProvider" + $options: + type: object + additionalProperties: true + HttpResponseFilter: + description: A filter that is used to select on properties of the HTTP response received + type: object + required: + - action + properties: + type: + enum: [HttpResponseFilter] + action: + type: string + enum: + - SUCCESS + - FAIL + - RETRY + - IGNORE + error_message: + type: string + error_message_contains: + type: string + http_codes: + type: array + items: + type: integer + uniqueItems: true + predicate: + type: string + $options: + type: object + additionalProperties: true + InterpolatedRequestOptionsProvider: + description: Defines the request options to set on an outgoing HTTP request by evaluating `InterpolatedMapping`s + type: object + required: + - type + properties: + type: + enum: [InterpolatedRequestOptionsProvider] + request_body_data: + anyOf: + - type: string + - type: object + additionalProperties: + type: string + request_body_json: + anyOf: + - type: string + - type: object + additionalProperties: + type: string + request_headers: + anyOf: + - type: string + - type: object + additionalProperties: + type: string + request_parameters: + anyOf: + - type: string + - type: object + additionalProperties: + type: string + $options: + type: object + additionalProperties: true + JsonFileSchemaLoader: + description: Loads the schema from a json file + type: object + required: + - type + properties: + type: + enum: [JsonFileSchemaLoader] + file_path: + type: string + $options: + type: object + additionalProperties: true + JsonDecoder: + type: object + required: + - type + properties: + type: + enum: [JsonDecoder] + ListStreamSlicer: + description: Stream slicer that iterates over the values of a list + type: object + required: + - type + - cursor_field + - slice_values + properties: + type: + enum: [ListStreamSlicer] + cursor_field: + type: string + slice_values: + anyOf: + - type: string + - type: array + items: + type: string + request_option: + "$ref": "#/definitions/RequestOption" + $options: + type: object + additionalProperties: true + MinMaxDatetime: + description: Compares the provided date against optional minimum or maximum times. The max_datetime serves as the ceiling and will be returned when datetime exceeds it. The min_datetime serves as the floor + type: object + required: + - type + - datetime + properties: + type: + enum: [MinMaxDatetime] + datetime: + type: string + datetime_format: + type: string + default: "" + max_datetime: + type: string + min_datetime: + type: string + $options: + type: object + additionalProperties: true + NoAuth: + description: Authenticator for requests requiring no authentication + type: object + required: + - type + properties: + type: + enum: [NoAuth] + $options: + type: object + additionalProperties: true + NoPagination: + description: Pagination implementation that never returns a next page + type: object + required: + - type + properties: + type: + enum: [NoPagination] + OffsetIncrement: + description: Pagination strategy that returns the number of records reads so far and returns it as the next page token + type: object + required: + - type + - page_size + properties: + type: + enum: [OffsetIncrement] + page_size: + type: integer + $options: + type: object + additionalProperties: true + PageIncrement: + description: Pagination strategy that returns the number of pages reads so far and returns it as the next page token + type: object + required: + - type + - page_size + properties: + type: + enum: [PageIncrement] + page_size: + type: integer + start_from_page: + type: integer + default: 0 + $options: + type: object + additionalProperties: true + ParentStreamConfig: + description: Describes how to create a stream slice from a parent stream + type: object + required: + - type + - parent_key + - stream + - stream_slice_field + properties: + type: + enum: [ParentStreamConfig] + parent_key: + type: string + stream: + "$ref": "#/definitions/DeclarativeStream" + stream_slice_field: + type: string + request_option: + "$ref": "#/definitions/RequestOption" + $options: + type: object + additionalProperties: true + RecordFilter: + description: Filter applied on a list of Records + type: object + required: + - type + properties: + type: + enum: [RecordFilter] + backoff_time_in_seconds: + type: string + default: "" + $options: + type: object + additionalProperties: true + RecordSelector: + description: Responsible for translating an HTTP response into a list of records by extracting records from the response and optionally filtering records based on a heuristic. + type: object + required: + - type + - extractor + properties: + type: + enum: [RecordSelector] + extractor: + anyOf: + - "$ref": "#/definitions/CustomRecordExtractor" + - "$ref": "#/definitions/DpathExtractor" + record_filter: + "$ref": "#/definitions/RecordFilter" + $options: + type: object + additionalProperties: true + RemoveFields: + description: A transformation which removes fields from a record. The fields removed are designated using FieldPointers. During transformation, if a field or any of its parents does not exist in the record, no error is thrown. + type: object + required: + - type + - field_pointers + properties: + type: + enum: [RemoveFields] + field_pointers: + type: array + items: + items: + type: string + RequestOption: + description: Describes an option to set on a request + type: object + required: + - type + - inject_into + properties: + type: + enum: [RequestOption] + inject_into: + enum: + - request_parameter + - header + - path + - body_data + - body_json + field_name: + type: string + SimpleRetriever: + description: Retrieves records by synchronously sending requests to fetch records. The retriever acts as an orchestrator between the requester, the record selector, the paginator, and the stream slicer. + type: object + required: + - type + - record_selector + - requester + properties: + type: + enum: [SimpleRetriever] + record_selector: + "$ref": "#/definitions/RecordSelector" + requester: + "$ref": "#/definitions/HttpRequester" + name: + type: string + default: "" + paginator: + anyOf: + - "$ref": "#/definitions/DefaultPaginator" + - "$ref": "#/definitions/NoPagination" + primary_key: + anyOf: + - type: string + - type: array + items: + type: string + - type: array + items: + type: array + items: + type: string + default: "" + stream_slicer: + anyOf: + - "$ref": "#/definitions/CartesianProductStreamSlicer" + - "$ref": "#/definitions/CustomStreamSlicer" + - "$ref": "#/definitions/DatetimeStreamSlicer" + - "$ref": "#/definitions/ListStreamSlicer" + - "$ref": "#/definitions/SingleSlice" + - "$ref": "#/definitions/SubstreamSlicer" + $options: + type: object + additionalProperties: true + SingleSlice: + description: Stream slicer returning only a single stream slice + type: object + required: + - type + properties: + type: + enum: [SingleSlice] + $options: + type: object + additionalProperties: true + Spec: + description: A connection specification made up of information about the connector and how it can be configured + type: object + required: + - type + - connection_specification + - documentation_url + properties: + type: + enum: [Spec] + connection_specification: + type: object + additionalProperties: true + documentation_url: + type: string + SubstreamSlicer: + description: Stream slicer that iterates over the parent's stream slices and records and emits slices by interpolating the slice_definition mapping + type: object + required: + - type + - parent_stream_configs + properties: + type: + enum: [SubstreamSlicer] + parent_stream_configs: + type: array + items: + "$ref": "#/definitions/ParentStreamConfig" + $options: + type: object + additionalProperties: true + WaitTimeFromHeaderBackoffStrategy: + description: Extract wait time from http header + type: object + required: + - type + - header + properties: + type: + enum: [WaitTimeFromHeaderBackoffStrategy] + header: + type: string + regex: + type: string + $options: + type: object + additionalProperties: true + WaitUntilTimeFromHeaderBackoffStrategy: + description: Extract time at which we can retry the request from response header and wait for the difference between now and that time + type: object + required: + - type + - header + properties: + type: + enum: [WaitUntilTimeFromHeaderBackoffStrategy] + header: + type: string + min_wait: + anyOf: + - type: number + - type: string + regex: + type: string + $options: + type: object + additionalProperties: true diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/__init__.py new file mode 100644 index 0000000000000..986458e0779a1 --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/__init__.py @@ -0,0 +1,3 @@ +# generated by generate-component-manifest-files +from .declarative_component_schema import * +from .low_code_component_schema import * diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py new file mode 100644 index 0000000000000..aff43c54c4a5d --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -0,0 +1,490 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + +# generated by datamodel-codegen: +# filename: declarative_component_schema.yaml + +from __future__ import annotations + +from enum import Enum +from typing import Any, Dict, List, Optional, Union + +from pydantic import BaseModel, Extra, Field +from typing_extensions import Literal + + +class AddedFieldDefinition(BaseModel): + type: Literal["AddedFieldDefinition"] + path: List[str] + value: str + + +class AddFields(BaseModel): + type: Literal["AddFields"] + fields: List[AddedFieldDefinition] + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class ApiKeyAuthenticator(BaseModel): + type: Literal["ApiKeyAuthenticator"] + api_token: str + header: Optional[str] = None + + +class BasicHttpAuthenticator(BaseModel): + type: Literal["BasicHttpAuthenticator"] + username: str + password: Optional[str] = None + + +class BearerAuthenticator(BaseModel): + type: Literal["BearerAuthenticator"] + api_token: str + + +class CheckStream(BaseModel): + type: Literal["CheckStream"] + stream_names: List[str] + + +class ConstantBackoffStrategy(BaseModel): + type: Literal["ConstantBackoffStrategy"] + backoff_time_in_seconds: Union[float, str] + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class CustomAuthenticator(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["CustomAuthenticator"] + class_name: str + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class CustomBackoffStrategy(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["CustomBackoffStrategy"] + class_name: str + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class CustomErrorHandler(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["CustomErrorHandler"] + class_name: str + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class CustomPaginationStrategy(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["CustomPaginationStrategy"] + class_name: str + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class CustomRecordExtractor(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["CustomRecordExtractor"] + class_name: str + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class CustomRetriever(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["CustomRetriever"] + class_name: str + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class CustomStreamSlicer(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["CustomStreamSlicer"] + class_name: str + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class CustomTransformation(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["CustomTransformation"] + class_name: str + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class DeclarativeOauth2Authenticator(BaseModel): + type: Literal["DeclarativeOauth2Authenticator"] + client_id: str + client_secret: str + refresh_token: str + token_refresh_endpoint: str + access_token_name: Optional[str] = None + expires_in_name: Optional[str] = None + grant_type: Optional[str] = None + refresh_request_body: Optional[Dict[str, Any]] = None + scopes: Optional[List[str]] = None + token_expiry_date: Optional[str] = None + + +class DefaultSchemaLoader(BaseModel): + type: Literal["DefaultSchemaLoader"] + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class ExponentialBackoffStrategy(BaseModel): + type: Literal["ExponentialBackoffStrategy"] + factor: Optional[Union[float, str]] = 5 + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class HttpMethodEnum(Enum): + GET = "GET" + POST = "POST" + + +class Action(Enum): + SUCCESS = "SUCCESS" + FAIL = "FAIL" + RETRY = "RETRY" + IGNORE = "IGNORE" + + +class HttpResponseFilter(BaseModel): + type: Literal["HttpResponseFilter"] + action: Action + error_message: Optional[str] = None + error_message_contains: Optional[str] = None + http_codes: Optional[List[int]] = None + predicate: Optional[str] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class InlineSchemaLoader(BaseModel): + type: Literal["InlineSchemaLoader"] + schema_: Optional[Dict[str, Any]] = Field(None, alias="schema") + + +class InterpolatedRequestOptionsProvider(BaseModel): + type: Literal["InterpolatedRequestOptionsProvider"] + request_body_data: Optional[Union[str, Dict[str, str]]] = None + request_body_json: Optional[Union[str, Dict[str, str]]] = None + request_headers: Optional[Union[str, Dict[str, str]]] = None + request_parameters: Optional[Union[str, Dict[str, str]]] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class Type(Enum): + JsonFileSchemaLoader = "JsonFileSchemaLoader" + JsonSchema = "JsonSchema" + + +class JsonFileSchemaLoader(BaseModel): + type: Type + file_path: Optional[str] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class JsonDecoder(BaseModel): + type: Literal["JsonDecoder"] + + +class MinMaxDatetime(BaseModel): + type: Literal["MinMaxDatetime"] + datetime: str + datetime_format: Optional[str] = "" + max_datetime: Optional[str] = None + min_datetime: Optional[str] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class NoAuth(BaseModel): + type: Literal["NoAuth"] + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class NoPagination(BaseModel): + type: Literal["NoPagination"] + + +class OffsetIncrement(BaseModel): + type: Literal["OffsetIncrement"] + page_size: Union[float, str] + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class PageIncrement(BaseModel): + type: Literal["PageIncrement"] + page_size: int + start_from_page: Optional[int] = 0 + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class PrimaryKey(BaseModel): + __root__: Union[str, List[str], List[List[str]]] = Field(..., description="The stream field to be used to distinguish unique rows") + + +class RecordFilter(BaseModel): + type: Literal["RecordFilter"] + backoff_time_in_seconds: Optional[str] = "" + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class RemoveFields(BaseModel): + type: Literal["RemoveFields"] + field_pointers: List[List[str]] + + +class InjectInto(Enum): + request_parameter = "request_parameter" + header = "header" + path = "path" + body_data = "body_data" + body_json = "body_json" + + +class RequestOption(BaseModel): + type: Literal["RequestOption"] + inject_into: InjectInto + field_name: Optional[str] = None + + +class Schemas(BaseModel): + pass + + class Config: + extra = Extra.allow + + +class SessionTokenAuthenticator(BaseModel): + type: Literal["SessionTokenAuthenticator"] + api_url: str + header: str + login_url: str + session_token: str + session_token_response_key: str + username: str + validate_session_url: str + password: Optional[str] = "" + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class SingleSlice(BaseModel): + type: Literal["SingleSlice"] + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class Spec(BaseModel): + type: Literal["Spec"] + connection_specification: Dict[str, Any] + documentation_url: str + + +class WaitTimeFromHeader(BaseModel): + type: Literal["WaitTimeFromHeader"] + header: str + regex: Optional[str] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class WaitUntilTimeFromHeader(BaseModel): + type: Literal["WaitUntilTimeFromHeader"] + header: str + min_wait: Optional[Union[float, str]] = None + regex: Optional[str] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class CursorPagination(BaseModel): + type: Literal["CursorPagination"] + cursor_value: str + page_size: Optional[int] = None + stop_condition: Optional[str] = None + decoder: Optional[JsonDecoder] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class DatetimeStreamSlicer(BaseModel): + type: Literal["DatetimeStreamSlicer"] + cursor_field: str + datetime_format: str + end_datetime: Union[str, MinMaxDatetime] + start_datetime: Union[str, MinMaxDatetime] + step: str + end_time_option: Optional[RequestOption] = None + lookback_window: Optional[str] = None + start_time_option: Optional[RequestOption] = None + stream_state_field_end: Optional[str] = None + stream_state_field_start: Optional[str] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class DefaultErrorHandler(BaseModel): + type: Literal["DefaultErrorHandler"] + backoff_strategies: Optional[ + List[ + Union[ + ConstantBackoffStrategy, + CustomBackoffStrategy, + ExponentialBackoffStrategy, + WaitTimeFromHeader, + WaitUntilTimeFromHeader, + ] + ] + ] = None + max_retries: Optional[int] = 5 + response_filters: Optional[List[HttpResponseFilter]] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class DefaultPaginator(BaseModel): + type: Literal["DefaultPaginator"] + pagination_strategy: Union[CursorPagination, CustomPaginationStrategy, OffsetIncrement, PageIncrement] + url_base: str + decoder: Optional[JsonDecoder] = None + page_size_option: Optional[RequestOption] = None + page_token_option: Optional[RequestOption] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class DpathExtractor(BaseModel): + type: Literal["DpathExtractor"] + field_pointer: List[str] + decoder: Optional[JsonDecoder] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class ListStreamSlicer(BaseModel): + type: Literal["ListStreamSlicer"] + cursor_field: str + slice_values: Union[str, List[str]] + request_option: Optional[RequestOption] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class RecordSelector(BaseModel): + type: Literal["RecordSelector"] + extractor: Union[CustomRecordExtractor, DpathExtractor] + record_filter: Optional[RecordFilter] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class CompositeErrorHandler(BaseModel): + type: Literal["CompositeErrorHandler"] + error_handlers: List[Union[CompositeErrorHandler, DefaultErrorHandler]] + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class HttpRequester(BaseModel): + type: Literal["HttpRequester"] + name: str + path: str + url_base: str + authenticator: Optional[ + Union[ + ApiKeyAuthenticator, + BasicHttpAuthenticator, + BearerAuthenticator, + CustomAuthenticator, + DeclarativeOauth2Authenticator, + NoAuth, + SessionTokenAuthenticator, + ] + ] = None + error_handler: Optional[Union[DefaultErrorHandler, CustomErrorHandler, CompositeErrorHandler]] = None + http_method: Optional[Union[str, HttpMethodEnum]] = "GET" + request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class DeclarativeSource(BaseModel): + type: Literal["DeclarativeSource"] + check: CheckStream + streams: List[DeclarativeStream] + version: str + schemas: Optional[Schemas] = None + spec: Optional[Spec] = None + + +class CartesianProductStreamSlicer(BaseModel): + type: Literal["CartesianProductStreamSlicer"] + stream_slicers: List[ + Union[ + CustomStreamSlicer, + DatetimeStreamSlicer, + ListStreamSlicer, + SingleSlice, + SubstreamSlicer, + ] + ] + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class DeclarativeStream(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["DeclarativeStream"] + retriever: Union[CustomRetriever, SimpleRetriever] + checkpoint_interval: Optional[int] = None + name: Optional[str] = "" + primary_key: Optional[Union[str, List[str], List[List[str]]]] = "" + schema_loader: Optional[Union[DefaultSchemaLoader, InlineSchemaLoader, JsonFileSchemaLoader]] = None + stream_cursor_field: Optional[Union[str, List[str]]] = None + transformations: Optional[List[Union[AddFields, CustomTransformation, RemoveFields]]] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class ParentStreamConfig(BaseModel): + type: Literal["ParentStreamConfig"] + parent_key: str + stream: DeclarativeStream + stream_slice_field: str + request_option: Optional[RequestOption] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class SimpleRetriever(BaseModel): + type: Literal["SimpleRetriever"] + record_selector: RecordSelector + requester: HttpRequester + name: Optional[str] = "" + paginator: Optional[Union[DefaultPaginator, NoPagination]] = None + primary_key: Optional[PrimaryKey] = None + stream_slicer: Optional[ + Union[ + CartesianProductStreamSlicer, + CustomStreamSlicer, + DatetimeStreamSlicer, + ListStreamSlicer, + SingleSlice, + SubstreamSlicer, + ] + ] = None + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +class SubstreamSlicer(BaseModel): + type: Literal["SubstreamSlicer"] + parent_stream_configs: List[ParentStreamConfig] + _options: Optional[Dict[str, Any]] = Field(None, alias="$options") + + +CompositeErrorHandler.update_forward_refs() +DeclarativeSource.update_forward_refs() +CartesianProductStreamSlicer.update_forward_refs() +DeclarativeStream.update_forward_refs() +SimpleRetriever.update_forward_refs() diff --git a/airbyte-cdk/python/bin/generate-component-manifest-files.sh b/airbyte-cdk/python/bin/generate-component-manifest-files.sh new file mode 100755 index 0000000000000..31d5066dce382 --- /dev/null +++ b/airbyte-cdk/python/bin/generate-component-manifest-files.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -e + +[ -z "$ROOT_DIR" ] && exit 1 + +YAML_DIR=airbyte-cdk/python/airbyte_cdk/sources/declarative +OUTPUT_DIR=airbyte-cdk/python/airbyte_cdk/sources/declarative/models + +function main() { + rm -rf "$ROOT_DIR/$OUTPUT_DIR"/*.py + echo "# generated by generate-component-manifest-files" > "$ROOT_DIR/$OUTPUT_DIR"/__init__.py + + for f in "$ROOT_DIR/$YAML_DIR"/*.yaml; do + filename_wo_ext=$(basename "$f" | cut -d . -f 1) + echo "from .$filename_wo_ext import *" >> "$ROOT_DIR/$OUTPUT_DIR"/__init__.py + + docker run --user "$(id -u):$(id -g)" -v "$ROOT_DIR":/airbyte airbyte/code-generator:dev \ + --input "/airbyte/$YAML_DIR/$filename_wo_ext.yaml" \ + --output "/airbyte/$OUTPUT_DIR/$filename_wo_ext.py" \ + --use-title-as-name \ + --disable-timestamp \ + --enum-field-as-literal one + done +} + +main "$@" diff --git a/airbyte-cdk/python/build.gradle b/airbyte-cdk/python/build.gradle index c5cf8cc3af33e..addc32e7c80f9 100644 --- a/airbyte-cdk/python/build.gradle +++ b/airbyte-cdk/python/build.gradle @@ -13,6 +13,12 @@ task generateProtocolClassFiles(type: Exec) { dependsOn ':tools:code-generator:airbyteDocker' } +task generateComponentManifestClassFiles(type: Exec) { + environment 'ROOT_DIR', rootDir.absolutePath + commandLine 'bin/generate-component-manifest-files.sh' + dependsOn ':tools:code-generator:airbyteDocker' +} + task validateSourceYamlManifest(type: Exec) { environment 'ROOT_DIR', rootDir.absolutePath commandLine 'bin/validate-yaml-schema.sh' @@ -22,3 +28,8 @@ blackFormat.dependsOn generateProtocolClassFiles isortFormat.dependsOn generateProtocolClassFiles flakeCheck.dependsOn generateProtocolClassFiles installReqs.dependsOn generateProtocolClassFiles + +blackFormat.dependsOn generateComponentManifestClassFiles +isortFormat.dependsOn generateComponentManifestClassFiles +flakeCheck.dependsOn generateComponentManifestClassFiles +installReqs.dependsOn generateComponentManifestClassFiles diff --git a/build.gradle b/build.gradle index 8c30547fc84c4..5d7c2213598f6 100644 --- a/build.gradle +++ b/build.gradle @@ -485,6 +485,7 @@ subprojects { subproj -> task('generate') { dependsOn subprojects.collect { it.getTasksByName('generateProtocolClassFiles', true) } + dependsOn subprojects.collect { it.getTasksByName('generateComponentManifestClassFiles', true) } dependsOn subprojects.collect { it.getTasksByName('generateJsonSchema2Pojo', true) } } @@ -527,6 +528,11 @@ subprojects { if (generateFilesTask != null) { licenseTask.dependsOn generateFilesTask } + + def generateManifestFilesTask = project.tasks.findByName('generateComponentManifestClassFiles') + if (generateManifestFilesTask != null) { + licenseTask.dependsOn generateManifestFilesTask + } } task listAllDependencies(type: DependencyReportTask) {} } diff --git a/pyproject.toml b/pyproject.toml index 17544d0afc311..e584fd9ba9c9b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ extend-exclude = [ "models", ".eggs", "airbyte-cdk/python/airbyte_cdk/models/__init__.py", + "airbyte-cdk/python/airbyte_cdk/sources/declarative/models/__init__.py", ".tox", "airbyte_api_client", "**/generated/*",