From 0fdcf3ca23a72aadb8b58701dcc1bdc250c5fa2e Mon Sep 17 00:00:00 2001 From: ynaim94-harrys Date: Mon, 19 Sep 2022 20:59:16 -0400 Subject: [PATCH] initial commit --- .github/workflows/ci_workflow.yml | 58 +++++++++++++ .gitignore | 136 ++++++++++++++++++++++++++++++ .secrets/.gitignore | 10 +++ README.md | 131 ++++++++++++++++++++++++++++ meltano.yml | 25 ++++++ mypy.ini | 6 ++ output/.gitignore | 4 + pyproject.toml | 38 +++++++++ tap_gladly/__init__.py | 0 tap_gladly/client.py | 100 ++++++++++++++++++++++ tap_gladly/streams.py | 62 ++++++++++++++ tap_gladly/tap.py | 58 +++++++++++++ tap_gladly/tests/__init__.py | 1 + tap_gladly/tests/test_core.py | 26 ++++++ tox.ini | 53 ++++++++++++ 15 files changed, 708 insertions(+) create mode 100644 .github/workflows/ci_workflow.yml create mode 100644 .gitignore create mode 100644 .secrets/.gitignore create mode 100644 README.md create mode 100644 meltano.yml create mode 100644 mypy.ini create mode 100644 output/.gitignore create mode 100644 pyproject.toml create mode 100644 tap_gladly/__init__.py create mode 100644 tap_gladly/client.py create mode 100644 tap_gladly/streams.py create mode 100644 tap_gladly/tap.py create mode 100644 tap_gladly/tests/__init__.py create mode 100644 tap_gladly/tests/test_core.py create mode 100644 tox.ini diff --git a/.github/workflows/ci_workflow.yml b/.github/workflows/ci_workflow.yml new file mode 100644 index 0000000..0547784 --- /dev/null +++ b/.github/workflows/ci_workflow.yml @@ -0,0 +1,58 @@ +### A CI workflow template that runs linting and python testing +### TODO: Modify as needed or as desired. 
+ +name: Test tap-gladly + +on: [push] + +jobs: + linting: + + runs-on: ubuntu-latest + strategy: + matrix: + # Only lint using the primary version used for dev + python-version: [3.9] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install Poetry + uses: snok/install-poetry@v1 + with: + version: 1.1.8 + - name: Install dependencies + run: | + poetry install + - name: Run lint command from tox.ini + run: | + poetry run tox -e lint + + pytest: + + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + strategy: + matrix: + python-version: [3.7, 3.8, 3.9] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install Poetry + uses: snok/install-poetry@v1 + with: + version: 1.1.11 + - name: Install dependencies + run: | + poetry install + - name: Test with pytest + run: | + poetry run pytest --capture=no diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..475019c --- /dev/null +++ b/.gitignore @@ -0,0 +1,136 @@ +# Secrets and internal config files +**/.secrets/* + +# Ignore meltano internal cache and sqlite systemdb + +.meltano/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/.secrets/.gitignore b/.secrets/.gitignore new file mode 100644 index 0000000..33c6acd --- /dev/null +++ b/.secrets/.gitignore @@ -0,0 +1,10 @@ +# IMPORTANT! This folder is hidden from git - if you need to store config files or other secrets, +# make sure those are never staged for commit into your git repo. You can store them here or another +# secure location. +# +# Note: This may be redundant with the global .gitignore for, and is provided +# for redundancy. If the `.secrets` folder is not needed, you may delete it +# from the project. 
+ +* +!.gitignore diff --git a/README.md b/README.md new file mode 100644 index 0000000..3c05045 --- /dev/null +++ b/README.md @@ -0,0 +1,131 @@ +# tap-gladly + +`tap-gladly` is a Singer tap for gladly. + +Built with the [Meltano Tap SDK](https://sdk.meltano.com) for Singer Taps. + + + +## Configuration + +### Accepted Config Options + + + +A full list of supported settings and capabilities for this +tap is available by running: + +```bash +tap-gladly --about +``` + +### Configure using environment variables + +This Singer tap will automatically import any environment variables within the working directory's +`.env` if the `--config=ENV` is provided, such that config values will be considered if a matching +environment variable is set either in the terminal context or in the `.env` file. + +### Source Authentication and Authorization + + + +## Usage + +You can easily run `tap-gladly` by itself or in a pipeline using [Meltano](https://meltano.com/). + +### Executing the Tap Directly + +```bash +tap-gladly --version +tap-gladly --help +tap-gladly --config CONFIG --discover > ./catalog.json +``` + +## Developer Resources + +Follow these instructions to contribute to this project. + +### Initialize your Development Environment + +```bash +pipx install poetry +poetry install +``` + +### Create and Run Tests + +Create tests within the `tap_gladly/tests` subfolder and + then run: + +```bash +poetry run pytest +``` + +You can also test the `tap-gladly` CLI interface directly using `poetry run`: + +```bash +poetry run tap-gladly --help +``` + +### Testing with [Meltano](https://www.meltano.com) + +_**Note:** This tap will work in any Singer environment and does not require Meltano. 
+Examples here are for convenience and to streamline end-to-end orchestration scenarios._ + + + +Next, install Meltano (if you haven't already) and any needed plugins: + +```bash +# Install meltano +pipx install meltano +# Initialize meltano within this directory +cd tap-gladly +meltano install +``` + +Now you can test and orchestrate using Meltano: + +```bash +# Test invocation: +meltano invoke tap-gladly --version +# OR run a test `elt` pipeline: +meltano elt tap-gladly target-jsonl +``` + +### SDK Dev Guide + +See the [dev guide](https://sdk.meltano.com/en/latest/dev_guide.html) for more instructions on how to use the SDK to +develop your own taps and targets. diff --git a/meltano.yml b/meltano.yml new file mode 100644 index 0000000..d30099d --- /dev/null +++ b/meltano.yml @@ -0,0 +1,25 @@ +version: 1 +send_anonymous_usage_stats: true +project_id: "tap-gladly" +plugins: + extractors: + - name: "tap-gladly" + namespace: "tap_gladly" + pip_url: -e . + capabilities: + - state + - catalog + - discover + config: + start_date: '2010-01-01T00:00:00Z' + settings: + # TODO: To configure using Meltano, declare settings and their types here: + - name: username + - name: password + kind: password + - name: start_date + value: '2010-01-01T00:00:00Z' + loaders: + - name: target-jsonl + variant: andyh1203 + pip_url: target-jsonl diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..ba621de --- /dev/null +++ b/mypy.ini @@ -0,0 +1,6 @@ +[mypy] +python_version = 3.9 +warn_unused_configs = True + +[mypy-backoff.*] +ignore_missing_imports = True diff --git a/output/.gitignore b/output/.gitignore new file mode 100644 index 0000000..80ff9d2 --- /dev/null +++ b/output/.gitignore @@ -0,0 +1,4 @@ +# This directory is used as a target by target-jsonl, so ignore all files + +* +!.gitignore diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1edcf8d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,38 @@ +[tool.poetry] +name = "tap-gladly" +version = 
"0.0.1" +description = "`tap-gladly` is a Singer tap for gladly, built with the Meltano SDK for Singer Taps." +authors = ["Youssef Naim"] +keywords = [ + "ELT", + "gladly", +] +license = "Apache 2.0" + +[tool.poetry.dependencies] +python = "<3.11,>=3.7.1" +requests = "^2.25.1" +singer-sdk = "^0.10.0" + +[tool.poetry.dev-dependencies] +pytest = "^6.2.5" +tox = "^3.24.4" +flake8 = "^3.9.2" +black = "^21.9b0" +pydocstyle = "^6.1.1" +mypy = "^0.910" +types-requests = "^2.26.1" +isort = "^5.10.1" + +[tool.isort] +profile = "black" +multi_line_output = 3 # Vertical Hanging Indent +src_paths = "tap_gladly" + +[build-system] +requires = ["poetry-core>=1.0.8"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry.scripts] +# CLI declaration +tap-gladly = 'tap_gladly.tap:Tapgladly.cli' diff --git a/tap_gladly/__init__.py b/tap_gladly/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tap_gladly/client.py b/tap_gladly/client.py new file mode 100644 index 0000000..3e793f0 --- /dev/null +++ b/tap_gladly/client.py @@ -0,0 +1,100 @@ +"""REST client handling, including gladlyStream base class.""" + +import requests +from pathlib import Path +from typing import Any, Dict, Optional, Union, List, Iterable + +from memoization import cached + +from singer_sdk.helpers.jsonpath import extract_jsonpath +from singer_sdk.streams import RESTStream +from singer_sdk.authenticators import APIKeyAuthenticator + + +SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") + + +class gladlyStream(RESTStream): + """gladly stream class.""" + + # TODO: Set the API's base URL here: + url_base = "https://api.mysample.com" + + # OR use a dynamic url_base: + # @property + # def url_base(self) -> str: + # """Return the API URL root, configurable via tap settings.""" + # return self.config["api_url"] + + records_jsonpath = "$[*]" # Or override `parse_response`. + next_page_token_jsonpath = "$.next_page" # Or override `get_next_page_token`. 
+ + @property + def authenticator(self) -> APIKeyAuthenticator: + """Return a new authenticator object.""" + return APIKeyAuthenticator.create_for_stream( + self, + key="x-api-key", + value=self.config.get("api_key"), + location="header" + ) + + @property + def http_headers(self) -> dict: + """Return the http headers needed.""" + headers = {} + if "user_agent" in self.config: + headers["User-Agent"] = self.config.get("user_agent") + # If not using an authenticator, you may also provide inline auth headers: + # headers["Private-Token"] = self.config.get("auth_token") + return headers + + def get_next_page_token( + self, response: requests.Response, previous_token: Optional[Any] + ) -> Optional[Any]: + """Return a token for identifying next page or None if no more pages.""" + # TODO: If pagination is required, return a token which can be used to get the + # next page. If this is the final page, return "None" to end the + # pagination loop. + if self.next_page_token_jsonpath: + all_matches = extract_jsonpath( + self.next_page_token_jsonpath, response.json() + ) + first_match = next(iter(all_matches), None) + next_page_token = first_match + else: + next_page_token = response.headers.get("X-Next-Page", None) + + return next_page_token + + def get_url_params( + self, context: Optional[dict], next_page_token: Optional[Any] + ) -> Dict[str, Any]: + """Return a dictionary of values to be used in URL parameterization.""" + params: dict = {} + if next_page_token: + params["page"] = next_page_token + if self.replication_key: + params["sort"] = "asc" + params["order_by"] = self.replication_key + return params + + def prepare_request_payload( + self, context: Optional[dict], next_page_token: Optional[Any] + ) -> Optional[dict]: + """Prepare the data payload for the REST API request. + + By default, no payload will be sent (return None). + """ + # TODO: Delete this method if no payload is required. (Most REST APIs.) 
+ return None + + def parse_response(self, response: requests.Response) -> Iterable[dict]: + """Parse the response and return an iterator of result records.""" + # TODO: Parse response body and return a set of records. + yield from extract_jsonpath(self.records_jsonpath, input=response.json()) + + def post_process(self, row: dict, context: Optional[dict]) -> dict: + """As needed, append or transform raw data to match expected structure.""" + # TODO: Delete this method if not needed. + return row diff --git a/tap_gladly/streams.py b/tap_gladly/streams.py new file mode 100644 index 0000000..68ea823 --- /dev/null +++ b/tap_gladly/streams.py @@ -0,0 +1,62 @@ +"""Stream type classes for tap-gladly.""" + +from pathlib import Path +from typing import Any, Dict, Optional, Union, List, Iterable + +from singer_sdk import typing as th # JSON Schema typing helpers + +from tap_gladly.client import gladlyStream + +# TODO: Delete this is if not using json files for schema definition +SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") +# TODO: - Override `UsersStream` and `GroupsStream` with your own stream definition. +# - Copy-paste as many times as needed to create multiple stream types. 
+
+
+class UsersStream(gladlyStream):
+    """Stream of user records served at the `/users` path.
+
+    Records are identified by `id`.
+    """
+
+    name = "users"
+    path = "/users"
+    primary_keys = ["id"]
+    # No replication key is declared for this stream.
+    replication_key = None
+    # Optionally, you may also use `schema_filepath` in place of `schema`:
+    # schema_filepath = SCHEMAS_DIR / "users.json"
+    # JSON Schema of a user record, built with the SDK typing helpers.
+    schema = th.PropertiesList(
+        th.Property("name", th.StringType),
+        th.Property("id", th.StringType, description="The user's system ID"),
+        th.Property("age", th.IntegerType, description="The user's age in years"),
+        th.Property("email", th.StringType, description="The user's email address"),
+        th.Property("street", th.StringType),
+        th.Property("city", th.StringType),
+        th.Property(
+            "state",
+            th.StringType,
+            description="State name in ISO 3166-2 format",
+        ),
+        th.Property("zip", th.StringType),
+    ).to_dict()
+
+
+class GroupsStream(gladlyStream):
+    """Stream of group records served at the `/groups` path.
+
+    Records are identified by `id`.
+    """
+
+    name = "groups"
+    path = "/groups"
+    primary_keys = ["id"]
+    # The `modified` date-time property below is the replication key.
+    replication_key = "modified"
+    # JSON Schema of a group record, built with the SDK typing helpers.
+    schema = th.PropertiesList(
+        th.Property("name", th.StringType),
+        th.Property("id", th.StringType),
+        th.Property("modified", th.DateTimeType),
+    ).to_dict()
diff --git a/tap_gladly/tap.py b/tap_gladly/tap.py
new file mode 100644
index 0000000..06a2636
--- /dev/null
+++ b/tap_gladly/tap.py
@@ -0,0 +1,58 @@
+"""gladly tap class."""
+
+from typing import List
+
+from singer_sdk import Tap, Stream
+from singer_sdk import typing as th  # JSON schema typing helpers
+# TODO: Import your custom stream types here:
+from tap_gladly.streams import (
+    gladlyStream,
+    UsersStream,
+    GroupsStream,
+)
+# TODO: Compile a list of custom stream types here
+#       OR rewrite discover_streams() below with your custom logic.
+STREAM_TYPES = [UsersStream, GroupsStream]
+
+
+class Tapgladly(Tap):
+    """Singer tap for gladly that exposes the streams in `STREAM_TYPES`."""
+
+    name = "tap-gladly"
+
+    # Settings accepted by the tap: `auth_token` and `project_ids` are required;
+    # `start_date` is optional and `api_url` defaults to https://api.mysample.com.
+    # TODO: Update this section with the actual config values you expect:
+    config_jsonschema = th.PropertiesList(
+        th.Property(
+            "auth_token",
+            th.StringType,
+            required=True,
+            description="The token to authenticate against the API service",
+        ),
+        th.Property(
+            "project_ids",
+            th.ArrayType(th.StringType),
+            required=True,
+            description="Project IDs to replicate",
+        ),
+        th.Property(
+            "start_date",
+            th.DateTimeType,
+            description="The earliest record date to sync",
+        ),
+        th.Property(
+            "api_url",
+            th.StringType,
+            default="https://api.mysample.com",
+            description="The url for the API service",
+        ),
+    ).to_dict()
+
+    def discover_streams(self) -> List[Stream]:
+        """Return one stream per class in `STREAM_TYPES`, built with `tap=self`."""
+        return [stream_type(tap=self) for stream_type in STREAM_TYPES]
+
+
+if __name__ == "__main__":
+    Tapgladly.cli()
diff --git a/tap_gladly/tests/__init__.py b/tap_gladly/tests/__init__.py
new file mode 100644
index 0000000..8589a28
--- /dev/null
+++ b/tap_gladly/tests/__init__.py
@@ -0,0 +1 @@
+"""Test suite for tap-gladly."""
diff --git a/tap_gladly/tests/test_core.py b/tap_gladly/tests/test_core.py
new file mode 100644
index 0000000..5fcf9dd
--- /dev/null
+++ b/tap_gladly/tests/test_core.py
@@ -0,0 +1,24 @@
+"""Tests standard tap features using the built-in SDK tests library."""
+
+import datetime
+
+from singer_sdk.testing import get_standard_tap_tests
+
+from tap_gladly.tap import Tapgladly
+
+SAMPLE_CONFIG = {
+    # Placeholder values for the settings Tapgladly declares as required.
+    "auth_token": "dummy-token",
+    "project_ids": ["dummy-project"],
+    "start_date": datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d"),
+}
+
+
+# Run standard built-in tap tests from the SDK:
+def test_standard_tap_tests():
+    """Run every standard tap test returned by the SDK against Tapgladly."""
+    for test in get_standard_tap_tests(Tapgladly, config=SAMPLE_CONFIG):
+        test()
+
+
+# TODO: Create additional tests as appropriate for your tap.
diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..5a1b562 --- /dev/null +++ b/tox.ini @@ -0,0 +1,53 @@ +# This file can be used to customize tox tests as well as other test frameworks like flake8 and mypy + +[tox] +envlist = py38 +; envlist = py37, py38, py39 +isolated_build = true + +[testenv] +whitelist_externals = poetry + +commands = + poetry install -v + poetry run pytest + poetry run black --check tap_gladly/ + poetry run flake8 tap_gladly + poetry run pydocstyle tap_gladly + poetry run mypy tap_gladly --exclude='tap_gladly/tests' + +[testenv:pytest] +# Run the python tests. +# To execute, run `tox -e pytest` +envlist = py37, py38, py39 +commands = + poetry install -v + poetry run pytest + +[testenv:format] +# Attempt to auto-resolve lint errors before they are raised. +# To execute, run `tox -e format` +commands = + poetry install -v + poetry run black tap_gladly/ + poetry run isort tap_gladly + +[testenv:lint] +# Raise an error if lint and style standards are not met. +# To execute, run `tox -e lint` +commands = + poetry install -v + poetry run black --check --diff tap_gladly/ + poetry run isort --check tap_gladly + poetry run flake8 tap_gladly + poetry run pydocstyle tap_gladly + # refer to mypy.ini for specific settings + poetry run mypy tap_gladly --exclude='tap_gladly/tests' + +[flake8] +ignore = W503 +max-line-length = 88 +max-complexity = 10 + +[pydocstyle] +ignore = D105,D203,D213