From c7ec0657499c7388ca89eda368b4615694cd2048 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Fri, 23 Feb 2024 05:18:12 -0600 Subject: [PATCH 01/16] Move unit tests to subfolder --- tests/{ => unit}/test_clients.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{ => unit}/test_clients.py (100%) diff --git a/tests/test_clients.py b/tests/unit/test_clients.py similarity index 100% rename from tests/test_clients.py rename to tests/unit/test_clients.py From 724c89e19d0c31e899148ef9296557b546b1a2e5 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sun, 10 Mar 2024 22:52:31 -0500 Subject: [PATCH 02/16] chore: add vscode files to git --- .vscode/extensions.json | 3 +++ .vscode/settings.json | 7 +++++++ 2 files changed, 10 insertions(+) create mode 100644 .vscode/extensions.json create mode 100644 .vscode/settings.json diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..29d4338 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,3 @@ +{ + "recommendations": ["trunk.io"] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..f42f632 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "editor.defaultFormatter": "trunk.io", + "files.insertFinalNewline": true, + "files.trimFinalNewlines": true, + "files.trimTrailingWhitespace": true, + "git.suggestSmartCommit": true +} From 236851bc6362be15cdb4d7bd01cee195cdd385af Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sun, 10 Mar 2024 22:54:32 -0500 Subject: [PATCH 03/16] ci: add trunk folder to git --- .github/workflows/python-package.yml | 101 +++++----- .gitignore | 2 + .pre-commit-config.yaml | 19 -- .trunk/.gitignore | 9 + .trunk/configs/.markdownlint.yaml | 13 ++ .trunk/configs/.yamllint.yaml | 7 + .trunk/configs/bandit.yaml | 2 + .trunk/configs/ruff.toml | 5 + .trunk/trunk.yaml | 62 ++++++ commitlint.config.js | 1 + poetry.lock | 289 ++++++++++++--------------- pyproject.toml | 4 +- 12 files changed, 
280 insertions(+), 234 deletions(-) delete mode 100644 .pre-commit-config.yaml create mode 100644 .trunk/.gitignore create mode 100644 .trunk/configs/.markdownlint.yaml create mode 100644 .trunk/configs/.yamllint.yaml create mode 100644 .trunk/configs/bandit.yaml create mode 100644 .trunk/configs/ruff.toml create mode 100644 .trunk/trunk.yaml create mode 100644 commitlint.config.js diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index f363da0..7b3a2a7 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -5,9 +5,9 @@ name: Python package on: push: - branches: [ "main" ] + branches: [main] pull_request: - branches: [ "main" ] + branches: [main] jobs: build: @@ -17,56 +17,47 @@ jobs: matrix: python-version: ["3.11", "3.12"] steps: - #---------------------------------------------- - # check-out repo and set-up python - #---------------------------------------------- - - name: Check out repo - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - #---------------------------------------------- - # install & configure poetry - #---------------------------------------------- - - name: Install Poetry - uses: snok/install-poetry@v1 - with: - virtualenvs-in-project: true - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install poetry - poetry install - #---------------------------------------------- - # load cached venv if cache exists - #---------------------------------------------- - - name: Load cached venv - id: cached-poetry-dependencies - uses: actions/cache@v3 - with: - path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} - #---------------------------------------------- - # install dependencies if cache does not exist - 
#---------------------------------------------- - - name: Install dependencies - if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction --no-root - #---------------------------------------------- - # run linter - #---------------------------------------------- - - name: Lint with ruff - run: | - source .venv/bin/activate - # stop the build if there are Python syntax errors or undefined names - ruff check . --select=E9,F63,F7,F82 --output-format=full --no-fix --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - ruff check . --select=E501,C901 --line-length=127 --exit-zero --no-fix --statistics - #---------------------------------------------- - # run tests - #---------------------------------------------- - - name: Test with pytest - run: | - source .venv/bin/activate - pytest + #---------------------------------------------- + # check-out repo and set-up python + #---------------------------------------------- + - name: Check out repo + uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + id: setup-python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + #---------------------------------------------- + # install & configure poetry + #---------------------------------------------- + - name: Install Poetry + uses: snok/install-poetry@v1 + with: + virtualenvs-in-project: true + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install poetry + poetry install + #---------------------------------------------- + # load cached venv if cache exists + #---------------------------------------------- + - name: Load cached venv + id: cached-poetry-dependencies + uses: actions/cache@v3 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} + #---------------------------------------------- + # install 
dependencies if cache does not exist + #---------------------------------------------- + - name: Install dependencies + if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + run: poetry install --no-interaction --no-root + #---------------------------------------------- + # run tests + #---------------------------------------------- + - name: Test with pytest + run: | + source .venv/bin/activate + pytest diff --git a/.gitignore b/.gitignore index 68bc17f..24ea1f1 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +# ----- diffbot-kg ----- diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 3b1b4ed..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# See https://pre-commit.com for more information -# See https://pre-commit.com/hooks.html for more hooks -repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-yaml - - id: check-added-large-files -- repo: https://github.com/psf/black - rev: 24.2.0 - hooks: - - id: black - args: ["."] -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.2.2 - hooks: - - id: ruff diff --git a/.trunk/.gitignore b/.trunk/.gitignore new file mode 100644 index 0000000..15966d0 --- /dev/null +++ b/.trunk/.gitignore @@ -0,0 +1,9 @@ +*out +*logs +*actions +*notifications +*tools +plugins +user_trunk.yaml +user.yaml +tmp diff --git a/.trunk/configs/.markdownlint.yaml b/.trunk/configs/.markdownlint.yaml new file mode 100644 index 0000000..325667c --- /dev/null +++ b/.trunk/configs/.markdownlint.yaml @@ -0,0 +1,13 @@ +# Autoformatter friendly markdownlint config (all formatting rules disabled) +default: true +blank_lines: false +bullet: false +html: false 
+indentation: false +line_length: false +no-duplicate-heading: + siblings_only: true +no-trailing-punctuation: false +spaces: false +url: false +whitespace: false diff --git a/.trunk/configs/.yamllint.yaml b/.trunk/configs/.yamllint.yaml new file mode 100644 index 0000000..184e251 --- /dev/null +++ b/.trunk/configs/.yamllint.yaml @@ -0,0 +1,7 @@ +rules: + quoted-strings: + required: only-when-needed + extra-allowed: ["{|}"] + key-duplicates: {} + octal-values: + forbid-implicit-octal: true diff --git a/.trunk/configs/bandit.yaml b/.trunk/configs/bandit.yaml new file mode 100644 index 0000000..ec304e8 --- /dev/null +++ b/.trunk/configs/bandit.yaml @@ -0,0 +1,2 @@ +assert_used: + skips: [./tests/*.py] diff --git a/.trunk/configs/ruff.toml b/.trunk/configs/ruff.toml new file mode 100644 index 0000000..f5a235c --- /dev/null +++ b/.trunk/configs/ruff.toml @@ -0,0 +1,5 @@ +# Generic, formatter-friendly config. +select = ["B", "D3", "E", "F"] + +# Never enforce `E501` (line length violations). This should be handled by formatters. +ignore = ["E501"] diff --git a/.trunk/trunk.yaml b/.trunk/trunk.yaml new file mode 100644 index 0000000..7cfdba7 --- /dev/null +++ b/.trunk/trunk.yaml @@ -0,0 +1,62 @@ +# This file controls the behavior of Trunk: https://docs.trunk.io/cli +# To learn more about the format of this file, see https://docs.trunk.io/reference/trunk-yaml +version: 0.1 +cli: + version: 1.20.1 +# Trunk provides extensibility via plugins. (https://docs.trunk.io/plugins) +plugins: + sources: + - id: trunk + ref: v1.4.4 + uri: https://github.com/trunk-io/plugins +# Many linters and tools depend on runtimes - configure them here. (https://docs.trunk.io/runtimes) +runtimes: + enabled: + - node@18.12.1 + - python@3.10.8 +# This is the section where you manage your linters. 
(https://docs.trunk.io/check/configuration) +lint: + ignore: + - linters: [ALL] + paths: + # Ignore generated files + - tests/functional/cassettes/*.yaml + definitions: + - name: bandit + direct_configs: [bandit.yaml] + commands: + - name: lint + run: bandit --exit-zero -c bandit.yaml --format json --output ${tmpfile} ${target} + - name: trufflehog + commands: + - name: lint + run: trufflehog filesystem --json --fail --exclude-paths=/.gitignore ${target} + enabled: + - actionlint@1.6.27 + - bandit@1.7.7 + - checkov@3.2.30 + - git-diff-check + - markdownlint@0.39.0 + - osv-scanner@1.6.2 + - prettier@3.2.5 + - ruff@0.3.1 + - semgrep@1.64.0 + - sourcery@1.15.0 + - taplo@0.8.1 + - trivy@0.49.1 + - trufflehog-git@3.68.5 + - trufflehog@3.68.4 + - yamllint@1.35.1 + disabled: + - black + - isort +actions: + enabled: + - commitizen + - commitlint + - git-lfs + - trufflehog-pre-commit + - trunk-announce + - trunk-check-pre-push + - trunk-fmt-pre-commit + - trunk-upgrade-available diff --git a/commitlint.config.js b/commitlint.config.js new file mode 100644 index 0000000..5073c20 --- /dev/null +++ b/commitlint.config.js @@ -0,0 +1 @@ +module.exports = { extends: ["@commitlint/config-conventional"] }; diff --git a/poetry.lock b/poetry.lock index 6e00bf7..3fc5101 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
[[package]] name = "aiohttp" @@ -87,7 +87,6 @@ files = [ [package.dependencies] aiosignal = ">=1.1.2" -async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} attrs = ">=17.3.0" frozenlist = ">=1.1.1" multidict = ">=4.5,<7.0" @@ -121,17 +120,6 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" -[[package]] -name = "async-timeout" -version = "4.0.3" -description = "Timeout context manager for asyncio programs" -optional = false -python-versions = ">=3.7" -files = [ - {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, - {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, -] - [[package]] name = "attrs" version = "23.2.0" @@ -188,8 +176,6 @@ mypy-extensions = ">=0.4.3" packaging = ">=22.0" pathspec = ">=0.9.0" platformdirs = ">=2" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama (>=0.4.3)"] @@ -197,17 +183,6 @@ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] -[[package]] -name = "cfgv" -version = "3.4.0" -description = "Validate configuration and produce human readable error messages." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, - {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, -] - [[package]] name = "click" version = "8.1.7" @@ -233,47 +208,6 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -[[package]] -name = "distlib" -version = "0.3.8" -description = "Distribution utilities" -optional = false -python-versions = "*" -files = [ - {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, - {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, -] - -[[package]] -name = "exceptiongroup" -version = "1.2.0" -description = "Backport of PEP 654 (exception groups)" -optional = false -python-versions = ">=3.7" -files = [ - {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, - {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, -] - -[package.extras] -test = ["pytest (>=6)"] - -[[package]] -name = "filelock" -version = "3.13.1" -description = "A platform independent file lock." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "filelock-3.13.1-py3-none-any.whl", hash = "sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c"}, - {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"}, -] - -[package.extras] -docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.24)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] -typing = ["typing-extensions (>=4.8)"] - [[package]] name = "frozenlist" version = "1.4.1" @@ -360,20 +294,6 @@ files = [ {file = "frozenlist-1.4.1.tar.gz", hash = "sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b"}, ] -[[package]] -name = "identify" -version = "2.5.35" -description = "File identification library for Python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "identify-2.5.35-py2.py3-none-any.whl", hash = "sha256:c4de0081837b211594f8e877a6b4fad7ca32bbfc1a9307fdd61c28bfe923f13e"}, - {file = "identify-2.5.35.tar.gz", hash = "sha256:10a7ca245cfcd756a554a7288159f72ff105ad233c7c4b9c6f0f4d108f5f6791"}, -] - -[package.extras] -license = ["ukkonen"] - [[package]] name = "idna" version = "3.6" @@ -506,20 +426,6 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] -[[package]] -name = "nodeenv" -version = "1.8.0" -description = "Node.js virtual environment builder" -optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" -files = [ - {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"}, - {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, -] - -[package.dependencies] -setuptools = "*" 
- [[package]] name = "packaging" version = "23.2" @@ -572,24 +478,6 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] -[[package]] -name = "pre-commit" -version = "3.6.2" -description = "A framework for managing and maintaining multi-language pre-commit hooks." -optional = false -python-versions = ">=3.9" -files = [ - {file = "pre_commit-3.6.2-py2.py3-none-any.whl", hash = "sha256:ba637c2d7a670c10daedc059f5c49b5bd0aadbccfcd7ec15592cf9665117532c"}, - {file = "pre_commit-3.6.2.tar.gz", hash = "sha256:c3ef34f463045c88658c5b99f38c1e297abdcc0ff13f98d3370055fbbfabc67e"}, -] - -[package.dependencies] -cfgv = ">=2.0.0" -identify = ">=1.0.0" -nodeenv = ">=0.11.1" -pyyaml = ">=5.1" -virtualenv = ">=20.10.0" - [[package]] name = "pytest" version = "8.0.1" @@ -603,11 +491,9 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} -exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" pluggy = ">=1.3.0,<2.0" -tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] @@ -647,6 +533,35 @@ pytest = ">=5.0" [package.extras] dev = ["pre-commit", "pytest-asyncio", "tox"] +[[package]] +name = "pytest-vcr" +version = "1.0.2" +description = "Plugin for managing VCR.py cassettes" +optional = false +python-versions = "*" +files = [ + {file = "pytest-vcr-1.0.2.tar.gz", hash = "sha256:23ee51b75abbcc43d926272773aae4f39f93aceb75ed56852d0bf618f92e1896"}, + {file = "pytest_vcr-1.0.2-py2.py3-none-any.whl", hash = "sha256:2f316e0539399bea0296e8b8401145c62b6f85e9066af7e57b6151481b0d6d9c"}, +] + +[package.dependencies] +pytest = ">=3.6.0" +vcrpy = "*" + +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment 
variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pyyaml" version = "6.0.1" @@ -733,22 +648,6 @@ files = [ {file = "ruff-0.2.2.tar.gz", hash = "sha256:e62ed7f36b3068a30ba39193a14274cd706bc486fad521276458022f7bccb31d"}, ] -[[package]] -name = "setuptools" -version = "69.1.0" -description = "Easily download, build, install, upgrade, and uninstall Python packages" -optional = false -python-versions = ">=3.8" -files = [ - {file = "setuptools-69.1.0-py3-none-any.whl", hash = "sha256:c054629b81b946d63a9c6e732bc8b2513a7c3ea645f11d0139a2191d735c60c6"}, - {file = "setuptools-69.1.0.tar.gz", hash = "sha256:850894c4195f09c4ed30dba56213bf7c3f21d86ed6bdaafb5df5972593bfc401"}, -] - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] -testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", 
"virtualenv (>=13.0.0)", "wheel"] - [[package]] name = "tenacity" version = "8.2.3" @@ -764,47 +663,119 @@ files = [ doc = ["reno", "sphinx", "tornado (>=4.5)"] [[package]] -name = "tomli" -version = "2.0.1" -description = "A lil' TOML parser" +name = "urllib3" +version = "1.26.18" +description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false -python-versions = ">=3.7" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, + {file = "urllib3-1.26.18-py2.py3-none-any.whl", hash = "sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07"}, + {file = "urllib3-1.26.18.tar.gz", hash = "sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0"}, ] +[package.extras] +brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + [[package]] -name = "typing-extensions" -version = "4.9.0" -description = "Backported and Experimental Type Hints for Python 3.8+" +name = "vcrpy" +version = "6.0.1" +description = "Automatically mock your HTTP interactions to simplify and speed up testing" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, - {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, + {file = "vcrpy-6.0.1.tar.gz", hash = "sha256:9e023fee7f892baa0bbda2f7da7c8ac51165c1c6e38ff8688683a12a4bde9278"}, ] 
+[package.dependencies] +PyYAML = "*" +urllib3 = {version = "<2", markers = "platform_python_implementation == \"PyPy\""} +wrapt = "*" +yarl = "*" + +[package.extras] +tests = ["Werkzeug (==2.0.3)", "aiohttp", "boto3", "httplib2", "httpx", "pytest", "pytest-aiohttp", "pytest-asyncio", "pytest-cov", "pytest-httpbin", "requests (>=2.22.0)", "tornado", "urllib3"] + [[package]] -name = "virtualenv" -version = "20.25.1" -description = "Virtual Python Environment builder" +name = "wrapt" +version = "1.16.0" +description = "Module for decorators, wrappers and monkey patching." optional = false -python-versions = ">=3.7" +python-versions = ">=3.6" files = [ - {file = "virtualenv-20.25.1-py3-none-any.whl", hash = "sha256:961c026ac520bac5f69acb8ea063e8a4f071bcc9457b9c1f28f6b085c511583a"}, - {file = "virtualenv-20.25.1.tar.gz", hash = "sha256:e08e13ecdca7a0bd53798f356d5831434afa5b07b93f0abdf0797b7a06ffe197"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, + {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, + {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, + {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, + {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, + {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, + {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, + {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"}, + {file = 
"wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"}, + {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"}, + {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"}, + {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"}, + {file = 
"wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, + {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, + {file = 
"wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, + {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, + {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, + {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, + {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, ] -[package.dependencies] -distlib = ">=0.3.7,<1" -filelock = ">=3.12.2,<4" 
-platformdirs = ">=3.9.1,<5" - -[package.extras] -docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] - [[package]] name = "yarl" version = "1.9.4" @@ -910,5 +881,5 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" -python-versions = "^3.10" -content-hash = "8bdcccf50a04936f9794f11c26039f83e25b33a115fb13316e1409afa2d42302" +python-versions = "^3.11" +content-hash = "4c0148994409e7f485cd8f2da0b445569735a13f03b0d4c05751b8a5aa576935" diff --git a/pyproject.toml b/pyproject.toml index 166d311..89a0757 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,10 +19,12 @@ pytest-asyncio = "^0.23.5" pytest-mock = "^3.12.0" ruff = "^0.2.2" black = "^24.2.0" -pre-commit = "^3.6.2" +python-dotenv = "^1.0.1" +pytest-vcr = "^1.0.2" [tool.pytest.ini_options] pythonpath = "src" +addopts = ["--import-mode=importlib"] [build-system] requires = ["poetry-core"] From 4efaa7a4d51d32129f0eec74d2358432b2dbdae1 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sun, 10 Mar 2024 23:00:08 -0500 Subject: [PATCH 04/16] test: working on functional tests --- src/diffbot_kg/clients.py | 179 +++++++++++++++++++------- src/diffbot_kg/session.py | 16 +++ tests/__init__.py | 0 tests/functional/__init__.py | 0 tests/functional/cassettes/.gitignore | 1 + tests/functional/test_clients.py | 77 +++++++++++ tests/unit/__init__.py | 0 tests/unit/test_clients.py | 40 ++---- 8 files changed, 243 insertions(+), 70 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/functional/__init__.py create mode 100644 tests/functional/cassettes/.gitignore create mode 
100644 tests/functional/test_clients.py create mode 100644 tests/unit/__init__.py diff --git a/src/diffbot_kg/clients.py b/src/diffbot_kg/clients.py index 0a3a3ec..68b994c 100644 --- a/src/diffbot_kg/clients.py +++ b/src/diffbot_kg/clients.py @@ -9,75 +9,134 @@ class BaseDiffbotKGClient: - url = URL("https://kg.diffbot.com/kg/v3/") + """ + Base class for Diffbot Knowledge Graph API clients. + """ - def __init__(self, token, **kwargs) -> None: - for kwarg in kwargs: - if kwarg not in ["token", "useCache", "jsonmode", "size"]: - raise ValueError(f"Invalid kwarg: {kwarg}") + url = URL("https://kg.diffbot.com/kg/v3/", encoded=True) + param_keys = ["jsonmode", "nonCanonicalFacts", "size"] - self.default_params = {"token": token, **kwargs} + def __init__(self, token, **default_params) -> None: + """ + Initializes a new instance of the BaseDiffbotKGClient class (only + callable by subclasses). + + Args: + token (str): The API token for authentication. + **default_params: Default parameters for API requests. + + Raises: + ValueError: If an invalid keyword argument is provided. + """ + + for param in default_params: + if param not in self.param_keys: + raise ValueError(f"Invalid param: {param}") + + self.default_params = {"token": token, **default_params} self.s = DiffbotSession() def _merge_params(self, params) -> dict[str, Any]: + """ + Merges the given parameters with the default parameters. + + Args: + params (dict): The parameters to merge. + + Returns: + dict: The merged parameters. + """ + params = params or {} params = {**self.default_params, **params} params = {k: v for k, v in params.items() if v is not None} return params async def _get(self, url: str | URL, params=None, headers=None) -> DiffbotResponse: + """ + Sends a GET request to the Diffbot API. + + Args: + url (str | URL): The URL to send the request to. + params (dict, optional): The query parameters for the request. Defaults to None. + headers (dict, optional): The headers for the request. 
Defaults to None. + + Returns: + DiffbotResponse: The response from the API. + """ + + params = self._merge_params(params) resp = await self.s.get(str(url), params=params, headers=headers) return resp async def _post( - self, url: str | URL, params: dict | None = None + self, url: str | URL, params: dict | None = None, data: dict | None = None ) -> DiffbotResponse: - """POST request to Diffbot API as alternative to GET for large queries. - All params except token are placed in the body of the request.""" + """ + Sends a POST request to the Diffbot API. + + Args: + url (str | URL): The URL to send the request to. + params (dict, optional): The query parameters for the request. Defaults to None. + data (dict, optional): The data for the request body. Defaults to None. + + Returns: + DiffbotResponse: The response from the API. + """ + + params = self._merge_params(params) token = params.pop("token", None) if params else None json, params = params, {"token": token} - # headers = {"accept": "application/json", "content-type": "application/json"} headers = {"content-type": "application/json"} resp = await self.s.post(str(url), params=params, headers=headers, json=json) return resp - async def _post_or_put(self, url: str | URL, params: dict | None = None): - # Diffbot uses nginx, which has a 4096 byte limit on URL by default - # but there are other factors, so we'll play it safe. - # 250 chars == 2000 bytes - if params is None: - params = {} - else: - params = {k: v for k, v in params.items() if v is not None} + async def _get_or_post(self, url: str | URL, params: dict | None = None): + """ + Sends a GET or POST request to the Diffbot API, depending on the length of the URL. - url_len = len(str(url % params)) - if url_len > 250: - resp = await self._post(url, params=params) - else: - resp = await self._get(url, params=params) + Args: + url (str | URL): The URL to send the request to. + params (dict, optional): The query parameters for the request. Defaults to None. 
- return resp + Returns: + DiffbotResponse: The response from the API. + """ + + params = self._merge_params(params) + + url_len = len(bytes(str(url % params), encoding="ascii")) + + if url_len <= 3000: + return await self._get(url, params=params) + else: + return await self._post(url, params=params) class DiffbotSearchClient(BaseDiffbotKGClient): + """ + A client for interacting with Diffbot's Knowledge Graph search API. + """ + search_url = BaseDiffbotKGClient.url / "dql" - report_url = BaseDiffbotKGClient.url / "dql/report" - report_by_id_url = BaseDiffbotKGClient.url / "dql/report/{id}" + report_url = search_url / "report" + report_by_id_url = report_url / "{id}" async def search(self, params: dict) -> DiffbotResponse: - """Search Dreport_urliffbot's Knowledge Graph. + """Search Diffbot's Knowledge Graph. Args: params (dict): Dict of params to send in request Returns: - response: requests.Response object + DiffbotResponse: The response from the Diffbot API. """ - resp = await self._post_or_put(self.search_url, params=params) + # params["query"] = quote(params["query"], encoding="ascii") + resp = await self._get_or_post(self.search_url, params=params) return resp async def coverage_report_by_id(self, report_id: str) -> DiffbotResponse: @@ -89,6 +148,7 @@ async def coverage_report_by_id(self, report_id: str) -> DiffbotResponse: Returns: DiffbotResponse: The response from the Diffbot API. """ + url = str(self.report_by_id_url).format(id=report_id) resp = await self._get(url) return resp @@ -102,29 +162,58 @@ async def coverage_report_by_query(self, query: str) -> DiffbotResponse: Returns: DiffbotResponse: The response from the Diffbot API. """ + + # params = {"query": quote(query)} params = {"query": query} resp = await self._get(self.report_url, params=params) return resp class DiffbotEnhanceClient(BaseDiffbotKGClient): + """ + A client for interacting with the Diffbot Enhance API. 
+ + This client provides methods for enhancing content using the Diffbot Enhance API, + managing bulk jobs, and retrieving job results and coverage reports. + """ + enhance_url = BaseDiffbotKGClient.url / "enhance" - bulk_enhance_url = enhance_url / "bulk" - bulk_status_url = BaseDiffbotKGClient.url / "enhance/bulk/status" - single_bulkjob_result_url = ( - BaseDiffbotKGClient.url / "enhance/bulk/{bulkjobId}/{jobIdx}" - ) - bulk_job_results_url = BaseDiffbotKGClient.url / "enhance/bulk/{bulkjobId}" - bulk_job_coverage_report_url = ( - BaseDiffbotKGClient.url / "enhance/bulk/{bulkjobId}/coverage/{reportId}" - ) + enhance_bulk_url = enhance_url / "bulk" + bulk_status_url = enhance_bulk_url / "status" + single_bulkjob_result_url = enhance_bulk_url / "{bulkjobId}/{jobIdx}" + bulk_job_results_url = enhance_bulk_url / "{bulkjobId}" + bulk_job_coverage_report_url = enhance_bulk_url / "{bulkjobId}/coverage/{reportId}" + bulk_job_stop_url = enhance_bulk_url / "{bulkjobId}/stop" + + param_keys = BaseDiffbotKGClient.param_keys + ["refresh", "search", "useCache"] async def enhance(self, params) -> DiffbotResponse: + """ + Enhance content using the Diffbot Enhance API. + + Args: + params (dict): The parameters for enhancing the content. + + Returns: + DiffbotResponse: The response from the Diffbot API. + """ + resp = await self._get(self.enhance_url, params=params) return resp - bulk_job_stop_url = BaseDiffbotKGClient.url / "enhance/bulk/{bulkjobId}/stop" - ... # Other methods + async def create_bulkjob(self, params) -> DiffbotResponse: + """ + Create a bulk job for enhancing multiple content items. + + Args: + params (dict): The parameters for creating the bulk job. + + Returns: + DiffbotResponse: The response from the Diffbot API. 
+ """ + + resp = await self._post(self.enhance_bulk_url, params=params) + return resp async def stop_bulkjob(self, bulkjobId: str) -> DiffbotResponse: """ @@ -136,6 +225,7 @@ async def stop_bulkjob(self, bulkjobId: str) -> DiffbotResponse: Returns: DiffbotResponse: The response from the Diffbot API. """ + url = str(self.bulk_job_stop_url).format(bulkjobId=bulkjobId) return await self._get(url) @@ -152,15 +242,12 @@ async def download_single_bulkjob_result( Returns: DiffbotResponse: The response from the Diffbot API. """ + url = str(self.single_bulkjob_result_url).format( bulkjobId=bulkjobId, jobIdx=jobIdx ) return await self._get(url) - async def create_bulkjob(self, params) -> DiffbotResponse: - resp = await self._post(self.bulk_enhance_url, params=params) - return resp - async def list_bulkjobs_for_token(self) -> DiffbotResponse: """ Poll the status of all Enhance Bulkjobs for a token. @@ -168,6 +255,7 @@ async def list_bulkjobs_for_token(self) -> DiffbotResponse: Returns: DiffbotResponse: The response from the Diffbot API. """ + return await self._get(self.bulk_status_url) async def poll_bulkjob_status(self, bulkjobId: str) -> DiffbotResponse: @@ -180,6 +268,7 @@ async def poll_bulkjob_status(self, bulkjobId: str) -> DiffbotResponse: Returns: DiffbotResponse: The response from the Diffbot API. """ + url = str(self.bulk_status_url).format(bulkjobId=bulkjobId) return await self._get(url) @@ -193,6 +282,7 @@ async def download_bulkjob_results(self, bulkjobId: str) -> DiffbotResponse: Returns: DiffbotResponse: The response from the Diffbot API. """ + url = str(self.bulk_job_results_url).format(bulkjobId=bulkjobId) return await self._get(url) @@ -209,6 +299,7 @@ async def download_bulkjob_coverage_report( Returns: DiffbotResponse: The response from the Diffbot API. 
""" + url = str(self.bulk_job_coverage_report_url).format( bulkjobId=bulkjobId, reportId=reportId ) diff --git a/src/diffbot_kg/session.py b/src/diffbot_kg/session.py index a7206df..d1eb7ad 100644 --- a/src/diffbot_kg/session.py +++ b/src/diffbot_kg/session.py @@ -54,6 +54,10 @@ class RetryableException(Exception): pass +class URLTooLongException(Exception): + pass + + # TODO: Should this be a subclass of ClientSession? class DiffbotSession: """ @@ -103,8 +107,20 @@ async def _request(self, method, url, **kwargs) -> DiffbotResponse: resp.reason, resp.headers, ) + raise RetryableException from e + elif resp.status == 414: + log.debug( + "URLTooLongException: %s (%s %s %s)", + e, + resp.status, + resp.reason, + resp.headers, + ) + + raise URLTooLongException from e + log.exception( "%s (%s %s %s)", e, resp.status, resp.reason, resp.headers ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/functional/__init__.py b/tests/functional/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/functional/cassettes/.gitignore b/tests/functional/cassettes/.gitignore new file mode 100644 index 0000000..1e82fc7 --- /dev/null +++ b/tests/functional/cassettes/.gitignore @@ -0,0 +1 @@ +*.yaml diff --git a/tests/functional/test_clients.py b/tests/functional/test_clients.py new file mode 100644 index 0000000..968e2d3 --- /dev/null +++ b/tests/functional/test_clients.py @@ -0,0 +1,77 @@ +import os + +import dotenv +import pytest +from diffbot_kg.clients import DiffbotEnhanceClient, DiffbotSearchClient + +ORG_NAME = "Diffbot" +ORG_ENTITY_ID = "EYX1i02YVPsuT7fPLUYgRhQ" + + +class Secret: + def __init__(self, value): + self.value = value + + def __repr__(self): + return "Secret(********)" + + def __str___(self): + return "*******" + + +@pytest.fixture(scope="session", autouse=True) +def token(): + __tracebackhide__ = True + dotenv.load_dotenv(override=True) + return Secret(os.environ.get("DIFFBOT_TOKEN")) 
+ + +@pytest.hookimpl(tryfirst=True) +def pytest_sanitize_hook(items): + secrets = [token()] + for item in items: + for secret in secrets: + item.add_marker(pytest.mark.sanitize(secret)) + + +# @pytest.mark.usefixtures("suppress_aiottp_output") +class TestDiffbotSearchClient: + @pytest.mark.asyncio + @pytest.mark.vcr + # @pytest.mark.sanitize(token()) + async def test_search_client_search(self, token): + client = DiffbotSearchClient(token=token.value) + response = await client.search( + {"query": f'type:Organization strict:name:"{ORG_NAME}"'} + ) + assert response.status == 200 + assert response.content["hits"] == 1 + assert response.content["results"] == 1 + assert response.data[0]["entity"]["id"] == ORG_ENTITY_ID + + +# @pytest.mark.usefixtures("suppress_aiottp_output") +class TestDiffbotEnhanceClient: + @pytest.mark.asyncio + @pytest.mark.vcr + async def test_enhance_client_enhance(self, token): + client = DiffbotEnhanceClient(token=token.value) + response = await client.enhance({"type": "Organization", "name": ORG_NAME}) + assert response.status == 200 + assert response.content["hits"] == 1 + assert response.data[0]["entity"]["id"] == ORG_ENTITY_ID + + @pytest.mark.asyncio + @pytest.mark.vcr + async def test_enhance_client_create_bulkjob(self, token): + import logging + + logging.basicConfig(level=logging.CRITICAL) + logging.getLogger("diffbot_kg").setLevel(logging.CRITICAL) + + logging.getLogger("diffbot_kg.session").setLevel(logging.CRITICAL) + logging.getLogger("diffbot_kg.clients").setLevel(logging.CRITICAL) + client = DiffbotEnhanceClient(token=token.value) + response = await client.create_bulkjob({"uris": ["http://diffbot.com"]}) + assert response.status == 200 + assert "bulkJobId" in response.content diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/test_clients.py b/tests/unit/test_clients.py index b511f5f..44468ff 100644 --- a/tests/unit/test_clients.py +++ 
b/tests/unit/test_clients.py @@ -1,31 +1,26 @@ # Generated by CodiumAI import pytest - from diffbot_kg.clients import DiffbotEnhanceClient, DiffbotSearchClient from diffbot_kg.session import DiffbotResponse class TestDiffbotSearchClient: - - # mocker fixture - @pytest.fixture - def mock(self, mocker): - return mocker + @pytest.fixture(scope="class") + def search_client(self): + # trunk-ignore(bandit/B106) + return DiffbotSearchClient(token="fake_token") # Returns a DiffbotResponse object when given a search query. @pytest.mark.asyncio - async def test_mocked_search_query(self, mock): - # Initialize the DiffbotSearchClient object - client = DiffbotSearchClient(token="valid_token") - + async def test_mocked_search_query(self, mock, client): # Define the search query parameters params = {"query": "your_search_query", "limit": 10} # Mock the _post_or_put method mock.patch.object( DiffbotSearchClient, - "_post_or_put", + "_get_or_post", return_value=DiffbotResponse(200, {}, {}), # type: ignore ) @@ -38,23 +33,19 @@ async def test_mocked_search_query(self, mock): class TestDiffbotEnhanceClient: - - # mocker fixture - @pytest.fixture - def mock(self, mocker): - return mocker + @pytest.fixture(scope="class") + def client(self): + # trunk-ignore(bandit/B106) + return DiffbotEnhanceClient(token="valid_token") # Returns a DiffbotResponse object when given an enhance query. 
@pytest.mark.asyncio - async def test_mocked_enhance_query(self, mock): - # Initialize the DiffbotSearchClient object - client = DiffbotEnhanceClient(token="valid_token") - + async def test_mocked_enhance_query(self, mocker, client): # Define the search query parameters params = {"query": "your_search_query", "limit": 10} # Mock the _post_or_put method - mock.patch.object( + mocker.patch.object( DiffbotEnhanceClient, "_get", return_value=DiffbotResponse(200, {}, {}), # type: ignore @@ -69,15 +60,12 @@ async def test_mocked_enhance_query(self, mock): # Returns a DiffbotResponse object when given a bulk enhance query. @pytest.mark.asyncio - async def test_mocked_create_bulkjob(self, mock): - # Initialize the DiffbotSearchClient object - client = DiffbotEnhanceClient(token="valid_token") - + async def test_mocked_create_bulkjob(self, mocker, client): # Define the search query parameters params = {"query": "your_bulk_enhance_query", "limit": 10} # Mock the _post_or_put method - mock.patch.object( + mocker.patch.object( DiffbotEnhanceClient, "_post", return_value=DiffbotResponse(200, {}, {}), # type: ignore From 3124aa208ae1ab6438e8d9956f8cac2bf05200ba Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sun, 10 Mar 2024 23:33:49 -0500 Subject: [PATCH 05/16] style: update github actions to pass trunk checks --- .github/workflows/python-package.yml | 3 +++ .github/workflows/python-publish.yml | 33 ++++++++++++++-------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 7b3a2a7..501c0b2 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -9,6 +9,9 @@ on: pull_request: branches: [main] +permissions: + contents: read + jobs: build: runs-on: ubuntu-latest diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index bdaab28..790db44 100644 --- a/.github/workflows/python-publish.yml +++ 
b/.github/workflows/python-publish.yml @@ -17,23 +17,22 @@ permissions: jobs: deploy: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Build package - run: python -m build - - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: 3.x + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} From 4e50942bb0e1d9f2faf0730339c2653e786402f4 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sun, 10 Mar 2024 23:34:19 -0500 Subject: [PATCH 06/16] chore: add excluded files to vscode settings --- .vscode/settings.json | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index f42f632..65abc54 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,5 +3,25 @@ "files.insertFinalNewline": true, "files.trimFinalNewlines": true, "files.trimTrailingWhitespace": true, - "git.suggestSmartCommit": true + "git.suggestSmartCommit": true, + "files.exclude": { + "**/.git": true, + "**/.svn": true, + "**/.hg": true, + "**/CVS": true, + "**/.DS_Store": true, + "**/Thumbs.db": true, + "**/*.pyc": true, + "**/.trunk/*actions/": true, + "**/.trunk/*logs/": true, + "**/.trunk/*notifications/": true, + "**/.trunk/*out/": true, + "**/.trunk/*plugins/": true, + "**/__pycache__": true, + ".ruff_cache": true, + 
".pytest_cache": true, + ".aider.*": true, + ".python-version": true + }, + "explorerExclude.backup": {} } From 666f4980d4c1692cf63beb886079c859aabe65f4 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sat, 6 Apr 2024 10:24:30 -0500 Subject: [PATCH 07/16] feat: v0.2.0 Major bug fixes, more methods, and functional tests --- .trunk/trunk.yaml | 2 +- .vscode/settings.json | 5 +- poetry.lock | 127 +------- pyproject.toml | 3 +- src/diffbot_kg/__init__.py | 4 - src/diffbot_kg/clients.py | 306 ------------------ src/diffbot_kg/clients/base.py | 124 +++++++ src/diffbot_kg/clients/enhance.py | 172 ++++++++++ src/diffbot_kg/clients/search.py | 66 ++++ src/diffbot_kg/{ => clients}/session.py | 47 +-- src/diffbot_kg/models/response/__init__.py | 13 + src/diffbot_kg/models/response/base.py | 58 ++++ .../models/response/bulkjob_create.py | 16 + .../models/response/bulkjob_list.py | 14 + .../models/response/bulkjob_results.py | 30 ++ .../models/response/bulkjob_status.py | 24 ++ .../models/response/coverage_report.py | 5 + src/diffbot_kg/models/response/entities.py | 26 ++ tests/functional/cassettes/.gitignore | 1 - tests/functional/{ => clients}/__init__.py | 0 tests/functional/clients/cassettes/.gitignore | 1 + tests/functional/clients/conftest.py | 45 +++ .../functional/clients/test_enhance_client.py | 203 ++++++++++++ .../functional/clients/test_search_client.py | 33 ++ tests/functional/test_clients.py | 77 ----- tests/unit/{ => clients}/__init__.py | 0 tests/unit/clients/test_enhance_client.py | 65 ++++ tests/unit/clients/test_search_client.py | 44 +++ tests/unit/test_clients.py | 79 ----- 29 files changed, 968 insertions(+), 622 deletions(-) delete mode 100644 src/diffbot_kg/clients.py create mode 100644 src/diffbot_kg/clients/base.py create mode 100644 src/diffbot_kg/clients/enhance.py create mode 100644 src/diffbot_kg/clients/search.py rename src/diffbot_kg/{ => clients}/session.py (66%) create mode 100644 src/diffbot_kg/models/response/__init__.py create mode 100644 
src/diffbot_kg/models/response/base.py create mode 100644 src/diffbot_kg/models/response/bulkjob_create.py create mode 100644 src/diffbot_kg/models/response/bulkjob_list.py create mode 100644 src/diffbot_kg/models/response/bulkjob_results.py create mode 100644 src/diffbot_kg/models/response/bulkjob_status.py create mode 100644 src/diffbot_kg/models/response/coverage_report.py create mode 100644 src/diffbot_kg/models/response/entities.py delete mode 100644 tests/functional/cassettes/.gitignore rename tests/functional/{ => clients}/__init__.py (100%) create mode 100644 tests/functional/clients/cassettes/.gitignore create mode 100644 tests/functional/clients/conftest.py create mode 100644 tests/functional/clients/test_enhance_client.py create mode 100644 tests/functional/clients/test_search_client.py delete mode 100644 tests/functional/test_clients.py rename tests/unit/{ => clients}/__init__.py (100%) create mode 100644 tests/unit/clients/test_enhance_client.py create mode 100644 tests/unit/clients/test_search_client.py delete mode 100644 tests/unit/test_clients.py diff --git a/.trunk/trunk.yaml b/.trunk/trunk.yaml index 7cfdba7..befbf9b 100644 --- a/.trunk/trunk.yaml +++ b/.trunk/trunk.yaml @@ -20,7 +20,7 @@ lint: - linters: [ALL] paths: # Ignore generated files - - tests/functional/cassettes/*.yaml + - tests/functional/**/cassettes/*.yaml definitions: - name: bandit direct_configs: [bandit.yaml] diff --git a/.vscode/settings.json b/.vscode/settings.json index 65abc54..f0a5570 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -23,5 +23,8 @@ ".aider.*": true, ".python-version": true }, - "explorerExclude.backup": {} + "explorerExclude.backup": {}, + "python.testing.pytestArgs": ["tests"], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true } diff --git a/poetry.lock b/poetry.lock index 3fc5101..626b41f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -139,64 +139,6 @@ tests = ["attrs[tests-no-zope]", "zope-interface"] 
tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] -[[package]] -name = "black" -version = "24.2.0" -description = "The uncompromising code formatter." -optional = false -python-versions = ">=3.8" -files = [ - {file = "black-24.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6981eae48b3b33399c8757036c7f5d48a535b962a7c2310d19361edeef64ce29"}, - {file = "black-24.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d533d5e3259720fdbc1b37444491b024003e012c5173f7d06825a77508085430"}, - {file = "black-24.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61a0391772490ddfb8a693c067df1ef5227257e72b0e4108482b8d41b5aee13f"}, - {file = "black-24.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:992e451b04667116680cb88f63449267c13e1ad134f30087dec8527242e9862a"}, - {file = "black-24.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:163baf4ef40e6897a2a9b83890e59141cc8c2a98f2dda5080dc15c00ee1e62cd"}, - {file = "black-24.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e37c99f89929af50ffaf912454b3e3b47fd64109659026b678c091a4cd450fb2"}, - {file = "black-24.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9de21bafcba9683853f6c96c2d515e364aee631b178eaa5145fc1c61a3cc92"}, - {file = "black-24.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:9db528bccb9e8e20c08e716b3b09c6bdd64da0dd129b11e160bf082d4642ac23"}, - {file = "black-24.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d84f29eb3ee44859052073b7636533ec995bd0f64e2fb43aeceefc70090e752b"}, - {file = "black-24.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e08fb9a15c914b81dd734ddd7fb10513016e5ce7e6704bdd5e1251ceee51ac9"}, - {file = "black-24.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:810d445ae6069ce64030c78ff6127cd9cd178a9ac3361435708b907d8a04c693"}, - {file = 
"black-24.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:ba15742a13de85e9b8f3239c8f807723991fbfae24bad92d34a2b12e81904982"}, - {file = "black-24.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7e53a8c630f71db01b28cd9602a1ada68c937cbf2c333e6ed041390d6968faf4"}, - {file = "black-24.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:93601c2deb321b4bad8f95df408e3fb3943d85012dddb6121336b8e24a0d1218"}, - {file = "black-24.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0057f800de6acc4407fe75bb147b0c2b5cbb7c3ed110d3e5999cd01184d53b0"}, - {file = "black-24.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:faf2ee02e6612577ba0181f4347bcbcf591eb122f7841ae5ba233d12c39dcb4d"}, - {file = "black-24.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:057c3dc602eaa6fdc451069bd027a1b2635028b575a6c3acfd63193ced20d9c8"}, - {file = "black-24.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:08654d0797e65f2423f850fc8e16a0ce50925f9337fb4a4a176a7aa4026e63f8"}, - {file = "black-24.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca610d29415ee1a30a3f30fab7a8f4144e9d34c89a235d81292a1edb2b55f540"}, - {file = "black-24.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:4dd76e9468d5536abd40ffbc7a247f83b2324f0c050556d9c371c2b9a9a95e31"}, - {file = "black-24.2.0-py3-none-any.whl", hash = "sha256:e8a6ae970537e67830776488bca52000eaa37fa63b9988e8c487458d9cd5ace6"}, - {file = "black-24.2.0.tar.gz", hash = "sha256:bce4f25c27c3435e4dace4815bcb2008b87e167e3bf4ee47ccdc5ce906eb4894"}, -] - -[package.dependencies] -click = ">=8.0.0" -mypy-extensions = ">=0.4.3" -packaging = ">=22.0" -pathspec = ">=0.9.0" -platformdirs = ">=2" - -[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -uvloop = ["uvloop (>=0.15.2)"] - -[[package]] -name = "click" -version = "8.1.7" -description = "Composable command line interface toolkit" -optional = 
false -python-versions = ">=3.7" -files = [ - {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, - {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - [[package]] name = "colorama" version = "0.4.6" @@ -415,54 +357,17 @@ files = [ {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"}, ] -[[package]] -name = "mypy-extensions" -version = "1.0.0" -description = "Type system extensions for programs checked with the mypy type checker." -optional = false -python-versions = ">=3.5" -files = [ - {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, - {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, -] - [[package]] name = "packaging" -version = "23.2" +version = "24.0" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" files = [ - {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, - {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, + {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, + {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, ] -[[package]] -name = "pathspec" -version = "0.12.1" -description = "Utility library for gitignore style pattern matching of file paths." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, - {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, -] - -[[package]] -name = "platformdirs" -version = "4.2.0" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -optional = false -python-versions = ">=3.8" -files = [ - {file = "platformdirs-4.2.0-py3-none-any.whl", hash = "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068"}, - {file = "platformdirs-4.2.0.tar.gz", hash = "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"}, -] - -[package.extras] -docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] - [[package]] name = "pluggy" version = "1.4.0" @@ -480,33 +385,33 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "pytest" -version = "8.0.1" +version = "8.1.1" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.0.1-py3-none-any.whl", hash = "sha256:3e4f16fe1c0a9dc9d9389161c127c3edc5d810c38d6793042fb81d9f48a59fca"}, - {file = "pytest-8.0.1.tar.gz", hash = "sha256:267f6563751877d772019b13aacbe4e860d73fe8f651f28112e9ac37de7513ae"}, + {file = "pytest-8.1.1-py3-none-any.whl", hash = "sha256:2a8386cfc11fa9d2c50ee7b2a57e7d898ef90470a7a34c4b949ff59662bb78b7"}, + {file = "pytest-8.1.1.tar.gz", hash = "sha256:ac978141a75948948817d360297b7aae0fcb9d6ff6bc9ec6d514b85d5a65c044"}, ] [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} iniconfig = "*" packaging = "*" -pluggy = ">=1.3.0,<2.0" +pluggy = ">=1.4,<2.0" 
[package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +testing = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] [[package]] name = "pytest-asyncio" -version = "0.23.5" +version = "0.23.6" description = "Pytest support for asyncio" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-asyncio-0.23.5.tar.gz", hash = "sha256:3a048872a9c4ba14c3e90cc1aa20cbc2def7d01c7c8db3777ec281ba9c057675"}, - {file = "pytest_asyncio-0.23.5-py3-none-any.whl", hash = "sha256:4e7093259ba018d58ede7d5315131d21923a60f8a6e9ee266ce1589685c89eac"}, + {file = "pytest-asyncio-0.23.6.tar.gz", hash = "sha256:ffe523a89c1c222598c76856e76852b787504ddb72dd5d9b6617ffa8aa2cde5f"}, + {file = "pytest_asyncio-0.23.6-py3-none-any.whl", hash = "sha256:68516fdd1018ac57b846c9846b954f0393b26f094764a28c955eabb0536a4e8a"}, ] [package.dependencies] @@ -518,17 +423,17 @@ testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] [[package]] name = "pytest-mock" -version = "3.12.0" +version = "3.14.0" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-mock-3.12.0.tar.gz", hash = "sha256:31a40f038c22cad32287bb43932054451ff5583ff094bca6f675df2f8bc1a6e9"}, - {file = "pytest_mock-3.12.0-py3-none-any.whl", hash = "sha256:0972719a7263072da3a21c7f4773069bcc7486027d7e8e1f81d98a47e701bc4f"}, + {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, + {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, ] [package.dependencies] -pytest = ">=5.0" +pytest = ">=6.2.5" [package.extras] dev = ["pre-commit", "pytest-asyncio", "tox"] @@ -882,4 +787,4 @@ multidict = ">=4.0" [metadata] lock-version 
= "2.0" python-versions = "^3.11" -content-hash = "4c0148994409e7f485cd8f2da0b445569735a13f03b0d4c05751b8a5aa576935" +content-hash = "2e88f5434ea0bee3b5813f023909ff982b247dc2eac1039b9fd319f26e6409d2" diff --git a/pyproject.toml b/pyproject.toml index 89a0757..86eefc7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "diffbot-kg" -version = "0.1.0" +version = "0.2.0" description = "Python client for the Diffbot Knowledge Graph API." authors = ["Brendan C. Smith"] license = "MIT" @@ -18,7 +18,6 @@ pytest = "^8.0.1" pytest-asyncio = "^0.23.5" pytest-mock = "^3.12.0" ruff = "^0.2.2" -black = "^24.2.0" python-dotenv = "^1.0.1" pytest-vcr = "^1.0.2" diff --git a/src/diffbot_kg/__init__.py b/src/diffbot_kg/__init__.py index a65fd0e..e69de29 100644 --- a/src/diffbot_kg/__init__.py +++ b/src/diffbot_kg/__init__.py @@ -1,4 +0,0 @@ -__all__ = ["DiffbotEnhanceClient", "DiffbotSearchClient", "DiffbotResponse"] - -from .clients import DiffbotEnhanceClient, DiffbotSearchClient -from .session import DiffbotResponse diff --git a/src/diffbot_kg/clients.py b/src/diffbot_kg/clients.py deleted file mode 100644 index 68b994c..0000000 --- a/src/diffbot_kg/clients.py +++ /dev/null @@ -1,306 +0,0 @@ -import logging -from typing import Any - -from yarl import URL - -from diffbot_kg.session import DiffbotResponse, DiffbotSession - -log = logging.getLogger(__name__) - - -class BaseDiffbotKGClient: - """ - Base class for Diffbot Knowledge Graph API clients. - """ - - url = URL("https://kg.diffbot.com/kg/v3/", encoded=True) - param_keys = ["jsonmode", "nonCanonicalFacts", "size"] - - def __init__(self, token, **default_params) -> None: - """ - Initializes a new instance of the BaseDiffbotKGClient class (only - callable by subclasses). - - Args: - token (str): The API token for authentication. - **default_params: Default parameters for API requests. - - Raises: - ValueError: If an invalid keyword argument is provided. 
- """ - - for param in default_params: - if param not in self.param_keys: - raise ValueError(f"Invalid param: {param}") - - self.default_params = {"token": token, **default_params} - self.s = DiffbotSession() - - def _merge_params(self, params) -> dict[str, Any]: - """ - Merges the given parameters with the default parameters. - - Args: - params (dict): The parameters to merge. - - Returns: - dict: The merged parameters. - """ - - params = params or {} - params = {**self.default_params, **params} - params = {k: v for k, v in params.items() if v is not None} - return params - - async def _get(self, url: str | URL, params=None, headers=None) -> DiffbotResponse: - """ - Sends a GET request to the Diffbot API. - - Args: - url (str | URL): The URL to send the request to. - params (dict, optional): The query parameters for the request. Defaults to None. - headers (dict, optional): The headers for the request. Defaults to None. - - Returns: - DiffbotResponse: The response from the API. - """ - - params = self._merge_params(params) - resp = await self.s.get(str(url), params=params, headers=headers) - return resp - - async def _post( - self, url: str | URL, params: dict | None = None, data: dict | None = None - ) -> DiffbotResponse: - """ - Sends a POST request to the Diffbot API. - - Args: - url (str | URL): The URL to send the request to. - params (dict, optional): The query parameters for the request. Defaults to None. - data (dict, optional): The data for the request body. Defaults to None. - - Returns: - DiffbotResponse: The response from the API. 
- """ - - params = self._merge_params(params) - - token = params.pop("token", None) if params else None - json, params = params, {"token": token} - - headers = {"content-type": "application/json"} - - resp = await self.s.post(str(url), params=params, headers=headers, json=json) - return resp - - async def _get_or_post(self, url: str | URL, params: dict | None = None): - """ - Sends a GET or POST request to the Diffbot API, depending on the length of the URL. - - Args: - url (str | URL): The URL to send the request to. - params (dict, optional): The query parameters for the request. Defaults to None. - - Returns: - DiffbotResponse: The response from the API. - """ - - params = self._merge_params(params) - - url_len = len(bytes(str(url % params), encoding="ascii")) - - if url_len <= 3000: - return await self._get(url, params=params) - else: - return await self._post(url, params=params) - - -class DiffbotSearchClient(BaseDiffbotKGClient): - """ - A client for interacting with Diffbot's Knowledge Graph search API. - """ - - search_url = BaseDiffbotKGClient.url / "dql" - report_url = search_url / "report" - report_by_id_url = report_url / "{id}" - - async def search(self, params: dict) -> DiffbotResponse: - """Search Diffbot's Knowledge Graph. - - Args: - params (dict): Dict of params to send in request - - Returns: - DiffbotResponse: The response from the Diffbot API. - """ - - # params["query"] = quote(params["query"], encoding="ascii") - resp = await self._get_or_post(self.search_url, params=params) - return resp - - async def coverage_report_by_id(self, report_id: str) -> DiffbotResponse: - """Download coverage report by report ID. - - Args: - report_id (str): The report ID string. - - Returns: - DiffbotResponse: The response from the Diffbot API. 
- """ - - url = str(self.report_by_id_url).format(id=report_id) - resp = await self._get(url) - return resp - - async def coverage_report_by_query(self, query: str) -> DiffbotResponse: - """Download coverage report by DQL query. - - Args: - query (str): The DQL query string. - - Returns: - DiffbotResponse: The response from the Diffbot API. - """ - - # params = {"query": quote(query)} - params = {"query": query} - resp = await self._get(self.report_url, params=params) - return resp - - -class DiffbotEnhanceClient(BaseDiffbotKGClient): - """ - A client for interacting with the Diffbot Enhance API. - - This client provides methods for enhancing content using the Diffbot Enhance API, - managing bulk jobs, and retrieving job results and coverage reports. - """ - - enhance_url = BaseDiffbotKGClient.url / "enhance" - enhance_bulk_url = enhance_url / "bulk" - bulk_status_url = enhance_bulk_url / "status" - single_bulkjob_result_url = enhance_bulk_url / "{bulkjobId}/{jobIdx}" - bulk_job_results_url = enhance_bulk_url / "{bulkjobId}" - bulk_job_coverage_report_url = enhance_bulk_url / "{bulkjobId}/coverage/{reportId}" - bulk_job_stop_url = enhance_bulk_url / "{bulkjobId}/stop" - - param_keys = BaseDiffbotKGClient.param_keys + ["refresh", "search", "useCache"] - - async def enhance(self, params) -> DiffbotResponse: - """ - Enhance content using the Diffbot Enhance API. - - Args: - params (dict): The parameters for enhancing the content. - - Returns: - DiffbotResponse: The response from the Diffbot API. - """ - - resp = await self._get(self.enhance_url, params=params) - return resp - - async def create_bulkjob(self, params) -> DiffbotResponse: - """ - Create a bulk job for enhancing multiple content items. - - Args: - params (dict): The parameters for creating the bulk job. - - Returns: - DiffbotResponse: The response from the Diffbot API. 
- """ - - resp = await self._post(self.enhance_bulk_url, params=params) - return resp - - async def stop_bulkjob(self, bulkjobId: str) -> DiffbotResponse: - """ - Stop an active Enhance Bulkjob by its ID. - - Args: - bulkjobId (str): The ID of the bulk job. - - Returns: - DiffbotResponse: The response from the Diffbot API. - """ - - url = str(self.bulk_job_stop_url).format(bulkjobId=bulkjobId) - return await self._get(url) - - async def download_single_bulkjob_result( - self, bulkjobId: str, jobIdx: str - ) -> DiffbotResponse: - """ - Download the result of a single job within a bulkjob by specifying the index of the job. - - Args: - bulkjobId (str): The ID of the bulk job. - jobIdx (str): The index of the job within the bulk job. - - Returns: - DiffbotResponse: The response from the Diffbot API. - """ - - url = str(self.single_bulkjob_result_url).format( - bulkjobId=bulkjobId, jobIdx=jobIdx - ) - return await self._get(url) - - async def list_bulkjobs_for_token(self) -> DiffbotResponse: - """ - Poll the status of all Enhance Bulkjobs for a token. - - Returns: - DiffbotResponse: The response from the Diffbot API. - """ - - return await self._get(self.bulk_status_url) - - async def poll_bulkjob_status(self, bulkjobId: str) -> DiffbotResponse: - """ - Poll the status of an Enhance Bulkjob by its ID. - - Args: - bulkjobId (str): The ID of the bulk job. - - Returns: - DiffbotResponse: The response from the Diffbot API. - """ - - url = str(self.bulk_status_url).format(bulkjobId=bulkjobId) - return await self._get(url) - - async def download_bulkjob_results(self, bulkjobId: str) -> DiffbotResponse: - """ - Download the results of a completed Enhance Bulkjob by its ID. - - Args: - bulkjobId (str): The ID of the bulk job. - - Returns: - DiffbotResponse: The response from the Diffbot API. 
- """ - - url = str(self.bulk_job_results_url).format(bulkjobId=bulkjobId) - return await self._get(url) - - async def download_bulkjob_coverage_report( - self, bulkjobId: str, reportId: str - ) -> DiffbotResponse: - """ - Download the coverage report of a completed Enhance Bulkjob by its ID and report ID. - - Args: - bulkjobId (str): The ID of the bulk job. - reportId (str): The ID of the report. - - Returns: - DiffbotResponse: The response from the Diffbot API. - """ - - url = str(self.bulk_job_coverage_report_url).format( - bulkjobId=bulkjobId, reportId=reportId - ) - return await self._get(url) diff --git a/src/diffbot_kg/clients/base.py b/src/diffbot_kg/clients/base.py new file mode 100644 index 0000000..3790a28 --- /dev/null +++ b/src/diffbot_kg/clients/base.py @@ -0,0 +1,124 @@ +from typing import Any + +from diffbot_kg.clients.session import BaseDiffbotResponse, DiffbotSession +from yarl import URL + + +class BaseDiffbotKGClient: + """ + Base class for Diffbot Knowledge Graph API clients. + """ + + url = URL("https://kg.diffbot.com/kg/v3/") + + def __init__(self, token, **default_params) -> None: + """ + Initializes a new instance of the BaseDiffbotKGClient class (only + callable by subclasses). + + Args: + token (str): The API token for authentication. + **default_params: Default parameters for API requests. + + Raises: + ValueError: If an invalid keyword argument is provided. + """ + + self.default_params = {"token": token, **default_params} + self.s = DiffbotSession() + + def _merge_params(self, params) -> dict[str, Any]: + """ + Merges the given parameters with the default parameters. + + Args: + params (dict): The parameters to merge. + + Returns: + dict: The merged parameters. 
+ """ + + params = params or {} + params = {**self.default_params, **params} + params = {k: v for k, v in params.items() if v is not None} + return params + + async def _get( + self, url: str | URL, params=None, headers=None + ) -> BaseDiffbotResponse: + """ + Sends a GET request to the Diffbot API. + + Args: + url (str | URL): The URL to send the request to. + params (dict, optional): The query parameters for the request. Defaults to None. + headers (dict, optional): The headers for the request. Defaults to None. + + Returns: + BaseDiffbotResponse: The response from the API. + """ + + headers = {"accept": "application/json", **(headers or {})} + + params = self._merge_params(params) + resp = await self.s.get(url, params=params, headers=headers) + return resp + + async def _post( + self, + url: str | URL, + params: dict | None = None, + json: dict | list[dict] | None = None, + headers=None, + ) -> BaseDiffbotResponse: + """ + Sends a POST request to the Diffbot API. + + Args: + url (str | URL): The URL to send the request to. + params (dict, optional): The query parameters for the request. Defaults to None. + json (dict | list[dict], optional): The JSON data for the request body. Defaults to None. + + Returns: + BaseDiffbotResponse: The response from the API. + """ + + params = self._merge_params(params) + + headers = { + "content-type": "application/json", + "accept": "application/json", + **(headers or {}), + } + + resp = await self.s.post(url, params=params, headers=headers, json=json) + return resp + + async def _get_or_post( + self, url: str | URL, params: dict | None = None + ) -> BaseDiffbotResponse: + """ + Sends a GET or POST request to the Diffbot API, depending on the length of the URL. + + Args: + url (str | URL): The URL to send the request to. + params (dict, optional): The query parameters for the request. Defaults to None. + + Returns: + BaseDiffbotResponse: The response from the API.
+ """ + + params = self._merge_params(params) + + url_len = len(bytes(str(url % params), encoding="ascii")) + + # sourcery skip: remove-unnecessary-else + if url_len <= 3000: + return await self._get(url, params=params) + else: + token = params.pop("token", None) if params else None + json, params = params, {"token": token} + return await self._post(url, params=params, json=json) + + async def close(self): + await self.s.close() diff --git a/src/diffbot_kg/clients/enhance.py b/src/diffbot_kg/clients/enhance.py new file mode 100644 index 0000000..e103d46 --- /dev/null +++ b/src/diffbot_kg/clients/enhance.py @@ -0,0 +1,172 @@ +from typing import cast + +from diffbot_kg.clients.base import BaseDiffbotKGClient +from diffbot_kg.models.response import ( + DiffbotBulkJobCreateResponse, + DiffbotBulkJobStatusResponse, + DiffbotCoverageReportResponse, + DiffbotEntitiesResponse, + DiffbotListBulkJobsResponse, +) +from diffbot_kg.models.response.bulkjob_results import DiffbotBulkJobResultsResponse + + +class DiffbotEnhanceClient(BaseDiffbotKGClient): + """ + A client for interacting with the Diffbot Enhance API. + + This client provides methods for enhancing content using the Diffbot Enhance API, + managing bulk jobs, and retrieving job results and coverage reports. + """ + + enhance_url = BaseDiffbotKGClient.url / "enhance" + bulk_job_url = enhance_url / "bulk" + list_bulk_jobs_url = bulk_job_url / "status" + bulk_job_status_url = bulk_job_url / "{bulkjobId}" / "status" + bulk_job_results_url = bulk_job_url / "{bulkjobId}" + bulk_job_single_result_url = bulk_job_results_url / "{jobIdx}" + bulk_job_coverage_report_url = bulk_job_url / "report/{bulkjobId}/{reportId}" + bulk_job_stop_url = bulk_job_url / "{bulkjobId}/stop" + + async def enhance(self, params) -> DiffbotEntitiesResponse: + """ + Enhance content using the Diffbot Enhance API. + + Args: + params (dict): The parameters for enhancing the content. + + Returns: + DiffbotResponse: The response from the Diffbot API. 
+ """ + + resp = await self._get(self.enhance_url, params=params) + resp.__class__ = DiffbotEntitiesResponse + return cast(DiffbotEntitiesResponse, resp) + + async def create_bulkjob( + self, json: list[dict], params=None + ) -> DiffbotBulkJobCreateResponse: + """ + Create a bulk job for enhancing multiple content items. + + Args: + json (list[dict]): The content items to enhance. + params (dict, optional): The parameters for creating the bulk job. + + Returns: + DiffbotBulkJobCreateResponse: The response from the Diffbot API. + """ + + if json is None or not json: + raise ValueError("data must be provided") + + resp = await self._post(self.bulk_job_url, params=params, json=json) + resp.__class__ = DiffbotBulkJobCreateResponse + return cast(DiffbotBulkJobCreateResponse, resp) + + async def bulkjob_status(self, bulkjobId: str) -> DiffbotBulkJobStatusResponse: + """ + Poll the status of an Enhance Bulkjob by its ID. + + Args: + bulkjobId (str): The ID of the bulk job. + + Returns: + DiffbotBulkJobStatusResponse: The response from the Diffbot API. + """ + + url = self.bulk_job_status_url.human_repr().format(bulkjobId=bulkjobId) + resp = await self._get(url) + resp.__class__ = DiffbotBulkJobStatusResponse + return cast(DiffbotBulkJobStatusResponse, resp) + + async def list_bulkjobs(self) -> DiffbotListBulkJobsResponse: + """ + Poll the status of all Enhance Bulkjobs for a token. + + Returns: + DiffbotListBulkJobsResponse: The response from the Diffbot API. + """ + + resp = await self._get(self.list_bulk_jobs_url) + resp.__class__ = DiffbotListBulkJobsResponse + return cast(DiffbotListBulkJobsResponse, resp) + + async def bulkjob_results(self, bulkjobId: str) -> DiffbotBulkJobResultsResponse: + """ + Download the results of a completed Enhance Bulkjob by its ID. + + Args: + bulkjobId (str): The ID of the bulk job. + + Returns: + DiffbotBulkJobResultsResponse: The response from the Diffbot API.
+ """ + + url = self.bulk_job_results_url.human_repr().format(bulkjobId=bulkjobId) + resp = await self._get(url) + resp.__class__ = DiffbotBulkJobResultsResponse + return cast(DiffbotBulkJobResultsResponse, resp) + + async def bulkjob_coverage_report( + self, bulkjobId: str, reportId: str + ) -> DiffbotCoverageReportResponse: + """ + Download the coverage report of a completed Enhance Bulkjob by its ID and report ID. + + Args: + bulkjobId (str): The ID of the bulk job. + reportId (str): The ID of the report. + + Returns: + DiffbotCoverageReportResponse: The response from the Diffbot API. + """ + + url = self.bulk_job_coverage_report_url.human_repr().format( + bulkjobId=bulkjobId, reportId=reportId + ) + resp = await self._get(url) + resp.__class__ = DiffbotCoverageReportResponse + return cast(DiffbotCoverageReportResponse, resp) + + async def single_bulkjob_result( + self, + bulkjobId: str, + jobIdx: int, + ) -> DiffbotEntitiesResponse: + """ + Download the result of a single job within a bulkjob by specifying the index of the job. + + Args: + bulkjobId (str): The ID of the bulk job. + jobIdx (int): The index of the job within the bulk job. + + Returns: + DiffbotEntitiesResponse: The response from the Diffbot API. + """ + + url = self.bulk_job_single_result_url.human_repr().format( + bulkjobId=bulkjobId, jobIdx=jobIdx + ) + resp = await self._get(url) + resp.__class__ = DiffbotEntitiesResponse + return cast(DiffbotEntitiesResponse, resp) + + async def stop_bulkjob( + self, + bulkJobId: str, + ) -> DiffbotBulkJobStatusResponse: + """ + Stop an active Enhance Bulkjob by its ID. + + Args: + bulkJobId (str): The ID of the bulk job. + + Returns: + DiffbotBulkJobStatusResponse: The response from the Diffbot API.
+ """ + + url = self.bulk_job_stop_url.human_repr().format(bulkjobId=bulkJobId) + resp = await self._get(url) + resp.__class__ = DiffbotBulkJobStatusResponse + return cast(DiffbotBulkJobStatusResponse, resp) diff --git a/src/diffbot_kg/clients/search.py b/src/diffbot_kg/clients/search.py new file mode 100644 index 0000000..0a7fe8f --- /dev/null +++ b/src/diffbot_kg/clients/search.py @@ -0,0 +1,66 @@ +from typing import cast + +from diffbot_kg.clients.base import BaseDiffbotKGClient +from diffbot_kg.models.response import ( + DiffbotCoverageReportResponse, + DiffbotEntitiesResponse, +) + + +class DiffbotSearchClient(BaseDiffbotKGClient): + """ + A client for interacting with Diffbot's Knowledge Graph search API. + """ + + search_url = BaseDiffbotKGClient.url / "dql" + report_url = search_url / "report" + report_by_id_url = report_url / "{id}" + + async def search(self, params: dict) -> DiffbotEntitiesResponse: + """Search Diffbot's Knowledge Graph. + + Args: + params (dict): Dict of params to send in request + + Returns: + DiffbotResponse: The response from the Diffbot API. + """ + + # No change needed here; the issue will be addressed in the session handling + resp = await self._get_or_post(self.search_url, params=params) + resp.__class__ = DiffbotEntitiesResponse + return cast(DiffbotEntitiesResponse, resp) + + async def coverage_report_by_id( + self, report_id: str + ) -> DiffbotCoverageReportResponse: + """Download coverage report by report ID. + + Args: + report_id (str): The report ID string. + + Returns: + DiffbotResponse: The response from the Diffbot API. + """ + + url = str(self.report_by_id_url).format(id=report_id) + resp = await self._get(url) + resp.__class__ = DiffbotCoverageReportResponse + return cast(DiffbotCoverageReportResponse, resp) + + async def coverage_report_by_query( + self, query: str + ) -> DiffbotCoverageReportResponse: + """Download coverage report by DQL query. + + Args: + query (str): The DQL query string. 
+ + Returns: + DiffbotResponse: The response from the Diffbot API. + """ + + params = {"query": query} + resp = await self._get(self.report_url, params=params) + resp.__class__ = DiffbotCoverageReportResponse + return cast(DiffbotCoverageReportResponse, resp) diff --git a/src/diffbot_kg/session.py b/src/diffbot_kg/clients/session.py similarity index 66% rename from src/diffbot_kg/session.py rename to src/diffbot_kg/clients/session.py index d1eb7ad..ac1ce59 100644 --- a/src/diffbot_kg/session.py +++ b/src/diffbot_kg/clients/session.py @@ -1,10 +1,9 @@ import logging from http import HTTPMethod -from typing import Any, List, Self +from typing import Self import aiohttp import aiolimiter -from multidict import CIMultiDictProxy from tenacity import ( after_log, retry, @@ -13,41 +12,9 @@ wait_random_exponential, ) -log = logging.getLogger(__name__) - - -class DiffbotResponse: - """DiffbotResponse represents the response from a Diffbot API request. - - It contains the response status, headers, and JSON content. Provides - convenience properties to access the 'data' and 'entities' portions - of the JSON content. - - The create classmethod is the main constructor, which handles converting - an aiohttp response into a DiffbotResponse. 
- """ - - def __init__( - self, status: int, headers: CIMultiDictProxy[str], content: dict[str, Any] - ): - self.status = status - self.headers = headers - self.content = content - - @property - def data(self) -> List[dict]: - return self.content["data"] +from diffbot_kg.models.response.base import BaseDiffbotResponse - @property - def entities(self) -> List[dict]: - # Note: this class/method will not be compatible with facet queries - # (no entities returned) - return [d["entity"] for d in self.data] - - @classmethod - async def create(cls, resp: aiohttp.ClientResponse) -> Self: - """Unpack an aiohttp response object and return a DiffbotResponse instance.""" - return cls(resp.status, resp.headers, await resp.json()) +log = logging.getLogger(__name__) class RetryableException(Exception): @@ -74,11 +41,11 @@ def __init__(self) -> None: self._session = aiohttp.ClientSession(headers=headers, timeout=timeout) self._limiter = aiolimiter.AsyncLimiter(max_rate=5, time_period=1) - async def get(self, url, **kwargs) -> DiffbotResponse: + async def get(self, url, **kwargs) -> BaseDiffbotResponse: resp = await self._request(HTTPMethod.GET, url, **kwargs) return resp - async def post(self, url, **kwargs) -> DiffbotResponse: + async def post(self, url, **kwargs) -> BaseDiffbotResponse: resp = await self._request(HTTPMethod.POST, url, **kwargs) return resp @@ -93,7 +60,7 @@ async def close(self) -> None: wait=wait_random_exponential(multiplier=0.5, min=2, max=30), after=after_log(log, logging.DEBUG), ) - async def _request(self, method, url, **kwargs) -> DiffbotResponse: + async def _request(self, method, url, **kwargs) -> BaseDiffbotResponse: async with self._limiter: async with await self._session.request(method, url, **kwargs) as resp: try: @@ -126,7 +93,7 @@ async def _request(self, method, url, **kwargs) -> DiffbotResponse: ) raise e - return await DiffbotResponse.create(resp) + return await BaseDiffbotResponse.create(resp) async def __aenter__(self) -> Self: return self 
diff --git a/src/diffbot_kg/models/response/__init__.py b/src/diffbot_kg/models/response/__init__.py new file mode 100644 index 0000000..6a71719 --- /dev/null +++ b/src/diffbot_kg/models/response/__init__.py @@ -0,0 +1,13 @@ +from diffbot_kg.models.response.bulkjob_create import DiffbotBulkJobCreateResponse +from diffbot_kg.models.response.bulkjob_list import DiffbotListBulkJobsResponse +from diffbot_kg.models.response.bulkjob_status import DiffbotBulkJobStatusResponse +from diffbot_kg.models.response.coverage_report import DiffbotCoverageReportResponse +from diffbot_kg.models.response.entities import DiffbotEntitiesResponse + +__all__ = [ + DiffbotEntitiesResponse.__name__, + DiffbotCoverageReportResponse.__name__, + DiffbotBulkJobCreateResponse.__name__, + DiffbotListBulkJobsResponse.__name__, + DiffbotBulkJobStatusResponse.__name__, +] # type: ignore diff --git a/src/diffbot_kg/models/response/base.py b/src/diffbot_kg/models/response/base.py new file mode 100644 index 0000000..18cd5e2 --- /dev/null +++ b/src/diffbot_kg/models/response/base.py @@ -0,0 +1,58 @@ +import json +import logging +from typing import Any, Self, cast + +import aiohttp +from multidict import CIMultiDictProxy + +log = logging.getLogger(__name__) + + +class BaseDiffbotResponse: + def __init__( + self, + status: int, + headers: CIMultiDictProxy[str], + content: dict[str, Any] | list[dict[str, Any]] | str, + ): + self.status = status + self.headers = headers + self.content = content + + @classmethod + async def create(cls, resp: aiohttp.ClientResponse) -> Self: + """Unpack an aiohttp response object and return a BaseDiffbotResponse instance.""" + + if resp.content_type == "application/json": + content = await resp.json() + elif resp.content_type == "application/json-lines": + text = await resp.text() + content = [json.loads(line) for line in text.strip().split("\n")] + else: + content = await resp.text() + return cls(resp.status, resp.headers, content) + + +class 
BaseJsonDiffbotResponse(BaseDiffbotResponse): + def __init__( + self, status: int, headers: CIMultiDictProxy[str], content: dict[str, Any] + ): + super().__init__(status, headers, content) + + self.content = cast(dict[str, Any], content) + + +class BaseJsonLinesDiffbotResponse(BaseDiffbotResponse): + def __init__( + self, status: int, headers: CIMultiDictProxy[str], content: list[dict[str, Any]] + ): + super().__init__(status, headers, content) + + self.content = cast(list[dict[str, Any]], content) + + +class BaseTextDiffbotResponse(BaseDiffbotResponse): + def __init__(self, status: int, headers: CIMultiDictProxy[str], content: str): + super().__init__(status, headers, content) + + self.content = cast(str, content) diff --git a/src/diffbot_kg/models/response/bulkjob_create.py b/src/diffbot_kg/models/response/bulkjob_create.py new file mode 100644 index 0000000..793e4e6 --- /dev/null +++ b/src/diffbot_kg/models/response/bulkjob_create.py @@ -0,0 +1,16 @@ +from diffbot_kg.models.response.base import BaseJsonDiffbotResponse + + +class DiffbotBulkJobCreateResponse(BaseJsonDiffbotResponse): + """DiffbotResponse represents the response from a Diffbot BulkJob API request. + + It contains the response status, headers, and JSON content. Provides + convenience properties to access the 'jobId' key from the content + + The create classmethod is the main constructor, which handles converting + an aiohttp response into a DiffbotResponse. 
+ """ + + @property + def jobId(self) -> str: + return self.content["job_id"] diff --git a/src/diffbot_kg/models/response/bulkjob_list.py b/src/diffbot_kg/models/response/bulkjob_list.py new file mode 100644 index 0000000..3c6f94e --- /dev/null +++ b/src/diffbot_kg/models/response/bulkjob_list.py @@ -0,0 +1,14 @@ +from diffbot_kg.models.response.base import BaseJsonLinesDiffbotResponse + + +class DiffbotListBulkJobsResponse(BaseJsonLinesDiffbotResponse): + """DiffbotListBulkJobsResponse represents the response from a Diffbot Enhance + API List BulkJobs for Token API request. + + It contains the response status, headers, and JSON content. + + The create classmethod is the main constructor, which handles converting + an aiohttp response into a DiffbotResponse. + """ + + pass diff --git a/src/diffbot_kg/models/response/bulkjob_results.py b/src/diffbot_kg/models/response/bulkjob_results.py new file mode 100644 index 0000000..9f7ff23 --- /dev/null +++ b/src/diffbot_kg/models/response/bulkjob_results.py @@ -0,0 +1,30 @@ +import contextlib + +from diffbot_kg.models.response.base import BaseJsonLinesDiffbotResponse + + +class DiffbotBulkJobResultsResponse(BaseJsonLinesDiffbotResponse): + """DiffbotBulkJobResultsResponse represents the results of a Diffbot Enhance BulkJob. + + It contains the response status, headers, and JSON content. Provides + convenience properties to access the job ID, report ID, and entities. + + The create classmethod is the main constructor, which handles converting + an aiohttp response into a DiffbotResponse.
+ """ + + @property + def jobId(self) -> str: + for query in self.content: + with contextlib.suppress(KeyError): + return query["request_ctx"]["query_ctx"]["bulkjobId"] + + raise RuntimeError("No bulkJobId found in the response") + + @property + def reportId(self) -> str: + return self.headers["X-Diffbot-ReportId"] + + @property + def entities(self): + return [data["entity"] for result in self.content for data in result["data"]] diff --git a/src/diffbot_kg/models/response/bulkjob_status.py b/src/diffbot_kg/models/response/bulkjob_status.py new file mode 100644 index 0000000..9db4a64 --- /dev/null +++ b/src/diffbot_kg/models/response/bulkjob_status.py @@ -0,0 +1,24 @@ +from diffbot_kg.models.response.base import BaseJsonDiffbotResponse + + +class DiffbotBulkJobStatusResponse(BaseJsonDiffbotResponse): + """DiffbotBulkJobStatusResponse represents the status of a Diffbot Enhance BulkJob. + + It contains the response status, headers, and JSON content. Provides + convenience properties to access the 'jobId' key from the content + + The create classmethod is the main constructor, which handles converting + an aiohttp response into a DiffbotResponse. 
+ """ + + @property + def jobId(self) -> str: + return self.content["content"]["job_id"] + + @property + def complete(self) -> str: + return self.content["content"]["status"] == "COMPLETE" + + @property + def reports(self): + return self.content["content"]["reports"] diff --git a/src/diffbot_kg/models/response/coverage_report.py b/src/diffbot_kg/models/response/coverage_report.py new file mode 100644 index 0000000..76bb7f8 --- /dev/null +++ b/src/diffbot_kg/models/response/coverage_report.py @@ -0,0 +1,5 @@ +from diffbot_kg.models.response.base import BaseTextDiffbotResponse + + +class DiffbotCoverageReportResponse(BaseTextDiffbotResponse): + pass diff --git a/src/diffbot_kg/models/response/entities.py b/src/diffbot_kg/models/response/entities.py new file mode 100644 index 0000000..cbfd228 --- /dev/null +++ b/src/diffbot_kg/models/response/entities.py @@ -0,0 +1,26 @@ +from typing import List + +from diffbot_kg.models.response.base import BaseJsonDiffbotResponse + + +class DiffbotEntitiesResponse(BaseJsonDiffbotResponse): + """DiffbotEntitiesResponse represents the response from a Diffbot API request + containing a list of entities. + + It contains the response status, headers, and JSON content. Provides + convenience properties to access the 'data' and 'entities' portions + of the JSON content. + + The create classmethod is the main constructor, which handles converting + an aiohttp response into a DiffbotResponse.
+ """ + + @property + def data(self) -> List[dict]: + return self.content["data"] + + @property + def entities(self) -> List[dict]: + # Note: this class/method will not be compatible with facet queries + # (no entities returned) + return [d["entity"] for d in self.data] diff --git a/tests/functional/cassettes/.gitignore b/tests/functional/cassettes/.gitignore deleted file mode 100644 index 1e82fc7..0000000 --- a/tests/functional/cassettes/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.yaml diff --git a/tests/functional/__init__.py b/tests/functional/clients/__init__.py similarity index 100% rename from tests/functional/__init__.py rename to tests/functional/clients/__init__.py diff --git a/tests/functional/clients/cassettes/.gitignore b/tests/functional/clients/cassettes/.gitignore new file mode 100644 index 0000000..72e8ffc --- /dev/null +++ b/tests/functional/clients/cassettes/.gitignore @@ -0,0 +1 @@ +* diff --git a/tests/functional/clients/conftest.py b/tests/functional/clients/conftest.py new file mode 100644 index 0000000..36c988d --- /dev/null +++ b/tests/functional/clients/conftest.py @@ -0,0 +1,45 @@ +import logging +import os + +import dotenv +import pytest + +ORG_NAME = "Diffbot" +ORG_ENTITY_ID = "EYX1i02YVPsuT7fPLUYgRhQ" +ORG_URL = "www.diffbot.com" + +ORG2_NAME = "Apple" +ORG2_ENTITY_ID = "EHb0_0NEcMwyY8b083taTTw" +ORG2_URL = "www.apple.com" + + +class Secret: + def __init__(self, value): + self.value = value + + def __repr__(self): + return "Secret(********)" + + def __str___(self): + return "*******" + + +@pytest.fixture(scope="session", autouse=True) +def token(): + __tracebackhide__ = True + dotenv.load_dotenv(override=True) + return Secret(os.environ.get("DIFFBOT_TOKEN")) + + +@pytest.fixture(scope="session", autouse=True) +def suppress_aiohttp_output(): + log = logging.getLogger("aiohttp") + log.setLevel(logging.CRITICAL + 1) + + +# @pytest.hookimpl(tryfirst=True) +# def pytest_sanitize_hook(items): +# secrets = [token()] +# for item in items: +# for 
secret in secrets: +# item.add_marker(pytest.mark.sanitize(secret)) diff --git a/tests/functional/clients/test_enhance_client.py b/tests/functional/clients/test_enhance_client.py new file mode 100644 index 0000000..b05e015 --- /dev/null +++ b/tests/functional/clients/test_enhance_client.py @@ -0,0 +1,203 @@ +import logging +import time + +import pytest +from aiohttp import ClientResponseError +from diffbot_kg.clients.enhance import DiffbotEnhanceClient + +from tests.functional.clients.conftest import ( + ORG2_ENTITY_ID, + ORG2_NAME, + ORG2_URL, + ORG_ENTITY_ID, + ORG_NAME, + ORG_URL, + Secret, +) + +log = logging.getLogger(__name__) + + +def _get_job_id(request): + job_id = request.config.cache.get("enhanceBulkJobId", None) + if job_id is None: + pytest.fail("Enhance bulk job ID not found in cache") + + return job_id + + +@pytest.mark.vcr(record_mode="new_episodes") +@pytest.mark.usefixtures("suppress_aiohttp_output") +class TestDiffbotEnhanceClient: + @pytest.mark.asyncio + async def test_enhance(self, token: Secret): + # ARRANGE + client = DiffbotEnhanceClient(token=token.value) + + # ACT + response = await client.enhance({"type": "Organization", "name": ORG_NAME}) + + # ASSERT + assert response.status == 200 + assert response.content["hits"] == 1 + assert response.entities[0]["id"] == ORG_ENTITY_ID + + @pytest.mark.asyncio + async def test_create_bulkjob(self, request, token: Secret): + # ARRANGE + client = DiffbotEnhanceClient(token=token.value) + params = {"size": 1} + + # ACT + response = await client.create_bulkjob( + [ + {"type": "Organization", "name": ORG_NAME, "url": ORG_URL}, + {"type": "Organization", "name": ORG2_NAME, "url": ORG2_URL}, + ], + params, + ) + + # ASSERT + assert response.status == 202 + assert "job_id" in response.content + + # TEARDOWN + request.config.cache.set("enhanceBulkJobId", response.jobId) + await client.close() + + @pytest.mark.asyncio + async def test_list_bulkjobs(self, request, token: Secret): + # ARRANGE + client = 
DiffbotEnhanceClient(token=token.value) + + job_id = _get_job_id(request) + + # ACT + response = await client.list_bulkjobs() + + # ASSERT + assert response.status == 200 + assert any(x["job_id"] == job_id for x in response.content) + + # TEARDOWN + await client.close() + + @pytest.mark.asyncio + async def test_bulkjob_status(self, request, token: Secret): + # ARRANGE + client = DiffbotEnhanceClient(token=token.value) + + job_id = _get_job_id(request) + + DELAY = 10 + start = time.time() + + # ACT + while time.time() - start <= DELAY: + response = await client.bulkjob_status(job_id) + if response.complete: + break + time.sleep(1) + + # ASSERT + assert response.status == 200 + assert response.jobId == job_id + assert response.complete + + if len(response.reports) > 1: + log.warning( + "More than one report found for bulk job. Investigate: %s", + response.reports, + ) + + # TEARDOWN + request.config.cache.set( + "enhanceBulkJobCoverageReportId", response.reports[0]["reportId"] + ) + await client.close() + + @pytest.mark.asyncio + async def test_bulkjob_results(self, request, token: Secret): + # ARRANGE + client = DiffbotEnhanceClient(token=token.value) + + job_id = _get_job_id(request) + + # ACT + response = await client.bulkjob_results(job_id) + + # ASSERT + assert response.status == 200 + assert len(response.content) == 2 + assert response.content[0]["hits"] == 1 + assert len(response.content[0]["data"]) == 1 + assert response.entities[0]["id"] == ORG_ENTITY_ID + assert response.content[1]["hits"] == 1 + assert len(response.content[1]["data"]) == 1 + assert response.entities[1]["id"] == ORG2_ENTITY_ID + + # TEARDOWN + await client.close() + + @pytest.mark.asyncio + async def test_single_bulkjob_result(self, request, token: Secret): + # ARRANGE + client = DiffbotEnhanceClient(token=token.value) + + job_id = _get_job_id(request) + + # ACT + response = await client.single_bulkjob_result(job_id, 0) + + # ASSERT + assert response.status == 200 + assert 
response.content["hits"] == 1 + assert len(response.data) == 1 + assert response.entities[0]["id"] == ORG_ENTITY_ID + + # TEARDOWN + await client.close() + + @pytest.mark.asyncio + async def test_bulkjob_stop(self, request, token: Secret): + # ARRANGE + client = DiffbotEnhanceClient(token=token.value) + + job_id = _get_job_id(request) + + # ACT + response = await client.stop_bulkjob(job_id) + + # ASSERT + assert response.status == 200 + assert response.content["status"] == "COMPLETE" + assert response.content["message"] == f"Bulkjob [{job_id}] is completed" + + # TEARDOWN + await client.close() + + @pytest.mark.asyncio + async def test_bulkjob_coverage_report(self, request, token: Secret): + # ARRANGE + client = DiffbotEnhanceClient(token=token.value) + + job_id = _get_job_id(request) + report_id = request.config.cache.get("enhanceBulkJobCoverageReportId", None) + if report_id is None: + pytest.fail("Enhance bulk job coverage report ID not found in cache") + + DELAY = 10 + start = time.time() + + # ACT + while time.time() - start <= DELAY: + try: + response = await client.bulkjob_coverage_report(job_id, report_id) + except ClientResponseError: + time.sleep(1) + else: + break + + # ASSERT + assert response.status == 200 + assert len(response.content.strip().split("\n")) == 4 diff --git a/tests/functional/clients/test_search_client.py b/tests/functional/clients/test_search_client.py new file mode 100644 index 0000000..5aaee91 --- /dev/null +++ b/tests/functional/clients/test_search_client.py @@ -0,0 +1,33 @@ +import pytest +from diffbot_kg.clients.search import DiffbotSearchClient + +from tests.functional.clients.conftest import ORG_ENTITY_ID, ORG_NAME, Secret + + +@pytest.fixture(scope="session") +def client(token): + yield DiffbotSearchClient(token=token.value) + + +@pytest.mark.vcr(record_mode="new_episodes") +@pytest.mark.usefixtures("suppress_aiohttp_output") +class TestDiffbotSearchClient: + # @pytest.mark.sanitize(token()) + @pytest.mark.asyncio + async def 
test_search(self, token: Secret): + # ARRANGE + client = DiffbotSearchClient(token=token.value) + + # ACT + response = await client.search( + {"query": f'type:Organization strict:name:"{ORG_NAME}"'} + ) + + # ASSERT + assert response.status == 200 + assert response.content["hits"] == 1 + assert response.content["results"] == 1 + assert response.entities[0]["id"] == ORG_ENTITY_ID + + # TEARDOWN + await client.close() diff --git a/tests/functional/test_clients.py b/tests/functional/test_clients.py deleted file mode 100644 index 968e2d3..0000000 --- a/tests/functional/test_clients.py +++ /dev/null @@ -1,77 +0,0 @@ -import os - -import dotenv -import pytest -from diffbot_kg.clients import DiffbotEnhanceClient, DiffbotSearchClient - -ORG_NAME = "Diffbot" -ORG_ENTITY_ID = "EYX1i02YVPsuT7fPLUYgRhQ" - - -class Secret: - def __init__(self, value): - self.value = value - - def __repr__(self): - return "Secret(********)" - - def __str___(self): - return "*******" - - -@pytest.fixture(scope="session", autouse=True) -def token(): - __tracebackhide__ = True - dotenv.load_dotenv(override=True) - return Secret(os.environ.get("DIFFBOT_TOKEN")) - - -@pytest.hookimpl(tryfirst=True) -def pytest_sanitize_hook(items): - secrets = [token()] - for item in items: - for secret in secrets: - item.add_marker(pytest.mark.sanitize(secret)) - - -# @pytest.mark.usefixtures("suppress_aiottp_output") -class TestDiffbotSearchClient: - @pytest.mark.asyncio - @pytest.mark.vcr - # @pytest.mark.sanitize(token()) - async def test_search_client_search(self, token): - client = DiffbotSearchClient(token=token.value) - response = await client.search( - {"query": f'type:Organization strict:name:"{ORG_NAME}"'} - ) - assert response.status == 200 - assert response.content["hits"] == 1 - assert response.content["results"] == 1 - assert response.data[0]["entity"]["id"] == ORG_ENTITY_ID - - -# @pytest.mark.usefixtures("suppress_aiottp_output") -class TestDiffbotEnhanceClient: - @pytest.mark.asyncio - 
@pytest.mark.vcr - async def test_enhance_client_enhance(self, token): - client = DiffbotEnhanceClient(token=token.value) - response = await client.enhance({"type": "Organization", "name": ORG_NAME}) - assert response.status == 200 - assert response.content["hits"] == 1 - assert response.data[0]["entity"]["id"] == ORG_ENTITY_ID - - @pytest.mark.asyncio - @pytest.mark.vcr - async def test_enhance_client_create_bulkjob(self, token): - import logging - - logging.basicConfig(level=logging.CRITICAL) - logging.getLogger("diffbot_kg").setLevel(logging.CRITICAL) - - logging.getLogger("diffbot_kg.session").setLevel(logging.CRITICAL) - logging.getLogger("diffbot_kg.clients").setLevel(logging.CRITICAL) - client = DiffbotEnhanceClient(token=token.value) - response = await client.create_bulkjob({"uris": ["http://diffbot.com"]}) - assert response.status == 200 - assert "bulkJobId" in response.content diff --git a/tests/unit/__init__.py b/tests/unit/clients/__init__.py similarity index 100% rename from tests/unit/__init__.py rename to tests/unit/clients/__init__.py diff --git a/tests/unit/clients/test_enhance_client.py b/tests/unit/clients/test_enhance_client.py new file mode 100644 index 0000000..6d385fe --- /dev/null +++ b/tests/unit/clients/test_enhance_client.py @@ -0,0 +1,65 @@ +import pytest +from diffbot_kg.clients.enhance import DiffbotEnhanceClient +from diffbot_kg.clients.session import DiffbotSession +from diffbot_kg.models.response import DiffbotEntitiesResponse +from diffbot_kg.models.response.base import BaseDiffbotResponse +from diffbot_kg.models.response.bulkjob_create import DiffbotBulkJobCreateResponse + + +class TestDiffbotEnhanceClient: + @pytest.fixture(scope="class") + def client(self): + # trunk-ignore(bandit/B106) + return DiffbotEnhanceClient(token="valid_token") + + @pytest.mark.asyncio + async def test_mocked_enhance(self, mocker, client): + # ARRANGE + + # Define the search query parameters + params = {"query": "your_search_query", "limit": 10} + + # 
Mock the _post_or_put method + mocker.patch.object( + DiffbotSession, + "get", + return_value=BaseDiffbotResponse(200, {}, {}), # type: ignore + ) + + # ACT + response = await client.enhance(params) + + # ASSERT + assert DiffbotSession.get.is_called_with( + DiffbotEnhanceClient.enhance_url, + params=params, + headers={"accept": "application/json"}, + ) + assert isinstance(response, DiffbotEntitiesResponse) + assert response.status == 200 + + @pytest.mark.asyncio + async def test_mocked_create_bulkjob(self, mocker, client): + # ARRANGE + + # Define the search query parameters + params = {"query": "your_bulk_enhance_query", "limit": 10} + + # Mock the _post_or_put method + mocker.patch.object( + DiffbotSession, + "post", + return_value=BaseDiffbotResponse(202, {}, {}), # type: ignore + ) + + # ACT + response = await client.create_bulkjob(params) + + # ASSERT + assert DiffbotSession.post.is_called_with( + DiffbotEnhanceClient.bulk_job_url, + params=params, + headers={"accept": "application/json"}, + ) + assert isinstance(response, DiffbotBulkJobCreateResponse) + assert response.status == 202 diff --git a/tests/unit/clients/test_search_client.py b/tests/unit/clients/test_search_client.py new file mode 100644 index 0000000..898f090 --- /dev/null +++ b/tests/unit/clients/test_search_client.py @@ -0,0 +1,44 @@ +import pytest +from diffbot_kg.clients.search import DiffbotSearchClient +from diffbot_kg.clients.session import DiffbotSession +from diffbot_kg.models.response import DiffbotEntitiesResponse +from diffbot_kg.models.response.base import BaseDiffbotResponse + +# trunk-ignore(bandit/B105) +TOKEN = "fake_token" + + +class TestDiffbotSearchClient: + @pytest.fixture(scope="class") + def client(self): + # trunk-ignore(bandit/B106) + return DiffbotSearchClient(token=TOKEN) + + # Returns a DiffbotResponse object when given a search query. 
+ @pytest.mark.asyncio + async def test_mocked_search(self, mocker, client): + # ARRANGE + + # Define the search query parameters + params = {"query": "your_search_query", "limit": 10} + + # Mock the _post_or_put method + mocker.patch.object( + DiffbotSession, + "get", + return_value=BaseDiffbotResponse(200, {}, {}), # type: ignore + ) + + # ACT + response = await client.search(params) + + # ASSERT + params = client.default_params | params + + DiffbotSession.get.assert_called_with( + DiffbotSearchClient.search_url, + params=params, + headers={"accept": "application/json"}, + ) + assert isinstance(response, DiffbotEntitiesResponse) + assert response.status == 200 diff --git a/tests/unit/test_clients.py b/tests/unit/test_clients.py deleted file mode 100644 index 44468ff..0000000 --- a/tests/unit/test_clients.py +++ /dev/null @@ -1,79 +0,0 @@ -# Generated by CodiumAI - -import pytest -from diffbot_kg.clients import DiffbotEnhanceClient, DiffbotSearchClient -from diffbot_kg.session import DiffbotResponse - - -class TestDiffbotSearchClient: - @pytest.fixture(scope="class") - def search_client(self): - # trunk-ignore(bandit/B106) - return DiffbotSearchClient(token="fake_token") - - # Returns a DiffbotResponse object when given a search query. 
- @pytest.mark.asyncio - async def test_mocked_search_query(self, mock, client): - # Define the search query parameters - params = {"query": "your_search_query", "limit": 10} - - # Mock the _post_or_put method - mock.patch.object( - DiffbotSearchClient, - "_get_or_post", - return_value=DiffbotResponse(200, {}, {}), # type: ignore - ) - - # Call the search method - response = await client.search(params) - - # Assert that the response is an instance of DiffbotResponse - assert isinstance(response, DiffbotResponse) - assert response.status == 200 - - -class TestDiffbotEnhanceClient: - @pytest.fixture(scope="class") - def client(self): - # trunk-ignore(bandit/B106) - return DiffbotEnhanceClient(token="valid_token") - - # Returns a DiffbotResponse object when given an enhance query. - @pytest.mark.asyncio - async def test_mocked_enhance_query(self, mocker, client): - # Define the search query parameters - params = {"query": "your_search_query", "limit": 10} - - # Mock the _post_or_put method - mocker.patch.object( - DiffbotEnhanceClient, - "_get", - return_value=DiffbotResponse(200, {}, {}), # type: ignore - ) - - # Call the search method - response = await client.enhance(params) - - # Assert that the response is an instance of DiffbotResponse - assert isinstance(response, DiffbotResponse) - assert response.status == 200 - - # Returns a DiffbotResponse object when given a bulk enhance query. 
- @pytest.mark.asyncio - async def test_mocked_create_bulkjob(self, mocker, client): - # Define the search query parameters - params = {"query": "your_bulk_enhance_query", "limit": 10} - - # Mock the _post_or_put method - mocker.patch.object( - DiffbotEnhanceClient, - "_post", - return_value=DiffbotResponse(200, {}, {}), # type: ignore - ) - - # Call the bulk enhance method - response = await client.create_bulkjob(params) - - # Assert that the response is an instance of DiffbotResponse - assert isinstance(response, DiffbotResponse) - assert response.status == 200 From 5700b03205fd5a01bc0f09f33147c766e0798bd5 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sat, 6 Apr 2024 10:26:26 -0500 Subject: [PATCH 08/16] chore: remove commented code --- tests/functional/clients/conftest.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/functional/clients/conftest.py b/tests/functional/clients/conftest.py index 36c988d..5173442 100644 --- a/tests/functional/clients/conftest.py +++ b/tests/functional/clients/conftest.py @@ -35,11 +35,3 @@ def token(): def suppress_aiohttp_output(): log = logging.getLogger("aiohttp") log.setLevel(logging.CRITICAL + 1) - - -# @pytest.hookimpl(tryfirst=True) -# def pytest_sanitize_hook(items): -# secrets = [token()] -# for item in items: -# for secret in secrets: -# item.add_marker(pytest.mark.sanitize(secret)) From 77730629a434d74177ee80e0eaefba16dbdef947 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sat, 6 Apr 2024 10:53:29 -0500 Subject: [PATCH 09/16] test: move conftest in order to prevent unit tests from trying to use it --- tests/functional/__init__.py | 0 tests/functional/clients/test_enhance_client.py | 2 +- tests/functional/clients/test_search_client.py | 2 +- tests/functional/{clients => }/conftest.py | 0 tests/unit/__init__.py | 0 5 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 tests/functional/__init__.py rename tests/functional/{clients => }/conftest.py (100%) create mode 100644 
tests/unit/__init__.py diff --git a/tests/functional/__init__.py b/tests/functional/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/functional/clients/test_enhance_client.py b/tests/functional/clients/test_enhance_client.py index b05e015..1542c92 100644 --- a/tests/functional/clients/test_enhance_client.py +++ b/tests/functional/clients/test_enhance_client.py @@ -5,7 +5,7 @@ from aiohttp import ClientResponseError from diffbot_kg.clients.enhance import DiffbotEnhanceClient -from tests.functional.clients.conftest import ( +from tests.functional.conftest import ( ORG2_ENTITY_ID, ORG2_NAME, ORG2_URL, diff --git a/tests/functional/clients/test_search_client.py b/tests/functional/clients/test_search_client.py index 5aaee91..3ddd38a 100644 --- a/tests/functional/clients/test_search_client.py +++ b/tests/functional/clients/test_search_client.py @@ -1,7 +1,7 @@ import pytest from diffbot_kg.clients.search import DiffbotSearchClient -from tests.functional.clients.conftest import ORG_ENTITY_ID, ORG_NAME, Secret +from tests.functional.conftest import ORG_ENTITY_ID, ORG_NAME, Secret @pytest.fixture(scope="session") diff --git a/tests/functional/clients/conftest.py b/tests/functional/conftest.py similarity index 100% rename from tests/functional/clients/conftest.py rename to tests/functional/conftest.py diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 From f31f00764871935474d39087acebac333e683018 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sat, 6 Apr 2024 11:22:47 -0500 Subject: [PATCH 10/16] test: suppress aiohttp traces --- .github/workflows/python-package.yml | 1 + .github/workflows/python-publish.yml | 1 - tests/functional/conftest.py | 12 ++++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 501c0b2..f58b9f8 100644 --- a/.github/workflows/python-package.yml +++ 
b/.github/workflows/python-package.yml @@ -15,6 +15,7 @@ permissions: jobs: build: runs-on: ubuntu-latest + environment: test strategy: fail-fast: false matrix: diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 790db44..0d88edb 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -18,7 +18,6 @@ permissions: jobs: deploy: runs-on: ubuntu-latest - steps: - uses: actions/checkout@v3 - name: Set up Python diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py index 5173442..680acb4 100644 --- a/tests/functional/conftest.py +++ b/tests/functional/conftest.py @@ -1,6 +1,9 @@ import logging import os +import re +import sys +import aiohttp.client_exceptions import dotenv import pytest @@ -35,3 +38,12 @@ def token(): def suppress_aiohttp_output(): log = logging.getLogger("aiohttp") log.setLevel(logging.CRITICAL + 1) + + def my_except_hook(exctype, value, traceback): + if exctype == aiohttp.client_exceptions.ClientResponseError: + msg = str(value) + re.sub(os.environ.get("DIFFBOT_TOKEN", ""), "********", msg) + else: + sys.__excepthook__(exctype, value, traceback) + + sys.excepthook = my_except_hook From 6c856c42c006b1a86ad2b8e69c9814cc108cf507 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sat, 6 Apr 2024 11:37:39 -0500 Subject: [PATCH 11/16] test: suppress traces (fixed hopefully) --- tests/functional/conftest.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py index 680acb4..9f8e69b 100644 --- a/tests/functional/conftest.py +++ b/tests/functional/conftest.py @@ -41,8 +41,9 @@ def suppress_aiohttp_output(): def my_except_hook(exctype, value, traceback): if exctype == aiohttp.client_exceptions.ClientResponseError: - msg = str(value) - re.sub(os.environ.get("DIFFBOT_TOKEN", ""), "********", msg) + token = os.environ.get("DIFFBOT_TOKEN", "") + value.message = re.sub(token, "********", 
str(value)) + sys.__excepthook__(exctype, value, None) else: sys.__excepthook__(exctype, value, traceback) From b47a580915c9d8ce3d76b7bde44b27a2c5b2acb5 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sat, 6 Apr 2024 12:05:34 -0500 Subject: [PATCH 12/16] test: suppress all tracebacks --- tests/functional/conftest.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py index 9f8e69b..9231283 100644 --- a/tests/functional/conftest.py +++ b/tests/functional/conftest.py @@ -39,6 +39,10 @@ def suppress_aiohttp_output(): log = logging.getLogger("aiohttp") log.setLevel(logging.CRITICAL + 1) + # sledgehammer approach. only an exception hook should be used instead, + # but cannot get it to work for the traceback + sys.tracebacklimit = 0 + def my_except_hook(exctype, value, traceback): if exctype == aiohttp.client_exceptions.ClientResponseError: token = os.environ.get("DIFFBOT_TOKEN", "") From 7e00da5736820c58a4e122701e5c330cf26fc359 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sat, 6 Apr 2024 12:05:51 -0500 Subject: [PATCH 13/16] test: correctly use env variable --- .github/workflows/python-package.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index f58b9f8..6aa0676 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -16,6 +16,8 @@ jobs: build: runs-on: ubuntu-latest environment: test + env: + DIFFBOT_TOKEN: ${{ secrets.DIFFBOT_TOKEN }} strategy: fail-fast: false matrix: From 6cccbbe6d026a8a0f561b2eedb3e04fb871a6243 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sat, 6 Apr 2024 12:55:00 -0500 Subject: [PATCH 14/16] ci: make versioned tests parallel --- .github/workflows/python-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 6aa0676..0eb2ac9 100644 --- 
a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -22,6 +22,7 @@ jobs: fail-fast: false matrix: python-version: ["3.11", "3.12"] + max-parallel: 4 steps: #---------------------------------------------- # check-out repo and set-up python From ba2a4e7e9b937e5cb2e97625c177e2da81e8d944 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sat, 6 Apr 2024 13:01:16 -0500 Subject: [PATCH 15/16] ci: update versions, disable trufflehog due to current errors --- .trunk/trunk.yaml | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/.trunk/trunk.yaml b/.trunk/trunk.yaml index befbf9b..4c56246 100644 --- a/.trunk/trunk.yaml +++ b/.trunk/trunk.yaml @@ -2,12 +2,12 @@ # To learn more about the format of this file, see https://docs.trunk.io/reference/trunk-yaml version: 0.1 cli: - version: 1.20.1 + version: 1.21.0 # Trunk provides extensibility via plugins. (https://docs.trunk.io/plugins) plugins: sources: - id: trunk - ref: v1.4.4 + ref: v1.4.5 uri: https://github.com/trunk-io/plugins # Many linters and tools depend on runtimes - configure them here. 
(https://docs.trunk.io/runtimes) runtimes: @@ -33,21 +33,22 @@ lint: run: trufflehog filesystem --json --fail --exclude-paths=/.gitignore ${target} enabled: - actionlint@1.6.27 - - bandit@1.7.7 - - checkov@3.2.30 + - bandit@1.7.8 + - checkov@3.2.53 - git-diff-check - markdownlint@0.39.0 - - osv-scanner@1.6.2 + - osv-scanner@1.7.0 - prettier@3.2.5 - - ruff@0.3.1 - - semgrep@1.64.0 - - sourcery@1.15.0 + - ruff@0.3.5 + - semgrep@1.67.0 + - sourcery@1.16.0 - taplo@0.8.1 - - trivy@0.49.1 - - trufflehog-git@3.68.5 - - trufflehog@3.68.4 + - trivy@0.50.1 + - trufflehog-git@3.72.0 + # - trufflehog@3.71.0 - yamllint@1.35.1 disabled: + - trufflehog - black - isort actions: From 1d57806b1a1a0c87752d9a0ed22804fc019803d3 Mon Sep 17 00:00:00 2001 From: Brendan Smith Date: Sat, 6 Apr 2024 13:02:23 -0500 Subject: [PATCH 16/16] ci: fix bandit error due to unecessary ignore --- tests/unit/clients/test_search_client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/clients/test_search_client.py b/tests/unit/clients/test_search_client.py index 898f090..7941d42 100644 --- a/tests/unit/clients/test_search_client.py +++ b/tests/unit/clients/test_search_client.py @@ -11,7 +11,6 @@ class TestDiffbotSearchClient: @pytest.fixture(scope="class") def client(self): - # trunk-ignore(bandit/B106) return DiffbotSearchClient(token=TOKEN) # Returns a DiffbotResponse object when given a search query.