Skip to content

Commit

Permalink
Merge pull request #42 from investigativedata/develop
Browse files Browse the repository at this point in the history
v0.3.1
  • Loading branch information
simonwoerpel authored May 20, 2024
2 parents c4c829d + 5e73fbb commit 6ace7c1
Show file tree
Hide file tree
Showing 12 changed files with 79 additions and 44 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.3.0
current_version = 0.3.1
commit = True
tag = True
message = 🔖 Bump version: {current_version} → {new_version}
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install poetry
Expand All @@ -41,15 +41,15 @@ jobs:
- name: set PY
run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> $GITHUB_ENV
- name: Set up poetry cache
id: cache
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: .venv
key: venv-${{ runner.os }}-${{ env.PY }}-${{ hashFiles('**/poetry.lock') }}
- name: Ensure cache is healthy
if: steps.cache.outputs.cache-hit == 'true'
run: poetry run pip --version >/dev/null 2>&1 || rm -rf .venv
- name: Set up pre-commit cache
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/pre-commit
key: pre-commit-${{ runner.os }}-${{ env.PY }}-${{ hashFiles('.pre-commit-config.yaml') }}
Expand Down
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
FROM ghcr.io/investigativedata/ftm-docker:main

RUN apt-get update && apt-get -y upgrade
RUN apt-get update && apt-get -y upgrade && apt-get -y autoremove && apt-get clean

COPY ftm_columnstore /app/ftm_columnstore
COPY setup.py /app/setup.py
Expand All @@ -9,7 +9,7 @@ COPY pyproject.toml /app/pyproject.toml
COPY VERSION /app/VERSION

WORKDIR /app
RUN pip install -U pip setuptools
RUN pip install .
RUN pip install --no-cache-dir -U pip setuptools
RUN pip install --no-cache-dir .

ENTRYPOINT ["ftmcs"]
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.3.0
0.3.1
2 changes: 1 addition & 1 deletion ftm_columnstore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@
# FIXME sqlalchemy monkey patch not working
nomenklatura.settings.DB_URL = "sqlite:///:memory:"

__version__ = "0.3.0"
__version__ = "0.3.1"

__all__ = ["get_engine", "get_store"]
18 changes: 17 additions & 1 deletion ftm_columnstore/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Annotated, Optional

import typer
from ftmq.io import smart_read_proxies
from ftmq.io import smart_read_proxies, smart_write_proxies
from rich import print

from ftm_columnstore import get_engine, get_store, settings
Expand Down Expand Up @@ -34,6 +34,22 @@ def cli_init(
engine.ensure(recreate=recreate, exists_ok=True)


@cli.command("iterate", help="Read entities from the store.")
def cli_iterate(
    dataset: Annotated[
        Optional[str],
        typer.Option("-d", help="Dataset to read from"),
    ] = None,
    out_uri: Annotated[
        str,
        typer.Option("-o", help="Entities uri (as interpreted by `ftmq`)"),
    ] = "-",
):
    """
    Stream entities out of the store as json.

    The store is obtained via `get_store(dataset=dataset)`; whatever
    `store.iterate(dataset)` yields is handed to `smart_write_proxies`,
    which writes it to the `-o` uri (default `-`, i.e. stdout).
    """
    # Build the entity stream first, then pass it on to the writer.
    entities = get_store(dataset=dataset).iterate(dataset)
    smart_write_proxies(out_uri, entities, serialize=True)


@cli.command("write", help="Write entity fragments to the store.")
def cli_write(
in_uri: Annotated[
Expand Down
12 changes: 10 additions & 2 deletions ftm_columnstore/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ def create_statements(self) -> Iterable[str]:
INDEX tix (prop_type) TYPE set(0) GRANULARITY 1,
INDEX pix (prop) TYPE set(0) GRANULARITY 1
) ENGINE = ReplacingMergeTree(last_seen)
PRIMARY KEY (canonical_id, entity_id, id)
ORDER BY (canonical_id, entity_id, id)
PRIMARY KEY (canonical_id, entity_id, prop, value, id)
ORDER BY (canonical_id, entity_id, prop, value, id)
"""

create_table_fpx = f"""
Expand Down Expand Up @@ -234,6 +234,14 @@ def create_statements(self) -> Iterable[str]:
"""

projections = (
f"""ALTER TABLE {self.table} ADD PROJECTION {self.table}_dataset (
SELECT * ORDER BY dataset,canonical_id,prop)""",
f"""ALTER TABLE {self.table} ADD PROJECTION {self.table}_schema (
SELECT * ORDER BY schema,canonical_id,prop)""",
f"""ALTER TABLE {self.table} ADD PROJECTION {self.table}_dataset_schema (
SELECT * ORDER BY dataset,schema,canonical_id,prop)""",
f"""ALTER TABLE {self.table} ADD PROJECTION {self.table}_schema (
SELECT * ORDER BY schema,canonical_id,prop)""",
f"""ALTER TABLE {self.table} ADD PROJECTION {self.table}_values (
SELECT * ORDER BY value,prop)""",
f"""ALTER TABLE {self.table} ADD PROJECTION {self.table}_canonical_lookup (
Expand Down
2 changes: 1 addition & 1 deletion ftm_columnstore/settings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os

VERSION = "0.3.0"
VERSION = "0.3.1"


def get_env(name, default=None):
Expand Down
30 changes: 15 additions & 15 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ftm-columnstore"
version = "0.3.0"
version = "0.3.1"
description = "Column store implementation for ftm data based on clickhouse"
authors = ["Simon Wörpel <[email protected]>"]
license = "GPL3"
Expand Down Expand Up @@ -34,7 +34,7 @@ libindic-utils = "^1.0.3"
metaphone = "^0.6"
pandas = "^2.2.2"
rich = "^13.7.1"
ftmq = "^0.6.4"
ftmq = "^0.6.5"


[tool.poetry.group.dev.dependencies]
Expand Down
24 changes: 12 additions & 12 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -625,9 +625,9 @@ frozenlist==1.4.1 ; python_version >= "3.11" and python_version < "3.12" \
fsspec==2024.5.0 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:1d021b0b0f933e3b3029ed808eb400c08ba101ca2de4b3483fbc9ca23fcee94a \
--hash=sha256:e0fdbc446d67e182f49a70b82cf7889028a63588fde6b222521f10937b2b670c
ftmq==0.6.4 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:0ca4bcfe72bbcdbf624dc079f7beae3407c209c5588a9749a4e14dcd96584aee \
--hash=sha256:12a97f5abf630590acc121d67a9da7e2077a21d675740cce38060517d9602896
ftmq==0.6.5 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:37bc1a32ebf5d12c13d27a30d24a1e4e6eaa55f7d6bcf66709cfeb07fdf0999e \
--hash=sha256:80b5e1faee042418a8c6d61336aff2f667ce59d69e05ed2826f2565bbe022fbb
gcsfs==2024.5.0 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:49978e7eb68800c2d074bb07b39050f8ae990899855abcdae6ef478a94528451 \
--hash=sha256:e54eaaffb82aaa369aea9b985e5db19a8446a325f796481303abe71a4e3427d6
Expand Down Expand Up @@ -1487,9 +1487,9 @@ pytest-cov==4.1.0 ; python_version >= "3.11" and python_version < "3.12" \
pytest-env==1.1.3 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:aada77e6d09fcfb04540a6e462c58533c37df35fa853da78707b17ec04d17dfc \
--hash=sha256:fcd7dc23bb71efd3d35632bde1bbe5ee8c8dc4489d6617fb010674880d96216b
pytest==8.2.0 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233 \
--hash=sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f
pytest==8.2.1 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:5046e5b46d8e4cac199c373041f26be56fdb81eb4e67dc11d4e10811fc3408fd \
--hash=sha256:faccc5d332b8c3719f40283d0d44aa5cf101cec36f88cde9ed8f2bc0538612b1
python-dateutil==2.9.0.post0 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \
--hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427
Expand Down Expand Up @@ -1570,9 +1570,9 @@ requests-oauthlib==2.0.0 ; python_version >= "3.11" and python_version < "3.12"
requests-toolbelt==1.0.0 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6 \
--hash=sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06
requests==2.31.0 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f \
--hash=sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1
requests==2.32.0 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:f2c3881dddb70d056c5bd7600a4fae312b2a300e39be6a118d30b90bd27262b5 \
--hash=sha256:fa5490319474c82ef1d2c9bc459d3652e3ae4ef4c4ebdd18a21145a47ca4b6b8
rich==13.7.1 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \
--hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432
Expand Down Expand Up @@ -1713,9 +1713,9 @@ stringcase==1.2.0 ; python_version >= "3.11" and python_version < "3.12" \
text-unidecode==1.3 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8 \
--hash=sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93
textual==0.61.0 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:176ac3aa5427fc076492d16afd20ea5c508605c2826cd176c8f5ac2589a1ee46 \
--hash=sha256:91c83a659da40b227eced4fa749026a236b493cc5911a9bedd990ad5f0786be2
textual==0.61.1 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:58ef207424eb1015b85ca68abebf1a3c7b5cec108eec0d6a110f2aac9bc4d74f \
--hash=sha256:8f1092f0db58f5214c0de1f8174ecf9db08356e30d0acca69a8df9a183a29f6c
threadpoolctl==3.5.0 ; python_version >= "3.11" and python_version < "3.12" \
--hash=sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107 \
--hash=sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467
Expand Down
15 changes: 13 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from pathlib import Path

import orjson
from nomenklatura.entity import CompositeEntity
from typer.testing import CliRunner

from ftm_columnstore.cli import cli
Expand All @@ -21,3 +19,16 @@ def test_cli(fixtures_path: Path):
in_uri = str(fixtures_path / "eu_authorities.ftm.json")
result = runner.invoke(cli, ["write", "-i", in_uri, "-d", "eu_authorities"])
assert result.exit_code == 0

# Write a second fixture into its own "donations" dataset.
in_uri = str(fixtures_path / "donations.ijson")
result = runner.invoke(cli, ["write", "-i", in_uri, "-d", "donations"])
assert result.exit_code == 0

# Iterate without `-d`; the expected count presumably covers every dataset
# written so far (TODO confirm against the fixtures).
res = runner.invoke(cli, ["iterate"])
# Check the exit code of *this* invocation, not the stale `write` result.
assert res.exit_code == 0
lines = _get_lines(res.stdout)
assert len(lines) == 625

# Iterate again, restricted to the "donations" dataset via `-d`.
res = runner.invoke(cli, ["iterate", "-d", "donations"])
assert res.exit_code == 0
lines = _get_lines(res.stdout)
assert len(lines) == 474

0 comments on commit 6ace7c1

Please sign in to comment.