Skip to content

Commit

Permalink
Merge pull request #38 from investigativedata/develop
Browse files: browse the repository at this point in the history
v0.3.0
  • Loading branch information
simonwoerpel authored May 19, 2024
2 parents a7303a7 + 1fefbfd commit c4c829d
Show file tree
Hide file tree
Showing 15 changed files with 3,816 additions and 179,077 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.2.1
current_version = 0.3.0
commit = True
tag = True
message = 🔖 Bump version: {current_version} → {new_version}
Expand Down
12 changes: 6 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# * Run "pre-commit install".
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.6.0
hooks:
- id: check-added-large-files
exclude: (\.csv|\.i?jsonl?)$
Expand All @@ -32,26 +32,26 @@ repos:
- id: absolufy-imports

- repo: https://github.com/pycqa/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
args: ["--profile", "black"]

- repo: https://github.com/psf/black
rev: 23.7.0
rev: 24.4.2
hooks:
- id: black

- repo: https://github.com/csachs/pyproject-flake8
rev: v6.0.0.post1
rev: v7.0.0
hooks:
- id: pyproject-flake8
additional_dependencies: [ flake8-bugbear ]
args: [ "--extend-ignore", "E501" ]
exclude: (test_[\w]+\.py)$

- repo: https://github.com/codespell-project/codespell
rev: v2.2.5
rev: v2.2.6
hooks:
- id: codespell
exclude: (test_[\w]+\.py|\.csv|\.i?jsonl?|\.lock)$
Expand All @@ -69,7 +69,7 @@ repos:
- id: rst-inline-touching-normal

- repo: https://github.com/python-poetry/poetry
rev: 1.5.0
rev: 1.8.0
hooks:
- id: poetry-check
- id: poetry-lock
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.2.1
0.3.0
2 changes: 1 addition & 1 deletion ftm_columnstore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@
# FIXME sqlalchemy monkey patch not working
nomenklatura.settings.DB_URL = "sqlite:///:memory:"

__version__ = "0.2.1"
__version__ = "0.3.0"

__all__ = ["get_engine", "get_store"]
2 changes: 1 addition & 1 deletion ftm_columnstore/settings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os

VERSION = "0.2.1"
VERSION = "0.3.0"


def get_env(name, default=None):
Expand Down
3,428 changes: 1,754 additions & 1,674 deletions poetry.lock

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ftm-columnstore"
version = "0.2.1"
version = "0.3.0"
description = "Column store implementation for ftm data based on clickhouse"
authors = ["Simon Wörpel <[email protected]>"]
license = "GPL3"
Expand All @@ -26,16 +26,15 @@ ftmcs = "ftm_columnstore.cli:cli"
[tool.poetry.dependencies]
python = ">=3.11,<3.12"
banal = "^1.0.6"
typer = "^0.9.0"
clickhouse-driver = {extras = ["numpy"], version = "^0.2.6"}
orjson = "^3.9.10"
pyicu = "^2.12"
typer = "^0.12.3"
clickhouse-driver = {extras = ["numpy"], version = "^0.2.7"}
pyicu = "^2.13.1"
libindic-soundex = "^1.0.2"
libindic-utils = "^1.0.3"
metaphone = "^0.6"
pandas = "^2.1.3"
rich = "^13.6.0"
ftmq = "^0.5.0"
pandas = "^2.2.2"
rich = "^13.7.1"
ftmq = "^0.6.4"


[tool.poetry.group.dev.dependencies]
Expand All @@ -45,7 +44,7 @@ black = "^23.11.0"
isort = "^5.12.0"
flake8 = "^6.1.0"
mypy = "^1.7.0"
pytest = "^7.4.3"
pytest = "^8.2.0"
pytest-cov = "^4.1.0"
pytest-env = ">=1.1.1"
pre-commit = "^3.5.0"
Expand All @@ -58,5 +57,6 @@ build-backend = "poetry.core.masonry.api"
[tool.pytest.ini_options]
env = [
"NOMENKLATURA_STATEMENT_TABLE=test_table",
"LOG_LEVEL=DEBUG"
"LOG_LEVEL=DEBUG",
"MAX_SQL_AGG_GROUPS=11"
]
2,816 changes: 1,415 additions & 1,401 deletions requirements.txt

Large diffs are not rendered by default.

23 changes: 9 additions & 14 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from ftmq.io import smart_read_proxies

FIXTURES_PATH = (Path(__file__).parent / "fixtures").absolute()
AUTHORITIES = "eu_authorities.ftm.json"
DONATIONS = "donations.ijson"


@pytest.fixture(scope="module")
Expand All @@ -12,25 +14,18 @@ def fixtures_path():


@pytest.fixture(scope="module")
def luanda_leaks():
return smart_read_proxies(FIXTURES_PATH / "icij_luanda_leaks.jsonl")


# @pytest.fixture(scope="module")
# def wd_peps():
# return smart_read_proxies(FIXTURES_PATH / "wd_peps.ftm.json")


@pytest.fixture(scope="module")
def ec_meetings():
return smart_read_proxies(FIXTURES_PATH / "ec_meetings.ftm.json")
def proxies():
proxies = []
proxies.extend(smart_read_proxies(FIXTURES_PATH / AUTHORITIES))
proxies.extend(smart_read_proxies(FIXTURES_PATH / DONATIONS))
return proxies


@pytest.fixture(scope="module")
def eu_authorities():
return smart_read_proxies(FIXTURES_PATH / "eu_authorities.ftm.json")
return [x for x in smart_read_proxies(FIXTURES_PATH / AUTHORITIES)]


@pytest.fixture(scope="module")
def donations():
return smart_read_proxies(FIXTURES_PATH / "donations.ijson")
return [x for x in smart_read_proxies(FIXTURES_PATH / DONATIONS)]
948 changes: 474 additions & 474 deletions tests/fixtures/donations.ijson

Large diffs are not rendered by default.

45,038 changes: 0 additions & 45,038 deletions tests/fixtures/ec_meetings.ftm.json

This file was deleted.

852 changes: 0 additions & 852 deletions tests/fixtures/icij_luanda_leaks.jsonl

This file was deleted.

129,557 changes: 0 additions & 129,557 deletions tests/fixtures/wd_peps.ftm.json

This file was deleted.

18 changes: 1 addition & 17 deletions tests/test_fingerprints.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
def test_fingerprints():
# find similarities by phonetic algorithm
# FIXME: query kept here for reference only; this test currently asserts nothing
q = """SELECT entity_id FROM ftm_columnstore_test_fpx
WHERE algorithm = '{algorithm}' AND value IN (
SELECT value FROM (
Expand All @@ -11,20 +12,3 @@ def test_fingerprints():
GROUP BY value
HAVING entities > 2
))"""
# ds = get_dataset("luanda_leaks")
# ds = ds.store
# entities = [
# e for e in ds.EQ.where(entity_id__in=q.format(algorithm="fingerprint"))
# ]
# self.assertEqual(len(entities), 125)
# FIXME
# names = [n for e in entities for n in e.names]
# for name in [
# "Galp Energia Overseas Block 14 B.V.",
# "Galp Energia Overseas Block 32 B.V.",
# "Galp Energia Overseas Block 33 B.V.",
# "Windhoek PEL 23 B.V.",
# "Windhoek PEL 24 B.V.",
# "Windhoek PEL 28 B.V.",
# ]:
# self.assertIn(name, names)
Loading

0 comments on commit c4c829d

Please sign in to comment.