v0.7.1 #366

Merged · 15 commits · Jan 3, 2025
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.7.0
+current_version = 0.7.1
 commit = True
 tag = True
 message = 🔖 Bump version: {current_version} → {new_version}
4 changes: 3 additions & 1 deletion .github/workflows/python.yml
@@ -25,6 +25,8 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Install poetry
         run: curl -sSL https://install.python-poetry.org | python3 -
+      - name: Install leveldb
+        run: sudo apt-get install pkg-config libleveldb-dev
       - name: Configure poetry
         run: poetry config virtualenvs.in-project true
       - name: set PY
@@ -43,7 +45,7 @@ jobs:
           path: ~/.cache/pre-commit
           key: pre-commit-${{ runner.os }}-${{ env.PY }}-${{ hashFiles('.pre-commit-config.yaml') }}
       - name: Install dependencies
-        run: poetry install --with dev
+        run: poetry install --with dev --all-extras
       - name: Run pre-commit hooks
         run: poetry run pre-commit run
       - name: Lint with flake8
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -74,4 +74,4 @@ repos:
       - id: poetry-lock
         args: ["--no-update"]
       - id: poetry-export
-        args: ["--dev", "-f", "requirements.txt", "-o", "requirements.txt"]
+        args: ["-f", "requirements.txt", "-o", "requirements.txt"]
1 change: 0 additions & 1 deletion Dockerfile
@@ -4,7 +4,6 @@ RUN apt-get -qq update && apt-get -qq -y upgrade
RUN apt-get install -qq -y pkg-config libicu-dev libleveldb-dev
RUN apt-get -qq -y autoremove && apt-get clean

RUN pip install --no-cache-dir -q -U pip setuptools
RUN pip install --no-cache-dir -q --no-binary=:pyicu: pyicu

COPY ftmq /src/ftmq
2 changes: 1 addition & 1 deletion Makefile
@@ -1,7 +1,7 @@
 all: clean install test
 
 install:
-	poetry install --with dev
+	poetry install --with dev --all-extras
 
 lint:
 	poetry run flake8 ftmq --count --select=E9,F63,F7,F82 --show-source --statistics
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-0.7.0
+0.7.1
2 changes: 1 addition & 1 deletion ftmq/__init__.py
@@ -1,4 +1,4 @@
 from ftmq.query import Query
 
-__version__ = "0.7.0"
+__version__ = "0.7.1"
 __all__ = ["Query"]
11 changes: 5 additions & 6 deletions ftmq/model/dataset.py
@@ -1,11 +1,10 @@
 from datetime import datetime
 from typing import Iterable, Literal, Self, TypeVar
 
-from nomenklatura.dataset.catalog import DataCatalog as NKCatalog
 from nomenklatura.dataset.dataset import Dataset as NKDataset
 from normality import slugify
-from pantomime.types import FTM
 from pydantic import AnyUrl, HttpUrl
+from rigour.mime.types import FTM
 
 from ftmq.enums import Categories, Frequencies
 from ftmq.model.coverage import Coverage, DatasetStats, Schemata
@@ -83,6 +82,7 @@ class Dataset(BaseModel):
     aleph_url: HttpUrl | None = None
     tags: list[str] | None = []
     content_type: ContentType | None = "structured"
+    total_file_size: int | None = 0
 
     git_repo: AnyUrl | None = None
     uri: str | None = None
@@ -143,16 +143,15 @@ def get(self, name: str) -> Dataset | None:
 
     def get_scope(self) -> NKDataset:
         # FIXME clarify
-        return NKDataset(
-            NKCatalog(
-                NKDataset, {"datasets": [make_dataset(n).to_dict() for n in self.names]}
-            ),
+        ds = NKDataset(
             {
                 "name": slugify(self.name),
                 "title": self.name.title(),
                 "children": self.names,
             },
         )
+        ds.children = {make_dataset(n) for n in self.names}
+        return ds
 
     def iterate(self) -> CEGenerator:
         for dataset in self.datasets:
18 changes: 10 additions & 8 deletions ftmq/model/proxy.py
@@ -1,13 +1,14 @@
-from typing import Any, Iterable, Self, TypeAlias, TypeVar, Union
+from typing import Any, Iterable, Self, Sequence, TypeAlias, TypeVar, Union
 
 from followthemoney.types import registry
+from nomenklatura.publish.names import pick_caption
 from pydantic import BaseModel, ConfigDict, Field, model_validator
 
 from ftmq.types import CE
-from ftmq.util import make_proxy
+from ftmq.util import make_proxy, must_str
 
 EntityProp = TypeVar("EntityProp", bound="Entity")
-Properties: TypeAlias = dict[str, list[Union[str, EntityProp]]]
+Properties: TypeAlias = dict[str, Sequence[Union[str, EntityProp]]]
 
 
 class Entity(BaseModel):
@@ -24,14 +25,16 @@ class Entity(BaseModel):
     def from_proxy(cls, entity: CE, adjacents: Iterable[CE] | None = None) -> Self:
         properties = dict(entity.properties)
         if adjacents:
-            adjacents = {e.id: Entity.from_proxy(e) for e in adjacents}
+            adjacents_: dict[str, Entity] = {
+                must_str(e.id): Entity.from_proxy(e) for e in adjacents
+            }
             for prop in entity.iterprops():
                 if prop.type == registry.entity:
                     properties[prop.name] = [
-                        adjacents.get(i, i) for i in entity.get(prop)
+                        adjacents_.get(i, i) for i in entity.get(prop)
                     ]
         return cls(
-            id=entity.id,
+            id=must_str(entity.id),
             caption=entity.caption,
             schema=entity.schema.name,
             properties=properties,
@@ -46,6 +49,5 @@ def to_proxy(self) -> CE:
     @classmethod
     def get_caption(cls, data: Any) -> Any:
         if data.get("caption") is None:
-            proxy = make_proxy(data)
-            data["caption"] = proxy.caption
+            data["caption"] = pick_caption(make_proxy(data))
         return data
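
The proxy.py changes above route entity IDs through the new `must_str()` helper and derive captions via nomenklatura's `pick_caption`. A minimal usage sketch of the resulting behaviour (the sample record and property values are invented for illustration, not taken from this diff):

```python
from ftmq.model.proxy import Entity
from ftmq.util import make_proxy

# make_proxy() builds a CompositeEntity from a plain dict (see ftmq/util.py)
proxy = make_proxy(
    {"id": "p1", "schema": "Person", "properties": {"name": ["Jane Doe"]}}
)

# ids now pass through must_str(), so a proxy without a usable id raises
# ValueError instead of silently producing an Entity with id=None
entity = Entity.from_proxy(proxy)
print(entity.id, entity.caption)
```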
45 changes: 4 additions & 41 deletions ftmq/query.py
@@ -6,7 +6,7 @@
 from nomenklatura.entity import CE
 
 from ftmq.aggregations import Aggregation, Aggregator
-from ftmq.enums import Aggregations, Comparators, Properties
+from ftmq.enums import Aggregations, Properties
 from ftmq.exceptions import ValidationError
 from ftmq.filters import (
     FILTERS,
@@ -56,24 +56,15 @@ def serialize(self) -> list[str]:
 
 
 class Query:
-    DEFAULT_SEARCH_PROPS = (
-        Properties["name"],
-        Properties["firstName"],
-        Properties["middleName"],
-        Properties["lastName"],
-    )
-
     def __init__(
         self,
         filters: Iterable[F] | None = None,
-        search_filters: Iterable[F] | None = None,
         aggregations: Iterable[Aggregation] | None = None,
         aggregator: Aggregator | None = None,
         sort: Sort | None = None,
         slice: Slice | None = None,
     ):
         self.filters = set(ensure_list(filters))
-        self.search_filters = set(ensure_list(search_filters))
         self.aggregations = set(ensure_list(aggregations))
         self.aggregator = aggregator
         self.sort = sort
@@ -162,13 +153,6 @@ def lookups(self) -> dict[str, Any]:
         """
         return self._get_lookups(self.filters)
 
-    @property
-    def search_lookups(self) -> dict[str, Any]:
-        """
-        The current search lookups as dictionary
-        """
-        return self._get_lookups(self.search_filters)
-
     @property
     def limit(self) -> int | None:
         """
@@ -283,9 +267,6 @@ def to_dict(self) -> dict[str, Any]:
         ```
         """
         data = self.lookups
-        search_data = self.search_lookups
-        if search_data:
-            data["search"] = search_data
         if self.sort:
             data["order_by"] = self.sort.serialize()
         if self.slice:
@@ -364,14 +345,6 @@ def where(self, **lookup: Any) -> Q:
 
         return self._chain()
 
-    def search(self, q: str, props: Iterable[Properties | str] = None) -> Q:
-        # reset existing search
-        self.search_filters: set[F] = set()
-        props = props or self.DEFAULT_SEARCH_PROPS
-        for prop in props:
-            self.search_filters.add(PropertyFilter(prop, q, Comparators.ilike))
-        return self._chain()
-
     def order_by(self, *values: Iterable[str], ascending: bool | None = True) -> Q:
         """
         Add or update the current sorting.
@@ -401,23 +374,13 @@ def aggregate(
     def get_aggregator(self) -> Aggregator:
         return Aggregator(aggregations=self.aggregations)
 
-    def apply_filter(self, proxy: CE) -> bool:
-        if not self.filters:
-            return True
-        return all(f.apply(proxy) for f in self.filters)
-
-    def apply_search(self, proxy: CE) -> bool:
-        if not self.search_filters:
-            return True
-        return any(f.apply(proxy) for f in self.search_filters)
-
     def apply(self, proxy: CE) -> bool:
         """
         Test if a proxy matches the current `Query` instance.
         """
-        if self.apply_filter(proxy):
-            return self.apply_search(proxy)
-        return False
+        if not self.filters:
+            return True
+        return all(f.apply(proxy) for f in self.filters)
 
     def apply_iter(self, proxies: CEGenerator) -> CEGenerator:
         """
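With the search API removed, `Query.apply()` reduces to the conjunction of the remaining filters. A rough sketch of what is left (the `where()` keywords and test data are illustrative assumptions, not part of this diff):

```python
from ftmq import Query
from ftmq.util import make_proxy

q = Query().where(schema="Person", country="de")  # assumed lookup keywords
proxy = make_proxy(
    {
        "id": "p1",
        "schema": "Person",
        "properties": {"name": ["Jane Doe"], "country": ["de"]},
    }
)

# apply() now simply checks all(f.apply(proxy) for f in q.filters)
if q.apply(proxy):
    print("proxy matches every filter")
```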
24 changes: 1 addition & 23 deletions ftmq/sql.py
@@ -117,26 +117,9 @@ def clause(self) -> BooleanClauseList:
             )
         return and_(*clauses)
 
-    @cached_property
-    def search_clause(self) -> BooleanClauseList | None:
-        if not self.q.search_filters:
-            return
-        return or_(
-            and_(
-                self.table.c.prop == f.key,
-                self.get_expression(self.table.c.value, f),
-            )
-            for f in self.q.search_filters
-        )
-
     @cached_property
     def canonical_ids(self) -> Select:
         q = select(self.table.c.canonical_id.distinct()).where(self.clause)
-        if self.q.search_filters:
-            search_ids = select(self.table.c.canonical_id.distinct()).where(
-                self.search_clause
-            )
-            q = q.where(self.table.c.canonical_id.in_(search_ids))
         if self.q.sort is None:
             q = q.limit(self.q.limit).offset(self.q.offset)
         return q
@@ -148,12 +131,7 @@ def all_canonical_ids(self) -> Select:
     @cached_property
     def _unsorted_statements(self) -> Select:
         where = self.clause
-        if (
-            self.q.properties
-            or self.q.reversed
-            or self.q.search_filters
-            or self.q.limit
-        ):
+        if self.q.properties or self.q.reversed or self.q.limit:
             where = self.table.c.canonical_id.in_(self.canonical_ids)
         return select(self.table).where(where).order_by(self.table.c.canonical_id)
 
30 changes: 25 additions & 5 deletions ftmq/util.py
@@ -4,6 +4,7 @@
 
 import pycountry
 from banal import ensure_list, is_listish
+from followthemoney.proxy import E, EntityProxy
 from followthemoney.schema import Schema
 from followthemoney.types import registry
 from followthemoney.util import make_entity_id, sanitize_text
@@ -95,6 +96,14 @@ def make_proxy(data: dict[str, Any], dataset: str | Dataset | None = None) -> CE
     return proxy
 
 
+def ensure_proxy(data: dict[str, Any] | CE | E) -> CompositeEntity:
+    if isinstance(data, CompositeEntity):
+        return data
+    if isinstance(data, EntityProxy):
+        data = data.to_full_dict()
+    return make_proxy(data)
+
+
 def get_statements(proxy: CE, *datasets: str) -> SGenerator:
     """
     Get statements from a `nomenklatura.entity.CompositeEntity` with multiple
@@ -119,7 +128,7 @@ def get_statements(proxy: CE, *datasets: str) -> SGenerator:
 
 
 @cache
-def get_country_name(alpha2: str) -> str:
+def get_country_name(code: str) -> str:
     """
     Get the (english) country name for the given 2-letter iso code via
     [pycountry](https://pypi.org/project/pycountry/)
@@ -129,21 +138,25 @@ def get_country_name(alpha2: str) -> str:
         "Germany"
         >>> get_country_name("xx")
         "xx"
+        >>> get_country_name("gb") == get_country_name("uk")
+        True # United Kingdom
 
     Args:
         alpha2: Two-letter iso code, case insensitive
 
     Returns:
         Either the country name for a valid code or the code as fallback.
     """
-    alpha2 = alpha2.lower()
+    code_clean = get_country_code(code)
+    if code_clean is None:
+        code_clean = code.lower()
     try:
-        country = pycountry.countries.get(alpha_2=alpha2)
+        country = pycountry.countries.get(alpha_2=code_clean)
         if country is not None:
             return country.name
     except (LookupError, AttributeError):
-        return alpha2
-    return alpha2
+        return code
+    return code_clean
 
 
 @lru_cache(1024)
@@ -485,3 +498,10 @@ def get_featured_proxy(proxy: CE) -> CE:
     for prop in proxy.schema.featured:
         featured.add(prop, proxy.get(prop))
     return featured
+
+
+def must_str(value: Any) -> str:
+    value = clean_string(value)
+    if not value:
+        raise ValueError(f"Value invalid: `{value}`")
+    return value
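
The new `ensure_proxy()` and `must_str()` helpers and the more forgiving `get_country_name()` can be exercised roughly as follows (a sketch based on the docstrings above; the sample entity is invented):

```python
from ftmq.util import ensure_proxy, get_country_name, must_str

# get_country_name() now normalises the input via get_country_code() first,
# so "uk" and "gb" resolve to the same name, and unknown codes fall back
# to the cleaned code itself
print(get_country_name("de"))  # Germany
print(get_country_name("xx"))  # xx
print(get_country_name("uk") == get_country_name("gb"))  # True

# must_str() guards against empty or None values
must_str(" some-id ")  # returns the cleaned string
# must_str(None)       # would raise ValueError

# ensure_proxy() upgrades dicts (or plain EntityProxy objects) to a CompositeEntity
proxy = ensure_proxy(
    {"id": "p1", "schema": "Person", "properties": {"name": ["Jane Doe"]}}
)
```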
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
 {
   "name": "@investigativedata/ftmq",
-  "version": "0.7.0",
+  "version": "0.7.1",
   "description": "javascript interface for ftmq",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",