Skip to content

Commit

Permalink
Merge pull request #116 from investigativedata/develop
Browse files Browse the repository at this point in the history
v0.5.1
  • Loading branch information
simonwoerpel authored Jan 3, 2024
2 parents f23db11 + 6786559 commit 19ca56a
Show file tree
Hide file tree
Showing 14 changed files with 2,845 additions and 2,554 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.5.0
current_version = 0.5.1
commit = True
tag = True
message = 🔖 Bump version: {current_version} → {new_version}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install poetry
Expand Down
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
all: clean install test

install:
poetry install --with dev

Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.5.0
0.5.1
2 changes: 1 addition & 1 deletion ftmq/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from ftmq.query import Query

__version__ = "0.5.0"
__version__ = "0.5.1"
__all__ = ["Query"]
82 changes: 58 additions & 24 deletions ftmq/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,55 +17,89 @@

log = logging.getLogger(__name__)

DEFAULT_MODE = "rb"


def _get_sysio(mode: str | None = DEFAULT_MODE) -> Literal[sys.stdin, sys.stdout]:
if mode.startswith("r"):
return sys.stdin
return sys.stdout


class SmartHandler:
def __init__(
self,
uri: Any,
*args,
**kwargs,
) -> None:
if not uri:
raise ValueError("Missing uri")
self.uri = str(uri)
self.args = args
kwargs["mode"] = kwargs.get("mode", DEFAULT_MODE)
self.sys_io = _get_sysio(kwargs["mode"])
if kwargs["mode"].endswith("b"):
self.sys_io = self.sys_io.buffer
self.kwargs = kwargs
self.is_buffer = self.uri == "-"
self.handler = None

def open(self):
if self.is_buffer:
self.handler = self.sys_io
else:
handler = open(self.uri, *self.args, **self.kwargs)
self.handler = handler.open()
return self.handler

def close(self):
if not self.is_buffer:
self.handler.close()

def __enter__(self):
return self.open()

def __exit__(self, *args, **kwargs) -> None:
self.close()


@contextlib.contextmanager
def smart_open(
uri: Any,
sys_io: Literal[sys.stdin.buffer, sys.stdout.buffer] | None = sys.stdin,
mode: str | None = None,
*args,
**kwargs,
):
is_buffer = False
kwargs["mode"] = kwargs.get("mode", "rb")
if not uri:
raise ValueError("Missing uri")
uri = str(uri)
if uri != "-":
fh = open(uri, *args, **kwargs)
if mode is not None:
kwargs["mode"] = mode
else:
fh = sys_io
is_buffer = True

kwargs["mode"] = kwargs.get("mode", DEFAULT_MODE)
handler = SmartHandler(uri, *args, **kwargs)
try:
if is_buffer:
yield fh
else:
yield fh.open()
yield handler.open()
finally:
if not is_buffer:
fh.close()
handler.close()


def _smart_stream(uri, *args, **kwargs) -> Any:
kwargs["mode"] = kwargs.get("mode", "rb")
with smart_open(uri, sys.stdin.buffer, *args, **kwargs) as fh:
with smart_open(uri, *args, **kwargs) as fh:
while line := fh.readline():
yield line


def smart_read(uri, *args, **kwargs) -> Any:
kwargs["mode"] = kwargs.get("mode", "rb")
stream = kwargs.pop("stream", False)
if stream:
return _smart_stream(uri, *args, **kwargs)

with smart_open(uri, sys.stdin.buffer, *args, **kwargs) as fh:
with smart_open(uri, *args, **kwargs) as fh:
return fh.read()


def smart_write(uri, content: bytes | str, *args, **kwargs) -> Any:
kwargs["mode"] = kwargs.get("mode", "wb")
with smart_open(uri, sys.stdout.buffer, *args, **kwargs) as fh:
with smart_open(uri, *args, **kwargs) as fh:
fh.write(content)


Expand All @@ -78,7 +112,7 @@ def smart_get_store(uri: PathLike, **kwargs) -> Store | None:

def smart_read_proxies(
uri: PathLike | Iterable[PathLike],
mode: str | None = "rb",
mode: str | None = DEFAULT_MODE,
serialize: bool | None = True,
query: Query | None = None,
**store_kwargs,
Expand Down Expand Up @@ -128,7 +162,7 @@ def smart_write_proxies(
log.info("Writing proxy %d ..." % ix)
return ix

with smart_open(uri, sys.stdout.buffer, mode=mode) as fh:
with smart_open(uri, mode=mode) as fh:
for proxy in proxies:
ix += 1
if serialize:
Expand Down
7 changes: 4 additions & 3 deletions ftmq/model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .coverage import Coverage
from .dataset import Catalog, Dataset, Maintainer, Publisher, Resource
from ftmq.model.coverage import Coverage
from ftmq.model.dataset import Catalog, Dataset, Maintainer, Publisher, Resource
from ftmq.model.proxy import Entity

__all__ = [Catalog, Coverage, Dataset, Maintainer, Publisher, Resource]
__all__ = [Catalog, Coverage, Dataset, Entity, Maintainer, Publisher, Resource]
50 changes: 50 additions & 0 deletions ftmq/model/proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from typing import Any, Iterable, TypeAlias, Union

from followthemoney.model import registry
from pydantic import BaseModel, ConfigDict, Field, model_validator

from ftmq.types import CE
from ftmq.util import make_proxy

Properties: TypeAlias = dict[str, list[Union[str, "Entity"]]]


class Entity(BaseModel):
model_config = ConfigDict(populate_by_name=True)

id: str = Field(..., example="NK-A7z....")
caption: str = Field(..., example="John Doe")
schema_: str = Field(..., example="LegalEntity", alias="schema")
properties: Properties = Field(..., example={"name": ["John Doe"]})
datasets: list[str] = Field([], example=["us_ofac_sdn"])
referents: list[str] = Field([], example=["ofac-1234"])

@classmethod
def from_proxy(cls, entity: CE, adjacents: Iterable[CE] | None = None) -> "Entity":
properties = dict(entity.properties)
if adjacents:
adjacents = {e.id: Entity.from_proxy(e) for e in adjacents}
for prop in entity.iterprops():
if prop.type == registry.entity:
properties[prop.name] = [
adjacents.get(i, i) for i in entity.get(prop)
]
return cls(
id=entity.id,
caption=entity.caption,
schema=entity.schema.name,
properties=properties,
datasets=list(entity.datasets),
referents=list(entity.referents),
)

def to_proxy(self) -> CE:
return make_proxy(self.model_dump(by_alias=True))

@model_validator(mode="before")
@classmethod
def get_caption(cls, data: Any) -> Any:
if data.get("caption") is None:
proxy = make_proxy(data)
data["caption"] = proxy.caption
return data
Loading

0 comments on commit 19ca56a

Please sign in to comment.