Skip to content

Commit

Permalink
Merge pull request #290 from investigativedata/develop
Browse files Browse the repository at this point in the history
v0.6.9
  • Loading branch information
simonwoerpel authored Jul 24, 2024
2 parents acdc899 + 0e75e74 commit f3c97c7
Show file tree
Hide file tree
Showing 15 changed files with 272 additions and 260 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.6.8
current_version = 0.6.9
commit = True
tag = True
message = 🔖 Bump version: {current_version} → {new_version}
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,4 @@ jobs:
uses: coverallsapp/github-action@v2
with:
parallel-finished: true
# carryforward: "run-3.11,run-3.12"
carryforward: "run-3.11"
carryforward: "run-3.11,run-3.12"
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.6.8
0.6.9
2 changes: 1 addition & 1 deletion ftmq/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from ftmq.query import Query

__version__ = "0.6.8"
__version__ = "0.6.9"
__all__ = ["Query"]
6 changes: 3 additions & 3 deletions ftmq/aleph.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ class AlephStore(Store[CE, DS]):
def __init__(
self,
dataset: DS,
resolver: Resolver,
linker: Resolver,
host: str | None = None,
api_key: str | None = None,
):
super().__init__(dataset, resolver)
super().__init__(dataset, linker)
self.host = host or HOST
self.api_key = api_key or API_KEY

Expand Down Expand Up @@ -108,7 +108,7 @@ def add_statement(self, stmt: Statement) -> None:
return
if len(self.batch) >= self.BATCH:
self.flush()
canonical_id = self.store.resolver.get_canonical(stmt.entity_id)
canonical_id = self.store.linker.get_canonical(stmt.entity_id)
stmt.canonical_id = canonical_id
self.batch[stmt.canonical_id].add(stmt)

Expand Down
20 changes: 9 additions & 11 deletions ftmq/store/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,46 +22,44 @@ def get_store(
uri: PathLike | None = "memory:///",
catalog: Catalog | None = None,
dataset: Dataset | str | None = None,
resolver: Resolver | str | None = None,
linker: Resolver | str | None = None,
) -> Store:
if isinstance(dataset, str):
dataset = Dataset(name=dataset)
if isinstance(resolver, (str, Path)):
resolver = get_resolver(resolver)
if isinstance(linker, (str, Path)):
linker = get_resolver(linker)
uri = str(uri)
parsed = urlparse(uri)
if parsed.scheme == "memory":
return MemoryStore(catalog, dataset, resolver=resolver)
return MemoryStore(catalog, dataset, linker=linker)
if parsed.scheme == "leveldb":
path = uri.replace("leveldb://", "")
path = Path(path).absolute()
try:
from ftmq.store.level import LevelDBStore

return LevelDBStore(catalog, dataset, path=path, resolver=resolver)
return LevelDBStore(catalog, dataset, path=path, linker=linker)
except ImportError:
raise ImportError("Can not load LevelDBStore. Install `plyvel`")
if parsed.scheme == "redis":
try:
from ftmq.store.redis import RedisStore

return RedisStore(catalog, dataset, path=path, resolver=resolver)
return RedisStore(catalog, dataset, path=path, linker=linker)
except ImportError:
raise ImportError("Can not load RedisStore. Install `redis`")
if parsed.scheme == "clickhouse":
try:
from ftm_columnstore import get_store as get_cstore

return get_cstore(catalog, dataset, resolver=resolver)
return get_cstore(catalog, dataset, linker=linker)
except ImportError:
raise ImportError("Can not load ClickhouseStore. Install `ftm-columnstore`")
if "sql" in parsed.scheme:
get_metadata.cache_clear()
return SQLStore(catalog, dataset, uri=uri, resolver=resolver)
return SQLStore(catalog, dataset, uri=uri, linker=linker)
if "aleph" in parsed.scheme:
return AlephStore.from_uri(
uri, catalog=catalog, dataset=dataset, resolver=resolver
)
return AlephStore.from_uri(uri, catalog=catalog, dataset=dataset, linker=linker)
raise NotImplementedError(uri)


Expand Down
4 changes: 2 additions & 2 deletions ftmq/store/aleph.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def from_uri(
uri: str,
dataset: Dataset | str | None = None,
catalog: Catalog | None = None,
resolver: Resolver | None = None,
linker: Resolver | None = None,
) -> Self:
host, api_key, foreign_id = parse_uri(uri)
if dataset is None and foreign_id is not None:
Expand All @@ -39,4 +39,4 @@ def from_uri(
if isinstance(dataset, str):
dataset = Dataset(name=dataset)

return cls(catalog, dataset, resolver=resolver, host=host, api_key=api_key)
return cls(catalog, dataset, linker=linker, host=host, api_key=api_key)
10 changes: 5 additions & 5 deletions ftmq/store/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(
self,
catalog: C | None = None,
dataset: Dataset | str | None = None,
resolver: Resolver | None = None,
linker: Resolver | None = None,
**kwargs,
) -> None:
if dataset is not None:
Expand All @@ -30,7 +30,7 @@ def __init__(
dataset = catalog.get_scope()
else:
dataset = DefaultDataset
super().__init__(dataset=dataset, resolver=resolver or Resolver(), **kwargs)
super().__init__(dataset=dataset, linker=linker or Resolver(), **kwargs)

def get_catalog(self) -> C:
# return implicit catalog computed from current datasets in store
Expand All @@ -46,7 +46,7 @@ def iterate(self, dataset: str | Dataset | None = None) -> CEGenerator:
yield from view.entities()

def resolve(self, dataset: str | Dataset | None = None) -> None:
if not self.resolver.edges:
if not self.linker.edges:
return
if dataset is not None:
if isinstance(dataset, str):
Expand All @@ -58,8 +58,8 @@ def resolve(self, dataset: str | Dataset | None = None) -> None:
else:
entities = self.iterate()
for ix, entity in enumerate(entities):
if entity.id in self.resolver.nodes:
self.update(self.resolver.get_canonical(entity.id))
if entity.id in self.linker.nodes:
self.update(self.linker.get_canonical(entity.id))
if ix and ix % 10_000 == 0:
log.info("Resolving entity %d ..." % ix)

Expand Down
31 changes: 31 additions & 0 deletions ftmq/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,3 +220,34 @@ def prop_is_numeric(schema: Schema, prop: str) -> bool:
if prop is not None:
return prop.type == registry.number
return False


def get_proxy_caption_property(proxy: CE) -> dict[str, str]:
    """
    Return a one-item mapping with the first caption property that has a value.

    The schema's caption properties are walked in their declared order and,
    for each, the proxy's values in the order `proxy.get` yields them. The
    very first (property, value) pair found is returned as `{property: value}`.
    An empty dict is returned if no caption property has any value.
    """
    # lazy on purpose: `proxy.get` is only called until the first hit
    pairs = (
        (name, value)
        for name in proxy.schema.caption
        for value in proxy.get(name)
    )
    hit = next(pairs, None)
    if hit is None:
        return {}
    name, value = hit
    return {name: value}


def get_dehydrated_proxy(proxy: CE) -> CE:
    """
    Build a slimmed-down copy of `proxy` that carries only its caption property.

    The new proxy is created via `make_proxy` from the original's id, schema
    name and datasets; its properties are whatever
    `get_proxy_caption_property` returns for the original (at most one
    property with a single value).
    """
    payload = {
        "id": proxy.id,
        "schema": proxy.schema.name,
        "properties": get_proxy_caption_property(proxy),
        "datasets": proxy.datasets,
    }
    return make_proxy(payload)


def get_featured_proxy(proxy: CE) -> CE:
    """
    Build a reduced copy of `proxy` limited to its featured properties.

    Starts from the dehydrated proxy (see `get_dehydrated_proxy`) and then
    adds, for every property listed in the schema's `featured`, the values
    the original proxy holds for it.
    """
    slim = get_dehydrated_proxy(proxy)
    for name in proxy.schema.featured:
        values = proxy.get(name)
        slim.add(name, values)
    return slim
16 changes: 8 additions & 8 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@investigativedata/ftmq",
"version": "0.6.8",
"version": "0.6.9",
"description": "javascript interface for ftmq",
"main": "dist/index.js",
"types": "dist/index.d.ts",
Expand Down
Loading

0 comments on commit f3c97c7

Please sign in to comment.