
👽️ (nomenklatura) Resolver -> Linker
simonwoerpel committed Jul 23, 2024
1 parent e5b5463 commit 10bae25
Showing 8 changed files with 154 additions and 226 deletions.
6 changes: 3 additions & 3 deletions ftmq/aleph.py
@@ -41,11 +41,11 @@ class AlephStore(Store[CE, DS]):
     def __init__(
         self,
         dataset: DS,
-        resolver: Resolver,
+        linker: Resolver,
         host: str | None = None,
         api_key: str | None = None,
     ):
-        super().__init__(dataset, resolver)
+        super().__init__(dataset, linker)
         self.host = host or HOST
         self.api_key = api_key or API_KEY

@@ -108,7 +108,7 @@ def add_statement(self, stmt: Statement) -> None:
             return
         if len(self.batch) >= self.BATCH:
             self.flush()
-        canonical_id = self.store.resolver.get_canonical(stmt.entity_id)
+        canonical_id = self.store.linker.get_canonical(stmt.entity_id)
         stmt.canonical_id = canonical_id
         self.batch[stmt.canonical_id].add(stmt)

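In this file, the bulk writer now asks the store's linker for a canonical id before a statement is batched. A minimal sketch of that lookup, assuming nomenklatura's Resolver is importable as below (the entity id is a placeholder):

    from nomenklatura.resolver import Resolver

    # A fresh linker with no merge decisions recorded: an id canonicalises to itself.
    linker = Resolver()
    canonical_id = linker.get_canonical("entity-a")  # "entity-a"

Once merge decisions are present, get_canonical returns the canonical id of the cluster an entity belongs to, which is what add_statement stores on the outgoing statement.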
20 changes: 9 additions & 11 deletions ftmq/store/__init__.py
@@ -22,46 +22,44 @@ def get_store(
     uri: PathLike | None = "memory:///",
     catalog: Catalog | None = None,
     dataset: Dataset | str | None = None,
-    resolver: Resolver | str | None = None,
+    linker: Resolver | str | None = None,
 ) -> Store:
     if isinstance(dataset, str):
         dataset = Dataset(name=dataset)
-    if isinstance(resolver, (str, Path)):
-        resolver = get_resolver(resolver)
+    if isinstance(linker, (str, Path)):
+        linker = get_resolver(linker)
     uri = str(uri)
     parsed = urlparse(uri)
     if parsed.scheme == "memory":
-        return MemoryStore(catalog, dataset, resolver=resolver)
+        return MemoryStore(catalog, dataset, linker=linker)
     if parsed.scheme == "leveldb":
         path = uri.replace("leveldb://", "")
         path = Path(path).absolute()
         try:
             from ftmq.store.level import LevelDBStore

-            return LevelDBStore(catalog, dataset, path=path, resolver=resolver)
+            return LevelDBStore(catalog, dataset, path=path, linker=linker)
         except ImportError:
             raise ImportError("Can not load LevelDBStore. Install `plyvel`")
     if parsed.scheme == "redis":
         try:
             from ftmq.store.redis import RedisStore

-            return RedisStore(catalog, dataset, path=path, resolver=resolver)
+            return RedisStore(catalog, dataset, path=path, linker=linker)
         except ImportError:
             raise ImportError("Can not load RedisStore. Install `redis`")
     if parsed.scheme == "clickhouse":
         try:
             from ftm_columnstore import get_store as get_cstore

-            return get_cstore(catalog, dataset, resolver=resolver)
+            return get_cstore(catalog, dataset, linker=linker)
         except ImportError:
             raise ImportError("Can not load ClickhouseStore. Install `ftm-columnstore`")
     if "sql" in parsed.scheme:
         get_metadata.cache_clear()
-        return SQLStore(catalog, dataset, uri=uri, resolver=resolver)
+        return SQLStore(catalog, dataset, uri=uri, linker=linker)
     if "aleph" in parsed.scheme:
-        return AlephStore.from_uri(
-            uri, catalog=catalog, dataset=dataset, resolver=resolver
-        )
+        return AlephStore.from_uri(uri, catalog=catalog, dataset=dataset, linker=linker)
     raise NotImplementedError(uri)


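For callers of get_store, only the keyword name changes. A hypothetical call site under the layout shown in this diff (the URIs, dataset name and resolver file path are placeholders):

    from ftmq.store import get_store

    # In-memory store; `linker` defaults to None and the base store falls back to an empty Resolver.
    store = get_store("memory:///", dataset="my_dataset")

    # SQL-backed store; a str or Path passed as `linker` is loaded via get_resolver().
    store = get_store("sqlite:///ftm.store", dataset="my_dataset", linker="resolver.ijson")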
4 changes: 2 additions & 2 deletions ftmq/store/aleph.py
@@ -30,7 +30,7 @@ def from_uri(
         uri: str,
         dataset: Dataset | str | None = None,
         catalog: Catalog | None = None,
-        resolver: Resolver | None = None,
+        linker: Resolver | None = None,
     ) -> Self:
         host, api_key, foreign_id = parse_uri(uri)
         if dataset is None and foreign_id is not None:
@@ -39,4 +39,4 @@ def from_uri(
         if isinstance(dataset, str):
             dataset = Dataset(name=dataset)

-        return cls(catalog, dataset, resolver=resolver, host=host, api_key=api_key)
+        return cls(catalog, dataset, linker=linker, host=host, api_key=api_key)
10 changes: 5 additions & 5 deletions ftmq/store/base.py
@@ -19,7 +19,7 @@ def __init__(
         self,
         catalog: C | None = None,
         dataset: Dataset | str | None = None,
-        resolver: Resolver | None = None,
+        linker: Resolver | None = None,
         **kwargs,
     ) -> None:
         if dataset is not None:
@@ -30,7 +30,7 @@ def __init__(
             dataset = catalog.get_scope()
         else:
             dataset = DefaultDataset
-        super().__init__(dataset=dataset, resolver=resolver or Resolver(), **kwargs)
+        super().__init__(dataset=dataset, linker=linker or Resolver(), **kwargs)

     def get_catalog(self) -> C:
         # return implicit catalog computed from current datasets in store
@@ -46,7 +46,7 @@ def iterate(self, dataset: str | Dataset | None = None) -> CEGenerator:
         yield from view.entities()

     def resolve(self, dataset: str | Dataset | None = None) -> None:
-        if not self.resolver.edges:
+        if not self.linker.edges:
             return
         if dataset is not None:
             if isinstance(dataset, str):
@@ -58,8 +58,8 @@ def resolve(self, dataset: str | Dataset | None = None) -> None:
         else:
             entities = self.iterate()
         for ix, entity in enumerate(entities):
-            if entity.id in self.resolver.nodes:
-                self.update(self.resolver.get_canonical(entity.id))
+            if entity.id in self.linker.nodes:
+                self.update(self.linker.get_canonical(entity.id))
             if ix and ix % 10_000 == 0:
                 log.info("Resolving entity %d ..." % ix)

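The renamed resolve() above still iterates stored entities and rewrites those whose ids appear in the linker's node set under their canonical id. A hedged sketch of triggering it (store URI and resolver file path are placeholders):

    from ftmq.store import get_store

    store = get_store("memory:///", dataset="my_dataset", linker="resolver.ijson")
    # A no-op if the linker holds no edges; otherwise entities found in
    # linker.nodes are updated to their canonical ids.
    store.resolve()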
(Diffs for the remaining four changed files were not loaded.)
