Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

my.google.takeout.parser: speed up event merging on newer google_takeout_parser versions #389

Merged
merged 1 commit into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions my/google/takeout/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import google_takeout_parser
from google_takeout_parser.path_dispatch import TakeoutParser
from google_takeout_parser.merge import GoogleEventSet, CacheResults
from google_takeout_parser.models import BaseEvent

# see https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py for an example
from my.config import google as user_config
Expand Down Expand Up @@ -95,6 +96,17 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:
error_policy = config.error_policy
count = 0
emitted = GoogleEventSet()

try:
emitted_add = emitted.add_if_not_present
except AttributeError:
# compatibility shim for older versions of google_takeout_parser, which lack GoogleEventSet.add_if_not_present
# Fallback used when `emitted` has no `add_if_not_present` attribute.
# Returns True if `other` was not yet in `emitted` (and adds it), False otherwise.
def emitted_add(other: BaseEvent) -> bool:
if other in emitted:
# already recorded: leave the set untouched and report a duplicate
return False
emitted.add(other)
return True

# reversed() shouldn't really matter, but the intent is to prefer newer
# takeouts (when they're named according to date), since JSON Activity
# is nicer than HTML Activity
Expand Down Expand Up @@ -123,10 +135,9 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:
elif error_policy == 'drop':
pass
continue
if event in emitted:
continue
emitted.add(event)
yield event # type: ignore[misc]

if emitted_add(event):
yield event # type: ignore[misc]
logger.debug(
f"HPI Takeout merge: from a total of {count} events, removed {count - len(emitted)} duplicates"
)
Expand Down
12 changes: 6 additions & 6 deletions my/youtube/takeout.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from typing import NamedTuple, List, Iterable, TYPE_CHECKING

from ..core import datetime_aware, Res, LazyLogger
from ..core.compat import removeprefix
from my.core import datetime_aware, make_logger, stat, Res, Stats
from my.core.compat import deprecated, removeprefix


logger = LazyLogger(__name__)
logger = make_logger(__name__)


class Watched(NamedTuple):
Expand Down Expand Up @@ -93,16 +93,16 @@ def watched() -> Iterable[Res[Watched]]:
)


from ..core import stat, Stats
# Passes the `watched` function itself (not its result) to `stat` and returns what `stat` produces.
def stats() -> Stats:
return stat(watched)


### deprecated stuff (keep in my.media.youtube)

if not TYPE_CHECKING:
# "deprecate" by hiding from mypy
get_watched = watched
# Legacy entry point kept for older callers: forwards all positional and
# keyword arguments to `watched` and returns its result unchanged.
@deprecated("use 'watched' instead")
def get_watched(*args, **kwargs):
return watched(*args, **kwargs)


def _watched_legacy() -> Iterable[Watched]:
Expand Down
Loading