Skip to content

Commit

Permalink
my.instagram.android: more defensive error handling
Browse files Browse the repository at this point in the history
  • Loading branch information
karlicoss committed Oct 22, 2023
1 parent 86ea605 commit 6e89fd8
Showing 1 changed file with 49 additions and 34 deletions.
83 changes: 49 additions & 34 deletions my/instagram/android.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from datetime import datetime
import json
from pathlib import Path
import sqlite3
from typing import Iterator, Sequence, Optional, Dict, Union

from more_itertools import unique_everseen
Expand All @@ -22,6 +23,7 @@
assert_never,
)
from my.core.cachew import mcachew
from my.core.error import echain
from my.core.sqlite import sqlite_connect_immutable, select

from my.config import instagram as user_config
Expand Down Expand Up @@ -132,6 +134,48 @@ def _parse_message(j: Json) -> Optional[_Message]:
)


def _process_db(db: sqlite3.Connection) -> Iterator[Res[Union[User, _Message]]]:
    """
    Yield the users and messages found in a single database connection.

    Three tables are read, in this order:

    - ``session``: one ``User`` per ``user_id`` row, with the names filled in
      from ``config`` (or placeholder strings when unset).
    - ``threads``: one ``User`` per thread participant (the inviter, when
      present, followed by the recipients).
    - ``messages`` (ordered by ``timestamp``): each row is decoded and passed
      to ``_parse_message``; ``None`` results are skipped.

    A message row that fails to decode or parse is yielded as the exception
    itself and processing continues with the next row. Errors raised while
    reading ``session`` or ``threads`` rows are not caught here and propagate
    to the caller.
    """
    # TODO ugh. seems like no way to extract username?
    # sometimes messages (e.g. media_share) contain it in message field
    # but generally it's not present. ugh
    for (self_uid,) in select(('user_id',), 'FROM session', db=db):
        yield User(
            id=str(self_uid),
            full_name=config.full_name or 'USERS_OWN_FULL_NAME',
            # NOTE(review): this falls back on config.full_name, not a username
            # setting -- possibly a copy-paste slip; confirm against the config class
            username=config.full_name or 'USERS_OWN_USERNAME',
        )

    for (thread_json,) in select(('thread_info',), 'FROM threads', db=db):
        j = json.loads(thread_json)
        # todo in principle should leave the thread attached to the message?
        # since thread is a group of users?
        pre_users = []
        # inviter usually contains our own user
        if 'inviter' in j:
            # sometimes it's missing (e.g. in broadcast channels)
            pre_users.append(j['inviter'])
        pre_users.extend(j['recipients'])
        for r in pre_users:
            # id disappeared and seems that pk_id is in use now (around december 2022)
            uid = r.get('id') or r.get('pk_id')
            assert uid is not None
            yield User(
                id=str(uid),  # for some reason it's int in the db
                full_name=r['full_name'],
                username=r['username'],
            )

    for (msg_json,) in select(('message',), 'FROM messages ORDER BY timestamp', db=db):
        # eh, seems to contain everything in json?
        try:
            # decoding is inside the try so that a single corrupt row is reported
            # as an error instead of aborting the rest of the messages
            j = json.loads(msg_json)
            m = _parse_message(j)
        except Exception as e:
            yield e
            continue
        if m is not None:
            yield m


def _entities() -> Iterator[Res[Union[User, _Message]]]:
# NOTE: definitely need to merge multiple, app seems to recycle old messages
# TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
Expand All @@ -140,40 +184,11 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
for f in dbs:
logger.info(f'{f} : processing...')
with sqlite_connect_immutable(f) as db:
# TODO ugh. seems like no way to extract username?
# sometimes messages (e.g. media_share) contain it in message field
# but generally it's not present. ugh
for (self_uid,) in select(('user_id',), 'FROM session', db=db):
yield User(
id=str(self_uid),
full_name=config.full_name or 'USERS_OWN_FULL_NAME',
username=config.full_name or 'USERS_OWN_USERNAME',
)

for (thread_json,) in select(('thread_info',), 'FROM threads', db=db):
j = json.loads(thread_json)
# todo in principle should leave the thread attached to the message?
# since thread is a group of users?
# inviter usually contains our own user
for r in [j['inviter'], *j['recipients']]:
# id disappeared and seems that pk_id is in use now (around december 2022)
uid = r.get('id') or r.get('pk_id')
assert uid is not None
yield User(
id=str(uid), # for some reason it's int in the db
full_name=r['full_name'],
username=r['username'],
)

for (msg_json,) in select(('message',), 'FROM messages ORDER BY timestamp', db=db):
# eh, seems to contain everything in json?
j = json.loads(msg_json)
try:
m = _parse_message(j)
if m is not None:
yield m
except Exception as e:
yield e
try:
yield from _process_db(db=db)
except Exception as e:
# todo use error policy here
yield echain(RuntimeError(f'While processing {f}'), cause=e)


@mcachew(depends_on=inputs)
Expand Down

0 comments on commit 6e89fd8

Please sign in to comment.