From 9f0023fe2707b0220088feb3b7c7d153a2a69ad1 Mon Sep 17 00:00:00 2001
From: Tom Brown <tomgith4@thecap.org>
Date: Wed, 16 Aug 2023 12:04:03 +0200
Subject: [PATCH] an ugly hack to speed up access to versions

---
 tourist/continuumutils.py    | 98 ++++++++++++++++++++++++++++++++++++
 tourist/render_factory.py    | 30 ++++++-----
 tourist/scripts/batchtool.py | 16 ++----
 3 files changed, 121 insertions(+), 23 deletions(-)
 create mode 100644 tourist/continuumutils.py

diff --git a/tourist/continuumutils.py b/tourist/continuumutils.py
new file mode 100644
index 0000000..46532e0
--- /dev/null
+++ b/tourist/continuumutils.py
@@ -0,0 +1,98 @@
+from collections import defaultdict
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Set
+from typing import Type
+
+import attrs
+import sqlalchemy
+import sqlalchemy_continuum
+from more_itertools import last
+
+from tourist.models import tstore
+
+PoolVersion = sqlalchemy_continuum.version_class(tstore.Pool)
+PlaceVersion = sqlalchemy_continuum.version_class(tstore.Place)
+ClubVersion = sqlalchemy_continuum.version_class(tstore.Club)
+Transaction = sqlalchemy_continuum.transaction_class(tstore.Club)
+
+# Entity type name -> the continuum version class for that entity type.
+type_to_version_cls = {
+    'place': PlaceVersion,
+    'pool': PoolVersion,
+    'club': ClubVersion,
+}
+
+
+
+@attrs.frozen()
+class VersionTable:
+    """In-memory copy of one Version table, built up while replaying transactions.
+    `versions` maps an entity id to that entity's version objects, in the order added."""
+    entity_types: Set[str]
+    version_cls: Type
+    versions: Dict[int, List] = attrs.field(factory=lambda: defaultdict(list))
+
+    def add_version_object(self, transaction: Transaction, new_version_obj):
+        history = self.versions[new_version_obj.id]
+        # A new version must start in the transaction that ended the previous version.
+        assert not history or history[-1].end_transaction_id == transaction.id
+        history.append(new_version_obj)
+
+
+@attrs.frozen()
+class VersionTables:
+    """In-memory dump of continuum versions and transactions, created to make iterating through
+    them run about 60 times faster. There is similar code in `batchtool`. Build with `make()`,
+    fill with `populate()`, then read per-entity history with `get_object_history()`.
+    TODO(TomGoBravo): add some tests for this
+    """
+    version_tables: Dict[Type, VersionTable]
+    transaction_user_email: Dict[int, str] = attrs.field(factory=dict)
+    transaction_issued_at: Dict[int, Any] = attrs.field(factory=dict)
+
+    @staticmethod
+    def make() -> 'VersionTables':
+        version_tables = {version_cls: VersionTable(entity_types={type_str}, version_cls=version_cls)
+                          for type_str, version_cls in type_to_version_cls.items()}
+        return VersionTables(version_tables=version_tables)
+
+    def populate(self):
+        # Replay in id order; add_version_object asserts each version follows the previous one.
+        for transaction in Transaction.query.order_by(Transaction.id).all():
+            if transaction.user:
+                self.transaction_user_email[transaction.id] = transaction.user.email
+            self.transaction_issued_at[transaction.id] = transaction.issued_at
+            for version_cls, cls_changed_entities in transaction.changed_entities.items():
+                for version_obj in cls_changed_entities:
+                    self.version_tables[version_cls].add_version_object(transaction, version_obj)
+
+    def get_object_history(self, obj):
+        # Versions recorded for `obj` in the order populate() added them; [] if none were seen.
+        version_table = self.version_tables[sqlalchemy_continuum.version_class(obj.__class__)]
+        return version_table.versions[obj.id]
+
+
+def changeset(current_version, previous_version):
+    """
+    Return a dictionary of changed fields in this version with keys as
+    field names and values as lists with first value as the old field value
+    and second list value as the new value.
+
+    When `previous_version` is falsy (e.g. None for an entity's first version) every old
+    value is taken to be None. Columns for which is_internal_column() is true are skipped.
+
+    This is a very ugly copy of sqlalchemy_continuum.version.VersionClassBase.changeset which I
+    created because accessing the previous version is super slow.
+    """
+    columns = sqlalchemy.inspect(current_version.__class__).columns
+    changed = {}
+    for name in columns.keys():
+        if sqlalchemy_continuum.utils.is_internal_column(current_version, name):
+            continue
+        before = getattr(previous_version, name) if previous_version else None
+        after = getattr(current_version, name)
+        if before != after:
+            changed[name] = [before, after]
+    return changed
diff --git a/tourist/render_factory.py b/tourist/render_factory.py
index dcc6ed7..ecabf58 100644
--- a/tourist/render_factory.py
+++ b/tourist/render_factory.py
@@ -4,6 +4,7 @@
 import io
 import itertools
 from typing import List, Mapping
+from typing import Type
 from typing import Union
 
 from sqlalchemy.util import IdentitySet
@@ -13,6 +14,7 @@
 import geojson
 from geoalchemy2.shape import to_shape
 
+from tourist import continuumutils
 from tourist.models import render
 from tourist.models import tstore
 
@@ -61,21 +63,23 @@ def _build_render_pool(orm_pool: tstore.Pool) -> render.Pool:
     )
 
 
-def _build_changes(orm_entity: Union[tstore.Place, tstore.Club, tstore.Pool]) -> (
-        render.PlaceEntityChanges):
+def _build_changes(orm_entity: Union[tstore.Place, tstore.Club, tstore.Pool],
+                   versions: continuumutils.VersionTables) -> render.PlaceEntityChanges:
     changes = render.PlaceEntityChanges(entity_name=orm_entity.name)
 
-    for v in orm_entity.versions:
-        user_email = None
-        if v.transaction.user:
-            user_email = v.transaction.user.email
+    prev_v = None  # the first version has no predecessor to diff against
+    for v in versions.get_object_history(orm_entity):
+        tx_id = v.transaction_id
+        user_email = versions.transaction_user_email.get(tx_id)
         changes.changes.append(render.PlaceEntityChanges.Change(
-            timestamp=v.transaction.issued_at, user=user_email,
-            change=str(v.changeset)))
+            timestamp=versions.transaction_issued_at[tx_id], user=user_email,
+            change=str(continuumutils.changeset(v, prev_v))))
+        prev_v = v
     return changes
 
 
-def _build_render_place(orm_place: tstore.Place, source_by_short_name: Mapping[str, render.ClubSource]) -> render.Place:
+def _build_render_place(orm_place: tstore.Place, source_by_short_name: Mapping[str,
+      render.ClubSource], versions: continuumutils.VersionTables) -> (render.Place):
     children_geojson = orm_place.children_geojson_features
     if children_geojson:
         geojson_children_collection = geojson.FeatureCollection(children_geojson)
@@ -125,10 +129,10 @@ def _build_render_place(orm_place: tstore.Place, source_by_short_name: Mapping[s
         entity_changes = None
     else:
         recently_updated = None
-        entity_changes = [_build_changes(orm_place)]
+        entity_changes = [_build_changes(orm_place, versions)]
         for child in itertools.chain(orm_place.child_places, orm_place.child_pools,
                                            orm_place.child_clubs):
-            entity_changes.append(_build_changes(child))
+            entity_changes.append(_build_changes(child, versions))
 
 
     return render.Place(
@@ -230,9 +234,11 @@ def get_all(cls):
     all_pools: List[tstore.Pool] = get_all(tstore.Pool)
     all_sources: List[tstore.Source] = get_all(tstore.Source)
     source_by_short_name = {s.source_short_name: _build_render_club_source(s) for s in all_sources}
+    version_tables = continuumutils.VersionTables.make()
+    version_tables.populate()
 
     for place in all_places:
-        render_place = _build_render_place(place, source_by_short_name)
+        render_place = _build_render_place(place, source_by_short_name, version_tables)
         yield tstore.RenderCache(name=RenderName.PLACE_PREFIX.value + place.short_name,
                                      value_dict=cattrs.unstructure(render_place))
         if place.is_world:
diff --git a/tourist/scripts/batchtool.py b/tourist/scripts/batchtool.py
index 01b112c..ae6ab9c 100644
--- a/tourist/scripts/batchtool.py
+++ b/tourist/scripts/batchtool.py
@@ -18,6 +18,11 @@
 
 import tourist
 from tourist import render_factory
+from tourist.continuumutils import ClubVersion
+from tourist.continuumutils import PlaceVersion
+from tourist.continuumutils import PoolVersion
+from tourist.continuumutils import Transaction
+from tourist.continuumutils import type_to_version_cls
 from tourist.models import attrib
 from tourist.models import tstore
 from tourist.models.tstore import PAGE_LINK_RE
@@ -167,10 +172,6 @@ def incr_column(cls, column_name: str):
         click.echo('Run with --write to commit changes')
 
 
-PoolVersion = sqlalchemy_continuum.version_class(tstore.Pool)
-PlaceVersion = sqlalchemy_continuum.version_class(tstore.Place)
-ClubVersion = sqlalchemy_continuum.version_class(tstore.Club)
-Transaction = sqlalchemy_continuum.transaction_class(tstore.Club)
 operation_type_column_name = sqlalchemy_continuum.utils.option(tstore.Club,
                                                          'operation_type_column_name')
 
@@ -266,13 +267,6 @@ def live_versions(self):
                 yield version_obj
 
 
-type_to_version_cls = {
-    'place': PlaceVersion,
-    'pool': PoolVersion,
-    'club': ClubVersion,
-}
-
-
 @attr.s(auto_attribs=True)
 class VersionSyncer:
     """Creates version history