Skip to content

Commit

Permalink
Merge pull request #70 from m-rtijn/sqlalchemy-v2
Browse files Browse the repository at this point in the history
Move to SQLAlchemy v2, add optimizations
  • Loading branch information
m-rtijn authored Jun 16, 2023
2 parents eda3733 + badb2e7 commit 881b810
Show file tree
Hide file tree
Showing 18 changed files with 419 additions and 98 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
Argostimè is a web-app to keep an eye on prices in webshops. The name is derived from
Argos, the mythical giant with a hundred eyes, and "timè", Greek for price.

Argostimè is based on Flask and SQLAlchemy and has a modular structure so new shops can
be added easily.
Argostimè requires Python 3.10 (or later) and is based on Flask and SQLAlchemy.
Argostimè has a modular structure so new shops can be added easily.

The "official" version of Argostimè is available at [argostime.mrtijn.nl](https://argostime.mrtijn.nl/).

Expand Down
6 changes: 3 additions & 3 deletions argostime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@
import configparser

from flask import Flask
from flask_sqlalchemy import SQLAlchemy

from argostime.products import *
from argostime.exceptions import *
from argostime.models import *
db: SQLAlchemy = SQLAlchemy()

def get_current_commit() -> str:
"""Return the hexadecimal hash of the current running commit."""
Expand Down Expand Up @@ -71,6 +70,7 @@ def create_app():
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False

app.config["GIT_CURRENT_COMMIT"] = get_current_commit()

db.init_app(app)

with app.app_context():
Expand Down
2 changes: 1 addition & 1 deletion argostime/crawler/shop/ah.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def crawl_ah(url: str) -> CrawlResult:
"p",
attrs={ "class" :lambda x: x and x.startswith("promo-sticker-text") }
)

if len(promo_text_matches) == 0:
promo_text_matches = soup.find_all(
"div",
Expand Down
23 changes: 13 additions & 10 deletions argostime/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
along with Argostimè. If not, see <https://www.gnu.org/licenses/>.
"""

from typing import List, Tuple
from datetime import datetime, timedelta
import json

from argostime import db
from argostime.exceptions import NoEffectivePriceAvailableException
from argostime.models import ProductOffer, Price

Expand All @@ -36,13 +36,16 @@ def generate_price_graph_data(offer: ProductOffer) -> str:
time of a specific ProductOffer
"""

prices: List[Price] = Price.query.filter_by(
product_offer_id=offer.id).order_by(Price.datetime).all()
prices = db.session.scalars(
db.select(Price)
.where(Price.product_offer_id == offer.id)
.order_by(Price.datetime)
).all()

dates: List[datetime] = []
effective_prices: List[float] = []
sales_index: List[Tuple[int, int]] = []
sales_dates: List[Tuple[datetime, datetime]] = []
dates: list[datetime] = []
effective_prices: list[float] = []
sales_index: list[tuple[int, int]] = []
sales_dates: list[tuple[datetime, datetime]] = []

index = 0
for price in prices:
Expand All @@ -55,11 +58,11 @@ def generate_price_graph_data(offer: ProductOffer) -> str:
sales_index.append((index, index))
else:
sales_index[-1] = (sales_index[-1][0], index)

index += 1
except NoEffectivePriceAvailableException:
pass

for sale in sales_index:
start: datetime
end: datetime
Expand All @@ -68,7 +71,7 @@ def generate_price_graph_data(offer: ProductOffer) -> str:
start = dates[sale[0]] - timedelta(hours=12)
else:
start = dates[sale[0]] - (dates[sale[0]] - dates[sale[0]-1]) / 2

if sale[1] == len(dates)-1:
end = dates[sale[1]] + timedelta(hours=12)
else:
Expand Down
117 changes: 95 additions & 22 deletions argostime/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,12 @@
from sys import maxsize
from typing import List

from flask_sqlalchemy import SQLAlchemy

from argostime.crawler import crawl_url, CrawlResult
from argostime.exceptions import CrawlerException, WebsiteNotImplementedException
from argostime.exceptions import PageNotFoundException
from argostime.exceptions import NoEffectivePriceAvailableException

db: SQLAlchemy = SQLAlchemy()

from argostime import db

class Webshop(db.Model): # type: ignore
"""A webshop, which may offer products."""
Expand Down Expand Up @@ -107,6 +104,11 @@ class ProductOffer(db.Model): # type: ignore
db.ForeignKey("Webshop.id", ondelete="CASCADE"), nullable=False)
url = db.Column(db.Unicode(1024), unique=True, nullable=False)
time_added = db.Column(db.DateTime)
average_price = db.Column(db.Float)
minimum_price = db.Column(db.Float)
maximum_price = db.Column(db.Float)
# TODO: Memoize current price with reference to the most recent Price entry

prices = db.relationship("Price", backref="product_offer", lazy=True,
cascade="all, delete", passive_deletes=True)

Expand All @@ -116,30 +118,71 @@ def __str__(self):

def get_current_price(self) -> Price:
"""Get the latest Price object related to this offer."""
return Price.query.filter_by(product_offer_id=self.id).order_by(Price.datetime.desc()).first()

def get_average_price(self) -> float:
"""Calculate the average price of this offer."""
price = db.session.scalar(
db.select(Price)
.where(Price.product_offer_id == self.id)
.order_by(Price.datetime.desc())
.limit(1)
)

return price

def update_average_price(self) -> float:
"""Calculate the average price of this offer and update ProductOffer.average_price."""
logging.debug("Updating average price for %s", self)
effective_price_values: List[float] = []
for price in Price.query.filter_by(product_offer_id=self.id).all():

prices = db.session.scalars(
db.select(Price)
.where(Price.product_offer_id == self.id)
).all()

for price in prices:
try:
effective_price_values.append(price.get_effective_price())
except NoEffectivePriceAvailableException:
# Ignore price entries without a valid price in calculating the price.
pass
try:
return statistics.mean(effective_price_values)
avg: float = statistics.mean(effective_price_values)
self.average_price = avg
db.session.commit()
return avg
except statistics.StatisticsError:
logging.debug("Called get_average_price for %s but no prices were found...", str(self))
return -1

def get_average_price(self) -> float:
    """Return the stored average price of this offer.

    DEPRECATED: Use ProductOffer.average_price instead.

    This only reads the average_price attribute; nothing is recalculated.
    """
    stored_average: float = self.average_price
    return stored_average

def get_prices_since(self, since_time: datetime) -> list[Price]:
    """Get all prices of this offer recorded at or after the given date.

    Args:
        since_time: Inclusive lower bound compared against Price.datetime.

    Returns:
        A new list with the matching Price objects (empty if none match).
        No explicit ordering is requested from the database.
    """
    prices_since = db.session.scalars(
        db.select(Price)
        .where(Price.product_offer_id == self.id)
        .where(Price.datetime >= since_time)
    ).all()

    # Copy the result into a fresh list instead of appending item by item.
    return list(prices_since)

def get_lowest_price_since(self, since_time: datetime) -> float:
"""Return the lowest effective price of this offer since a specific time."""
logging.debug("Calculating lowest price since %s for %s", since_time, self)
min_price: float = maxsize
price: Price
for price in Price.query.filter(
Price.product_offer_id == self.id,
Price.datetime >= since_time).all():

prices_since = self.get_prices_since(since_time)

for price in prices_since:
try:
if price.get_effective_price() < min_price:
min_price = price.get_effective_price()
Expand All @@ -149,17 +192,29 @@ def get_lowest_price_since(self, since_time: datetime) -> float:

return min_price

def update_minimum_price(self) -> None:
    """Store the lowest price since time_added in the minimum_price column.

    The value comes from get_lowest_price_since(self.time_added) and the
    change is committed to the database session right away.
    """
    self.minimum_price = self.get_lowest_price_since(self.time_added)
    db.session.commit()

def get_lowest_price(self) -> float:
"""Return the lowest effective price of this offer."""
return self.get_lowest_price_since(self.time_added)
"""Return the lowest effective price of this offer.
DEPRECATED: Use ProductOffer.minimum_price instead
"""
return self.minimum_price

def get_highest_price_since(self, since_time: datetime) -> float:
"""Return the highest effective price of this offer since a specific time."""
logging.debug("Calculating highest price since %s for %s", since_time, self)
max_price: float = -1
price: Price
for price in Price.query.filter(
Price.product_offer_id == self.id,
Price.datetime >= since_time).all():

prices_since = self.get_prices_since(since_time)

for price in prices_since:
try:
if price.get_effective_price() > max_price:
max_price = price.get_effective_price()
Expand All @@ -168,19 +223,28 @@ def get_highest_price_since(self, since_time: datetime) -> float:

return max_price

def update_maximum_price(self) -> None:
    """Store the highest price since time_added in the maximum_price column.

    The value comes from get_highest_price_since(self.time_added) and the
    change is committed to the database session right away.
    """
    self.maximum_price = self.get_highest_price_since(self.time_added)
    db.session.commit()

def get_highest_price(self) -> float:
"""Return the highest effective price of this offer."""
return self.get_highest_price_since(self.time_added)
"""Return the highest effective price of this offer.
DEPRECATED: Use ProductOffer.maximum_price instead.
"""
return self.maximum_price

def get_price_standard_deviation_since(self, since_time: datetime) -> float:
"""Return the standard deviation of the effective price of this offer since a given date."""
effective_prices: List[float] = []
price: Price

for price in Price.query.filter(
Price.product_offer_id == self.id,
Price.datetime >= since_time).all():
prices_since = self.get_prices_since(since_time)

for price in prices_since:
try:
effective_prices.append(price.get_effective_price())
except NoEffectivePriceAvailableException:
Expand All @@ -195,6 +259,13 @@ def get_price_standard_deviation(self) -> float:
"""Return the standard deviation of the effective price of this offer."""
return self.get_price_standard_deviation_since(self.time_added)

def update_memoized_values(self) -> None:
    """Refresh the memoized average, minimum and maximum price values.

    The three update_*_price methods are invoked in that order.
    """
    refreshers = (
        self.update_average_price,
        self.update_minimum_price,
        self.update_maximum_price,
    )
    for refresh in refreshers:
        refresh()

def crawl_new_price(self) -> None:
"""Crawl the current price if we haven't already checked today."""
latest_price: Price = self.get_current_price()
Expand Down Expand Up @@ -234,3 +305,5 @@ def crawl_new_price(self) -> None:
)
db.session.add(price)
db.session.commit()

self.update_memoized_values()
19 changes: 15 additions & 4 deletions argostime/products.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@
from typing import Tuple
import urllib.parse

from argostime import db
from argostime.exceptions import WebsiteNotImplementedException
from argostime.models import Webshop, Price, Product, ProductOffer, db
from argostime.models import Webshop, Price, Product, ProductOffer
from argostime.crawler import crawl_url, CrawlResult, enabled_shops

class ProductOfferAddResult(Enum):
Expand All @@ -51,7 +52,10 @@ def add_product_offer_from_url(url: str) -> Tuple[ProductOfferAddResult, Product
except KeyError as exception:
raise WebsiteNotImplementedException(url) from exception

shop: Webshop = Webshop.query.filter(Webshop.hostname.contains(shop_info["hostname"])).first()
shop: Webshop = db.session.scalar(
db.select(Webshop)
.where(Webshop.hostname.contains(shop_info["hostname"]))
)

# Add Webshop if it can't be found in the database
if shop is None:
Expand All @@ -60,14 +64,20 @@ def add_product_offer_from_url(url: str) -> Tuple[ProductOfferAddResult, Product
db.session.commit()

# Check if this ProductOffer already exists
product_offer: ProductOffer = ProductOffer.query.filter_by(url=url).first()
product_offer: ProductOffer = db.session.scalar(
db.select(ProductOffer)
.where(ProductOffer.url == url)
)
if product_offer is not None:
return (ProductOfferAddResult.ALREADY_EXISTS, product_offer)

parse_results: CrawlResult = crawl_url(url)

# Check if this Product already exists, otherwise add it to the database
product: Product = Product.query.filter_by(product_code=parse_results.product_code).first()
product: Product = db.session.scalar(
db.select(Product)
.where(Product.product_code == parse_results.product_code)
)
if product is None:
product = Product(
name=parse_results.product_name,
Expand Down Expand Up @@ -99,5 +109,6 @@ def add_product_offer_from_url(url: str) -> Tuple[ProductOfferAddResult, Product
)
db.session.add(price)
db.session.commit()
offer.update_memoized_values()

return (ProductOfferAddResult.ADDED, offer)
Loading

0 comments on commit 881b810

Please sign in to comment.