From 9a0360bdd6933ac7950d384abc54a53adeaeba07 Mon Sep 17 00:00:00 2001 From: Josef Kudera <46950237+kudj@users.noreply.github.com> Date: Mon, 11 Mar 2024 11:53:15 +0100 Subject: [PATCH] nbsp, max 365 days back remove spaces max 365 days back --- src/component.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/component.py b/src/component.py index 897db87..bf08e27 100644 --- a/src/component.py +++ b/src/component.py @@ -9,6 +9,7 @@ from requests_html import HTMLSession from keboola.utils import parse_datetime_interval, split_dates_to_chunks from keboola.csvwriter import ElasticDictWriter +import datetime class Component(ComponentBase): @@ -30,6 +31,11 @@ def run(self): eshop_id = self.cfg.report_settings.eshop_id date_from, date_to = parse_datetime_interval(self.cfg.report_settings.date_from, self.cfg.report_settings.date_to) + + if (datetime.datetime.now() - date_from).days > 365: + print("Cannot get data older than 1 year, downloading data for the last 365 days.") + date_from = datetime.datetime.now() - datetime.timedelta(days=365) + dates = split_dates_to_chunks(date_from, date_to, 0) session = HTMLSession() @@ -104,6 +110,7 @@ def get_stats_for_date(self, session, date, eshop_id): if table_body: values = [value.text.replace('Â\xa0KÄ\x8d', '').replace(' â\x82¬', '').replace('%', '') + .replace('Â', '').replace(' ', '').replace(' ', '').replace(' ', '') for value in table_body.find('tr')[0].find('td')] row = {'eshop_id': eshop_id, 'date': date["start_date"]}