From 33d7b3190d714b24aca22be9f5e5630b59caba6e Mon Sep 17 00:00:00 2001 From: simongong Date: Mon, 30 Dec 2024 10:50:35 +0800 Subject: [PATCH] Remove duplicate runs from joyrun (#757) * remove duplicate runs from joyrun and support user-defined threshold value in command line * Reformat run_page/joyrun_sync.py using black to align with github merge rules --- run_page/joyrun_sync.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/run_page/joyrun_sync.py b/run_page/joyrun_sync.py index d070210185b..3ada9556164 100755 --- a/run_page/joyrun_sync.py +++ b/run_page/joyrun_sync.py @@ -156,7 +156,7 @@ def login_by_phone(self): def get_runs_records_ids(self): payload = { - "year": 0, + "year": 0, # as of the "year". when set to 2023, it means fetch records during currentYear ~ 2023. set to 0 means fetch all. } r = self.session.post( f"{self.base_url}/userRunList.aspx", @@ -332,10 +332,29 @@ def get_all_joyrun_tracks(self, old_tracks_ids, with_gpx=False): old_gpx_ids = [i.split(".")[0] for i in old_gpx_ids if not i.startswith(".")] new_run_ids = list(set(run_ids) - set(old_tracks_ids)) tracks = [] + seen_runs = {} # Dictionary to keep track of unique runs with start time as key for i in new_run_ids: run_data = self.get_single_run_record(i) - track = self.parse_raw_data_to_nametuple(run_data, old_gpx_ids, with_gpx) - tracks.append(track) + start_time = datetime.fromtimestamp(run_data["runrecord"]["starttime"]) + distance = run_data["runrecord"]["meter"] + + is_duplicate = False + for seen_start in list(seen_runs.keys()): + if abs((start_time - seen_start).total_seconds()) <= threshold: + if distance > seen_runs[seen_start]["distance"]: + seen_runs[seen_start] = { + "run_data": run_data, + "distance": distance, + } + is_duplicate = True + break + if not is_duplicate: + seen_runs[start_time] = {"run_data": run_data, "distance": distance} + for run in seen_runs.values(): + track = self.parse_raw_data_to_nametuple( + run["run_data"], old_gpx_ids, with_gpx + ) + tracks.append(track) return tracks @@ -440,6 +459,13 @@ def _generate_svg_profile(athlete, min_grid_distance): action="store_true", help="from uid and sid for download datas", ) + parser.add_argument( + "--threshold", + dest="threshold", + help="threshold in seconds to consider runs as duplicates", + type=int, + default=10, + ) options = parser.parse_args() if options.from_uid_sid: j = Joyrun.from_uid_sid( @@ -455,7 +481,9 @@ def _generate_svg_profile(athlete, min_grid_distance): generator = Generator(SQL_FILE) old_tracks_ids = generator.get_old_tracks_ids() - tracks = j.get_all_joyrun_tracks(old_tracks_ids, options.with_gpx) + tracks = j.get_all_joyrun_tracks( + old_tracks_ids, options.with_gpx, options.threshold + ) generator.sync_from_app(tracks) activities_list = generator.load() with open(JSON_FILE, "w") as f: