docs: added benchmarks and result table
Showing 6 changed files with 489 additions and 0 deletions.
@@ -0,0 +1,139 @@
from typing import List, Tuple
import asyncio

from benchmarker_lib import BenchmarkSession

from pg_nearest_city.base_nearest_city import Location


async def benchmark_voronoi(
    test_points: List[Tuple[float, float]], warmup_runs: int = 5, test_runs: int = 1000
) -> Tuple[List[Location], BenchmarkSession]:
    """Run benchmark for the Voronoi implementation."""
    from pg_nearest_city import AsyncNearestCity

    session = BenchmarkSession("voronoi_geocoding", test_runs)
    results = []

    # Initial memory snapshot
    session.mark_memory("initial_state")

    # Initialize geocoder
    async with AsyncNearestCity() as geocoder:
        session.mark("geocoder_initialized")

        # Warmup runs - only the timing matters here
        session.mark_time("warmup_start")
        for _ in range(warmup_runs):
            await geocoder.query(test_points[0][0], test_points[0][1])
        session.mark_time("warmup_complete")

        # Test runs - measure time for the batch, with periodic memory checks
        session.mark("test_runs_start")
        for i, (lat, lon) in enumerate(test_points[:test_runs]):
            result = await geocoder.query(lat, lon)
            results.append(result)
            # Check memory every 1000 points
            if i > 0 and i % 1000 == 0:
                session.mark_memory(f"progress_{i}")

        session.mark("test_runs_complete")

    # Final memory state
    session.mark_memory("final_state")

    return (results, session)


def benchmark_kdtree(
    test_points: List[Tuple[float, float]], warmup_runs: int = 5, test_runs: int = 1000
) -> Tuple[List[Location], BenchmarkSession]:
    """Run benchmark for the KD-tree implementation."""
    import reverse_geocoder

    session = BenchmarkSession("kdtree_geocoding", test_runs)
    results = []

    # Initial memory snapshot
    session.mark_memory("initial_state")

    # Initialize geocoder - load the dataset here so its cost is captured
    # by the "geocoder_initialized" mark (both time and memory)
    rg = reverse_geocoder.RGeocoder(mode=2, verbose=False)
    session.mark("geocoder_initialized")

    # Warmup runs - only the timing matters here
    session.mark_time("warmup_start")
    for _ in range(warmup_runs):
        reverse_geocoder.get(test_points[0])
    session.mark_time("warmup_complete")

    # Test runs - measure time for the batch, with periodic memory checks
    session.mark("test_runs_start")
    for i, (lat, lon) in enumerate(test_points[:test_runs]):
        result = reverse_geocoder.get((lat, lon))
        results.append(
            Location(lat=lat, lon=lon, city=result["name"], country=result["cc"])
        )

        # Check memory every 1000 points
        if i > 0 and i % 1000 == 0:
            session.mark_memory(f"progress_{i}")

    session.mark("test_runs_complete")

    # Final memory state
    session.mark_memory("final_state")

    return (results, session)


def generate_test_points(count: int = 10000) -> List[Tuple[float, float]]:
    """Generate a consistent set of test points."""
    import random

    # Fixed seed so both implementations see the same points, then reseed
    random.seed(42)
    points = [
        (random.uniform(-90, 90), random.uniform(-180, 180)) for _ in range(count)
    ]
    random.seed()
    return points


async def main():
    test_points = generate_test_points()

    # Run both benchmarks in separate processes
    from multiprocessing import Process, Queue

    def kdtree_process(queue):
        results = benchmark_kdtree(test_points)
        queue.put(results)

    def voronoi_process(queue):
        results = asyncio.run(benchmark_voronoi(test_points))
        queue.put(results)

    kdtree_queue = Queue()
    kdtree_p = Process(target=kdtree_process, args=(kdtree_queue,))
    kdtree_p.start()

    voronoi_queue = Queue()
    voronoi_p = Process(target=voronoi_process, args=(voronoi_queue,))
    voronoi_p.start()

    # Drain the queues before joining: a child process does not exit until
    # everything it has put on its queue is flushed to the pipe, so joining
    # first can deadlock on large result payloads
    (kdtree_results, kdtree_session) = kdtree_queue.get()
    (voronoi_results, voronoi_session) = voronoi_queue.get()

    kdtree_p.join()
    voronoi_p.join()

    kdtree_session.print_summary()
    voronoi_session.print_summary()

    kdtree_session.save()
    voronoi_session.save()


if __name__ == "__main__":
    asyncio.run(main())
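
One caveat on the script above: `kdtree_process` and `voronoi_process` are nested inside `main()`, which works as `Process` targets only under the fork start method (the default on Linux); under spawn (the default on macOS and Windows) nested functions cannot be pickled. A minimal sketch of pinning the start method, assuming the rest of the script stays as written:

```python
import multiprocessing

if __name__ == "__main__":
    # Assumption: running on a platform that supports "fork".
    # Spawn-based platforms would instead need module-level target functions.
    multiprocessing.set_start_method("fork")
    asyncio.run(main())
```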
@@ -0,0 +1,157 @@
from dataclasses import dataclass
from typing import Dict, List, Optional
import time
import psutil
from pathlib import Path
import json
from datetime import datetime


@dataclass
class BenchmarkPoint:
    """A single measurement point during benchmarking."""

    timestamp: float
    label: str
    memory_mb: Optional[float] = None
    duration_ms: Optional[float] = None
    memory_delta_mb: Optional[float] = None
    duration_delta_ms: Optional[float] = None


class BenchmarkSession:
    def __init__(self, name: str, test_runs: int):
        self.name = name
        self.test_runs = test_runs
        self.points: List[BenchmarkPoint] = []
        self.start_time = time.perf_counter()
        self.last_time = self.start_time
        self.last_memory = None

    def _get_current_memory(self) -> float:
        """Get current memory usage (RSS) in MB."""
        return psutil.Process().memory_info().rss / (1024 * 1024)

    def _get_time_metrics(self, current_time: float) -> tuple[float, float]:
        """Calculate total and delta time in milliseconds."""
        total_duration = (current_time - self.start_time) * 1000
        delta_duration = (current_time - self.last_time) * 1000
        self.last_time = current_time
        return total_duration, delta_duration

    def _get_memory_metrics(
        self, current_memory: float
    ) -> tuple[float, Optional[float]]:
        """Calculate memory and delta memory in MB."""
        memory_delta = None
        if self.last_memory is not None:
            memory_delta = current_memory - self.last_memory
        self.last_memory = current_memory
        return current_memory, memory_delta

    def mark_time(self, label: str) -> BenchmarkPoint:
        """Create a benchmark point measuring only time."""
        current_time = time.perf_counter()
        duration, duration_delta = self._get_time_metrics(current_time)

        point = BenchmarkPoint(
            timestamp=current_time,
            label=label,
            duration_ms=duration,
            duration_delta_ms=duration_delta,
        )
        self.points.append(point)
        return point

    def mark_memory(self, label: str) -> BenchmarkPoint:
        """Create a benchmark point measuring only memory."""
        current_time = time.perf_counter()
        current_memory = self._get_current_memory()
        memory, memory_delta = self._get_memory_metrics(current_memory)

        point = BenchmarkPoint(
            timestamp=current_time,
            label=label,
            memory_mb=memory,
            memory_delta_mb=memory_delta,
        )
        self.points.append(point)
        return point

    def mark(self, label: str) -> BenchmarkPoint:
        """Create a benchmark point measuring both time and memory."""
        current_time = time.perf_counter()
        current_memory = self._get_current_memory()

        duration, duration_delta = self._get_time_metrics(current_time)
        memory, memory_delta = self._get_memory_metrics(current_memory)

        point = BenchmarkPoint(
            timestamp=current_time,
            label=label,
            memory_mb=memory,
            duration_ms=duration,
            memory_delta_mb=memory_delta,
            duration_delta_ms=duration_delta,
        )
        self.points.append(point)
        return point

    def get_results(self) -> Dict:
        """Get results in a structured format."""
        return {
            "name": self.name,
            "timestamp": datetime.now().isoformat(),
            "test_runs": self.test_runs,
            "points": [
                {
                    "label": p.label,
                    "memory_mb": round(p.memory_mb, 2)
                    if p.memory_mb is not None
                    else None,
                    "memory_delta_mb": round(p.memory_delta_mb, 2)
                    if p.memory_delta_mb is not None
                    else None,
                    "duration_ms": round(p.duration_ms, 2)
                    if p.duration_ms is not None
                    else None,
                    "duration_delta_ms": round(p.duration_delta_ms, 2)
                    if p.duration_delta_ms is not None
                    else None,
                }
                for p in self.points
            ],
        }

    def save(self, directory: str = "benchmarks/benchmark_results") -> str:
        """Save results to a JSON file."""
        Path(directory).mkdir(parents=True, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{self.name}_{timestamp}.json"
        filepath = Path(directory) / filename

        with open(filepath, "w") as f:
            json.dump(self.get_results(), f, indent=2)

        return str(filepath)

    def print_summary(self):
        """Print a human-readable summary."""
        print(f"\nBenchmark Summary: {self.name}")
        print(f"Test Runs: {self.test_runs}")
        print("-" * 50)

        for point in self.points:
            print(f"\n{point.label}:")
            if point.memory_mb is not None:
                print(f"  Memory: {point.memory_mb:.2f} MB")
            if point.memory_delta_mb is not None:
                print(f"  Memory Δ: {point.memory_delta_mb:+.2f} MB")
            if point.duration_ms is not None:
                print(f"  Duration: {point.duration_ms:.2f} ms")
            if point.duration_delta_ms is not None:
                print(f"  Duration Δ: {point.duration_delta_ms:.2f} ms")
@@ -0,0 +1,19 @@
# Benchmarking Results

### Test Run Performance (1000 geocoding operations)

| Implementation | Test Run Time (ms) | Std Dev (ms) | Min (ms) | Max (ms) | Avg Time per Operation (ms) |
|----------------|-------------------:|-------------:|---------:|---------:|----------------------------:|
| KD-tree        | 45,560.73          | 3,359.99     | 39,796.90 | 47,936.96 | 45.56 |
| Voronoi        | 1,831.31           | 400.08       | 1,431.14  | 2,496.77  | 1.83  |

### Memory Footprint After Initialization

| Implementation | Stable Memory (MB) | Memory Std Dev (MB) | Initial Memory (MB) | Memory Growth |
|----------------|-------------------:|--------------------:|--------------------:|--------------:|
| KD-tree        | 336.29             | 0.08                | ~73                 | +263 MB       |
| Voronoi        | 33.00              | 0.23                | ~25                 | +8 MB         |

### Initialization Times

| Implementation | Init Time (ms) | Warmup Time (ms) | Total Startup (ms) |
|----------------|---------------:|-----------------:|-------------------:|
| KD-tree        | ~1,380         | ~350             | ~1,730             |
| Voronoi        | ~16,200        | ~15              | ~16,215            |
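
The per-operation averages in the first table are simply the batch time divided by the number of runs (for example, 45,560.73 ms / 1000 ≈ 45.56 ms). A small sketch of recovering that figure from a saved results file, assuming the JSON layout produced by `BenchmarkSession.save()` above and a hypothetical file name (real files carry a timestamp suffix):

```python
import json

# Hypothetical path; actual files are named like kdtree_geocoding_<timestamp>.json
with open("benchmarks/benchmark_results/kdtree_geocoding_example.json") as f:
    results = json.load(f)

# The duration delta at "test_runs_complete" is the time elapsed since the
# "test_runs_start" mark, i.e. the batch time for the timed loop.
batch_ms = next(
    p["duration_delta_ms"]
    for p in results["points"]
    if p["label"] == "test_runs_complete"
)
print(f"Average per operation: {batch_ms / results['test_runs']:.2f} ms")
```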