Merge pull request #107 from maumueller/precompute_metrics
Small changes to metrics computation + additional metrics
erikbern authored Mar 7, 2019
2 parents 823d630 + cdda15b commit 37a70ca
Showing 13 changed files with 159 additions and 55 deletions.
1 change: 1 addition & 0 deletions .travis.yml
@@ -32,6 +32,7 @@ before_install:
script:
- python run.py --docker-tag ann-benchmarks-${LIBRARY} --max-n-algorithms 5 --dataset $DATASET --run-disabled
- python run.py --docker-tag ann-benchmarks-${LIBRARY} --max-n-algorithms 5 --dataset $DATASET --run-disabled --batch
- sudo chmod -R 777 results/
- python plot.py --dataset $DATASET --output plot.png
- python plot.py --dataset $DATASET --output plot-batch.png --batch
- python -m unittest test/test-metrics.py
3 changes: 3 additions & 0 deletions ann_benchmarks/algorithms/base.py
@@ -23,5 +23,8 @@ def batch_query(self, X, n):
    def get_batch_results(self):
        return self.res

    def get_additional(self):
        return {}

    def __str__(self):
        return self.name
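The get_additional hook gives each algorithm wrapper a place to report extra per-run statistics; the base implementation returns an empty dict, and runner.py (further down in this diff) merges whatever a subclass returns into the stored run attributes. A minimal sketch of an override — the class and its internal counter are hypothetical, assuming the base class here is BaseANN:

    class CountingAlgo(BaseANN):
        def __init__(self):
            self.name = 'CountingAlgo'
            self._dist_comps = 0  # hypothetical counter, bumped during query()

        def query(self, v, n):
            ...  # evaluate distances, incrementing self._dist_comps
            return []

        def get_additional(self):
            # keys returned here end up as attrs in the result file
            return {'dist_comps': self._dist_comps}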
5 changes: 5 additions & 0 deletions ann_benchmarks/algorithms/faiss.py
@@ -64,8 +64,13 @@ def fit(self, X):
        self.index = index

    def set_query_arguments(self, n_probe):
        faiss.cvar.indexIVF_stats.reset()
        self._n_probe = n_probe
        self.index.nprobe = self._n_probe

    def get_additional(self):
        return {"dist_comps" : faiss.cvar.indexIVF_stats.ndis +
                               faiss.cvar.indexIVF_stats.nq * self._n_list}

    def __str__(self):
        return 'FaissIVF(n_list=%d, n_probe=%d)' % (self._n_list, self._n_probe)
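The reported count for the IVF index combines two terms: ndis, the distances evaluated inside the probed inverted lists, and nq * n_list, one comparison per query against each coarse centroid. A quick sanity check with made-up numbers:

    # illustrative numbers only, not measurements
    n_list = 1024      # coarse centroids in the IVF index
    n_queries = 10000  # faiss.cvar.indexIVF_stats.nq after a run
    ndis = 3500000     # faiss.cvar.indexIVF_stats.ndis after a run
    dist_comps = ndis + n_queries * n_list
    print(dist_comps)  # 13740000 -- the centroid scan dominates here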
4 changes: 4 additions & 0 deletions ann_benchmarks/algorithms/faiss_hnsw.py
@@ -27,7 +27,11 @@ def fit(self, X):
        faiss.omp_set_num_threads(1)

    def set_query_arguments(self, ef):
        faiss.cvar.hnsw_stats.reset()
        self.index.hnsw.efSearch = ef

    def get_additional(self):
        return {"dist_comps" : faiss.cvar.hnsw_stats.ndis}

    def freeIndex(self):
        del self.p
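No centroid term is needed for HNSW: faiss's hnsw_stats.ndis already counts every distance evaluation performed while traversing the graph, so it can be reported directly.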
108 changes: 76 additions & 32 deletions ann_benchmarks/plotting/metrics.py
@@ -1,31 +1,67 @@
from __future__ import absolute_import
import numpy as np

def knn(dataset_distances, run_distances, count, epsilon=1e-10):
    total = len(run_distances) * count
    actual = 0
    for true_distances, found_distances in zip(dataset_distances, run_distances):
        within = [d for d in found_distances[:count] if d <= true_distances[count - 1] + epsilon]
        actual += len(within)
    return float(actual) / float(total)
def knn_threshold(data, count, epsilon):
    return data[count - 1] + epsilon

def epsilon_threshold(data, count, epsilon):
    return data[count - 1] * (1 + epsilon)

def epsilon(dataset_distances, run_distances, count, epsilon=0.01):
def get_recall_values(dataset_distances, run_distances, count, threshold, epsilon=1e-3):
    total = len(run_distances) * count
    actual = 0
    for true_distances, found_distances in zip(dataset_distances, run_distances):
        within = [d for d in found_distances[:count] if d <= true_distances[count - 1] * (1 + epsilon)]
        actual += len(within)
    return float(actual) / float(total)
    recalls = np.zeros(len(run_distances))
    for i in range(len(run_distances)):
        t = threshold(dataset_distances[i], count, epsilon)
        actual = 0
        for d in run_distances[i][:count]:
            if d <= t:
                actual += 1
        recalls[i] = actual
    return np.mean(recalls) / float(count), np.std(recalls) / float(count), recalls

def knn(dataset_distances, run_distances, count, metrics, epsilon=1e-3):
    if 'knn' not in metrics:
        print('Computing knn metrics')
        knn_metrics = metrics.create_group('knn')
        mean, std, recalls = get_recall_values(dataset_distances,
            run_distances, count, knn_threshold, epsilon)
        knn_metrics.attrs['mean'] = mean
        knn_metrics.attrs['std'] = std
        knn_metrics['recalls'] = recalls
    else:
        print("Found cached result")
    return metrics['knn']

def epsilon(dataset_distances, run_distances, count, metrics, epsilon=0.01):
    s = 'eps' + str(epsilon)
    if s not in metrics:
        print('Computing epsilon metrics')
        epsilon_metrics = metrics.create_group(s)
        mean, std, recalls = get_recall_values(dataset_distances,
            run_distances, count, epsilon_threshold, epsilon)
        epsilon_metrics.attrs['mean'] = mean
        epsilon_metrics.attrs['std'] = std
        epsilon_metrics['recalls'] = recalls
    else:
        print("Found cached result")
    return metrics[s]

def rel(dataset_distances, run_distances):
    total_closest_distance = 0.0
    total_candidate_distance = 0.0
    for true_distances, found_distances in zip(dataset_distances, run_distances):
        for rdist, cdist in zip(true_distances, found_distances):
            total_closest_distance += rdist
            total_candidate_distance += cdist
    if total_closest_distance < 0.01:
        return float("inf")
    return total_candidate_distance / total_closest_distance
def rel(dataset_distances, run_distances, metrics):
    if 'rel' not in metrics.attrs:
        print('Computing rel metrics')
        total_closest_distance = 0.0
        total_candidate_distance = 0.0
        for true_distances, found_distances in zip(dataset_distances, run_distances):
            for rdist, cdist in zip(true_distances, found_distances):
                total_closest_distance += rdist
                total_candidate_distance += cdist
        if total_closest_distance < 0.01:
            metrics.attrs['rel'] = float("inf")
        else:
            metrics.attrs['rel'] = total_candidate_distance / total_closest_distance
    else:
        print("Found cached result")
    return metrics.attrs['rel']

def queries_per_second(queries, attrs):
    return 1.0 / attrs["best_search_time"]
@@ -40,51 +76,59 @@ def build_time(queries, attrs):
def candidates(queries, attrs):
    return attrs["candidates"]

def dist_computations(queries, attrs):
    return attrs.get("dist_comps", 0) / (attrs['run_count'] * len(queries))

all_metrics = {
    "k-nn": {
        "description": "Recall",
        "function": lambda true_distances, run_distances, run_attrs: knn(true_distances, run_distances, run_attrs["count"]),
        "function": lambda true_distances, run_distances, metrics, run_attrs: knn(true_distances, run_distances, run_attrs["count"], metrics).attrs['mean'],
        "worst": float("-inf"),
        "lim": [0.0, 1.03]
    },
    "epsilon": {
        "description": "Epsilon 0.01 Recall",
        "function": lambda true_distances, run_distances, run_attrs: epsilon(true_distances, run_distances, run_attrs["count"]),
        "function": lambda true_distances, run_distances, metrics, run_attrs: epsilon(true_distances, run_distances, run_attrs["count"], metrics).attrs['mean'],
        "worst": float("-inf")
    },
    "largeepsilon": {
        "description": "Epsilon 0.1 Recall",
        "function": lambda true_distances, run_distances, run_attrs: epsilon(true_distances, run_distances, run_attrs["count"], 0.1),
        "function": lambda true_distances, run_distances, metrics, run_attrs: epsilon(true_distances, run_distances, run_attrs["count"], metrics, 0.1).attrs['mean'],
        "worst": float("-inf")
    },
    "rel": {
        "description": "Relative Error",
        "function": lambda true_distances, run_distances, run_attrs: rel(true_distances, run_distances),
        "function": lambda true_distances, run_distances, metrics, run_attrs: rel(true_distances, run_distances, metrics),
        "worst": float("inf")
    },
    "qps": {
        "description": "Queries per second (1/s)",
        "function": lambda true_distances, run_distances, run_attrs: queries_per_second(true_distances, run_attrs),
        "function": lambda true_distances, run_distances, metrics, run_attrs: queries_per_second(true_distances, run_attrs),
        "worst": float("-inf")
    },
    "distcomps" : {
        "description": "Distance computations",
        "function": lambda true_distances, run_distances, metrics, run_attrs: dist_computations(true_distances, run_attrs),
        "worst": float("inf")
    },
    "build": {
        "description": "Build time (s)",
        "function": lambda true_distances, run_distances, run_attrs: build_time(true_distances, run_attrs),
        "function": lambda true_distances, run_distances, metrics, run_attrs: build_time(true_distances, run_attrs),
        "worst": float("inf")
    },
    "candidates" : {
        "description": "Candidates generated",
        "function": lambda true_distances, run_distances, run_attrs: candidates(true_distances, run_attrs),
        "function": lambda true_distances, run_distances, metrics, run_attrs: candidates(true_distances, run_attrs),
        "worst": float("inf")
    },
    "indexsize" : {
        "description": "Index size (kB)",
        "function": lambda true_distances, run_distances, run_attrs: index_size(true_distances, run_attrs),
        "function": lambda true_distances, run_distances, metrics, run_attrs: index_size(true_distances, run_attrs),
        "worst": float("inf")
    },
    "queriessize" : {
        "description": "Index size (kB)/Queries per second (s)",
        "function": lambda true_distances, run_distances, run_attrs: index_size(true_distances, run_attrs) / queries_per_second(true_distances, run_attrs),
        "function": lambda true_distances, run_distances, metrics, run_attrs: index_size(true_distances, run_attrs) / queries_per_second(true_distances, run_attrs),
        "worst": float("inf")
    }
}
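The two threshold helpers differ only in the slack applied to the k-th true distance: knn_threshold adds an absolute epsilon (absorbing floating-point noise in exact recall), while epsilon_threshold multiplies by (1 + epsilon) (approximate recall). A small sketch of the shared recall computation, with made-up distances for two queries and count=2:

    from ann_benchmarks.plotting.metrics import get_recall_values, knn_threshold

    # hypothetical ground truth and run output
    dataset_distances = [[0.10, 0.20, 0.30], [0.05, 0.15, 0.25]]
    run_distances = [[0.10, 0.20], [0.05, 0.40]]
    mean, std, recalls = get_recall_values(dataset_distances, run_distances,
                                           2, knn_threshold)
    print(recalls)  # [2. 1.] -- query 2 recovers only one of its two neighbors
    print(mean)     # 0.75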
1 change: 1 addition & 0 deletions ann_benchmarks/plotting/plot_variants.py
@@ -4,6 +4,7 @@
"recall/time" : ("k-nn", "qps"),
"recall/buildtime" : ("k-nn", "build"),
"recall/indexsize" : ("k-nn", "indexsize"),
"recall/distcomps" : ("k-nn", "distcomps"),
"rel/time" : ("rel", "qps"),
"recall/candidates" : ("k-nn", "candidates"),
"recall/qpssize" : ("k-nn", "queriessize"),
28 changes: 21 additions & 7 deletions ann_benchmarks/plotting/utils.py
@@ -4,6 +4,11 @@
from ann_benchmarks.plotting.metrics import all_metrics as metrics
import matplotlib.pyplot as plt

def get_or_create_metrics(run):
    if 'metrics' not in run:
        run.create_group('metrics')
    return run['metrics']

def create_pointset(data, xn, yn):
    xm, ym = (metrics[xn], metrics[yn])
    rev = ym["worst"] < 0
@@ -28,33 +33,42 @@ def create_pointset(data, xn, yn):
        ls.append(algo_name)
    return xs, ys, ls, axs, ays, als

def compute_metrics(true_nn_distances, res, metric_1, metric_2):
def compute_metrics(true_nn_distances, res, metric_1, metric_2, recompute=False):
    all_results = {}
    for i, (properties, run) in enumerate(res):
        algo = properties['algo']
        algo_name = properties['name']
        # cache distances to avoid access to hdf5 file
        run_distances = list(run['distances'])
        run_distances = numpy.array(run['distances'])
        if recompute and 'metrics' in run:
            del run['metrics']
        metrics_cache = get_or_create_metrics(run)

        metric_1_value = metrics[metric_1]['function'](true_nn_distances, run_distances, properties)
        metric_2_value = metrics[metric_2]['function'](true_nn_distances, run_distances, properties)
        metric_1_value = metrics[metric_1]['function'](true_nn_distances,
            run_distances, metrics_cache, properties)
        metric_2_value = metrics[metric_2]['function'](true_nn_distances,
            run_distances, metrics_cache, properties)

        print('%3d: %80s %12.3f %12.3f' % (i, algo_name, metric_1_value, metric_2_value))

        all_results.setdefault(algo, []).append((algo, algo_name, metric_1_value, metric_2_value))

    return all_results

def compute_all_metrics(true_nn_distances, run, properties):
def compute_all_metrics(true_nn_distances, run, properties, recompute=False):
    algo = properties["algo"]
    algo_name = properties["name"]
    print('--')
    print(algo_name)
    results = {}
    # cache distances to avoid access to hdf5 file
    run_distances = list(run["distances"])
    run_distances = numpy.array(run["distances"])
    if recompute and 'metrics' in run:
        del run['metrics']
    metrics_cache = get_or_create_metrics(run)

    for name, metric in metrics.items():
        v = metric["function"](true_nn_distances, run_distances, properties)
        v = metric["function"](true_nn_distances, run_distances, metrics_cache, properties)
        results[name] = v
        if v:
            print('%s: %g' % (name, v))
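Metrics are now cached inside the result file itself: get_or_create_metrics lazily creates an HDF5 group named 'metrics', the metric functions fill it with subgroups, and recompute=True deletes the group first so everything is rebuilt. A standalone sketch of the same pattern, with a hypothetical file name:

    import h5py
    import numpy as np

    with h5py.File('example-run.hdf5', 'a') as run:  # hypothetical result file
        if 'metrics' in run:       # what recompute=True triggers
            del run['metrics']
        cache = run.require_group('metrics')
        knn = cache.create_group('knn')
        knn.attrs['mean'] = 0.95   # scalar statistics live in HDF5 attributes
        knn.attrs['std'] = 0.02
        knn['recalls'] = np.array([10.0, 9.0, 10.0])  # per-query hits as a dataset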
2 changes: 1 addition & 1 deletion ann_benchmarks/results.py
@@ -48,7 +48,7 @@ def load_all_results(dataset=None, count=None, split_batched=False, batch_mode=
    try:
        if split_batched and batch_mode != is_batch(root):
            continue
        f = h5py.File(os.path.join(root, fn))
        f = h5py.File(os.path.join(root, fn), 'r+')
        properties = dict(f.attrs)
        # TODO Fix this properly. Sometimes the hdf5 file returns bytes
        # This converts these bytes to strings before we work with them
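Opening the result files read-write ('r+') is what allows the metrics cache to be written back into them on the first plotting pass.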
3 changes: 3 additions & 0 deletions ann_benchmarks/runner.py
@@ -93,6 +93,9 @@ def batch_query(X):
"distance": distance,
"count": int(count)
}
additional = algo.get_additional()
for k in additional:
attrs[k] = additional[k]
return (attrs, results)


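The loop is equivalent to attrs.update(algo.get_additional()): any key an algorithm reports (such as dist_comps above) lands in the result file's attributes, where dist_computations later reads it back.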
6 changes: 5 additions & 1 deletion create_website.py
@@ -95,6 +95,10 @@ def prepare_data(data, xn, yn):
    '--scatter',
    help='create scatterplot for data',
    action = 'store_true')
parser.add_argument(
    '--recompute',
    help='Clears the cache and recomputes the metrics',
    action='store_true')
args = parser.parse_args()

def get_lines(all_data, xn, yn, render_all_points):
@@ -189,7 +193,7 @@ def load_all_results():
    cached_true_dist = list(dataset["distances"])
    old_sdn = sdn
    algo = properties["algo"]
    ms = compute_all_metrics(cached_true_dist, f, properties)
    ms = compute_all_metrics(cached_true_dist, f, properties, args.recompute)
    algo_ds = get_dataset_label(sdn)
    idx = "non-batch"
    if properties["batch_mode"]:
2 changes: 1 addition & 1 deletion install/Dockerfile.flann
@@ -1,6 +1,6 @@
FROM ann-benchmarks

RUN apt-get update && apt-get install -y cmake
RUN apt-get update && apt-get install -y cmake pkg-config liblz4-dev
RUN git clone https://github.com/mariusmuja/flann
RUN mkdir flann/build
RUN cd flann/build && cmake ..
8 changes: 7 additions & 1 deletion plot.py
@@ -2,6 +2,7 @@
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import argparse

from ann_benchmarks.datasets import get_dataset
@@ -90,6 +91,10 @@ def create_plot(all_data, raw, x_log, y_log, xn, yn, fn_out, linestyles, batch):
    '--batch',
    help='Plot runs in batch mode',
    action='store_true')
parser.add_argument(
    '--recompute',
    help='Clears the cache and recomputes the metrics',
    action='store_true')
args = parser.parse_args()

if not args.output:
@@ -101,7 +106,8 @@ def create_plot(all_data, raw, x_log, y_log, xn, yn, fn_out, linestyles, batch):
unique_algorithms = get_unique_algorithms()
results = load_all_results(args.dataset, count, True, args.batch)
linestyles = create_linestyles(sorted(unique_algorithms))
runs = compute_metrics(list(dataset["distances"]), results, args.x_axis, args.y_axis)
runs = compute_metrics(np.array(dataset["distances"]),
                       results, args.x_axis, args.y_axis, args.recompute)
if not runs:
    raise Exception('Nothing to plot')

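With the flag wired up, cached metrics can be invalidated from the command line, for instance (dataset name illustrative):

    python plot.py --dataset glove-100-angular --recompute
    python create_website.py --recompute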
(diff for 1 more changed file not loaded)