From 1ee85b567da759a0ab09905ea5b8768a13b8ba20 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 09:16:45 +0100 Subject: [PATCH 01/28] attempt to refactor nn graph building --- pygsp/graphs/nngraphs/nngraph.py | 117 ++++++++++++++++++++++--------- 1 file changed, 83 insertions(+), 34 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 397af4e7..95c2c5cc 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -10,6 +10,21 @@ _logger = utils.build_logger(__name__) +# conversion between the FLANN conventions and the various backend functions +_dist_translation = { + 'scipy-kdtree': { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf + }, + 'scipy-pdist' : { + 'euclidean': 'euclidean', + 'manhattan': 'cityblock', + 'max_dist': 'chebyshev', + 'minkowski': 'minkowski' + }, + + } def _import_pfl(): try: @@ -20,6 +35,46 @@ def _import_pfl(): 'pip (or conda) install pyflann (or pyflann3).') return pfl + + +def _knn_sp_kdtree(_X, _num_neighbors, _dist_type, _order=0): + kdt = spatial.KDTree(_X) + D, NN = kdt.query(_X, k=(_num_neighbors + 1), + p=_dist_translation['scipy-kdtree'][_dist_type]) + return NN, D + +def _knn_flann(_X, _num_neighbors, _dist_type, _order): + pfl = _import_pfl() + pfl.set_distance_type(_dist_type, order=_order) + flann = pfl.FLANN() + + # Default FLANN parameters (I tried changing the algorithm and + # testing performance on huge matrices, but the default one + # seems to work best). + NN, D = flann.nn(_X, _X, num_neighbors=(_num_neighbors + 1), + algorithm='kdtree') + return NN, D + +def _radius_sp_kdtree(_X, _epsilon, _dist_type, order=0): + kdt = spatial.KDTree(_X) + D, NN = kdt.query(_X, k=None, distance_upper_bound=_epsilon, + p=_dist_translation['scipy-kdtree'][_dist_type]) + return NN, D + +def _knn_sp_pdist(_X, _num_neighbors, _dist_type, _order): + pd = spatial.distance.squareform( + spatial.distance.pdist(_X, + _dist_translation['scipy-pdist'][_dist_type], + p=_order)) + pds = np.sort(pd)[:, 0:_num_neighbors+1] + pdi = pd.argsort()[:, 0:_num_neighbors+1] + return pdi, pds + +def _radius_sp_pdist(): + raise NotImplementedError() + +def _radius_flann(): + raise NotImplementedError() class NNGraph(Graph): r"""Nearest-neighbor graph from given point cloud. @@ -33,9 +88,11 @@ class NNGraph(Graph): Type of nearest neighbor graph to create. The options are 'knn' for k-Nearest Neighbors or 'radius' for epsilon-Nearest Neighbors (default is 'knn'). - use_flann : bool, optional - Use Fast Library for Approximate Nearest Neighbors (FLANN) or not. - (default is False) + backend : {'scipy-kdtree', 'scipy-pdist', 'flann'} + Type of the backend for graph construction. + - 'scipy-kdtree'(default) will use scipy.spatial.KDTree + - 'scipy-pdist' will use scipy.spatial.distance.pdist + - 'flann' use Fast Library for Approximate Nearest Neighbors (FLANN) center : bool, optional Center the data so that it has zero mean (default is True) rescale : bool, optional @@ -74,20 +131,34 @@ class NNGraph(Graph): """ - def __init__(self, Xin, NNtype='knn', use_flann=False, center=True, + def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, rescale=True, k=10, sigma=0.1, epsilon=0.01, gtype=None, plotting={}, symmetrize_type='average', dist_type='euclidean', order=0, **kwargs): self.Xin = Xin self.NNtype = NNtype - self.use_flann = use_flann + self.backend = backend self.center = center self.rescale = rescale self.k = k self.sigma = sigma self.epsilon = epsilon - + _dist_translation['scipy-kdtree']['minkowski'] = order + + self._nn_functions = { + 'knn': { + 'scipy-kdtree':_knn_sp_kdtree, + 'scipy-pdist': _knn_sp_pdist, + 'flann': _knn_flann + }, + 'radius': { + 'scipy-kdtree':_radius_sp_kdtree, + 'scipy-pdist': _radius_sp_pdist, + 'flann': _radius_flann + }, + } + if gtype is None: gtype = 'nearest neighbors' else: @@ -108,33 +179,15 @@ def __init__(self, Xin, NNtype='knn', use_flann=False, center=True, scale = np.power(N, 1. / float(min(d, 3))) / 10. Xout *= scale / bounding_radius - # Translate distance type string to corresponding Minkowski order. - dist_translation = {"euclidean": 2, - "manhattan": 1, - "max_dist": np.inf, - "minkowski": order - } + if self.NNtype == 'knn': spi = np.zeros((N * k)) spj = np.zeros((N * k)) spv = np.zeros((N * k)) - if self.use_flann: - pfl = _import_pfl() - pfl.set_distance_type(dist_type, order=order) - flann = pfl.FLANN() - - # Default FLANN parameters (I tried changing the algorithm and - # testing performance on huge matrices, but the default one - # seems to work best). - NN, D = flann.nn(Xout, Xout, num_neighbors=(k + 1), - algorithm='kdtree') - - else: - kdt = spatial.KDTree(Xout) - D, NN = kdt.query(Xout, k=(k + 1), - p=dist_translation[dist_type]) + NN, D = self._nn_functions[NNtype][backend](Xout, k, + dist_type, order) for i in range(N): spi[i * k:(i + 1) * k] = np.kron(np.ones((k)), i) @@ -144,13 +197,9 @@ def __init__(self, Xin, NNtype='knn', use_flann=False, center=True, elif self.NNtype == 'radius': - kdt = spatial.KDTree(Xout) - D, NN = kdt.query(Xout, k=None, distance_upper_bound=epsilon, - p=dist_translation[dist_type]) - count = 0 - for i in range(N): - count = count + len(NN[i]) - + NN, D = self.__nn_functions[NNtype][backend](Xout, epsilon, dist_type, order) + count = sum(map(len, NN)) + spi = np.zeros((count)) spj = np.zeros((count)) spv = np.zeros((count)) From 4bacd5c70579c68647daf794bcb2078281a528f0 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 11:14:21 +0100 Subject: [PATCH 02/28] update tests --- pygsp/graphs/nngraphs/nngraph.py | 7 ++++--- pygsp/tests/test_graphs.py | 29 ++++++++++++++++------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 95c2c5cc..4b9f2f3d 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -91,7 +91,7 @@ class NNGraph(Graph): backend : {'scipy-kdtree', 'scipy-pdist', 'flann'} Type of the backend for graph construction. - 'scipy-kdtree'(default) will use scipy.spatial.KDTree - - 'scipy-pdist' will use scipy.spatial.distance.pdist + - 'scipy-pdist' will use scipy.spatial.distance.pdist (slowest but exact) - 'flann' use Fast Library for Approximate Nearest Neighbors (FLANN) center : bool, optional Center the data so that it has zero mean (default is True) @@ -187,7 +187,7 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, spv = np.zeros((N * k)) NN, D = self._nn_functions[NNtype][backend](Xout, k, - dist_type, order) + dist_type, order) for i in range(N): spi[i * k:(i + 1) * k] = np.kron(np.ones((k)), i) @@ -197,7 +197,8 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, elif self.NNtype == 'radius': - NN, D = self.__nn_functions[NNtype][backend](Xout, epsilon, dist_type, order) + NN, D = self.__nn_functions[NNtype][backend](Xout, epsilon, + dist_type, order) count = sum(map(len, NN)) spi = np.zeros((count)) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index dc51fec5..afde1947 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -182,19 +182,22 @@ def test_set_coordinates(self): def test_nngraph(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - - for dist_type in dist_types: - - # Only p-norms with 1<=p<=infinity permitted. - if dist_type != 'minkowski': - graphs.NNGraph(Xin, NNtype='radius', dist_type=dist_type) - graphs.NNGraph(Xin, NNtype='knn', dist_type=dist_type) - - # Distance type unsupported in the C bindings, - # use the C++ bindings instead. - if dist_type != 'max_dist': - graphs.NNGraph(Xin, use_flann=True, NNtype='knn', - dist_type=dist_type) + backends = ['scipy-kdtree', 'scipy-pdist', 'flann'] + for cur_backend in backends: + for dist_type in dist_types: + + # Only p-norms with 1<=p<=infinity permitted. + if dist_type != 'minkowski': + graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, + dist_type=dist_type) + graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, + dist_type=dist_type) + + # Distance type unsupported in the C bindings, + # use the C++ bindings instead. + if dist_type != 'max_dist': + graphs.NNGraph(Xin, backend=cur_backend, NNtype='knn', + dist_type=dist_type) def test_bunny(self): graphs.Bunny() From 00bbcdd37579cae13a24e620df6e0bec303f1883 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 11:42:11 +0100 Subject: [PATCH 03/28] fix typo --- pygsp/graphs/nngraphs/nngraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 4b9f2f3d..f3fc9908 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -197,7 +197,7 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, elif self.NNtype == 'radius': - NN, D = self.__nn_functions[NNtype][backend](Xout, epsilon, + NN, D = self._nn_functions[NNtype][backend](Xout, epsilon, dist_type, order) count = sum(map(len, NN)) From b822333ac85adbbc69b85453732ce5073efbf1be Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 12:30:45 +0100 Subject: [PATCH 04/28] fix tests (avoiding not implemented combinations) --- pygsp/graphs/nngraphs/nngraph.py | 4 ++-- pygsp/tests/test_graphs.py | 33 ++++++++++++++++++++------------ 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index f3fc9908..d7b112fd 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -70,10 +70,10 @@ def _knn_sp_pdist(_X, _num_neighbors, _dist_type, _order): pdi = pd.argsort()[:, 0:_num_neighbors+1] return pdi, pds -def _radius_sp_pdist(): +def _radius_sp_pdist(_X, _epsilon, _dist_type, order=0): raise NotImplementedError() -def _radius_flann(): +def _radius_flann(_X, _epsilon, _dist_type, order=0): raise NotImplementedError() class NNGraph(Graph): diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index afde1947..cdbad6ce 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -183,21 +183,30 @@ def test_nngraph(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] backends = ['scipy-kdtree', 'scipy-pdist', 'flann'] + order=3 # for minkowski + for cur_backend in backends: - for dist_type in dist_types: - - # Only p-norms with 1<=p<=infinity permitted. + for dist_type in dist_types: if dist_type != 'minkowski': - graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, - dist_type=dist_type) - graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, - dist_type=dist_type) - - # Distance type unsupported in the C bindings, - # use the C++ bindings instead. - if dist_type != 'max_dist': - graphs.NNGraph(Xin, backend=cur_backend, NNtype='knn', + # curently radius only implemented with scipy kdtree + if cur_backend == 'scipy-kdtree': + graphs.NNGraph(Xin, NNtype='radius', + backend=cur_backend, + dist_type=dist_type) + graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, dist_type=dist_type) + else: + # Only p-norms with 1<=p<=infinity permitted. + # flann only accepts integer orders + if cur_backend == 'scipy-kdtree': + graphs.NNGraph(Xin, NNtype='radius', + backend=cur_backend, + dist_type=dist_type, order=order) + graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, + dist_type=dist_type, order=order) + def test_bunny(self): graphs.Bunny() From 38aebd0f77af6a5cf1ed733fb80e31d95b111434 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 13:55:46 +0100 Subject: [PATCH 05/28] - fix missing space after colon in dictionary - do not use underscores in functions args --- pygsp/graphs/nngraphs/nngraph.py | 36 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index d7b112fd..2d0fce2d 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -37,37 +37,37 @@ def _import_pfl(): -def _knn_sp_kdtree(_X, _num_neighbors, _dist_type, _order=0): - kdt = spatial.KDTree(_X) - D, NN = kdt.query(_X, k=(_num_neighbors + 1), - p=_dist_translation['scipy-kdtree'][_dist_type]) +def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): + kdt = spatial.KDTree(X) + D, NN = kdt.query(X, k=(num_neighbors + 1), + p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D -def _knn_flann(_X, _num_neighbors, _dist_type, _order): +def _knn_flann(X, num_neighbors, dist_type, order): pfl = _import_pfl() - pfl.set_distance_type(_dist_type, order=_order) + pfl.set_distance_type(dist_type, order=order) flann = pfl.FLANN() # Default FLANN parameters (I tried changing the algorithm and # testing performance on huge matrices, but the default one # seems to work best). - NN, D = flann.nn(_X, _X, num_neighbors=(_num_neighbors + 1), + NN, D = flann.nn(X, X, num_neighbors=(num_neighbors + 1), algorithm='kdtree') return NN, D -def _radius_sp_kdtree(_X, _epsilon, _dist_type, order=0): - kdt = spatial.KDTree(_X) - D, NN = kdt.query(_X, k=None, distance_upper_bound=_epsilon, - p=_dist_translation['scipy-kdtree'][_dist_type]) +def _radius_sp_kdtree(X, epsilon, dist_type, order=0): + kdt = spatial.KDTree(X) + D, NN = kdt.query(X, k=None, distance_upper_bound=epsilon, + p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D -def _knn_sp_pdist(_X, _num_neighbors, _dist_type, _order): +def _knn_sp_pdist(X, num_neighbors, dist_type, _order): pd = spatial.distance.squareform( - spatial.distance.pdist(_X, - _dist_translation['scipy-pdist'][_dist_type], + spatial.distance.pdist(X, + _dist_translation['scipy-pdist'][dist_type], p=_order)) - pds = np.sort(pd)[:, 0:_num_neighbors+1] - pdi = pd.argsort()[:, 0:_num_neighbors+1] + pds = np.sort(pd)[:, 0:num_neighbors+1] + pdi = pd.argsort()[:, 0:num_neighbors+1] return pdi, pds def _radius_sp_pdist(_X, _epsilon, _dist_type, order=0): @@ -148,12 +148,12 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, self._nn_functions = { 'knn': { - 'scipy-kdtree':_knn_sp_kdtree, + 'scipy-kdtree': _knn_sp_kdtree, 'scipy-pdist': _knn_sp_pdist, 'flann': _knn_flann }, 'radius': { - 'scipy-kdtree':_radius_sp_kdtree, + 'scipy-kdtree': _radius_sp_kdtree, 'scipy-pdist': _radius_sp_pdist, 'flann': _radius_flann }, From 524c60fcde3a68b034dcdeaa85cef30256639059 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 14:09:03 +0100 Subject: [PATCH 06/28] fix (matlab) GSP url --- README.rst | 2 +- pygsp/tests/test_graphs.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 2d6c59d4..99d27364 100644 --- a/README.rst +++ b/README.rst @@ -39,7 +39,7 @@ The documentation is available on `Read the Docs `_ and development takes place on `GitHub `_. -(A `Matlab counterpart `_ exists.) +(A `Matlab counterpart `_ exists.) The PyGSP facilitates a wide variety of operations on graphs, like computing their Fourier basis, filtering or interpolating signals, plotting graphs, diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index cdbad6ce..fd66deda 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -186,7 +186,8 @@ def test_nngraph(self): order=3 # for minkowski for cur_backend in backends: - for dist_type in dist_types: + for dist_type in dist_types: + print("backend={} dist={}".format(cur_backend, dist_type)) if dist_type != 'minkowski': # curently radius only implemented with scipy kdtree if cur_backend == 'scipy-kdtree': From ae838148d584f8ae66baa0deb8fa764230d051de Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 15:05:45 +0100 Subject: [PATCH 07/28] throw exception when using FLANN + max_dist (produces incorrect results) --- pygsp/graphs/nngraphs/nngraph.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 2d0fce2d..827b2d1d 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -45,6 +45,10 @@ def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): def _knn_flann(X, num_neighbors, dist_type, order): pfl = _import_pfl() + # the combination FLANN + max_dist produces incorrect results + # do not allow it + if dist_type == 'max_dist': + raise ValueError('FLANN and max_dist is not supported') pfl.set_distance_type(dist_type, order=order) flann = pfl.FLANN() From 62fc0ce26ddc46405fe229253559eb05a1838ac4 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 15:21:43 +0100 Subject: [PATCH 08/28] update test case to fit FLANN & max_dist exception --- pygsp/tests/test_graphs.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index fd66deda..484f32ae 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -182,24 +182,16 @@ def test_set_coordinates(self): def test_nngraph(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-pdist', 'flann'] - order=3 # for minkowski + backends = ['scipy-kdtree', 'scipy-pdist', 'flann'] + order=3 # for minkowski, FLANN only accepts integer orders for cur_backend in backends: for dist_type in dist_types: - print("backend={} dist={}".format(cur_backend, dist_type)) - if dist_type != 'minkowski': - # curently radius only implemented with scipy kdtree - if cur_backend == 'scipy-kdtree': - graphs.NNGraph(Xin, NNtype='radius', - backend=cur_backend, - dist_type=dist_type) - graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, - dist_type=dist_type) + if cur_backend == 'flann' and dist_type == 'max_dist': + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='knn', backend=cur_backend, + dist_type=dist_type) else: - # Only p-norms with 1<=p<=infinity permitted. - # flann only accepts integer orders if cur_backend == 'scipy-kdtree': graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, @@ -208,7 +200,6 @@ def test_nngraph(self): backend=cur_backend, dist_type=dist_type, order=order) - def test_bunny(self): graphs.Bunny() From 6f473fa4175e68d5eba4d1640559ea2fb55c9b13 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 07:44:42 +0100 Subject: [PATCH 09/28] implement nn graph using pdist using radius --- pygsp/graphs/nngraphs/nngraph.py | 35 ++++++++++++++++++++++++++------ pygsp/tests/test_graphs.py | 2 +- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 827b2d1d..d32f2cc0 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -65,20 +65,43 @@ def _radius_sp_kdtree(X, epsilon, dist_type, order=0): p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D -def _knn_sp_pdist(X, num_neighbors, dist_type, _order): +def _knn_sp_pdist(X, num_neighbors, dist_type, order): pd = spatial.distance.squareform( spatial.distance.pdist(X, _dist_translation['scipy-pdist'][dist_type], - p=_order)) + p=order)) pds = np.sort(pd)[:, 0:num_neighbors+1] pdi = pd.argsort()[:, 0:num_neighbors+1] return pdi, pds -def _radius_sp_pdist(_X, _epsilon, _dist_type, order=0): - raise NotImplementedError() +def _radius_sp_pdist(X, epsilon, dist_type, order): + N, dim = np.shape(X) + pd = spatial.distance.squareform( + spatial.distance.pdist(X, + _dist_translation['scipy-pdist'][dist_type], + p=order)) + pdf = pd < epsilon + D = [] + NN = [] + for k in range(N): + v = pd[k, pdf[k, :]] + # use the same conventions as in scipy.distance.kdtree + NN.append(v.argsort()) + D.append(np.sort(v)) + + return NN, D -def _radius_flann(_X, _epsilon, _dist_type, order=0): - raise NotImplementedError() +def _radius_flann(X, epsilon, dist_type, order=0): + pfl = _import_pfl() + # the combination FLANN + max_dist produces incorrect results + # do not allow it + if dist_type == 'max_dist': + raise ValueError('FLANN and max_dist is not supported') + pfl.set_distance_type(dist_type, order=order) + flann = pfl.FLANN() + flann.build_index(X) + + flann.delete_index() class NNGraph(Graph): r"""Nearest-neighbor graph from given point cloud. diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 484f32ae..bdeaf9d1 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -192,7 +192,7 @@ def test_nngraph(self): NNtype='knn', backend=cur_backend, dist_type=dist_type) else: - if cur_backend == 'scipy-kdtree': + if cur_backend != 'flann': graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, dist_type=dist_type, order=order) From 25ec6d23f20875c9f148d650a5ce4d37641d7aa8 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 09:09:02 +0100 Subject: [PATCH 10/28] implement radius nn graph with flann --- pygsp/graphs/nngraphs/nngraph.py | 36 ++++++++++++++++++++------------ pygsp/tests/test_graphs.py | 7 +++---- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index d32f2cc0..d65a92b9 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -3,7 +3,8 @@ import traceback import numpy as np -from scipy import sparse, spatial +from scipy import sparse +import scipy.spatial as sps from pygsp import utils from pygsp.graphs import Graph # prevent circular import in Python < 3.5 @@ -38,17 +39,17 @@ def _import_pfl(): def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): - kdt = spatial.KDTree(X) + kdt = sps.KDTree(X) D, NN = kdt.query(X, k=(num_neighbors + 1), p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D def _knn_flann(X, num_neighbors, dist_type, order): - pfl = _import_pfl() # the combination FLANN + max_dist produces incorrect results # do not allow it if dist_type == 'max_dist': raise ValueError('FLANN and max_dist is not supported') + pfl = _import_pfl() pfl.set_distance_type(dist_type, order=order) flann = pfl.FLANN() @@ -60,26 +61,26 @@ def _knn_flann(X, num_neighbors, dist_type, order): return NN, D def _radius_sp_kdtree(X, epsilon, dist_type, order=0): - kdt = spatial.KDTree(X) + kdt = sps.KDTree(X) D, NN = kdt.query(X, k=None, distance_upper_bound=epsilon, p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D def _knn_sp_pdist(X, num_neighbors, dist_type, order): - pd = spatial.distance.squareform( - spatial.distance.pdist(X, - _dist_translation['scipy-pdist'][dist_type], - p=order)) + pd = sps.distance.squareform( + sps.distance.pdist(X, + metric=_dist_translation['scipy-pdist'][dist_type], + p=order)) pds = np.sort(pd)[:, 0:num_neighbors+1] pdi = pd.argsort()[:, 0:num_neighbors+1] return pdi, pds def _radius_sp_pdist(X, epsilon, dist_type, order): N, dim = np.shape(X) - pd = spatial.distance.squareform( - spatial.distance.pdist(X, - _dist_translation['scipy-pdist'][dist_type], - p=order)) + pd = sps.distance.squareform( + sps.distance.pdist(X, + metric=_dist_translation['scipy-pdist'][dist_type], + p=order)) pdf = pd < epsilon D = [] NN = [] @@ -92,16 +93,25 @@ def _radius_sp_pdist(X, epsilon, dist_type, order): return NN, D def _radius_flann(X, epsilon, dist_type, order=0): - pfl = _import_pfl() + N, dim = np.shape(X) # the combination FLANN + max_dist produces incorrect results # do not allow it if dist_type == 'max_dist': raise ValueError('FLANN and max_dist is not supported') + + pfl = _import_pfl() pfl.set_distance_type(dist_type, order=order) flann = pfl.FLANN() flann.build_index(X) + D = [] + NN = [] + for k in range(N): + nn, d = flann.nn_radius(X[k, :], epsilon) + D.append(d) + NN.append(nn) flann.delete_index() + return NN, D class NNGraph(Graph): r"""Nearest-neighbor graph from given point cloud. diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index bdeaf9d1..6e70347c 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -192,10 +192,9 @@ def test_nngraph(self): NNtype='knn', backend=cur_backend, dist_type=dist_type) else: - if cur_backend != 'flann': - graphs.NNGraph(Xin, NNtype='radius', - backend=cur_backend, - dist_type=dist_type, order=order) + graphs.NNGraph(Xin, NNtype='radius', + backend=cur_backend, + dist_type=dist_type, order=order) graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type, order=order) From 96b628e2bf86ed31c2c753d73d80d3d5e10243aa Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 10:06:03 +0100 Subject: [PATCH 11/28] flann returns the squared distance when called with 'euclidean' distance -> fix fix radius nn graph for spdist --- pygsp/graphs/nngraphs/nngraph.py | 29 ++++++++++++++++++++--------- pygsp/tests/test_graphs.py | 15 +++++++++++++++ 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index d65a92b9..175460f0 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -49,6 +49,7 @@ def _knn_flann(X, num_neighbors, dist_type, order): # do not allow it if dist_type == 'max_dist': raise ValueError('FLANN and max_dist is not supported') + pfl = _import_pfl() pfl.set_distance_type(dist_type, order=order) flann = pfl.FLANN() @@ -58,6 +59,8 @@ def _knn_flann(X, num_neighbors, dist_type, order): # seems to work best). NN, D = flann.nn(X, X, num_neighbors=(num_neighbors + 1), algorithm='kdtree') + if dist_type == 'euclidean': # flann returns squared distances + return NN, np.sqrt(D) return NN, D def _radius_sp_kdtree(X, epsilon, dist_type, order=0): @@ -86,8 +89,9 @@ def _radius_sp_pdist(X, epsilon, dist_type, order): NN = [] for k in range(N): v = pd[k, pdf[k, :]] + d = pd[k, :].argsort() # use the same conventions as in scipy.distance.kdtree - NN.append(v.argsort()) + NN.append(d[0:len(v)]) D.append(np.sort(v)) return NN, D @@ -98,8 +102,8 @@ def _radius_flann(X, epsilon, dist_type, order=0): # do not allow it if dist_type == 'max_dist': raise ValueError('FLANN and max_dist is not supported') - pfl = _import_pfl() + pfl.set_distance_type(dist_type, order=order) flann = pfl.FLANN() flann.build_index(X) @@ -107,12 +111,23 @@ def _radius_flann(X, epsilon, dist_type, order=0): D = [] NN = [] for k in range(N): - nn, d = flann.nn_radius(X[k, :], epsilon) + nn, d = flann.nn_radius(X[k, :], epsilon*epsilon) D.append(d) NN.append(nn) flann.delete_index() + if dist_type == 'euclidean': # flann returns squared distances + return NN, np.sqrt(D) return NN, D +def center_input(X, N): + return X - np.kron(np.ones((N, 1)), np.mean(X, axis=0)) + +def rescale_input(X, N, d): + bounding_radius = 0.5 * np.linalg.norm(np.amax(X, axis=0) - + np.amin(X, axis=0), 2) + scale = np.power(N, 1. / float(min(d, 3))) / 10. + return X * scale / bounding_radius + class NNGraph(Graph): r"""Nearest-neighbor graph from given point cloud. @@ -207,14 +222,10 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, Xout = self.Xin if self.center: - Xout = self.Xin - np.kron(np.ones((N, 1)), - np.mean(self.Xin, axis=0)) + Xout = center_input(Xout, N) if self.rescale: - bounding_radius = 0.5 * np.linalg.norm(np.amax(Xout, axis=0) - - np.amin(Xout, axis=0), 2) - scale = np.power(N, 1. / float(min(d, 3))) / 10. - Xout *= scale / bounding_radius + Xout = rescale_input(Xout, N, d) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 6e70347c..c7853839 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -187,6 +187,7 @@ def test_nngraph(self): for cur_backend in backends: for dist_type in dist_types: + #print("backend={} dist={}".format(cur_backend, dist_type)) if cur_backend == 'flann' and dist_type == 'max_dist': self.assertRaises(ValueError, graphs.NNGraph, Xin, NNtype='knn', backend=cur_backend, @@ -199,6 +200,20 @@ def test_nngraph(self): backend=cur_backend, dist_type=dist_type, order=order) + def test_nngraph_consistency(self): + #Xin = np.arange(180).reshape(60, 3) + Xin = np.random.uniform(-5, 5, (60, 3)) + dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] + backends = ['scipy-kdtree', 'flann'] + num_neighbors=5 + + G = graphs.NNGraph(Xin, NNtype='knn', + backend='scipy-pdist', k=num_neighbors) + for cur_backend in backends: + for dist_type in dist_types: + Gt = graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, k=num_neighbors) + def test_bunny(self): graphs.Bunny() From 09bbff42c295829d14e05e7dce07db5df33d6d81 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 15:33:04 +0100 Subject: [PATCH 12/28] compute sqrt of list properly --- pygsp/graphs/nngraphs/nngraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 175460f0..085b7efc 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -116,7 +116,7 @@ def _radius_flann(X, epsilon, dist_type, order=0): NN.append(nn) flann.delete_index() if dist_type == 'euclidean': # flann returns squared distances - return NN, np.sqrt(D) + return NN, list(map(np.sqrt, D)) return NN, D def center_input(X, N): From 27b9a038d16f129fb63a1e3344fe2fb81e8936bf Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 15:14:08 +0100 Subject: [PATCH 13/28] use cyflann instead of pyflann (radius search not working) --- pygsp/graphs/nngraphs/nngraph.py | 43 +++++++++++++++++--------------- setup.py | 4 +-- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 085b7efc..979319e4 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -27,14 +27,14 @@ } -def _import_pfl(): +def _import_cfl(): try: - import pyflann as pfl + import cyflann as cfl except Exception: - raise ImportError('Cannot import pyflann. Choose another nearest ' + raise ImportError('Cannot import cyflann. Choose another nearest ' 'neighbors method or try to install it with ' - 'pip (or conda) install pyflann (or pyflann3).') - return pfl + 'pip (or conda) install cyflann.') + return cfl @@ -50,15 +50,15 @@ def _knn_flann(X, num_neighbors, dist_type, order): if dist_type == 'max_dist': raise ValueError('FLANN and max_dist is not supported') - pfl = _import_pfl() - pfl.set_distance_type(dist_type, order=order) - flann = pfl.FLANN() - + cfl = _import_cfl() + cfl.set_distance_type(dist_type, order=order) + c = cfl.FLANNIndex(algorithm='kdtree') + c.build_index(X) # Default FLANN parameters (I tried changing the algorithm and # testing performance on huge matrices, but the default one # seems to work best). - NN, D = flann.nn(X, X, num_neighbors=(num_neighbors + 1), - algorithm='kdtree') + NN, D = c.nn_index(X, num_neighbors + 1) + c.free_index() if dist_type == 'euclidean': # flann returns squared distances return NN, np.sqrt(D) return NN, D @@ -102,19 +102,18 @@ def _radius_flann(X, epsilon, dist_type, order=0): # do not allow it if dist_type == 'max_dist': raise ValueError('FLANN and max_dist is not supported') - pfl = _import_pfl() - - pfl.set_distance_type(dist_type, order=order) - flann = pfl.FLANN() - flann.build_index(X) + cfl = _import_cfl() + cfl.set_distance_type(dist_type, order=order) + c = cfl.FLANNIndex(algorithm='kdtree') + c.build_index(X) D = [] NN = [] for k in range(N): - nn, d = flann.nn_radius(X[k, :], epsilon*epsilon) + nn, d = c.nn_radius(X[k, :], epsilon*epsilon) D.append(d) NN.append(nn) - flann.delete_index() + c.free_index() if dist_type == 'euclidean': # flann returns squared distances return NN, list(map(np.sqrt, D)) return NN, D @@ -228,7 +227,13 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, Xout = rescale_input(Xout, N, d) + if self._nn_functions.get(NNtype) == None: + raise ValueError('Invalid NNtype {}'.format(self.NNtype)) + if self._nn_functions[NNtype].get(backend) == None: + raise ValueError('Invalid backend {} for type {}'.format(backend, + self.NNtype)) + if self.NNtype == 'knn': spi = np.zeros((N * k)) spj = np.zeros((N * k)) @@ -262,8 +267,6 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, float(self.sigma)) start = start + leng - else: - raise ValueError('Unknown NNtype {}'.format(self.NNtype)) W = sparse.csc_matrix((spv, (spi, spj)), shape=(N, N)) diff --git a/setup.py b/setup.py index bcf0ccd9..b247f3a2 100644 --- a/setup.py +++ b/setup.py @@ -30,8 +30,8 @@ # Construct patch graphs from images. 'scikit-image', # Approximate nearest neighbors for kNN graphs. - 'pyflann; python_version == "2.*"', - 'pyflann3; python_version == "3.*"', + 'flann', + 'cyflann', # Convex optimization on graph. 'pyunlocbox', # Plot graphs, signals, and filters. From 8a1f9b907b45a4cbad2b8ba48cddc535f7f82981 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 15:14:39 +0100 Subject: [PATCH 14/28] check nn graphs building against pdist reference --- pygsp/tests/test_graphs.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index c7853839..48277e3f 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -9,6 +9,7 @@ import numpy as np import scipy.linalg +import scipy.sparse.linalg from skimage import data, img_as_float from pygsp import graphs @@ -199,20 +200,45 @@ def test_nngraph(self): graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type, order=order) + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='badtype', backend=cur_backend, + dist_type=dist_type) + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='knn', backend='badtype', + dist_type=dist_type) def test_nngraph_consistency(self): - #Xin = np.arange(180).reshape(60, 3) Xin = np.random.uniform(-5, 5, (60, 3)) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] backends = ['scipy-kdtree', 'flann'] - num_neighbors=5 + num_neighbors=4 + epsilon=0.1 + # use pdist as ground truth G = graphs.NNGraph(Xin, NNtype='knn', backend='scipy-pdist', k=num_neighbors) - for cur_backend in backends: + for cur_backend in backends: for dist_type in dist_types: + if cur_backend == 'flann' and dist_type == 'max_dist': + continue + #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, k=num_neighbors) + d = scipy.sparse.linalg.norm(G.W - Gt.W) + self.assertTrue(d < 0.01, 'Graphs (knn) are not identical error='.format(d)) + + G = graphs.NNGraph(Xin, NNtype='radius', + backend='scipy-pdist', epsilon=epsilon) + for cur_backend in backends: + for dist_type in dist_types: + if cur_backend == 'flann' and dist_type == 'max_dist': + continue + #print("backend={} dist={}".format(cur_backend, dist_type)) + Gt = graphs.NNGraph(Xin, NNtype='radius', + backend=cur_backend, epsilon=epsilon) + d = scipy.sparse.linalg.norm(G.W - Gt.W, ord=1) + self.assertTrue(d < 0.01, + 'Graphs (radius) are not identical error='.format(d)) def test_bunny(self): graphs.Bunny() From 6e9e2ac856a2fcafa36c2fc2fd12b8b6f1055fa2 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 15:36:49 +0100 Subject: [PATCH 15/28] cyflann needs the flann library to be installed on the system try to install via before_install --- .travis.yml | 4 ++++ setup.py | 5 ++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 835c6d85..d8f45358 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,10 @@ python: - 3.5 - 3.6 +before_install: + - sudo apt-get -qq update + - sudo apt-get install -y libflann-dev + addons: apt: packages: diff --git a/setup.py b/setup.py index b247f3a2..a9f92004 100644 --- a/setup.py +++ b/setup.py @@ -30,8 +30,7 @@ # Construct patch graphs from images. 'scikit-image', # Approximate nearest neighbors for kNN graphs. - 'flann', - 'cyflann', + 'cyflann', # Convex optimization on graph. 'pyunlocbox', # Plot graphs, signals, and filters. @@ -60,7 +59,7 @@ # Dependencies to build and upload packages. 'pkg': [ 'wheel', - 'twine', + 'twine' ], }, license="BSD", From 811de06e5adc694e8ba496bbc59e7d874b9b9695 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 15:14:39 +0100 Subject: [PATCH 16/28] check nn graphs building against pdist reference --- pygsp/tests/test_graphs.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index c7853839..48277e3f 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -9,6 +9,7 @@ import numpy as np import scipy.linalg +import scipy.sparse.linalg from skimage import data, img_as_float from pygsp import graphs @@ -199,20 +200,45 @@ def test_nngraph(self): graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type, order=order) + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='badtype', backend=cur_backend, + dist_type=dist_type) + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='knn', backend='badtype', + dist_type=dist_type) def test_nngraph_consistency(self): - #Xin = np.arange(180).reshape(60, 3) Xin = np.random.uniform(-5, 5, (60, 3)) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] backends = ['scipy-kdtree', 'flann'] - num_neighbors=5 + num_neighbors=4 + epsilon=0.1 + # use pdist as ground truth G = graphs.NNGraph(Xin, NNtype='knn', backend='scipy-pdist', k=num_neighbors) - for cur_backend in backends: + for cur_backend in backends: for dist_type in dist_types: + if cur_backend == 'flann' and dist_type == 'max_dist': + continue + #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, k=num_neighbors) + d = scipy.sparse.linalg.norm(G.W - Gt.W) + self.assertTrue(d < 0.01, 'Graphs (knn) are not identical error='.format(d)) + + G = graphs.NNGraph(Xin, NNtype='radius', + backend='scipy-pdist', epsilon=epsilon) + for cur_backend in backends: + for dist_type in dist_types: + if cur_backend == 'flann' and dist_type == 'max_dist': + continue + #print("backend={} dist={}".format(cur_backend, dist_type)) + Gt = graphs.NNGraph(Xin, NNtype='radius', + backend=cur_backend, epsilon=epsilon) + d = scipy.sparse.linalg.norm(G.W - Gt.W, ord=1) + self.assertTrue(d < 0.01, + 'Graphs (radius) are not identical error='.format(d)) def test_bunny(self): graphs.Bunny() From 813fe3988c89d693e685d61ce5d69fbd8cd6070a Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 16:18:33 +0100 Subject: [PATCH 17/28] backport stuff from cyflann branch --- pygsp/graphs/nngraphs/nngraph.py | 7 +++++-- pygsp/tests/test_graphs.py | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 085b7efc..747c8542 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -228,7 +228,12 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, Xout = rescale_input(Xout, N, d) + if self._nn_functions.get(NNtype) == None: + raise ValueError('Invalid NNtype {}'.format(self.NNtype)) + if self._nn_functions[NNtype].get(backend) == None: + raise ValueError('Invalid backend {} for type {}'.format(backend, + self.NNtype)) if self.NNtype == 'knn': spi = np.zeros((N * k)) spj = np.zeros((N * k)) @@ -262,8 +267,6 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, float(self.sigma)) start = start + leng - else: - raise ValueError('Unknown NNtype {}'.format(self.NNtype)) W = sparse.csc_matrix((spv, (spi, spj)), shape=(N, N)) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 48277e3f..b2ddf18b 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -219,26 +219,26 @@ def test_nngraph_consistency(self): backend='scipy-pdist', k=num_neighbors) for cur_backend in backends: for dist_type in dist_types: - if cur_backend == 'flann' and dist_type == 'max_dist': + if cur_backend == 'flann': # skip flann for now continue #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, k=num_neighbors) d = scipy.sparse.linalg.norm(G.W - Gt.W) - self.assertTrue(d < 0.01, 'Graphs (knn) are not identical error='.format(d)) + self.assertTrue(d < 0.01, "Graphs (knn) are not identical") G = graphs.NNGraph(Xin, NNtype='radius', backend='scipy-pdist', epsilon=epsilon) for cur_backend in backends: for dist_type in dist_types: - if cur_backend == 'flann' and dist_type == 'max_dist': + if cur_backend == 'flann': # skip flann for now continue #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, epsilon=epsilon) d = scipy.sparse.linalg.norm(G.W - Gt.W, ord=1) self.assertTrue(d < 0.01, - 'Graphs (radius) are not identical error='.format(d)) + "Graphs (radius) are not identical") def test_bunny(self): graphs.Bunny() From 4a4d59796aa1c8c3f0a040122bf8659119690fa2 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 16:24:33 +0100 Subject: [PATCH 18/28] flann should (mostly) work for knn graphs --- pygsp/tests/test_graphs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index b2ddf18b..8d1a008d 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -219,7 +219,7 @@ def test_nngraph_consistency(self): backend='scipy-pdist', k=num_neighbors) for cur_backend in backends: for dist_type in dist_types: - if cur_backend == 'flann': # skip flann for now + if cur_backend == 'flann' and dist_type == 'max_dist': continue #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='knn', From 53dffc12670c6664fddca317c76a8a2199e1e992 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Wed, 21 Mar 2018 08:18:49 +0100 Subject: [PATCH 19/28] fix pdist warnings --- pygsp/graphs/nngraphs/nngraph.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 979319e4..c2357468 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -5,6 +5,7 @@ import numpy as np from scipy import sparse import scipy.spatial as sps +import scipy.spatial.distance as spsd from pygsp import utils from pygsp.graphs import Graph # prevent circular import in Python < 3.5 @@ -70,20 +71,24 @@ def _radius_sp_kdtree(X, epsilon, dist_type, order=0): return NN, D def _knn_sp_pdist(X, num_neighbors, dist_type, order): - pd = sps.distance.squareform( - sps.distance.pdist(X, - metric=_dist_translation['scipy-pdist'][dist_type], - p=order)) + if dist_type == 'minkowski': + p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type], + p=order) + else: + p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type]) + pd = spsd.squareform(p) pds = np.sort(pd)[:, 0:num_neighbors+1] pdi = pd.argsort()[:, 0:num_neighbors+1] return pdi, pds def _radius_sp_pdist(X, epsilon, dist_type, order): N, dim = np.shape(X) - pd = sps.distance.squareform( - sps.distance.pdist(X, - metric=_dist_translation['scipy-pdist'][dist_type], - p=order)) + if dist_type == 'minkowski': + p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type], + p=order) + else: + p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type]) + pd = spsd.squareform(p) pdf = pd < epsilon D = [] NN = [] From 1309e92b963f5177822fb1e5406b2bc67f812272 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Wed, 21 Mar 2018 12:28:35 +0100 Subject: [PATCH 20/28] implement and use scipy-ckdtree as default (faster than kdtree) --- pygsp/graphs/nngraphs/nngraph.py | 40 +++++++++++++++++++++++++++++--- pygsp/tests/test_graphs.py | 13 ++++++----- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 747c8542..faa4d9f5 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -18,6 +18,11 @@ 'manhattan': 1, 'max_dist': np.inf }, + 'scipy-ckdtree': { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf + }, 'scipy-pdist' : { 'euclidean': 'euclidean', 'manhattan': 'cityblock', @@ -44,6 +49,13 @@ def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D +def _knn_sp_ckdtree(X, num_neighbors, dist_type, order=0): + kdt = sps.cKDTree(X) + D, NN = kdt.query(X, k=(num_neighbors + 1), + p=_dist_translation['scipy-ckdtree'][dist_type]) + return NN, D + + def _knn_flann(X, num_neighbors, dist_type, order): # the combination FLANN + max_dist produces incorrect results # do not allow it @@ -66,9 +78,27 @@ def _knn_flann(X, num_neighbors, dist_type, order): def _radius_sp_kdtree(X, epsilon, dist_type, order=0): kdt = sps.KDTree(X) D, NN = kdt.query(X, k=None, distance_upper_bound=epsilon, - p=_dist_translation['scipy-kdtree'][dist_type]) + p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D +def _radius_sp_ckdtree(X, epsilon, dist_type, order=0): + N, dim = np.shape(X) + kdt = sps.cKDTree(X) + nn = kdt.query_ball_point(X, r=epsilon, + p=_dist_translation['scipy-ckdtree'][dist_type]) + D = [] + NN = [] + for k in range(N): + x = np.matlib.repmat(X[k, :], len(nn[k]), 1) + d = np.linalg.norm(x - X[nn[k], :], + ord=_dist_translation['scipy-ckdtree'][dist_type], + axis=1) + nidx = d.argsort() + NN.append(np.take(nn[k], nidx)) + D.append(np.sort(d)) + return NN, D + + def _knn_sp_pdist(X, num_neighbors, dist_type, order): pd = sps.distance.squareform( sps.distance.pdist(X, @@ -142,7 +172,8 @@ class NNGraph(Graph): is 'knn'). backend : {'scipy-kdtree', 'scipy-pdist', 'flann'} Type of the backend for graph construction. - - 'scipy-kdtree'(default) will use scipy.spatial.KDTree + - 'scipy-kdtree' will use scipy.spatial.KDTree + - 'scipy-ckdtree'(default) will use scipy.spatial.cKDTree - 'scipy-pdist' will use scipy.spatial.distance.pdist (slowest but exact) - 'flann' use Fast Library for Approximate Nearest Neighbors (FLANN) center : bool, optional @@ -183,7 +214,7 @@ class NNGraph(Graph): """ - def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, + def __init__(self, Xin, NNtype='knn', backend='scipy-ckdtree', center=True, rescale=True, k=10, sigma=0.1, epsilon=0.01, gtype=None, plotting={}, symmetrize_type='average', dist_type='euclidean', order=0, **kwargs): @@ -197,15 +228,18 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, self.sigma = sigma self.epsilon = epsilon _dist_translation['scipy-kdtree']['minkowski'] = order + _dist_translation['scipy-ckdtree']['minkowski'] = order self._nn_functions = { 'knn': { 'scipy-kdtree': _knn_sp_kdtree, + 'scipy-ckdtree': _knn_sp_ckdtree, 'scipy-pdist': _knn_sp_pdist, 'flann': _knn_flann }, 'radius': { 'scipy-kdtree': _radius_sp_kdtree, + 'scipy-ckdtree': _radius_sp_ckdtree, 'scipy-pdist': _radius_sp_pdist, 'flann': _radius_flann }, diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 8d1a008d..e8ac8f99 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -183,7 +183,7 @@ def test_set_coordinates(self): def test_nngraph(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-pdist', 'flann'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'flann'] order=3 # for minkowski, FLANN only accepts integer orders for cur_backend in backends: @@ -194,9 +194,10 @@ def test_nngraph(self): NNtype='knn', backend=cur_backend, dist_type=dist_type) else: - graphs.NNGraph(Xin, NNtype='radius', - backend=cur_backend, - dist_type=dist_type, order=order) + if cur_backend != 'flann': #pyflann fails on radius query + graphs.NNGraph(Xin, NNtype='radius', + backend=cur_backend, + dist_type=dist_type, order=order) graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type, order=order) @@ -208,9 +209,9 @@ def test_nngraph(self): dist_type=dist_type) def test_nngraph_consistency(self): - Xin = np.random.uniform(-5, 5, (60, 3)) + Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'flann'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann'] num_neighbors=4 epsilon=0.1 From 648fa9130007f8e005808be3d11a92370c0f4410 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Thu, 22 Mar 2018 11:20:40 +0100 Subject: [PATCH 21/28] backport README changes from master --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 99d27364..387daa92 100644 --- a/README.rst +++ b/README.rst @@ -39,7 +39,7 @@ The documentation is available on `Read the Docs `_ and development takes place on `GitHub `_. -(A `Matlab counterpart `_ exists.) +(A (mostly unmaintained) `Matlab version `_ exists.) The PyGSP facilitates a wide variety of operations on graphs, like computing their Fourier basis, filtering or interpolating signals, plotting graphs, From 8e7c553249f4b762078ceb6ae593d998ccbf3b4f Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Fri, 23 Mar 2018 10:00:13 +0100 Subject: [PATCH 22/28] add nmslib --- pygsp/graphs/nngraphs/nngraph.py | 39 ++++++++++++++++++++++++++++---- pygsp/tests/test_graphs.py | 24 ++++++++++++++------ setup.py | 4 +++- 3 files changed, 55 insertions(+), 12 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index d8d85b34..b58c8fc5 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -30,7 +30,12 @@ 'max_dist': 'chebyshev', 'minkowski': 'minkowski' }, - + 'nmslib' : { + 'euclidean': 'l2', + 'manhattan': 'l1', + 'max_dist': 'linf', + 'minkowski': 'lp' + } } def _import_cfl(): @@ -42,7 +47,14 @@ def _import_cfl(): 'pip (or conda) install cyflann.') return cfl - +def _import_nmslib(): + try: + import nmslib as nms + except Exception: + raise ImportError('Cannot import nmslib. Choose another nearest ' + 'neighbors method or try to install it with ' + 'pip (or conda) install nmslib.') + return nms def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): kdt = sps.KDTree(X) @@ -111,6 +123,20 @@ def _knn_sp_pdist(X, num_neighbors, dist_type, order): pdi = pd.argsort()[:, 0:num_neighbors+1] return pdi, pds +def _knn_nmslib(X, num_neighbors, dist_type, order): + N, dim = np.shape(X) + if dist_type == 'minkowski': + raise ValueError('unsupported distance type (lp) for nmslib') + nms = _import_nmslib() + nmsidx = nms.init(space=_dist_translation['nmslib'][dist_type]) + nmsidx.addDataPointBatch(X) + nmsidx.createIndex() + q = nmsidx.knnQueryBatch(X, k=num_neighbors+1) + nn, d = zip(*q) + D = np.concatenate(d).reshape(N, num_neighbors+1) + NN = np.concatenate(nn).reshape(N, num_neighbors+1) + return NN, D + def _radius_sp_pdist(X, epsilon, dist_type, order): N, dim = np.shape(X) if dist_type == 'minkowski': @@ -153,6 +179,9 @@ def _radius_flann(X, epsilon, dist_type, order=0): return NN, list(map(np.sqrt, D)) return NN, D +def _radius_nmslib(X, epsilon, dist_type, order=0): + raise ValueError('nmslib does not support (yet?) range queries') + def center_input(X, N): return X - np.kron(np.ones((N, 1)), np.mean(X, axis=0)) @@ -239,13 +268,15 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-ckdtree', center=True, 'scipy-kdtree': _knn_sp_kdtree, 'scipy-ckdtree': _knn_sp_ckdtree, 'scipy-pdist': _knn_sp_pdist, - 'flann': _knn_flann + 'flann': _knn_flann, + 'nmslib': _knn_nmslib }, 'radius': { 'scipy-kdtree': _radius_sp_kdtree, 'scipy-ckdtree': _radius_sp_ckdtree, 'scipy-pdist': _radius_sp_pdist, - 'flann': _radius_flann + 'flann': _radius_flann, + 'nmslib': _radius_nmslib }, } diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 8613d3b1..25ae17b4 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -183,24 +183,30 @@ def test_set_coordinates(self): def test_nngraph(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'flann'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib'] order=3 # for minkowski, FLANN only accepts integer orders for cur_backend in backends: for dist_type in dist_types: #print("backend={} dist={}".format(cur_backend, dist_type)) - if cur_backend == 'flann' and dist_type == 'max_dist': + if (cur_backend == 'flann' and + dist_type == 'max_dist') or (cur_backend == 'nmslib' and + dist_type == 'minkowski'): self.assertRaises(ValueError, graphs.NNGraph, Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type) else: - if cur_backend != 'flann': #pyflann fails on radius query + if cur_backend == 'nmslib': + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='radius', backend=cur_backend, + dist_type=dist_type, order=order) + else: graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, dist_type=dist_type, order=order) - graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, - dist_type=dist_type, order=order) + graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, + dist_type=dist_type, order=order) self.assertRaises(ValueError, graphs.NNGraph, Xin, NNtype='badtype', backend=cur_backend, dist_type=dist_type) @@ -211,7 +217,7 @@ def test_nngraph(self): def test_nngraph_consistency(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'nmslib']#, 'flann'] num_neighbors=4 epsilon=0.1 @@ -222,6 +228,8 @@ def test_nngraph_consistency(self): for dist_type in dist_types: if cur_backend == 'flann' and dist_type == 'max_dist': continue + if cur_backend == 'nmslib' and dist_type == 'minkowski': + continue #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, k=num_neighbors) @@ -234,6 +242,8 @@ def test_nngraph_consistency(self): for dist_type in dist_types: if cur_backend == 'flann' and dist_type == 'max_dist': continue + if cur_backend == 'nmslib': #unsupported + continue #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, epsilon=epsilon) diff --git a/setup.py b/setup.py index a9f92004..2979ac83 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,9 @@ # Construct patch graphs from images. 'scikit-image', # Approximate nearest neighbors for kNN graphs. - 'cyflann', + 'cyflann', + 'pybind11', + 'nmslib', # Convex optimization on graph. 'pyunlocbox', # Plot graphs, signals, and filters. From b83e4672e801bf88ca0de6468b4e9b33a1995e2b Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 26 Mar 2018 10:12:43 +0200 Subject: [PATCH 23/28] test flann when not on windows --- pygsp/tests/test_graphs.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 25ae17b4..14277ed4 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -6,7 +6,7 @@ """ import unittest - +import os import numpy as np import scipy.linalg import scipy.sparse.linalg @@ -183,7 +183,9 @@ def test_set_coordinates(self): def test_nngraph(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib'] + if os.name != 'nt': + backends.append('flann') order=3 # for minkowski, FLANN only accepts integer orders for cur_backend in backends: @@ -217,7 +219,9 @@ def test_nngraph(self): def test_nngraph_consistency(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'nmslib']#, 'flann'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'nmslib'] + if os.name != 'nt': + backends.append('flann') num_neighbors=4 epsilon=0.1 From 28b78589c6dbd9d6149c2c8dfeba39a794282690 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Thu, 29 Mar 2018 10:56:41 +0200 Subject: [PATCH 24/28] use the same code to build sparse matrix for knn and radius --- pygsp/graphs/nngraphs/nngraph.py | 41 ++++++++++++-------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index d8d85b34..b35b6d38 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -273,37 +273,26 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-ckdtree', center=True, raise ValueError('Invalid backend {} for type {}'.format(backend, self.NNtype)) if self.NNtype == 'knn': - spi = np.zeros((N * k)) - spj = np.zeros((N * k)) - spv = np.zeros((N * k)) - NN, D = self._nn_functions[NNtype][backend](Xout, k, dist_type, order) - - for i in range(N): - spi[i * k:(i + 1) * k] = np.kron(np.ones((k)), i) - spj[i * k:(i + 1) * k] = NN[i, 1:] - spv[i * k:(i + 1) * k] = np.exp(-np.power(D[i, 1:], 2) / - float(self.sigma)) - + elif self.NNtype == 'radius': - NN, D = self._nn_functions[NNtype][backend](Xout, epsilon, dist_type, order) - count = sum(map(len, NN)) - - spi = np.zeros((count)) - spj = np.zeros((count)) - spv = np.zeros((count)) - - start = 0 - for i in range(N): - leng = len(NN[i]) - 1 - spi[start:start + leng] = np.kron(np.ones((leng)), i) - spj[start:start + leng] = NN[i][1:] - spv[start:start + leng] = np.exp(-np.power(D[i][1:], 2) / - float(self.sigma)) - start = start + leng + countV = list(map(len, NN)) + count = sum(countV) + spi = np.zeros((count)) + spj = np.zeros((count)) + spv = np.zeros((count)) + + start = 0 + for i in range(N): + leng = countV[i] - 1 + spi[start:start + leng] = np.kron(np.ones((leng)), i) + spj[start:start + leng] = NN[i][1:] + spv[start:start + leng] = np.exp(-np.power(D[i][1:], 2) / + float(self.sigma)) + start = start + leng W = sparse.csc_matrix((spv, (spi, spj)), shape=(N, N)) From 188c4a61ffe93fa0647030758c31ebfe21a2f163 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Thu, 29 Mar 2018 13:44:10 +0200 Subject: [PATCH 25/28] building the graph with rescale/center=False should also work --- pygsp/tests/test_graphs.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 14277ed4..a48e6034 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -197,6 +197,9 @@ def test_nngraph(self): self.assertRaises(ValueError, graphs.NNGraph, Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type) + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='radius', backend=cur_backend, + dist_type=dist_type) else: if cur_backend == 'nmslib': self.assertRaises(ValueError, graphs.NNGraph, Xin, @@ -209,6 +212,18 @@ def test_nngraph(self): graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type, order=order) + graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, + dist_type=dist_type, order=order, + center=False) + graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, + dist_type=dist_type, order=order, + rescale=False) + graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, + dist_type=dist_type, order=order, + rescale=False, center=False) self.assertRaises(ValueError, graphs.NNGraph, Xin, NNtype='badtype', backend=cur_backend, dist_type=dist_type) From 8e98b77b0f8d26f83ef10064e454a14811796539 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Thu, 29 Mar 2018 14:58:25 +0200 Subject: [PATCH 26/28] update doc for nmslib --- pygsp/graphs/nngraphs/nngraph.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index a5f766e8..0ba86c92 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -209,6 +209,7 @@ class NNGraph(Graph): - 'scipy-ckdtree'(default) will use scipy.spatial.cKDTree - 'scipy-pdist' will use scipy.spatial.distance.pdist (slowest but exact) - 'flann' use Fast Library for Approximate Nearest Neighbors (FLANN) + - 'nmslib' use nmslib for approximate nearest neighbors (faster in high-dimensional spaces) center : bool, optional Center the data so that it has zero mean (default is True) rescale : bool, optional From 08ae29fae719af6e551b531c381d52cf747e8ea0 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 9 Apr 2018 09:46:45 +0200 Subject: [PATCH 27/28] enable multithreading with ckdtree/nmslib --- pygsp/graphs/nngraphs/nngraph.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 0ba86c92..5d564089 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -6,6 +6,7 @@ from scipy import sparse import scipy.spatial as sps import scipy.spatial.distance as spsd +import multiprocessing from pygsp import utils from pygsp.graphs import Graph # prevent circular import in Python < 3.5 @@ -65,7 +66,8 @@ def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): def _knn_sp_ckdtree(X, num_neighbors, dist_type, order=0): kdt = sps.cKDTree(X) D, NN = kdt.query(X, k=(num_neighbors + 1), - p=_dist_translation['scipy-ckdtree'][dist_type]) + p=_dist_translation['scipy-ckdtree'][dist_type], + n_jobs=-1) return NN, D @@ -98,7 +100,8 @@ def _radius_sp_ckdtree(X, epsilon, dist_type, order=0): N, dim = np.shape(X) kdt = sps.cKDTree(X) nn = kdt.query_ball_point(X, r=epsilon, - p=_dist_translation['scipy-ckdtree'][dist_type]) + p=_dist_translation['scipy-ckdtree'][dist_type], + n_jobs=-1) D = [] NN = [] for k in range(N): @@ -125,13 +128,14 @@ def _knn_sp_pdist(X, num_neighbors, dist_type, order): def _knn_nmslib(X, num_neighbors, dist_type, order): N, dim = np.shape(X) + ncpu = multiprocessing.cpu_count() if dist_type == 'minkowski': raise ValueError('unsupported distance type (lp) for nmslib') nms = _import_nmslib() nmsidx = nms.init(space=_dist_translation['nmslib'][dist_type]) nmsidx.addDataPointBatch(X) nmsidx.createIndex() - q = nmsidx.knnQueryBatch(X, k=num_neighbors+1) + q = nmsidx.knnQueryBatch(X, k=num_neighbors+1, num_threads=int(ncpu/2)) nn, d = zip(*q) D = np.concatenate(d).reshape(N, num_neighbors+1) NN = np.concatenate(nn).reshape(N, num_neighbors+1) From a562896516c75514c1522337e12188c83f9dd57c Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Wed, 20 Jun 2018 11:45:33 +0200 Subject: [PATCH 28/28] fix _get_extra_repr --- pygsp/graphs/nngraphs/nngraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 73449c6c..ec91afe5 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -345,7 +345,7 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-ckdtree', center=True, def _get_extra_repr(self): return {'NNtype': self.NNtype, - 'use_flann': self.use_flann, + 'backend': self.backend, 'center': self.center, 'rescale': self.rescale, 'k': self.k,