Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor NN graph building (included in #43) #21

Draft
wants to merge 33 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
1ee85b5
attempt to refactor nn graph building
naspert Mar 19, 2018
4bacd5c
update tests
naspert Mar 19, 2018
00bbcdd
fix typo
naspert Mar 19, 2018
b822333
fix tests (avoiding not implemented combinations)
naspert Mar 19, 2018
38aebd0
- fix missing space after colon in dictionary
naspert Mar 19, 2018
524c60f
fix (matlab) GSP url
naspert Mar 19, 2018
ae83814
throw exception when using FLANN + max_dist (produces incorrect results)
naspert Mar 19, 2018
62fc0ce
update test case to fit FLANN & max_dist exception
naspert Mar 19, 2018
6f473fa
implement nn graph using pdist using radius
naspert Mar 20, 2018
25ec6d2
implement radius nn graph with flann
naspert Mar 20, 2018
96b628e
flann returns the squared distance when called with 'euclidean' dista…
naspert Mar 20, 2018
09bbff4
compute sqrt of list properly
naspert Mar 20, 2018
27b9a03
use cyflann instead of pyflann (radius search not working)
naspert Mar 20, 2018
8a1f9b9
check nn graphs building against pdist reference
naspert Mar 20, 2018
6e9e2ac
cyflann needs the flann library to be installed on the system
naspert Mar 20, 2018
811de06
check nn graphs building against pdist reference
naspert Mar 20, 2018
813fe39
backport stuff from cyflann branch
naspert Mar 20, 2018
4a4d597
flann should (mostly) work for knn graphs
naspert Mar 20, 2018
53dffc1
fix pdist warnings
naspert Mar 21, 2018
1309e92
implement and use scipy-ckdtree as default (faster than kdtree)
naspert Mar 21, 2018
90ae9a8
Merge remote-tracking branch 'origin-nas/nn_cyflann' into nn_refactor
naspert Mar 22, 2018
648fa91
backport README changes from master
naspert Mar 22, 2018
96fa5f6
Merge branch 'master' of https://github.com/epfl-lts2/pygsp into nn_r…
naspert Mar 22, 2018
c26e449
Merge branch 'master' into nn_refactor
naspert Mar 22, 2018
8e7c553
add nmslib
naspert Mar 23, 2018
b83e467
test flann when not on windows
naspert Mar 26, 2018
28b7858
use the same code to build sparse matrix for knn and radius
naspert Mar 29, 2018
188c4a6
building the graph with rescale/center=False should also work
naspert Mar 29, 2018
59c131a
Merge pull request #1 from naspert/nmslib
naspert Mar 29, 2018
8e98b77
update doc for nmslib
naspert Mar 29, 2018
08ae29f
enable multithreading with ckdtree/nmslib
naspert Apr 9, 2018
57e9661
Merge branch 'master' into nn_refactor
naspert Jun 20, 2018
a562896
fix _get_extra_repr
naspert Jun 20, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 84 additions & 34 deletions pygsp/graphs/nngraphs/nngraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,21 @@

_logger = utils.build_logger(__name__)

# Maps the FLANN-style distance names accepted by NNGraph onto whatever
# each backend expects for the same metric.
#
# * 'scipy-kdtree' values are the Minkowski order handed to
#   ``KDTree.query(p=...)``.  There is no static 'minkowski' entry here:
#   NNGraph.__init__ writes ``_dist_translation['scipy-kdtree']['minkowski']``
#   at construction time from its ``order`` argument.
# * 'scipy-pdist' values are the metric names handed to
#   ``scipy.spatial.distance.pdist``.
_dist_translation = {
    'scipy-kdtree': {
        'euclidean': 2,
        'manhattan': 1,
        'max_dist': np.inf,
    },
    'scipy-pdist': {
        'euclidean': 'euclidean',
        'manhattan': 'cityblock',
        'max_dist': 'chebyshev',
        'minkowski': 'minkowski',
    },
}

def _import_pfl():
try:
Expand All @@ -20,6 +35,46 @@ def _import_pfl():
'pip (or conda) install pyflann (or pyflann3).')
return pfl



def _knn_sp_kdtree(_X, _num_neighbors, _dist_type, _order=0):
    """k-nearest-neighbor search backed by ``scipy.spatial.KDTree``.

    Parameters
    ----------
    _X : array_like
        Points to index; the same points are used as queries.
    _num_neighbors : int
        Number of neighbors requested per point.
    _dist_type : str
        Key into ``_dist_translation['scipy-kdtree']`` selecting the
        Minkowski order passed to the tree query.
    _order : optional
        Unused by this backend; accepted so that every backend can be
        called with the same four arguments.

    Returns
    -------
    tuple
        ``(indices, distances)`` as produced by ``KDTree.query``, in that
        order (note that the query itself returns them the other way round).
    """
    tree = spatial.KDTree(_X)
    minkowski_p = _dist_translation['scipy-kdtree'][_dist_type]
    # One extra neighbor is requested -- presumably because each point is
    # also its own closest match when querying the tree with its own data.
    distances, indices = tree.query(_X, k=_num_neighbors + 1, p=minkowski_p)
    return indices, distances

def _knn_flann(_X, _num_neighbors, _dist_type, _order):
    """k-nearest-neighbor search backed by FLANN (via pyflann).

    Parameters
    ----------
    _X : array_like
        Points to index; the same points are used as queries.
    _num_neighbors : int
        Number of neighbors requested per point.
    _dist_type : str
        Distance name, forwarded unchanged to ``set_distance_type``.
    _order
        Forwarded as ``order`` to ``set_distance_type``.

    Returns
    -------
    tuple
        ``(indices, distances)`` exactly as returned by ``FLANN.nn``.
    """
    pyflann = _import_pfl()
    # The distance type is configured on the pyflann module, not on the
    # index object, so it has to be set before the index is created.
    pyflann.set_distance_type(_dist_type, order=_order)
    index = pyflann.FLANN()

    # Default FLANN parameters (I tried changing the algorithm and
    # testing performance on huge matrices, but the default one
    # seems to work best).
    # NOTE(review): FLANN may report squared distances for 'euclidean' --
    # confirm whether callers need a square root applied here.
    neighbors, distances = index.nn(
        _X, _X,
        num_neighbors=(_num_neighbors + 1),
        algorithm='kdtree',
    )
    return neighbors, distances

def _radius_sp_kdtree(_X, _epsilon, _dist_type, order=0):
    """Radius (epsilon) neighbor search backed by ``scipy.spatial.KDTree``.

    Parameters
    ----------
    _X : array_like
        Points to index; the same points are used as queries.
    _epsilon : float
        Passed as ``distance_upper_bound`` to the tree query.
    _dist_type : str
        Key into ``_dist_translation['scipy-kdtree']`` selecting the
        Minkowski order passed to the tree query.
    order : optional
        Unused by this backend; accepted so that every backend can be
        called with the same four arguments.

    Returns
    -------
    tuple
        ``(indices, distances)`` as produced by ``KDTree.query``.
    """
    tree = spatial.KDTree(_X)
    minkowski_p = _dist_translation['scipy-kdtree'][_dist_type]
    # k=None leaves the neighbor count unbounded so that only the radius
    # limits the result.
    # NOTE(review): recent SciPy releases may no longer accept k=None --
    # confirm against the supported SciPy version.
    distances, indices = tree.query(
        _X,
        k=None,
        distance_upper_bound=_epsilon,
        p=minkowski_p,
    )
    return indices, distances

def _knn_sp_pdist(_X, _num_neighbors, _dist_type, _order):
    """Exact k-nearest-neighbor search from the full pairwise distance matrix.

    Parameters
    ----------
    _X : array_like
        Points, one per row.
    _num_neighbors : int
        Number of neighbors requested per point.
    _dist_type : str
        Key into ``_dist_translation['scipy-pdist']`` selecting the metric
        name passed to ``scipy.spatial.distance.pdist``.
    _order
        Minkowski order; only used when the selected metric is 'minkowski'.

    Returns
    -------
    tuple
        ``(indices, distances)``: for each row, the column indices of the
        ``_num_neighbors + 1`` smallest entries of the distance matrix and
        the corresponding distances, sorted by increasing distance.
    """
    metric = _dist_translation['scipy-pdist'][_dist_type]
    # ``p`` is a Minkowski-only argument: handing it to the other metrics
    # triggers warnings on old SciPy and a TypeError on newer releases.
    metric_kwargs = {'p': _order} if metric == 'minkowski' else {}
    pd = spatial.distance.squareform(
        spatial.distance.pdist(_X, metric, **metric_kwargs))

    # Sort once and read the distances through the same permutation, so the
    # returned indices and distances are guaranteed to line up.
    pdi = np.argsort(pd, axis=1)[:, :_num_neighbors + 1]
    rows = np.arange(pd.shape[0])[:, np.newaxis]
    pds = pd[rows, pdi]
    return pdi, pds

def _radius_sp_pdist(_X=None, _epsilon=None, _dist_type=None, _order=0):
    """Placeholder for the radius search of the 'scipy-pdist' backend.

    The parameters mirror the other radius backends so that the dispatch
    call ``backend(Xout, epsilon, dist_type, order)`` reaches the
    ``NotImplementedError`` below instead of failing earlier with a
    ``TypeError`` about unexpected positional arguments.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError()

def _radius_flann(_X=None, _epsilon=None, _dist_type=None, _order=0):
    """Placeholder for the radius search of the 'flann' backend.

    The parameters mirror the other radius backends so that the dispatch
    call ``backend(Xout, epsilon, dist_type, order)`` reaches the
    ``NotImplementedError`` below instead of failing earlier with a
    ``TypeError`` about unexpected positional arguments.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError()

class NNGraph(Graph):
r"""Nearest-neighbor graph from given point cloud.
Expand All @@ -33,9 +88,11 @@ class NNGraph(Graph):
Type of nearest neighbor graph to create. The options are 'knn' for
k-Nearest Neighbors or 'radius' for epsilon-Nearest Neighbors (default
is 'knn').
use_flann : bool, optional
Use Fast Library for Approximate Nearest Neighbors (FLANN) or not.
(default is False)
backend : {'scipy-kdtree', 'scipy-pdist', 'flann'}
Type of the backend for graph construction.
- 'scipy-kdtree'(default) will use scipy.spatial.KDTree
- 'scipy-pdist' will use scipy.spatial.distance.pdist (slowest but exact)
- 'flann' use Fast Library for Approximate Nearest Neighbors (FLANN)
center : bool, optional
Center the data so that it has zero mean (default is True)
rescale : bool, optional
Expand Down Expand Up @@ -74,20 +131,34 @@ class NNGraph(Graph):

"""

def __init__(self, Xin, NNtype='knn', use_flann=False, center=True,
def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True,
rescale=True, k=10, sigma=0.1, epsilon=0.01, gtype=None,
plotting={}, symmetrize_type='average', dist_type='euclidean',
order=0, **kwargs):

self.Xin = Xin
self.NNtype = NNtype
self.use_flann = use_flann
self.backend = backend
self.center = center
self.rescale = rescale
self.k = k
self.sigma = sigma
self.epsilon = epsilon

_dist_translation['scipy-kdtree']['minkowski'] = order

self._nn_functions = {
'knn': {
'scipy-kdtree':_knn_sp_kdtree,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you miss a space after the colon

'scipy-pdist': _knn_sp_pdist,
'flann': _knn_flann
},
'radius': {
'scipy-kdtree':_radius_sp_kdtree,
'scipy-pdist': _radius_sp_pdist,
'flann': _radius_flann
},
}

if gtype is None:
gtype = 'nearest neighbors'
else:
Expand All @@ -108,33 +179,15 @@ def __init__(self, Xin, NNtype='knn', use_flann=False, center=True,
scale = np.power(N, 1. / float(min(d, 3))) / 10.
Xout *= scale / bounding_radius

# Translate distance type string to corresponding Minkowski order.
dist_translation = {"euclidean": 2,
"manhattan": 1,
"max_dist": np.inf,
"minkowski": order
}


if self.NNtype == 'knn':
spi = np.zeros((N * k))
spj = np.zeros((N * k))
spv = np.zeros((N * k))

if self.use_flann:
pfl = _import_pfl()
pfl.set_distance_type(dist_type, order=order)
flann = pfl.FLANN()

# Default FLANN parameters (I tried changing the algorithm and
# testing performance on huge matrices, but the default one
# seems to work best).
NN, D = flann.nn(Xout, Xout, num_neighbors=(k + 1),
algorithm='kdtree')

else:
kdt = spatial.KDTree(Xout)
D, NN = kdt.query(Xout, k=(k + 1),
p=dist_translation[dist_type])
NN, D = self._nn_functions[NNtype][backend](Xout, k,
dist_type, order)

for i in range(N):
spi[i * k:(i + 1) * k] = np.kron(np.ones((k)), i)
Expand All @@ -144,13 +197,10 @@ def __init__(self, Xin, NNtype='knn', use_flann=False, center=True,

elif self.NNtype == 'radius':

kdt = spatial.KDTree(Xout)
D, NN = kdt.query(Xout, k=None, distance_upper_bound=epsilon,
p=dist_translation[dist_type])
count = 0
for i in range(N):
count = count + len(NN[i])

NN, D = self._nn_functions[NNtype][backend](Xout, epsilon,
dist_type, order)
count = sum(map(len, NN))

spi = np.zeros((count))
spj = np.zeros((count))
spv = np.zeros((count))
Expand Down
29 changes: 16 additions & 13 deletions pygsp/tests/test_graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,19 +182,22 @@ def test_set_coordinates(self):
def test_nngraph(self):
Xin = np.arange(90).reshape(30, 3)
dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski']

for dist_type in dist_types:

# Only p-norms with 1<=p<=infinity permitted.
if dist_type != 'minkowski':
graphs.NNGraph(Xin, NNtype='radius', dist_type=dist_type)
graphs.NNGraph(Xin, NNtype='knn', dist_type=dist_type)

# Distance type unsupported in the C bindings,
# use the C++ bindings instead.
if dist_type != 'max_dist':
graphs.NNGraph(Xin, use_flann=True, NNtype='knn',
dist_type=dist_type)
backends = ['scipy-kdtree', 'scipy-pdist', 'flann']
for cur_backend in backends:
for dist_type in dist_types:

# Only p-norms with 1<=p<=infinity permitted.
if dist_type != 'minkowski':
graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend,
dist_type=dist_type)
graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend,
dist_type=dist_type)

# Distance type unsupported in the C bindings,
# use the C++ bindings instead.
if dist_type != 'max_dist':
graphs.NNGraph(Xin, backend=cur_backend, NNtype='knn',
dist_type=dist_type)

def test_bunny(self):
graphs.Bunny()
Expand Down