From 184650f2d66864e4d7110dcbfec33b301c7dbb6b Mon Sep 17 00:00:00 2001 From: sebastian Date: Wed, 8 Nov 2017 10:18:09 +0000 Subject: [PATCH] A first test to accelerate DBSCAN, issue #48 --- photon_stream/__init__.py | 1 + photon_stream/quantized_dbscan.py | 265 ++++++++++++++++++++++++++++++ 2 files changed, 266 insertions(+) create mode 100644 photon_stream/quantized_dbscan.py diff --git a/photon_stream/__init__.py b/photon_stream/__init__.py index 8afbea6..f4c9939 100644 --- a/photon_stream/__init__.py +++ b/photon_stream/__init__.py @@ -17,4 +17,5 @@ from . import finger_histogram from . import representations from . import geometry +from . import quantized_dbscan from .geometry import GEOMETRY \ No newline at end of file diff --git a/photon_stream/quantized_dbscan.py b/photon_stream/quantized_dbscan.py new file mode 100644 index 0000000..3acb23b --- /dev/null +++ b/photon_stream/quantized_dbscan.py @@ -0,0 +1,265 @@ +import numpy as np +from .io.magic_constants import NUMBER_OF_PIXELS +from .io.magic_constants import TIME_SLICE_DURATION_S +from .io.magic_constants import NUMBER_OF_TIME_SLICES_OFFSET_AFTER_BEGIN_OF_ROI +from .io.magic_constants import NUMBER_OF_TIME_SLICES +from .io.binary import LINEBREAK +from .geometry import GEOMETRY +from . import representations +from tqdm import tqdm + +MAX_NEIGHBORS = 250 + +def scaled_time_axis(deg_over_s=0.35e9): + rad_over_s = np.deg2rad(deg_over_s) + off = NUMBER_OF_TIME_SLICES_OFFSET_AFTER_BEGIN_OF_ROI*TIME_SLICE_DURATION_S + time_axis = np.linspace( + off, + off + NUMBER_OF_TIME_SLICES*TIME_SLICE_DURATION_S, + NUMBER_OF_TIME_SLICES, + endpoint=False + ) + return time_axis * rad_over_s + + +def discrete_positions_in_photon_stream(time_axis): + poss = np.zeros( + shape=(NUMBER_OF_PIXELS*NUMBER_OF_TIME_SLICES, 3), + dtype=np.float32 + ) + + poss[:,0] = np.repeat( + GEOMETRY.x_angle, + NUMBER_OF_TIME_SLICES + ) + + poss[:,1] = np.repeat( + GEOMETRY.y_angle, + NUMBER_OF_TIME_SLICES + ) + + poss[:,2] = np.tile( + time_axis, + NUMBER_OF_PIXELS + ) + + return poss + + +def time_neighbor_mask(time_axis, eps): + t_mask = np.zeros( + shape=(NUMBER_OF_TIME_SLICES, NUMBER_OF_TIME_SLICES), + dtype=np.bool + ) + for i, t_slice_i in enumerate(time_axis): + for q, t_slice_q in enumerate(time_axis): + if np.abs(t_slice_i - t_slice_q) < eps: + t_mask[i, q] = True + return t_mask + + +def x_neighbor_mask(x_axis, eps): + xmask = np.zeros( + shape=(NUMBER_OF_PIXELS, NUMBER_OF_PIXELS), + dtype=np.bool + ) + for i, x_i in enumerate(x_axis): + for q, x_q in enumerate(x_axis): + if np.abs(x_i - x_q) < eps: + xmask[i, q] = True + return xmask + + +def y_neighbor_mask(y_axis, eps): + ymask = np.zeros( + shape=(NUMBER_OF_PIXELS, NUMBER_OF_PIXELS), + dtype=np.bool + ) + for i, y_i in enumerate(y_axis): + for q, y_q in enumerate(y_axis): + if np.abs(y_i - y_q) < eps: + ymask[i, q] = True + return ymask + + +def cell_idx_2_chid_idx(cell): + return cell//NUMBER_OF_TIME_SLICES + +def cell_idx_2_time_idx(cell): + return np.mod(cell, NUMBER_OF_TIME_SLICES) + + +def chid_slice_mask_for_cell(cell, x_mask, y_mask): + chid = cell_idx_2_chid_idx(cell) + x_sl = x_mask[chid, :] + y_sl = y_mask[chid, :] + chid_mask = np.arange(NUMBER_OF_PIXELS)[x_sl&y_sl] + return chid_mask + + +def time_slice_mask_for_cell(cell, t_mask): + tslice = cell_idx_2_time_idx(cell) + time_mask = np.arange(NUMBER_OF_TIME_SLICES)[t_mask[tslice]] + return time_mask + + +def cell_mask_for_cell(cell, x_mask, y_mask, t_mask): + cm = chid_slice_mask_for_cell(cell, x_mask, y_mask) + tm = time_slice_mask_for_cell(cell, t_mask) + m = np.zeros( + shape=(cm.shape[0], tm.shape[0]), + dtype=np.int64 + ) + for t_i in range(m.shape[1]): + m[:, t_i] = cm + (tm[t_i])*1440 + return m.flatten() + + +def _create_neighborhood_index( + discrete_positions, + x_mask, + y_mask, + t_mask, + eps=0.00798080, + max_neighbors=MAX_NEIGHBORS +): + neighbors = - np.ones( + shape=(NUMBER_OF_PIXELS*NUMBER_OF_TIME_SLICES, max_neighbors), + dtype=np.int32 + ) + for cell_p in tqdm(range(NUMBER_OF_PIXELS*NUMBER_OF_TIME_SLICES)): + n = 0 + neighbor_candidates = cell_mask_for_cell( + cell=cell_p, + x_mask=x_mask, + y_mask=y_mask, + t_mask=t_mask, + ) + for cell_q in neighbor_candidates: + pos_p = discrete_positions[cell_p] + pos_q = discrete_positions[cell_q] + distance = np.linalg.norm(pos_p - pos_q) + if distance < eps: + if cell_p != cell_q: + neighbors[cell_p, n] = cell_q + n += 1 + return neighbors + + +def create_neighborhood_index(eps_deg=0.457267): + time_axis = scaled_time_axis() + poss = discrete_positions_in_photon_stream(time_axis=time_axis) + + eps = np.deg2rad(eps_deg) + + t_mask = time_neighbor_mask(time_axis=time_axis, eps=1.05*eps) + x_mask = x_neighbor_mask(x_axis=GEOMETRY.x_angle, eps=1.05*eps) + y_mask = y_neighbor_mask(y_axis=GEOMETRY.y_angle, eps=1.05*eps) + + m = _create_neighborhood_index( + eps=eps, + discrete_positions=poss, + x_mask=x_mask, + y_mask=y_mask, + t_mask=t_mask, + max_neighbors=MAX_NEIGHBORS, + ) + + return m + + +def write_index(neighborhood_index, path): + with open(path, 'wb') as fo: + fo.write(neighborhood_index.tobytes()) + + +def read_index(path): + with open(path, 'rb') as fi: + raw = np.fromstring( + fi.read(), + dtype=np.int32 + ) + max_neighbors = raw.shape[0] // (NUMBER_OF_PIXELS*NUMBER_OF_TIME_SLICES) + return raw.reshape( + ( + NUMBER_OF_PIXELS*NUMBER_OF_TIME_SLICES, + max_neighbors + ) + ) + + +def raw_phs_to_image_sequence_idx(raw_phs): + number_photons = raw_phs.shape[0] - NUMBER_OF_PIXELS + idxs = - np.ones(number_photons, np.int64) + chid = 0 + photon = 0 + for symbol in raw_phs: + if symbol == LINEBREAK: + chid += 1 + else: + time_idx = symbol - NUMBER_OF_TIME_SLICES_OFFSET_AFTER_BEGIN_OF_ROI + idxs[photon] = chid + time_idx * NUMBER_OF_PIXELS + photon += 1 + return idxs + + +def dbscan(raw_phs, neighborhood_index, min_samples=20): + number_photons = raw_phs.shape[0] - NUMBER_OF_PIXELS + clusters = - np.ones(number_photons, np.int64) + visited = np.zeros(number_photons, np.bool) + ims_idx_photons = raw_phs_to_image_sequence_idx(raw_phs) + ims = representations.raw_phs_to_image_sequence(raw_phs) + + c = 0 + + for i, ims_idx_photon in enumerate(ims_idx_photons): + if not visited[i]: + neighbors = neighborhood_index[ims_idx_photon] + neighbors = neighbors[neighbors >= 0] + + if ims[neighbors].sum() > min_samples: + # expand cluster + clusters[i] = c + + visited[i] = True + + return clusters + + +def query(ims_idx_photons, neighbors): + + isin = np.zeros(neighbors.shape[0], dtype=np.bool) + ids = np.searchsorted(ims_idx_photons, neighbors) + + for i, n in enumerate(neighbors): + + ims_idx_photons[ids[i]] + + + + + + + + +""" +# run it + +time_axis = scaled_time_axis() +poss = discrete_positions_in_photon_stream(time_axis=time_axis) + +eps = 0.1 +abs_eps = eps * (2.0 * GEOMETRY.fov_radius) + +t_mask = time_neighbor_mask(time_axis=time_axis, eps=1.1*abs_eps) +x_mask = x_neighbor_mask(x_axis=GEOMETRY.x_angle, eps=1.1*abs_eps) +y_mask = y_neighbor_mask(y_axis=GEOMETRY.y_angle, eps=1.1*abs_eps) + +m = create_neighborhood_index( + discrete_positions=poss, + x_mask=x_mask, + y_mask=y_mask, + t_mask=t_mask, + max_neighbors=MAX_NEIGHBORS, +) +"""