fix: conditional import of multicore tsne
danellecline committed Jun 3, 2024
1 parent d87b1de commit 76ec895
Showing 1 changed file with 6 additions and 4 deletions.
sdcat/cluster/cluster.py: 10 changes (6 additions & 4 deletions)
@@ -13,10 +13,16 @@
 from umap import UMAP
 from hdbscan import HDBSCAN
 from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.preprocessing import MinMaxScaler
 from sdcat.logger import info, warn, debug, err
 from sdcat.cluster.utils import cluster_grid, crop_square_image
 from sdcat.cluster.embedding import fetch_embedding, has_cached_embedding, compute_norm_embedding
 
+if find_spec("multicore_tsne"):
+    from multicore_tsne import MulticoreTSNE as TSNE
+else:
+    from sklearn.manifold import TSNE
+
 if find_spec("cuml"):
     info('=======> USING GPU for HDBSCAN AND UMAP <=========')
     from cuml.cluster import HDBSCAN as cuHDBSCAN  # pylint: disable=E0611, E0401
@@ -74,10 +80,6 @@ def _run_hdbscan_assign(
     # Get the number of samples which is the number of rows in the dataframe - this is used mostly for calculating coverage
     num_samples = df.shape[0]
 
-    # from sklearn.manifold import TSNE
-    from sklearn.preprocessing import MinMaxScaler
-    from MulticoreTSNE import MulticoreTSNE as TSNE
-
     tsne = TSNE(n_components=2, perplexity=40, metric="cosine", n_jobs=8, random_state=42, verbose=True)
     embedding = tsne.fit_transform(df.values)
     x = MinMaxScaler().fit_transform(embedding)  # scale the embedding to 0-1
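For reference, a minimal standalone sketch of the optional-dependency import pattern this commit introduces. It assumes find_spec is imported from importlib.util near the top of cluster.py (not visible in this hunk), and the module name string "multicore_tsne" is taken from the diff; depending on how the accelerated t-SNE package is installed, the import name may differ (e.g. MulticoreTSNE), so treat the exact name as an assumption.

# Sketch only: prefer the multicore t-SNE package if installed, else fall back to scikit-learn.
from importlib.util import find_spec

if find_spec("multicore_tsne"):
    # Parallel t-SNE implementation is available; use it.
    from multicore_tsne import MulticoreTSNE as TSNE
else:
    # Fall back to scikit-learn's single-core TSNE, which exposes a
    # compatible fit_transform() interface.
    from sklearn.manifold import TSNE

# Downstream code constructs either class the same way, e.g.:
# tsne = TSNE(n_components=2, perplexity=40, n_jobs=8, random_state=42)

Resolving the import once at module load keeps the clustering code itself unchanged: _run_hdbscan_assign only sees a TSNE name with a fit_transform method, whichever backend was selected.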
