From 3ed3ea71f302c5f9f570e5bae9bdac74f29b9395 Mon Sep 17 00:00:00 2001
From: unknown <Yongkang>
Date: Thu, 28 Dec 2023 19:26:58 +0800
Subject: [PATCH] perf: Improve the common functions for clustering

---
 .../data_mining/model/clustering.py           | 225 ++++++--------
 .../model/func/algo_clustering/_common.py     | 291 ++++++++++++++++--
 .../model/func/algo_clustering/_dbscan.py     |  94 ------
 .../model/func/algo_clustering/_kmeans.py     | 161 ----------
 4 files changed, 352 insertions(+), 419 deletions(-)
diff --git a/geochemistrypi/data_mining/model/clustering.py b/geochemistrypi/data_mining/model/clustering.py
index 93bdf85b..8aeb3cd0 100644
--- a/geochemistrypi/data_mining/model/clustering.py
+++ b/geochemistrypi/data_mining/model/clustering.py
@@ -12,9 +12,9 @@
 from ..constants import MLFLOW_ARTIFACT_DATA_PATH, MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH
 from ..utils.base import clear_output, save_data, save_fig, save_text
 from ._base import WorkflowBase
-from .func.algo_clustering._common import plot_results, plot_silhouette_diagram, score
-from .func.algo_clustering._dbscan import dbscan_manual_hyper_parameters, dbscan_result_plot
-from .func.algo_clustering._kmeans import kmeans_manual_hyper_parameters, plot_silhouette_diagram_kmeans, scatter2d, scatter3d
+from .func.algo_clustering._common import plot_silhouette_diagram, plot_silhouette_value_diagram, scatter2d, scatter3d, score
+from .func.algo_clustering._dbscan import dbscan_manual_hyper_parameters
+from .func.algo_clustering._kmeans import kmeans_manual_hyper_parameters
 
 
 class ClusteringWorkflowBase(WorkflowBase):
@@ -64,26 +64,44 @@ def _score(data: pd.DataFrame, labels: pd.DataFrame, algorithm_name: str, store_
         mlflow.log_metrics(scores)
 
     @staticmethod
-    def _plot_results(data: pd.DataFrame, labels: pd.DataFrame, algorithm_name: str, cluster_centers_: pd.DataFrame, local_path: str, mlflow_path: str) -> None:
-        """Plot the cluster_results ."""
-        print("-----* results diagram *-----")
-        plot_results(data, labels, algorithm_name, cluster_centers_)
-        save_fig(f"results - {algorithm_name}", local_path, mlflow_path)
-        data = pd.concat([data, labels], axis=1)
-        save_data(data, f"results - {algorithm_name}", local_path, mlflow_path)
+    def _scatter2d(data: pd.DataFrame, labels: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
+        """Plot the two-dimensional diagram of the clustering result."""
+        print("-----* Cluster Two-Dimensional Diagram *-----")
+        scatter2d(data, labels, algorithm_name)
+        save_fig(f"Cluster Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
+        data_with_labels = pd.concat([data, labels], axis=1)
+        save_data(data_with_labels, f"Cluster Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
+
+    @staticmethod
+    def _scatter3d(data: pd.DataFrame, labels: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
+        """Plot the three-dimensional diagram of the clustering result, then save the figure and the labelled data."""
+        print("-----* Cluster Three-Dimensional Diagram *-----")
+        scatter3d(data, labels, algorithm_name)
+        save_fig(f"Cluster Three-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
+        data_with_labels = pd.concat([data, labels], axis=1)
+        save_data(data_with_labels, f"Cluster Three-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
 
     @staticmethod
-    def _plot_silhouette_diagram(data: pd.DataFrame, labels: pd.DataFrame, cluster_centers_: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
+    def _plot_silhouette_diagram(data: pd.DataFrame, labels: pd.DataFrame, model: object, cluster_centers_: np.ndarray, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
         """Plot the silhouette diagram of the clustering result."""
         print("-----* Silhouette Diagram *-----")
-        plot_silhouette_diagram(data, labels, algorithm_name)
+        plot_silhouette_diagram(data, labels, cluster_centers_, model, algorithm_name)
         save_fig(f"Silhouette Diagram - {algorithm_name}", local_path, mlflow_path)
         data_with_labels = pd.concat([data, labels], axis=1)
         save_data(data_with_labels, "Silhouette Diagram - Data With Labels", local_path, mlflow_path)
-        if isinstance(cluster_centers_, pd.DataFrame):
+        if not isinstance(cluster_centers_, str):
             cluster_center_data = pd.DataFrame(cluster_centers_, columns=data.columns)
             save_data(cluster_center_data, "Silhouette Diagram - Cluster Centers", local_path, mlflow_path)
 
+    @staticmethod
+    def _plot_silhouette_value_diagram(data: pd.DataFrame, labels: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
+        """Plot the silhouette value diagram of the clustering result."""
+        print("-----* Silhouette value Diagram *-----")
+        plot_silhouette_value_diagram(data, labels, algorithm_name)
+        save_fig(f"Silhouette value Diagram - {algorithm_name}", local_path, mlflow_path)
+        data_with_labels = pd.concat([data, labels], axis=1)
+        save_data(data_with_labels, "Silhouette value Diagram - Data With Labels", local_path, mlflow_path)
+
     def common_components(self) -> None:
         """Invoke all common application functions for clustering algorithms."""
         GEOPI_OUTPUT_METRICS_PATH = os.getenv("GEOPI_OUTPUT_METRICS_PATH")
@@ -94,18 +112,68 @@ def common_components(self) -> None:
             algorithm_name=self.naming,
             store_path=GEOPI_OUTPUT_METRICS_PATH,
         )
-        # self._plot_results(
-        #     data=self.X,
-        #     labels=self.clustering_result["clustering result"],
-        #     cluster_centers_=self.get_cluster_centers(),
-        #     algorithm_name=self.naming,
-        #     local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
-        #     mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
-        # )
+        if self.X.shape[1] > 3:
+            # more than three features: choose two of the dimensions to draw
+            two_dimen_axis_index, two_dimen_data = self.choose_dimension_data(self.X, 2)
+            self._scatter2d(
+                data=two_dimen_data,
+                labels=self.clustering_result["clustering result"],
+                algorithm_name=self.naming,
+                local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
+                mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
+            )
+
+            # choose three of the dimensions to draw
+            three_dimen_axis_index, three_dimen_data = self.choose_dimension_data(self.X, 3)
+            self._scatter3d(
+                data=three_dimen_data,
+                labels=self.clustering_result["clustering result"],
+                algorithm_name=self.naming,
+                local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
+                mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
+            )
+        elif self.X.shape[1] == 3:
+            # exactly three features: choose two of the dimensions for the 2D plot
+            two_dimen_axis_index, two_dimen_data = self.choose_dimension_data(self.X, 2)
+            self._scatter2d(
+                data=two_dimen_data,
+                labels=self.clustering_result["clustering result"],
+                algorithm_name=self.naming,
+                local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
+                mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
+            )
+
+            # no need to choose: all three features are drawn in the 3D plot
+            self._scatter3d(
+                data=self.X,
+                labels=self.clustering_result["clustering result"],
+                algorithm_name=self.naming,
+                local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
+                mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
+            )
+        elif self.X.shape[1] == 2:
+            self._scatter2d(
+                data=self.X,
+                labels=self.clustering_result["clustering result"],
+                algorithm_name=self.naming,
+                local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
+                mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
+            )
+        else:
+            pass
+
         self._plot_silhouette_diagram(
             data=self.X,
             labels=self.clustering_result["clustering result"],
             cluster_centers_=self.get_cluster_centers(),
+            model=self.model,
+            algorithm_name=self.naming,
+            local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
+            mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
+        )
+        self._plot_silhouette_value_diagram(
+            data=self.X,
+            labels=self.clustering_result["clustering result"],
             algorithm_name=self.naming,
             local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
             mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
@@ -244,111 +312,13 @@ def manual_hyper_parameters(cls) -> Dict:
         clear_output()
         return hyper_parameters
 
-    @staticmethod
-    def _plot_silhouette_diagram_kmeans(
-        data: pd.DataFrame,
-        cluster_labels: pd.DataFrame,
-        cluster_centers_: np.ndarray,
-        n_clusters: int,
-        algorithm_name: str,
-        local_path: str,
-        mlflow_path: str,
-    ) -> None:
-        """Plot the silhouette diagram of the clustering result."""
-        print("-----* KMeans's Silhouette Diagram *-----")
-        plot_silhouette_diagram_kmeans(data, cluster_labels, cluster_centers_, n_clusters, algorithm_name)
-        save_fig(f"KMeans's Silhouette Diagram - {algorithm_name}", local_path, mlflow_path)
-        data_with_labels = pd.concat([data, cluster_labels], axis=1)
-        save_data(data_with_labels, "KMeans's Silhouette Diagram - Data With Labels", local_path, mlflow_path)
-        cluster_center_data = pd.DataFrame(cluster_centers_, columns=data.columns)
-        save_data(cluster_center_data, "KMeans's Silhouette Diagram - Cluster Centers", local_path, mlflow_path)
-
-    @staticmethod
-    def _scatter2d(data: pd.DataFrame, cluster_labels: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
-        """Plot the two-dimensional diagram of the clustering result."""
-        print("-----* Cluster Two-Dimensional Diagram *-----")
-        scatter2d(data, cluster_labels, algorithm_name)
-        save_fig(f"Cluster Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
-        data_with_labels = pd.concat([data, cluster_labels], axis=1)
-        save_data(data_with_labels, f"Cluster Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
-
-    @staticmethod
-    def _scatter3d(data: pd.DataFrame, cluster_labels: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
-        """Plot the three-dimensional diagram of the clustering result."""
-        print("-----* Cluster Three-Dimensional Diagram *-----")
-        scatter3d(data, cluster_labels, algorithm_name)
-        save_fig(f"Cluster Three-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
-        data_with_labels = pd.concat([data, cluster_labels], axis=1)
-        save_data(data_with_labels, f"Cluster Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
-
     def special_components(self, **kwargs: Union[Dict, np.ndarray, int]) -> None:
         """Invoke all special application functions for this algorithms by Scikit-learn framework."""
         GEOPI_OUTPUT_METRICS_PATH = os.getenv("GEOPI_OUTPUT_METRICS_PATH")
-        GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH = os.getenv("GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH")
         self._get_inertia_scores(
             algorithm_name=self.naming,
             store_path=GEOPI_OUTPUT_METRICS_PATH,
         )
-        self._plot_silhouette_diagram_kmeans(
-            data=self.X,
-            cluster_labels=self.clustering_result["clustering result"],
-            cluster_centers_=self.get_cluster_centers(),
-            n_clusters=self.n_clusters,
-            algorithm_name=self.naming,
-            local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
-            mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
-        )
-
-        # Draw graphs when the number of principal components > 3
-        if self.X.shape[1] >= 3:
-            # choose two of dimensions to draw
-            two_dimen_axis_index, two_dimen_data = self.choose_dimension_data(self.X, 2)
-            self._scatter2d(
-                data=two_dimen_data,
-                cluster_labels=self.clustering_result["clustering result"],
-                algorithm_name=self.naming,
-                local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
-                mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
-            )
-
-            # choose three of dimensions to draw
-            three_dimen_axis_index, three_dimen_data = self.choose_dimension_data(self.X, 3)
-            self._scatter3d(
-                data=three_dimen_data,
-                cluster_labels=self.clustering_result["clustering result"],
-                algorithm_name=self.naming,
-                local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
-                mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
-            )
-        elif self.X.shape[1] == 3:
-            # choose two of dimensions to draw
-            two_dimen_axis_index, two_dimen_data = self.choose_dimension_data(self.X, 2)
-            self._scatter2d(
-                data=two_dimen_data,
-                cluster_labels=self.clustering_result["clustering result"],
-                algorithm_name=self.naming,
-                local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
-                mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
-            )
-
-            # no need to choose
-            self._scatter3d(
-                data=self.X,
-                cluster_labels=self.clustering_result["clustering result"],
-                algorithm_name=self.naming,
-                local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
-                mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
-            )
-        elif self.X.shape[1] == 2:
-            self._scatter2d(
-                data=self.X,
-                cluster_labels=self.clustering_result["clustering result"],
-                algorithm_name=self.naming,
-                local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
-                mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
-            )
-        else:
-            pass
 
 
 class DBSCANClustering(ClusteringWorkflowBase):
@@ -440,25 +410,8 @@ def manual_hyper_parameters(cls) -> Dict:
         clear_output()
         return hyper_parameters
 
-    @staticmethod
-    def _clustering_result_plot(X: pd.DataFrame, trained_model: any, algorithm_name: str, imag_config: dict, local_path: str, mlflow_path: str) -> None:
-        """Plot the clustering result in 2D graph."""
-        print("-------** Cluster Two-Dimensional Diagram **----------")
-        dbscan_result_plot(X, trained_model, imag_config, algorithm_name)
-        save_fig(f"Cluster Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
-        save_data(X, f"Cluster Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
-
     def special_components(self, **kwargs: Union[Dict, np.ndarray, int]) -> None:
         """Invoke all special application functions for this algorithms by Scikit-learn framework."""
-        GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH = os.getenv("GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH")
-        self._clustering_result_plot(
-            X=self.X,
-            trained_model=self.model,
-            algorithm_name=self.naming,
-            imag_config=self.image_config,
-            local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
-            mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
-        )
 
 
 class AffinityPropagationClustering(ClusteringWorkflowBase):
diff --git a/geochemistrypi/data_mining/model/func/algo_clustering/_common.py b/geochemistrypi/data_mining/model/func/algo_clustering/_common.py
index 9a58d65f..855d6a3f 100644
--- a/geochemistrypi/data_mining/model/func/algo_clustering/_common.py
+++ b/geochemistrypi/data_mining/model/func/algo_clustering/_common.py
@@ -1,31 +1,34 @@
 # -*- coding: utf-8 -*-
+from itertools import cycle
 from typing import Dict
 
+import matplotlib.cm as cm
 import matplotlib.pyplot as plt
+import numpy as np
 import pandas as pd
 import seaborn as sns
 from rich import print
 from sklearn.metrics import calinski_harabasz_score, silhouette_samples, silhouette_score
 
 
-def score(X: pd.DataFrame, labels: pd.DataFrame) -> Dict:
+def score(data: pd.DataFrame, labels: pd.DataFrame) -> Dict:
     """Calculate the scores of the clustering model.
 
     Parameters
     ----------
-    X : pd.DataFrame (n_samples, n_components)
+    data : pd.DataFrame (n_samples, n_components)
         The true values.
 
-    label : pd.DataFrame (n_samples, n_components)
-        The labels values.
+    labels : pd.DataFrame (n_samples, n_components)
+        Labels of each point.
 
     Returns
     -------
     scores : dict
         The scores of the clustering model.
     """
-    silhouette = silhouette_score(X, labels)
-    calinski_harabaz = calinski_harabasz_score(X, labels)
+    silhouette = silhouette_score(data, labels)
+    calinski_harabaz = calinski_harabasz_score(data, labels)
     print("silhouette_score: ", silhouette)
     print("calinski_harabasz_score:", calinski_harabaz)
     scores = {
@@ -35,49 +38,281 @@ def score(X: pd.DataFrame, labels: pd.DataFrame) -> Dict:
     return scores
 
 
-def plot_results(X, labels, algorithm_name: str, cluster_centers_=None) -> None:
-    """Plot clustering results of the clustering model.
+def scatter2d(data: pd.DataFrame, labels: pd.DataFrame, algorithm_name: str) -> None:
+    """
+    Draw the result-2D diagram for analysis.
 
     Parameters
     ----------
-    X : pd.DataFrame (n_samples, n_components)
-        The true values.
+    data : pd.DataFrame (n_samples, n_components)
+       The true values.
 
-    label : pd.DataFrame (n_samples, n_components)
-        The labels values.
+    labels : pd.DataFrame (n_samples,)
+        Labels of each point.
 
     algorithm_name : str
-        The name of the algorithm model.
+        the name of the algorithm
+    """
+    markers = ["+", "v", ".", "d", "o", "s", "1", "D", "X", "^", "p", "<", "*", "H", "3", "P"]
+    colors = [
+        "#1f77b4",
+        "#ff7f0e",
+        "#2ca02c",
+        "#d62728",
+        "#9467bd",
+        "#8c564b",
+        "#e377c2",
+        "#7f7f7f",
+        "#bcbd22",
+        "#17becf",
+        "#33a02c",
+        "#1f77b4",
+        "#ff7f0e",
+        "#2ca02c",
+        "#d62728",
+        "#9467bd",
+        "#8c564b",
+        "#e377c2",
+        "#7f7f7f",
+        "#bcbd22",
+    ]
+
+    marker_cycle = cycle(markers)
+    color_cycle = cycle(colors)
+
+    fig = plt.figure()
+    fig.set_size_inches(18, 10)
+    plt.subplot(111)
+    for i, label in enumerate(set(labels)):
+        cluster_data = data[labels == label]
+        color = next(color_cycle)
+        marker = next(marker_cycle)
+        plt.scatter(cluster_data.iloc[:, 0], cluster_data.iloc[:, 1], c=color, marker=marker)
+    plt.xlabel(f"{data.columns[0]}")
+    plt.ylabel(f"{data.columns[1]}")
+    plt.title(f"Cluster Data Bi-plot - {algorithm_name}")
+
+
+def scatter3d(data: pd.DataFrame, labels: pd.DataFrame, algorithm_name: str) -> None:
+    """
+    Draw the result-3D diagram for analysis.
+
+    Parameters
+    ----------
+    data : pd.DataFrame (n_samples, n_components)
+       The true values; the first three columns are drawn.
+
+    labels : pd.DataFrame (n_samples,)
+        Labels of each point.
 
-    cluster_centers
-        The center of the algorithm model.
+    algorithm_name : str
+        the name of the algorithm
     """
-    sns.scatterplot(x=X.iloc[:, 0], y=X.iloc[:, 1], hue=labels, palette="viridis", s=50, alpha=0.8)
+    # One figure holds both panels: the unlabelled data (left) and the clustered data (right).
+    namelist = data.columns.values.tolist()
+    fig = plt.figure(figsize=(12, 6), facecolor="w")
+    plt.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.9)
+
+    ax = fig.add_subplot(121, projection="3d")
+    ax.scatter(data.iloc[:, 0], data.iloc[:, 1], data.iloc[:, 2], alpha=0.3, c="#FF0000", s=6)
+    ax.set_xlabel(namelist[0])
+    ax.set_ylabel(namelist[1])
+    ax.set_zlabel(namelist[2])
+    plt.grid(True)
+
+    ax2 = fig.add_subplot(122, projection="3d")
+    markers = ["+", "v", ".", "d", "o", "s", "1", "D", "X", "^", "p", "<", "*", "H", "3", "P"]
+    colors = [
+        "#1f77b4",
+        "#ff7f0e",
+        "#2ca02c",
+        "#d62728",
+        "#9467bd",
+        "#8c564b",
+        "#e377c2",
+        "#7f7f7f",
+        "#bcbd22",
+        "#17becf",
+        "#33a02c",
+        "#1f77b4",
+        "#ff7f0e",
+        "#2ca02c",
+        "#d62728",
+        "#9467bd",
+        "#8c564b",
+        "#e377c2",
+        "#7f7f7f",
+        "#bcbd22",
+    ]
+    marker_cycle = cycle(markers)
+    color_cycle = cycle(colors)
+
+    for label in set(labels):
+        cluster_data = data[labels == label]
+        color = next(color_cycle)
+        marker = next(marker_cycle)
+        ax2.scatter(cluster_data.iloc[:, 0], cluster_data.iloc[:, 1], cluster_data.iloc[:, 2], c=color, marker=marker, s=6, edgecolors="none")
+
+    ax2.set_xlabel(namelist[0])
+    ax2.set_ylabel(namelist[1])
+    ax2.set_zlabel(namelist[2])
+    plt.grid(True)
+    ax.set_title(f"Base Data Tri-plot - {algorithm_name}")
+    ax2.set_title(f"Cluster Data Tri-plot - {algorithm_name}")
+
+
+def plot_silhouette_diagram(data: pd.DataFrame, labels: pd.DataFrame, cluster_centers_: np.ndarray, model: object, algorithm_name: str) -> None:
+    """
+    Draw the silhouette diagram for analysis.
+
+    Parameters
+    ----------
+    data : pd.DataFrame (n_samples, n_components)
+       The true values.
+
+    labels : pd.DataFrame (n_samples,)
+        Labels of each point.
+
+    cluster_centers_ : np.ndarray (n_clusters, n_components) or str
+        Coordinates of cluster centers (when a str is passed, no centers are drawn). If the algorithm stops before fully converging (see tol and max_iter), these will not be consistent with labels_.
+
+    model : sklearn algorithm model
+        The sklearn algorithm model trained with data; its n_clusters attribute is used when present.
+
+    algorithm_name : str
+        the name of the algorithm
+
+    References
+    ----------
+    Silhouette analysis can be used to study the separation distance between the resulting clusters.
+    The silhouette plot displays a measure of how close each point in one cluster is to other points in the
+    neighboring clusters and thus provides a way to assess parameters like number of clusters visually.
+    This measure has a range of [-1, 1].
+
+    https://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_silhouette_analysis.html
+    """
+    if hasattr(model, "n_clusters"):
+        n_clusters = model.n_clusters
+    else:
+        n_clusters = len(set(labels) - {-1})  # label -1 is treated as noise, not counted as a cluster
+
+    # Create a subplot with 1 row and 2 columns
+    fig, (ax1, ax2) = plt.subplots(1, 2)
+    fig.set_size_inches(18, 10)
+
+    # The 1st subplot is the silhouette plot
+    # The silhouette coefficient can range from -1, 1 but in this example all
+    # lie within [-0.1, 1]
+    ax1.set_xlim([-0.1, 1])
+    # The (n_clusters+1)*10 is for inserting blank space between silhouette
+    # plots of individual clusters, to demarcate them clearly.
+    ax1.set_ylim([0, len(data) + (n_clusters + 1) * 10])
+
+    # The silhouette_score gives the average value for all the samples.
+    # This gives a perspective into the density and separation of the formed
+    # clusters
+    silhouette_avg = silhouette_score(data, labels)
+    print(
+        "For n_clusters =",
+        n_clusters,
+        "The average silhouette_score is :",
+        silhouette_avg,
+    )
+
+    # Compute the silhouette scores for each sample
+    sample_silhouette_values = silhouette_samples(data, labels)
+
+    if n_clusters >= 20:
+        Fontsize = 5
+        y_long = 7
+    else:
+        Fontsize = None
+        y_long = 10
+
+    y_lower = 10
+    for i in range(n_clusters):
+        # Aggregate the silhouette scores for samples belonging to
+        # cluster i, and sort them
+        ith_cluster_silhouette_values = sample_silhouette_values[labels == i]
+
+        ith_cluster_silhouette_values.sort()
+
+        size_cluster_i = ith_cluster_silhouette_values.shape[0]
+        y_upper = y_lower + size_cluster_i
+
+        color = cm.nipy_spectral(float(i) / n_clusters)
+        ax1.fill_betweenx(
+            np.arange(y_lower, y_upper),
+            0,
+            ith_cluster_silhouette_values,
+            facecolor=color,
+            edgecolor=color,
+            alpha=0.7,
+        )
+
+        # Label the silhouette plots with their cluster numbers at the middle
+        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i), fontsize=Fontsize)
+
+        # Compute the new y_lower for next plot
+        y_lower = y_upper + y_long  # 10 for the 0 samples
+
+    ax1.set_title("The silhouette plot for the various clusters.")
+    ax1.set_xlabel("The silhouette coefficient values")
+    ax1.set_ylabel("Cluster label")
+
+    # The vertical line for average silhouette score of all the values
+    ax1.axvline(x=silhouette_avg, color="red", linestyle="--")
+
+    ax1.set_yticks([])  # Clear the yaxis labels / ticks
+    ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
+
+    # 2nd Plot showing the actual clusters formed
+    colors = cm.nipy_spectral(labels.astype(float) / n_clusters)
+    ax2.scatter(data.iloc[:, 0], data.iloc[:, 1], marker=".", s=30, lw=0, alpha=0.7, c=colors, edgecolor="k")
+
     if not isinstance(cluster_centers_, str):
-        plt.scatter(cluster_centers_[:, 0], cluster_centers_[:, 1], c="red", marker="X", s=200, label="Cluster Centers")
-    plt.title(f"results - {algorithm_name}")
-    plt.xlabel("Feature 1")
-    plt.ylabel("Feature 2")
-    plt.legend()
+        # Labeling the clusters
+        centers = cluster_centers_
+        # Draw white circles at cluster centers
+        ax2.scatter(
+            centers[:, 0],
+            centers[:, 1],
+            marker="o",
+            c="white",
+            alpha=1,
+            s=200,
+            edgecolor="k",
+        )
+
+        for i, c in enumerate(centers):
+            ax2.scatter(c[0], c[1], marker="$%d$" % i, alpha=1, s=50, edgecolor="k")
+
+    ax2.set_title("The visualization of the clustered data.")
+    ax2.set_xlabel("Feature space for the 1st feature")
+    ax2.set_ylabel("Feature space for the 2nd feature")
+    plt.suptitle(
+        f"Silhouette analysis for clustering on sample data with n_clusters = {n_clusters} - {algorithm_name}",
+        fontsize=14,
+        fontweight="bold",
+    )
 
 
-def plot_silhouette_diagram(X, labels, algorithm_name: str):
+def plot_silhouette_value_diagram(data, labels, algorithm_name: str):
     """Calculate the scores of the clustering model.
 
     Parameters
     ----------
-    X : pd.DataFrame (n_samples, n_components)
+    data : pd.DataFrame (n_samples, n_components)
         The true values.
 
-    label : pd.DataFrame (n_samples, n_components)
-        The labels values.
+    labels : pd.DataFrame (n_samples, n_components)
+        Labels of each point.
 
     algorithm_name : str
         The name of the algorithm model.
     """
-    silhouette_values = silhouette_samples(X, labels)
+    silhouette_values = silhouette_samples(data, labels)
     sns.histplot(silhouette_values, bins=30, kde=True)
-    plt.title(f"Silhouette Diagram - {algorithm_name}")
+    plt.title(f"Silhouette value Diagram - {algorithm_name}")
     plt.xlabel("Silhouette Coefficient")
     plt.ylabel("Frequency")
-    plt.legend()
diff --git a/geochemistrypi/data_mining/model/func/algo_clustering/_dbscan.py b/geochemistrypi/data_mining/model/func/algo_clustering/_dbscan.py
index 951dca13..003334bf 100644
--- a/geochemistrypi/data_mining/model/func/algo_clustering/_dbscan.py
+++ b/geochemistrypi/data_mining/model/func/algo_clustering/_dbscan.py
@@ -1,8 +1,5 @@
 from typing import Dict
 
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
 from rich import print
 
 from ....constants import SECTION
@@ -47,94 +44,3 @@ def dbscan_manual_hyper_parameters() -> Dict:
         "p": p,
     }
     return hyper_parameters
-
-
-def dbscan_result_plot(data: pd.DataFrame, trained_model: any, image_config: dict, algorithm_name: str) -> None:
-    """
-    Draw the clustering result diagram for analysis.
-
-    Parameters
-    ----------
-    data: pd.DataFrame (n_samples, n_components)
-        Data for silhouette.
-
-    trained_model: any
-        The algorithm which to be used
-
-    algorithm_name : str
-        the name of the algorithm
-
-    References
-    ----------
-    The DBSCAN algorithm is deterministic, always generating the same clusters when given the same data in the same order.
-
-    https://scikit-learn.org/stable/modules/clustering.html/dbscan
-
-    """
-    db = trained_model.fit(data)
-    labels = trained_model.labels_
-    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
-    core_samples_mask[db.core_sample_indices_] = True
-    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
-    print("Estimated number of clusters: %d" % n_clusters_)
-    unique_labels = set(labels)
-
-    # create drawing canvas
-    fig, ax = plt.subplots(figsize=(image_config["width"], image_config["height"]), dpi=image_config["dpi"])
-
-    # draw the main content
-    colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
-    for k, col in zip(unique_labels, colors):
-        if k == -1:
-            col = [0, 0, 0, 1]
-        class_member_mask = labels == k
-        xy = data[class_member_mask & core_samples_mask]
-        ax.plot(
-            xy.iloc[:, 0],
-            xy.iloc[:, 1],
-            image_config["marker_angle"],
-            markerfacecolor=tuple(col),
-            markeredgecolor=image_config["edgecolor"],
-            markersize=image_config["markersize1"],
-            alpha=image_config["alpha1"],
-        )
-        xy = data[class_member_mask & ~core_samples_mask]
-        ax.plot(
-            xy.iloc[:, 0],
-            xy.iloc[:, 1],
-            image_config["marker_circle"],
-            markerfacecolor=tuple(col),
-            markeredgecolor=image_config["edgecolor"],
-            markersize=image_config["markersize2"],
-            alpha=image_config["alpha2"],
-        )
-
-    # automatically optimize picture layout structure
-    fig.tight_layout()
-    xmin, xmax = ax.get_xlim()
-    ymin, ymax = ax.get_ylim()
-    x_adjustment = (xmax - xmin) * 0.1
-    y_adjustment = (ymax - ymin) * 0.1
-    ax.axis([xmin - x_adjustment, xmax + x_adjustment, ymin - y_adjustment, ymax + y_adjustment])
-
-    # convert the font of the axes
-    plt.tick_params(labelsize=image_config["labelsize"])  # adjust the font size of the axis label
-    # plt.setp(ax.get_xticklabels(), rotation=image_config['xrotation'], ha=image_config['xha'],
-    #          rotation_mode="anchor")  # axis label rotation Angle
-    # plt.setp(ax.get_yticklabels(), rotation=image_config['rot'], ha=image_config['yha'],
-    #          rotation_mode="anchor")  # axis label rotation Angle
-    x1_label = ax.get_xticklabels()  # adjust the axis label font
-    [x1_label_temp.set_fontname(image_config["axislabelfont"]) for x1_label_temp in x1_label]
-    y1_label = ax.get_yticklabels()
-    [y1_label_temp.set_fontname(image_config["axislabelfont"]) for y1_label_temp in y1_label]
-
-    ax.set_title(
-        label=algorithm_name,
-        fontdict={
-            "size": image_config["title_size"],
-            "color": image_config["title_color"],
-            "family": image_config["title_font"],
-        },
-        loc=image_config["title_location"],
-        pad=image_config["title_pad"],
-    )
diff --git a/geochemistrypi/data_mining/model/func/algo_clustering/_kmeans.py b/geochemistrypi/data_mining/model/func/algo_clustering/_kmeans.py
index 5e3b2654..82668a55 100644
--- a/geochemistrypi/data_mining/model/func/algo_clustering/_kmeans.py
+++ b/geochemistrypi/data_mining/model/func/algo_clustering/_kmeans.py
@@ -1,11 +1,6 @@
 from typing import Dict
 
-import matplotlib.cm as cm
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
 from rich import print
-from sklearn.metrics import silhouette_samples, silhouette_score
 
 from ....constants import SECTION
 from ....data.data_readiness import float_input, num_input, str_input
@@ -38,159 +33,3 @@ def kmeans_manual_hyper_parameters() -> Dict:
     algorithm = str_input(algorithms, SECTION[2])
     hyper_parameters = {"n_clusters": n_clusters, "init": init, "max_iter": max_iters, "tol": tol, "algorithm": algorithm}
     return hyper_parameters
-
-
-def plot_silhouette_diagram_kmeans(data: pd.DataFrame, cluster_labels: pd.DataFrame, cluster_centers_: np.ndarray, n_clusters: int, algorithm_name: str) -> None:
-    """
-    Draw the silhouette diagram for analysis.
-
-    Parameters
-    ----------
-    data: pd.DataFrame (n_samples, n_components)
-        Data for silhouette.
-
-    cluster_labels: pd.DataFrame (n_samples,)
-        Labels of each point.
-
-    cluster_centers_: np.ndarray (n_samples,)
-        Coordinates of cluster centers. If the algorithm stops before fully converging (see tol and max_iter), these will not be consistent with labels_.
-
-    n_clusters: int
-        Number of features seen during fit.
-
-    algorithm_name : str
-        the name of the algorithm
-
-    References
-    ----------
-    Silhouette analysis can be used to study the separation distance between the resulting clusters.
-    The silhouette plot displays a measure of how close each point in one cluster is to other points in the
-    neighboring clusters and thus provides a way to assess parameters like number of clusters visually.
-    This measure has a range of [-1, 1].
-
-    https://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_silhouette_analysis.html
-    """
-    # Create a subplot with 1 row and 2 columns
-    fig, (ax1, ax2) = plt.subplots(1, 2)
-    fig.set_size_inches(18, 7)
-
-    # The 1st subplot is the silhouette plot
-    # The silhouette coefficient can range from -1, 1 but in this example all
-    # lie within [-0.1, 1]
-    ax1.set_xlim([-0.1, 1])
-    # The (n_clusters+1)*10 is for inserting blank space between silhouette
-    # plots of individual clusters, to demarcate them clearly.
-    ax1.set_ylim([0, len(data) + (n_clusters + 1) * 10])
-
-    # The silhouette_score gives the average value for all the samples.
-    # This gives a perspective into the density and separation of the formed
-    # clusters
-    silhouette_avg = silhouette_score(data, cluster_labels)
-    print(
-        "For n_clusters =",
-        n_clusters,
-        "The average silhouette_score is :",
-        silhouette_avg,
-    )
-
-    # Compute the silhouette scores for each sample
-    sample_silhouette_values = silhouette_samples(data, cluster_labels)
-
-    y_lower = 10
-    for i in range(n_clusters):
-        # Aggregate the silhouette scores for samples belonging to
-        # cluster i, and sort them
-        ith_cluster_silhouette_values = sample_silhouette_values[cluster_labels == i]
-
-        ith_cluster_silhouette_values.sort()
-
-        size_cluster_i = ith_cluster_silhouette_values.shape[0]
-        y_upper = y_lower + size_cluster_i
-
-        color = cm.nipy_spectral(float(i) / n_clusters)
-        ax1.fill_betweenx(
-            np.arange(y_lower, y_upper),
-            0,
-            ith_cluster_silhouette_values,
-            facecolor=color,
-            edgecolor=color,
-            alpha=0.7,
-        )
-
-        # Label the silhouette plots with their cluster numbers at the middle
-        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
-
-        # Compute the new y_lower for next plot
-        y_lower = y_upper + 10  # 10 for the 0 samples
-
-    ax1.set_title("The silhouette plot for the various clusters.")
-    ax1.set_xlabel("The silhouette coefficient values")
-    ax1.set_ylabel("Cluster label")
-
-    # The vertical line for average silhouette score of all the values
-    ax1.axvline(x=silhouette_avg, color="red", linestyle="--")
-
-    ax1.set_yticks([])  # Clear the yaxis labels / ticks
-    ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
-
-    # 2nd Plot showing the actual clusters formed
-    colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)
-    ax2.scatter(data.iloc[:, 0], data.iloc[:, 1], marker=".", s=30, lw=0, alpha=0.7, c=colors, edgecolor="k")
-
-    # Labeling the clusters
-    centers = cluster_centers_
-    # Draw white circles at cluster centers
-    ax2.scatter(
-        centers[:, 0],
-        centers[:, 1],
-        marker="o",
-        c="white",
-        alpha=1,
-        s=200,
-        edgecolor="k",
-    )
-
-    for i, c in enumerate(centers):
-        ax2.scatter(c[0], c[1], marker="$%d$" % i, alpha=1, s=50, edgecolor="k")
-
-    ax2.set_title("The visualization of the clustered data.")
-    ax2.set_xlabel("Feature space for the 1st feature")
-    ax2.set_ylabel("Feature space for the 2nd feature")
-    plt.suptitle(
-        f"Silhouette analysis for KMeans clustering on sample data with n_clusters = %d - {algorithm_name}" % n_clusters,
-        fontsize=14,
-        fontweight="bold",
-    )
-
-
-def scatter2d(data: pd.DataFrame, cluster_labels: pd.DataFrame, algorithm_name: str) -> None:
-    plt.figure()
-    plt.subplot(111)
-    plt.scatter(data.iloc[:, 0], data.iloc[:, 1], c=cluster_labels)
-
-    plt.xlabel(f"{data.columns[0]}")
-    plt.ylabel(f"{data.columns[1]}")
-    plt.title(f"Cluster Data Bi-plot - {algorithm_name}")
-
-
-def scatter3d(data: pd.DataFrame, cluster_labels: pd.DataFrame, algorithm_name: str) -> None:
-    plt.figure()
-    namelist = data.columns.values.tolist()
-    fig = plt.figure(figsize=(12, 6), facecolor="w")
-    plt.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.9)
-
-    ax = fig.add_subplot(121, projection="3d")
-    ax.scatter(data.iloc[:, 0], data.iloc[:, 1], data.iloc[:, 2], alpha=0.3, c="#FF0000", s=6)
-    ax.set_xlabel(namelist[0])
-    ax.set_ylabel(namelist[1])
-    ax.set_zlabel(namelist[2])
-    plt.grid(True)
-
-    ax2 = fig.add_subplot(122, projection="3d")
-    ax2.scatter(data.iloc[:, 0], data.iloc[:, 1], data.iloc[:, 2], c=cluster_labels, s=6, cmap=plt.cm.Paired, edgecolors="none")
-    ax2.set_xlabel(namelist[0])
-    ax2.set_ylabel(namelist[1])
-    ax2.set_zlabel(namelist[2])
-    plt.grid(True)
-    ax.set_title(f"Base Data Tri-plot - {algorithm_name}")
-    ax2.set_title(f"Cluster Data Tri-plot - {algorithm_name}")