Add filter functions for explanations and shap_values #120

Status: Open. Wants to merge 31 commits into base branch `main`.

Commits (31):
- 5af9efb: basic implementation of filter_by_level function (iwan-tee, Apr 14, 2024)
- 5e52e86: basic implementation of filter_by_class function (iwan-tee, Apr 14, 2024)
- 5a8e9be: basic implementation of combine_filters function (iwan-tee, Apr 14, 2024)
- cceb6d1: codestyling (iwan-tee, Apr 14, 2024)
- ab1e3ab: helper function get_class_level added (iwan-tee, Apr 14, 2024)
- 719dd83: another helper functions added (iwan-tee, Apr 14, 2024)
- 343be04: some bugs fixed (iwan-tee, Apr 14, 2024)
- af32acb: small plot_lcpl_explainer actualisation (iwan-tee, Apr 14, 2024)
- 3645e38: some small changes in plot_lcpl_explainer (iwan-tee, Apr 14, 2024)
- d1cf3e0: another changes in plot_lcpl_explainer (iwan-tee, Apr 14, 2024)
- 7e6bcb3: functionality duplication removes + docstrings added (partially) (iwan-tee, Apr 14, 2024)
- 31e88b2: some actualization in plot_lcpl_explainer (iwan-tee, Apr 14, 2024)
- 849b30f: some refactoring (iwan-tee, Apr 14, 2024)
- 9877374: some refactoring (iwan-tee, Apr 14, 2024)
- 38a9060: some cases handled + tests written (iwan-tee, Apr 15, 2024)
- 159bd17: small changes (iwan-tee, Apr 15, 2024)
- 5faf54b: pydocstyle (iwan-tee, Apr 15, 2024)
- c9e6aa2: documentation fixed (iwan-tee, Apr 15, 2024)
- 3d2bf3b: Algorithm overviem completed (iwan-tee, Apr 15, 2024)
- a2e7700: plot_lcppn_explainer actualized (iwan-tee, Apr 15, 2024)
- 3b8239f: shap_multi_plot added (iwan-tee, Apr 15, 2024)
- c06e5e6: part of the code substituted with newer methods (iwan-tee, Apr 15, 2024)
- 63c635f: pydocstyling (iwan-tee, Apr 15, 2024)
- 6953517: some cases handled and new tests added (iwan-tee, Apr 15, 2024)
- e3a54ee: matplotlib requirement added (iwan-tee, Apr 15, 2024)
- 73f74f6: algorithm explaining updated (iwan-tee, Apr 15, 2024)
- 4ab46ad: small changes in indices selection (iwan-tee, Apr 15, 2024)
- e431803: some skipiff added (iwan-tee, Apr 16, 2024)
- bf64bfe: ray support added and used as a default (instead of joblib) (iwan-tee, Apr 16, 2024)
- 1047a2c: pydocs (iwan-tee, Apr 16, 2024)
- c1dd746: pydocs (iwan-tee, Apr 16, 2024)
3 changes: 2 additions & 1 deletion Pipfile
@@ -7,6 +7,7 @@ name = "pypi"
networkx = "*"
numpy = "*"
scikit-learn = "*"
matplotlib = "*"

[dev-packages]
pytest = "*"
@@ -20,4 +21,4 @@ sphinx-rtd-theme = "0.5.2"
[extras]
ray = "*"
shap = "0.44.1"
xarray = "*"
xarray = "*"
35 changes: 7 additions & 28 deletions docs/examples/plot_lcpl_explainer.py
@@ -25,35 +25,14 @@

# Define Explainer
explainer = Explainer(classifier, data=X_train, mode="tree")
explanations = explainer.explain(X_test.values)
print(explanations)

# Let's filter the Shapley values corresponding to the Covid (level 1)
# and 'Respiratory' (level 0)
# Now, our task is to see how feature importance may vary from level to level
# We are going to calculate shap_values for 'Respiratory', 'Covid' and plot what we calculated
# This can be done with a single method .shap_multi_plot, which additionally returns calculated explanations

covid_idx = classifier.predict(X_test)[:, 1] == "Covid"

shap_filter_covid = {"level": 1, "class": "Covid", "sample": covid_idx}
shap_filter_resp = {"level": 0, "class": "Respiratory", "sample": covid_idx}
shap_val_covid = explanations.sel(**shap_filter_covid)
shap_val_resp = explanations.sel(**shap_filter_resp)


# This code snippet demonstrates how to visually compare the mean absolute SHAP values for 'Covid' vs. 'Respiratory' diseases.

# Feature names for the X-axis
feature_names = X_train.columns.values

# SHAP values for 'Covid'
shap_values_covid = shap_val_covid.shap_values.values

# SHAP values for 'Respiratory'
shap_values_resp = shap_val_resp.shap_values.values

shap.summary_plot(
[shap_values_covid, shap_values_resp],
features=X_test.iloc[covid_idx],
feature_names=X_train.columns.values,
plot_type="bar",
explanations = explainer.shap_multi_plot(
class_names=["Covid", "Respiratory"],
features=X_test.values,
pred_class="Respiratory",
features_names=X_train.columns.values,
)
18 changes: 10 additions & 8 deletions docs/examples/plot_lcppn_explainer.py
@@ -25,23 +25,25 @@
# Train local classifier per parent node
classifier.fit(X_train, Y_train)

# Get predictions
predictions = classifier.predict(X_test)

# Define Explainer
explainer = Explainer(classifier, data=X_train.values, mode="tree")
explanations = explainer.explain(X_test.values)
print(explanations)

# Filter samples which only predicted "Respiratory" at first level
respiratory_idx = classifier.predict(X_test)[:, 0] == "Respiratory"

# Specify additional filters to obtain only level 0
shap_filter = {"level": 0, "class": "Respiratory", "sample": respiratory_idx}

# Use .sel() method to apply the filter and obtain filtered results
shap_val_respiratory = explanations.sel(shap_filter)
shap_val_respiratory = explainer.filter_by_class(
[Review comment] mirand863 (Collaborator), Apr 15, 2024: "Alternatively, I guess you can probably call the method get_sample_indices inside this other method filter_by_class, simplifying it for the user"
[Reply] iwan-tee (Author): "settled in this option for now"

explanations,
class_name="Respiratory",
sample_indices=explainer.get_sample_indices(predictions, "Respiratory"),
)
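The review exchange above is about whether `filter_by_class` should call `get_sample_indices` internally or leave the two steps to the user. As a rough, hypothetical sketch of the two-step pattern (this is not hiclass's actual implementation; the array shapes and helper bodies are assumptions for illustration):

```python
import numpy as np

def get_sample_indices(predictions, class_name):
    # Hypothetical helper: boolean mask of samples whose predicted
    # path contains class_name at any level.
    return np.any(predictions == class_name, axis=1)

def filter_by_class(shap_values, class_names, class_name, sample_indices):
    # Hypothetical filter: take the SHAP slice for class_name,
    # restricted to the selected samples.
    class_idx = class_names.index(class_name)
    return shap_values[class_idx][sample_indices]

# Toy data: 2 samples, prediction paths with 2 levels each
predictions = np.array([["Respiratory", "Covid"],
                        ["Gastrointestinal", "Norovirus"]])
# Toy SHAP array shaped (class, sample, feature)
shap_values = np.arange(12, dtype=float).reshape(2, 2, 3)

idx = get_sample_indices(predictions, "Respiratory")
filtered = filter_by_class(shap_values, ["Respiratory", "Gastrointestinal"],
                           "Respiratory", idx)
print(idx.tolist())       # [True, False]
print(filtered.tolist())  # [[0.0, 1.0, 2.0]]
```

Folding the helper call into `filter_by_class`, as the reviewer suggests, would reduce the user-facing API to a single call at the cost of a less composable building block.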


# Plot feature importance on test set
shap.plots.violin(
shap_val_respiratory.shap_values,
shap_val_respiratory,
feature_names=X_train.columns.values,
plot_size=(13, 8),
)
29 changes: 25 additions & 4 deletions docs/source/algorithms/explainer.rst
@@ -111,21 +111,42 @@ Code sample

lcppn.fit(x_train, y_train)
explainer = Explainer(lcppn, data=x_train, mode="tree")

# One of the possible ways to get explanations
explanations = explainer.explain(x_test)


++++++++++++++++++++++++++
Filtering and Manipulation
++++++++++++++++++++++++++

The Explanation object returned by the Explainer is built using the :literal:`xarray.Dataset` data structure, which enables the application of any xarray dataset operation. For example, specific values can be quickly filtered out. To illustrate the filtering operation, suppose we have SHAP values stored in an Explanation object named :literal:`explanation`.
When you work with the `Explanation` object generated by the `Explainer`, you are working with an `xarray.Dataset`. This structure is both robust and flexible, and supports the full range of xarray dataset operations, in particular filtering.

**Practical Example: Filtering SHAP Values**

A common use case is to extract SHAP values for only the predicted nodes. In the local classifier per parent node approach, each node except the leaf nodes represents a classifier. Hence, to find the SHAP values, we can follow the prediction path up to the penultimate element.
To achieve this, we can use xarray's :literal:`.sel()` method:
Consider a scenario where you need to focus only on SHAP values corresponding to predicted nodes. In the `LocalClassifierPerParentNode` model, each node except the leaf nodes acts as a classifier, so you will often want to isolate the SHAP values along a prediction path up to its penultimate node. Here is how you can do this efficiently with the `sel()` method from xarray:

.. code-block:: python

# Creating a mask for selecting SHAP values for predicted classes
mask = {'class': lcppn.predict(x_test).flatten()[:-1]}
x = explanations.sel(mask).shap_values
selected_shap_values = explanations.sel(mask).shap_values
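For intuition, here is a toy sketch of what `.sel()` returns on a minimal hand-built Dataset (the dimension names mirror the Explanation object described above; the class names and values are made up for illustration):

```python
import numpy as np
import xarray as xr

# Minimal stand-in for an Explanation: dims (class, sample, feature)
explanations = xr.Dataset(
    {"shap_values": (("class", "sample", "feature"),
                     np.arange(12, dtype=float).reshape(2, 2, 3))},
    coords={"class": ["Respiratory", "Covid"],
            "sample": [0, 1],
            "feature": ["f1", "f2", "f3"]},
)

# Select one class and a subset of samples, like the mask above
subset = explanations.sel({"class": "Respiratory", "sample": [0]})
print(subset.shap_values.values.tolist())  # [[0.0, 1.0, 2.0]]
```

Selecting a single label along the `class` dimension drops that dimension, while passing a list for `sample` keeps the dimension with only the requested entries.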

**Advanced Visualization: Multi-Plot SHAP Values**

For an even deeper analysis, you might want to visualize the SHAP values. The `shap_multi_plot()` method not only filters the data but also provides a visual representation of the SHAP values for specified classes. Below is an example that illustrates how to plot SHAP values for the classes "Covid" and "Respiratory":

.. code-block:: python

# Generating and plotting explanations for specific classes
explanations = explainer.shap_multi_plot(
class_names=["Covid", "Respiratory"],
features=x_test,
pred_class="Covid",
# Feature names can be specified if x_train is a DataFrame with named columns
feature_names=x_train.columns.values
)



More advanced usage and capabilities can be found at the `Xarray.Dataset <https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html>`_ documentation.