shap_explain.py
import pandas as pd
import shap
import numpy as np
import time
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from externals.LORE.datamanager import prepare_dataset
from xailib.explainers.shap_explainer_tab import ShapXAITabularExplainer
from xailib.models.sklearn_classifier_wrapper import sklearn_classifier_wrapper
from xailib.models.keras_classifier_wrapper import keras_classifier_wrapper
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
if __name__ == '__main__':
    source_file = 'datasets/german_credit.csv'
    class_field = 'default'

    # Load and transform dataset and select one row to classify and explain
    df = pd.read_csv(source_file, skipinitialspace=True, na_values='?', keep_default_na=True)
    df, feature_names, class_values, numeric_columns, \
        rdf, real_feature_names, features_map = prepare_dataset(df, class_field)
    print(df.head())
    print(class_values)

    # Learn a model from the data
    test_size = 0.3
    random_state = 0
    X_train, X_test, Y_train, Y_test = train_test_split(df[feature_names], df[class_field],
                                                        test_size=test_size,
                                                        random_state=random_state,
                                                        stratify=df[class_field])
    # bb = LogisticRegression(C=1, penalty='l2')
    # bb = CatBoostClassifier(custom_loss=['Accuracy'], random_seed=42, logging_level='Silent')
    bb = RandomForestClassifier(n_estimators=20, random_state=random_state)
    bb.fit(X_train.values, Y_train.values)
    bbox = sklearn_classifier_wrapper(bb)

    # Pick one training instance and show the black-box prediction for it
    inst = X_train.iloc[18].values
    print(inst, bb.predict(inst.reshape(1, -1)))

    # Build the SHAP explainer and explain the selected instance
    start = time.time()
    explainer = ShapXAITabularExplainer(bbox)
    print(X_train.shape)
    # background = X_train[np.random.choice(X_train.shape[0], 100, replace=False)]
    # Use a TreeExplainer with interventional feature perturbation,
    # taking the first 100 training rows as background data
    config = {'explainer': 'tree', 'X_train': X_train.iloc[0:100].values, 'feature_pert': 'interventional'}
    explainer.fit(config)
    print('building an explanation')
    exp = explainer.explain(inst)
    print(exp)
    end = time.time()
    print('time ', end - start)
    # explainer.force_plot(inst, 18)
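
    # --- Optional: visualize SHAP values with the shap library directly ---
    # Sketch added for illustration, not part of the original script: it bypasses the
    # xailib wrapper and applies shap's own TreeExplainer/summary_plot to the same
    # RandomForest model and background data. Depending on the shap version,
    # shap_values may be a list with one array per class or a single 3-D array,
    # so the class-1 selection below is an assumption.
    shap_explainer = shap.TreeExplainer(bb, data=X_train.iloc[0:100].values,
                                        feature_perturbation='interventional')
    shap_values = shap_explainer.shap_values(X_test.values)
    vals = shap_values[1] if isinstance(shap_values, list) else shap_values
    if getattr(vals, 'ndim', 2) == 3:
        vals = vals[:, :, 1]
    shap.summary_plot(vals, X_test.values, feature_names=feature_names)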