1. Jernej Kos
  2. python-ml-explain

Commits

Jernej Kos  committed 9e545c8

Implemented discrete model explanation.

  • Participants
  • Parent commits 0f034c8
  • Branches default

Comments (0)

Files changed (1)

File explainer.py

View file
  • Ignore whitespace
 
   return dict(explanation = explanation, prediction = prediction)
 
+def explain_value(model, data, feature, value, iterations = 200):
+  """
+  Explains a single value of a single feature. The returned explanation
+  is a dictionary containing the contribution mean and standard deviation.
+  
+  NOTE: It currently only works for discrete features!
+  
+  :param model: Scikit-learn model instance with enabled probability
+    estimation
+  :param data: Dataset used for estimating feature range
+  :param feature: Feature to explain
+  :param value: Feature value to explain
+  :param iterations: Number of iterations in Monte Carlo simulation
+  """
+  cls_typ = data.dtypes[-1].type
+  orig_data = data
+  data = data[data.columns[:-1]]
+  
+  contribs = []
+  for j in xrange(iterations):
+    # Use first instance to get attribute format
+    instance1 = data[0:1]
+    # Replace all attributes with random values
+    for ifeature in data.columns[:-1]:
+      instance1[ifeature] = select_random_value(data[ifeature])
+    # Make another instance and replace the chosen feature with a
+    # pre-selected value
+    instance2 = instance1.copy()
+    instance2[feature] = value
+    # Compute the predicted class
+    prediction = cls_typ(model.predict(np.asarray(instance2))[0])
+    p_index = sorted(orig_data[orig_data.columns[-1]].unique()).index(prediction)
+    # Append contribution
+    contribs.append(
+      model.predict_proba(instance2)[0][p_index] - \
+      model.predict_proba(instance1)[0][p_index]
+    )
+  
+  return dict(mean = np.mean(contribs), std = np.std(contribs))
+
+def explain_discrete_model(model, data, iterations = 200):
+  """
+  Explains the complete model (all values of all features).
+  
+  :param model: Scikit-learn model instance with enabled probability
+    estimation
+  :param data: Dataset used for estimating feature range
+  :param iterations: Number of iterations in Monte Carlo simulation
+  """
+  explanation = []
+  for feature in data.columns[:-1]:
+    values, means, stds = [], [], []
+    for value in sorted(data[feature].unique()):
+      e = explain_value(model, data, feature, value)
+      values.append(value)
+      means.append(e['mean'])
+      stds.append(e['std'])
+    
+    explanation.append((feature, dict(values = values, means = means, stds = stds)))
+  
+  return explanation
+
 def select_random_value(feature):
   """
   Selects a random value from a feature's range.
 
   plt.savefig(filename)
 
+def plot_model_explanation(result, filename = "output.png"):
+  """
+  Placeholder for plotting a model explanation. Not implemented yet: the
+  function currently does nothing and returns None.
+
+  :param result: Model explanation to plot (presumably the list returned by
+    explain_discrete_model -- TODO confirm once implemented)
+  :param filename: Output filename (currently unused)
+  """
+  # TODO: implement the plot
+  pass
+