# Source
#
# python-ml-explain / explainer.py

import numpy as np
import matplotlib.pyplot as plt

def explain_instance(model, data, instance, iterations = 200):
  """
  Explains a dataset instance using the specified model. The returned
  explanation is a dictionary containing contribution of each individual
  feature and the model's prediction.

  For every feature, the contribution is the Monte Carlo average of the
  difference in the predicted-class probability between two perturbed
  copies of the instance: one where the features preceding it in a random
  permutation are replaced by random values, and one where the feature
  itself is replaced by a random value as well.

  NOTE: It currently only works for discrete features!

  :param model: Scikit-learn model instance with enabled probability
    estimation
  :param data: Dataset used for estimating feature range; its last column
    is treated as the class column
  :param instance: Instance to explain; its last column is treated as the
    class column
  :param iterations: Number of iterations in Monte Carlo simulation
  :returns: Dictionary with keys ``explanation`` (list of
    ``(feature, contribution)`` tuples in column order) and ``prediction``
    (the predicted class of the instance)
  """
  # Positional access: the class column is the last one, whatever its label.
  cls_typ = instance.dtypes.iloc[-1].type
  features = list(data.columns[:-1])
  instance = instance[features]
  prediction = cls_typ(model.predict(np.asarray(instance))[0])
  # Column of predict_proba to read; assumes the probability columns are
  # ordered like the sorted class labels found in the dataset.
  p_index = sorted(data[data.columns[-1]].unique()).index(prediction)
  data = data[features]

  nFeatures = len(data.columns)
  explanation = []
  for feature in data.columns:
    contribution = 0.0
    for _ in range(iterations):
      perm = np.random.choice(data.columns, nFeatures, replace = False)
      # Randomise every feature that precedes `feature` in the permutation.
      tmp = instance.copy()
      for other in perm:
        if other == feature:
          break
        tmp[other] = select_random_value(data[other])

      # Same perturbed instance, but with `feature` itself randomised too.
      tmp2 = tmp.copy()
      tmp2[feature] = select_random_value(data[feature])
      contribution += model.predict_proba(tmp)[0][p_index] - model.predict_proba(tmp2)[0][p_index]

    contribution /= iterations
    explanation.append((feature, contribution))

  return dict(explanation = explanation, prediction = prediction)

def explain_value(model, data, feature, value, iterations = 200):
  """
  Explains a single value of a single feature. The returned explanation
  is a dictionary containing the contribution mean and standard deviation.

  In every iteration a fully random instance is generated, a second copy
  gets the chosen feature forced to the chosen value, and the contribution
  is the difference in probability (second minus first) of the class
  predicted for the second copy.

  NOTE: It currently only works for discrete features!

  :param model: Scikit-learn model instance with enabled probability
    estimation
  :param data: Dataset used for estimating feature range; its last column
    is treated as the class column
  :param feature: Feature to explain
  :param value: Feature value to explain
  :param iterations: Number of iterations in Monte Carlo simulation
  :returns: Dictionary with keys ``mean`` and ``std`` of the sampled
    contributions
  """
  # Positional access: the class column is the last one, whatever its label.
  cls_typ = data.dtypes.iloc[-1].type
  # Sorted class labels are loop-invariant; assumes predict_proba columns
  # follow this order.
  classes = sorted(data[data.columns[-1]].unique())
  data = data[data.columns[:-1]]

  contribs = []
  for _ in range(iterations):
    # Use first instance to get attribute format. Copy it so the random
    # assignments below never write back into the dataset and alter the
    # pool of values being sampled from.
    instance1 = data.iloc[0:1].copy()
    # Replace all attributes with random values. The class column has
    # already been removed from `data`, so every remaining column is a
    # feature and must be randomised.
    for ifeature in data.columns:
      instance1[ifeature] = select_random_value(data[ifeature])
    # Make another instance and replace the chosen feature with a
    # pre-selected value
    instance2 = instance1.copy()
    instance2[feature] = value
    # Compute the predicted class
    prediction = cls_typ(model.predict(np.asarray(instance2))[0])
    p_index = classes.index(prediction)
    # Append contribution
    contribs.append(
      model.predict_proba(instance2)[0][p_index] -
      model.predict_proba(instance1)[0][p_index]
    )

  return dict(mean = np.mean(contribs), std = np.std(contribs))

def explain_discrete_model(model, data, iterations = 200):
  """
  Explains the complete model (all values of all features).

  Every distinct value of every feature column (all columns but the last,
  which is treated as the class column) is explained with `explain_value`.

  :param model: Scikit-learn model instance with enabled probability
    estimation
  :param data: Dataset used for estimating feature range
  :param iterations: Number of iterations in Monte Carlo simulation,
    forwarded to every `explain_value` call
  :returns: List of ``(feature, dict)`` tuples in column order, where the
    dictionary holds the parallel lists ``values`` (sorted distinct feature
    values), ``means`` and ``stds``
  """
  explanation = []
  for feature in data.columns[:-1]:
    values, means, stds = [], [], []
    for value in sorted(data[feature].unique()):
      # Forward the caller's iteration count instead of silently falling
      # back to explain_value's default.
      e = explain_value(model, data, feature, value, iterations = iterations)
      values.append(value)
      means.append(e['mean'])
      stds.append(e['std'])

    explanation.append((feature, dict(values = values, means = means, stds = stds)))

  return explanation

def select_random_value(feature):
  """
  Selects a random value from a feature's range.

  The value is drawn uniformly from the distinct values present in the
  column, so frequent values are not favoured over rare ones.

  :param feature: Single column (pandas Series) holding the feature's
    values; only integer dtypes (signed or unsigned) are supported
  :returns: NumPy array of length one containing the selected value
  :raises TypeError: If the column does not have an integer dtype
  """
  if feature.dtype.kind in ('i', 'u'):
    return np.random.choice(feature.unique(), 1)

  # Call form of raise is valid on both Python 2 and Python 3.
  raise TypeError("Unsupported feature type!")

def plot_instance_explanation(result, filename = "output.png"):
  """
  Plots an instance explanation generated by `explain_instance`.

  Draws one horizontal bar per feature around a vertical zero line and
  saves the figure to a file. The passed `result` is not modified.

  :param result: Explanation generated by `explain_instance`; only its
    ``explanation`` entry (list of ``(feature, contribution)`` tuples) is
    read
  :param filename: File where the output visualization should be
    saved to
  """
  plt.clf()
  plt.figure(1, figsize = (6, 4))
  left_bar = plt.axes((0.1, 0.1, 0.85, 0.7))
  # Reverse a copy so the first feature ends up at the largest y position;
  # reversing in place would flip the order on every repeated call.
  explanation = list(result['explanation'])[::-1]
  N = len(explanation)
  contributions = [contribution for _, contribution in explanation]
  # Symmetric half-width of the x axis: based on the largest magnitude so
  # strongly negative bars are not clipped; falls back to 0 when empty.
  maxc = max([abs(contribution) for contribution in contributions] or [0.0]) + 0.1

  left_bar.barh(np.arange(N) + 0.3, contributions, height = 0.55)

  left_bar.hlines(np.arange(N) + 0.05, -maxc, maxc, linestyles = 'dashed')
  left_bar.axvline(x = 0.0, color = 'black', linewidth = 2)

  left_bar.set_yticks(np.arange(N) + 0.5)
  left_bar.set_yticklabels([feature for feature, _ in explanation])
  left_bar.set_xlim(left = -maxc, right = maxc)
  # Hide the tick marks on both axes; tick labels stay visible.
  for t in left_bar.get_xticklines(): t.set_marker(None)
  for t in left_bar.get_yticklines(): t.set_marker(None)
  left_bar.set_frame_on(False)

  plt.savefig(filename)

def plot_model_explanation(result, filename = "output.png"):
  """
  Placeholder for plotting a whole-model explanation.

  Not implemented yet: both arguments are accepted and ignored, nothing is
  drawn or written, and ``None`` is returned.

  :param result: Currently unused (presumably the output of
    `explain_discrete_model` -- to be confirmed once implemented)
  :param filename: Currently unused
  """
  # TODO
  return None
# Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
# Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
# Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
# Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
# Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
# Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
# Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.