Source

ml4hackers / ch02.py

Full commit
#!/usr/bin/env python2

import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

# Location of the heights/weights/genders CSV.  An optional first
# command-line argument overrides the default so the script is not tied to
# one machine's directory layout; with no argument the original path is used.
DEFAULT_CSV = ('/home/miki/src/ML_for_Hackers/02-Exploration/data/'
               '01_heights_weights_genders.csv')
csv_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_CSV

# The file must provide 'Gender', 'Height' and 'Weight' columns; all three
# are read below.
df = pd.read_csv(csv_path)

# Per-gender subsets, used later to colour the scatter plot.
male = df[df['Gender'] == 'Male']
female = df[df['Gender'] == 'Female']

# Turns the gender strings into integer codes usable as a numeric target.
# NOTE(review): with only 'Female' and 'Male' present this should yield
# Female=0, Male=1 -- confirm via le.classes_ after fitting.
le = LabelEncoder()

# Feature matrix: one row per record, columns (Height, Weight).
X = df[['Height', 'Weight']].values
y = le.fit_transform(df['Gender'])

# Least-squares fit of the encoded gender on height and weight.  The raw
# output is a real number, not a class label.
clf = LinearRegression()
clf.fit(X, y)



# Scatter plot of height vs. weight, one colour per gender, with the fitted
# model's 0.5 level set drawn on top.
fig = plt.figure()
ax = fig.add_subplot(111)
alpha = 0.3  # translucent markers so overlapping points stay readable
ax.scatter(male['Height'], male['Weight'], color='blue',
           alpha=alpha)
ax.scatter(female['Height'], female['Weight'], color='red',
           alpha=alpha)

# Corners of the data's bounding box: lower-left is (min height, min weight),
# upper-right is (max height, max weight).
ll = df['Height'].min(), df['Weight'].min()
ur = df['Height'].max(), df['Weight'].max()

# Evaluate the linear model on a 100x100 grid spanning the bounding box.
# This is a single vectorized expression instead of one clf.predict() call
# per grid cell.  clf.coef_ holds one coefficient per feature, in the same
# (Height, Weight) order as the columns of X.
xs = np.linspace(ll[0], ur[0], 100)
ys = np.linspace(ll[1], ur[1], 100)
X1, X2 = np.meshgrid(xs, ys)
Z = clf.intercept_ + clf.coef_[0] * X1 + clf.coef_[1] * X2

# Draw the line where the model outputs 0.5.
# NOTE(review): 0.5 is the midpoint only if the genders are encoded as 0
# and 1 -- confirm against the label encoding.
ax.contour(X1, X2, Z, levels=[0.5], colors='black')

# plt.show() runs the GUI event loop and blocks until the window is closed,
# so no separate "wait for Enter" call is needed to keep the figure open.
plt.show()