# Source
#
# ml-class / ex1.py
#
# Full commit
import matplotlib.pylab as plt
import numpy as np


def parse_line(line):
    """Parse one comma-separated line of numbers into a list of floats.

    Args:
        line: Text such as ``"6.1101,17.592\\n"``. Whitespace around each
            field (including the trailing newline) is tolerated by
            ``float``.

    Returns:
        A list with one float per comma-separated field.

    Raises:
        ValueError: If any field cannot be converted to a float.
    """
    # Build a real list: on Python 3 a bare map() is a lazy iterator, which
    # np.matrix cannot turn into a numeric row.
    return [float(field) for field in line.split(',')]


def load(filename):
    """Read a comma-separated numeric file into a matrix.

    Each non-blank line of the file becomes one row; each comma-separated
    field becomes one column.

    Args:
        filename: Path of the text file to read.

    Returns:
        An ``np.matrix`` holding the parsed rows.

    Raises:
        ValueError: If a non-blank line contains a field that is not a number.
    """
    with open(filename) as fo:
        # list(...) materializes each row so this works whether parse_line
        # returns a list or a lazy iterator; blank lines (e.g. a trailing
        # newline at end of file) are skipped instead of failing in float().
        rows = [list(parse_line(line)) for line in fo if line.strip()]
    return np.matrix(rows)


def load_features(filename):
    """Load a two-column data file as a design matrix and a label column.

    The first column of the file is treated as the single input feature and
    the second column as the label. A leading column of ones is added so the
    first model parameter acts as the intercept.

    Args:
        filename: Path of the comma-separated data file (read via ``load``).

    Returns:
        A ``(features, ys)`` tuple where ``features`` is an ``np.matrix``
        with one ``[1.0, x]`` row per sample and ``ys`` is an ``np.matrix``
        with one single-element row per sample.
    """
    m = load(filename)

    # Column slices of an np.matrix stay two-dimensional (one row per sample).
    xs = m[:, 0]
    ys = np.matrix(m[:, 1])

    bias = np.ones((m.shape[0], 1))

    # Stack the columns directly instead of np.matrix(zip(...)): zip() is a
    # lazy iterator on Python 3 and does not produce a numeric matrix.
    features = np.matrix(np.hstack([bias, xs]))

    return features, ys


def J(features, theta, ys):
    """Return the squared-error cost of ``theta`` on the given data.

    Computes ``sum((features * theta.T - ys) ** 2) / (2 * len(ys))``.

    Args:
        features: Design matrix, one row per sample (assumed to be an
            ``np.matrix`` so that ``*`` is a matrix product).
        theta: Parameters; assumed to be a single-row matrix, since it is
            transposed before being multiplied by ``features``.
        ys: Labels, one row per sample.

    Returns:
        The summed squared prediction error divided by twice the number of
        rows in ``ys``.
    """
    residuals = features * theta.T - ys
    squared_error = np.power(residuals, 2).sum()
    n_samples = len(ys)
    return squared_error / (2 * n_samples)


def diff(i, j, theta, features, ys):
    """Return sample ``i``'s prediction error scaled by its ``j``-th feature.

    Args:
        i: Row index of the sample.
        j: Column index of the feature.
        theta: Parameters; assumed to be a single-row matrix (it is
            transposed before the product).
        features: Design matrix, one row per sample.
        ys: Labels, one row per sample; the value is read from column 0.

    Returns:
        ``(prediction_i - ys[i, 0]) * features[i, j]``.
    """
    prediction = (features[i] * theta.T)[0, 0]
    error = prediction - ys[i, 0]
    return error * features[i, j]


def step(j, theta, alpha, features, ys):
    """Return the updated value of parameter ``j`` after one descent step.

    Args:
        j: Index of the parameter (column of ``theta``) to update.
        theta: Current parameters; read as ``theta[0, j]``.
        alpha: Learning rate.
        features: Design matrix, one row per sample.
        ys: Labels, one row per sample.

    Returns:
        ``theta[0, j]`` minus ``alpha / m`` times the sum of ``diff`` over
        all ``m`` rows of ``features``.
    """
    n_samples = len(features)
    scale = alpha / n_samples

    # Accumulate the per-sample contributions in row order.
    gradient_sum = 0
    for i in range(n_samples):
        gradient_sum += diff(i, j, theta, features, ys)

    return theta[0, j] - (scale * gradient_sum)


def linreg(features, ys, alpha=0.01, niter=1500):
    """Fit a two-parameter linear model by repeated descent steps.

    Args:
        features: Design matrix with two columns, one row per sample.
        ys: Labels, one row per sample.
        alpha: Learning rate passed to ``step`` (default 0.01).
        niter: Number of update rounds to run (default 1500).

    Returns:
        A single-row ``np.matrix`` ``[[theta0, theta1]]`` holding the
        parameters after ``niter`` rounds; all zeros when ``niter`` is 0.
    """
    theta = np.matrix([0., 0.])
    # range() works on both Python 2 and 3 (xrange does not exist on 3), and
    # `_` avoids shadowing the builtin round().
    for _ in range(niter):
        # Both new values are computed from the same old theta before it is
        # replaced, so the two parameters are updated simultaneously.
        t0 = step(0, theta, alpha, features, ys)
        t1 = step(1, theta, alpha, features, ys)
        theta = np.matrix([t0, t1])

    return theta


def plot(theta, filename='ex1/ex1data1.txt'):
    """Show the data points together with the line described by ``theta``.

    Args:
        theta: Parameters, read as ``theta[0, 0]`` (intercept) and
            ``theta[0, 1]`` (slope).
        filename: Path of the two-column data file to plot. Defaults to the
            path that was previously hard-coded here.
    """
    data = np.asarray(load(filename))
    xs = data[:, 0]
    ys = data[:, 1]

    t0 = theta[0, 0]
    t1 = theta[0, 1]

    # Model prediction at every x in the data.
    tys = t0 + t1 * xs

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(xs, ys, 'x', color='red')
    ax.plot(xs, tys, color='blue')
    ax.set_xlabel('Population of City in 10,000s')
    ax.set_ylabel('Profit in $10,000s')
    plt.show()

if __name__ == '__main__':
    # Print the cost of the all-zero parameter vector on the data set.
    design, targets = load_features('ex1/ex1data1.txt')
    initial_theta = np.matrix([0., 0.])

    initial_cost = J(design, initial_theta, targets)
    print(initial_cost)