Simple Linear Regression

import numpy as np

def compute_error_for_line_given_points(b, m, points):
    # Initialize error at 0
    totalError = 0
    # Loop through all points
    for i in range(0, len(points)):
        # Get x value
        x = points[i, 0]
        # Get y value
        y = points[i, 1]
        # Get squared difference and add to total error
        totalError += (y - (m * x + b)) ** 2

    # Return the average
    return totalError / float(len(points))
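
For reference, the value this function returns is the mean squared error of the line y = mx + b over the N data points:

    E(b, m) = \frac{1}{N} \sum_{i=1}^{N} \left( y_i - (m x_i + b) \right)^2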

def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):
    # Starting b and m
    b = starting_b
    m = starting_m

    # Gradient descent
    for i in range(num_iterations):
        # Update b and m
        b, m = step_gradient(b, m, np.array(points), learning_rate)

    return [b, m]
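
Each call to step_gradient performs one iteration of the standard gradient descent update, where \alpha is the learning rate:

    b \leftarrow b - \alpha \frac{\partial E}{\partial b}, \qquad m \leftarrow m - \alpha \frac{\partial E}{\partial m}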

# Define step gradient function
def step_gradient(b_current, m_current, points, learning_rate):
    # Initialize gradient values
    b_gradient = 0
    m_gradient = 0

    # Number of data points
    N = float(len(points))

    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]

        # Accumulate the partial derivatives of the error function w.r.t. b and m
        b_gradient += -(2/N) * (y - ((m_current * x) + b_current))
        m_gradient += -(2/N) * x * (y - ((m_current * x) + b_current))

    # Update b and m values using the partial derivatives
    new_b = b_current - (learning_rate * b_gradient)
    new_m = m_current - (learning_rate * m_gradient)
    return [new_b, new_m]
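
The two accumulated sums are the partial derivatives of the mean squared error with respect to b and m:

    \frac{\partial E}{\partial b} = -\frac{2}{N} \sum_{i=1}^{N} \left( y_i - (m x_i + b) \right)

    \frac{\partial E}{\partial m} = -\frac{2}{N} \sum_{i=1}^{N} x_i \left( y_i - (m x_i + b) \right)

The same step can also be written without an explicit Python loop. A minimal vectorized sketch, assuming points is an (N, 2) NumPy array of (x, y) rows (the name step_gradient_vectorized is not part of the original script, added only for illustration):

def step_gradient_vectorized(b_current, m_current, points, learning_rate):
    # Split the array into its x and y columns
    x = points[:, 0]
    y = points[:, 1]
    N = float(len(points))
    # Residuals of the current line y = mx + b
    residuals = y - (m_current * x + b_current)
    # Partial derivatives of the mean squared error
    b_gradient = -(2 / N) * residuals.sum()
    m_gradient = -(2 / N) * (x * residuals).sum()
    # Step against the gradient to reduce the error
    new_b = b_current - learning_rate * b_gradient
    new_m = m_current - learning_rate * m_gradient
    return [new_b, new_m]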



def run():

    # Step 1: collect data
    points = np.genfromtxt('data.csv', delimiter=',')

    # Step 2: define hyperparameters

    # Learning rate: size of each update step (too large and training diverges, too small and it converges slowly)
    learning_rate = 0.0001

    # Initialize y = mx + b values
    initial_b = 0
    initial_m = 0

    # Number of iterations
    num_iterations = 1000

    # Step 3: train model
    print('Starting gradient descent at b = {0}, m = {1}, error = {2}'.format(initial_b, initial_m, compute_error_for_line_given_points(initial_b, initial_m, points)))
    [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)
    print('After {0} iterations b = {1}, m = {2}, error = {3}'.format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))


if __name__ == '__main__':
    run()

Example output:

Starting gradient descent at b = 0, m = 0, error = 2490.961189080058
After 1000 iterations b = 0.08893651993741346, m = 1.4777440851894448, error = 572.7037959508043
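
np.genfromtxt expects data.csv to contain one x,y pair per comma-separated row. If you do not have the original file, a minimal sketch for generating a synthetic one (the filename, slope, intercept, and noise level here are hypothetical, chosen only for illustration, so the numbers above will not be reproduced):

import numpy as np

def make_synthetic_data(path='data.csv', n=100):
    # Hypothetical ground-truth line y = 1.5x + 5 plus Gaussian noise
    x = np.random.uniform(0, 100, n)
    y = 1.5 * x + 5 + np.random.normal(0, 10, n)
    np.savetxt(path, np.column_stack((x, y)), delimiter=',')

make_synthetic_data()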
