This post is a Python reference solution for ex1, the univariate linear regression assignment from Andrew Ng's machine learning course.
The code was converted from a Jupyter notebook.
# Jupyter notebook magic command for interactive plotting
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Load the data
data = pd.read_csv('ex1data1.txt', header=None, names=['Population', 'Profit'])
# Inspect the data
data.describe()  # Jupyter notebook renders the result directly; the line below is its output, not Python syntax
(Output: a table of basic summary statistics for Population and Profit)
# Visualize the data with a scatter plot
data.plot(kind='scatter', x='Population', y='Profit', figsize=(12, 8))
def costFunction(X, y, theta):
    inner = np.power((X * theta.T) - y, 2)
    return np.sum(inner) / (2 * X.shape[0])
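This is the vectorized form of the course's cost function, where $h_\theta(x) = \theta^T x$ and $m$ is the number of training examples:

$$J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2$$

X * theta.T computes $h_\theta$ for all $m$ examples at once, so no explicit loop is needed.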
# Insert a column of ones at column 0, labeled 'Ones' (the intercept term)
data.insert(0, 'Ones', 1)
data.head()  # View the first 5 rows
# Set up the training data
cols = data.shape[1]  # number of columns (shape index 1 is columns; index 0 is rows)
X = data.iloc[:, 0:-1]         # all rows, every column except the last
y = data.iloc[:, cols-1:cols]  # all rows, the last column only
# Convert the datasets to matrices
X = np.matrix(X.values)
y = np.matrix(y.values)
theta = np.matrix(np.array([0, 0]))
# The shape attribute gives the dimensions
X.shape, y.shape, theta.shape
((97, 2), (97, 1), (1, 2))
# Test the cost function
theta_1 = np.matrix(np.array([-1, 2]))
J = costFunction(X, y, theta_1)
print('\nWith theta = [-1 ; 2]\nCost computed =', J)
print('Expected cost value (approx) 54.24\n')
With theta = [-1 ; 2]
Cost computed = 54.24245508201238
Expected cost value (approx) 54.24
def gradientDescent(X, y, theta, alpha, iteration):
    J_history = np.zeros(iteration)
    for i in range(iteration):
        h = X * theta.T                                     # predictions for all examples
        theta = theta - alpha * (h - y).T * X / y.shape[0]  # simultaneous update of all theta_j
        J_history[i] = costFunction(X, y, theta)            # record the cost at each iteration
    return theta, J_history
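Each pass through the loop performs the batch gradient descent update from the course,

$$\theta_j := \theta_j - \alpha\,\frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}$$

computed for all $\theta_j$ at once: (h - y).T * X forms the sum over the $m$ examples for every parameter simultaneously.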
alpha = 0.01
iteration = 1000
trained_theta, J = gradientDescent(X, y, theta, alpha, iteration)
trained_theta[0, 1], J.shape
(1.127294202428184, (1000,))
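As an extra sanity check (not part of the original notebook), the closed-form normal equation $\theta = (X^TX)^{-1}X^Ty$ gives the exact minimizer of the cost, and the gradient descent result should land close to it. A minimal sketch; theta_ne is a name chosen here for illustration:

# Extra sanity check: solve for theta directly with the normal equation
theta_ne = np.linalg.inv(X.T * X) * X.T * y  # (X^T X)^{-1} X^T y, shape (2, 1)
print(theta_ne.T)  # trained_theta should approach this as the iteration count grows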
x_axis = np.linspace(data.Population.min(), data.Population.max(), 100)  # x-axis values
y_axis = trained_theta[0, 0] + (trained_theta[0, 1] * x_axis)            # predicted values
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(x_axis, y_axis, 'r', label='Prediction')  # plot the fitted line
ax.scatter(data.Population, data.Profit, label='Training Data')  # plot the data points
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()
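The original ex1 assignment also asks for profit predictions at populations of 35,000 and 70,000 (the Population column is stored in units of 10,000). A minimal sketch using the trained parameters:

# Predict profit for populations of 35,000 and 70,000
predict1 = np.matrix([1, 3.5]) * trained_theta.T  # h_theta evaluated at [1, 3.5]
predict2 = np.matrix([1, 7.0]) * trained_theta.T
print('For population = 35,000, predicted profit =', predict1[0, 0] * 10000)
print('For population = 70,000, predicted profit =', predict2[0, 0] * 10000)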
fig1, ax1 = plt.subplots()
x = np.linspace(1, iteration, iteration)
ax1.plot(x, J)
ax1.set_xlabel('Iterations')
ax1.set_ylabel('Cost J')
plt.show()
The cost function decreases steadily as the number of iterations grows, which confirms that gradient descent is converging and is well suited to minimizing this cost function.
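A quick extra experiment (not part of the original assignment): rerunning gradient descent with several learning rates and overlaying the convergence curves makes the effect of alpha visible; too small converges slowly, while too large can diverge. The alpha values below are illustrative choices:

# Compare convergence for several illustrative learning rates
fig2, ax2 = plt.subplots()
for a in [0.001, 0.003, 0.01, 0.02]:
    _, J_a = gradientDescent(X, y, theta, a, iteration)
    ax2.plot(np.arange(1, iteration + 1), J_a, label='alpha = %g' % a)
ax2.set_xlabel('Iterations')
ax2.set_ylabel('Cost J')
ax2.legend()
plt.show()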
Your follows and shares are my greatest encouragement!