Table of Contents
- What is the difference between logistic regression and linear regression?
- Regularized logistic regression
- Gradient descent in logistic regression
- Model prediction example
- Solving a binary classification problem
- Different values of λ produce different classification results
What is the difference between logistic regression and linear regression?
Logistic regression can be thought of as an upgraded version of linear regression. The main difference is that logistic regression uses the sigmoid function, while linear regression uses a plain linear function: logistic regression passes the output of the linear function through the sigmoid, mapping it to a probability between 0 and 1, and after applying a decision threshold the model can be used for binary classification, for example predicting whether a tumor is benign or malignant.
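As a minimal sketch of that mapping (the single feature and the weights below are made up for illustration and are not from the original post), the sigmoid-plus-threshold step looks like this:

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# hypothetical weights for a single feature (e.g. tumor size)
w, b = 0.8, -4.0
x = np.array([2.0, 4.5, 7.0])          # feature values
prob = sigmoid(w * x + b)              # probabilities in (0, 1)
label = (prob >= 0.5).astype(int)      # threshold at 0.5 -> predicted class 0 or 1
print(prob, label)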
Regularized logistic regression
For complex data, fitting with a plain linear function tends to underfit, while fitting with too many high-order polynomial terms can overfit. Therefore, in addition to the learning rate α, we introduce a regularization parameter λ that penalizes the coefficients in the weight vector w multiplying the high-order terms, shrinking the ones responsible for overfitting toward zero; this gives regularized logistic regression.
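For reference, the regularized cost that costFuncReg implements in the second listing below is the standard cross-entropy loss plus an L2 penalty, with the bias term θ₀ left unpenalized:

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta\big(x^{(i)}\big) + \big(1-y^{(i)}\big)\log\big(1-h_\theta\big(x^{(i)}\big)\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2,
\qquad h_\theta(x) = \frac{1}{1+e^{-\theta^{T}x}}$$

A larger λ makes the penalty dominate, shrinking the high-order coefficients and smoothing the decision boundary; λ = 0 recovers the unregularized cost.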
Gradient descent in logistic regression:
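For reference, the gradient computed by gradFunction in the code below corresponds to the standard batch gradient-descent update with learning rate α:

$$\theta_j := \theta_j - \alpha\,\frac{1}{m}\sum_{i=1}^{m}\big(h_\theta\big(x^{(i)}\big)-y^{(i)}\big)\,x_j^{(i)}$$

In the regularized version (gradFuncReg), the extra term λθ_j/m is added to the gradient for j ≥ 1, leaving the bias θ₀ unpenalized.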
Model prediction example
import numpy as np
import matplotlib.pylab as plt
import scipy.optimize as op

# Load Data
data = np.loadtxt('ex2data1.txt', delimiter=',')
X = data[:, 0:2]
Y = data[:, 2]

# ==================== Part 1: Plotting ====================
print('Plotting data with + indicating (y = 1) examples and o indicating (y = 0) examples.')


# Scatter plot of the raw data
def plotData(x, y):
    pos = np.where(y == 1)
    neg = np.where(y == 0)
    p1 = plt.scatter(x[pos, 0], x[pos, 1], marker='+', s=30, color='b')
    p2 = plt.scatter(x[neg, 0], x[neg, 1], marker='o', s=30, color='y')
    plt.legend((p1, p2), ('Admitted', 'Not admitted'), loc='upper right', fontsize=8)
    plt.xlabel('Exam 1 score')
    plt.ylabel('Exam 2 score')
    plt.show()


plotData(X, Y)
# _ = input('Press [Enter] to continue.')

# ============ Part 2: Compute Cost and Gradient ============
m, n = np.shape(X)
X = np.concatenate((np.ones((m, 1)), X), axis=1)
init_theta = np.zeros((n + 1,))


# sigmoid function
def sigmoid(z):
    g = 1 / (1 + np.exp(-1 * z))
    return g


# cost function and gradient
def costFunction(theta, x, y):
    m = np.size(y, 0)
    h = sigmoid(x.dot(theta))
    if np.sum(1 - h < 1e-10) != 0:  # guard against log(0) when h saturates at 1
        return np.inf
    j = -1 / m * (y.dot(np.log(h)) + (1 - y).dot(np.log(1 - h)))
    return j


def gradFunction(theta, x, y):
    m = np.size(y, 0)
    grad = 1 / m * (x.T.dot(sigmoid(x.dot(theta)) - y))
    return grad


cost = costFunction(init_theta, X, Y)
grad = gradFunction(init_theta, X, Y)
print('Cost at initial theta (zeros): ', cost)
print('Gradient at initial theta (zeros): ', grad)
# _ = input('Press [Enter] to continue.')

# ============= Part 3: Optimizing using fmin_bfgs =============
# Note: this differs slightly from the original version
result = op.minimize(costFunction, x0=init_theta, method='BFGS', jac=gradFunction, args=(X, Y))
theta = result.x
print('Cost at theta found by fmin_bfgs: ', result.fun)
print('theta: ', theta)


# Plot the data together with the fitted decision boundary
def plotDecisionBoundary(theta, x, y):
    pos = np.where(y == 1)
    neg = np.where(y == 0)
    p1 = plt.scatter(x[pos, 1], x[pos, 2], marker='+', s=60, color='r')
    p2 = plt.scatter(x[neg, 1], x[neg, 2], marker='o', s=60, color='y')
    # the boundary is the line theta0 + theta1*x1 + theta2*x2 = 0
    plot_x = np.array([np.min(x[:, 1]) - 2, np.max(x[:, 1]) + 2])
    plot_y = -1 / theta[2] * (theta[1] * plot_x + theta[0])
    plt.plot(plot_x, plot_y)
    plt.legend((p1, p2), ('Admitted', 'Not admitted'), loc='upper right', fontsize=8)
    plt.xlabel('Exam 1 score')
    plt.ylabel('Exam 2 score')
    plt.show()


plotDecisionBoundary(theta, X, Y)
# _ = input('Press [Enter] to continue.')

# ============== Part 4: Predict and Accuracies ==============
prob = sigmoid(np.array([1, 45, 85]).dot(theta))
print('For a student with scores 45 and 85, we predict an admission probability of: ', prob)


# Predict 0/1 labels for the given inputs (threshold x.dot(theta) at 0, i.e. sigmoid at 0.5)
def predict(theta, x):
    m = np.size(x, 0)  # use the argument x, not the global X
    p = np.zeros((m,))
    pos = np.where(x.dot(theta) >= 0)
    neg = np.where(x.dot(theta) < 0)
    p[pos] = 1
    p[neg] = 0
    return p
p = predict(theta, X)
print('Train Accuracy: ', np.sum(p == Y) / np.size(Y, 0))  # training accuracy = (# of p == Y) / (total # of examples)
Solving a binary classification problem:
Different values of λ produce different classification results:
import numpy as np
import matplotlib.pylab as plt
import scipy.optimize as op

# Load data
data = np.loadtxt('ex2data2.txt', delimiter=',')
X = data[:, 0:2]
Y = data[:, 2]


# Scatter plot of the two microchip test scores
def plotData(x, y):
    pos = np.where(y == 1)
    neg = np.where(y == 0)
    p1 = plt.scatter(x[pos, 0], x[pos, 1], marker='+', s=50, color='b')
    p2 = plt.scatter(x[neg, 0], x[neg, 1], marker='o', s=50, color='y')
    plt.legend((p1, p2), ('Admitted', 'Not admitted'), loc='upper right', fontsize=8)
    plt.xlabel('Microchip Test 1')
    plt.ylabel('Microchip Test 2')
    plt.show()


plotData(X, Y)

# =========== Part 1: Regularized Logistic Regression ============
# Map the two features to all polynomial terms of x1 and x2 up to degree 6
def mapFeature(x1, x2):
    degree = 6
    col = int(degree * (degree + 1) / 2 + degree + 1)
    out = np.ones((np.size(x1, 0), col))
    count = 1
    for i in range(1, degree + 1):
        for j in range(i + 1):
            out[:, count] = np.power(x1, i - j) * np.power(x2, j)
            count += 1
    return out


X = mapFeature(X[:, 0], X[:, 1])
init_theta = np.zeros((np.size(X, 1),))
lamd = 10


# sigmoid function
def sigmoid(z):
    g = 1 / (1 + np.exp(-1 * z))
    return g


# regularized cost function (the bias term theta[0] is not penalized)
def costFuncReg(theta, x, y, lam):
    m = np.size(y, 0)
    h = sigmoid(x.dot(theta))
    j = -1 / m * (y.dot(np.log(h)) + (1 - y).dot(np.log(1 - h))) \
        + lam / (2 * m) * theta[1:].dot(theta[1:])
    return j


# regularized gradient
def gradFuncReg(theta, x, y, lam):
    m = np.size(y, 0)
    h = sigmoid(x.dot(theta))
    grad = np.zeros(np.size(theta, 0))
    grad[0] = 1 / m * (x[:, 0].dot(h - y))
    grad[1:] = 1 / m * (x[:, 1:].T.dot(h - y)) + lam * theta[1:] / m
    return grad


cost = costFuncReg(init_theta, X, Y, lamd)
print('Cost at initial theta (zeros): ', cost)
# _ = input('Press [Enter] to continue.')

# ============= Part 2: Regularization and Accuracies =============
init_theta = np.zeros((np.size(X, 1),))
lamd = 10
result = op.minimize(costFuncReg, x0=init_theta, method='BFGS', jac=gradFuncReg, args=(X, Y, lamd))
theta = result.x


# Plot the data and the nonlinear decision boundary (the z = 0 contour)
def plotDecisionBoundary(theta, x, y):
    pos = np.where(y == 1)
    neg = np.where(y == 0)
    p1 = plt.scatter(x[pos, 1], x[pos, 2], marker='+', s=60, color='r')
    p2 = plt.scatter(x[neg, 1], x[neg, 2], marker='o', s=60, color='y')
    # evaluate theta' * mapFeature(u, v) over a grid
    u = np.linspace(-1, 1.5, 50)
    v = np.linspace(-1, 1.5, 50)
    z = np.zeros((np.size(u, 0), np.size(v, 0)))
    for i in range(np.size(u, 0)):
        for j in range(np.size(v, 0)):
            z[i, j] = mapFeature(np.array([u[i]]), np.array([v[j]])).dot(theta)
    z = z.T
    [um, vm] = np.meshgrid(u, v)
    plt.contour(um, vm, z, levels=[0], linewidths=2)  # contour takes 'linewidths', not 'lw'
    plt.legend((p1, p2), ('Admitted', 'Not admitted'), loc='upper right', fontsize=8)
    plt.xlabel('Microchip Test 1')
    plt.ylabel('Microchip Test 2')
    plt.title('lambda = 10')
    plt.show()


plotDecisionBoundary(theta, X, Y)


# Predict 0/1 labels for the given inputs
def predict(theta, x):
    m = np.size(x, 0)  # use the argument x, not the global X
    p = np.zeros((m,))
    pos = np.where(x.dot(theta) >= 0)
    neg = np.where(x.dot(theta) < 0)
    p[pos] = 1
    p[neg] = 0
    return p


p = predict(theta, X)
print('Train Accuracy: ', np.sum(p == Y) / np.size(Y, 0))
λ = 2:

λ = 10:
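The two plots above were presumably produced by rerunning the script with lamd set to 2 and then 10. A minimal sketch of automating that comparison, reusing X, Y, costFuncReg, gradFuncReg, and predict as defined above (the loop itself is not part of the original script):

for lamd in (2, 10, 100):
    init_theta = np.zeros((np.size(X, 1),))
    result = op.minimize(costFuncReg, x0=init_theta, method='BFGS',
                         jac=gradFuncReg, args=(X, Y, lamd))
    acc = np.mean(predict(result.x, X) == Y)  # training accuracy for this lambda
    print('lambda = %s, train accuracy = %.3f' % (lamd, acc))

Smaller λ lets the degree-6 boundary bend around individual points (higher training accuracy, but a risk of overfitting), while larger λ yields a smoother boundary that may underfit.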