import numpy as np
def RBF(x, center_i, beta_i):
    # Gaussian radial basis function: rho = exp(-beta_i * ||x - center_i||^2)
    dist = np.sum((x - center_i) ** 2)
    rho = np.exp(-beta_i * dist)
    return rho  # scalar output
class RBF_network:
    def __init__(self):
        self.hidden_num = 0
        self.y = 0

    def createNN(self, input_num, hidden_num, learning_rate, center):
        self.input_num = input_num
        self.hidden_num = hidden_num
        self.center = center
        self.w = np.random.random(self.hidden_num)
        self.rho = np.zeros(self.hidden_num)
        self.beta = np.random.random(self.hidden_num)
        self.lr = learning_rate

    def Predict(self, x):
        self.y = 0
        for i in range(self.hidden_num):
            self.rho[i] = RBF(x, self.center[i], self.beta[i])
            self.y += self.w[i] * self.rho[i]
        return self.y

    def BackPropagate(self, x, y):
        self.Predict(x)
        grad = np.zeros(self.hidden_num)
        for i in range(self.hidden_num):
            # dE_k/dy_hat  = (y_hat - y)
            # dE_k/dw_i    = (y_hat - y) * rho[i]
            # dE_k/dbeta_i = -(y_hat - y) * w_i * rho[i] * ||x - c_i||^2
            grad[i] = (self.y - y) * self.rho[i]
            dist_i = np.sum((x - self.center[i]) ** 2)
            # update beta before w so both updates use the pre-update weight value
            self.beta[i] += self.lr * grad[i] * self.w[i] * dist_i
            self.w[i] -= self.lr * grad[i]

    def trainNN(self, x, y):
        error_list = []
        for i in range(len(x)):
            self.BackPropagate(x[i], y[i])
            error = (self.y - y[i]) ** 2
            error_list.append(error / 2)
        print(error_list)
train_x = np.random.randint(0,2,(100,2))
train_y = np.logical_xor(train_x[:,0],train_x[:,1])
test_x = np.random.randint(0,2,(100,2))
test_y = np.logical_xor(test_x[:,0],test_x[:,1])
center = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
rbf = RBF_network()
rbf.createNN(input_num=2, hidden_num=4, learning_rate=0.1, center=center)
rbf.trainNN(train_x, train_y)
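The held-out test_x / test_y pair defined above is never actually used. A minimal evaluation sketch (assuming the rbf object trained above, and simply thresholding the real-valued output at 0.5) might look like:

# Hedged sketch (not part of the original code): score the trained RBF network
# on the held-out XOR samples by thresholding its real-valued output.
correct = 0
for xi, yi in zip(test_x, test_y):
    pred = rbf.Predict(xi) > 0.5   # Predict returns a real-valued score
    correct += int(pred == yi)
print("test accuracy:", correct / len(test_x))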
In each chain-rule product, the rightmost factor involves the lower-layer value that the edge weight connects to; the leftmost factor is the learning rate; the factors in between describe the propagation path through the neurons of the different layers.
The rightmost factor corresponds to the last propagation step: the output of the neuron that the edge weight feeds into.
This term propagates through less than one full layer, i.e. it only passes through a single neuron of the topmost layer.
This one first propagates back to neuron b_h, and then from that neuron on to alpha_h.
Note the summation over the topmost layer here: the contributions of all neurons in the upper layer are folded together into one sum.
To differentiate with respect to v_ih: v_ih connects x_i to b_h, so x_i directly influences only b_h, but b_h in turn influences every output y through the weights w. When propagating the error backwards we therefore cannot pick a single y; we must sum over all outputs. Only at the immediately adjacent layer can a single direction be singled out.
In other words, there are several paths from E_k back to b_h; once x_i has influenced b_h via v_ih, it affects all of these paths, so their contributions are accumulated. The number of terms in the sum equals the number of connections from that hidden neuron to the next layer, i.e. the size of the next layer.
The per-output factor is the output minus the ground truth, multiplied by the partial derivative of the activation.
The innermost loop builds the relationship between a hidden-layer neuron and every output-layer neuron it connects to, i.e. it sums over all the connected neurons; multiplying by one more partial derivative then gives the gradient.
First compute the gradients, then update the parameters (the full chain rule is written out below).
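Putting these notes together, the chain rule they refer to is the standard single-hidden-layer derivation with sigmoid activations (watermelon-book notation, with $E_k = \tfrac{1}{2}\sum_j (\hat y_j^k - y_j^k)^2$):

\[
\frac{\partial E_k}{\partial w_{hj}}
  = \frac{\partial E_k}{\partial \hat y_j^k}\,
    \frac{\partial \hat y_j^k}{\partial \beta_j}\,
    \frac{\partial \beta_j}{\partial w_{hj}}
  = -g_j\, b_h,
\qquad
g_j = \hat y_j^k\,(1-\hat y_j^k)\,(y_j^k - \hat y_j^k)
\]

\[
\frac{\partial E_k}{\partial v_{ih}}
  = \Bigl(\sum_{j=1}^{l}
      \frac{\partial E_k}{\partial \hat y_j^k}\,
      \frac{\partial \hat y_j^k}{\partial \beta_j}\,
      \frac{\partial \beta_j}{\partial b_h}\Bigr)
    \frac{\partial b_h}{\partial \alpha_h}\,
    \frac{\partial \alpha_h}{\partial v_{ih}}
  = -e_h\, x_i,
\qquad
e_h = b_h\,(1-b_h)\sum_{j=1}^{l} w_{hj}\, g_j
\]

The sum over j is exactly the accumulation over all output neurons described above, and the gradient-descent updates become $\Delta w_{hj} = \eta\, g_j\, b_h$ and $\Delta v_{ih} = \eta\, e_h\, x_i$.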
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# STANDARD BP-NN & ACCUMULATED BP-NN
import numpy as np

class Data(object):
    def __init__(self, data):
        self.data = np.array(data)
        self.rows = len(self.data[:, 0])
        self.cols = len(self.data[0, :])  # includes the label column
        self.__eta = 0.1                  # initial learning rate eta = 0.1
        self.__in = self.cols - 1         # number of input neurons
        self.__out = len(np.unique(self.data[:, -1]))  # number of output neurons

    def set_eta(self, n):
        self.__eta = n

    def get_eta(self):
        return self.__eta

    def get_in(self):
        return self.__in

    def get_out(self):
        return self.__out

    # Standard BP algorithm
    def BP_NN(self, q=10, err=0.1):
        X = self.data[:, :-1]
        # Insert a column of -1 on the left of X so that X*v computes v*x - gamma;
        # the same trick is applied to b below so that b*w computes w*b - theta
        X = np.insert(X, [0], -1, axis=1)
        Y = np.array([self.data[:, -1], 1 - self.data[:, -1]]).transpose()
        d, l = self.__in, self.__out
        v = np.mat(np.random.random((d + 1, q)))  # first row of v plays the role of gamma
        w = np.mat(np.random.random((q + 1, l)))  # first row of w plays the role of theta

        def f(x):  # sigmoid function
            s = 1 / (1 + np.exp(-x))
            return s

        n = self.__eta
        gap = 1
        counter = 0
        while gap > err:  # quit once the error of the last sample in a sweep is <= err
            counter += 1
            for i in range(self.rows):
                alpha = np.mat(X[i, :]) * v   # 1*q matrix
                b_init = f(alpha)             # 1*q matrix
                # extend the hidden output b_init with b_0 = -1 to obtain b
                b = np.insert(b_init.T, [0], -1, axis=0)  # (q+1)*1 matrix
                beta = b.T * w                # 1*l matrix
                y_cal = np.array(f(beta))     # 1*l array
                g = y_cal * (1 - y_cal) * (Y[i, :] - y_cal)  # 1*l array
                w_g = w[1:, :] * np.mat(g).T  # q*1 matrix
                e = np.array(b_init) * (1 - np.array(b_init)) * np.array(w_g.T)  # 1*q array
                d_w = n * b * np.mat(g)
                d_v = n * np.mat(X[i, :]).T * np.mat(e)
                w += d_w
                v += d_v
                gap = 0.5 * np.sum((Y[i, :] - y_cal) ** 2)
        print('BP_round:', counter)
        return v, w

    # Accumulated BP algorithm
    def ABP_NN(self, q=10, err=0.1):
        X = self.data[:, :-1]
        # Insert a column of -1 on the left of X so that X*v computes v*x - gamma;
        # the same trick is applied to b below so that b*w computes w*b - theta
        X = np.insert(X, [0], -1, axis=1)
        Y = np.array([self.data[:, -1], 1 - self.data[:, -1]]).transpose()
        d, l = self.__in, self.__out
        v = np.mat(np.random.random((d + 1, q)))  # first row of v plays the role of gamma
        w = np.mat(np.random.random((q + 1, l)))  # first row of w plays the role of theta

        def f(x):  # sigmoid function
            s = 1 / (1 + np.exp(-x))
            return s

        n = self.__eta
        gap = 1
        counter = 0
        while gap > err:  # quit once the mean error over the whole data set is <= err
            d_v, d_w, gap = 0, 0, 0
            counter += 1
            for i in range(self.rows):
                alpha = np.mat(X[i, :]) * v   # 1*q matrix
                b_init = f(alpha)             # 1*q matrix
                # extend the hidden output b_init with b_0 = -1 to obtain b
                b = np.insert(b_init.T, [0], -1, axis=0)  # (q+1)*1 matrix
                beta = b.T * w                # 1*l matrix
                y_cal = np.array(f(beta))     # 1*l array
                g = y_cal * (1 - y_cal) * (Y[i, :] - y_cal)  # 1*l array
                w_g = w[1:, :] * np.mat(g).T  # q*1 matrix
                e = np.array(b_init) * (1 - np.array(b_init)) * np.array(w_g.T)  # 1*q array
                d_w += n * b * np.mat(g)
                d_v += n * np.mat(X[i, :]).T * np.mat(e)
                gap += 0.5 * np.sum((Y[i, :] - y_cal) ** 2)
            w += d_w / self.rows
            v += d_v / self.rows
            gap = gap / self.rows
        print('ABP_round:', counter)
        return v, w

def test_NN(a, v, w):
    X = a.data[:, :-1]
    X = np.insert(X, [0], -1, axis=1)
    Y = np.array([a.data[:, -1], 1 - a.data[:, -1]]).transpose()
    y_cal = np.zeros((a.rows, 2))

    def f(x):  # sigmoid function
        s = 1 / (1 + np.exp(-x))
        return s

    for i in range(a.rows):
        alpha = np.mat(X[i, :]) * v   # 1*q matrix
        b_init = f(alpha)             # 1*q matrix
        b = np.insert(b_init.T, [0], -1, axis=0)  # (q+1)*1 matrix
        beta = b.T * w                # 1*l matrix
        y_cal[i, :] = np.array(f(beta))  # 1*l array
    print(y_cal)

# Numeric encoding of the raw watermelon 3.0 data (categorical attributes as integers; last column is the label)
D = np.array([[1, 1, 1, 1, 1, 1, 0.697, 0.460, 1],
              [2, 1, 2, 1, 1, 1, 0.774, 0.376, 1],
              [2, 1, 1, 1, 1, 1, 0.634, 0.264, 1],
              [1, 1, 2, 1, 1, 1, 0.608, 0.318, 1],
              [3, 1, 1, 1, 1, 1, 0.556, 0.215, 1],
              [1, 2, 1, 1, 2, 2, 0.403, 0.237, 1],
              [2, 2, 1, 2, 2, 2, 0.481, 0.149, 1],
              [2, 2, 1, 1, 2, 1, 0.437, 0.211, 1],
              [2, 2, 2, 2, 2, 1, 0.666, 0.091, 0],
              [1, 3, 3, 1, 3, 2, 0.243, 0.267, 0],
              [3, 3, 3, 3, 3, 1, 0.245, 0.057, 0],
              [3, 1, 1, 3, 3, 2, 0.343, 0.099, 0],
              [1, 2, 1, 2, 1, 1, 0.639, 0.161, 0],
              [3, 2, 2, 2, 1, 1, 0.657, 0.198, 0],
              [2, 2, 1, 1, 2, 2, 0.360, 0.370, 0],
              [3, 1, 1, 3, 3, 1, 0.593, 0.042, 0],
              [1, 1, 2, 2, 2, 1, 0.719, 0.103, 0]])
a = Data(D)  # load the data
v, w = a.ABP_NN(err=0.01)   # accumulated BP
v1, w1 = a.BP_NN(err=0.2)   # standard BP
# v, w = a.ABP_NN(err=0.2)    # accumulated BP
# v1, w1 = a.BP_NN(err=0.01)  # standard BP
print("Testing accumulated BP")
test_NN(a, v, w)
print("Testing standard BP")
test_NN(a, v1, w1)
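test_NN only prints the raw network outputs. A small hedged sketch (this helper is not part of the original code) can turn them into winner-takes-all labels and an accuracy figure, assuming the Data object a and the weight matrices returned above:

# Hypothetical helper: winner-takes-all labels and accuracy from a Data object
# and a trained (v, w) pair, mirroring the forward pass in test_NN.
def accuracy(a, v, w):
    X = np.insert(a.data[:, :-1], [0], -1, axis=1)
    labels = a.data[:, -1]
    f = lambda z: 1 / (1 + np.exp(-z))
    correct = 0
    for i in range(a.rows):
        b = np.insert(f(np.mat(X[i, :]) * v).T, [0], -1, axis=0)
        y_cal = np.array(f(b.T * w)).flatten()
        # column 0 encodes the positive class, since Y = [label, 1 - label]
        pred = 1 if y_cal[0] > y_cal[1] else 0
        correct += int(pred == labels[i])
    return correct / a.rows

print("ABP accuracy:", accuracy(a, v, w))
print("BP accuracy:", accuracy(a, v1, w1))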
The i-th row of the input matrix, i.e. all the attributes of the i-th sample, is multiplied by the weight matrix v to give alpha, the net input received by the hidden layer; the hidden layer then passes this value through the activation function to produce its output, the hidden-layer output.
That hidden-layer output in turn feeds the output layer, which repeats the same computation.
Each layer therefore has two parts: a propagation stage (the weighted sum) and a stage where the value passes through the neuron's activation function; before the activation function is applied, a threshold (bias) has to be subtracted.
The partial derivative of the propagation stage is linear in its inputs; the derivative of the activation-function part has a fixed form (for the sigmoid, f'(x) = f(x)(1 - f(x))). A compact sketch of this forward pass follows.
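A minimal sketch of that single-sample forward pass, assuming plain NumPy arrays and the same -1 bias-column trick used in BP_NN above (x is one row of X with the leading -1 already inserted, v is (d+1)*q, w is (q+1)*l):

import numpy as np

def forward(x, v, w):
    f = lambda z: 1 / (1 + np.exp(-z))   # sigmoid; its derivative is f(z) * (1 - f(z))
    alpha = x @ v                        # propagation stage: weighted sum; the -1 column supplies -gamma
    b = f(alpha)                         # activation stage of the hidden layer
    b_ext = np.insert(b, 0, -1)          # prepend -1 so the first row of w acts as the output threshold theta
    beta = b_ext @ w                     # propagation stage into the output layer
    return f(beta)                       # activation stage of the output layer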
#-*- coding:utf-8 -*-
import re
import xlrd
import xdrlib,sys
import xlwt
import datetime
import time

if __name__ == "__main__":
    data = xlrd.open_workbook("watermelon3.0.xlsx")
    table = data.sheets()[0]
    nrows = table.nrows  # number of rows
    ncols = table.ncols  # number of columns
    type1 = table.row_values(0)
    number = len(type1)
    data = {}
    yR = {}
    for i in range(number):
        yR[i] = []
        yR[i].append(table.row_values(nrows - 1)[i] - 1)
        if i not in data.keys():
            data[i] = []
        for k in range(nrows - 1):
            data[i].append(table.row_values(k)[i])
    print(data)  # the attribute columns read from the sheet
    print(yR)    # the labels (shifted to start from 0)

    import matplotlib.pyplot as plt
    X = []
    y = []
    for i in range(12):
        X.append(data[i])
        y.append(yR[i])
    print(X)
    print(y)

    '''BP implementation'''
    from BP_network import *
    nn = BP_network()                # build a BP network object
    nn.CreateNN(8, 8, 1, 'Sigmoid')  # 8 input, 8 hidden, 1 output neuron
    e = []
    for i in range(2000):
        err, err_k = nn.TrainStandard(X, y, lr=0.5)
        e.append(err)
    f2 = plt.figure(2)
    plt.xlabel("epochs")
    plt.ylabel("accumulated error")
    plt.title("circles convergence curve")
    plt.plot(e)
    plt.show()

    '''draw decision boundary'''
    XP = []
    for i in range(0, 17):
        XP.append(data[i])
    z = nn.PredLabel(XP)
    print(z)
# -*- coding: utf-8 -*-
def Sigmoid(x):
    from math import exp
    return 1.0 / (1.0 + exp(-x))
def SigmoidDerivate(y):  # derivative expressed in terms of the output y
    return y * (1 - y)
def Tanh(x):
    from math import tanh
    return tanh(x)
def TanhDerivate(y):
    return 1 - y * y

def rand(a, b):
    '''random value generation for parameter initialization
    @param a, b: the lower and upper limits of the random value'''
    from random import random
    return (b - a) * random() + a
class BP_network:
    def __init__(self):
        # number of nodes in each layer
        self.i_n = 0
        self.h_n = 0
        self.o_n = 0
        # output value vector of each layer
        self.i_v = []
        self.h_v = []
        self.o_v = []
        # connection weights and thresholds
        self.ih_w = []  # weights
        self.ho_w = []
        self.h_t = []   # thresholds
        self.o_t = []
        # available activation functions and their derivatives
        self.fun = {'Sigmoid': Sigmoid, 'SigmoidDerivate': SigmoidDerivate,
                    'Tanh': Tanh, 'TanhDerivate': TanhDerivate}

    def CreateNN(self, ni, nh, no, actfun):
        # ni, nh, no: numbers of input, hidden and output neurons; actfun: activation function name
        import numpy as np
        self.i_n = ni
        self.h_n = nh
        self.o_n = no
        # initialize the output of each layer
        self.i_v = np.zeros(self.i_n)
        self.h_v = np.zeros(self.h_n)
        self.o_v = np.zeros(self.o_n)
        # randomly initialize the connection weights
        self.ih_w = np.zeros([self.i_n, self.h_n])
        self.ho_w = np.zeros([self.h_n, self.o_n])
        for i in range(self.i_n):
            for h in range(self.h_n):
                self.ih_w[i][h] = rand(0, 1)
        for h in range(self.h_n):
            for o in range(self.o_n):
                self.ho_w[h][o] = rand(0, 1)
        # randomly initialize the thresholds of each layer
        self.h_t = np.zeros(self.h_n)
        self.o_t = np.zeros(self.o_n)
        for h in range(self.h_n):
            self.h_t[h] = rand(0, 1)
        for o in range(self.o_n):
            self.o_t[o] = rand(0, 1)
        # select the activation function and its derivative
        self.af = self.fun[actfun]
        self.afd = self.fun[actfun + 'Derivate']

    def Pred(self, x):
        # x: input vector of one sample
        for i in range(self.i_n):
            self.i_v[i] = x[i]
        # activate the hidden layer
        for h in range(self.h_n):
            total = 0.0
            for i in range(self.i_n):
                total += self.i_v[i] * self.ih_w[i][h]
            self.h_v[h] = self.af(total - self.h_t[h])
        # activate the output layer
        for j in range(self.o_n):
            total = 0.0
            for h in range(self.h_n):
                total += self.h_v[h] * self.ho_w[h][j]
            self.o_v[j] = self.af(total - self.o_t[j])

    def BackPropagate(self, x, y, lr):
        # one BP update step; x, y: input sample and target, lr: learning rate
        import numpy as np
        # get the current network output
        self.Pred(x)
        # calculate the gradient based on the output
        o_grid = np.zeros(self.o_n)
        for j in range(self.o_n):
            o_grid[j] = (y[j] - self.o_v[j]) * self.afd(self.o_v[j])
        h_grid = np.zeros(self.h_n)
        for h in range(self.h_n):
            for j in range(self.o_n):
                h_grid[h] += self.ho_w[h][j] * o_grid[j]
            h_grid[h] = h_grid[h] * self.afd(self.h_v[h])
        # update the parameters
        for h in range(self.h_n):
            for j in range(self.o_n):
                self.ho_w[h][j] += lr * o_grid[j] * self.h_v[h]
        for i in range(self.i_n):
            for h in range(self.h_n):
                self.ih_w[i][h] += lr * h_grid[h] * self.i_v[i]
        for j in range(self.o_n):
            self.o_t[j] -= lr * o_grid[j]
        for h in range(self.h_n):
            self.h_t[h] -= lr * h_grid[h]

    def TrainStandard(self, data_in, data_out, lr=0.05):
        '''standard BP training
        @param lr: learning rate, default 0.05
        @return: e, accumulated error
        @return: e_k, error array of each step
        '''
        e_k = []
        for k in range(len(data_in)):
            x = data_in[k]
            y = data_out[k]
            self.BackPropagate(x, y, lr)
            # error on the training set for each step
            y_delta2 = 0.0
            for j in range(self.o_n):
                y_delta2 += (self.o_v[j] - y[j]) * (self.o_v[j] - y[j])
            e_k.append(y_delta2 / 2)
        # total error of training
        e = sum(e_k) / len(e_k)
        return e, e_k

    def PredLabel(self, X):
        '''predict process through the network
        @param X: the input sample set for the input layer
        @return: y, array of predicted labels (1 or 2), thresholding the first output at 0.5
        '''
        import numpy as np
        y = []
        for m in range(len(X)):
            self.Pred(X[m])
            print(self.o_v)
            if self.o_v[0] > 0.5:
                y.append(2)
            else:
                y.append(1)
            # winner-takes-all alternative for multi-output networks:
            # max_y = self.o_v[0]
            # label = 0
            # for j in range(1, self.o_n):
            #     if max_y < self.o_v[j]: label = j
            # y.append(label)
        return np.array(y)