
Simple backpropagation neural network algorithm (Python)

I'm trying to understand backpropagation, and I'm using some Python code for that, but it isn't working properly. When I train it with the XOR inputs and outputs, the error does not converge. But if I change the value of the last XOR output, it does converge.

If I set one of the target output values to something greater than 1, the error converges to target - 1, which does not look right.
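A quick numeric check of the sigmoid's range (added here just for illustration; it is not part of my script below), which seems related to that target - 1 floor:

import numpy as np

def sigmoid(z):
    # same logistic function the network below uses
    return 1.0 / (1.0 + np.exp(-z))

# The sigmoid output stays strictly inside (0, 1), even for extreme inputs,
# so a target above 1 can never be matched exactly.
print(sigmoid(np.array([-50.0, 0.0, 50.0])))  # approx. [0.0, 0.5, 1.0]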

import numpy as np 
import random 

class neural_network(): 

    activation = [] #List of values with the values of activation of each layers 
    weightsIn = [] 
    weightsOut = [] 

    def __init__(self, sizeOfLayers): 
     ''' 
      sizeOfLayers: Tuple with numbers of neurons of each layer 
      (in, hidden, out) 
     ''' 
     if len(sizeOfLayers) > 3: 
      raise ValueError('Wrong number of layers') 

     self.sizeOfLayers = sizeOfLayers 
     for i in range(len(sizeOfLayers)): 
      if i == 0: 
       #input layer + bias 
       self.activation.append(sizeOfLayers[i]*[0.0] + [0.0]) 
      else: 
       self.activation.append(sizeOfLayers[i]*[0.0]) 
     # Wi = len(Hid) x len(IN)+1(bias) 
     self.weightsIn = np.random.random((sizeOfLayers[1], sizeOfLayers[0] + 1)) 
     # Wo = len(OUT) x len(Hid) 
     self.weightsOut = np.random.random((sizeOfLayers[2], sizeOfLayers[1])) 

    def forward(self, X): 
     ''' 
      X: Vetor de entradas 
     ''' 
     #In+bias add ativation vector 
     self.activation[0] = np.vstack((np.array([X]).T, np.array([1]))) 
     #sum of (weights x in) 
     self.sumHidden = self.weightsIn.dot(self.activation[0]) 
     #Ativation of hidden layer 
     self.activation[1] = (self.sigmoid(self.sumHidden)) 
     #sum of(out weights x activation of last layer) 
     self.sumOut = self.weightsOut.dot(self.activation[1]) 
     #activation of output 
     self.activation[2] = (self.sigmoid(self.sumOut)) 
     return self.activation[2].T 

    def backPropagate(self, Y, trainRate = 0.1): 
     ''' 
      Y: output target 
      trainRate: 
     ''' 
     if len(Y) != self.sizeOfLayers[2]: 
      raise ValueError('Wrong number of inputs') 

     #Calc of output delta 
     error_o = Y.T - self.activation[2].T 
     out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T 
     #Calc of hidden delta 
     error_h = out_delta.T.dot(self.weightsOut) 
     hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T 

     # update output weights output 
     change_o = self.activation[1] * out_delta.T 
     for i in range(self.sizeOfLayers[2]): 
      for j in range(self.sizeOfLayers[1]): 
       self.weightsOut[i][j] = self.weightsOut[i][j] + trainRate*change_o[j][i] 
     # update Input weights 
     change_h = self.activation[0] * hiden_delta.T 
     for i in range(self.sizeOfLayers[1]): 
      for j in range(self.sizeOfLayers[0]): 
       self.weightsIn[i][j] = self.weightsIn[i][j] + trainRate*change_h[j][i] 

     #Error 
     return np.sum((Y.T - self.activation[2].T)**2)/0.5 

    def sigmoid(self, z, derv = False): 
     if derv == False: 
      return 1/(1+np.exp(-z)) 

    def sigmoidPrime(self, z): 
     return self.sigmoid(z)*(1-self.sigmoid(z)) 

    def train(self, target, trainRate = 0.001, it = 50000): 
     for i in range(it): 
      error = 0.0 
      for t in target: 
       inputs = np.array(t[0]) 
       targets = np.array([t[1]]) 
       self.forward(inputs) 
       error = error + self.backPropagate(targets, trainRate) 

nn = neural_network((2,6,1)) 
xor = [ 
    [[0,0], [0]], 
    [[0,1], [1]], 
    [[1,0], [1]], 
    [[1,1], [0]] #If I change this to 1 it converges 
    ] 

nn.train(xor) 
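
To see the behaviour described above, a simple check (illustration only, not part of the original script) is to print the network's prediction for each XOR pattern after training:

for inputs, target in xor:
    out = nn.forward(np.array(inputs))
    print('%s -> %s (target %s)' % (inputs, out, target))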

Edit: I made the changes that Diego Stefano suggested below (thank you, Diego), but the error still does not converge.

import numpy as np 
import math 
import random 
from scipy.special import expit 
from sklearn.preprocessing import normalize 


class neural_network(object): 
    activation = [] 
    weightsIn = [] 
    weightsOut = [] 

    def __init__(self, sizeOfLayers): 
     ''' 
      sizeOfLayers: Tuple with numbers of neurons of each layer 
      (in, hidden, out) 
     ''' 
     self.sizeOfLayers = sizeOfLayers 
     for i in range(len(sizeOfLayers)): 
      self.activation.append(sizeOfLayers[i]*[0.0] + [0.0]) 

     self.weightsIn = np.random.normal(scale=0.1, size = (sizeOfLayers[1], sizeOfLayers[0] + 1)) 
     self.weightsOut = np.random.normal(scale=0.1, size = (sizeOfLayers[2], sizeOfLayers[1] + 1)) 


    def forward(self, X): 
     ''' 
      X: Vetor de entradas 
     ''' 
     #In+bias add ativation vector 
     self.activation[0] = np.vstack((np.array([X]).T, np.array([1]))) 
     #sum of (weights x in) 
     self.sumHidden = self.weightsIn.dot(self.activation[0]) 
     #+bias add ativation vector 
     self.activation[1] = np.vstack((expit(self.sumHidden), np.array([1]))) 
     #sum of(out weights x activation of last layer) 
     self.sumOut = self.weightsOut.dot(self.activation[1]) 
     #activation of output 
     self.activation[2] = (expit(self.sumOut)) 
     return self.activation[2].T 

    def backPropagate(self, X, Y, trainRate = 0.1): 
     self.forward(X) 
     #Calc of output delta 
     error_o = Y - self.activation[2].T 
     out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T 
     #Calc of hidden delta 
     error_h = out_delta.T.dot(self.weightsOut) 
     hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T 

     # update output weights output 
     change_o = self.activation[1] * np.transpose(out_delta) 

     self.weightsOut = self.weightsOut + trainRate*change_o.T 
     # update hidden weights output 
     change_h = self.activation[0].dot(hiden_delta[:-1].T) 
     self.weightsIn = self.weightsIn + trainRate*change_h.T 
     #error 
     return np.sum((Y - self.activation[2].T)**2)*0.5 


    def train(self, input_list, epochs): 
     for epoch in range(epochs): 
      ErrAcc = 0.0 
      for inputs, targets in input_list: 
       Err = self.backPropagate(np.array(inputs), np.array(targets), 0.2) 
       ErrAcc = ErrAcc + Err 
      if epoch % 1000 == 0: 
       print 'Epoch =', epoch, 'ErrAcc =', ErrAcc 

    def sigmoidPrime(self,x): 
     return expit(x)*(1-expit(x)) 


nn = neural_network((2,10,1)) 
xor = [ 
    [[0,0], [0]], 
    [[0,1], [1]], 
    [[1,0], [1]], 
    [[1,1], [0]] #If I change this to 1 it converges 
    ] 
nn.train(xor, 300000) 

Answer


Here are the changes I made to your code to get it working:

  1. Add a bias to the output neuron too. Every neuron in the network should have one, because the bias separates the activation field from the origin and therefore shifts your activation function left or right, greatly improving the chances of successful learning.

  2. Instead of using np.random.random, which generates numbers in the interval [0.0, 1.0), to initialize the weights, use np.random.uniform to generate uniform random floats in [-1.0, 1.0).

  3. Center the input space around the origin (i.e., remove the mean) and normalize it (see the short sketch right below this list).
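
A minimal sketch of what point 3 means for the XOR inputs (illustration only; the modified code at the end of this answer simply hard-codes the ±1.0 inputs instead):

import numpy as np

# Center each input feature (subtract its mean) and scale it, so the
# XOR inputs {0, 1} become {-1.0, 1.0}.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
X_centered = X - X.mean(axis=0)                          # values in {-0.5, 0.5}
X_scaled = X_centered / np.abs(X_centered).max(axis=0)   # values in {-1.0, 1.0}
print(X_scaled)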

This is how your initialization should look:

for i in range(len(sizeOfLayers)):
    self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])

self.weightsIn = np.random.uniform(-1, 1, (sizeOfLayers[1], sizeOfLayers[0] + 1))
self.weightsOut = np.random.uniform(-1, 1, (sizeOfLayers[2], sizeOfLayers[1] + 1))

Then, in the forward function, you also have to append a 1 to the hidden-layer activation:

self.activation[1] = np.vstack((self.sigmoid(self.sumHidden), np.array([1]))) 

You may have to change the learning rate to get it to work (about 0.5 worked for me). Also, your mean squared error calculation is wrong: you should multiply by 0.5, not divide by it.
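
For instance, a small standalone check of that 0.5 factor with made-up numbers:

import numpy as np

# Squared error with the conventional 0.5 factor (multiplied, not divided).
Y = np.array([[1.0]])        # target
output = np.array([[0.8]])   # network output
error = 0.5 * np.sum((Y - output) ** 2)
print(error)                 # 0.5 * 0.2**2, approx. 0.02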

Here is your modified code:

import numpy as np 
import random 

class neural_network(): 

    activation = [] #List of values with the values of activation of each layers 
    weightsIn = [] 
    weightsOut = [] 

    def __init__(self, sizeOfLayers): 
        ''' 
         sizeOfLayers: Tuple with numbers of neurons of each layer 
         (in, hidden, out) 
        ''' 
        if len(sizeOfLayers) > 3: 
            raise ValueError('Wrong number of layers') 

        self.sizeOfLayers = sizeOfLayers 
        for i in range(len(sizeOfLayers)): 
            #input layer + bias 
            self.activation.append(sizeOfLayers[i]*[0.0] + [0.0]) 

        # Wi = len(Hid) x len(IN)+1(bias) 
        self.weightsIn = np.random.uniform(-1,1,(sizeOfLayers[1], sizeOfLayers[0] + 1)) 

        # Wo = len(OUT) x len(Hid) 
        self.weightsOut = np.random.uniform(-1,1,(sizeOfLayers[2], sizeOfLayers[1] + 1)) 

    def forward(self, X): 
        ''' 
         X: Vetor de entradas 
        ''' 
        #In+bias add ativation vector 
        self.activation[0] = np.vstack((np.array([X]).T, np.array([1]))) 
        #sum of (weights x in) 
        self.sumHidden = self.weightsIn.dot(self.activation[0]) 
        #Ativation of hidden layer 
        self.activation[1] = np.vstack((self.sigmoid(self.sumHidden), np.array([1]))) 
        #sum of(out weights x activation of last layer) 
        self.sumOut = self.weightsOut.dot(self.activation[1]) 
        #activation of output 
        self.activation[2] = (self.sigmoid(self.sumOut)) 
        return self.activation[2].T 

    def backPropagate(self, Y, trainRate = 0.1): 
        ''' 
         Y: output target 
         trainRate: 
        ''' 
        if len(Y) != self.sizeOfLayers[2]: 
            raise ValueError('Wrong number of inputs') 

        #Calc of output delta 
        error_o = Y.T - self.activation[2].T 
        out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T 
        #Calc of hidden delta 
        error_h = out_delta.T.dot(self.weightsOut) 
        hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T 

        # update output weights output 
        change_o = self.activation[1] * out_delta.T 
        for i in range(self.sizeOfLayers[2]): 
            for j in range(self.sizeOfLayers[1]): 
                self.weightsOut[i][j] = self.weightsOut[i][j] + trainRate*change_o[j][i] 
        # update Input weights 
        change_h = self.activation[0] * hiden_delta.T 
        for i in range(self.sizeOfLayers[1]): 
            for j in range(self.sizeOfLayers[0]): 
                self.weightsIn[i][j] = self.weightsIn[i][j] + trainRate*change_h[j][i] 

        #Error 
        return np.sum((Y.T - self.activation[2].T)**2)*0.5 

    def sigmoid(self, z, derv = False): 
        if derv == False: 
            return 1/(1+np.exp(-z)) 

    def sigmoidPrime(self, z): 
        return self.sigmoid(z)*(1-self.sigmoid(z)) 

    def train(self, target, trainRate = 0.5, it = 50000): 
        for i in range(it): 
            error = 0.0 
            for t in target: 
                inputs = np.array(t[0]) 
                targets = np.array([t[1]]) 
                self.forward(inputs) 
                error = error + self.backPropagate(targets, trainRate) 

nn = neural_network((2,5,1)) 
xor = [ 
    [[-1.0, -1.0], [0]], 
    [[-1.0, 1.0], [1]], 
    [[ 1.0, -1.0], [1]], 
    [[ 1.0, 1.0], [0]] #If I change this to 1 it converges 
] 

nn.train(xor) 

for e in xor: 
    nn.forward(e[0]) 
    print nn.activation[2] 

Good luck!


Thanks Diego, I made your changes, but the error for the XOR logic still doesn't converge! – bottega


@bottega Have you tried increasing the number of epochs? Also, the third point I mentioned above is important; you can apply it by replacing every zero in the input samples of your training set with -1.0 (but only in the input samples, because you are using a sigmoid in the output layer). –