2012-03-30 94 views
4

我试图在Java中实现一个前馈神经网络,为此创建了三个类:NNeuron、NLayer 和 NNetwork。“简单”的前向计算看起来没有问题(我能得到正确的加权和/激活值/输出),但是一到训练过程,结果就不对了。能否请各位指出我哪里做错了?NNetwork 类的完整代码很长,所以我先贴出出问题的那一部分。[编辑]:现在贴出的其实差不多是整个 NNetwork 类。(标题:在Java中实现神经网络:训练和反向传播问题)

import java.util.ArrayList; 
import java.util.Arrays; 
import java.util.List; 

public class NNetwork 
{ 
    /** Default learning rate assigned to every trainable layer. */
    public static final double defaultLearningRate = 0.4; 
    /** Default momentum factor (alpha) applied to the previous weight deltas. */
    public static final double defaultMomentum = 0.8; 

    private NLayer inputLayer; 
    private ArrayList<NLayer> hiddenLayers; 
    private NLayer outputLayer; 

    /** All layers in forward order: input, hidden..., output. */
    private ArrayList<NLayer> layers; 

    // Bug fix: the original read "NNetwork1.defaultMomentum" — class "NNetwork1"
    // does not exist (the class is NNetwork), so the original did not compile.
    private double momentum = NNetwork.defaultMomentum; 

    /** One learning rate per trainable layer: hidden layers first, output layer last. */
    private ArrayList<Double> learningRates; 

    /**
     * Builds a network with the given topology.
     *
     * @param nInputs               number of input-layer neurons
     * @param nOutputs              number of output-layer neurons
     * @param neuronsPerHiddenLayer size of each hidden layer, in forward order
     */
    public NNetwork (int nInputs, int nOutputs, Integer... neuronsPerHiddenLayer) 
    { 
        this(nInputs, nOutputs, Arrays.asList(neuronsPerHiddenLayer)); 
    } 

    public NNetwork (int nInputs, int nOutputs, List<Integer> neuronsPerHiddenLayer) 
    { 
        // Number of neurons in the last layer built so far, i.e. the number of
        // inputs of each neuron in the next layer.
        int prvOuts = 1; 

        this.layers = new ArrayList<>(); 

        // Input layer: identity pass-through (weight 1, bias 0, no sigmoid).
        this.inputLayer = new NLayer(nInputs, prvOuts, this); 
        this.inputLayer.setAllWeightsTo(1.0); 
        this.inputLayer.setAllBiasesTo(0.0); 
        this.inputLayer.useSigmaForOutput(false); 
        prvOuts = nInputs; 
        this.layers.add(this.inputLayer); 

        // Hidden layers.
        this.hiddenLayers = new ArrayList<>(); 
        for (int i = 0; i < neuronsPerHiddenLayer.size(); i++) 
        { 
            this.hiddenLayers.add(new NLayer(neuronsPerHiddenLayer.get(i), prvOuts, this)); 
            prvOuts = neuronsPerHiddenLayer.get(i); 
        } 
        this.layers.addAll(this.hiddenLayers); 

        // Output layer.
        this.outputLayer = new NLayer(nOutputs, prvOuts, this); 
        this.layers.add(this.outputLayer); 

        this.initCoeffs(); 
    } 

    /** Initialises one learning rate per trainable layer (hidden layers + output layer). */
    private void initCoeffs() 
    { 
        this.learningRates = new ArrayList<>(); 
        // Bug fix: was "NNetwork1.defaultLearningRate" (non-existent class).
        for (int i = 0; i < this.hiddenLayers.size(); i++) 
            this.learningRates.add(NNetwork.defaultLearningRate); 

        // Learning rate of the output layer.
        this.learningRates.add(NNetwork.defaultLearningRate); 
    } 

    /**
     * Returns the learning rate of the given layer.
     *
     * @param layerIndex 1-based over trainable layers (1 = first hidden layer,
     *                   hiddenLayers.size()+1 = output layer); the input layer
     *                   (index 0) is not trainable and yields 0
     * @return the layer's learning rate, or 0 for an out-of-range index
     */
    public double getLearningRate (int layerIndex) 
    { 
        if (layerIndex > 0 && layerIndex <= this.hiddenLayers.size() + 1) 
        { 
            return this.learningRates.get(layerIndex - 1); 
        } 
        else 
        { 
            return 0; 
        } 
    } 

    /** Returns the live list of learning rates (not a defensive copy). */
    public ArrayList<Double> getLearningRates() 
    { 
        return this.learningRates; 
    } 

    /**
     * Sets the learning rate of one trainable layer; out-of-range indices are ignored.
     * Indexing matches {@link #getLearningRate(int)}.
     */
    public void setLearningRate (int layerIndex, double newLearningRate) 
    { 
        if (layerIndex > 0 && layerIndex <= this.hiddenLayers.size() + 1) 
        { 
            this.learningRates.set(layerIndex - 1, newLearningRate); 
        } 
    } 

    public void setLearningRates (Double... newLearningRates) 
    { 
        this.setLearningRates(Arrays.asList(newLearningRates)); 
    } 

    /**
     * Copies as many learning rates as both lists share; extra entries on either
     * side are silently ignored.
     */
    public void setLearningRates (List<Double> newLearningRates) 
    { 
        int len = Math.min(this.learningRates.size(), newLearningRates.size()); 

        for (int i = 0; i < len; i++) 
            this.learningRates.set(i, newLearningRates.get(i)); 
    } 

    public double getMomentum() 
    { 
        return this.momentum; 
    } 

    public void setMomentum (double momentum) 
    { 
        this.momentum = momentum; 
    } 

    /**
     * Returns the neuron at the given position.
     *
     * @param layerIndex  0 = input layer, 1..n = hidden layers, n+1 = output layer
     * @param neuronIndex index of the neuron within that layer
     */
    public NNeuron getNeuron (int layerIndex, int neuronIndex) 
    { 
        if (layerIndex == 0) 
            return this.inputLayer.getNeurons().get(neuronIndex); 
        else if (layerIndex == this.hiddenLayers.size() + 1) 
            return this.outputLayer.getNeurons().get(neuronIndex); 
        else 
            return this.hiddenLayers.get(layerIndex - 1).getNeurons().get(neuronIndex); 
    } 

    /** Forward pass: returns the output-layer activations for the given inputs. */
    public ArrayList<Double> getOutput (ArrayList<Double> inputs) 
    { 
        // The outputs of the last layer evaluated so far.
        ArrayList<Double> lastOuts = inputs; 

        // Input layer (one scalar input per input neuron).
        lastOuts = this.getInputLayerOutputs(lastOuts); 

        // Hidden layers.
        for (NLayer layer : this.hiddenLayers) 
            lastOuts = layer.getOutput(lastOuts); 

        // Output layer.
        lastOuts = this.outputLayer.getOutput(lastOuts); 

        return lastOuts; 
    } 

    /** Forward pass that keeps the activations of EVERY layer (input layer first). */
    public ArrayList<ArrayList<Double>> getAllOutputs (ArrayList<Double> inputs) 
    { 
        ArrayList<ArrayList<Double>> outs = new ArrayList<>(); 

        // Input layer.
        outs.add(this.getInputLayerOutputs(inputs)); 

        // Hidden layers, each fed with the previous layer's outputs.
        for (NLayer layer : this.hiddenLayers) 
            outs.add(layer.getOutput(outs.get(outs.size() - 1))); 

        // Output layer.
        outs.add(this.outputLayer.getOutput(outs.get(outs.size() - 1))); 

        return outs; 
    } 

    /**
     * Forward pass that keeps the pre-activation sums (sum(x_i * w_i)) of every
     * layer; for the input layer the raw inputs are stored as its "sums".
     */
    public ArrayList<ArrayList<Double>> getAllSums (ArrayList<Double> inputs) 
    { 
        ArrayList<ArrayList<Double>> sums = new ArrayList<>(); 
        ArrayList<Double> lastOut; 

        // Input layer: its "sums" are the inputs themselves.
        sums.add(inputs); 
        lastOut = this.getInputLayerOutputs(inputs); 

        // Hidden layers.
        for (NLayer layer : this.hiddenLayers) 
        { 
            sums.add(layer.getSums(lastOut)); 
            lastOut = layer.getOutput(lastOut); 
        } 

        // Output layer.
        sums.add(this.outputLayer.getSums(lastOut)); 

        return sums; 
    } 

    /**
     * Evaluates the input layer: each input neuron receives exactly one scalar,
     * so inputs.size() must equal the number of input neurons.
     */
    public ArrayList<Double> getInputLayerOutputs (ArrayList<Double> inputs) 
    { 
        ArrayList<Double> outs = new ArrayList<>(); 
        for (int i = 0; i < this.inputLayer.getNeurons().size(); i++) 
            outs.add(this.inputLayer.getNeuron(i).getOutput(inputs.get(i))); 
        return outs; 
    } 

    /**
     * Applies precomputed weight deltas layer by layer.
     * NOTE(review): assumes deltaW lists the hidden layers first and the output
     * layer last (the input layer is excluded) — confirm against NLayer.changeWeights.
     */
    public void changeWeights (
        ArrayList<ArrayList<Double>> deltaW, 
        ArrayList<ArrayList<Double>> inputSet, 
        ArrayList<ArrayList<Double>> targetSet, 
        boolean checkError) 
    { 
        for (int i = 0; i < deltaW.size() - 1; i++) 
            this.hiddenLayers.get(i).changeWeights(deltaW.get(i), inputSet, targetSet, checkError); 

        this.outputLayer.changeWeights(deltaW.get(deltaW.size() - 1), inputSet, targetSet, checkError); 
    } 

    /**
     * Stochastic backpropagation over the whole training set.
     *
     * @param inputSet  training inputs, one list per pattern
     * @param targetSet expected outputs, parallel to inputSet
     * @param maxError  stop once the summed MSE of one epoch drops to this value or below
     * @param maxIterations hard cap on the number of epochs
     * @return the number of epochs actually performed
     */
    public int train2 (
        ArrayList<ArrayList<Double>> inputSet, 
        ArrayList<ArrayList<Double>> targetSet, 
        double maxError, 
        int maxIterations) 
    { 
        ArrayList<Double> input, target; 

        // Previous epoch-step weight deltas, needed for the momentum term.
        ArrayList<ArrayList<ArrayList<Double>>> prvNetworkDeltaW = null; 

        double error; 

        int i = 0, j = 0, traininSetLength = inputSet.size(); 
        do // during each iteration...
        { 
            error = 0.0; 
            for (j = 0; j < traininSetLength; j++) // ... for each training element...
            { 
                input = inputSet.get(j); 
                target = targetSet.get(j); 
                // ... do backpropagation, and keep the new weight deltas for momentum.
                prvNetworkDeltaW = this.train2_bp(input, target, prvNetworkDeltaW); 

                error += this.getInputMeanSquareError(input, target); 
            } 

            i++; 
        } while (error > maxError && i < maxIterations); // iterate as long as necessary/possible

        return i; 
    } 

    /**
     * One backpropagation step for a single (input, target) pair.
     *
     * @return the weight deltas applied in this step (fed back in as
     *         prvNetworkDeltaW on the next call, for momentum)
     */
    public ArrayList<ArrayList<ArrayList<Double>>> train2_bp (
        ArrayList<Double> input, 
        ArrayList<Double> target, 
        ArrayList<ArrayList<ArrayList<Double>>> prvNetworkDeltaW) 
    { 
        ArrayList<ArrayList<Double>> layerSums = this.getAllSums(input);   // pre-activation sums per layer
        ArrayList<ArrayList<Double>> layerOutputs = this.getAllOutputs(input); // activations per layer

        // Per-neuron error terms (the input layer's entry is null).
        ArrayList<ArrayList<Double>> layerDeltas = this.train2_getLayerDeltas(layerSums, layerOutputs, target); 

        // Per-weight deltas, including the momentum contribution.
        ArrayList<ArrayList<ArrayList<Double>>> networkDeltaW = this.train2_getWeightDeltas(layerOutputs, layerDeltas, prvNetworkDeltaW); 

        // Apply the deltas.
        this.train2_updateWeights(networkDeltaW); 

        return networkDeltaW; 
    } 

    /** Applies the weight deltas to every trainable layer (index 0 = input layer is skipped). */
    public void train2_updateWeights (ArrayList<ArrayList<ArrayList<Double>>> networkDeltaW) 
    { 
        for (int i = 1; i < this.layers.size(); i++) 
            this.layers.get(i).train2_updateWeights(networkDeltaW.get(i)); 
    } 

    /**
     * Computes deltaW[layer][neuron][weight] = -lr * delta * x + momentum * prvDeltaW.
     * Each neuron's list holds one delta per incoming weight, plus a final entry
     * for the bias (whose input x is 1).
     */
    public ArrayList<ArrayList<ArrayList<Double>>> train2_getWeightDeltas (
        ArrayList<ArrayList<Double>>            layerOutputs, 
        ArrayList<ArrayList<Double>>            layerDeltas, 
        ArrayList<ArrayList<ArrayList<Double>>> prvNetworkDeltaW) 
    { 
        ArrayList<ArrayList<ArrayList<Double>>> networkDeltaW = new ArrayList<>(this.layers.size()); 
        ArrayList<ArrayList<Double>> layerDeltaW; 
        ArrayList<Double> neuronDeltaW; 

        // Placeholder per layer so that networkDeltaW is indexed like this.layers
        // (index 0, the input layer, keeps an empty list).
        for (int i = 0; i < this.layers.size(); i++) 
            networkDeltaW.add(new ArrayList<ArrayList<Double>>()); 

        double deltaW, x, learningRate, prvDeltaW, d; 

        int i, j, k; 
        for (i = this.layers.size() - 1; i > 0; i--) // for each trainable layer
        { 
            learningRate = this.getLearningRate(i); 

            layerDeltaW = new ArrayList<>(); 
            networkDeltaW.set(i, layerDeltaW); 

            for (j = 0; j < this.layers.get(i).getNeurons().size(); j++) // for each neuron of this layer
            { 
                neuronDeltaW = new ArrayList<>(); 
                layerDeltaW.add(neuronDeltaW); 

                for (k = 0; k < this.layers.get(i - 1).getNeurons().size(); k++) // each weight = one neuron of the previous layer
                { 
                    d = layerDeltas.get(i).get(j); 
                    x = layerOutputs.get(i - 1).get(k); 
                    prvDeltaW = (prvNetworkDeltaW != null) 
                        ? prvNetworkDeltaW.get(i).get(j).get(k) 
                        : 0.0; 

                    deltaW = -learningRate * d * x + this.momentum * prvDeltaW; 

                    neuronDeltaW.add(deltaW); 
                } 

                // The bias: treated as one extra weight with constant input 1,
                // stored as the LAST entry of neuronDeltaW.
                d = layerDeltas.get(i).get(j); 
                x = 1; 
                prvDeltaW = (prvNetworkDeltaW != null) 
                    ? prvNetworkDeltaW.get(i).get(j).get(prvNetworkDeltaW.get(i).get(j).size() - 1) 
                    : 0.0; 

                deltaW = -learningRate * d * x + this.momentum * prvDeltaW; 

                neuronDeltaW.add(deltaW); 
            } 
        } 

        return networkDeltaW; 
    } 

    /**
     * Computes the per-neuron error terms (deltas) of every layer.
     * Output layer: d = -(t - o) * f'(s). Hidden layers: d = f'(s) * sum_k(w_k * d_k)
     * over the following layer. The returned list is indexed like this.layers,
     * with null for the (untrainable) input layer.
     */
    ArrayList<ArrayList<Double>> train2_getLayerDeltas (
        ArrayList<ArrayList<Double>> layerSums, 
        ArrayList<ArrayList<Double>> layerOutputs, 
        ArrayList<Double> target) 
    { 
        // --- output layer deltas ---
        ArrayList<Double> outputDeltas = new ArrayList<>(); 
        double 
            oErr, // output error given a target
            s,    // pre-activation sum
            o,    // output (activation)
            d;    // delta
        int 
            nOutputs = target.size(),                  // assumed == outputLayer size
            nLayers = this.hiddenLayers.size() + 2;    // assumed == layerOutputs.size()

        for (int i = 0; i < nOutputs; i++) // for each output neuron...
        { 
            s = layerSums.get(nLayers - 1).get(i); 
            o = layerOutputs.get(nLayers - 1).get(i); 
            oErr = (target.get(i) - o); 
            // NOTE(review): sigmaPrime is assumed to take the raw sum "s" — confirm in NNeuron.
            d = -oErr * this.getNeuron(nLayers - 1, i).sigmaPrime(s); 

            outputDeltas.add(d); 
        } 

        // --- hidden layer deltas, propagated backwards ---
        ArrayList<ArrayList<Double>> hiddenDeltas = new ArrayList<>(); 
        for (int i = 0; i < this.hiddenLayers.size(); i++) 
            hiddenDeltas.add(new ArrayList<Double>()); 

        NLayer nextLayer = this.outputLayer; 
        ArrayList<Double> nextDeltas = outputDeltas; 

        int nHidden = this.hiddenLayers.size(); 
        double wdSum; 
        for (int i = nHidden - 1; i >= 0; i--) // for each hidden layer, last to first
        { 
            // Bug fix: the neuron count must come from layer i. The original
            // computed it ONCE from the last hidden layer and reused it for all
            // hidden layers — wrong whenever layers differ in size, and it threw
            // IndexOutOfBoundsException when there were no hidden layers at all.
            int nNeurons = this.hiddenLayers.get(i).getNeurons().size(); 
            for (int h = 0; h < nNeurons; h++) 
            { 
                // Weighted sum of the next layer's deltas through the weights
                // that connect this neuron forward.
                wdSum = 0.0; 
                for (int k = 0; k < nextLayer.getNeurons().size(); k++) 
                { 
                    wdSum += nextLayer.getNeuron(k).getWeight(h) * nextDeltas.get(k); 
                } 

                s = layerSums.get(i + 1).get(h); 
                d = this.getNeuron(i + 1, h).sigmaPrime(s) * wdSum; 

                hiddenDeltas.get(i).add(d); 
            } 

            nextLayer = this.hiddenLayers.get(i); 
            nextDeltas = hiddenDeltas.get(i); 
        } 

        ArrayList<ArrayList<Double>> deltas = new ArrayList<>(); 

        // Input layer deltas: none.
        deltas.add(null); 

        // Hidden layers' deltas.
        deltas.addAll(hiddenDeltas); 

        // Output layer deltas.
        deltas.add(outputDeltas); 

        return deltas; 
    } 

    /**
     * Half the summed squared error of the network on one pattern:
     * 0.5 * sum_i (t_i - o_i)^2.
     */
    public double getInputMeanSquareError (ArrayList<Double> input, ArrayList<Double> target) 
    { 
        double diff, mse = 0.0; 
        ArrayList<Double> output = this.getOutput(input); 
        for (int i = 0; i < target.size(); i++) 
        { 
            diff = target.get(i) - output.get(i); 
            mse += (diff * diff); 
        } 

        mse /= 2.0; 

        return mse; 
    } 

} 

一些方法的名称(连同它们的返回值/类型)含义很直观:例如 "this.getAllSums" 返回每一层的加权和(sum(x_i * w_i)),"this.getAllOutputs" 返回每一层的输出(即每个神经元的 sigmoid(sum)),而 "this.getNeuron(i, j)" 返回第 i 层的第 j 个神经元。

预先感谢您的帮助:)

+2

发布更多的代码! – Bohemian 2012-03-30 23:18:16

+0

[编辑]:我把整个NNetwork类现在:) – 865719 2012-03-30 23:32:04

+0

我不明白你的代码,它为时已晚。 :D但是您应该检查具有有限差异的渐变:http://en.wikipedia.org/wiki/Finite_difference,它应该简化调试。 – alfa 2012-03-30 23:53:27

回答

5

我试着通读了你的代码,但正如你所说,它相当长。

这里是我的建议:

  • 要确认您的网络是否学习,尽力培养一个简单的网络,像识别XOR运算的网络。这不应该持续很长时间。
  • 使用最简单的反向传播算法。随机反向传播(在每次训练输入呈现后更新权重)是最简单的。最初实施没有动量项的算法,并且具有恒定的学习速率(即,不要以自适应学习速率开始)。一旦你对算法的工作感到满意,你可以引入动量项。在同一时间做太多事情会增加不止一件事情可能出错的机会。这使你很难看到你出错的地方。
  • 如果你想查看一些代码,你可以查看一下code that I wrote;你想看看Backpropagator.java。我基本上用动量项实现了随机反向传播算法。我也有一个video,我提供了一个快速解释我的反向传播算法的实现。

希望这有些帮助!

+0

谢谢你的帮助:) 其实,我从头开始写了一个全新的代码(一个更可读的代码,使用矩阵操作等):我试过一些例子(函数近似/预测),它似乎给出了一些好的结果。 我现在的问题是:我已经实现了反向传播的简单版本(其中权重在呈现每个模式后更新)。现在我想实施所谓的“批量”反向传播,但我不明白如何这样做。其实,我没有找到任何教程/课程处理批量训练:( – 865719 2012-04-19 14:54:27

+0

有一些批次训练的伪代码[here](http://www.byclb.com/TR/Tutorials/neural_networks/ch10_1.htm)。有点数学,但要警告!:) – 2012-04-19 16:13:49

+0

这是非常有趣的(即使我没有读过它)昨晚,我认为我已经成功实施了批量训练:D现在它只是激活功能选择/输入缩放的问题。 – 865719 2012-04-20 11:18:47

4

这是一个非常简单的Java实现与主方法测试:

import java.util.Arrays; 
import java.util.Random; 

public class MLP { 

public static class MLPLayer { 

    float[] output; 
    float[] input; 
    float[] weights; 
    float[] dweights; 
    boolean isSigmoid = true; 

    public MLPLayer(int inputSize, int outputSize, Random r) { 
    output = new float[outputSize]; 
    input = new float[inputSize + 1]; 
    weights = new float[(1 + inputSize) * outputSize]; 
    dweights = new float[weights.length]; 
    initWeights(r); 
    } 

    public void setIsSigmoid(boolean isSigmoid) { 
    this.isSigmoid = isSigmoid; 
    } 

    public void initWeights(Random r) { 
    for (int i = 0; i < weights.length; i++) { 
    weights[i] = (r.nextFloat() - 0.5f) * 4f; 
    } 
    } 

    public float[] run(float[] in) { 
    System.arraycopy(in, 0, input, 0, in.length); 
    input[input.length - 1] = 1; 
    int offs = 0; 
    Arrays.fill(output, 0); 
    for (int i = 0; i < output.length; i++) { 
    for (int j = 0; j < input.length; j++) { 
    output[i] += weights[offs + j] * input[j]; 
    } 
    if (isSigmoid) { 
    output[i] = (float) (1/(1 + Math.exp(-output[i]))); 
    } 
    offs += input.length; 
    } 
    return Arrays.copyOf(output, output.length); 
    } 

    public float[] train(float[] error, float learningRate, float momentum) { 
    int offs = 0; 
    float[] nextError = new float[input.length]; 
    for (int i = 0; i < output.length; i++) { 
    float d = error[i]; 
    if (isSigmoid) { 
    d *= output[i] * (1 - output[i]); 
    } 
    for (int j = 0; j < input.length; j++) { 
    int idx = offs + j; 
    nextError[j] += weights[idx] * d; 
    float dw = input[j] * d * learningRate; 
    weights[idx] += dweights[idx] * momentum + dw; 
    dweights[idx] = dw; 
    } 
    offs += input.length; 
    } 
    return nextError; 
    } 
} 
MLPLayer[] layers; 

public MLP(int inputSize, int[] layersSize) { 
    layers = new MLPLayer[layersSize.length]; 
    Random r = new Random(1234); 
    for (int i = 0; i < layersSize.length; i++) { 
    int inSize = i == 0 ? inputSize : layersSize[i - 1]; 
    layers[i] = new MLPLayer(inSize, layersSize[i], r); 
    } 
} 

public MLPLayer getLayer(int idx) { 
    return layers[idx]; 
} 

public float[] run(float[] input) { 
    float[] actIn = input; 
    for (int i = 0; i < layers.length; i++) { 
    actIn = layers[i].run(actIn); 
    } 
    return actIn; 
} 

public void train(float[] input, float[] targetOutput, float learningRate, float momentum) { 
    float[] calcOut = run(input); 
    float[] error = new float[calcOut.length]; 
    for (int i = 0; i < error.length; i++) { 
    error[i] = targetOutput[i] - calcOut[i]; // negative error 
    } 
    for (int i = layers.length - 1; i >= 0; i--) { 
    error = layers[i].train(error, learningRate, momentum); 
    } 
} 

public static void main(String[] args) throws Exception { 
    float[][] train = new float[][]{new float[]{0, 0}, new float[]{0, 1}, new float[]{1, 0}, new float[]{1, 1}}; 
    float[][] res = new float[][]{new float[]{0}, new float[]{1}, new float[]{1}, new float[]{0}}; 
    MLP mlp = new MLP(2, new int[]{2, 1}); 
    mlp.getLayer(1).setIsSigmoid(false); 
    Random r = new Random(); 
    int en = 500; 
    for (int e = 0; e < en; e++) { 

    for (int i = 0; i < res.length; i++) { 
    int idx = r.nextInt(res.length); 
    mlp.train(train[idx], res[idx], 0.3f, 0.6f); 
    } 

    if ((e + 1) % 100 == 0) { 
    System.out.println(); 
    for (int i = 0; i < res.length; i++) { 
    float[] t = train[i]; 
    System.out.printf("%d epoch\n", e + 1); 
    System.out.printf("%.1f, %.1f --> %.3f\n", t[0], t[1], mlp.run(t)[0]); 
    } 
    } 
    } 
} 
} 
+0

如果我将sigmoid设置为false;重量击中NaN。任何想法为什么? – 2017-12-07 14:23:57