2016-11-16 45 views
0

我正在写一个基于计算图的反向传播神经网络,作为个人项目,目前还只做到前向传播这一步。代码可以编译,但大约一半的运行能成功,另一半会在最后一步崩溃,看起来是死在某个类似垃圾回收的内存释放步骤里。我对虚函数和 static_cast 还不熟悉,所以想知道问题是否出在这些地方。GDB 的输出是:“Program received signal SIGABRT, Aborted. 0x00000000100404740 in __gnu_cxx::new_allocator::deallocate(double*, unsigned long) ()”,并且间歇性地出现“Aborted (core dumped)”。会不会是 static_cast 的问题?

代码前半部分的那些函数应该不是问题所在,因为它们在我更简单的旧版神经网络(不使用图)中工作正常。我猜问题出在某个结构体里。

更新:如果用固定种子 123 代替基于时间的随机种子来生成随机数,程序每次都能正常运行;种子为 124 时则每次都失败。去掉随机性、改用恒定的权重,程序同样每次都能运行。我很困惑!

#include <bits/stdc++.h> 
using namespace std; 

// Debug helper: streams `expression = value` and a newline (endl also flushes).
// The argument is parenthesised so that expressions containing low-precedence
// operators (for example `a ? b : c`) are streamed as a whole.
#define p(x) std::cout << #x << " = " << (x) << std::endl
// Function-like minimum. Fully parenthesised so it composes correctly inside
// larger expressions; each argument may still be evaluated twice.
#define min(a,b) ((a) < (b) ? (a) : (b))
typedef std::vector<double> d1; // rank-1 tensor of doubles
typedef std::vector<d1> d2;     // rank-2 tensor (vector of rows)
typedef std::vector<d2> d3;     // rank-3 tensor (vector of matrices)
typedef std::vector<int> i1;    // rank-1 tensor of ints

// Returns the index of the first largest element of x.
// An empty vector yields 0 instead of reading x[0] out of bounds.
// The vector is taken by const reference, so it is no longer copied per call.
int argmax(const std::vector<double>& x){
    // Debug trace: same text the original emitted through the p() macro.
    std::cout << "x.size() = " << x.size() << std::endl;
    // max_element keeps the first of several equal maxima, matching the
    // strict `>` comparison of the hand-written loop it replaces.
    return static_cast<int>(std::max_element(x.begin(), x.end()) - x.begin());
}

// Builds a vector of n doubles, all 0.
d1 zeros(int n){
    d1 blank(n, 0.0);
    return blank;
}

// Builds a rows-by-cols matrix filled with 0.
d2 zeros(int rows, int cols){
    const d1 blankRow(cols, 0.0);
    return d2(rows, blankRow);
}

// Builds x matrices, each rows-by-cols, filled with 0.
d3 zeros(int x, int rows, int cols){
    const d2 blankMatrix(rows, d1(cols, 0.0));
    return d3(x, blankMatrix);
}

void print(d1 x){ 
    for (double d: x) 
    cout << d << endl; 
    cout << endl; 
} 

void print(d2 x){ 
    for (auto row: x){ 
    for (double d: row){ 
     cout << d << " "; 
    } 
    cout << endl; 
    } 
    cout << endl; 
} 

void print(d3 x){ 
    for (d2 X: x) 
    print(X); 
} 



// Splits the flat vector x into consecutive chunks of `rows` elements and
// appends each chunk to y as one inner vector. Despite its name, `rows` is the
// length of each appended inner vector. Trailing elements that do not fill a
// whole chunk are dropped.
//
// Fixes over the original:
//  * chunks are appended via emplace_back instead of being written to y[i],
//    so a non-empty y is extended rather than corrupted;
//  * rows <= 0 is a no-op instead of dividing by zero.
void toRank2(std::vector<double>& x, int rows, std::vector<std::vector<double>>& y){
    if (rows <= 0)
        return;
    const std::size_t width = static_cast<std::size_t>(rows);
    const std::size_t count = x.size() / width;
    y.reserve(y.size() + count);
    for (std::size_t i = 0; i < count; ++i){
        const auto first = x.begin() + static_cast<std::ptrdiff_t>(i * width);
        y.emplace_back(first, first + static_cast<std::ptrdiff_t>(width));
    }
}

// Splits the flat vector x into consecutive rows-by-cols matrices (row-major:
// element (i, row, col) is x[i*rows*cols + row*cols + col]) and appends each
// matrix to y. Trailing elements that do not fill a whole matrix are dropped.
//
// Fixes over the original:
//  * matrices are appended via emplace_back instead of being written to y[i],
//    so a non-empty y is extended rather than corrupted;
//  * rows <= 0 or cols <= 0 is a no-op instead of dividing by zero.
void toRank3(std::vector<double>& x, int rows, int cols,
             std::vector<std::vector<std::vector<double>>>& y){
    if (rows <= 0 || cols <= 0)
        return;
    const std::size_t height = static_cast<std::size_t>(rows);
    const std::size_t width = static_cast<std::size_t>(cols);
    const std::size_t slab = height * width;
    const std::size_t count = x.size() / slab;
    y.reserve(y.size() + count);
    for (std::size_t i = 0; i < count; ++i){
        y.emplace_back();
        std::vector<std::vector<double>>& plane = y.back();
        plane.reserve(height);
        for (std::size_t row = 0; row < height; ++row){
            const auto first = x.begin() + static_cast<std::ptrdiff_t>(i * slab + row * width);
            plane.emplace_back(first, first + static_cast<std::ptrdiff_t>(width));
        }
    }
}

// Returns `size` samples drawn from a normal distribution with the given mean
// and standard deviation.
//
// The engine is a function-local static seeded once, from the wall clock, on
// the first call, so successive calls continue one random stream. The
// distribution is rebuilt on every call: the original kept it static too,
// which silently ignored `mean` / `standard_deviation` on every call after
// the first.
std::vector<double> getRandomDoubles(int size, double mean=0, double standard_deviation=1){
    static std::default_random_engine generator(static_cast<unsigned>(std::time(nullptr)));
    std::normal_distribution<double> distribution(mean, standard_deviation);
    std::vector<double> data(size);
    std::generate(data.begin(), data.end(), [&distribution]() { return distribution(generator); });
    return data;
}

// Rank-2 variant: draws rows*cols samples through the flat overload and
// regroups them with toRank2 into inner vectors of `cols` elements each.
d2 getRandomDoubles(int rows, int cols, double mean=0, double standard_deviation=1){
    d1 flat = getRandomDoubles(rows*cols, mean, standard_deviation);
    d2 matrix;
    toRank2(flat, cols, matrix);
    return matrix;
}

// Rank-3 variant: draws depth*rows*cols samples through the flat overload and
// regroups them with toRank3 into rows-by-cols matrices.
d3 getRandomDoubles(int depth, int rows, int cols, double mean=0, double standard_deviation=1){
    const int total = depth*rows*cols;
    d1 flat = getRandomDoubles(total, mean, standard_deviation);
    d3 cube;
    toRank3(flat, rows, cols, cube);
    return cube;
}

// Base class of every vertex in the computation graph.
// A node stores non-owning pointers to its parents and children; linked nodes
// must therefore outlive every propagation call that reaches them.
struct Node{
    std::vector<Node*> parents, children;
    bool ready=false; // not read or written by any member of Node itself

    // Node is a polymorphic base (update_state is virtual), so destroying a
    // derived node through a Node* must reach the derived destructor.
    virtual ~Node() = default;

    // Links n below this node: n is appended to children, and this node is
    // appended to n.parents. Derived nodes pick their inputs by position in
    // parents, so the order of add_child calls across parents matters.
    void add_child(Node& n){
        children.push_back(&n);
        n.parents.push_back(this);
    }

    // Calls update_state() on every child, in insertion order.
    // NOTE: there is no readiness tracking; a child is updated each time any
    // one of its parents propagates, whether or not the others have run.
    void forward_propagate(){
        std::cout << "starting r2 forward" << std::endl;
        for (Node* n: children){
            std::cout << "loop" << std::endl;
            n->update_state();
        }
        std::cout << "exiting r2 forward" << std::endl;
    }

    // Hook overridden by derived nodes to recompute their state.
    // The base version only passes the update on to the children.
    virtual void update_state(){
        forward_propagate();
    }
};

struct r1:Node{ 
    vector<double> state; 
    int r; 

    r1(){} 

    r1(int R){ 
    r=R; 
    state = vector<double>(r); 
    } 
}; 

struct r2:Node{ 
    vector<vector<double>> state; 
    int r,c; 

    r2(){} 
    r2(int R, int C){ 
    r=R; 
    c=C; 
    state = zeros(r, c); 
    } 
}; 

struct r3:Node{ 
    d3 state; 
    int r, c, d; 
    r3(){} 
    r3(int R, int C, int D){ 
    r=R; 
    c=C; 
    d=D; 
    state = zeros(R,C,D); 
    } 
}; 

struct MatrixProduct1_1: r1{ 
    MatrixProduct1_1(int n):r1(n){} 

    void update_state() override{ 
    cout << "mat11" << endl; 
    d2& W = static_cast<r2*>(parents[0])->state; 
    d1& x = static_cast<r1*>(parents[1])->state; 
    state = zeros(r); 
    for (int i=0; i<W.size(); i++) 
    for (int j=0; j<W[0].size(); j++) 
    state[i] += W[i][j]*x[j]; 
    forward_propagate(); 
    } 
}; 

struct MatrixProduct2_1: r1{ 
    MatrixProduct2_1(int n):r1(n){} 

    void update_state() override{ 
    cout << "matt21" << endl; 
    d3& W = static_cast<r3*>(parents[0])->state; 
    d2& x = static_cast<r2*>(parents[1])->state; 
    state = zeros(r); 
    for (int i=0; i<W.size(); i++) 
    for (int j=0; j<W[0].size(); j++) 
    for (int k=0; k<W[0][0].size(); k++) 
    state[k] += W[i][j][k]*x[i][j]; 
    forward_propagate(); 
    } 
}; 

struct Convolution: r2{ 
    Convolution(int r, int c): r2(r, c){} 
    void update_state() override{ 
    cout << "convolving" << endl; 
    state = zeros(r, c); 
    d2& W = static_cast<r2*>(parents[0])->state; 
    d2& x = static_cast<r2*>(parents[1])->state; 

    int wCenterX = W[0].size()/2; 
    int wCenterY = W.size()/2; 
    int rows = x.size(), cols = x[0].size(); 
    int wRows = W.size(), wCols = W[0].size(); 

    //#pragma omp parallel for 
    for(int i=0; i < rows; i++) 
    for(int j=0; j < cols; j++) 
    for(int m=0; m < W.size(); m++){ 
     int mm = W.size() - 1 - m; 
     for(int n=0; n < wCols; n++){ 
     int nn = wCols - 1 - n; 
     int ii = i + (m - wCenterY); 
     int jj = j + (n - wCenterX); 
     if (ii >= 0 && ii < rows && jj >= 0 && jj < cols) 
     state[i][j] += x[ii][jj] * W[mm][nn]; 
     } 
    } 
    forward_propagate(); 
    } 
}; 


struct RELU: r2{ 
    RELU(int r, int c):r2(r, c){} 
    void update_state() override{ 
    cout << "relu2" << endl; 
    state = zeros(r,c); 
    d2& x = static_cast<r2*>(parents[0])->state; 
    for (int i=0; i<state.size(); i++) 
    for (int j=0; j<state[0].size(); j++) 
    if (x[i][j] > 0) 
    state[i][j] = x[i][j]; 
    forward_propagate(); 
    } 
}; 

struct Softmax: r1{ 
    Softmax(int r):r1(r){} 
    void update_state() override{ 
    cout << "softmax" << endl; 
    state = zeros(r); 
    p(parents.size()); 
    d1& x = static_cast<r1*>(parents[0])->state; 
    cout << "got state" << endl; 
    //p(x.size()); 
    //print(x); 

    p(x.size()); 
    cout << "argmax " << argmax(x) << endl; 
    double largest = x[argmax(x)]; 
    double lndenom = largest; 
    double expsum = 0; 
    cout << "starting expsum" << endl; 
    for (int i=0; i<x.size(); i++) 
    //expsum += exp(x[i]-largest); 
    expsum += x[i] - largest; 
    cout << "next loop " << endl; 
    for (int i=0; i<x.size(); i++) 
    // state[i] = exp(x[i]-largest)/expsum; 
    state[i] = x[i]-largest; 
    cout << "forward proping" << endl; 
    cout << "weird" << endl; 
    // forward_propagate(); 
    cout << "done with softmax" <<endl; 
    } 
}; 

struct Add1: r1{ 
    Add1(int r):r1(r){} 
    void update_state() override{ 
    cout << "add1ing" << endl; 
    d1& x = static_cast<r1*>(parents[0])->state; 
    d1& y = static_cast<r1*>(parents[1])->state; 
    for (int i=0; i<r; i++) 
    state[i] = x[i]+y[i]; 
    forward_propagate(); 
    } 
}; 

struct Add2: r2{ 
    Add2(int r, int c): r2(r, c){} 
    void update_state() override{ 
    d2& x = static_cast<r2*>(parents[0])->state; 
    d2& y = static_cast<r2*>(parents[1])->state; 
    for (int i=0; i<x.size(); i++) 
    for (int j=0; j<x[0].size(); j++) 
    state[i][j] = x[i][j] + y[i][j]; 
    forward_propagate(); 
    } 
}; 

struct MaxPool: r2{ 
    MaxPool(int r, int c): r2(r, c){} 
    void update_state() override{ 
    d2& x = static_cast<r2*>(parents[0])->state; 
    for (int i=0; i<x.size(); i+=2) 
    for (int j=0; j<x[0].size(); j+=2) 
    state[i/2][j/2] = max(max(x[i][j], x[i+1][j]), max(x[i+1][j], x[i+1][j+1])); 
    forward_propagate(); 
    } 
}; 

// Builds a small network
//   input -> convolution -> +bias -> ReLU -> 2x2 max-pool -> fully connected
//         -> +bias -> softmax
// out of graph nodes filled with random values, then runs one forward pass
// starting at the input node.
//
// Fix: the fully connected weights are now 10 x 14 x 14 so that each slice
// matches the 14x14 map produced by the max-pool node; the original built
// them as 10 x 28 x 28, which does not match the layer's input.
int main(){
    Node root;
    r2 x;                                   // 28x28 input
    x.state = getRandomDoubles(28,28);
    r2 wConv;                               // 10x10 convolution kernel
    wConv.state = getRandomDoubles(10, 10);
    root.add_child(x);
    root.add_child(wConv);

    // Parent order matters: Convolution reads parents[0] as the kernel and
    // parents[1] as the input.
    Convolution c(28,28);
    wConv.add_child(c);
    x.add_child(c);

    Add2 a(28,28);                          // convolution output + bias
    r2 bConv(28,28);
    bConv.state = getRandomDoubles(28,28);
    c.add_child(a);
    bConv.add_child(a);

    RELU r(28,28);
    a.add_child(r);
    MaxPool max(14, 14);                    // 28x28 -> 14x14
    r.add_child(max);

    r3 wFull(10,14,14);                     // one 14x14 weight slice per output
    wFull.state = getRandomDoubles(10,14,14);
    // MatrixProduct2_1 reads parents[0] as the weights, parents[1] as the input.
    MatrixProduct2_1 full(10);
    wFull.add_child(full);
    max.add_child(full);

    r1 bFull(10);
    bFull.state = getRandomDoubles(10);
    Add1 aFull(10);
    aFull.state[0] = 123;                   // overwritten by the first update
    full.add_child(aFull);
    bFull.add_child(aFull);

    Softmax s(10);
    aFull.add_child(s);

    // Propagate from the input only: every other leaf holds constants, so
    // this single chain updates each computed node exactly once.
    x.forward_propagate();
    cout << "returning main";
}
+1

`min` 宏是出了名的容易出错,而这里的写法尤其糟糕。它也没有存在的必要,因为已经有现成好用的 `std::min`。这还不是你重复造的唯一一个轮子——`argmax` 其实就是 `std::max_element`。 – MSalters

回答

1

static_cast应该是很少需要的。这也不例外。你的节点真的应该知道他们的邻居有什么类型。

我一时看不出具体的问题所在,不过我对神经网络很熟悉。像 struct MatrixProduct1_1: r1 这样的代码几乎就是一个危险信号:为什么它是一个结构体?为什么要继承自 r1?在神经网络理论中,矩阵乘积只是用来表达两层节点之间全连接的方式。另外,节点通常只有标量激活值。

激活函数可以使用继承来实现,但是您可以从Node继承。这意味着你不能也有那些r1 .. r3类型,但我不明白这些。

TLDR:这些类型都搞砸了,你用static_cast来隐藏它,但这只会使它编译,它不会使它正确。

-1

已解决!这个错误是由 MatrixProduct2_1 中使用了错误的索引引起的。我从神经网络的末端开始逐个删除节点,以确定哪个节点是 bug 的来源,并对传给 vector::operator[] 的下标加了断言。结果发现我在越界访问,导致了未定义行为。至于为什么某些种子能运行(结果大概也是不正确的),我完全不清楚。

我还修改了 main 中创建 wFull 时的形状,以及为 MatrixProduct2_1 所用权重调用 getRandomDoubles 时的参数。完整的新版本:

#include <bits/stdc++.h> 
using namespace std; 

// Debug helper: streams `expression = value` and a newline (endl also flushes).
// The argument is parenthesised so that expressions containing low-precedence
// operators (for example `a ? b : c`) are streamed as a whole.
#define p(x) std::cout << #x << " = " << (x) << std::endl
// The hand-rolled `min` macro is intentionally disabled; use std::min instead.
//#define min(a,b) a<b ? a : b
typedef std::vector<double> d1; // rank-1 tensor of doubles
typedef std::vector<d1> d2;     // rank-2 tensor (vector of rows)
typedef std::vector<d2> d3;     // rank-3 tensor (vector of matrices)
typedef std::vector<int> i1;    // rank-1 tensor of ints
int seed = 0;          // RNG seed; main overwrites it from argv[1] when one is given
bool time_seed = true; // true: getRandomDoubles seeds from the clock instead of `seed`

// Returns the index of the first largest element of x.
// An empty vector yields 0 instead of reading x[0] out of bounds.
// The vector is taken by const reference, so it is no longer copied per call.
int argmax(const std::vector<double>& x){
    // Debug trace: same text the original emitted through the p() macro.
    std::cout << "x.size() = " << x.size() << std::endl;
    // max_element keeps the first of several equal maxima, matching the
    // strict `>` comparison of the hand-written loop it replaces.
    return static_cast<int>(std::max_element(x.begin(), x.end()) - x.begin());
}

// Builds a vector of n doubles, all 0.
d1 zeros(int n){
    d1 blank(n, 0.0);
    return blank;
}

// Builds a rows-by-cols matrix filled with 0.
d2 zeros(int rows, int cols){
    const d1 blankRow(cols, 0.0);
    return d2(rows, blankRow);
}

// Builds x matrices, each rows-by-cols, filled with 0.
d3 zeros(int x, int rows, int cols){
    const d2 blankMatrix(rows, d1(cols, 0.0));
    return d3(x, blankMatrix);
}

void print(d1 x){ 
    for (double d: x) 
    cout << d << endl; 
    cout << endl; 
} 

void print(d2 x){ 
    for (auto row: x){ 
    for (double d: row){ 
     cout << d << " "; 
    } 
    cout << endl; 
    } 
    cout << endl; 
} 

void print(d3 x){ 
    for (d2 X: x) 
    print(X); 
} 



// Splits the flat vector x into consecutive chunks of `rows` elements and
// appends each chunk to y as one inner vector. Despite its name, `rows` is the
// length of each appended inner vector. Trailing elements that do not fill a
// whole chunk are dropped.
//
// Fixes over the original:
//  * chunks are appended via emplace_back instead of being written to y[i],
//    so a non-empty y is extended rather than corrupted;
//  * rows <= 0 is a no-op instead of dividing by zero.
void toRank2(std::vector<double>& x, int rows, std::vector<std::vector<double>>& y){
    if (rows <= 0)
        return;
    const std::size_t width = static_cast<std::size_t>(rows);
    const std::size_t count = x.size() / width;
    y.reserve(y.size() + count);
    for (std::size_t i = 0; i < count; ++i){
        const auto first = x.begin() + static_cast<std::ptrdiff_t>(i * width);
        y.emplace_back(first, first + static_cast<std::ptrdiff_t>(width));
    }
}

// Splits the flat vector x into consecutive rows-by-cols matrices (row-major:
// element (i, row, col) is x[i*rows*cols + row*cols + col]) and appends each
// matrix to y. Trailing elements that do not fill a whole matrix are dropped.
//
// Fixes over the original:
//  * matrices are appended via emplace_back instead of being written to y[i],
//    so a non-empty y is extended rather than corrupted;
//  * rows <= 0 or cols <= 0 is a no-op instead of dividing by zero.
void toRank3(std::vector<double>& x, int rows, int cols,
             std::vector<std::vector<std::vector<double>>>& y){
    if (rows <= 0 || cols <= 0)
        return;
    const std::size_t height = static_cast<std::size_t>(rows);
    const std::size_t width = static_cast<std::size_t>(cols);
    const std::size_t slab = height * width;
    const std::size_t count = x.size() / slab;
    y.reserve(y.size() + count);
    for (std::size_t i = 0; i < count; ++i){
        y.emplace_back();
        std::vector<std::vector<double>>& plane = y.back();
        plane.reserve(height);
        for (std::size_t row = 0; row < height; ++row){
            const auto first = x.begin() + static_cast<std::ptrdiff_t>(i * slab + row * width);
            plane.emplace_back(first, first + static_cast<std::ptrdiff_t>(width));
        }
    }
}

// Returns `size` samples drawn from a normal distribution with the given mean
// and standard deviation.
//
// The engine is a function-local static, seeded once on the first call: from
// the clock when time_seed is set, otherwise from the global `seed`. The
// distribution is rebuilt on every call: the original kept it static too,
// which silently ignored `mean` / `standard_deviation` on every call after
// the first.
//
// NOTE(review): the default mean here is 1 while the rank-2 and rank-3
// overloads default to 0. With the distribution no longer frozen, a call such
// as getRandomDoubles(10) really does use mean 1 - confirm that is intended.
d1 getRandomDoubles(int size, double mean=1, double standard_deviation=1){
    if (time_seed)
        seed=time(NULL);
    static default_random_engine generator(seed);
    normal_distribution<double> distribution(mean, standard_deviation);
    d1 data(size);
    generate(data.begin(), data.end(), [&distribution]() { return distribution(generator); });
    return data;
}

// Rank-2 variant: draws rows*cols samples through the flat overload and
// regroups them with toRank2 into inner vectors of `cols` elements each.
d2 getRandomDoubles(int rows, int cols, double mean=0, double standard_deviation=1){
    d1 flat = getRandomDoubles(rows*cols, mean, standard_deviation);
    d2 matrix;
    toRank2(flat, cols, matrix);
    return matrix;
}

// Rank-3 variant: draws depth*rows*cols samples through the flat overload and
// regroups them with toRank3 into rows-by-cols matrices.
d3 getRandomDoubles(int depth, int rows, int cols, double mean=0, double standard_deviation=1){
    const int total = depth*rows*cols;
    d1 flat = getRandomDoubles(total, mean, standard_deviation);
    d3 cube;
    toRank3(flat, rows, cols, cube);
    return cube;
}

// Base class of every vertex in the computation graph.
// A node stores non-owning pointers to its parents and children; linked nodes
// must therefore outlive every propagation call that reaches them.
struct Node{
    std::vector<Node*> parents, children;
    bool ready=false; // not read or written by any member of Node itself

    // Node is a polymorphic base (update_state is virtual), so destroying a
    // derived node through a Node* must reach the derived destructor.
    virtual ~Node() = default;

    // Links n below this node: n is appended to children, and this node is
    // appended to n.parents. Derived nodes pick their inputs by position in
    // parents, so the order of add_child calls across parents matters.
    void add_child(Node& n){
        children.push_back(&n);
        n.parents.push_back(this);
    }

    // Calls update_state() on every child, in insertion order.
    // NOTE: there is no readiness tracking; a child is updated each time any
    // one of its parents propagates, whether or not the others have run.
    void forward_propagate(){
        std::cout << "starting r2 forward" << std::endl;
        for (Node* n: children){
            std::cout << "loop" << std::endl;
            n->update_state();
        }
        std::cout << "exiting r2 forward" << std::endl;
    }

    // Hook overridden by derived nodes to recompute their state.
    // The base version only passes the update on to the children.
    virtual void update_state(){
        forward_propagate();
    }
};

struct r1:Node{ 
    vector<double> state; 
    int r; 

    r1(){} 

    r1(int R){ 
    r=R; 
    state = vector<double>(r); 
    } 
}; 

struct r2:Node{ 
    vector<vector<double>> state; 
    int r,c; 

    r2(){} 
    r2(int R, int C){ 
    r=R; 
    c=C; 
    state = zeros(r, c); 
    } 
}; 

struct r3:Node{ 
    d3 state; 
    int r, c, d; 
    r3(){} 
    r3(int R, int C, int D){ 
    r=R; 
    c=C; 
    d=D; 
    state = zeros(R,C,D); 
    } 
}; 

struct MatrixProduct1_1: r1{ 
    MatrixProduct1_1(int n):r1(n){} 

    void update_state() override{ 
    cout << "mat11" << endl; 
    d2& W = static_cast<r2*>(parents[0])->state; 
    d1& x = static_cast<r1*>(parents[1])->state; 
    state = zeros(r); 
    for (int i=0; i<W.size(); i++) 
    for (int j=0; j<W[0].size(); j++) 
    state[i] += W[i][j]*x[j]; 
    forward_propagate(); 
    } 
}; 

struct MatrixProduct2_1: r1{ 
    MatrixProduct2_1(int n):r1(n){} 

    void update_state() override{ 
    cout << "matt21" << endl; 
    d3& W = static_cast<r3*>(parents[0])->state; 
    d2& x = static_cast<r2*>(parents[1])->state; 
    p(x.size()); 
    p(W.size()); 
    p(x[0].size()); 
    p(W[0].size()); 
    p(W[0][0].size()); 
    p(state.size()); 
    // assert (x.size()==W.size()); 
    // assert (x[0].size()==W[0].size()); 
    // assert (state.size()==W[0][0].size()); 
    assert (state.size() == W.size()); 

    state = zeros(r); 
    for (int i=0; i<W.size(); i++) 
    for (int j=0; j<W[0].size(); j++) 
    for (int k=0; k<W[0][0].size(); k++) 
    state[i] += W[i][j][k]*x[j][k]; 
    forward_propagate(); 
    } 
}; 

struct Convolution: r2{ 
    Convolution(int r, int c): r2(r, c){} 
    void update_state() override{ 
    cout << "convolving" << endl; 
    state = zeros(r, c); 
    d2& W = static_cast<r2*>(parents[0])->state; 
    d2& x = static_cast<r2*>(parents[1])->state; 

    int wCenterX = W[0].size()/2; 
    int wCenterY = W.size()/2; 
    int rows = x.size(), cols = x[0].size(); 
    int wRows = W.size(), wCols = W[0].size(); 

    //#pragma omp parallel for 
    for(int i=0; i < rows; i++) 
    for(int j=0; j < cols; j++) 
    for(int m=0; m < W.size(); m++){ 
     int mm = W.size() - 1 - m; 
     for(int n=0; n < wCols; n++){ 
     int nn = wCols - 1 - n; 
     int ii = i + (m - wCenterY); 
     int jj = j + (n - wCenterX); 
     if (ii >= 0 && ii < rows && jj >= 0 && jj < cols) 
     state[i][j] += x[ii][jj] * W[mm][nn]; 
     } 
    } 
    forward_propagate(); 
    } 
}; 


struct RELU: r2{ 
    RELU(int r, int c):r2(r, c){} 
    void update_state() override{ 
    cout << "relu2" << endl; 
    state = zeros(r,c); 
    d2& x = static_cast<r2*>(parents[0])->state; 
    for (int i=0; i<state.size(); i++) 
    for (int j=0; j<state[0].size(); j++) 
    if (x[i][j] > 0) 
    state[i][j] = x[i][j]; 
    forward_propagate(); 
    } 
}; 

struct Softmax: r1{ 
    Softmax(int r):r1(r){} 
    void update_state() override{ 
    cout << "softmax" << endl; 
    state = zeros(r); 
    p(parents.size()); 
    d1& x = static_cast<r1*>(parents[0])->state; 
    cout << "got state" << endl; 
    //p(x.size()); 
    //print(x); 

    p(x.size()); 
    cout << "argmax " << argmax(x) << endl; 
    double largest = x[argmax(x)]; 
    double lndenom = largest; 
    double expsum = 0; 
    cout << "starting expsum" << endl; 
    for (int i=0; i<x.size(); i++) 
    expsum += exp(x[i]-largest); 
    //expsum += x[i] - largest; 
    cout << "next loop " << endl; 
    for (int i=0; i<x.size(); i++) 
    state[i] = exp(x[i]-largest)/expsum; 
    //state[i] = x[i]-largest; 
    // state[i] = 3; 
    cout << "forward proping" << endl; 
    cout << "weird" << endl; 
    forward_propagate(); 
    cout << "done with softmax" <<endl; 
    } 
}; 

struct Add1: r1{ 
    Add1(int r):r1(r){} 
    void update_state() override{ 
    cout << "add1ing" << endl; 
    d1& x = static_cast<r1*>(parents[0])->state; 
    d1& y = static_cast<r1*>(parents[1])->state; 
    for (int i=0; i<r; i++) 
    state[i] = x[i]+y[i]; 
    forward_propagate(); 
    } 
}; 

struct Add2: r2{ 
    Add2(int r, int c): r2(r, c){} 
    void update_state() override{ 
    d2& x = static_cast<r2*>(parents[0])->state; 
    d2& y = static_cast<r2*>(parents[1])->state; 
    for (int i=0; i<x.size(); i++) 
    for (int j=0; j<x[0].size(); j++) 
    state[i][j] = x[i][j] + y[i][j]; 
    forward_propagate(); 
    } 
}; 

struct MaxPool: r2{ 
    MaxPool(int r, int c): r2(r, c){} 
    void update_state() override{ 
    d2& x = static_cast<r2*>(parents[0])->state; 
    for (int i=0; i<x.size(); i+=2) 
    for (int j=0; j<x[0].size(); j+=2) 
    state[i/2][j/2] = max(max(x[i][j], x[i+1][j]), max(x[i+1][j], x[i+1][j+1])); 
    forward_propagate(); 
    } 
}; 

// Builds a small network
//   input -> convolution -> +bias -> ReLU -> max-pool -> fully connected
//         -> +bias -> softmax
// out of graph nodes filled with random values, runs one forward pass starting
// at the input node and prints the softmax output.
// An optional first command-line argument is parsed with atoi and used as the
// RNG seed instead of the clock.
// The statement order is significant: it fixes both the order in which random
// values are drawn and the position of each parent in a child's `parents`.
int main(int argc, char *argv[]){ 
    // A seed on the command line switches off clock-based seeding.
    if (argc>1){ 
    seed = atoi(argv[1]); 
    time_seed = false; 
    } 
    Node root; 
    // 28x28 input.
    r2 x; 
    x.state = getRandomDoubles(28,28); 
    //x.state[0][0]-=1000; 
    // 10x10 convolution kernel.
    r2 wConv; 
    wConv.state = getRandomDoubles(10, 10); 
    root.add_child(x); 
    root.add_child(wConv); 
    // Convolution reads parents[0] as the kernel and parents[1] as the input,
    // so wConv has to be attached before x.
    Convolution c(28,28); 
    wConv.add_child(c); 
    x.add_child(c); 
    // Convolution output plus a 28x28 bias.
    Add2 a(28,28); 
    r2 bConv(28,28); 
    bConv.state = getRandomDoubles(28,28); 
    c.add_child(a); 
    bConv.add_child(a); 
    RELU r(28,28); 
    a.add_child(r); 
    // Pooling halves each dimension: 28x28 -> 14x14.
    MaxPool max(14, 14); 
    r.add_child(max); 
// print(max.state); 
    // One 14x14 weight slice per output element, matching the pooled map.
    r3 wFull(10,14,14); 
    wFull.state = getRandomDoubles(10,14,14); 
    //print(wFull.state); 
    // return 0; 
    // MatrixProduct2_1 reads parents[0] as the weights and parents[1] as the input.
    MatrixProduct2_1 full(10); 
    wFull.add_child(full); 
    max.add_child(full); 
    //print(full.state); //suspiciously zero 
    r1 bFull(10); 
    bFull.state = getRandomDoubles(10); 
    Add1 aFull(10); 
    // Placeholder value; Add1::update_state overwrites every element of state.
    aFull.state[0] = 123; 
    full.add_child(aFull); 
    bFull.add_child(aFull); 
    Softmax s(10); 
    aFull.add_child(s); 
    // d1& x =   static_cast<r1*>(parents[0])->state; 
    // d1& asdf = static_cast<r1*>(s.parents[0])->state; 
    // print(asdf); 
    //root.forward_propagate(); 
    // Propagation starts at the input node rather than at root.
    x.forward_propagate(); 
    //print(aFull.state); 
    print(s.state); 
    cout << "returning main"; 
}