2017-09-15 110 views
0

我有一个Matlab MEX功能,这使得重复调用名为calculate(). C函数我做的函数的两个版本:大的差异取决于地方的内存分配

版本A:每次mex()调用calculate(),它只传递输入参数,并且calculate()所需的所有内存在calculate()内分配并释放 - 每次!

版本B:calculate()所需的内存分配在mex()的开头,并且指针传递给calculate()。内存仅在mex()结束时释放。换句话说,分配/释放只对整个业务进行一次。

根据内存分配需要时间的观念,我根据天真的假设创建了B版,这应该会提高速度。但它实际上增加了约5倍的执行时间!那里发生了什么?

下面是版本B--实际的代码,你可以从那里我本来的版本A.

/* MEX business*/ 
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){ 
    /* mexPrintf("Inside the mex function of CalcMz2.\n"); 
mexEvalString("drawnow;");*/ 

/*declare local variables*/ 
struct InputParameters voxelPars; 
InputParameters *point2pars; 
struct Zcontrast contrast; 
struct Zcontrast* ptr2contrast; 
ptr2contrast = &contrast; 
mxArray *in, *out; 
mxArray *temp; 
double *output; 
int nFields; 
int i,j,k; 

in = mxDuplicateArray(prhs[0]); /*'in' now is a copy of the input struct*/ 
nFields = mxGetNumberOfFields(prhs[0]); 
if (nFields != 39) mexPrintf("Error: the number of fields in the input struct is incorrect.\n"); 

//associate outputs 
    out = plhs[0] = mxCreateDoubleScalar(0.0); 
    output = mxGetPr(out); 

    /*Passed variables memory allocation*/ 
double **pntrA; 
pntrA = (double **)mxCalloc(18, sizeof(double*)); 
for (i=0; i <18; i++){ 
    pntrA[i] = (double *) mxCalloc(18, sizeof(double)); 
} 
double *pntrAinvB; 
pntrAinvB = (double *) mxCalloc(18, sizeof(double)); 


/*used by MatrInv()*/ 
double **CopyOfMatrix; 
CopyOfMatrix = (double**)mxCalloc(18, sizeof(double*)); 
for (i=0; i<18; i++){ 
    CopyOfMatrix[i] = (double*)mxCalloc(18, sizeof(double)); 
    } 

double *vector; 
double *col; 
int *indx; 
vector = (double *)mxCalloc(18, sizeof(double)); 
indx = (int *)mxCalloc(18, sizeof(int)); 
col = (double *)mxCalloc(18, sizeof(double)); 

/*used by CalculateY()*/ 
double *Aat; 
Aat = (double*)mxCalloc(18*18, sizeof(double)); 

double *Aate; 
Aate = (double*)mxCalloc(18*18, sizeof(double)); 

double *sum; 
sum = (double*)mxCalloc(18, sizeof(double)); 
double *product; 
product = (double*)mxCalloc(18, sizeof(double)); 

/*Reassign values passed from input struct*/ 
temp = mxGetFieldByNumber(prhs[0],0,0); 
voxelPars.ampl = mxGetPr(temp); 
temp = mxGetFieldByNumber(prhs[0],0,1); 
voxelPars.phi = mxGetPr(temp); 
temp = mxGetFieldByNumber(prhs[0],0,2); 
voxelPars.count1 = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,3); 
voxelPars.pw1 = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,4); 
voxelPars.b1 = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,5); 
voxelPars.puloffsetppm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,6); 
voxelPars.pw1dc = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,7); 
voxelPars.cf = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,8); 
voxelPars.M0w = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,9); 
voxelPars.T1a = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,10); 
voxelPars.T2a = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,11); 
voxelPars.offsetappm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,12); 
voxelPars.bwfraction = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,13); 
voxelPars.M0a = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,14); 
voxelPars.M0bw = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,15); 
voxelPars.T1bw = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,16); 
voxelPars.T2bw = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,17); 
voxelPars.offsetbwppm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,18); 
voxelPars.exratebw = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,19); 
voxelPars.M0b = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,20); 
voxelPars.T1b = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,21); 
voxelPars.T2b = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,22); 
voxelPars.offsetbppm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,23); 
voxelPars.exrateb = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,24); 
voxelPars.M0c = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,25); 
voxelPars.T1c = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,26); 
voxelPars.T2c = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,27); 
voxelPars.offsetcppm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,28); 
voxelPars.exratec = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,29); 
voxelPars.M0d = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,30); 
voxelPars.T1d = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,31); 
voxelPars.T2d = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,32); 
voxelPars.offsetdppm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,33); 
voxelPars.exrated = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,34); 
voxelPars.M0e = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,35); 
voxelPars.T1e = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,36); 
voxelPars.T2e = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,37); 
voxelPars.offseteppm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,38); 
voxelPars.exratee = mxGetScalar(temp); 

point2pars = &voxelPars; 


/*Initialize contrast's values to zero*/ 
ptr2contrast->SimMza = ptr2contrast->SimMzb = ptr2contrast->contrastz = 1.0; 

/* call calculate()*/ 
calculate(point2pars, ptr2contrast, pntrA, pntrAinvB, CopyOfMatrix,vector, col, indx, Aat, Aate, sum, product); 
*output = ptr2contrast->contrastz; 
//mexPrintf("The contrast has been calculated as :%f.\n",  ptr2contrast->contrastz); 


/*Free memory 
mxFree(pntrA); mxFree(pntrAinvB);*/ 
mxFree(CopyOfMatrix); mxFree(Aat); mxFree(Aate); 
mxFree(vector);mxFree(indx);mxFree(col); 
mxFree(sum); mxFree(product); 
}//end mex function 



void calculate (struct InputParameters *voxelPars1, struct Zcontrast  *contrast, double **pntrA,double *pntrAinvB, double **CopyOfMatrix, double *vector, double *col,int *indx, double *Aat, double *Aate, double *sum, double *product){ 

/*Local variables*/ 
int i, j, k, n1, m; 
double pwms, pw1delay; 
int npul; 
double timeStepSize; 
double W; 
double Wa, Wbw, Wb, Wc, Wd, We; 
double Cbw, Cb, Cc, Cd, Ce; 
double M0a, M0bw, M0b, M0c, M0d, M0e; //declaring these as locals just to make life easier 
double pa, pbw, pb, pc, pd, pe; 
double Cabw, Cab, Cac, Cad, Cae; 
double k1a, k1bw, k1b, k1c, k1d, k1e; 
double k2a, k2bw, k2b, k2c, k2d, k2e; 

//  double **pntrA; 
//  pntrA = (double **)mxCalloc(18, sizeof(double*)); 
//  for (i=0; i <18; i++){ 
//   pntrA[i] = (double *) mxCalloc(18, sizeof(double)); 


    //  } 
    //   
    double AdinvB[18]; 
    double AinvB[18]; 
    for(i=0; i<18; i++){ 
      AdinvB[i] = 0.0; 
     AinvB[i] = 0.0; 
    } 
//  
// /*Passed variables memory allocation*/ 
//  double **CopyOfMatrix; /*used by MatriInv()*/ 
//  CopyOfMatrix = (double**)mxCalloc(18, sizeof(double*)); 
//  for (i=0; i<18; i++){ 
//   CopyOfMatrix[i] = (double*)mxCalloc(18, sizeof(double)); 
//  } 
//  
//  double *vector; 
//  double *col; 
//  int *indx; 
//  vector = (double *)mxCalloc(18, sizeof(double)); 
//  indx = (int *)mxCalloc(18, sizeof(int)); 
//  col = (double *)mxCalloc(18, sizeof(double)); 
//  
//  /*used by CalculateY()*/ 
//  double *Aat; 
//  Aat = (double*)mxCalloc(18*18, sizeof(double)); 
// /* for (i=0; i<18; i++){ 
//   Aat[i] = (double*)mxCalloc(18, sizeof(double)); 
//  }*/ 
//  
//  double *Aate; 
//  Aate = (double*)mxCalloc(18*18, sizeof(double)); 
// /* for (i=0; i<18; i++){ 
//   Aate[i] = (double*)mxCalloc(18, sizeof(double)); 
//  } */ 
//  
//  double *sum; 
//  sum = (double*)mxCalloc(18, sizeof(double)); 
//  double *product; 
//  product = (double*)mxCalloc(18, sizeof(double)); 

    double W1[voxelPars1->count1]; //Now THIS might require a pointer, since count1 is only passed in the argument struct. We'll see. 
    double W1x[voxelPars1->count1]; 
    double W1y[voxelPars1->count1]; 
    /* mexPrintf("Declared all local variables in calculate().\n"); 
    mexEvalString("drawnow;");*/ 

    /*Fill local variables with values from input struct voxelPars*/ 

内存分配....计算保留后

+1

你认为这里的代码不相关吗? –

+0

在这里显示你的工作,你的代码可能有一些问题。 – krpra

回答

0

去评论见我最近遇到类似的问题。虽然我对matlab内存管理设施没有任何洞察力,但我可以说,对于我来说,切换到C本地malloc/free给了x5的大概加速。

看来,在你的情况下,也不需要管理Matlab堆上的内存(使用mxCalloc和类似的) - 因为内存分配仅用于mex函数内部使用。建议您尝试一下:只需用malloc,mxFree替换mxCalloc即可。