我想计算安德森达令测试发现here。我遵循维基百科上的步骤,并确保在计算我正在测试的数据的平均值和标准偏差时,使用MATLAB表示为X
。另外,我使用了一个叫phi
的函数来计算标准正常CDF,我也测试了这个函数以确保它是正确的。现在,当我实际计算A平方(在维基百科中表示,我在C++中将其表示为A)时,似乎出现了问题。安德森达林测试C++
这里是我的功能,我对安德森 - 达林检验进行:
void Anderson_Darling(int n, double X[]){
sort(X,X + n);
// Find the mean of X
double X_avg = 0.0;
double sum = 0.0;
for(int i = 0; i < n; i++){
sum += X[i];
}
X_avg = ((double)sum)/n;
// Find the variance of X
double X_sig = 0.0;
for(int i = 0; i < n; i++){
X_sig += (X[i] - X_avg)*(X[i] - X_avg);
}
X_sig /= n;
// The values X_i are standardized to create new values Y_i
double Y[n];
for(int i = 0; i < n; i++){
Y[i] = (X[i] - X_avg)/(sqrt(X_sig));
//cout << Y[i] << endl;
}
// With a standard normal CDF, we calculate the Anderson_Darling Statistic
double A = 0.0;
for(int i = 0; i < n; i++){
A += -n - 1/n *(2*(i) - 1)*(log(phi(Y[i])) + log(1 - phi(Y[n+1 - i])));
}
cout << A << endl;
}
注意,我知道,安德森 - 达林式(A方)与i = 1
开始i = n
,虽然当我改变了索引使它在C++中工作,我仍然得到相同的结果而不改变索引。
我在C++中获得的价值是:
-4e+006
我应该得到的值,在MATLAB收到的是:
0.2330
任何建议都非常赞赏。
这里是我的全部代码:
#include <iostream>
#include <math.h>
#include <cmath>
#include <random>
#include <algorithm>
#include <chrono>
using namespace std;
double *Box_Muller(int n, double u[]);
double *Beasley_Springer_Moro(int n, double u[]);
void Anderson_Darling(int n, double X[]);
double phi(double x);
int main(){
int n = 2000;
double Mersenne[n];
random_device rd;
mt19937 e2(1);
uniform_real_distribution<double> dist(0, 1);
for(int i = 0; i < n; i++){
Mersenne[i] = dist(e2);
}
// Print Anderson Statistic for Mersenne 6a
double *result = new double[n];
result = Box_Muller(n,Mersenne);
Anderson_Darling(n,result);
return 0;
}
double *Box_Muller(int n, double u[]){
double *X = new double[n];
double Y[n];
double R_2[n];
double theta[n];
for(int i = 0; i < n; i++){
R_2[i] = -2.0*log(u[i]);
theta[i] = 2.0*M_PI*u[i+1];
}
for(int i = 0; i < n; i++){
X[i] = sqrt(-2.0*log(u[i]))*cos(2.0*M_PI*u[i+1]);
Y[i] = sqrt(-2.0*log(u[i]))*sin(2.0*M_PI*u[i+1]);
}
return X;
}
double *Beasley_Springer_Moro(int n, double u[]){
double y[n];
double r[n+1];
double *x = new double(n);
// Constants needed for algo
double a_0 = 2.50662823884; double b_0 = -8.47351093090;
double a_1 = -18.61500062529; double b_1 = 23.08336743743;
double a_2 = 41.39119773534; double b_2 = -21.06224101826;
double a_3 = -25.44106049637; double b_3 = 3.13082909833;
double c_0 = 0.3374754822726147; double c_5 = 0.0003951896511919;
double c_1 = 0.9761690190917186; double c_6 = 0.0000321767881768;
double c_2 = 0.1607979714918209; double c_7 = 0.0000002888167364;
double c_3 = 0.0276438810333863; double c_8 = 0.0000003960315187;
double c_4 = 0.0038405729373609;
// Set r and x to empty for now
for(int i = 0; i <= n; i++){
r[i] = 0.0;
x[i] = 0.0;
}
for(int i = 1; i <= n; i++){
y[i] = u[i] - 0.5;
if(fabs(y[i]) < 0.42){
r[i] = pow(y[i],2.0);
x[i] = y[i]*(((a_3*r[i] + a_2)*r[i] + a_1)*r[i] + a_0)/((((b_3*r[i] + b_2)*r[i] + b_1)*r[i] + b_0)*r[i] + 1);
}else{
r[i] = u[i];
if(y[i] > 0.0){
r[i] = 1.0 - u[i];
r[i] = log(-log(r[i]));
x[i] = c_0 + r[i]*(c_1 + r[i]*(c_2 + r[i]*(c_3 + r[i]*(c_4 + r[i]*(c_5 + r[i]*(c_6 + r[i]*(c_7 + r[i]*c_8)))))));
}
if(y[i] < 0){
x[i] = -x[i];
}
}
}
return x;
}
double phi(double x){
return 0.5 * erfc(-x * M_SQRT1_2);
}
void Anderson_Darling(int n, double X[]){
sort(X,X + n);
// Find the mean of X
double X_avg = 0.0;
double sum = 0.0;
for(int i = 0; i < n; i++){
sum += X[i];
}
X_avg = ((double)sum)/n;
// Find the variance of X
double X_sig = 0.0;
for(int i = 0; i < n; i++){
X_sig += (X[i] - X_avg)*(X[i] - X_avg);
}
X_sig /= (n-1);
// The values X_i are standardized to create new values Y_i
double Y[n];
for(int i = 0; i < n; i++){
Y[i] = (X[i] - X_avg)/(sqrt(X_sig));
//cout << Y[i] << endl;
}
// With a standard normal CDF, we calculate the Anderson_Darling Statistic
double A = -n;
for(int i = 0; i < n; i++){
A += -1.0/(double)n *(2*(i+1) - 1)*(log(phi(Y[i])) + log(1 - phi(Y[n - i])));
}
cout << A << endl;
}
在调试器中浏览代码并找出发生了什么。 –
@AnonMail好的我正在使用Eclipse,通过确保我具有用于计算X的平均值和标准偏差的相同值进行调试。您是否建议使用Eclipse调试器? – Scooby
在附注中,你为什么要重写自己的'Phi'?我在过去做过,但后来转而使用'std :: erfc'和'std :: erf',这非常准确和快速。 –