0
成功找到质量和可靠性

我手动试图计算回归系数,而不是使用数据的任何默认http://people.sc.fsu.edu/~jburkardt/datasets/regression/x31.txt无法获取线性回归系数R中后,通过户主

这里是我的代码,它正确地产生Q & R满足A = QR。但是我无法找到系数作为创造问题的尺寸。任何专家都可以帮助我吗?当我有适当的Q & R如何找到系数可能会出错?

library(xlsx) 
data.df<-read.xlsx("regression.xlsx",2,header = F) 
#Remove unneccesary Index position 
data.df<-data.df[2:5] 

#Decomposing 
#coefficients [b]=inv(t(X)(Matrix Multiplication)(X))(Matrix Multiplication)t(X)(Matrix Multiplication)y 
Y<-as.matrix(data.df[4]) 
#But note that if you need to express Y=Xb, the coefficient of b_0 must be X_0 
#which is 1 
X_0<-as.data.frame(c(1,1,1,1,1,1,1,1,1,1)) 
X<-(cbind(X_0,data.df[1:3])) 
names(X)<-c("X1","X2","X3","X4") 
X<-as.matrix(X) 
#Create copy for final evaluvations 
A<-X 


x1<-as.matrix(X[,1]) 
deter_x<-sqrt(sum(x1^2)) 
n=dim(x1)[1] 
deter_e1<-as.matrix(c(deter_x,rep(0,n-1))) 
v1=x1+deter_e1 
#c_i=2/(transpose(v_i)%*%(v_i)) 
c1<-as.numeric(2/(t(v1)%*%v1)) 
#H_i = I - c_i*v_i%*%transpose(v_i) 
I<-diag(n) 
H1<-I-c1*(v1%*%t(v1)) 
R1<-H1%*%X 
#Check R1 and see if it is Upper Triangle Matrix 
R1 
#We will take rest of the interesting portion of matrix R1. 
n=dim(R1)[1] 
X<-as.matrix(as.data.frame(cbind(R1[2:n,2],R1[2:n,3],R1[2:n,4]))) 
x1<-as.matrix(X[,1]) 
deter_x<-sqrt(sum(x1^2)) 
n=dim(x1)[1] 
deter_e1<-as.matrix(c(deter_x,rep(0,n-1))) 
v1=x1-deter_e1 
#c_i=2/(transpose(v_i)%*%(v_i)) 
c1<-as.numeric(2/(t(v1)%*%v1)) 
#H_i = I - c_i*v_i%*%transpose(v_i) 
I<-diag(n) 
H2<-I-c1*(v1%*%t(v1)) 
R2<-H2%*%X 

#Check R2 and see if it is Upper Triangle Matrix, if no go for R3 
n=dim(R2)[1] 
X<-as.matrix(as.data.frame(cbind(R2[2:n,2],R2[2:n,3]))) 
x1<-as.matrix(X[,1]) 
deter_x<-sqrt(sum(x1^2)) 
n=dim(x1)[1] 
deter_e1<-as.matrix(c(deter_x,rep(0,n-1))) 
v1=x1+deter_e1 
#c_i=2/(transpose(v_i)%*%(v_i)) 
c1<-as.numeric(2/(t(v1)%*%v1)) 
#H_i = I - c_i*v_i%*%transpose(v_i) 
I<-diag(n) 
H3<-I-c1*(v1%*%t(v1)) 
R3<-H3%*%X 
R3 

#Check R3 and see if it is Upper Triangle Matrix, if no go for R4 
n=dim(R3)[1] 
X<-as.matrix(as.data.frame(cbind(R3[2:n,2]))) 
x1<-as.matrix(X[,1]) 
deter_x<-sqrt(sum(x1^2)) 
n=dim(x1)[1] 
deter_e1<-as.matrix(c(deter_x,rep(0,n-1))) 
v1=x1-deter_e1 
#c_i=2/(transpose(v_i)%*%(v_i)) 
c1<-as.numeric(2/(t(v1)%*%v1)) 
#H_i = I - c_i*v_i%*%transpose(v_i) 
I<-diag(n) 
H4<-I-c1*(v1%*%t(v1)) 
R4<-H4%*%X 
R4 
#As we can see R4 has all values except first element as zero 
#Let us replace Matrices iteratively in R1 from R2 to R4 and round it of 
R1[2:10,2:4]<-R2 
R1[3:10,3:4]<-R3 
R1[4:10,4]<-R4 
R<-round(R1,5) 
R 
#Find Complete H1 
#Q=H1%*%H2%*%H3%*%H4 
H1_COM<-H1 
# 
H_temp<-diag(10) 
n=dim(H_temp)[1] 
dim(H_temp[2:n,2:n]) 
dim(H2) 
H_temp[2:n,2:n]<-H2 
H2_COM<-H_temp 
H2_COM 
H_temp<-diag(10) 
n=dim(H_temp)[1] 
dim(H_temp[3:n,3:n]) 
dim(H3) 
H_temp[3:n,3:n]<-H3 
H3_COM<-H_temp 
H3_COM 

H_temp<-diag(10) 
n=dim(H_temp)[1] 
dim(H_temp[4:n,4:n]) 
dim(H4) 
H_temp[4:n,4:n]<-H4 
H4_COM<-H_temp 
Q=H1_COM%*%H2_COM%*%H3_COM%*%H4_COM 
# The following code properly reconstructs A Matrix proving proper Q & R 
A=round(Q%*%R) 
# When you try to find coefficients using Q&R you will end up in error. 
solve(R)%*%t(Q)%*%Y 
#Error in solve.default(R) : 'a' (10 x 4) must be square 
#So trying to get matrix R without all 0 rows R[1:4,1:4] 
solve(R[1:4,1:4])%*%t(Q)%*%Y 
#Error in solve(R[1:4, 1:4]) %*% t(Q) : non-conformable arguments 
dim(solve(R[1:4,1:4])) 
dim(solve(R[1:4,1:4])) 
#4 4 
dim(t(Q)) 
#[1] 10 10 
dim(Y) 
#10 1 

回答

2

我想指出您对此线程的回答(相当全面):Multiple regression analysis in R using QR decomposition。社区将判断你的问题是否会被视为重复。请注意,其中的选项中,最后一个使用的是使用普通R代码编写的QR分解。

鉴于QR分解的玩具代码是正确的(正如您在代码中评论的那样),主要问题在于最后几行。

的解决方案是简单的:

solve(R) %*% (t(Q) %*% Y)[1:4,] 

1:4选择在单柱矩阵t(Q) %*% Y的前4个元素。

如果你检查我的链接答案,你会看到,而不是solve,我使用backsolve,因为这是一个三角形的方程组。我可以用crossprod代替t%*%。我最近的回答When is 'crossprod' preferred to '%*%', and when isn't?对这两种方式进行了全面的讨论。

+1

哇!那很快。谢谢。只要修改,你的答案是正确的,如果我改变我的R为4X4的矩形矩阵给出的例子:solve(R [1:4,1:4])%*%(t(Q)%*%Y)[1 :4,]。只有一件事我想明白的是,从t(Q)%*%Y中选择单列矩阵中的前4个元素是正确的,给出正确的结果是多少?我肯定会阅读你的其他答案,但如果你可以帮助我在任何时候给予答案,那么这是一个很好的帮助!再次感谢!干杯! –

+1

你知道吗!你真了不起!感谢所有的帮助。我已经将它标记为答案并将其提升。我认为你的答案的关键是“它展示了如何使用Y的正交变换”。再次感谢。 :-) –

+0

嗨Zheyuan Li,我对我缺乏知识表示歉意。尽管如此,我仍然无法理解如何仅仅列出单数列矩阵的前四位是正确的?我读过你提到的所有文献。但如何留下其余的价值会给我正确的结果,我无法理解:-( –