所以我非常喜欢显示统计回归模型的stargazer包。我一直在使用R和Stata来完成教科书中的一些问题。我发现的一个问题是,由stargazer包打印的置信区间与stata的置信区间并不相符。我确定手动完成后,stata中的CI是正确的。Stargazer置信区间错误?
因为问题可能可能在于我如何处理数据,所以我在此将其作为可选选项提供。我主要关心的是确定CI为什么不回应。从以前的文章中,这里是查找我正在使用的数据的一种可能方式;
install.packages("devtools") # if not already installed
library(devtools)
install_git("https://github.com/ccolonescu/PoEdata")
library(PoEdata) # loads the package in memory
library(multcomp) # for hypo testing
data(fair4) # loads the data set of interest
在Stata,我使用的数据集的名字叫做fair4.dta
。对于数据本身,你可以手动使用它,
structure(list(year = structure(c(1880, 1884, 1888, 1892, 1896,
1900, 1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940,
1944, 1948, 1952, 1956, 1960, 1964, 1968, 1972, 1976, 1980, 1984,
1988, 1992, 1996, 2000, 2004, 2008), label = "year", format.stata = "%9.0g"),
vote = structure(c(50.2200012207031, 49.8460006713867, 50.4140014648438,
48.2680015563965, 47.7599983215332, 53.1710014343262, 60.0060005187988,
54.4830017089844, 54.7080001831055, 51.681999206543, 36.1189994812012,
58.2439994812012, 58.8199996948242, 40.8409996032715, 62.4580001831055,
54.9990005493164, 53.773998260498, 52.3699989318848, 44.5950012207031,
57.7639999389648, 49.9129981994629, 61.3440017700195, 49.5960006713867,
61.7890014648438, 48.9480018615723, 44.6969985961914, 59.1699981689453,
53.9020004272461, 46.5449981689453, 54.7360000610352, 50.2649993896484,
51.2330017089844, 46.5999984741211), label = "Incumbent share of the two-party presidential vote", format.stata = "%9.0g"),
party = structure(c(-1, -1, 1, -1, 1, -1, -1, -1, -1, 1,
1, -1, -1, -1, 1, 1, 1, 1, 1, -1, -1, 1, 1, -1, -1, 1, -1,
-1, -1, 1, 1, -1, -1), label = "= 1 if Democratic incumbent at election time; -1 if a Republican incumbent", format.stata = "%9.0g"),
person = structure(c(0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1,
0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0,
1, 0), label = "= 1 if incumbent is running for election and 0 otherwise", format.stata = "%9.0g"),
duration = structure(c(1.75, 2, 0, 0, 0, 0, 1, 1.25, 1.5,
0, 1, 0, 1, 1.25, 0, 1, 1.25, 1.5, 1.75, 0, 1, 0, 1, 0, 1,
0, 0, 1, 1.25, 0, 1, 0, 1), label = "number of terms incumbent administration in power", format.stata = "%9.0g"),
war = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), label = "= 1 for elections of 1920, 1944, and 1948 and 0 otherwise.", format.stata = "%9.0g"),
growth = structure(c(3.87899994850159, 1.58899998664856,
-5.55299997329712, 2.76300001144409, -10.0240001678467, -1.42499995231628,
-2.4210000038147, -6.2810001373291, 4.16400003433228, 2.22900009155273,
-11.4630002975464, -3.87199997901917, 4.6230001449585, -14.4989995956421,
11.7650003433228, 3.90199995040894, 4.27899980545044, 3.5789999961853,
0.690999984741211, -1.45099997520447, 0.377000004053116,
5.10900020599365, 5.04300022125244, 5.91400003433228, 3.75099992752075,
-3.59699988365173, 5.44000005722046, 2.17799997329712, 2.66199994087219,
3.12100005149841, 1.21899998188019, 2.69000005722046, 0.219999998807907
), label = "growth rate GDP in first three quarters of the election year", format.stata = "%9.0g"),
inflation = structure(c(1.97399997711182, 1.05499994754791,
0.603999972343445, 2.2739999294281, 3.41000008583069, 2.54800009727478,
1.44200003147125, 1.87899994850159, 2.17199993133545, 4.2519998550415,
0, 5.16099977493286, 0.18299999833107, 7.19999980926514,
2.49699997901917, 0.0810000002384186, 0, 0, 2.36199998855591,
1.93499994277954, 1.96700000762939, 1.25999999046326, 3.13899993896484,
4.81500005722046, 7.63000011444092, 7.83099985122681, 5.25899982452393,
2.90599989891052, 3.27999997138977, 2.06200003623962, 1.60500001907349,
2.32500004768372, 2.88000011444092), label = "growth rate of GDP deflator during first 15 quarters of admin", format.stata = "%9.0g"),
goodnews = structure(c(9, 2, 3, 7, 6, 7, 5, 8, 8, 3, 0, 10,
7, 4, 9, 8, 0, 0, 7, 5, 5, 10, 7, 4, 5, 5, 8, 4, 2, 4, 8,
1, 3), label = "number of quarters in first 15 with real GDP per capita growth > 3.2", format.stata = "%9.0g")), notes = c("more complete variable definitions in fair.def",
"1"), .Names = c("year", "vote", "party", "person", "duration",
"war", "growth", "inflation", "goodnews"), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -33L))
所以这里是给我找麻烦了夜光云代码:
presidential <- read_dta("~/Directory/fair4.dta")
pres.lm = lm(vote ~ growth, data = subset(presidential,
presidential$year >= 1916)
stargazer(pres.lm,
type = "text",
intercept.bottom = T,
digits = 5,
report = "vc*stp",
ci = T
)
confint(pres.lm, level = 0.95)
考虑在置信区间的差异。
(0.52948, 1.24241) # in R, Stargazer
0.5087671 1.263126 # in R, confint(pres.lm)
.5087671 1.263126 # in Stata
我还用手工计算的置信区间和confit()
和Stata的数字退房。此数据集的临界值应为t_(N-2 , prob) = t(22,.0025) = -2.073873
。
此外,我确保创建一个全新的数据框架。也就是说,我并不是在lm()
的论点中进行子集化,而是首先对它进行子集化。当将这种方法与前一种方法进行比较时,我仍然得到相同的(不正确的)置信区间。
# subset into a new dataframe
presidential.1 = subset(presidential, presidential$year >= 1916)
# create the model
pres.lm.2 = lm(vote ~ growth, data = presidential.1)
# compare the two
stargazer(pres.lm,pres.lm.2,
type = "text",
intercept.bottom = F,
digits = 5,
report = "vc*stp",
ci = T,
t.auto = T)
(1) (2)
-----------------------------------------------------------------------
Constant 50.84840*** 50.84840***
(48.86384, 52.83295) (48.86384, 52.83295)
t = 50.21835 t = 50.21835
p = 0.00000 p = 0.00000
growth 0.88595*** 0.88595***
(0.52948, 1.24241) (0.52948, 1.24241)
t = 4.87126 t = 4.87126
p = 0.00008 p = 0.00008
# correct intervals from Stata and R's confint()
growth 0.5087671 1.263126
我是不是正确运行代码?对我来说,运行stargazer命令并仅打印系数和t-stats对我来说并不是什么大事,但是令人失望的是,由于Stargazer的输出是因为我不得不运行confint()
作为单独的命令华丽。这很奇怪,因为系数估计和t统计是完美的。置信区间有不同程度的降低,我想知道这可能是什么原因。任何建议将不胜感激。
占星师自身内部并不那么华丽https://github.com/cran/stargazer/blob/master/R/stargazer-internal.R – Hao
@豪点了。相对于使用“摘要(模型)”,观星看起来相当不错。我只是发现摘要的默认输出非常难看,很难看。如果有什么比观星更好地整理一个回归模式的统计数字的话,我全都是耳朵! – im2wddrf