2016-03-02 69 views
2

我正在重新检查一些现在无法正常工作的代码，但弄不清楚为什么在使用 `spread` 或 `dcast` 时，变量没有与日期对齐。所有软件包都是最新的。spread 和 dcast 的结果未正确对齐。

请注意，使用 `spread` 时，tmean 单独占了一行，而没有与 1996 年 4 月的那一行对齐。`dcast` 的结果也是如此。

下面是一个例子:

dput:

# Load the two reshaping packages compared below: reshape2 provides dcast(),
# tidyr provides spread().
library(reshape2) 
library(tidyr) 

# Reproducible example data (dput output): nine long-format rows, one per
# element/month combination.
# NOTE: the 4th and 8th entries of `long` and `lat` (the "tmean" rows) differ
# from the other entries in their trailing decimal digits, even though they
# print identically at the default console precision.
dat <- structure(list(gridNumber = c(266783L, 266783L, 266783L, 266783L, 
266783L, 266783L, 266783L, 266783L, 266783L), fips = c(9005L, 
9005L, 9005L, 9005L, 9005L, 9005L, 9005L, 9005L, 9005L), cropArea = c(0, 
0, 0, 0, 0, 0, 0, 0, 0), state = structure(c(8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L), .Label = c("AK", "AL", "AR", "AS", "AZ", 
"CA", "CO", "CT", "DC", "DE", "FL", "GA", "GU", "HI", "IA", "ID", 
"IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME", "MI", "MN", "MO", 
"MP", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM", "NV", "NY", 
"OH", "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "UM", 
"UT", "VA", "VI", "VT", "WA", "WI", "WV", "WY"), class = "factor"), 
    county_name = c("Litchfield County", "Litchfield County", 
    "Litchfield County", "Litchfield County", "Litchfield County", 
    "Litchfield County", "Litchfield County", "Litchfield County", 
    "Litchfield County"), long = c(-73.4583333333292, -73.4583333333292, 
    -73.4583333333292, -73.458333332921, -73.4583333333292, -73.4583333333292, 
    -73.4583333333292, -73.458333332921, -73.4583333333292), 
    lat = c(42.0416666666681, 42.0416666666681, 42.0416666666681, 
    42.041666666606, 42.0416666666681, 42.0416666666681, 42.0416666666681, 
    42.041666666606, 42.0416666666681), year = c(1996L, 1996L, 
    1996L, 1996L, 1996L, 1996L, 1996L, 1996L, 1996L), element = c("tmin", 
    "tmax", "ppt", "tmean", "tmin", "tmax", "ppt", "tmean", "tmin" 
    ), month = c("apr", "apr", "apr", "apr", "aug", "aug", "aug", 
    "aug", "dec"), value = c(32.8099999785423, 53.2939998626709, 
    6.64250644805908, 43.0519997596741, 56.552000617981, 76.55, 
    2.51968, 66.5509994506836, 26.1320000171661)), .Names = c("gridNumber", 
"fips", "cropArea", "state", "county_name", "long", "lat", "year", 
"element", "month", "value"), class = c("tbl_df", "data.frame" 
), row.names = c(NA, -9L)) 

`spread` 和 `dcast` 的输出：

> spread(dat, element, value) 

Source: local data frame [5 x 13] 

    gridNumber fips cropArea state  county_name  long  lat year month  ppt tmax tmean tmin 
     (int) (int) (dbl) (fctr)    (chr)  (dbl) (dbl) (int) (chr) (dbl) (dbl) (dbl) (dbl) 
1  266783 9005  0  CT Litchfield County -73.45833 42.04167 1996 apr 6.642506 53.294  NA 32.810 
2  266783 9005  0  CT Litchfield County -73.45833 42.04167 1996 aug 2.519680 76.550  NA 56.552 
3  266783 9005  0  CT Litchfield County -73.45833 42.04167 1996 dec  NA  NA  NA 26.132 
4  266783 9005  0  CT Litchfield County -73.45833 42.04167 1996 apr  NA  NA 43.052  NA 
5  266783 9005  0  CT Litchfield County -73.45833 42.04167 1996 aug  NA  NA 66.551  NA 

> dcast(dat, gridNumber + fips + cropArea + state + county_name + long + lat + year + month ~ element, value.var = "value") 
    gridNumber fips cropArea state  county_name  long  lat year month  ppt tmax tmean tmin 
1  266783 9005  0 CT Litchfield County -73.45833 42.04167 1996 apr 6.642506 53.294  NA 32.810 
2  266783 9005  0 CT Litchfield County -73.45833 42.04167 1996 aug 2.519680 76.550  NA 56.552 
3  266783 9005  0 CT Litchfield County -73.45833 42.04167 1996 dec  NA  NA  NA 26.132 
4  266783 9005  0 CT Litchfield County -73.45833 42.04167 1996 apr  NA  NA 43.052  NA 
5  266783 9005  0 CT Litchfield County -73.45833 42.04167 1996 aug  NA  NA 66.551  NA 
+0

我认为问题在于您使用了数值列作为键。试试 `sprintf("%.40f", dat$long)` 就明白我的意思了。 Yup; – bdemarest

回答

1

你的问题出在 `lat` 和 `long` 值的精度上。

> unique(dat$long) 
# [1] -73.45833 -73.45833 
> unique(dat$lat) 
# [1] 42.04167 42.04167 

## Notice two 'unique' values that are 'printed' the same 

如果查看 `structure` 数据中的各个观测值，你会发现 `lat` 和 `long` 的取值存在细微差异。

例如，可以对经/纬度值进行四舍五入，从而消除精度“错误”：

# Round the coordinates to 4 decimal places so that rows whose lat/long differ
# only in the trailing digits end up with identical key values.
dat$lat <- round(dat$lat, 4) 
dat$long <- round(dat$long, 4) 

# Reshape again, now using the rounded coordinates.
spread(dat, element, value) 

## now tmean is aligned with the other elements for each month

    #gridNumber fips cropArea state  county_name  long  lat year month  ppt tmax tmean tmin 
#(int) (int) (dbl) (fctr)    (chr) (dbl) (dbl) (int) (chr) (dbl) (dbl) (dbl) (dbl) 
#1  266783 9005  0  CT Litchfield County -73.4583 42.0417 1996 apr 6.642506 53.294 43.052 32.810 
#2  266783 9005  0  CT Litchfield County -73.4583 42.0417 1996 aug 2.519680 76.550 66.551 56.552 
#3  266783 9005  0  CT Litchfield County -73.4583 42.0417 1996 dec  NA  NA  NA 26.132 
+0

这样就解决了。我也猜到可能是这个原因，但那些值看起来没有问题。谢谢！ – Vedda

+0

是的，打印到控制台时它们看起来没有问题，这个问题也坑过我好几次。 – SymbolixAU