2016-09-23 50 views
1

以下只是全部数据的一小部分:我有很多年的数据,涉及数千个股票代码……而且股票代码和日期范围在每次运行时都会变化。问题:如何在 R 中将 2 个 zoo 序列相乘

我有 2 个 zoo 序列:`returns` 和 `decFac`。

> tail(returns) 
        AAPL   DISCA   IBM   JNJ   KO 
2014-12-23 -0.0035479832 0.0137774854 0.004943048 -0.0233164191 0.0145336114 
2014-12-24 -0.0047206092 -0.0054309123 -0.002592361 0.0029684238 -0.0006984054 
2014-12-26 0.0175226064 -0.0005733945 0.003208447 0.0044836732 0.0004657399 
2014-12-29 -0.0007020609   NA   NA 0.0025666222 -0.0023303779 
2014-12-30 -0.0122776892   NA   NA 0.0002847851 -0.0023360686 
2014-12-31 -0.0192020576 -0.0219631307 0.002433726 -0.0075263261 -0.0127090448 
        NKE   TXN 
2014-12-23 0.0004169359 -0.0007298205 
2014-12-24 0.0033288228 0.0014592993 
2014-12-26 0.0055922518 -0.0020985205 
2014-12-29   NA   NA 
2014-12-30   NA   NA 
2014-12-31 -0.0075636285 -0.0086595788 

> tail(decFac) 
2014-12-23 2014-12-24 2014-12-26 2014-12-29 2014-12-30 2014-12-31 
0.02576202 0.02655878 0.02738019 0.02822700 0.02910000 0.03000000 

根据 RStudio 的显示,这两个对象的值都是 “zoo series from 2012-01-04 to 2014-12-31”(从 2012-01-04 到 2014-12-31 的 zoo 序列)。

它们各自的数据类型如下:

> sapply(returns, typeof) 
    AAPL DISCA  IBM  JNJ  KO  NKE  TXN 
"double" "double" "double" "double" "double" "double" "double" 
> sapply(decFac, typeof) 
[1] "double" 

我的目标(objective)是:把每只股票每一天的收益率乘以当天对应的 decFac。下面以 AAPL 为例(原文称“前 5 天”)。

期望的结果如下:

    AAPL 
12/23/2014 -0.000091403 
12/24/2014 -0.000125374 
12/26/2014 0.000479772 
12/29/2014 -0.000019817 
12/30/2014 -0.000357281 
12/31/2014 -0.000576062 

回答

2

zoo 和 xts 对象在进行运算之前会先按索引(日期)对齐:

library(xts) 

# The six dates shown in the question's tail(returns) output: take the
# calendar range 2014-12-23 .. 2014-12-31 and drop 12-25, 12-27 and 12-28
# (positions 3, 5 and 6 of the daily sequence).
time <- seq(
  as.Date("2014-12-23"),
  as.Date("2014-12-31"),
  by = "day"
)[-c(3, 5, 6)]

# Daily returns per symbol, one value per date in `time`; NA marks the
# dates for which the question's data has no observation.
AAPL <- c(
  -0.0035479832, -0.0047206092, 0.0175226064,
  -0.0007020609, -0.0122776892, -0.0192020576
)
DISCA <- c(
  0.0137774854, -0.0054309123, -0.0005733945,
  NA, NA, -0.0219631307
)
IBM <- c(
  0.004943048, -0.002592361, 0.003208447,
  NA, NA, 0.002433726
)
JNJ <- c(
  -0.0233164191, 0.0029684238, 0.0044836732,
  0.0025666222, 0.0002847851, -0.0075263261
)
KO <- c(
  0.0145336114, -0.0006984054, 0.0004657399,
  -0.0023303779, -0.0023360686, -0.0127090448
)
NKE <- c(
  0.0004169359, 0.0033288228, 0.0055922518,
  NA, NA, -0.0075636285
)
TXN <- c(
  -0.0007298205, 0.0014592993, -0.0020985205,
  NA, NA, -0.0086595788
)

# Decay factor, one value per date in `time`.
decFac_v <- c(
  0.02576202, 0.02655878, 0.02738019,
  0.028227, 0.0291, 0.03
)

# The same data as zoo objects ...
returns_zoo <- zoo(
  cbind(AAPL, DISCA, IBM, JNJ, KO, NKE, TXN),
  order.by = time
)
decFac_zoo <- drop(zoo(decFac_v, order.by = time))

# ... and as xts objects.
returns <- xts(
  cbind(AAPL, DISCA, IBM, JNJ, KO, NKE, TXN),
  order.by = time
)
decFac <- drop(xts(decFac_v, order.by = time))

直接把两个 zoo 或 xts 对象相乘就可以了:

# Multiply the xts objects directly. As the printed result shows, every
# symbol column of `returns` is scaled by the `decFac` value of the same
# date, and NA returns stay NA.
returns * decFac 
#     AAPL   DISCA   IBM   JNJ   KO   NKE   TXN 
# 2014-12-23 -9.140321e-05 3.549359e-04 1.273429e-04 -6.006781e-04 3.744152e-04 1.074111e-05 -1.880165e-05 
# 2014-12-24 -1.253736e-04 -1.442384e-04 -6.884995e-05 7.883771e-05 -1.854880e-05 8.840947e-05 3.875721e-05 
# 2014-12-26 4.797723e-04 -1.569965e-05 8.784789e-05 1.227638e-04 1.275205e-05 1.531169e-04 -5.745789e-05 
# 2014-12-29 -1.981707e-05   NA   NA 7.244804e-05 -6.577958e-05   NA   NA 
# 2014-12-30 -3.572808e-04   NA   NA 8.287246e-06 -6.797960e-05   NA   NA 
# 2014-12-31 -5.760617e-04 -6.588939e-04 7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04 
# The zoo versions of the same data give the same table.
returns_zoo * decFac_zoo 
#     AAPL   DISCA   IBM   JNJ   KO   NKE   TXN 
# 2014-12-23 -9.140321e-05 3.549359e-04 1.273429e-04 -6.006781e-04 3.744152e-04 1.074111e-05 -1.880165e-05 
# 2014-12-24 -1.253736e-04 -1.442384e-04 -6.884995e-05 7.883771e-05 -1.854880e-05 8.840947e-05 3.875721e-05 
# 2014-12-26 4.797723e-04 -1.569965e-05 8.784789e-05 1.227638e-04 1.275205e-05 1.531169e-04 -5.745789e-05 
# 2014-12-29 -1.981707e-05   NA   NA 7.244804e-05 -6.577958e-05   NA   NA 
# 2014-12-30 -3.572808e-04   NA   NA 8.287246e-06 -6.797960e-05   NA   NA 
# 2014-12-31 -5.760617e-04 -6.588939e-04 7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04 

考虑一下,如果对 `returns` 或 `decFac` 对象取子集,会发生什么:

# Subsetting demo: x is the full returns matrix and y the decay-factor
# series, both as zoo objects indexed by `time`.
x <- zoo(
  cbind(AAPL, DISCA, IBM, JNJ, KO, NKE, TXN),
  order.by = time
)
y <- drop(zoo(decFac_v, order.by = time))

# Baseline: both operands cover all six dates.
x * y
#     AAPL   DISCA   IBM   JNJ   KO   NKE   TXN 
# 2014-12-23 -9.140321e-05 3.549359e-04 1.273429e-04 -6.006781e-04 3.744152e-04 1.074111e-05 -1.880165e-05 
# 2014-12-24 -1.253736e-04 -1.442384e-04 -6.884995e-05 7.883771e-05 -1.854880e-05 8.840947e-05 3.875721e-05 
# 2014-12-26 4.797723e-04 -1.569965e-05 8.784789e-05 1.227638e-04 1.275205e-05 1.531169e-04 -5.745789e-05 
# 2014-12-29 -1.981707e-05   NA   NA 7.244804e-05 -6.577958e-05   NA   NA 
# 2014-12-30 -3.572808e-04   NA   NA 8.287246e-06 -6.797960e-05   NA   NA 
# 2014-12-31 -5.760617e-04 -6.588939e-04 7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04 

# Third date removed from the factor series: the product has no row for
# the third date index (2014-12-26).
x * y[-3]
#     AAPL   DISCA   IBM   JNJ   KO   NKE   TXN 
# 2014-12-23 -9.140321e-05 0.0003549359 1.273429e-04 -6.006781e-04 3.744152e-04 1.074111e-05 -1.880165e-05 
# 2014-12-24 -1.253736e-04 -0.0001442384 -6.884995e-05 7.883771e-05 -1.854880e-05 8.840947e-05 3.875721e-05 
# 2014-12-29 -1.981707e-05   NA   NA 7.244804e-05 -6.577958e-05   NA   NA 
# 2014-12-30 -3.572808e-04   NA   NA 8.287246e-06 -6.797960e-05   NA   NA 
# 2014-12-31 -5.760617e-04 -0.0006588939 7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04 

# Third date removed from the returns instead: again no row for the third
# date index.
x[-3] * y
#     AAPL   DISCA   IBM   JNJ   KO   NKE   TXN 
# 2014-12-23 -9.140321e-05 0.0003549359 1.273429e-04 -6.006781e-04 3.744152e-04 1.074111e-05 -1.880165e-05 
# 2014-12-24 -1.253736e-04 -0.0001442384 -6.884995e-05 7.883771e-05 -1.854880e-05 8.840947e-05 3.875721e-05 
# 2014-12-29 -1.981707e-05   NA   NA 7.244804e-05 -6.577958e-05   NA   NA 
# 2014-12-30 -3.572808e-04   NA   NA 8.287246e-06 -6.797960e-05   NA   NA 
# 2014-12-31 -5.760617e-04 -0.0006588939 7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04 

# Third symbol column (IBM) removed: all dates remain, but the product has
# no values for that column.
x[, -3] * y
#     AAPL   DISCA   JNJ   KO   NKE   TXN 
# 2014-12-23 -9.140321e-05 3.549359e-04 -6.006781e-04 3.744152e-04 1.074111e-05 -1.880165e-05 
# 2014-12-24 -1.253736e-04 -1.442384e-04 7.883771e-05 -1.854880e-05 8.840947e-05 3.875721e-05 
# 2014-12-26 4.797723e-04 -1.569965e-05 1.227638e-04 1.275205e-05 1.531169e-04 -5.745789e-05 
# 2014-12-29 -1.981707e-05   NA 7.244804e-05 -6.577958e-05   NA   NA 
# 2014-12-30 -3.572808e-04   NA 8.287246e-06 -6.797960e-05   NA   NA 
# 2014-12-31 -5.760617e-04 -6.588939e-04 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04 

考虑一下,如果你要扩大日期范围,会发生什么:

# Expanding the time dimension: every calendar day from 2012-01-04 up to
# the day before the first date in `time`.
expanded_time <- seq(
  from = as.Date("2012-01-04"),
  to = as.Date("2014-12-22"),
  by = "day"
)

# Placeholder history: a return of 1 for every symbol on every earlier day.
value <- rep_len(1, length(expanded_time))
old_returns <- xts(
  matrix(
    value,
    nrow = length(value),
    ncol = 7,
    dimnames = list(
      NULL,
      c("AAPL", "DISCA", "IBM", "JNJ", "KO", "NKE", "TXN")
    )
  ),
  order.by = expanded_time
)

# Stack the placeholder history on top of the real returns.
returns_expanded_time <- xts(
  rbind(old_returns, returns),
  order.by = c(expanded_time, time)
)

# The product only contains the dates present in both objects, i.e. the
# six dates covered by `decFac`:
returns_expanded_time * decFac
#     AAPL   DISCA   IBM   JNJ   KO   NKE   TXN 
# 2014-12-23 -9.140321e-05 3.549359e-04 1.273429e-04 -6.006781e-04 3.744152e-04 1.074111e-05 -1.880165e-05 
# 2014-12-24 -1.253736e-04 -1.442384e-04 -6.884995e-05 7.883771e-05 -1.854880e-05 8.840947e-05 3.875721e-05 
# 2014-12-26 4.797723e-04 -1.569965e-05 8.784789e-05 1.227638e-04 1.275205e-05 1.531169e-04 -5.745789e-05 
# 2014-12-29 -1.981707e-05   NA   NA 7.244804e-05 -6.577958e-05   NA   NA 
# 2014-12-30 -3.572808e-04   NA   NA 8.287246e-06 -6.797960e-05   NA   NA 
# 2014-12-31 -5.760617e-04 -6.588939e-04 7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04 

再考虑一下,如果追加额外的列,会发生什么:

# Two extra constant columns (all ones), one entry per row of the expanded
# returns (placeholder history plus the six real dates).
new_column1 <- rep_len(1, length(expanded_time) + length(time))
new_column2 <- new_column1

# Expanded returns with the two extra columns appended as `nc1` and `nc2`.
returns_expanded_cols <- xts(
  cbind(
    rbind(old_returns, returns),
    nc1 = new_column1,
    nc2 = new_column2
  ),
  order.by = c(expanded_time, time)
)

# The product again only contains the dates present in both objects; the
# new columns `nc1` and `nc2` are multiplied as well (1 * decFac).
returns_expanded_cols * decFac
#     AAPL   DISCA   IBM   JNJ   KO   NKE   TXN  nc1  nc2 
# 2014-12-23 -9.140321e-05 3.549359e-04 1.273429e-04 -6.006781e-04 3.744152e-04 1.074111e-05 -1.880165e-05 0.02576202 0.02576202 
# 2014-12-24 -1.253736e-04 -1.442384e-04 -6.884995e-05 7.883771e-05 -1.854880e-05 8.840947e-05 3.875721e-05 0.02655878 0.02655878 
# 2014-12-26 4.797723e-04 -1.569965e-05 8.784789e-05 1.227638e-04 1.275205e-05 1.531169e-04 -5.745789e-05 0.02738019 0.02738019 
# 2014-12-29 -1.981707e-05   NA   NA 7.244804e-05 -6.577958e-05   NA   NA 0.02822700 0.02822700 
# 2014-12-30 -3.572808e-04   NA   NA 8.287246e-06 -6.797960e-05   NA   NA 0.02910000 0.02910000 
# 2014-12-31 -5.760617e-04 -6.588939e-04 7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04 0.03000000 0.03000000 
+0

我不知道该如何实现你的建议:我有多年的数据和数千个股票代码(我正在更新我的问题以说明这一点)……不过我确实指出过时间跨度是 2 年,即 “zoo series from 2012-01-04 to 2014-12-31”。 – John

+0

我的建议就是直接将 `returns` 乘以 `decFac`。结果是两个对象的乘积,其中只包含 `returns` 和 `decFac` 中日期索引相匹配的那些值。这个思路同样适用于数据集尾部以外的其他日期范围和列范围。我会在答案中补充示例,说明对任一对象取子集或扩展其范围时,乘积会如何变化。 –