2016-09-06 70 views
0

使用 SAS:当两个表中的日期并不完全相同时,如何按日期连接这两个表?例如,我想在 full_table 中添加一列,其值来自 changepoints 表的 'type',并按日期智能地合并匹配(即每条记录取其日期之前最近的一个变更点)。标题:按不匹配的日期连接表

ods listing;

/* ---------------------------------------------------------
   Main table: one row per id, each with an observation date.
   --------------------------------------------------------- */
data full_table;
    /* The colon modifier reads the next blank-delimited token with DATE9. */
    input id $ date :date9.;
    format date date9.;
    datalines;
a 01APR2015
b 02APR2015
c 03APR2015
d 01JUN2015
e 24JUN2015
f 01DEC2015
;
run;

/* Print the table that was just created. */
proc print data=full_table;
run;

/* ---------------------------------------------------------
   Additional information: the date on which each type starts.
   --------------------------------------------------------- */
data changepoints;
    /* The colon modifier reads the next blank-delimited token with DATE9. */
    input date :date9. type $;
    format date date9.;
    datalines;
15MAR2014 spiral
05JUN2015 circle
29NOV2015 square
;
run;

/* Print the table that was just created. */
proc print data=changepoints;
run;
/* ---------------------------------------------------------
   Desired result, typed in by hand: every full_table row
   together with the type that should be attached to it.
   --------------------------------------------------------- */
data new_table;
    /* The colon modifier reads the next blank-delimited token with DATE9. */
    input id $ date :date9. type $;
    format date date9.;
    datalines;
a 01APR2015 spiral
b 02APR2015 spiral
c 03APR2015 spiral
d 01JUN2015 spiral
e 24JUN2015 circle
f 01DEC2015 square
;
run;

/* Print the table that was just created. */
proc print data=new_table;
run;

/* ---------------------------------------------------------
   Attempt that does not give the desired result: a left join
   on exact date equality. None of the full_table dates equals
   a changepoints date, so type is missing on every row.
   --------------------------------------------------------- */
proc sql;
    create table new_table2 as
    select
        f.id,
        f.date,
        c.type
    from full_table as f
    left join changepoints as c
        on f.date = c.date;
quit;

期望的输出是:

       Obs id   date  type 
           1  a  01APR2015 spiral 
           2  b  02APR2015 spiral 
           3  c  03APR2015 spiral 
           4  d  01JUN2015 spiral 
           5  e  24JUN2015 circle 
           6  f  01DEC2015 square 

解答(基于下面被采纳的正确答案):

ods listing;

/* ---------------------------------------------------------
   Main table: one row per id, each with an observation date.
   --------------------------------------------------------- */
data full_table;
    /* The colon modifier reads the next blank-delimited token with DATE9. */
    input id $ date :date9.;
    format date date9.;
    datalines;
a 01APR2015
b 02APR2015
c 03APR2015
d 01JUN2015
e 24JUN2015
f 01DEC2015
;
run;

/* Print the table that was just created. */
proc print data=full_table;
run;

/* ---------------------------------------------------------
   Additional information: the date on which each type starts.
   --------------------------------------------------------- */
data changepoints;
    /* The colon modifier reads the next blank-delimited token with DATE9. */
    input date :date9. type $;
    format date date9.;
    datalines;
15MAR2014 spiral
05JUN2015 circle
29NOV2015 square
;
run;

/* Print the table that was just created. */
proc print data=changepoints;
run;

/**********************************************************
Give every changepoint a start and an end date, so that each
row describes the half-open range [start, end) used by the
range join on "date >= start and date < end".
***********************************************************/
proc sort data=changepoints;
    by descending date;
run;

data changepoints;
    set changepoints;
    /* In descending order the previous row is the next changepoint in
       time, so LAG(date) is the exclusive end of this row's range.
       The most recent changepoint has no successor and LAG returns
       missing; it gets a far-future sentinel instead of TODAY(), so
       rows dated today or later still match and the result does not
       depend on the day the program is run. */
    end = coalesce(lag(date), '31DEC9999'd);
    start = date;
    format start end date9.;
run;

/* Restore chronological order. */
proc sort data=changepoints;
    by date;
run;

proc print data=changepoints;
run;

/* ---------------------------------------------------------
   Range join: attach to each full_table row the type of the
   changepoints row whose [start, end) range contains its date.
   --------------------------------------------------------- */
proc sql noprint;
    create table test as
    select
        obs.id,
        obs.date,
        cp.type
    from full_table as obs
    left join changepoints as cp
        on  cp.start <= obs.date
        and obs.date <  cp.end;
quit;

/* Print the joined table. */
proc print data=test;
run;

回答

0

试着把你的 changepoints 表改成日期范围的形式,如下所示:

/* Changepoints written as date ranges, one row per type. */
data changepoints;
    /* Start and End sit back to back in columns 1-9 and 10-18,
       so both are read with fixed-width DATE9. at explicit columns. */
    input @1 Start date9. @10 End date9. type $;
    format Start End date9.;
    datalines;
15MAR201405JUN2015 spiral
05JUN201529NOV2015 circle
29NOV201501JAN2016 square
;
run;

然后就可以使用如下这样一个简单的 SQL 连接:

/* Range join: attach to each full_table row the type of the
   changepoints row whose [start, end) range contains its date. */
proc sql noprint;
    create table test as
    select
        obs.id,
        obs.date,
        cp.type
    from full_table as obs
    left join changepoints as cp
        on  cp.start <= obs.date
        and obs.date <  cp.end;
quit;
+0

尽管这不是一个完整的答案(因为还需要编写代码,把更多日期添加到变更点数据集中),但我决定采纳它,因为它更清晰、更易于理解。谢谢!! – variable

0

在我看来,最好的方法是使用 PROC FORMAT,它只需很少的工作就能很好地处理范围:

/* Main table: one row per id, each with an observation date. */
data full_table;
    /* The colon modifier reads the next blank-delimited token with DATE9. */
    input id $ date :date9.;
    format date date9.;
    datalines;
a 01APR2015
b 02APR2015
c 03APR2015
d 01JUN2015
e 24JUN2015
f 01DEC2015
;
run;


/* Additional information: the date on which each type starts. */
data changepoints;
    /* The colon modifier reads the next blank-delimited token with DATE9. */
    input date :date9. type $;
    format date date9.;
    datalines;
15MAR2014 spiral
05JUN2015 circle
29NOV2015 square
;
run;


/* Build a CNTLIN control data set for a format named CHANGEF that maps
   each date range between changepoints to the type in effect, load it
   with PROC FORMAT, then apply it to full_table. */
data for_fmt;
    set changepoints end=eof;
    length prev_type $6;
    /* Carry the previous record's values across iterations. The first
       range therefore starts at the hard-coded 01JAN2014 with a blank label. */
    retain prev_date '01JAN2014'd
           prev_type ' ';

    /* Range from the previous changepoint up to, but not including, this one. */
    label = prev_type;
    start = prev_date;
    end = date;
    eexcl = 'Y'; /* exclude the end value, so 05JUN2015 itself maps to circle */
    fmtname = 'CHANGEF';
    output;

    /* After the last changepoint, add one open-ended range for its own type. */
    if eof then do;
        label = type;
        start = date;
        end = .;
        /* HIGH upper bound. The flag is written in uppercase, as the HLO
           values are listed in the PROC FORMAT documentation; the original
           lowercase 'h' may not be recognised, which would leave this row
           with a missing end below its start. */
        hlo = 'H';
        output;
    end;

    prev_date = date;
    prev_type = type;
    drop type; /* TYPE has its own meaning in a CNTLIN data set */
run;

/* PROC FORMAT is not a run-group procedure, so it is ended with RUN. */
proc format cntlin=for_fmt;
run;

data want;
    set full_table;
    /* Look up the type for each date through the CHANGEF format. */
    type = put(date, CHANGEF6.);
run;
+0

我运行了这段代码,但它对我不起作用。 – variable