2016-07-27 190 views
2

我想第一次使用SAS解析Json文件。 我在互联网上发现了一些脚本,并试图在SAS上使用此代码,但无法使其在我的JSON数据文件上工作。解析与SAS的Json文件

data parsed; 
infile data lrecl=32000 truncover scanover dlm=","; 
input @'"driver_id":{' driver_id $255. 
    @'"publication_site_id":{' publication_site_id $255. 
    @'"confirmed_seat_count":{' confirmed_seat_count $255. 
    @'"published_date":{' published_date $255. 
    @'"trip_date":{' trip_date $255. 
    @'"first_publication_date":{' first_publication_date $255. 
    @'"profile_picture_added_date":{' profile_picture_added_date $255.; 

driver_id=scan(driver_id, 1, '",'); 
published_date=scan(published_date, 1, '",'); 
trip_date =scan(trip_date, 1, '",'); 
publication_site_id=scan(publication_site_id, 1, '",'); 
confirmed_seat_count =scan(confirmed_seat_count, 1, '",'); 
first_publication_date =scan(first_publication_date, 1, '",'); 
profile_picture_added_date =scan(profile_picture_added_date, 1, '",'); 
run; 

这里是我的JSON文件

{ 
"driver_id": {"0":11450650,"1":4705453,"2":,"3":4892472,"4":47572650,"5":5242675}, 
"publication_site_id":{"0":1056,"1":1056,"2":1056,"3":5056,"4":1056,"5":1056}, 
"confirmed_seat_count":{"0":2.0,"1":0.0,"2":1.0,"3":0.0,"4":0.0,"5":0.0}, 
"published_date":{"0":"2015-06-16 00:00:00","1":"2015-06-16 00:00:00","2":"2015-06-16 00:00:00","3":"2015-06-16 00:00:00","4":"2015-06-16 00:00:00","5":"2015-06-16 00:00:00"}, 
"trip_date":{"0":"2015-06-21 19:00:00","1":"2015-06-21 12:00:00","2":"2015-06-19 17:00:00","3":"2015-06-17 15:30:23","4":"2015-06-21 15:00:00","5":"2015-06-21 17:00:00"}, 
"first_publication_date":{"0":"2015-01-10 00:00:00","1":"2014-01-25 00:00:00","2":"2014-12-22 00:00:00","3":"2014-10-22 00:00:00","4":"2015-06-16 00:00:00","5":"2014-11-20 00:00:00"}, 
"profile_picture_added_date":{"0":null,"1":null,"2":"2015-09-22 00:00:00","3":"2013-11-19 00:00:00","4":"2015-06-17 00:00:00","5":null} 
} 

的概述我的帮助非常感谢。

问候 SM

+0

您正在使用哪个版本的SAS? – Altons

回答

1

这会帮助你对你的方式:

options noquotelenmax; 
filename data temp lrecl=32000; 

data _null_; 
file data; 
put '{ 
"driver_id": {"0":11450650,"1":4705453,"2":,"3":4892472,"4":47572650,"5":5242675}, 
"publication_site_id":{"0":1056,"1":1056,"2":1056,"3":5056,"4":1056,"5":1056}, 
"confirmed_seat_count":{"0":2.0,"1":0.0,"2":1.0,"3":0.0,"4":0.0,"5":0.0}, 
"published_date":{"0":"2015-06-16 00:00:00","1":"2015-06-16 00:00:00","2":"2015-06-16 00:00:00","3":"2015-06-16 00:00:00","4":"2015-06-16 00:00:00","5":"2015-06-16 00:00:00"}, 
"trip_date":{"0":"2015-06-21 19:00:00","1":"2015-06-21 12:00:00","2":"2015-06-19 17:00:00","3":"2015-06-17 15:30:23","4":"2015-06-21 15:00:00","5":"2015-06-21 17:00:00"}, 
"first_publication_date":{"0":"2015-01-10 00:00:00","1":"2014-01-25 00:00:00","2":"2014-12-22 00:00:00","3":"2014-10-22 00:00:00","4":"2015-06-16 00:00:00","5":"2014-11-20 00:00:00"}, 
"profile_picture_added_date":{"0":null,"1":null,"2":"2015-09-22 00:00:00","3":"2013-11-19 00:00:00","4":"2015-06-17 00:00:00","5":null} 
}'; 
run; 

data parsed; 
infile data lrecl=32000 truncover scanover dlm='}' ; 
input @'"driver_id": {' driver_id: $255. 
    @'"publication_site_id":{' publication_site_id: $255. 
    @'"confirmed_seat_count":{' confirmed_seat_count: $255. 
    @'"published_date":{' published_date: $255. 
    @'"trip_date":{' trip_date: $255. 
    @'"first_publication_date":{' first_publication_date: $255. 
    @'"profile_picture_added_date":{' profile_picture_added_date: [email protected]@; 
run; 

我更新了分隔符(让您保持内部的对象,它们本身就是分隔),增加结肠输入修改(允许您通过说“开始读取下一个非空白列”来读取非标准值)并用@@尾部列指针结束以保存输出后续观察的记录(机会是您的json将作为一个长流到达) 。哦,我还为@'"driver_id": {'增加了空间 - 这是一个错字吗?

我省略了扫描部分,认为你有这个位控制!

1

不是我最好的工作,但它会给你以后的东西!需要重新组织列和一些dq,但是你的JSON将看起来像一个SAS数据集。我会建议你看看python pandas包。用pandas你可以用一行代码对它进行排序。

data parsed; 
infile "/your_path/test.json" lrecl=32500 truncover scanover dlm="," firstobs=2; 
input @'"driver_id"' _driver_id $255. 
     @'"publication_site_id"' _publication_site_id $255. 
     @'"confirmed_seat_count"' _confirmed_seat_count $255. 
     @'"published_date"' _published_date $255. 
     @'"trip_date"' _trip_date $255. 
     @'"first_publication_date"' _first_publication_date $255. 
     @'"profile_picture_added_date"' _profile_picture_added_date $255.; 
array v{*} _:; 
do i=1 to dim(v); 
    v[i]=prxchange('s/"\d+"//',-1,v[i]); 
    v[i]=compress(v[i],'{}"'); 
    v[i]=substr(v[i],3); 
end; 
run; 

proc transpose data=parsed out=t_parsed ; 
var _:; 
run; 

data want(drop=i); 
set t_parsed; 
array split(6) $50.; 
do i=1 to dim(split); 
split[i] = scan(col1,i,','); 
end; 
run; 

proc sort data=want; by _name_;run; 

proc transpose data=want out=want_t(drop=_name_); 
var split:; 
id _name_; 
run;