2012-08-12 85 views
0

我有一个JSON格式的文本文件,我使用此代码从link下载。它看起来像这样:将Twitter流JSON转换为R数据帧

"{\"contributors\":null,\"coordinates\":null,\"in_reply_to_user_id\":null,\"truncated\":false,\"text\":\"'ello miss...?\",\"entities\":{\"hashtags\":[],\"urls\":[],\"user_mentions\":[]},\"place\":null,\"id_str\":\"234702954853199872\",\"favorited\":false,\"geo\":null,\"source\":\"\u003Ca href=\\"http:\/\/www.tweetdeck.com\\" rel=\\"nofollow\\"\u003ETweetDeck\u003C\/a\u003E\",\"retweet_count\":0,\"in_reply_to_status_id_str\":null,\"in_reply_to_screen_name\":null,\"created_at\":\"Sun Aug 12 17:28:39 +0000 2012\",\"in_reply_to_user_id_str\":null,\"user\":{\"show_all_inline_media\":false,\"lang\":\"en\",\"friends_count\":145,\"profile_sidebar_border_color\":\"181A1E\",\"location\":\"Chile\",\"profile_background_image_url_https\":\"https:\/\/si0.twimg.com\/images\/themes\/theme9\/bg.gif\",\"id_str\":\"76862348\",\"listed_count\":5,\"profile_use_background_image\":true,\"profile_image_url_https\":\"https:\/\/si0.twimg.com\/profile_images\/1547896482\/egotuiter_normal.png\",\"description\":\"Geek. Amante de la m\u00fasica. Ateo. Nortino. Estudio Derecho en la U. de Chile. Nadie lee mi blog. Cobreloino. Afortunadamente ya no uso lentes :D\",\"follow_request_sent\":null,\"following\":null,\"profile_text_color\":\"666666\",\"default_profile\":false,\"profile_background_image_url\":\"http:\/\/a0.twimg.com\/images\/themes\/theme9\/bg.gif\",\"followers_count\":181,\"is_translator\":false,\"time_zone\":\"Buenos Aires\",\"profile_link_color\":\"2FC2EF\",\"protected\":false,\"created_at\":\"Thu Sep 24 05:11:20 +0000 2009\",\"profile_background_color\":\"1A1B1F\",\"name\":\"Camilo Rojas\",\"default_profile_image\":false,\"contributors_enabled\":false,\"statuses_count\":36450,\"geo_enabled\":false,\"notifications\":null,\"profile_background_tile\":false,\"url\":\"http:\/\/unnombreingenioso.blogspot.com\",\"profile_image_url\":\"http:\/\/a0.twimg.com\/profile_images\/1547896482\/egotuiter_normal.png\",\"screen_name\":\"rojas_altazor\",\"id\":76862348,\"verified\":false,\"utc_offset\":-10800,\"favourites_count\":42,\"profile_sidebar_fill_color\":\"252429\"},\"retweeted\":false,\"in_reply_to_status_id\":null,\"id\":234702954853199872} 
" 
"{\"contributors\":null,\"coordinates\":null,\"in_reply_to_user_id\":null,\"truncated\":false,\"text\":\"VALENTINA CORTANTE!\",\"entities\":{\"hashtags\":[],\"urls\":[],\"user_mentions\":[]},\"place\":null,\"id_str\":\"234702954861580288\",\"favorited\":false,\"geo\":null,\"source\":\"web\",\"retweet_count\":0,\"in_reply_to_status_id_str\":null,\"in_reply_to_screen_name\":null,\"created_at\":\"Sun Aug 12 17:28:39 +0000 2012\",\"in_reply_to_user_id_str\":null,\"user\":{\"show_all_inline_media\":false,\"lang\":\"es\",\"friends_count\":251,\"profile_sidebar_border_color\":\"CC3366\",\"location\":\"\",\"profile_background_image_url_https\":\"https:\/\/si0.twimg.com\/images\/themes\/theme11\/bg.gif\",\"id_str\":\"292675295\",\"listed_count\":0,\"profile_use_background_image\":true,\"profile_image_url_https\":\"https:\/\/si0.twimg.com\/profile_images\/2494400748\/7zd2z67iclw88rc0q8sr_normal.jpeg\",\"description\":\"Soy hermosaaaa y valen me ama\u2665\",\"follow_request_sent\":null,\"following\":null,\"profile_text_color\":\"362720\",\"default_profile\":false,\"profile_background_image_url\":\"http:\/\/a0.twimg.com\/images\/themes\/theme11\/bg.gif\",\"followers_count\":399,\"is_translator\":false,\"time_zone\":\"Santiago\",\"profile_link_color\":\"B40B43\",\"protected\":false,\"created_at\":\"Wed May 04 01:35:06 +0000 2011\",\"profile_background_color\":\"FF6699\",\"name\":\"Valen me amodora\u2020\",\"default_profile_image\":false,\"contributors_enabled\":false,\"statuses_count\":2643,\"geo_enabled\":false,\"notifications\":null,\"profile_background_tile\":true,\"url\":null,\"profile_image_url\":\"http:\/\/a0.twimg.com\/profile_images\/2494400748\/7zd2z67iclw88rc0q8sr_normal.jpeg\",\"screen_name\":\"CamiCelie\",\"id\":292675295,\"verified\":false,\"utc_offset\":-14400,\"favourites_count\":2,\"profile_sidebar_fill_color\":\"E5507E\"},\"retweeted\":false,\"in_reply_to_status_id\":null,\"id\":234702954861580288} 
" 

我很好奇如何提取单个部分到R数据框。举例来说,这将最好去像这样的数据帧:

screen_name  text 
rojas_altazor  'ello miss...? 
CamiCelie   VALENTINA CORTANTE! 
...     ... 

这是我首次涉足使用JSON文件,因此任何建议,将不胜感激。

回答

1
twit<-c("{\"contributors\":null,\"coordinates\":null,\"in_reply_to_user_id\":null,\"truncated\":false,\"text\":\"'ello miss...?\",\"entities\":{\"hashtags\":[],\"urls\":[],\"user_mentions\":[]},\"place\":null,\"id_str\":\"234702954853199872\",\"favorited\":false,\"geo\":null,\"source\":\"\\u003Ca href=\\\"http:\\/\\/www.tweetdeck.com\\\" rel=\\\"nofollow\\\"\\u003ETweetDeck\\u003C\\/a\\u003E\",\"retweet_count\":0,\"in_reply_to_status_id_str\":null,\"in_reply_to_screen_name\":null,\"created_at\":\"Sun Aug 12 17:28:39 +0000 2012\",\"in_reply_to_user_id_str\":null,\"user\":{\"show_all_inline_media\":false,\"lang\":\"en\",\"friends_count\":145,\"profile_sidebar_border_color\":\"181A1E\",\"location\":\"Chile\",\"profile_background_image_url_https\":\"https:\\/\\/si0.twimg.com\\/images\\/themes\\/theme9\\/bg.gif\",\"id_str\":\"76862348\",\"listed_count\":5,\"profile_use_background_image\":true,\"profile_image_url_https\":\"https:\\/\\/si0.twimg.com\\/profile_images\\/1547896482\\/egotuiter_normal.png\",\"description\":\"Geek. Amante de la m\\u00fasica. Ateo. Nortino. Estudio Derecho en la U. de Chile. Nadie lee mi blog. Cobreloino. Afortunadamente ya no uso lentes :D\",\"follow_request_sent\":null,\"following\":null,\"profile_text_color\":\"666666\",\"default_profile\":false,\"profile_background_image_url\":\"http:\\/\\/a0.twimg.com\\/images\\/themes\\/theme9\\/bg.gif\",\"followers_count\":181,\"is_translator\":false,\"time_zone\":\"Buenos Aires\",\"profile_link_color\":\"2FC2EF\",\"protected\":false,\"created_at\":\"Thu Sep 24 05:11:20 +0000 2009\",\"profile_background_color\":\"1A1B1F\",\"name\":\"Camilo Rojas\",\"default_profile_image\":false,\"contributors_enabled\":false,\"statuses_count\":36450,\"geo_enabled\":false,\"notifications\":null,\"profile_background_tile\":false,\"url\":\"http:\\/\\/unnombreingenioso.blogspot.com\",\"profile_image_url\":\"http:\\/\\/a0.twimg.com\\/profile_images\\/1547896482\\/egotuiter_normal.png\",\"screen_name\":\"rojas_altazor\",\"id\":76862348,\"verified\":false,\"utc_offset\":-10800,\"favourites_count\":42,\"profile_sidebar_fill_color\":\"252429\"},\"retweeted\":false,\"in_reply_to_status_id\":null,\"id\":234702954853199872}", 
"", "{\"contributors\":null,\"coordinates\":null,\"in_reply_to_user_id\":null,\"truncated\":false,\"text\":\"VALENTINA CORTANTE!\",\"entities\":{\"hashtags\":[],\"urls\":[],\"user_mentions\":[]},\"place\":null,\"id_str\":\"234702954861580288\",\"favorited\":false,\"geo\":null,\"source\":\"web\",\"retweet_count\":0,\"in_reply_to_status_id_str\":null,\"in_reply_to_screen_name\":null,\"created_at\":\"Sun Aug 12 17:28:39 +0000 2012\",\"in_reply_to_user_id_str\":null,\"user\":{\"show_all_inline_media\":false,\"lang\":\"es\",\"friends_count\":251,\"profile_sidebar_border_color\":\"CC3366\",\"location\":\"\",\"profile_background_image_url_https\":\"https:\\/\\/si0.twimg.com\\/images\\/themes\\/theme11\\/bg.gif\",\"id_str\":\"292675295\",\"listed_count\":0,\"profile_use_background_image\":true,\"profile_image_url_https\":\"https:\\/\\/si0.twimg.com\\/profile_images\\/2494400748\\/7zd2z67iclw88rc0q8sr_normal.jpeg\",\"description\":\"Soy hermosaaaa y valen me ama\\u2665\",\"follow_request_sent\":null,\"following\":null,\"profile_text_color\":\"362720\",\"default_profile\":false,\"profile_background_image_url\":\"http:\\/\\/a0.twimg.com\\/images\\/themes\\/theme11\\/bg.gif\",\"followers_count\":399,\"is_translator\":false,\"time_zone\":\"Santiago\",\"profile_link_color\":\"B40B43\",\"protected\":false,\"created_at\":\"Wed May 04 01:35:06 +0000 2011\",\"profile_background_color\":\"FF6699\",\"name\":\"Valen me amodora\\u2020\",\"default_profile_image\":false,\"contributors_enabled\":false,\"statuses_count\":2643,\"geo_enabled\":false,\"notifications\":null,\"profile_background_tile\":true,\"url\":null,\"profile_image_url\":\"http:\\/\\/a0.twimg.com\\/profile_images\\/2494400748\\/7zd2z67iclw88rc0q8sr_normal.jpeg\",\"screen_name\":\"CamiCelie\",\"id\":292675295,\"verified\":false,\"utc_offset\":-14400,\"favourites_count\":2,\"profile_sidebar_fill_color\":\"E5507E\"},\"retweeted\":false,\"in_reply_to_status_id\":null,\"id\":234702954861580288}", 
"") 


library(RJSONIO) 

twit1<-fromJSON(twit[1]) 

> twit1$user$screen_name 
[1] "rojas_altazor" 

> twit1$text 
[1] "'ello miss...?" 

> twit2<-fromJSON(twit[3]) 
> twit2$user$screen_name 
[1] "CamiCelie" 
> twit2$text 
[1] "VALENTINA CORTANTE!" 
2

首先,使用rjson库,我们拉入流 - 在这种情况下,我使用一个用户的时间轴。

library(rjson) 
timeline=fromJSON(file="http://api.twitter.com/1/statuses/user_timeline.json?screen_name=douglbutt&count=100") 

这给了我们一个名单 - 我总是有麻烦打破层级列表分解成整齐的桌子,所以我要去外地做现场。

timelist=unlist(timeline) 

tweets=timelist[names(timelist)=="text"] 
names=timelist[names(timelist)=="user.screen_name"] 
times=timelist[names(timelist)=="created_at"] 

tweet.frame=data.frame(tweets,names,times) 

因此,我们应该让他们都在tweet.frame。如果你想看看在未上市的数据可用字段,请尝试:

unique(names(timelist)) 
+0

给我'警告消息: 在readlines方法(文件):在“http://api.twitter.com发现 不完整的最后一行/1/statuses/user_timeline.json?screen_name=douglbutt&count=100''。任何想法为什么? – Chernoff 2013-03-16 22:11:45