2015-10-06 145 views
0

我对R非常陌生我试图让我的函数将输出的表格与我在函数中指定的列进行比较,当用户输入日期(mm/dd/yy)和季节(yyyy/yy)。我很困惑如何设置我的初始数据框。函数if else语句从不同的数据集中读取

所以有3个季节我从(3个网站)拉。我的IF语句应该读入一个df(称为dfmess,因为它是一团糟)。我该怎么做,希望这是有道理的,有人可以帮助我。我会很感激,如果有人能理清这个烂摊子给我...

Season_2015_16 <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE) 
    Season_2014_15 <- read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE) 
    Season_2013_14 <- read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE) 

    **dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE) 
    dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE) 
    dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE)** 

    EPL_Standings <- function(Standingdate, season){ 
     Standingdate <- as.Date(Standingdate, format = "%m/%d/%Y") 
     Standingdate <- as.Date("08/30/15", format = "%m/%d/%y") 
     if (season == "2015/16"){ 

     read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE) -> dfmess 

     } else if (season == "2014/15") { 
     read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE) -> dfmess 

     } else if (season == "2013/14"){ 
     read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE) -> dfmess 

     } 

    dfmess[dfmess$Date <= Standingdate, 2:6] 
    #Team Record 


    ddply(dfmess, 
     .(HomeTeam) 
     , summarize 
     , win = sum(FTR == "H") 
     , Loss = sum(FTR == "A") 
     , Draws = sum(FTR == "D") 
) -> homewins1 #Homerecord 
    ddply(dfmess, 
     .(AwayTeam) 
     , summarize 
     , win = sum(FTR == "A") 
     , Loss = sum(FTR == "H") 
     , Draws = sum(FTR == "D") 
) -> awaywins1 #Awayrecord 

    merge(homewins1, awaywins1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> homeawayrec 

    #homeawayrec 
    (homeawayrec$win.x + homeawayrec$win.y) -> homeawayrec$totalwins #adds away and home wins together, then puts on table. renamed column later 
    (homeawayrec$Loss.x + homeawayrec$Loss.y) -> homeawayrec$Total.Losses #same as above, but Losses. Named consistently with what I changed too after I changed above column. 
    (homeawayrec$Draws.x + homeawayrec$Draws.y) -> homeawayrec$Total.Draws #same as above, but with Draws. 
    #names(homeawayrec) <- c("Teams","Home.Wins", "Home.Losses", "Home.Draws", "Away.Wins", "Away.Losses", "Away.Draws", "Total.Wins") #Name all columns. make sure name right thing. 

    #homeawayrec[, c("Teams", "Total.Wins", "Total.Draws", "Total.Losses")] -> WDL 

    #Need to make home and away record, should be easy. 

    #homewins1 #homerecord 
    #awaywins1 #awayrecord 

    #Matches Played 

    (homeawayrec$totalwins + homeawayrec$Total.Draws + homeawayrec$Total.Losses) -> homeawayrec$MatchesPlayed 
    #homeawayrec 

    #homeawayrec[, c("Teams", "MatchesPlayed")] -> Matches.Played 
    #Matches.Played #number of matches played 
    #POINTS (need to make it per match) 

    #ddply(dfmess, 
    # .(HomeTeam)     #DDPLY Points was eliminated because multiplying the wins by 3 and draws by 1 was easier. 
    # , summarize 
    # , win = 3*(sum(FTR == "H")) 
    # , Loss = 0*(sum(FTR == "A")) 
    #  , Draws = 1*(sum(FTR == "D")) 
    #) -> HomePoints #team points at home 

    #ddply(dfmess, 
    #  .(AwayTeam) 
    # , summarize 
    # , win = 3*(sum(FTR == "A")) 
    # , Loss = 0*(sum(FTR == "H")) 
    #  , Draws = 1*(sum(FTR == "D")) 
    #) -> AwayPoints #team points on road 

    #merge(HomePoints, AwayPoints, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> Pointshomeaway 

    #(HomePoints$win +HomePoints$Draws + AwayPoints$win + AwayPoints$Draws) -> Pointshomeaway$TotalPoints 

    #names(Pointshomeaway) <- c("Teams", "HomeWinPoints", "HomeLossPoints", "HomeDrawPoints", "AwayWinPoints", "AwayLossPoints", "AwayDrawPoints", "TotalPoints") 

    #Pointshomeaway[, c("Teams", "TotalPoints")] -> Points.standings #table with just total points, total points standings. 
    #homeawayrec 
    (3*(homeawayrec$totalwins)+(1*(homeawayrec$Total.Draws))) -> homeawayrec$Points 

    #points per match and point percentage % 
    #homeawayrec 
    (3*(homeawayrec$totalwins)+(1*(homeawayrec$Total.Draws)))/(homeawayrec$MatchesPlayed) -> homeawayrec$PPM 

    #homeawayrec[, c("Teams", "PointsPerMatch")] -> PointsPer.Standings #standings with teams and points per match. 

    #Point Percentage 
    (homeawayrec$Points)/(3*(homeawayrec$MatchesPlayed)) -> homeawayrec$PtPct 
    #homeawayrec 
    #Goals Scored 

    ddply(dfmess,       #THIS WORKED, make sure HomeTeam and FTHG/AwayTeam and FTAG match up. Goals allowed will be the inverse for each. 
     .(HomeTeam) 
     , summarize 
     , goalsscored1 = sum(FTHG) 
) -> Homegoalmade1 
    ddply(dfmess, 
     .(AwayTeam) 
     , summarize 
     , goalsscored1 = sum(FTAG) 
) -> Awaygoalsmade1 

    merge(Homegoalmade1, Awaygoalsmade1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> goalsmade1 #merged home/away goalsmmade1 THIS WORKED 
    (goalsmade1$goalsscored1.x + goalsmade1$goalsscored1.y) -> goalsmade1$allgoalsmade1 #total goalsmade 

    #goalsmade1 

    goalsmade1[, c("HomeTeam", "allgoalsmade1")] -> GS 

    merge(homeawayrec, GS, by.x = "HomeTeam", by.y = "HomeTeam", all = TRUE) -> homeawayrec1 
    #homeawayrec 
    #homeawayrec1 

    #Goals Scored Per Match 
    ((homeawayrec1$allgoalsmade1)/(homeawayrec1$MatchesPlayed)) -> homeawayrec1$GSM 
    #homeawayrec1 

    #Goals Allowed 
    ddply(dfmess,       #THIS WORKED, make sure HomeTeam and FTAG/AwayTeam and FTHG match up. Goals scored will be the inverse for each. 
     .(HomeTeam) 
     , summarize 
     , goalsscored1 = sum(FTAG) 
) -> Homegoalallowed1 
    ddply(dfmess, 
     .(AwayTeam) 
     , summarize 
     , goalsscored1 = sum(FTHG) 
) -> Awaygoalsallowed1 

    merge(Homegoalallowed1, Awaygoalsallowed1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> goalsallowed1 

    (goalsallowed1$goalsscored1.x + goalsallowed1$goalsscored1.y) -> goalsallowed1$GA 

    #goalsallowed1 

    goalsallowed1[, c("HomeTeam", "allgoalsallowed1")] -> GoalsAllowedall 

    merge(homeawayrec1, goalsallowed1, by.x = "HomeTeam", by.y = "HomeTeam", all = TRUE) -> homeawayrec2 

    #Goals Allowed Per Match 

    ((homeawayrec2$GA)/homeawayrec2$MatchesPlayed) -> homeawayrec2$GAM 
    names(homeawayrec2) 
    #putting columns in correct order 
    #if I rename columns the function gets confused and won't work? 
    homeawayrec2[, c(1, 8, 10, 9, 2, 4, 3, 5, 7, 6, 11, 12, 13, 14, 15, 16, 19, 20)] 
    #Last 10 matches played 
    ((tail(homeawayrec2$totalwins, n=10)) (tail(homeawayrec2$Total.Draws, n=10)) (tail(homeawayrec2$Total.Losses, n=10))) 
    tail(homeawayrec2[, c("totalwins", "Total.Draws", "Total.Losses")], n=10) 
    homeawayrec2 
    print(homeawayrec2) 
    return(dfmess) 
} 
     ((homeawayrec2$GA)/homeawayrec2$MatchesPlayed) -> homeawayrec2$GAM 
     names(homeawayrec2) 
     homeawayrec2[, c(1, 8, 10, 9, 2, 4, 3, 5, 7, 6, 11, 12, 13, 14, 15, 16, 19, 20)] 
     print(homeawayrec2) 
     return(dfmess) 
    } 
    A <- EPL_Standings("09/26/14", "2013/14") #example numbers, placeholders. 
    head(A) 
+0

这是代码... – SuperCereal

+1

你'if'报表看起来很好,但你可能要分配的'了'if's内read.csv'一个对象,'dfmess < - 阅读。 CSV(...)'。 – Gregor

+0

谢谢你的回应。我做了这个改变,但是我仍然在'[.data.frame'(goalsallowed1,,c(“HomeTeam”,“allgoalsallowed1”))中出现这个错误:错误: 未定义的列被选中 我使用了它并没有得到任何解释。我也尝试过对R的帮助,但不明白。你知道这个问题吗?我怎样才能定义列? – SuperCereal

回答

0

这里是一个开始。一般来说,最好将相似的数据保存在同一张表中。

library(dplyr) 
library(lubridate) 
library(magrittr) 

data = 
    data_frame(start_season = c(2013, 2014, 2015)) %>% 
    mutate(url = 
      (start_season - 2000) %>% 
      paste0(., . + 1) %>% 
      sprintf("http://www.football-data.co.uk/mmz4281/%s/E0.csv", .)) %>% 
    group_by(start_season) %>% 
    do(read.csv(.$url)) %>% 
    mutate(Date = dmy(Date) %>% as.Date) %>% 
    arrange(Date) 

cutoffs = 
    data %>% 
    group_by(start_season) %>% 
    summarize(start_date = first(Date)) 

EPL_Standings = function(specific_date) { 

    specific_date = mdy(specific_date) %>% as.Date 

    this_season_so_far = 
    cutoffs %>% 
    filter(specific_date > start_date) %>% 
    last %>% 
    left_join(data) %>% 
    filter(Date <= specific_date) 

    # some sort of processing here 
} 
+0

我试过,但由于某种原因,它只是给了我一个+ ...所以也许我的处理只是垃圾。它是我的第一个功能(分配给班级)。从来没有任何类型的编程经验。 – SuperCereal

+0

如果你能告诉我我的处理是否好/好,我将不胜感激,你不必告诉我什么是错误的,因为我知道这是很多问题(除非你想)。我只想知道我在做什么似乎是正确的。 – SuperCereal

+0

另外,这似乎并没有工作。也许我在这方面很糟糕,但即使在应该工作的处理中也会引发很多错误。 – SuperCereal