我对R非常陌生我试图让我的函数将输出的表格与我在函数中指定的列进行比较,当用户输入日期(mm/dd/yy)和季节(yyyy/yy)。我很困惑如何设置我的初始数据框。函数if else语句从不同的数据集中读取
所以有3个季节我从(3个网站)拉。我的IF语句应该读入一个df(称为dfmess,因为它是一团糟)。我该怎么做,希望这是有道理的,有人可以帮助我。我会很感激,如果有人能理清这个烂摊子给我...
Season_2015_16 <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE)
Season_2014_15 <- read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE)
Season_2013_14 <- read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE)
**dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE)
dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE)
dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE)**
EPL_Standings <- function(Standingdate, season){
Standingdate <- as.Date(Standingdate, format = "%m/%d/%Y")
Standingdate <- as.Date("08/30/15", format = "%m/%d/%y")
if (season == "2015/16"){
read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE) -> dfmess
} else if (season == "2014/15") {
read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE) -> dfmess
} else if (season == "2013/14"){
read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE) -> dfmess
}
dfmess[dfmess$Date <= Standingdate, 2:6]
#Team Record
ddply(dfmess,
.(HomeTeam)
, summarize
, win = sum(FTR == "H")
, Loss = sum(FTR == "A")
, Draws = sum(FTR == "D")
) -> homewins1 #Homerecord
ddply(dfmess,
.(AwayTeam)
, summarize
, win = sum(FTR == "A")
, Loss = sum(FTR == "H")
, Draws = sum(FTR == "D")
) -> awaywins1 #Awayrecord
merge(homewins1, awaywins1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> homeawayrec
#homeawayrec
(homeawayrec$win.x + homeawayrec$win.y) -> homeawayrec$totalwins #adds away and home wins together, then puts on table. renamed column later
(homeawayrec$Loss.x + homeawayrec$Loss.y) -> homeawayrec$Total.Losses #same as above, but Losses. Named consistently with what I changed too after I changed above column.
(homeawayrec$Draws.x + homeawayrec$Draws.y) -> homeawayrec$Total.Draws #same as above, but with Draws.
#names(homeawayrec) <- c("Teams","Home.Wins", "Home.Losses", "Home.Draws", "Away.Wins", "Away.Losses", "Away.Draws", "Total.Wins") #Name all columns. make sure name right thing.
#homeawayrec[, c("Teams", "Total.Wins", "Total.Draws", "Total.Losses")] -> WDL
#Need to make home and away record, should be easy.
#homewins1 #homerecord
#awaywins1 #awayrecord
#Matches Played
(homeawayrec$totalwins + homeawayrec$Total.Draws + homeawayrec$Total.Losses) -> homeawayrec$MatchesPlayed
#homeawayrec
#homeawayrec[, c("Teams", "MatchesPlayed")] -> Matches.Played
#Matches.Played #number of matches played
#POINTS (need to make it per match)
#ddply(dfmess,
# .(HomeTeam) #DDPLY Points was eliminated because multiplying the wins by 3 and draws by 1 was easier.
# , summarize
# , win = 3*(sum(FTR == "H"))
# , Loss = 0*(sum(FTR == "A"))
# , Draws = 1*(sum(FTR == "D"))
#) -> HomePoints #team points at home
#ddply(dfmess,
# .(AwayTeam)
# , summarize
# , win = 3*(sum(FTR == "A"))
# , Loss = 0*(sum(FTR == "H"))
# , Draws = 1*(sum(FTR == "D"))
#) -> AwayPoints #team points on road
#merge(HomePoints, AwayPoints, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> Pointshomeaway
#(HomePoints$win +HomePoints$Draws + AwayPoints$win + AwayPoints$Draws) -> Pointshomeaway$TotalPoints
#names(Pointshomeaway) <- c("Teams", "HomeWinPoints", "HomeLossPoints", "HomeDrawPoints", "AwayWinPoints", "AwayLossPoints", "AwayDrawPoints", "TotalPoints")
#Pointshomeaway[, c("Teams", "TotalPoints")] -> Points.standings #table with just total points, total points standings.
#homeawayrec
(3*(homeawayrec$totalwins)+(1*(homeawayrec$Total.Draws))) -> homeawayrec$Points
#points per match and point percentage %
#homeawayrec
(3*(homeawayrec$totalwins)+(1*(homeawayrec$Total.Draws)))/(homeawayrec$MatchesPlayed) -> homeawayrec$PPM
#homeawayrec[, c("Teams", "PointsPerMatch")] -> PointsPer.Standings #standings with teams and points per match.
#Point Percentage
(homeawayrec$Points)/(3*(homeawayrec$MatchesPlayed)) -> homeawayrec$PtPct
#homeawayrec
#Goals Scored
ddply(dfmess, #THIS WORKED, make sure HomeTeam and FTHG/AwayTeam and FTAG match up. Goals allowed will be the inverse for each.
.(HomeTeam)
, summarize
, goalsscored1 = sum(FTHG)
) -> Homegoalmade1
ddply(dfmess,
.(AwayTeam)
, summarize
, goalsscored1 = sum(FTAG)
) -> Awaygoalsmade1
merge(Homegoalmade1, Awaygoalsmade1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> goalsmade1 #merged home/away goalsmmade1 THIS WORKED
(goalsmade1$goalsscored1.x + goalsmade1$goalsscored1.y) -> goalsmade1$allgoalsmade1 #total goalsmade
#goalsmade1
goalsmade1[, c("HomeTeam", "allgoalsmade1")] -> GS
merge(homeawayrec, GS, by.x = "HomeTeam", by.y = "HomeTeam", all = TRUE) -> homeawayrec1
#homeawayrec
#homeawayrec1
#Goals Scored Per Match
((homeawayrec1$allgoalsmade1)/(homeawayrec1$MatchesPlayed)) -> homeawayrec1$GSM
#homeawayrec1
#Goals Allowed
ddply(dfmess, #THIS WORKED, make sure HomeTeam and FTAG/AwayTeam and FTHG match up. Goals scored will be the inverse for each.
.(HomeTeam)
, summarize
, goalsscored1 = sum(FTAG)
) -> Homegoalallowed1
ddply(dfmess,
.(AwayTeam)
, summarize
, goalsscored1 = sum(FTHG)
) -> Awaygoalsallowed1
merge(Homegoalallowed1, Awaygoalsallowed1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> goalsallowed1
(goalsallowed1$goalsscored1.x + goalsallowed1$goalsscored1.y) -> goalsallowed1$GA
#goalsallowed1
goalsallowed1[, c("HomeTeam", "allgoalsallowed1")] -> GoalsAllowedall
merge(homeawayrec1, goalsallowed1, by.x = "HomeTeam", by.y = "HomeTeam", all = TRUE) -> homeawayrec2
#Goals Allowed Per Match
((homeawayrec2$GA)/homeawayrec2$MatchesPlayed) -> homeawayrec2$GAM
names(homeawayrec2)
#putting columns in correct order
#if I rename columns the function gets confused and won't work?
homeawayrec2[, c(1, 8, 10, 9, 2, 4, 3, 5, 7, 6, 11, 12, 13, 14, 15, 16, 19, 20)]
#Last 10 matches played
((tail(homeawayrec2$totalwins, n=10)) (tail(homeawayrec2$Total.Draws, n=10)) (tail(homeawayrec2$Total.Losses, n=10)))
tail(homeawayrec2[, c("totalwins", "Total.Draws", "Total.Losses")], n=10)
homeawayrec2
print(homeawayrec2)
return(dfmess)
}
((homeawayrec2$GA)/homeawayrec2$MatchesPlayed) -> homeawayrec2$GAM
names(homeawayrec2)
homeawayrec2[, c(1, 8, 10, 9, 2, 4, 3, 5, 7, 6, 11, 12, 13, 14, 15, 16, 19, 20)]
print(homeawayrec2)
return(dfmess)
}
A <- EPL_Standings("09/26/14", "2013/14") #example numbers, placeholders.
head(A)
这是代码... – SuperCereal
你'if'报表看起来很好,但你可能要分配的'了'if's内read.csv'一个对象,'dfmess < - 阅读。 CSV(...)'。 – Gregor
谢谢你的回应。我做了这个改变,但是我仍然在'[.data.frame'(goalsallowed1,,c(“HomeTeam”,“allgoalsallowed1”))中出现这个错误:错误: 未定义的列被选中 我使用了它并没有得到任何解释。我也尝试过对R的帮助,但不明白。你知道这个问题吗?我怎样才能定义列? – SuperCereal