Andrew Clark

Well, the sun is finally shining in Vancouver, at least for one day, and the MLB season is in full swing (other than for the batting lineup of the Blue Jays, who are 3-12), so it is time for a dip into the wealth of data that is provided. I have previously produced a shiny app which provides interactivity for:

  • Division Races
  • Pitcher Game Scores
  • Pitcher & Batter WAR comparisons
  • Batter game logs by Year

There are several useful R packages for baseball and I plan to utilize them in the future, but for now I will just take the opportunity to highlight the Baseball With R blog, which has half a dozen of the top R-baseball gurus regularly churning out analyses of the game.

The post I am going to draw on is by Jim Albert, joint author, with Max Marchi, of the Analyzing Baseball Data with R book, and relates to the construction of a Win Probability graph using plotly for Game 7 of the 2016 World Series, in which the Cubs finally ended their 100 year quest for the championship.

The chart itself is based on data from Fangraphs, a site which does an outstanding job of game analysis.

What now looks like a temporary blip in the Blue Jays’ fortunes ended a couple of weeks earlier at the hands of the AL Champion Indians. Let’s look at that sorry tale, where the Jays were favourites for all of the first 3 at-bats.

library(plotly)
library(XML)
library(purrr)
library(crosstalk)
library(stringr)
library(tidyverse)

Once we have located the fangraphs URL with the gamelog of interest we can use the XML package, amongst alternatives, to create a data.frame

# Fangraphs play-by-play page for the game of 2016-10-19
url <- "http://www.fangraphs.com/plays.aspx?date=2016-10-19&team=Blue%20Jays&dh=0&season=2016"

# Parse every HTML table on the page, leaving text columns as character
tables <- readHTMLTable(url, stringsAsFactors = FALSE)

# List the table names to find the play-by-play one
names(tables)
## [1] "NULL"                          "NULL"                         
## [3] "NULL"                          "NULL"                         
## [5] "PlayGame1_rdDate_calendar"     "NULL"                         
## [7] "PlayGame1_rdDate_calendar_Top" "NULL"                         
## [9] "PlayGame1_dgPlay_ctl00"
# Select the play-by-play table
df <- tables[["PlayGame1_dgPlay_ctl00"]]


## As per Jim's code, add a play number and convert the WE field to a numeric
## one holding just the win probability

# seq_len(nrow(df)) rather than 1:n, so that an empty table yields no play
# numbers instead of the descending sequence c(1, 0)
df$Play_Number <- seq_len(nrow(df))
# Strip the "%" sign so the value can be parsed as a number
df$WE <- as.numeric(str_replace(df$WE, "%", ""))

## There is also a repeated column name which needs attention
names(df)[13] <- "ScoreChange"

# check that everything looks hunky-dory
glimpse(df)
## Observations: 64
## Variables: 15
## $ Pitcher     <chr> "M Estrada", "M Estrada", "M Estrada", "M Estrada"...
## $ Player      <chr> "C Santana", "J Kipnis", "F Lindor", "M Napoli", "...
## $ Inn.        <chr> "1", "1", "1", "1", "1", "1", "1", "1", "2", "2", ...
## $ Outs        <chr> "0", "1", "2", "2", "2", "0", "1", "2", "0", "1", ...
## $ Base        <chr> "___", "___", "___", "1__", "_2_", "___", "___", "...
## $ Score       <chr> "0-0", "0-0", "0-0", "0-1", "0-1", "0-1", "0-1", "...
## $ Play        <chr> "Carlos Santana fouled out to catcher (Fly).", "Ja...
## $ LI          <chr> "0.87", "0.62", "0.40", "0.79", "1.03", "0.92", "0...
## $ RE          <chr> "0.50", "0.26", "0.10", "0.23", "0.32", "0.50", "0...
## $ WE          <dbl> 52.2, 53.8, 52.5, 41.5, 44.5, 42.1, 40.5, 39.4, 41...
## $ WPA         <chr> "-.022", "-.015", ".012", ".110", "-.029", "-.023"...
## $ RE24        <chr> "-0.24", "-0.16", "0.13", "1.09", "-0.32", "-0.24"...
## $ ScoreChange <chr> "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", ...
## $ Half        <chr> "0", "0", "0", "0", "0", "1", "1", "1", "0", "0", ...
## $ Play_Number <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,...

Jim created his interactive chart by creating a ggplot object and wrapping it by the ggplotly function.

By way of a change I will produce a comparable output using native plotly, adding a bit more to the tooltip and identifying the team at bat

# add a team field: plays with Half == "0" are labelled "Indians",
# all other plays "Jays"
df <- df %>%
  mutate(team = ifelse(Half == "0", "Indians", "Jays"))
# and provide appropriate colors for them
col <- c("red", "blue")


# Win probability by play: one marker per play coloured by the team field,
# joined by a light grey line, with the play details shown on hover
df %>%
  plot_ly(
    x = ~Play_Number, y = ~WE,
    hoverinfo = "text",
    text = ~paste0("Inn:",Inn.," ",Outs," out","<br>Score: ",Score,"<br>  Win Probability: ",WE,"%<br>",Play)
  ) %>%
  add_markers(color = ~team, colors = col, opacity = 0.5) %>%
  add_lines(color = I("lightgrey"), showlegend = FALSE) %>%
  layout(
    title = "2016 AL Championship Game 5",
    xaxis = list(title = "Play"),
    yaxis = list(title = "Probability of Blue Jays win")
  ) %>%
  # FALSE is spelled out: the shorthand F is an ordinary variable that can be
  # reassigned, whereas FALSE is a reserved word
  config(displayModeBar = FALSE, showLink = FALSE)

Hover over the points for details. The result was a 3-0 victory for the Indians, and it is pretty self-evident from the chart where the runs occurred and that they each improved the Indians’ chances of winning by around ten percentage points.

There is also a notebook version here, which you can download and NB


Let’s now look at extending it with SharedData

So that’s fine if there is a specific game we know about, but let’s delve deeper by obtaining all the Blue Jays games for 2016. Fangraphs also has a schedule page. The code below results in a data.frame which can be perused for selecting another game and is saved for future use.

# Fangraphs schedule page for the Blue Jays' 2016 season
url <- "http://www.fangraphs.com/teams/bluejays/schedule?season=2016"

# Parse every HTML table on the page, leaving text columns as character
tables <- readHTMLTable(url, stringsAsFactors = FALSE)

# List the table names to find the one holding the schedule
names(tables)
## [1] "NULL" "NULL"
# Neither table on the page is named, so pick the schedule by position
sched <- tables[[2]]

# Tidy up, then save the file

# The second column needs a name
names(sched)[2] <- "venue"

# Run totals are read in as character; convert both columns to integer
run_cols <- c("TORRuns", "OppRuns")
sched[run_cols] <- lapply(sched[run_cols], as.integer)



# Save for future use
write_csv(sched, "schedule_TOR_2016.csv")

# Show the first few rows
head(sched)
##          Date venue Opp TORWin Prob W/L TORRuns OppRuns    TOR Starter
## 1 Apr 3, 2016    at TBR       47.5%   W       5       3 Marcus Stroman
## 2 Apr 4, 2016    at TBR       44.6%   W       5       3    R.A. Dickey
## 3 Apr 5, 2016    at TBR       47.8%   L       2       3  Aaron Sanchez
## 4 Apr 6, 2016    at TBR       51.3%   L       3       5      J.A. Happ
## 5 Apr 8, 2016    vs BOS       56.5%   L       7       8 Marcus Stroman
## 6 Apr 9, 2016    vs BOS       48.2%   L       4       8    R.A. Dickey
##     Opp Starter
## 1  Chris Archer
## 2    Drew Smyly
## 3 Jake Odorizzi
## 4    Matt Moore
## 5     Joe Kelly
## 6 Rick Porcello

We can also, as a one-off, download all the Blue Jays 2016 gamelogs for further analysis. I have used the map_df() function from the purrr package to create a data.frame for saving. One issue accounted for is that the raw data has two columns with the same name, so that needs to be rectified before the date can be added to the data.frame. There is also a bit of munging to do for future plotting.

The process takes around 5 minutes if you want to uncomment the final few lines or wish to amend to other teams/years for your own analysis

# Reload the saved schedule and add gameDate, a Date-class version of the
# text Date column that can be pasted into the play-log URL
sched <- read_csv("schedule_TOR_2016.csv") %>%
  mutate(gameDate = as.Date(Date, format = "%b %d, %Y"))

# Download the Fangraphs play log for one Blue Jays 2016 game.
#
# x: the game date; it is pasted into the URL as-is and stored in the `date`
#    column (presumably a Date or "yyyy-mm-dd" string, as in sched$gameDate).
#
# Returns the "PlayGame1_dgPlay_ctl00" table as a data.frame, with its 13th
# column renamed to "ScoreChange" (the raw table repeats a column name) and a
# `date` column appended. One data.frame per game lets map_df() row-bind them.
#
# Stops with an explicit error if the page contains no play-by-play table,
# rather than failing later when the column is renamed.
get_plays <- function(x) {
  url <- paste0("http://www.fangraphs.com/plays.aspx?date=", x, "&team=Blue%20Jays&season=2016")
  tables <- readHTMLTable(url, stringsAsFactors = FALSE)
  playLog <- tables[["PlayGame1_dgPlay_ctl00"]]
  if (is.null(playLog)) {
    stop("No play-by-play table found for ", as.character(x), call. = FALSE)
  }
  names(playLog)[13] <- "ScoreChange"
  # Return the result visibly instead of ending on an assignment
  cbind(playLog, date = x)
}

## This takes around 5 minutes to run 
# playLogs <-map_df(sched$gameDate, get_plays)
  
  ## make win expectancy a numeric field
#  playLogs$WE  <- as.numeric(str_replace(playLogs$WE, "%", ""))
  
  ## add play order for each game
  # playLogs <-playLogs %>% 
  # group_by(date) %>% 
  # mutate(Play_Number=row_number())
  
# write_csv(playLogs,"playLogs_jays_2016.csv")

There are many ways to play around with the data that is now available, but I’d like to delve further into the crosstalk package, which I have touched on before.

# Load the previously saved play logs
playLogs <- read_csv(file = "playLogs_jays_2016.csv")
## Parsed with column specification:
## cols(
##   Pitcher = col_character(),
##   Player = col_character(),
##   Inn. = col_integer(),
##   Outs = col_integer(),
##   Base = col_character(),
##   Score = col_character(),
##   Play = col_character(),
##   LI = col_double(),
##   RE = col_double(),
##   WE = col_double(),
##   WPA = col_double(),
##   RE24 = col_double(),
##   ScoreChange = col_integer(),
##   Half = col_integer(),
##   date = col_date(format = ""),
##   Play_Number = col_integer()
## )
# Inspect the column types and first values of the play logs
glimpse(playLogs)
## Observations: 13,381
## Variables: 16
## $ Pitcher     <chr> "C Archer", "C Archer", "C Archer", "C Archer", "C...
## $ Player      <chr> "K Pillar", "J Donaldson", "J Bautista", "J Donald...
## $ Inn.        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,...
## $ Outs        <int> 0, 1, 1, 1, 1, 1, 1, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0,...
## $ Base        <chr> "___", "___", "1__", "12_", "_23", "1__", "12_", "...
## $ Score       <chr> "0-0", "0-0", "0-0", "0-0", "0-2", "0-2", "0-2", "...
## $ Play        <chr> "Kevin Pillar grounded out to shortstop (Grounder)...
## $ LI          <dbl> 0.87, 0.62, 1.15, 1.96, 1.56, 0.88, 1.44, 1.25, 0....
## $ RE          <dbl> 0.47, 0.25, 0.50, 0.88, 1.36, 0.50, 0.88, 0.42, 0....
## $ WE          <dbl> 52.2, 49.7, 46.1, 41.2, 30.3, 27.6, 30.9, 34.0, 31...
## $ WPA         <dbl> -0.022, 0.024, 0.036, 0.049, 0.110, 0.026, -0.032,...
## $ RE24        <dbl> -0.22, 0.25, 0.38, 0.49, 1.14, 0.38, -0.46, -0.42,...
## $ ScoreChange <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Half        <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1,...
## $ date        <date> 2016-04-03, 2016-04-03, 2016-04-03, 2016-04-03, 2...
## $ Play_Number <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,...
# Inspect the schedule data in the same way
glimpse(sched)
## Observations: 171
## Variables: 10
## $ Date        <chr> "Apr 3, 2016", "Apr 4, 2016", "Apr 5, 2016", "Apr ...
## $ venue       <chr> "at", "at", "at", "at", "vs", "vs", "vs", "vs", "v...
## $ Opp         <chr> "TBR", "TBR", "TBR", "TBR", "BOS", "BOS", "BOS", "...
## $ TORWin Prob <chr> "47.5%", "44.6%", "47.8%", "51.3%", "56.5%", "48.2...
## $ W/L         <chr> "W", "W", "L", "L", "L", "L", "W", "L", "W", "W", ...
## $ TORRuns     <int> 5, 5, 2, 3, 7, 4, 3, 2, 7, 4, 3, 2, 5, 4, 4, 3, 2,...
## $ OppRuns     <int> 3, 3, 3, 5, 8, 8, 0, 3, 2, 2, 5, 4, 3, 3, 3, 4, 3,...
## $ TOR Starter <chr> "Marcus Stroman", "R.A. Dickey", "Aaron Sanchez", ...
## $ Opp Starter <chr> "Chris Archer", "Drew Smyly", "Jake Odorizzi", "Ma...
## $ gameDate    <date> 2016-04-03, 2016-04-04, 2016-04-05, 2016-04-06, 2...
# Scatter of Toronto runs against opponent runs, one marker per game,
# coloured by Toronto's starting pitcher
plot_ly(sched, x = ~TORRuns, y = ~OppRuns, color = ~`TOR Starter`) %>%
  add_markers()
#library(rvest)

#div class="team-schedule-table" # confirmed in selectorgadget


# sched <-read_html("http://www.fangraphs.com/teams/bluejays/schedule?season=2016")  %>% # list of 2
#        html_node(".team-schedule-table") %>% 
#        html_table()
# 
# tdist <- read_html("http://en.wikipedia.org/wiki/Student%27s_t-distribution")
# tdist %>%
#   html_node("table.infobox") %>%  # sim to above but only has node not xml_document
#   html_table(header = FALSE)
# 
# 
# ## maybe come back use xml2r?
# library(xml2)
# url_parse("http://www.fangraphs.com/teams/bluejays/schedule?season=2016")

## back to XML

# url <- "http://www.fangraphs.com/teams/bluejays/schedule?season=2016"
# 
# tables<- readHTMLTable(url, stringsAsFactors=FALSE)
# 
# # See what tables are available
# names(tables)
# # Select 
# sched <- tables[[2]]
# 
# head(sched)
# 
# names(sched)
# library(crosstalk)
# library(htmltools)
# 
# 
# names(sched)
# glimpse(sched)
# 
# colnames(sched)[2] <- "venue"
# 
# sched$TORRuns <- as.integer(sched$TORRuns)
# 
# sched$OppRuns <- as.integer(sched$OppRuns)
# 
# write_csv(sched,"schedule_TOR_2016.csv")
# 
# byStarter <- sched %>% 
#   rename(starter=`TOR Starter`) %>% 
#   mutate(runs=TORRuns+OppRuns) %>% 
#   group_by(starter) %>% 
#   arrange(starter,desc(runs),desc(TORRuns))
# 
# 
# 
# sd_1 <- SharedData$new(byStarter, key= ~Date)
# 
# 
# fs <- filter_select(
# id = "starter",
# label = "Select Starter",
# sharedData = sd_1,
# group =  ~ starter,
# allLevels = FALSE,
# multiple = FALSE
# )
# 
# 
# fs_nobootstrap <- fs
# 
# attr(fs_nobootstrap, "html_dependencies") <- Filter(
#   function(dep) {dep$name != "bootstrap"},
#   attr(fs_nobootstrap, "html_dependencies")
# )
# 

# select game with most runs in for a particular starter


# game <-  sd_1 %>%
#                          DT::datatable(class='compact stripe hover row-border order-column',rownames=FALSE,options= list(paging = FALSE, searching = FALSE,info=FALSE)) 
       
# Error in DT::datatable(., class = "compact stripe hover row-border order-column",  : 
#  'data' must be 2-dimensional (e.g. data frame or matrix) sd is an Environment
# subbing in byStarter
  


# p <-  sd_1 %>% 
#   plot_ly(x=~TORRuns,y=~OppRuns) ## this works so issue is not 
 

## this does show up but do not think it can be used to get unless all downloaded beforehand




  
#   sd_1$key 
#   
#   # both of these return: data set '.' not found
#   data(sd_1)
#   
#   sd_1 %>% 
#     data()
#   
#   origData(sd_1)
# 
#   state_info <- data.frame(stringsAsFactors = FALSE,
#   state.name,
#   state.region,
#   state.area
# )
# sd1 <- SharedData$new(state_info, ~state.name)
# sd2 <- SharedData$new(state_info, state_info$state.name)
# sd3 <- SharedData$new(state_info, function(data) data$state.name)
# 
# # Do all three forms give the same results?
# all(sd1$key() == sd2$key() & sd2$key() == sd3$key())
  
  
#sd_1$key()[1]  #sd_1$key()[1] "Jun 12, 2016" no filtering at this point



# <SharedData>
#   Public:
#     .updateSelection: function (value) 
#     clearSelection: function (ownerId = "") 
#     clone: function (deep = FALSE) 
#     data: function (withSelection = FALSE, withFilter = TRUE, withKey = FALSE) 
#     groupName: function () 
#     initialize: function (data, key = NULL, group = createUniqueId(4, prefix = "SharedData")) 
#     key: function () 
#     origData: function () 
#     selection: function (value, ownerId = "") 
#   Private:
#     .data: grouped_df, tbl_df, tbl, data.frame
#     .filterCV: ClientValue, R6
#     .group: SharedDataeda3f27e
#     .key: NULL
#     .rv: reactivevalues
#     .selectionCV: ClientValue, R6


#  tagList(
#   fs_nobootstrap,
#   p
# )

 
 
#  tagList(
#   fs_nobootstrap,
#   br(),
#    myChart
# )

How long does it actually take to get the data?

# x <- "Apr 13, 2016"
# as.Date(x, format = '%b %d, %Y') #[1] "2016-04-13"
# 
# x <- "Apr 3, 2016"
# as.Date(x, format = '%b %d, %Y') #[1] "2016-04-03"
# 
# sched <- sched %>% 
#   mutate(gameDate = as.Date(Date, format = '%b %d, %Y'))
# 
# 
# 
# 
# # 
# url <- "http://www.fangraphs.com/plays.aspx?date=2016-10-19&team=Blue%20Jays&dh=0&season=2016"
# 
# url <- paste0("http://www.fangraphs.com/plays.aspx?date=",sched$gameDate[1],"&team=Blue%20Jays&dh=0&season=2016")
# 
# tables<- readHTMLTable(url, stringsAsFactors=FALSE)
# 
# # See what tables are available
# names(tables)
# # Select
# df <- tables[["PlayGame1_dgPlay_ctl00"]]
# 
# 
# ## As per Jim's code add a play number and amend the WE field to a numeric one just showing the win probablilty
# 
# df$Play_Number <- 1:dim(df)[1]
# df$WE  <- as.numeric(str_replace(df$WE, "%", ""))
# 
# ## There is also a repeated column name which needs attention
# names(df)[13] <- "ScoreChange"
# 
# # check that everything looks hunky-dory
# glimpse(df)
# 
# ## try an get as function an purrr it?? only used in twitter stuff to date with v simple function
# 
# library(purrr)
# library(XML)
# 
# get_plays <- function(date) {
#   
#   url <- paste0("http://www.fangraphs.com/plays.aspx?date=",sched$gameDate[1],"&team=Blue%20Jays&dh=0&season=2016")
#   print(url)
#   tables<- readHTMLTable(url, stringsAsFactors=FALSE)
# }
# 
# ##  try without dh=0
# get_plays <- function(date) {
#   
#  # url <- paste0("http://www.fangraphs.com/plays.aspx?date=",date,"&team=Blue%20Jays&season=2016")
#   url <- paste0("http://www.fangraphs.com/plays.aspx?date=",games[1],"&team=Blue%20Jays&season=2016")
#   print(url)
#   tables<- readHTMLTable(url, stringsAsFactors=FALSE)
#   playLog <- tables[["PlayGame1_dgPlay_ctl00"]]
#   playLog$gameDate <- games[1]
# }
# 
# games <- sched$gameDate[1:3]
# 
# get_plays(games)
# 
# get_plays <- function(x) {
#   
#    url <- paste0("http://www.fangraphs.com/plays.aspx?date=",x,"&team=Blue%20Jays&season=2016")
#  # print(url)
#   tables<- readHTMLTable(url, stringsAsFactors=FALSE)
#   playLog <- tables[["PlayGame1_dgPlay_ctl00"]]
#   playLog$gameDate <- x
# }
# 
# 
# map_df(games, get_plays) #Error in bind_rows_(x, .id) : cannot convert object to a data frame
# map(games, get_plays)
# [[1]]
# [1] "2016-04-03"
# 
# [[2]]
# [1] "2016-04-04"
# 
# [[3]]
# [1] "2016-04-05"
# 
# # prob as last result is the vector
# 
# 
# get_plays <- function(x) {
#   
#    url <- paste0("http://www.fangraphs.com/plays.aspx?date=",x,"&team=Blue%20Jays&season=2016")
#  # print(url)
#   tables<- readHTMLTable(url, stringsAsFactors=FALSE)
#   #playLog <- tables[["PlayGame1_dgPlay_ctl00"]] # this works
#   playLog <- cbind(tables[["PlayGame1_dgPlay_ctl00"]],x)
#   
# }
# 
# 
# data <-map_df(games, get_plays) 
# 
# data <-map_df(sched$gameDate, get_plays) ## around 7 minutes without setting size of data.frame
# 
# write_csv(data,"playLogs_jays_2016.csv")
# 
# unique(data$x) #171 looks right
# 
# glimpse(data)
# data$WE  <- as.numeric(str_replace(data$WE, "%", ""))
# 
# ## There is also a repeated column name which needs attention (? not seeing this now even in orig)
# names(data)[13] <- "ScoreChange" #[1] "Half" data$Score  just has one value needs to be checked out on individual game looks like there
# # are more see df - def want to have this
# 
# 
# ## see if just a few games gets asme size df
# test <-map_df(games, get_plays) #no still 14
# 
# url <- "http://www.fangraphs.com/plays.aspx?date=2016-10-19&team=Blue%20Jays&dh=0&season=2016"
# 
# tables<- readHTMLTable(url, stringsAsFactors=FALSE)
#   #playLog <- tables[["PlayGame1_dgPlay_ctl00"]] # this works
#   playLog <- cbind(tables[["PlayGame1_dgPlay_ctl00"]],date="2016-04-03")
#   # looks like because 2 columns have same name score is overwriting 
#   
#   playLog <- tables[["PlayGame1_dgPlay_ctl00"]]
#   names(playLog)[13] <- "ScoreChange"
#   playLog <- cbind(playLog,date="2016-04-03")
#   
#   
#   
#   get_plays <- function(x) {
#   
#    url <- paste0("http://www.fangraphs.com/plays.aspx?date=",x,"&team=Blue%20Jays&season=2016")
#  # print(url)
#   tables<- readHTMLTable(url, stringsAsFactors=FALSE)
#   playLog <- tables[["PlayGame1_dgPlay_ctl00"]]
#   names(playLog)[13] <- "ScoreChange"
#   playLog <- cbind(playLog,date=x)
#   
#   
# }
# 
#   Sys.time()
# data <-map_df(sched$gameDate, get_plays) 
# Sys.time() # around 5 mins
# 
# ## now try and link charts - will need a number to id play as well
# 
# 
# df$Play_Number <- 1:dim(df)[1]
# data$WE  <- as.numeric(str_replace(data$WE, "%", ""))
# 
# data <-data %>% 
#   group_by(date) %>% 
#   mutate(Play_Number=row_number())
# 
# write_csv(data,"playLogs_jays_2016.csv")
# 
# 
# data <- read_csv("playLogs_jays_2016.csv")
# sched <- read_csv("schedule_TOR_2016.csv")
# ## replicate  the one above
# 
# theDate <- as.Date("2016-04-03") #class(theDate)
# 
# 
# df <- data %>% 
#   filter(date==theDate) 
# glimpse(df)
# 
# opponents <- sched[sched$date==theDate,]$Opp
# venue <- sched[sched$date==theDate,]$venue
# 
# 
# # add a team field
# df <- df %>% 
#   mutate(team=ifelse(Half=="0"&venue=="at","TOR",opponents)) %>% 
#   mutate(team=ifelse(Half=="1"&venue=="at","TOR",opponents)) 
# 
# df <- df %>%
# mutate(team = case_when(
#   Half=="0"&venue=="at","TOR",
#    Half=="0"&venue!="at",opponents,
#    Half=="1"&venue=="at",opponents,
#    Half=="1"&venue!="at","TOR"
# )
# )
# 
# df$team <- "TOR"
# df <- df %>%
# mutate(team = case_when(
#   Half=="0"&venue=="at"~"TOR",
#    Half=="0"&venue!="at"~opponents,
#    Half=="1"&venue=="at"~opponents,
#    Half=="1"&venue!="at"~"TOR"
# )
# )
# 
# 
# df <- df %>%
# mutate(team = case_when(
#   .$Half=="0" & .$venue=="at"~"TOR",
#    .$Half=="0" & .$venue!="at"~opponents,
#    .$Half=="1" & .$venue=="at"~opponents,
#    .$Half=="1" & .$venue!="at"~"TOR"
# )
# )
# 
# # works with dplyr_0.5.0.9004  not 0.5
# temp <- iris %>% 
# as_tibble() %>% 
#   mutate(versicolor_or_virginica = case_when(Species == "versicolor" ~ TRUE, 
#                                              Species == "virginica" ~ TRUE, 
#                                              TRUE ~ FALSE))
# 
# temp <- iris %>% 
# as_tibble() %>% 
#   mutate(versicolor_or_virginica="?") %>% 
#   mutate(versicolor_or_virginica = case_when(Species == "versicolor" ~ "ve", 
#                                              Species == "virginica" ~ "vi", 
#                                              TRUE ~ FALSE))
# 
# df <- df %>%
# mutate(team = case_when(
#   (Half=="0"&venue=="at")~"TOR",
#    (Half=="0"&venue!="at")~opponents,
#    (Half=="1"&venue=="at")~opponents,
#    (Half=="1"&venue!="at")~"TOR"
# )
# )
# 
# df$team
# df <- df %>%
# mutate(team = case_when(
#   
#    (Half=="0"&venue!="at") ~ opponents,
#    (Half=="1"&venue=="at") ~ opponents
#   
# ))
# 
# 
# df$team
# df <- df %>%
# mutate(team = case_when(
#   
#    (Half=="0"&venue!="at") ~ opponents,
#    (Half=="1"&venue=="at") ~ opponents
#   
# ))
# 
# 
# df %>% 
#   mutate(team=ifelse((Half=="0"&venue!="at"),"TOR",opponents))
# 
# 
# ## starts again
# 
# data <- read_csv("playLogs_jays_2016.csv")
# sched <- read_csv("schedule_TOR_2016.csv")
# 
# sched <- sched %>% 
#   mutate(gameDate = as.Date(Date, format = '%b %d, %Y'))
# 
# 
# ## replicate  the one above
# 
# theDate <- as.Date("2016-04-03") #class(theDate)
# 
# 
# df <- data %>% 
#   filter(date==theDate) 
# glimpse(df)
# 
# opponents <- sched[sched$gameDate==theDate,]$Opp
# venue <- sched[sched$gameDate==theDate,]$venue
# result <- sched[sched$gameDate==theDate,]$`W/L`
# 
# df <-df %>% 
#   mutate(team=ifelse((Half=="0"&venue!="at"),"TOR",opponents)) %>% 
#   mutate(team=ifelse((Half=="1"&venue!="at"),opponents,"TOR"))
# 
# ## this correct
# df <-df %>% 
#   mutate(team=ifelse((Half=="0"&venue=="at"),"TOR",opponents)) %>% 
#   mutate(team=ifelse((Half=="1"&venue=="at"),opponents,"TOR")) 
# 
# 
# 
# 
# # and provide appropriate colors for them
# col<- c("red","blue")
# 
# #title
# if (venue=="at") {
# theTitle <- paste0("TOR @ ",opponents," - ",df$date[1])
# } else {
# theTitle <- paste0("TOR vs ",opponents," - ",df$date[1])  
# }
# 
# if (venue=="at"& result=="W")  df$WE <- 100-df$WE
# 
# 
# df %>% 
#   plot_ly(x=~Play_Number,y=~WE,
#           hoverinfo="text",
#           text=~paste0("Inn:",Inn.," ",Outs," out","<br>Score: ",Score,"<br>  Win Probability: ",WE,"%<br>",Play)) %>% 
#   add_markers(color=~team, colors=col, opacity=0.5) %>% 
#   add_lines(color=I("lightgrey"), showlegend=FALSE) %>% 
#   layout(title=theTitle,
#          xaxis=list(title="Play"),
#          yaxis=list(title="Probability of Blue Jays win")) %>% 
#    config(displayModeBar = F,showLink = F)
# 
comments powered by Disqus