##########

# the following section of code is from chancesis

# http://www.chancesis.com/2011/08/14/run-expectancy-and-markov-chains/

##########

# change directory path to wherever you have bo_transitions.csv, etc saved

setwd("/Users/Seshoumaru/Desktop/untitled folder/baseball/run-win expectancy")

bo.trans = read.csv("bo_transitions.csv", header = TRUE)

bo.trans.home = read.csv("bo_transitions_home.csv", header = TRUE)

bo.trans.away = read.csv("bo_transitions_away.csv", header = TRUE)

# create header names

start.bo = NULL

end.bo = NULL

bo = NULL

bases = c('___','1__','_2_','12_','__3','1_3','_23','123')

for (b in 0:7) {

  for (o in 0:3) {

    start.bo[b + 8*o + 1] = paste('s.b', bases[b+1], '.o', o, sep = '')

    end.bo[b + 8*o + 1] = paste('e.b', bases[b+1], '.o', o, sep = '')

    bo[b + 8*o + 1] = paste('b', bases[b+1], '.o', o, sep = '')

  }

}

###############

# overall rp9 #

###############

# trans.freq - the frequency of each state transition

# trans.runs - the runs scored on each state transition

trans.freq = array(0, c(32,32), dimnames = list(start.bo, end.bo))

trans.runs = array(0, c(32,32), dimnames = list(start.bo, end.bo))

for (r in 1:nrow(bo.trans)) {

  row.num = bo.trans$START_BASES[r] + 8*bo.trans$START_OUTS[r] + 1

  col.num = bo.trans$END_BASES[r] + 8*bo.trans$END_OUTS[r] + 1

  trans.freq[row.num,col.num] = trans.freq[row.num,col.num] + bo.trans$PA[r]

  trans.runs[row.num,col.num] = trans.runs[row.num,col.num] + bo.trans$EVENT_RUNS[r]

}

# trans.prob - the probability of each state transition

trans.prob = trans.freq

for (r in 1:24) {

  trans.prob[r,] = trans.prob[r,]/sum(trans.prob[r,])

}

for (r in 25:32) {

  trans.prob[r,25] = 1

}

# bo.event.re - the expected runs scored from each state on the next event only

bo.event.re = array(0, c(32,1), dimnames = list(bo))

for (r in 1:nrow(trans.prob)) {

  bo.event.re[r,] = trans.prob[r,] %*% trans.runs[r,]

}

# run expectancy, formula method

re.formula = solve(diag(24) - trans.prob[1:24,1:24]) %*% bo.event.re[1:24]

rownames(re.formula) = bo[1:24]

colnames(re.formula)[1] = 're'

#############

# home team #

#############

# trans.freq - the frequency of each state transition

# trans.runs - the runs scored on each state transition

trans.freq.home = array(0, c(32,32), dimnames = list(start.bo, end.bo))

trans.runs.home = array(0, c(32,32), dimnames = list(start.bo, end.bo))

for (r in 1:nrow(bo.trans.home)) {

  row.num = bo.trans.home$START_BASES[r] + 8*bo.trans.home$START_OUTS[r] + 1

  col.num = bo.trans.home$END_BASES[r] + 8*bo.trans.home$END_OUTS[r] + 1

  trans.freq.home[row.num,col.num] = trans.freq.home[row.num,col.num] + bo.trans.home$PA[r]

  trans.runs.home[row.num,col.num] = trans.runs.home[row.num,col.num] + bo.trans.home$EVENT_RUNS[r]

}

# trans.prob - the probability of each state transition

trans.prob.home = trans.freq.home

for (r in 1:24) {

  trans.prob.home[r,] = trans.prob.home[r,]/sum(trans.prob.home[r,])

}

for (r in 25:32) {

  trans.prob.home[r,25] = 1

}

# re & run distribution, simulation method

innings = 100000  # ~4 mins for 100,000, ~1 hour for 1,000,000 on my computer

run.dist.simulation.home = array(0,c(24,1))

run.exp.simulation.home = array(0,c(24,1))

for (s in 1:24) {

  for (i in 1:innings) {

    s.state = s

    runs.inning = 0

    while (s.state != 25) {

      e.state = sample(1:ncol(trans.prob.home), size = 1, prob = trans.prob.home[s.state,])

      runs.inning = runs.inning + as.numeric(trans.runs.home[s.state,e.state])

      s.state = e.state

    }

    if (runs.inning+1 > ncol(run.dist.simulation.home)) {

      for (l in ncol(run.dist.simulation.home):runs.inning) {

        run.dist.simulation.home = cbind(run.dist.simulation.home,array(0,c(24,1)))

      }

    }

    run.dist.simulation.home[s,runs.inning+1] = run.dist.simulation.home[s,runs.inning+1] + 1

  }

  run.exp.simulation.home[s] = weighted.mean(0:(ncol(run.dist.simulation.home)-1),run.dist.simulation.home[s,])

}

for (r in 1:24) {

  run.dist.simulation.home[r,] = run.dist.simulation.home[r,]/sum(run.dist.simulation.home[r,])

}

runs = NULL

for (r in 1:ncol(run.dist.simulation.home)) {

  runs[r] = paste(r-1, 'runs')

}

run.dist.simulation.home = cbind(run.exp.simulation.home,run.dist.simulation.home)

rownames(run.dist.simulation.home) = bo[1:24]

colnames(run.dist.simulation.home) = c('re',runs)

#############

# away team #

#############

# trans.freq - the frequency of each state transition

# trans.runs - the runs scored on each state transition

trans.freq.away = array(0, c(32,32), dimnames = list(start.bo, end.bo))

trans.runs.away = array(0, c(32,32), dimnames = list(start.bo, end.bo))

for (r in 1:nrow(bo.trans.away)) {

  row.num = bo.trans.away$START_BASES[r] + 8*bo.trans.away$START_OUTS[r] + 1

  col.num = bo.trans.away$END_BASES[r] + 8*bo.trans.away$END_OUTS[r] + 1

  trans.freq.away[row.num,col.num] = trans.freq.away[row.num,col.num] + bo.trans.away$PA[r]

  trans.runs.away[row.num,col.num] = trans.runs.away[row.num,col.num] + bo.trans.away$EVENT_RUNS[r]

}

# trans.prob - the probability of each state transition

trans.prob.away = trans.freq.away

for (r in 1:24) {

  trans.prob.away[r,] = trans.prob.away[r,]/sum(trans.prob.away[r,])

}

for (r in 25:32) {

  trans.prob.away[r,25] = 1

}

# re & run distribution, simulation method

# innings = 100000  # ~4 mins for 100,000 # innings already set from home team

run.dist.simulation.away = array(0,c(24,1))

run.exp.simulation.away = array(0,c(24,1))

for (s in 1:24) {

  for (i in 1:innings) {

    s.state = s

    runs.inning = 0

    while (s.state != 25) {

      e.state = sample(1:ncol(trans.prob.away), size = 1, prob = trans.prob.away[s.state,])

      runs.inning = runs.inning + as.numeric(trans.runs.away[s.state,e.state])

      s.state = e.state

    }

    if (runs.inning+1 > ncol(run.dist.simulation.away)) {

      for (l in ncol(run.dist.simulation.away):runs.inning) {

        run.dist.simulation.away = cbind(run.dist.simulation.away,array(0,c(24,1)))

      }

    }

    run.dist.simulation.away[s,runs.inning+1] = run.dist.simulation.away[s,runs.inning+1] + 1

  }

  run.exp.simulation.away[s] = weighted.mean(0:(ncol(run.dist.simulation.away)-1),run.dist.simulation.away[s,])

}

for (r in 1:24) {

  run.dist.simulation.away[r,] = run.dist.simulation.away[r,]/sum(run.dist.simulation.away[r,])

}

runs = NULL

for (r in 1:ncol(run.dist.simulation.away)) {

  runs[r] = paste(r-1, 'runs')

}

run.dist.simulation.away = cbind(run.exp.simulation.away,run.dist.simulation.away)

rownames(run.dist.simulation.away) = bo[1:24]

colnames(run.dist.simulation.away) = c('re',runs)

##########

# code above is adapted from chancesis

# http://www.chancesis.com/2011/08/14/run-expectancy-and-markov-chains/

# altered to create separate run distributions for home/away

##########

#############

# home team #

#############

# force run distribution chart to go 0-16 runs

 while(ncol(run.dist.simulation.home)<18) {test.run.dist.home <- data.frame(cbind(run.dist.simulation.home,0))}

 

 for(i in 1:nrow(run.dist.simulation.home)) {run.dist.simulation.home[i,18] <- sum(run.dist.simulation.home[i,18:ncol(run.dist.simulation.home)])}

run.dist.simulation.home <- run.dist.simulation.home[,1:18]

 

#############

# away team #

#############

# force run distribution chart to go 0-16 runs

 while(ncol(run.dist.simulation.away)<18) {test.run.dist.away <- data.frame(cbind(run.dist.simulation.away,0))}

 

 for(i in 1:nrow(run.dist.simulation.away)) {run.dist.simulation.away[i,18] <- sum(run.dist.simulation.away[i,18:ncol(run.dist.simulation.away)])}

run.dist.simulation.away <- run.dist.simulation.away[,1:18]

# calculate odds of home team winning in extras

calc.extra.inning.we <- function(h,a) {

  h.win <- 0

  draw <- 0

  for (i in 1:17) {

    if(i>1) {

      h.win <- h.win + h[i]*sum(a[1:i-1])

    }

    draw <- draw + h[i]*a[i]

  }

  extra.inning.we <- h.win/(1-draw)

  return(extra.inning.we)

}

extra.inning.we <- calc.extra.inning.we(run.dist.simulation.home[1,2:18],run.dist.simulation.away[1,2:18])

# user-defined functions

create.run.dist <- function(df1,df2) {

  run.distr <- data.frame()

  for (i in 1:ncol(df1)) {

    run.distr <- rbind(run.distr,df1[1,i]*df2[1,2:18])

  }

  return(run.distr)

}

create.run.dist.2 <- function(df1,df2,h) {

  run.distr <- data.frame()

  for (i in 1:ncol(df1)) {

    run.distr <- rbind(run.distr,df1[1,i]*df2[h,2:18])

  }

  return(run.distr)

}

diag.sum <- function(df) {

        

  runs <- data.frame()

  for (i in 1:ncol(df)) {

    n <- 0

    for (j in 1:i) {

      n <- n+df[j,i+1-j]

    }

    runs[1,i] <- n

  }

  for (i in 1:nrow(df)-1) {

    n <- 0

    for (j in (i+1):nrow(df)) {

      if(ncol(df)+i+1-j>0) {

        n <- n+df[j,ncol(df)+i+1-j]

      }

    }

    runs[1,ncol(df)+i] <- n

  }

  return(runs)

}  

 

#############

# home team #

#############

  run.dist.2a.home <- create.run.dist(run.dist.simulation.home[,2:18],run.dist.simulation.home)

  run.dist.2.home <- diag.sum(run.dist.2a.home)

   

  run.dist.3a.home <- create.run.dist(run.dist.2.home,run.dist.simulation.home)

  run.dist.3.home <- diag.sum(run.dist.3a.home)

 

  run.dist.4a.home <- create.run.dist(run.dist.3.home,run.dist.simulation.home)

  run.dist.4.home <- diag.sum(run.dist.4a.home)

 

  run.dist.5a.home <- create.run.dist(run.dist.4.home,run.dist.simulation.home)

  run.dist.5.home <- diag.sum(run.dist.5a.home)

  run.dist.6a.home <- create.run.dist(run.dist.5.home,run.dist.simulation.home)

  run.dist.6.home <- diag.sum(run.dist.6a.home)

 

  run.dist.7a.home <- create.run.dist(run.dist.6.home,run.dist.simulation.home)

  run.dist.7.home <- diag.sum(run.dist.7a.home)

 

  run.dist.8a.home <- create.run.dist(run.dist.7.home,run.dist.simulation.home)

  run.dist.8.home <- diag.sum(run.dist.8a.home)

  run.dist.9a.home <- create.run.dist(run.dist.8.home,run.dist.simulation.home)

  run.dist.9.home <- diag.sum(run.dist.9a.home)

 

#############

# away team #

#############

 

  run.dist.2a.away <- create.run.dist(run.dist.simulation.away[,2:18],run.dist.simulation.away)

  run.dist.2.away <- diag.sum(run.dist.2a.away)

   

  run.dist.3a.away <- create.run.dist(run.dist.2.away,run.dist.simulation.away)

  run.dist.3.away <- diag.sum(run.dist.3a.away)

 

  run.dist.4a.away <- create.run.dist(run.dist.3.away,run.dist.simulation.away)

  run.dist.4.away <- diag.sum(run.dist.4a.away)

 

  run.dist.5a.away <- create.run.dist(run.dist.4.away,run.dist.simulation.away)

  run.dist.5.away <- diag.sum(run.dist.5a.away)

  run.dist.6a.away <- create.run.dist(run.dist.5.away,run.dist.simulation.away)

  run.dist.6.away <- diag.sum(run.dist.6a.away)

 

  run.dist.7a.away <- create.run.dist(run.dist.6.away,run.dist.simulation.away)

  run.dist.7.away <- diag.sum(run.dist.7a.away)

 

  run.dist.8a.away <- create.run.dist(run.dist.7.away,run.dist.simulation.away)

  run.dist.8.away <- diag.sum(run.dist.8a.away)

  run.dist.9a.away <- create.run.dist(run.dist.8.away,run.dist.simulation.away)

  run.dist.9.away <- diag.sum(run.dist.9a.away)

 

#bottom 9

calc.wp.b9 <- function(df) {

  wp.b9 <- data.frame()

  for(h in 1:24) {

    for(i in 1:33) {

      wp <- 0

      diff <- i-17

      for(j in 1:ncol(df)) {

        if(j==18-i) {

          wp <- wp + extra.inning.we*df[h,j]

        }

        if(j>18-i) {

          wp <- wp + df[h,j]

        }

      }

      wp.b9[h,i] <- wp

    }

  }

  colnames(wp.b9) <- -16:16

  return(wp.b9)

}

#top 9

calc.wp.t9 <- function(df.a, df.h) {

  wp.t9 <- data.frame()

  for(h in 1:24) {

    for(i in 1:33) {

      wp <- 0

      diff <- i-17

      for(j in 1:ncol(df.a)) {

        ra <- j-1

        for(k in 1:ncol(df.h)) {

          rs <- k-1

          if((rs+diff)==ra) {

            wp <- wp + extra.inning.we*df.a[h,j]*df.h[1,k]

          }

          if((rs+diff)>ra) {

            wp <- wp + df.a[h,j]*df.h[1,k]

          }

        }

      }

      wp.t9[h,i] <- wp

    }

  }

  colnames(wp.t9) <- -16:16

  return(wp.t9)

}

# bottom innings 1-8

calc.wp.bottom <- function(df.h.1,df.h.2,df.a) {

  wp.bottom <- data.frame()

  for(h in 1:24) {

    run.dist.a <- create.run.dist.2(df.h.1,df.h.2,h)

    run.dist <- diag.sum(run.dist.a)

    for(i in 1:33) {

      wp <- 0

      diff <- i-17

      for(j in 1:ncol(run.dist)) {

        rs <- j-1

        for(k in 1:ncol(df.a)) {

          ra <- k-1

          if((rs+diff)==ra) {

            wp <- wp + extra.inning.we*run.dist[1,j]*df.a[1,k]

          }

          if((rs+diff)>ra) {

            wp <- wp + run.dist[1,j]*df.a[1,k]

          }

        }

      }

      wp.bottom[h,i] <- wp

    }

  }

  colnames(wp.bottom) <- -16:16

  return(wp.bottom)

}

#top innings 1-8

calc.wp.top <- function(df.a.1,df.a.2,df.h) {

  wp.top <- data.frame()

  for(h in 1:24) {

    run.dist.a <- create.run.dist.2(df.a.1,df.a.2,h)

    run.dist <- diag.sum(run.dist.a)

    for(i in 1:33) {

      wp <- 0

      diff <- i-17

      for(j in 1:ncol(run.dist)) {

        ra <- j-1

        for(k in 1:ncol(df.h)) {

          rs <- k-1

          if((rs+diff)==ra) {

            wp <- wp + extra.inning.we*run.dist[1,j]*df.h[1,k]

          }

          if((rs+diff)>ra) {

            wp <- wp + run.dist[1,j]*df.h[1,k]

          }

        }

      }

      wp.top[h,i] <- wp

    }

  }

  colnames(wp.top) <- -16:16

  return(wp.top)

}

# calculate win expectency tables by inning

wp.b9 <- calc.wp.b9(run.dist.simulation.home[,2:18])

wp.b9 <- cbind(inn=9,half="b",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.b9)

wp.t9 <- calc.wp.t9(run.dist.simulation.away[,2:18],run.dist.simulation.home[,2:18])

wp.t9 <- cbind(inn=9,half="t",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.t9)

wp.b8 <- calc.wp.bottom(run.dist.simulation.home[,2:18],run.dist.simulation.home,run.dist.simulation.away[,2:18])

wp.b8 <- cbind(inn=8,half="b",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.b8)

wp.t8 <- calc.wp.top(run.dist.simulation.away[,2:18],run.dist.simulation.away,run.dist.2.home[1:31])

wp.t8 <- cbind(inn=8,half="t",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.t8)

wp.b7 <- calc.wp.bottom(run.dist.2.home[1:31],run.dist.simulation.home,run.dist.2.away[1:31])

wp.b7 <- cbind(inn=7,half="b",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.b7)

wp.t7 <- calc.wp.top(run.dist.2.away[1:31],run.dist.simulation.away,run.dist.3.home[1:31])

wp.t7 <- cbind(inn=7,half="t",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.t7)

wp.b6 <- calc.wp.bottom(run.dist.3.home[1:31],run.dist.simulation.home,run.dist.3.away[1:31])

wp.b6 <- cbind(inn=6,half="b",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.b6)

wp.t6 <- calc.wp.top(run.dist.3.away[1:31],run.dist.simulation.away,run.dist.4.home[1:31])

wp.t6 <- cbind(inn=6,half="t",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.t6)

wp.b5 <- calc.wp.bottom(run.dist.4.home[1:31],run.dist.simulation.home,run.dist.4.away[1:31])

wp.b5 <- cbind(inn=5,half="b",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.b5)

wp.t5 <- calc.wp.top(run.dist.4.away[1:31],run.dist.simulation.away,run.dist.5.home[1:31])

wp.t5 <- cbind(inn=5,half="t",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.t5)

wp.b4 <- calc.wp.bottom(run.dist.5.home[1:31],run.dist.simulation.home,run.dist.5.away[1:31])

wp.b4 <- cbind(inn=4,half="b",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.b4)

wp.t4 <- calc.wp.top(run.dist.5.away[1:31],run.dist.simulation.away,run.dist.6.home[1:31])

wp.t4 <- cbind(inn=4,half="t",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.t4)

wp.b3 <- calc.wp.bottom(run.dist.6.home[1:31],run.dist.simulation.home,run.dist.6.away[1:31])

wp.b3 <- cbind(inn=3,half="b",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.b3)

wp.t3 <- calc.wp.top(run.dist.6.away[1:31],run.dist.simulation.away,run.dist.7.home[1:31])

wp.t3 <- cbind(inn=3,half="t",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.t3)

wp.b2 <- calc.wp.bottom(run.dist.7.home[1:31],run.dist.simulation.home,run.dist.7.away[1:31])

wp.b2 <- cbind(inn=2,half="b",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.b2)

wp.t2 <- calc.wp.top(run.dist.7.away[1:31],run.dist.simulation.away,run.dist.8.home[1:31])

wp.t2 <- cbind(inn=2,half="t",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.t2)

wp.b1 <- calc.wp.bottom(run.dist.8.home[1:31],run.dist.simulation.home,run.dist.8.away[1:31])

wp.b1 <- cbind(inn=1,half="b",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.b1)

wp.t1 <- calc.wp.top(run.dist.8.away[1:31],run.dist.simulation.away,run.dist.9.home[1:31])

wp.t1[,18:33] <- -1

wp.t1 <- cbind(inn=1,half="t",rp9=9*re.formula[1],bo=rownames(run.dist.simulation.home),wp.t1)

win.expectancy <- data.frame()

win.expectancy <- rbind(wp.t1,wp.b1,wp.t2,wp.b2,wp.t3,wp.b3,wp.t4,wp.b4,wp.t5,wp.b5,wp.t6,wp.b6,wp.t7,wp.b7,wp.t8,wp.b8,wp.t9,wp.b9)

rownames(win.expectancy) <- paste(win.expectancy[,2],win.expectancy[,1],win.expectancy[,4], sep='')

we.rounded <- data.frame(lapply(win.expectancy, function(y) if(is.numeric(y)) round(y, 4) else y))

colnames(we.rounded) <- colnames(win.expectancy)

rownames(we.rounded) <- rownames(win.expectancy)

##################

# Leverage Index #

##################

gs.freq = read.csv("game_state_frequency.csv", header = TRUE)

return.greater.value <- function(a,b) {

  c <- 0

  for(i in 1:length(a)) {

    if(a[i]>b) { c[i] <- a[i] } else { c[i] <- b }

  }

  return(c)

}

return.lesser.value <- function(a,b) {

  c <- 0

  for(i in 1:length(a)) {

    if(a[i] < b) { c[i] <- a[i] } else { c[i] <- b }

  }

  return(c)

}

sum.cols <- function(df) {

  PA <- df$PA

  diff <- abs(as.numeric(df$start.val)-as.numeric(df$end.val))

  weight <- PA*diff

  LI <- data.frame(PA=sum(PA),weight=sum(weight),LI=sum(weight)/sum(PA))

  return(LI)

}

sum.cols.2 <- function(df) {

  PA <- df$PA.y

  weight <- PA*df$LI

  scale <- sum(weight)/sum(PA)

  return(scale)

}

li.top.inning <- function(df,df2,inn) {

  LI <- data.frame()

  end.inn.val <- df2[df2$bo=="b___.o0",]

  wp <- data.frame(cbind(df,b=0:7,o=floor(0:23/8)))

  for(i in 5:37) {

    trans <- merge(bo.trans,data.frame(cbind(start.val=df[,i],b=0:7,o=floor(0:23/8))),by.x=c("START_BASES","START_OUTS"),by.y=c("b","o"))

    event.r <- trans$EVENT_RUNS

    end.b <- trans$END_BASES

    end.o <- trans$END_OUTS

    end.diff <- return.greater.value(i-event.r,5)

    trans$diff<-end.diff

    end.v <- 0

    for(j in 1:nrow(trans)) {

      if(end.o[j]==3) {

        end.value <- end.inn.val[end.diff[j]]

      } else {

        end.value <- wp[wp$b==end.b[j] & wp$o==end.o[j],end.diff[j]]

      }

      end.v[j] <- end.value

    }

  trans$end.val <- end.v

  library(plyr)

  LI.a <- ddply(trans,c("START_BASES","START_OUTS"),sum.cols)

  LI.a$inn <- inn

  LI.a$half <- "t"

  LI.a$diff <- i-21

  LI <- rbind(LI,LI.a)

  }

  return(LI)

}

li.bottom.inning <- function(df,df2,inn) {

  LI <- data.frame()

  end.inn.val <- df2[df2$bo=="b___.o0",]

  wp <- data.frame(cbind(df,b=0:7,o=floor(0:23/8)))

  for(i in 5:37) {

    trans <- merge(bo.trans,data.frame(cbind(start.val=df[,i],b=0:7,o=floor(0:23/8))),by.x=c("START_BASES","START_OUTS"),by.y=c("b","o"))

    event.r <- trans$EVENT_RUNS

    end.b <- trans$END_BASES

    end.o <- trans$END_OUTS

    end.diff <- return.lesser.value(i+event.r,37)

    trans$diff<-end.diff

    end.v <- 0

    for(j in 1:nrow(trans)) {

      if(end.o[j]==3) {

        end.value <- end.inn.val[end.diff[j]]

      } else {

        end.value <- wp[wp$b==end.b[j] & wp$o==end.o[j],end.diff[j]]

      }

      end.v[j] <- end.value

    }

  trans$end.val <- end.v

  library(plyr)

  LI.a <- ddply(trans,c("START_BASES","START_OUTS"),sum.cols)

  LI.a$inn <- inn

  LI.a$half <- "b"

  LI.a$diff <- i-21

  LI <- rbind(LI,LI.a)

  }

  return(LI)

}

li.bottom.9 <- function(df) {

  LI <- data.frame()

#  end.inn.val <- df2[df2$bo=="b___.o0",]

  wp <- data.frame(cbind(df,b=0:7,o=floor(0:23/8)))

  for(i in 5:37) {

    trans <- merge(bo.trans,data.frame(cbind(start.val=df[,i],b=0:7,o=floor(0:23/8))),by.x=c("START_BASES","START_OUTS"),by.y=c("b","o"))

    event.r <- trans$EVENT_RUNS

    end.b <- trans$END_BASES

    end.o <- trans$END_OUTS

    end.diff <- return.lesser.value(i+event.r,37)

    trans$diff<-end.diff

    end.v <- 0

    for(j in 1:nrow(trans)) {

      if(end.o[j]==3) {

        if(end.diff[j]<21) {

          end.value=0

        } else if(end.diff[j]>21) {

          end.value=1

        } else if(end.diff[j]==21) {

          end.value=.5

        }

      } else {

        end.value <- wp[wp$b==end.b[j] & wp$o==end.o[j],end.diff[j]]

      }

      end.v[j] <- end.value

    }

  trans$end.val <- end.v

  library(plyr)

  LI.a <- ddply(trans,c("START_BASES","START_OUTS"),sum.cols)

  LI.a$inn <- 9

  LI.a$half <- "b"

  LI.a$diff <- i-21

  LI <- rbind(LI,LI.a)

  }

  return(LI)

}

li.t1 <- li.top.inning(wp.t1,wp.b1,1)

li.t2 <- li.top.inning(wp.t2,wp.b2,2)

li.t3 <- li.top.inning(wp.t3,wp.b3,3)

li.t4 <- li.top.inning(wp.t4,wp.b4,4)

li.t5 <- li.top.inning(wp.t5,wp.b5,5)

li.t6 <- li.top.inning(wp.t6,wp.b6,6)

li.t7 <- li.top.inning(wp.t7,wp.b7,7)

li.t8 <- li.top.inning(wp.t8,wp.b8,8)

li.t9 <- li.top.inning(wp.t9,wp.b9,9)

li.b1 <- li.bottom.inning(wp.b1,wp.t2,1)

li.b2 <- li.bottom.inning(wp.b2,wp.t3,2)

li.b3 <- li.bottom.inning(wp.b3,wp.t4,3)

li.b4 <- li.bottom.inning(wp.b4,wp.t5,4)

li.b5 <- li.bottom.inning(wp.b5,wp.t6,5)

li.b6 <- li.bottom.inning(wp.b6,wp.t7,6)

li.b7 <- li.bottom.inning(wp.b7,wp.t8,7)

li.b8 <- li.bottom.inning(wp.b8,wp.t9,8)

li.b9 <- li.bottom.9(wp.b9)

leverage.index.a <- rbind(li.t1,li.b1,li.t2,li.b2,li.t3,li.b3,li.t4,li.b4,li.t5,li.b5,li.t6,li.b6,li.t7,li.b7,li.t8,li.b8,li.t9,li.b9)

LI.scale <- merge(leverage.index.a,gs.freq,by=c("START_BASES","START_OUTS","inn","half","diff"))

calc.LI <- function(df,LI.scale) {

  LI <- data.frame()

  scale <- sum.cols.2(LI.scale)

  for (inn in 1:9) {

    for (b in 0:7) {

      for (o in 0:2) {

        LI.a <- c(inn,0,b,o,as.numeric(df[df$START_BASES==b & df$START_OUTS==o & df$inn==inn & df$half=='t',5]/scale))

        LI <- rbind(LI,LI.a)

        LI.b <-c(inn,1,b,o,as.numeric(df[df$START_BASES==b & df$START_OUTS==o & df$inn==inn & df$half=='b',5]/scale))

        LI <- rbind(LI,LI.b)

      }

    }

  }

  colnames(LI) <- c("inn","half","b","o",-16:16)

  return(LI)

}

LI <- calc.LI(leverage.index.a,LI.scale)

write.csv(win.expectancy, file="win expectancy HFA.csv")

write.csv(we.rounded, file="win expectancy rounded HFA.csv")

write.csv(LI, file="leverage index HFA.csv")

#############

# RE tables #

#############

bo.event.re.home = array(0, c(32,1), dimnames = list(bo))

for (r in 1:nrow(trans.prob.home)) {

  bo.event.re.home[r,] = trans.prob.home[r,] %*% trans.runs.home[r,]

}

bo.event.re.away = array(0, c(32,1), dimnames = list(bo))

for (r in 1:nrow(trans.prob.away)) {

  bo.event.re.away[r,] = trans.prob.away[r,] %*% trans.runs.away[r,]

}

re.formula.home = solve(diag(24) - trans.prob.home[1:24,1:24]) %*% bo.event.re.home[1:24]

rownames(re.formula.home) = bo[1:24]

colnames(re.formula.home)[1] = 're'

re.formula.away = solve(diag(24) - trans.prob.away[1:24,1:24]) %*% bo.event.re.away[1:24]

rownames(re.formula.away) = bo[1:24]

colnames(re.formula.away)[1] = 're'

re.table.home <- data.frame(cbind(re.formula.home,b=0:7,o=floor(0:23/8)))

trans.val.home <- merge(merge(bo.trans.home,re.table.home,by.x=c("START_BASES","START_OUTS"),by.y=c("b","o")),re.table.home,by.x=c("END_BASES","END_OUTS"),by.y=c("b","o"),all.x=TRUE)

trans.val.home[is.na(trans.val.home)] <- 0

run.change.home <- function(df) df$EVENT_RUNS+df$re.y-df$re.x

trans.val.home$change <- run.change.home(trans.val.home)

trans.val.home$weight <- abs(trans.val.home$change*trans.val.home$PA)

sum.cols <- function(df) data.frame(sum(df$PA),sum(df$weight),sum(df$weight)/sum(df$PA))

library(plyr)

boLI.a.home <- ddply(trans.val.home,c("START_BASES","START_OUTS"),sum.cols)

colnames(boLI.a.home) <- c("START_BASES","START_OUTS","PA","weights","LI")

boLI.home <- data.frame(START_BASES=boLI.a.home$START_BASES, START_OUTS=boLI.a.home$START_OUTS, LI=boLI.a.home$LI/(sum(boLI.a.home$weights)/sum(boLI.a.home$PA)))

boLI.home <- merge(boLI.home,re.table.home,by.x=c("START_BASES","START_OUTS"),by.y=c("b","o"))

boLI.home <- boLI.home[order(boLI.home$START_OUTS,boLI.home$START_BASES),]

rownames(boLI.home) <- rownames(re.table.home)

re.table.away <- data.frame(cbind(re.formula.away,b=0:7,o=floor(0:23/8)))

trans.val.away <- merge(merge(bo.trans.away,re.table.away,by.x=c("START_BASES","START_OUTS"),by.y=c("b","o")),re.table.away,by.x=c("END_BASES","END_OUTS"),by.y=c("b","o"),all.x=TRUE)

trans.val.away[is.na(trans.val.away)] <- 0

run.change.away <- function(df) df$EVENT_RUNS+df$re.y-df$re.x

trans.val.away$change <- run.change.away(trans.val.away)

trans.val.away$weight <- abs(trans.val.away$change*trans.val.away$PA)

sum.cols <- function(df) data.frame(sum(df$PA),sum(df$weight),sum(df$weight)/sum(df$PA))

library(plyr)

boLI.a.away <- ddply(trans.val.away,c("START_BASES","START_OUTS"),sum.cols)

colnames(boLI.a.away) <- c("START_BASES","START_OUTS","PA","weights","LI")

boLI.away <- data.frame(START_BASES=boLI.a.away$START_BASES, START_OUTS=boLI.a.away$START_OUTS, LI=boLI.a.away$LI/(sum(boLI.a.away$weights)/sum(boLI.a.away$PA)))

boLI.away <- merge(boLI.away,re.table.away,by.x=c("START_BASES","START_OUTS"),by.y=c("b","o"))

boLI.away <- boLI.away[order(boLI.away$START_OUTS,boLI.away$START_BASES),]

rownames(boLI.away) <- rownames(re.table.away)

boLI.away$half <- 0

boLI.home$half <- 1

boLI <- rbind(boLI.away,boLI.home)

write.csv(boLI, file="boLI HFA.csv")