########################################################
##### ResEcon 703: Topics in Advanced Econometrics #####
#####         Problem set 2 solution code          #####
#####         Matt Woerman, UMass Amherst          #####
########################################################

### Load packages for problem set
library(tidyverse)
library(mlogit)


### Problem 1 solutions --------------------------------------------------------

### Part a
## Load dataset
data_binary <- read_csv('commute_binary.csv')
## Clean choice variable
data_binary <- data_binary %>% 
  mutate(car = (mode == 'car'))
## Model choice as binary logit
model_1a <- glm(formula = car ~ cost.car + time.car + time.bus, 
                family = 'binomial',
                data = data_binary)
## Summarize model results
summary(model_1a)
## Calculate estimated utility and probability of car
data_binary <- data_binary %>% 
  mutate(utility_1a = predict(model_1a),
         prob_car_1a = 1 / (1 + exp(-utility_1a)))
## Calculate marginal effects
data_binary <- data_binary %>% 
  mutate(prob_prod_1a = prob_car_1a * (1 - prob_car_1a),
         mfx_cost_car = coef(model_1a)[2] * prob_prod_1a,
         mfx_time_car = coef(model_1a)[3] * prob_prod_1a,
         mfx_time_bus = coef(model_1a)[4] * prob_prod_1a)
## Summarize marginal effects
data_binary %>% 
  select(starts_with('mfx')) %>% 
  summary()
## Calculate hourly time-value for each commute mode
abs(coef(model_1a)[3:4] / coef(model_1a)[2]) * 60

### Part b
## Model choice as binary logit with cost divided by income
model_1b <- glm(formula = car ~ I(cost.car / income) + time.car + time.bus, 
                family = 'binomial',
                data = data_binary)
## Summarize model results
summary(model_1b)
## Calculate marginal utility of car cost at different incomes
-coef(model_1b)[2] / c(15, 25, 35)
## Calculate hourly time-value for each commute mode at different incomes
rep(abs(coef(model_1b)[3:4] / coef(model_1b)[2]), 3) * 
  c(rep(15, 2), rep(25, 2), rep(35, 2)) * 60


### Problem 2 solutions --------------------------------------------------------

### Part a
## Load dataset
data_multi <- read_csv('commute_multinomial.csv')
## Convert dataset to data frame format
data_df <- as.data.frame(data_multi)
## Convert dataset to mlogit format
data_dfidx <- dfidx(data_df, shape = 'wide', choice = 'mode', varying = 3:10)
## Model choice as multinomial logit with common cost coefficient, 
## alternative intercepts, and alternative-specific time coefficients
model_2a <- mlogit(formula = mode ~ cost | 1 | time, 
                   data = data_dfidx)
## Summarize model results
summary(model_2a)
## Calculate the choice probabilities for car
data_multi <- data_multi %>% 
  mutate(prob_car_2a = fitted(model_2a, type = 'probabilities')[, 3])
## Calculate the own elasticity of car cost
data_multi <- data_multi %>% 
  mutate(elas_own_car_cost_2a = 
           coef(model_2a)[4] * cost.car * (1 - prob_car_2a))
## Calculate the cross-elasticity of car cost
data_multi <- data_multi %>% 
  mutate(elas_cross_car_cost_2a = 
           -coef(model_2a)[4] * cost.car * prob_car_2a)
## Summarize elasticities
data_multi %>% 
  select(starts_with('elas')) %>% 
  summary()

### Part b
## Create a separate dataset of single students
data_dfidx_single <- data_dfidx %>% 
  filter(marital_status == 'single')
## Create a separate datasets of married students
data_dfidx_married <- data_dfidx %>% 
  filter(marital_status == 'married')
## Model choice for single students
model_2b_single <- mlogit(formula = mode ~ cost | 1 | time, 
                          data = data_dfidx_single)
## Model choice for single students
model_2b_married <- mlogit(formula = mode ~ cost | 1 | time, 
                           data = data_dfidx_married)
## Summarize model results for single students
summary(model_2b_single)
## Summarize model results for married students
summary(model_2b_married)
## Calculate hourly time-value for each commute mode for single students
abs(coef(model_2b_single)[5:8] / coef(model_2b_single)[4]) * 60
## Calculate hourly time-value for each commute mode for married students
abs(coef(model_2b_married)[5:8] / coef(model_2b_married)[4]) * 60

### Part c
## Create counterfactual data with more frequent buses
data_df_counter <- data_df %>% 
  mutate(time.bus = 0.8 * time.bus)
## Convert counterfactual data to dfidx format
data_counter_dfidx <- dfidx(data_df_counter, shape = 'wide', 
                            choice = 'mode', varying = 3:10)
## Calculate aggregate choices using observed data
agg_choices_obs <- predict(model_2a, newdata = data_dfidx)
## Calculate aggregate choices using counterfactual data
agg_choices_counter <- predict(model_2a, newdata = data_counter_dfidx)
## Calculate difference between aggregate choices
colSums(agg_choices_counter - agg_choices_obs)
## Calculate log-sum values using observed data
logsum_obs <- logsum(model_2a, data = data_dfidx)
## Calculate log-sum values using counterfactual data
logsum_counter <- logsum(model_2a, data = data_counter_dfidx)
## Calculate change in consumer surplus from subsidy
sum((logsum_counter - logsum_obs) / -coef(model_2a)[4])