######################################################## ##### ResEcon 703: Topics in Advanced Econometrics ##### ##### Problem set 5 solution code ##### ##### Matt Woerman, UMass Amherst ##### ######################################################## ### Load packages for problem set library(tidyverse) ### Problem 1 solutions -------------------------------------------------------- ### Part a ## Load dataset data_camping <- read_csv('camping.csv') ## Set seed for replication set.seed(703) ## Draw standard normal random variables and split into list draws_camper <- map(1:1000, ~ tibble(time_draw = rnorm(100), mountain_draw = rnorm(100))) ## Split data into list by camper data_camper <- data_camping %>% group_by(camper_id) %>% group_split() ## Function to simulate choice probabilities for one individual sim_probs_ind <- function(params, draws_ind, data_ind){ ## Select relevant variables and convert into a matrix data_matrix <- data_ind %>% select(cost, time, mountain) %>% as.matrix() ## Transform random draws into coefficients based on parameters coef_matrix <- draws_ind %>% mutate(cost_coef = params[1], time_coef = params[2] + params[4] * time_draw, mountain_coef = params[3] + params[5] * mountain_draw) %>% select(cost_coef, time_coef, mountain_coef) %>% as.matrix() ## Calculate representative utility for each alternative in each draw utility <- (coef_matrix %*% t(data_matrix)) %>% pmin(700) %>% pmax(-700) ## Sum the exponential of utility over alternatives prob_denom <- utility %>% exp() %>% rowSums() ## Calculate the conditional probability for each alternative in each draw cond_prob <- exp(utility) / prob_denom ## Calculate simulated choice probabilities as the means over all draws sim_prob <- colMeans(cond_prob) ## Add simulated choice probability to initial dataset data_ind_out <- data_ind %>% mutate(prob = sim_prob) ## Return initial dataset with simulated probability variable return(data_ind_out) } ## Function to calculate simulated log-likelihood sim_ll_fn <- function(params, draws_list, data_list){ ## Simulate probabilities for each individual data_sim_ind <- map2(.x = draws_list, .y = data_list, .f = ~ sim_probs_ind(params = params, draws_ind = .x, data_ind = .y)) ## Combine individual datasets into one data_sim <- data_sim_ind %>% bind_rows() ## Calculate the log of simulated probability for the chosen alternative data_sim <- data_sim %>% filter(visit == 1) %>% mutate(log_prob = log(prob)) ## Calculate the simulated log-likelihood sim_ll <- sum(data_sim$log_prob) ## Return the negative of simulated log-likelihood return(-sim_ll) } ## Maximize the log-likelihood function model_1a <- optim(par = c(-0.02, -0.005, -0.8, 0.002, 5), fn = sim_ll_fn, draws_list = draws_camper, data_list = data_camper, method = 'BFGS', hessian = TRUE) ## Function to summarize MLE model results summarize_mle <- function(model, names){ ## Extract model parameter estimates parameters <- model$par ## Calculate parameters standard errors std_errors <- model$hessian %>% solve() %>% diag() %>% sqrt() ## Calculate parameter z-stats z_stats <- parameters / std_errors ## Calculate parameter p-values p_values <- 2 * pnorm(-abs(z_stats)) ## Summarize results in a list model_summary <- tibble(names = names, parameters = parameters, std_errors = std_errors, z_stats = z_stats, p_values = p_values) ## Return model_summary object return(model_summary) } ## Summarize model results summarize_mle(model_1a, c('cost', 'time', 'mountain', 'sd.time', 'sd.mountain')) ### Part b ## Function to simulate elasticities for one individual sim_elas_ind <- function(params, draws_ind, data_ind){ ## Select relevant variables and convert into a matrix data_matrix <- data_ind %>% select(cost, time, mountain) %>% as.matrix() ## Transform random draws into coefficients based on parameters coef_matrix <- draws_ind %>% mutate(cost_coef = params[1], time_coef = params[2] + params[4] * time_draw, mountain_coef = params[3] + params[5] * mountain_draw) %>% select(cost_coef, time_coef, mountain_coef) %>% as.matrix() ## Calculate representative utility for each alternative in each draw utility <- (coef_matrix %*% t(data_matrix)) %>% pmin(700) %>% pmax(-700) ## Sum the exponential of utility over alternatives prob_denom <- utility %>% exp() %>% rowSums() ## Calculate the conditional probability for each alternative in each draw cond_prob <- exp(utility) / prob_denom ## Calculate simulated choice probabilities as the means over all draws sim_prob <- colMeans(cond_prob) ## Calculate simulated integral for own elasticity sim_int_own_elas <- params[1] * mean(cond_prob[, 1] * (1 - cond_prob[, 1])) ## Calculate simulated integral for cross elasticities sim_int_cross_elas <- params[1] * colMeans(cond_prob[, 1] * cond_prob[, -1]) ## Combine elasticity simulated integrals into one vector sim_int_elas <- c(sim_int_own_elas, sim_int_cross_elas) ## Calculate own-price and cross-price simulated elasticities sim_elas <- c(1, rep(-1, 4)) * data_ind$cost[1] / sim_prob * sim_int_elas ## Add simulated elasticities to initial dataset data_ind_out <- data_ind %>% mutate(elasticity = sim_elas) ## Return initial dataset with simulated elasticity variable return(data_ind_out) } ## Simulate elasticities for each individual data_1b_ind <- map2(.x = draws_camper, .y = data_camper, .f = ~ sim_elas_ind(params = model_1a$par, draws_ind = .x, data_ind = .y)) ## Combine list of data into one tibble data_1b <- data_1b_ind %>% bind_rows() ## Calculate average elasticity with respect to cost of alternative 1 data_1b %>% group_by(park_id, park) %>% summarize(elasticity = mean(elasticity), .groups = 'drop') %>% arrange(park_id) ### Problem 2 solutions -------------------------------------------------------- ### Part a ## Function to simulate individual coefficients for one individual calc_mean_coefs <- function(params, draws_ind, data_ind){ ## Select relevant variables and convert into a matrix data_matrix <- data_ind %>% select(cost, time, mountain) %>% as.matrix() ## Transform random draws into coefficients based on parameters coef <- draws_ind %>% mutate(cost_coef = params[1], time_coef = params[2] + params[4] * time_draw, mountain_coef = params[3] + params[5] * mountain_draw) %>% select(cost_coef, time_coef, mountain_coef) ## Convert coefficients tibble to a matrix coef_matrix <- as.matrix(coef) ## Calculate representative utility for each alternative in each draw utility <- (coef_matrix %*% t(data_matrix)) %>% pmin(700) %>% pmax(-700) ## Sum the exponential of utility over alternatives prob_denom <- utility %>% exp() %>% rowSums() ## Calculate the conditional probability for each alternative in each draw cond_prob <- exp(utility) / prob_denom ## Calculate the numerator of the draw weights as probability of chosen alt weights_num <- c(cond_prob %*% data_ind$visit) ## Calculate the draw weights weights <- weights_num / sum(weights_num) ## Add draw weights to dataset of coefficients coef <- coef %>% mutate(weight = weights) ## Calculate weighted mean for each coefficient coef_means <- coef %>% summarize(cost_coef = sum(cost_coef * weight), time_coef_mean = sum(time_coef * weight), mountain_coef_mean = sum(mountain_coef * weight)) ## Add individual coefficient means to initial dataset data_ind_out <- data_ind %>% bind_cols(coef_means) ## Return initial dataset with simulated probability variable return(data_ind_out) } ## Calculate mean coefficients for each individual data_2a_ind <- map2(.x = draws_camper, .y = data_camper, .f = ~ calc_mean_coefs(params = model_1a$par, draws_ind = .x, data_ind = .y)) ## Combine list of data into one tibble data_2a <- data_2a_ind %>% bind_rows() ## Calculate average coefficients for each camping park coef_park_2a <- data_2a %>% filter(visit == 1) %>% group_by(park_id, park) %>% summarize(cost_coef = mean(cost_coef), time_coef_mean = mean(time_coef_mean), mountain_coef_mean = mean(mountain_coef_mean), .groups = 'drop') coef_park_2a ## Calculate mean values for each camping park coef_park_2a %>% mutate(time_value = time_coef_mean / cost_coef * 60, mountain_value = mountain_coef_mean / -cost_coef) %>% select(park_id, park, time_value, mountain_value) ### Part a canned solutions ---------------------------------------------------- ## Load mlogit package library(mlogit) ## Convert dataset to dfidx format data_dfidx <- dfidx(data = data_camping, shape = 'long', choice = 'visit', idx = c('camper_id', 'park_id')) ## Model camping park visit as a mixed logit with fixed cost coefficient model_2a_alt <- mlogit(formula = visit ~ cost + time + mountain | 0 | 0, data = data_dfidx, rpar = c(time = 'n', mountain = 'n'), R = 100, seed = 703) ## Calculate mean coefficients for each individual model_2a_alt_params_ind <- fitted(model_2a_alt, type = 'parameters') ## Add mean coefficients to dataset data_2a_alt <- data_camping %>% filter(visit == 1) %>% mutate(cost_coef = coef(model_2a_alt)[1], time_coef_mean = model_2a_alt_params_ind[, 1], mountain_coef_mean = model_2a_alt_params_ind[, 2]) ## Calculate average coefficients for each camping park coef_park_2a_alt <- data_2a_alt %>% group_by(park_id, park) %>% summarize(cost_coef = mean(cost_coef), time_coef_mean = mean(time_coef_mean), mountain_coef_mean = mean(mountain_coef_mean), .groups = 'drop') coef_park_2a_alt ## Calculate mean values for each camping park coef_park_2a_alt %>% mutate(time_value = time_coef_mean / cost_coef * 60, mountain_value = mountain_coef_mean / -cost_coef) %>% select(park_id, park, time_value, mountain_value)