######################################################## ##### ResEcon 703: Topics in Advanced Econometrics ##### ##### Week 4: Logit Model ##### ##### Matt Woerman, UMass Amherst ##### ######################################################## ### Binary Logit Model R Example ----------------------------------------------- ### Load and look at dataset ## Load tidyverse library(tidyverse) ## Load dataset ac_data <- read_csv('ac_renters.csv') ## Look at dataset ac_data ### Model air conditioning as a binary logit ## Model air conditioning as a function of cost variables binary_logit <- glm(formula = air_conditioning ~ cost_system + cost_operating, family = 'binomial', data = ac_data) ## Summarize model results summary(binary_logit) ## Display model coefficients coef(binary_logit) ### Calculate the fitted values of the model ## Calculate utility of air conditioning ac_data <- ac_data %>% mutate(utility_ac_logit = predict(binary_logit)) ## Look at utilities and other data ac_data %>% select(air_conditioning, starts_with('cost'), utility_ac_logit) ### Visualize utility of air conditioning adoption ## Plot density of utilities ac_data %>% ggplot(aes(x = utility_ac_logit)) + geom_density() + xlab('Utility of air conditioning') + ylab('Kernel Density') ## Plot fraction vs. utility of air conditioning using bins ac_data %>% mutate(bin = cut(utility_ac_logit, breaks = seq(-3, 2, 0.25), labels = 1:20)) %>% group_by(bin) %>% summarize(fraction_ac = mean(air_conditioning), .groups = 'drop') %>% mutate(bin = as.numeric(bin), bin_mid = 0.25 * (bin - 1) - 2.875) %>% ggplot(aes(x = bin_mid, y = fraction_ac)) + geom_point() + xlab('Utility of air conditioning') + ylab('Fraction with air conditioining') ### Calculate the choice probabilities implied by the model ## Calculate choice probability of air conditioning ac_data <- ac_data %>% mutate(probability_ac_logit = 1 / (1 + exp(-utility_ac_logit))) ## Look at utilities and probabilities ac_data %>% select(air_conditioning, utility_ac_logit, probability_ac_logit) ### Visualize probability of air conditioning adoption ## Plot density of probabilities ac_data %>% ggplot(aes(x = probability_ac_logit)) + geom_density() + xlab('Probability of air conditioning') + ylab('Kernel Density') ## Plot fraction vs. probability of air conditioning using bins ac_data %>% mutate(bin = cut(probability_ac_logit, breaks = seq(0, 1, 0.05), labels = 1:20)) %>% group_by(bin) %>% summarize(fraction_ac = mean(air_conditioning), .groups = 'drop') %>% mutate(bin = as.numeric(bin), bin_mid = 0.05 * (bin - 1) + 0.025) %>% ggplot(aes(x = bin_mid, y = fraction_ac)) + geom_point() + xlab('Probability of air conditioning') + ylab('Fraction with air conditioning') ### Calculate marginal effects and elasticities ## Calculate the marginal effect of each cost variable ac_data <- ac_data %>% mutate(marg_eff_system = coef(binary_logit)[2] * probability_ac_logit * (1 - probability_ac_logit), marg_eff_operating = coef(binary_logit)[3] * probability_ac_logit * (1 - probability_ac_logit)) ## Calculate the elasticity of each cost variable ac_data <- ac_data %>% mutate(elasticity_system = coef(binary_logit)[2] * cost_system * (1 - probability_ac_logit), elasticity_operating = coef(binary_logit)[3] * cost_operating * (1 - probability_ac_logit)) ## Look at marginal effects and elasticities ac_data %>% select(starts_with('marg_eff'), starts_with('elasticity')) ## Summarize marginal effects and elasticities ac_data %>% select(starts_with('marg_eff'), starts_with('elasticity')) %>% summary() ### Model air conditioning with heterogeneous cost coefficients ## Model air conditioning as a function of costs divided by income binary_logit_inc <- glm(formula = air_conditioning ~ I(cost_system / income) + I(cost_operating / income), family = 'binomial', data = ac_data) ## Summarize model results summary(binary_logit_inc) ## Display model coefficients coef(binary_logit_inc) ### Visualize income variable ## Plot kernel density of income ac_data %>% ggplot(aes(x = income)) + geom_density() + xlab('Income') + ylab('Kernel density') ### Calculate marginal utility of cost variables ## Calculate marginal utility of costs when income == 30 coef(binary_logit_inc)[2:3] / 30 ## Calculate marginal utility of costs when income == 60 coef(binary_logit_inc)[2:3] / 60 ## Calculate marginal utility of costs when income == 90 coef(binary_logit_inc)[2:3] / 90 ### Calculate the trade-off between system cost and operating cost ## Calculate system cost equivalence of an increase in operating cost -coef(binary_logit)[3] / coef(binary_logit)[2] ### Calculate the implied discount factor of consumers ## Calculate the implied discount rate coef(binary_logit)[2] / coef(binary_logit)[3] ### Multinomial Logit Model R Example ------------------------------------------ ### Load and look at dataset ## Load tidyverse and mlogit library(tidyverse) library(mlogit) ## Load dataset from mlogit package data('Heating', package = 'mlogit') ## Rename dataset to lowercase heating <- Heating ## Look at dataset tibble(heating) ## Pivot into a long dataset heating_long <- heating %>% pivot_longer(contains('.')) %>% separate(name, c('name', 'alt')) %>% pivot_wider() %>% mutate(choice = (depvar == alt)) %>% select(-depvar) ## Look at long dataset tibble(heating_long) ### Convert data to dfidx formats ## Convert wide data to dfidx format heating_dfidx <- dfidx(heating, shape = 'wide', choice = 'depvar', varying = 3:12) ## Convert long data to dfidx format heating_long_dfidx <- dfidx(heating_long, shape = 'long', choice = 'choice', idx = c('idcase', 'alt')) ## Look at wide data in dfidx format tibble(heating_dfidx) ## Look at long data in dfidx format tibble(heating_long_dfidx) ### Model heating choice as a multinomial logit ## Model choice using cost data and alternative effects model_mlogit <- mlogit(formula = depvar ~ ic + oc | 1 | 0, data = heating_dfidx, reflevel = 'hp') ## Summarize model results summary(model_mlogit) ## Display model coefficients coef(model_mlogit) ### Model heating choice with alternative-specific demographic parameters ## Model heating choice with alternative-specific rooms coefficient model_mlogit_rooms <- mlogit(formula = depvar ~ ic + oc | rooms | 0, data = heating_dfidx, reflevel = 'hp') ## Summarize model results summary(model_mlogit_rooms) ### Model heating choice with alternative-specific cost parameters ## Model heating choice with alternative-specific ic coefficient model_mlogit_costs <- mlogit(formula = depvar ~ oc | 1 | ic, data = heating_dfidx, reflevel = 'hp') ## Summarize model results summary(model_mlogit_costs) ### Calculate the fitted values of the model ## Look at fitted utilities fitted(model_mlogit, type = 'linpred') %>% head() ## Look at fitted choice probabilities fitted(model_mlogit, type = 'probabilities') %>% head() ### Calculate and visualize own-price marginal effects for central gas ## Calculate probability of central gas heating <- heating %>% mutate(prob_gc_mlogit = fitted(model_mlogit, type = 'probabilities')[, 4]) ## Calculate own-price marginal effects for central gas heating <- heating %>% mutate(mfx_gc_ic_mlogit = coef(model_mlogit)[5] * prob_gc_mlogit * (1 - prob_gc_mlogit), mfx_gc_oc_mlogit = coef(model_mlogit)[6] * prob_gc_mlogit * (1 - prob_gc_mlogit)) ## Plot kernel density of own-price elasticity of ic for gc heating %>% ggplot(aes(x = mfx_gc_ic_mlogit)) + geom_density() + xlab('Own-price marginal effect of central gas installation cost') + ylab('Kernel density') ## Plot kernel density of own-price elasticity of oc for gc heating %>% ggplot(aes(x = mfx_gc_oc_mlogit)) + geom_density() + xlab('Own-price marginal effect of central gas operating cost') + ylab('Kernel density') ### Calculate and visualize own-price elasticities for central gas ## Calculate own-price elasticities for central gas heating <- heating %>% mutate(elas_gc_ic_mlogit = coef(model_mlogit)[5] * ic.gc * (1 - prob_gc_mlogit), elas_gc_oc_mlogit = coef(model_mlogit)[6] * oc.gc * (1 - prob_gc_mlogit)) ## Plot kernel density of own-price elasticity of ic for gc heating %>% ggplot(aes(x = elas_gc_ic_mlogit)) + geom_density() + xlab('Own-price elasticity of central gas installation cost') + ylab('Kernel density') ## Plot kernel density of own-price elasticity of oc for gc heating %>% ggplot(aes(x = elas_gc_oc_mlogit)) + geom_density() + xlab('Own-price elasticity of central gas operating cost') + ylab('Kernel density') ### Calculate and visualize cross-price elasticities for central gas costs ## Calculate own-price elasticities for central gas heating <- heating %>% mutate(crosselas_gc_ic_mlogit = -coef(model_mlogit)[5] * ic.gc * prob_gc_mlogit, crosselas_gc_oc_mlogit = -coef(model_mlogit)[6] * oc.gc * prob_gc_mlogit) ## Plot kernel density of cross-price elasticity of ic for gc heating %>% ggplot(aes(x = crosselas_gc_ic_mlogit)) + geom_density() + xlab('Cross-price elasticity of central gas installation cost') + ylab('Kernel density') ## Plot kernel density of cross-price elasticity of oc for gc heating %>% ggplot(aes(x = crosselas_gc_oc_mlogit)) + geom_density() + xlab('Cross-price elasticity of central gas operating cost') + ylab('Kernel density') ### Calculate marginal effects and elasticities of oc at data means ## Calculate marginal effects of ic at data means effects(model_mlogit, covariate = 'ic', type = 'aa') ## Calculate elasticities of ic at data means effects(model_mlogit, covariate = 'ic', type = 'rr') ### Model heating choice with heterogeneous cost coefficients ## Model heating choice with costs divided by income model_mlogit_inc <- mlogit(formula = depvar ~ I(ic / income) + I(oc / income) | 1 | 0, data = heating_dfidx, reflevel = 'hp') ## Summarize model results summary(model_mlogit_inc) ## Display model coefficients coef(model_mlogit_inc) ### Visualize income variable ## Plot kernel density of income heating %>% ggplot(aes(x = income)) + geom_density() + xlab('Income') + ylab('Kernel density') ### Calculate marginal utility of cost variables ## Calculate marginal utility of costs when income == 3 coef(model_mlogit_inc)[5:6] / 3 ## Calculate marginal utility of costs when income == 5 coef(model_mlogit_inc)[5:6] / 5 ## Calculate marginal utility of costs when income == 7 coef(model_mlogit_inc)[5:6] / 7 ### Model heating choice with regional scale parameters ## Create new dataset to work around bug heating_nas <- heating_long %>% mutate(available = 1) heating_nas <- heating_nas %>% bind_rows(heating_nas %>% filter(idcase == 1) %>% mutate(idcase = 9999) %>% mutate(available = 1 * choice)) heating_nas_dfidx <- dfidx(heating_nas, shape = 'long', idx = c('idcase', 'alt'), choice = 'choice', subset = available == 1) ## Model heating choice with regional scale parameters model_mlogit_region <- mlogit(formula = choice ~ ic + oc | 1 | 0 | region, data = heating_nas_dfidx, reflevel = 'hp') ## Summarize model results summary(model_mlogit_region) ## Display model coefficients coef(model_mlogit_region) ### Calculate the trade-off between installation cost and operating cost ## Calculate ic equivalence of an increase in oc -coef(model_mlogit)[6] / coef(model_mlogit)[5] ### Calculate the implied discount factor of consumers ## Calculate the implied discount rate coef(model_mlogit)[5] / coef(model_mlogit)[6] ### Calculate aggregate choice effects from a heat pump subsidy ## Create data with 50% heat pump subsidy heating_subsidy <- Heating %>% mutate(ic.hp = 0.5 * ic.hp) ## Convert subsidy data to dfidx format heating_subsidy_dfidx <- dfidx(heating_subsidy, shape = 'wide', choice = 'depvar', varying = 3:12) ## Look at subsidy dataset tibble(heating_subsidy_dfidx) ## Calculate aggregate choices using observed data agg_choices_obs <- predict(model_mlogit, newdata = heating_dfidx) %>% colSums() ## Calculate aggregate choices using subsidy data agg_choices_subsidy <- predict(model_mlogit, newdata = heating_subsidy_dfidx) %>% colSums() ## Calculate difference between aggregate choices in levels agg_choices_subsidy - agg_choices_obs ## Calculate difference between aggregate choices in percentages (agg_choices_subsidy - agg_choices_obs) / agg_choices_obs ### Calculate consumer surplus effects from a heat pump subsidy ## Calculate total log-sum value using observed data logsum_obs <- logsum(model_mlogit, data = heating_dfidx) %>% sum() ## Calculate total log-sum value using subsidy data logsum_subsidy <- logsum(model_mlogit, data = heating_subsidy_dfidx) %>% sum() ## Calculate change in consumer surplus from subsidy (logsum_subsidy - logsum_obs) / (-coef(model_mlogit)[5])