library(tidyverse)
library(readr)
library(gridExtra)
library(haven)
library(RColorBrewer)
library(estimatr)
library(ggridges)
library(DescTools)
library(lme4)
library(brms)
library(purrr)
library(tableone)
library(pscl)
library(ggcorrplot)
library(kableExtra)
library(lme4)
library(forcats)
library(glmmTMB)
Import data (no comment texts, open comments, usernames or email addresses)
# reddit data
user_data <- read_csv("../data/anon/users_anon.csv")
discussion_data <- read_csv("../data/anon/discussions_anon.csv")
external_data <- read_csv("../data/anon/external_reddit_anon.csv")
# survey data
pre_survey <- read_csv("../data/anon/pre_survey_anon.csv")
post_surveys <- read_csv("../data/anon/post_surveys_anon.csv")
# sample (combined data)
sample <- read_csv("../data/anon/sample_anon.csv")
Preprocessing, combining pre and post surveys
# average group toxicity
group_toxicity_dat <- discussion_data%>%
mutate(subreddit_w1 = subreddit)%>%
group_by(subreddit_w1)%>%
mutate(group_toxicity = mean(comment_toxicity, na.rm=T))%>%
select(subreddit_w1, group_toxicity)%>%
slice(1)%>%
ungroup()
# any check-in surveys answered
any_check <- post_surveys%>%
group_by(ParticipantID)%>%
mutate(any_checkin = 1)%>%
slice(1)%>%
pull(ParticipantID)
post_1 <- post_surveys%>%filter(survey_week == 1)
post_2 <- post_surveys%>%filter(survey_week == 2)
post_3 <- post_surveys%>%filter(survey_week == 3)
post_4 <- post_surveys%>%filter(survey_week == 4)
# pre-post data (combining pre and post surveys)
full_data <- left_join(sample, post_1, by = "ParticipantID", suffix = c("_w1" , "_w2"))%>%
left_join(., post_2, by = "ParticipantID", suffix = c("" , "_w3"))%>%
left_join(., post_3, by = "ParticipantID", suffix = c("" , "_w4"))%>%
left_join(., post_4, by = "ParticipantID", suffix = c("" , "_w5"))%>%
left_join(.,group_toxicity_dat, by = "subreddit_w1")%>%
mutate(active = ifelse(is.na(comment_count), 0, 1),
condition = factor(condition),
active2 = ifelse(!ParticipantID %in% any_check & active == 0, "inactive", active))
write_csv(full_data, file = "../data/anon/full_data_waves.csv")
Data Scaling for models https://onlinelibrary.wiley.com/doi/10.1002/sim.3107
# scale data for better comparison of coefficients
gelman_scale <- function(x){
( (x - mean(x, na.rm=T)) / (2*sd(x, na.rm=TRUE)))
}
scaled_sample <- full_data%>%
# generation of check-in variables (average over check-ins)
mutate(mean_issue_distance_abs = rowMeans(select(., starts_with("issue_distance_abs")), na.rm = TRUE),
mean_discussion_constructive = rowMeans(select(., starts_with("discussion_percep_2")), na.rm = TRUE),
mean_discussion_enjoyable = rowMeans(select(., starts_with("discussion_percep_3")), na.rm = TRUE),
mean_discussion_toxic = rowMeans(select(., starts_with("discussion_percep_1")), na.rm = TRUE),
mean_group_knowledgeable = rowMeans(select(., starts_with("group_percep_3")), na.rm = TRUE),
mean_group_polarized = rowMeans(select(., starts_with("group_percep_2")), na.rm = TRUE),
mean_group_respectful = rowMeans(select(., starts_with("group_percep_1")), na.rm = TRUE))%>%
# scale selected variables with Gelman (2008) method by dividing by two standard deviations
select(ParticipantID, condition, subreddit_w1, comment_count, comment_mean_tox,comment_mean_score,
ex_comment_mean_score,
active, active2, gender, age, education, comments_online_w1, group_toxicity,
leftright_w1, polinterest_w1, affective_polarization_w1, ex_comment_mean_tox,
affective_pol_1_w1,affective_pol_2_w1,comment_karma,
pol_cynicism_w1, efficacy_w1, trust_general_1_w1,
trust_general_2_w1,trust_general_3_w1,trust_general_4_w1,
mean_issue_distance_abs, mean_discussion_constructive,
mean_discussion_enjoyable, mean_discussion_toxic,
mean_group_knowledgeable, mean_group_polarized,
mean_group_respectful)%>%
mutate(across(c(age:mean_group_respectful), gelman_scale))%>%
mutate(comment_count = ifelse(is.na(comment_count),0,comment_count),
gender_male = factor(ifelse(gender == 1, 1, 0)))
write_csv(scaled_sample, file = "../data/anon/scaled_sample.csv")
https://search.r-project.org/CRAN/refmans/pscl/html/hurdle.html
# correspondence between perceptions and external measures
cor.test(scaled_sample$mean_discussion_toxic, scaled_sample$group_toxicity)
##
## Pearson's product-moment correlation
##
## data: scaled_sample$mean_discussion_toxic and scaled_sample$group_toxicity
## t = 2.8732, df = 364, p-value = 0.004302
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.04712583 0.24764617
## sample estimates:
## cor
## 0.1489165
present_sample <- scaled_sample%>%
filter(active2 != "inactive")%>%
# also include external measure of group toxicity
mutate(group_toxicity = gelman_scale(group_toxicity))
models <- list(
hurdle(comment_count ~ gender_male, data = present_sample),
hurdle(comment_count ~ age, data = present_sample),
hurdle(comment_count ~ education, data = present_sample),
hurdle(comment_count ~ comments_online_w1, data = present_sample),
hurdle(comment_count ~ comment_karma, data = present_sample),
hurdle(comment_count ~ leftright_w1, data = present_sample),
hurdle(comment_count ~ polinterest_w1, data = present_sample),
hurdle(comment_count ~ affective_polarization_w1, data = present_sample),
hurdle(comment_count ~ trust_general_1_w1, data = present_sample),
hurdle(comment_count ~ trust_general_2_w1, data = present_sample),
hurdle(comment_count ~ trust_general_3_w1, data = present_sample),
hurdle(comment_count ~ trust_general_4_w1, data = present_sample),
hurdle(comment_count ~ mean_issue_distance_abs, data = present_sample),
hurdle(comment_count ~ mean_group_respectful, data = present_sample),
hurdle(comment_count ~ mean_group_knowledgeable, data = present_sample),
hurdle(comment_count ~ mean_group_polarized, data = present_sample),
hurdle(comment_count ~ mean_discussion_enjoyable, data = present_sample),
hurdle(comment_count ~ mean_discussion_constructive, data = present_sample),
hurdle(comment_count ~ mean_discussion_toxic, data = present_sample),
hurdle(comment_count ~ mean_discussion_toxic + group_toxicity, data = present_sample)
#hurdle(comment_count ~ group_toxicity, data = present_sample) #"Average group toxicity"
)
predictors <- c(
"Gender (male)", "Age", "Education", "Commenting online", "Comment karma", "Political orientation",
"Political interest", "Affective polarization", "Trust in politics","Trust in media",
"Trust in science", "Trust in others", "Issue distance",
"Group respectful", "Group knowledgeable", "Group polarized",
"Discussion enjoyable", "Discussion constructive","Discussion toxic", "Discussion toxic contr.",
"Write toxic external"
)
model_frames <- list()
for (i in seq_along(models)) {
model_summary <- summary(models[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df <- do.call(rbind, model_frames)%>%
filter(!row.names(.) %in% c("group_toxicity","group_toxicity1"))
combined_df <- combined_df %>%
mutate(Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
interval2 <- -qnorm((1 - 0.95) / 2)
comment_plot_pred_active_hurdle_ext <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(AlphaValue))) +
geom_hline(yintercept = 0, colour = gray(1 / 2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "black",
lwd = 1, position = position_dodge(width = 2 / 3)) +
guides(alpha = "none") +
geom_vline(xintercept=8.5)+
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Prediction of user-level discussion participation") +
xlab("") +
ylab("Zero hurdle models for count data regression") +
facet_wrap(~ forcats::fct_rev(Model), ncol = 2, scales = "free_x")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
ggsave("../output/comment_plot_pred_hurdle_with_external.pdf", comment_plot_pred_active_hurdle_ext, width = 6, height = 4)
comment_plot_pred_active_hurdle_ext
glmmTMB(comment_count ~ gender_male + (1 + gender_male | subreddit_w1), data = present_sample, family = hurdle_poisson(link = "log"))
## Formula:
## comment_count ~ gender_male + (1 + gender_male | subreddit_w1)
## Data: present_sample
## AIC BIC logLik -2*log(L) df.resid
## NA NA NA NA 417
## Random-effects (co)variances:
##
## Conditional model:
## Groups Name Std.Dev. Corr
## subreddit_w1 (Intercept) 8.295e-08
## gender_male1 2.070e-01 0.22
##
## Number of obs: 423 / Conditional model: subreddit_w1, 6
##
## Dispersion parameter for hurdle_poisson family (): 18
##
## Fixed Effects:
##
## Conditional model:
## (Intercept) gender_male1
## 2.4631 0.2695
glmmTMB(comment_count ~ age + (1 + age | subreddit_w1), data = present_sample, family = hurdle_poisson(link = "log"))
## Formula: comment_count ~ age + (1 + age | subreddit_w1)
## Data: present_sample
## AIC BIC logLik -2*log(L) df.resid
## 3668.033 3692.317 -1828.017 3656.033 417
## Random-effects (co)variances:
##
## Conditional model:
## Groups Name Std.Dev. Corr
## subreddit_w1 (Intercept) 0.03630
## age 0.05996 -1.00
##
## Number of obs: 423 / Conditional model: subreddit_w1, 6
##
## Dispersion parameter for hurdle_poisson family (): 18.2
##
## Fixed Effects:
##
## Conditional model:
## (Intercept) age
## 2.60763 0.02067
glmmTMB(comment_count ~ education + (1 + education | subreddit_w1), data = present_sample, family = hurdle_poisson(link = "log"))
## Formula: comment_count ~ education + (1 + education | subreddit_w1)
## Data: present_sample
## AIC BIC logLik -2*log(L) df.resid
## NA NA NA NA 417
## Random-effects (co)variances:
##
## Conditional model:
## Groups Name Std.Dev. Corr
## subreddit_w1 (Intercept) 7.000e-05
## education 5.565e-05 0.78
##
## Number of obs: 423 / Conditional model: subreddit_w1, 6
##
## Dispersion parameter for hurdle_poisson family (): 18.2
##
## Fixed Effects:
##
## Conditional model:
## (Intercept) education
## 2.60874 -0.01315
glmmTMB(comment_count ~ comments_online_w1 + (1 + comments_online_w1 | subreddit_w1), data = present_sample, family = hurdle_poisson(link = "log"))
## Formula:
## comment_count ~ comments_online_w1 + (1 + comments_online_w1 |
## subreddit_w1)
## Data: present_sample
## AIC BIC logLik -2*log(L) df.resid
## 3661.755 3686.039 -1824.877 3649.755 417
## Random-effects (co)variances:
##
## Conditional model:
## Groups Name Std.Dev. Corr
## subreddit_w1 (Intercept) 8.243e-05
## comments_online_w1 5.689e-05 -0.99
##
## Number of obs: 423 / Conditional model: subreddit_w1, 6
##
## Dispersion parameter for hurdle_poisson family (): 18.1
##
## Fixed Effects:
##
## Conditional model:
## (Intercept) comments_online_w1
## 2.5867 0.3485
glmmTMB(comment_count ~ comment_karma + (1 + comment_karma | subreddit_w1), data = present_sample, family = hurdle_poisson(link = "log"))
## Formula:
## comment_count ~ comment_karma + (1 + comment_karma | subreddit_w1)
## Data: present_sample
## AIC BIC logLik -2*log(L) df.resid
## NA NA NA NA 416
## Random-effects (co)variances:
##
## Conditional model:
## Groups Name Std.Dev. Corr
## subreddit_w1 (Intercept) 6.693e-05
## comment_karma 6.098e-07 0.54
##
## Number of obs: 422 / Conditional model: subreddit_w1, 6
##
## Dispersion parameter for hurdle_poisson family (): 18.1
##
## Fixed Effects:
##
## Conditional model:
## (Intercept) comment_karma
## 2.597 0.108
glmmTMB(comment_count ~ leftright_w1 + (1 + leftright_w1 | subreddit_w1), data = present_sample, family = hurdle_poisson(link = "log"))
## Formula:
## comment_count ~ leftright_w1 + (1 + leftright_w1 | subreddit_w1)
## Data: present_sample
## AIC BIC logLik -2*log(L) df.resid
## 3665.500 3689.785 -1826.750 3653.500 417
## Random-effects (co)variances:
##
## Conditional model:
## Groups Name Std.Dev. Corr
## subreddit_w1 (Intercept) 0.0500
## leftright_w1 0.3145 -1.00
##
## Number of obs: 423 / Conditional model: subreddit_w1, 6
##
## Dispersion parameter for hurdle_poisson family (): 18.1
##
## Fixed Effects:
##
## Conditional model:
## (Intercept) leftright_w1
## 2.58254 0.04445
glmmTMB(comment_count ~ polinterest_w1 + (1 + polinterest_w1 | subreddit_w1), data = present_sample, family = hurdle_poisson(link = "log"))
## Formula:
## comment_count ~ polinterest_w1 + (1 + polinterest_w1 | subreddit_w1)
## Data: present_sample
## AIC BIC logLik -2*log(L) df.resid
## 3661.391 3685.675 -1824.695 3649.391 417
## Random-effects (co)variances:
##
## Conditional model:
## Groups Name Std.Dev. Corr
## subreddit_w1 (Intercept) 0.0649
## polinterest_w1 0.2477 1.00
##
## Number of obs: 423 / Conditional model: subreddit_w1, 6
##
## Dispersion parameter for hurdle_poisson family (): 18
##
## Fixed Effects:
##
## Conditional model:
## (Intercept) polinterest_w1
## 2.5861 0.3447
glmmTMB(comment_count ~ affective_polarization_w1 + (1 + affective_polarization_w1 | subreddit_w1), data = present_sample, family = hurdle_poisson(link = "log"))
## Formula:
## comment_count ~ affective_polarization_w1 + (1 + affective_polarization_w1 |
## subreddit_w1)
## Data: present_sample
## AIC BIC logLik -2*log(L) df.resid
## 3293.792 3317.512 -1640.896 3281.792 379
## Random-effects (co)variances:
##
## Conditional model:
## Groups Name Std.Dev. Corr
## subreddit_w1 (Intercept) 0.08959
## affective_polarization_w1 0.14672 -1.00
##
## Number of obs: 385 / Conditional model: subreddit_w1, 6
##
## Dispersion parameter for hurdle_poisson family (): 17.1
##
## Fixed Effects:
##
## Conditional model:
## (Intercept) affective_polarization_w1
## 2.5735 -0.1926
glmmTMB(comment_count ~ trust_general_1_w1 + (1 + trust_general_1_w1 | subreddit_w1), data = present_sample, family = hurdle_poisson(link = "log"))
## Formula:
## comment_count ~ trust_general_1_w1 + (1 + trust_general_1_w1 |
## subreddit_w1)
## Data: present_sample
## AIC BIC logLik -2*log(L) df.resid
## 3654.137 3678.421 -1821.068 3642.137 417
## Random-effects (co)variances:
##
## Conditional model:
## Groups Name Std.Dev. Corr
## subreddit_w1 (Intercept) 0.08099
## trust_general_1_w1 0.47882 -1.00
##
## Number of obs: 423 / Conditional model: subreddit_w1, 6
##
## Dispersion parameter for hurdle_poisson family (): 17.7
##
## Fixed Effects:
##
## Conditional model:
## (Intercept) trust_general_1_w1
## 2.5452 0.3086
glmmTMB(comment_count ~ mean_discussion_toxic + group_toxicity + (1 + mean_discussion_toxic | subreddit_w1), data = present_sample, family = hurdle_poisson(link = "log"))
## Formula:
## comment_count ~ mean_discussion_toxic + group_toxicity + (1 +
## mean_discussion_toxic | subreddit_w1)
## Data: present_sample
## AIC BIC logLik -2*log(L) df.resid
## 3197.520 3224.838 -1591.760 3183.520 359
## Random-effects (co)variances:
##
## Conditional model:
## Groups Name Std.Dev. Corr
## subreddit_w1 (Intercept) 0.1208
## mean_discussion_toxic 0.3039 -1.00
##
## Number of obs: 366 / Conditional model: subreddit_w1, 6
##
## Dispersion parameter for hurdle_poisson family (): 18.6
##
## Fixed Effects:
##
## Conditional model:
## (Intercept) mean_discussion_toxic group_toxicity
## 2.7002 0.0922 0.2199
models <- list(
hurdle(comment_count ~ gender_male, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ age, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ education, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ comments_online_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ comment_karma, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ leftright_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ polinterest_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ affective_polarization_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ trust_general_1_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ trust_general_2_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ trust_general_3_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ trust_general_4_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_issue_distance_abs, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_group_respectful, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_group_knowledgeable, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_group_polarized, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_discussion_enjoyable, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_discussion_toxic, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_discussion_constructive, data = present_sample, dist = "geometric")
)
predictors <- c(
"Gender (male)", "Age", "Education", "Commenting online", "Comment karma", "Political orientation",
"Political interest", "Affective polarization", "Trust in politics", "Trust in media",
"Trust in science", "Trust in others", "Issue distance",
"Group respectful", "Group knowledgeable", "Group polarized",
"Discussion enjoyable", "Discussion toxic", "Discussion constructive"
)
model_frames <- list()
for (i in seq_along(models)) {
model_summary <- summary(models[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df <- do.call(rbind, model_frames)
combined_df <- combined_df %>%
mutate(Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_active_hurdle_geometric <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(AlphaValue))) +
geom_hline(yintercept = 0, colour = gray(1 / 2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "black",
lwd = 1, position = position_dodge(width = 2 / 3)) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Prediction of user-level discussion participation") +
xlab("") +
ylab("Zero hurdle models for count data regression (geometric, binomial)") +
facet_wrap(~ Model, ncol = 2, scales = "free_x") +
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
ggsave("../output/comment_plot_pred_hurdle_geometric.pdf", comment_plot_pred_active_hurdle_geometric, width = 6, height = 4)
comment_plot_pred_active_hurdle_geometric
models <- list(
hurdle(comment_count ~ gender_male, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ age, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ education, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ comments_online_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ comment_karma, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ leftright_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ polinterest_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ affective_polarization_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ trust_general_1_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ trust_general_2_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ trust_general_3_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ trust_general_4_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_issue_distance_abs, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_group_respectful, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_group_knowledgeable, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_group_polarized, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_discussion_enjoyable, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_discussion_toxic, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_discussion_constructive, data = present_sample, dist = "negbin")
)
model_frames <- list()
for (i in seq_along(models)) {
model_summary <- summary(models[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df <- do.call(rbind, model_frames)
combined_df <- combined_df %>%
rownames_to_column()%>%
filter(!str_detect(rowname,"theta"))%>%
mutate(Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_active_hurdle_negbin <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(AlphaValue))) +
geom_hline(yintercept = 0, colour = gray(1 / 2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "black",
lwd = 1, position = position_dodge(width = 2 / 3)) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Prediction of user-level discussion participation") +
xlab("") +
ylab("Zero hurdle models for count data regression (negbin, binomial)") +
facet_wrap(~ Model, ncol = 2, scales = "free_x") +
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
# Save the plot with "negbin" in the file name
ggsave("../output/comment_plot_pred_hurdle_negbin.pdf", comment_plot_pred_active_hurdle_negbin, width = 6, height = 4)
# Display the plot
comment_plot_pred_active_hurdle_negbin
models <- list(
glm(active ~ gender_male, data = present_sample),
glm(active ~ age, data = present_sample),
glm(active ~ education, data = present_sample),
glm(active ~ comments_online_w1, data = present_sample),
glm(active ~ comment_karma, data = present_sample),
glm(active ~ leftright_w1, data = present_sample),
glm(active ~ polinterest_w1, data = present_sample),
glm(active ~ affective_polarization_w1, data = present_sample),
glm(active ~ trust_general_1_w1, data = present_sample),
glm(active ~ trust_general_2_w1, data = present_sample),
glm(active ~ trust_general_3_w1, data = present_sample),
glm(active ~ trust_general_4_w1, data = present_sample),
glm(active ~ mean_issue_distance_abs, data = present_sample),
glm(active ~ mean_group_respectful, data = present_sample),
glm(active ~ mean_group_knowledgeable, data = present_sample),
glm(active ~ mean_group_polarized, data = present_sample),
glm(active ~ mean_discussion_enjoyable, data = present_sample),
glm(active ~ mean_discussion_toxic, data = present_sample),
glm(active ~ mean_discussion_constructive, data = present_sample)
)
model_frames <- list()
for (i in seq_along(models)) {
model_frames[[i]] <- data.frame(
Coefficient = summary(models[[i]])$coef[-1, 1],
pval = summary(models[[i]])$coef[,4],
SE = summary(models[[i]])$coef[-1, 2],
Predictor = predictors[i]
)
}
combined_df <- do.call(rbind, model_frames)
combined_df <- combined_df%>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor)))%>%
group_by(Predictor)%>%
slice(2)
comment_plot_pred_binary <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(Coefficient))) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE*interval2,
ymax = Coefficient + SE*interval2), colour = "black",
lwd = 1, position = position_dodge(width = 2/3)) +
guides(alpha = "none")+
theme_bw(base_size=10)+
coord_flip()+
ggtitle("Comment activity binary", subtitle = "Excluding inactive users")+
xlab("")+
ylab("Change in log odds")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
comment_plot_pred_binary
ggsave("../output/comment_plot_pred_binary.pdf",comment_plot_pred_binary, width = 4, height = 4)
predictors <- c("gender_male", "age", "education", "comments_online_w1", "comment_karma",
"leftright_w1", "polinterest_w1", "affective_polarization_w1",
"trust_general_1_w1", "trust_general_2_w1", "trust_general_3_w1",
"trust_general_4_w1", "mean_issue_distance_abs", "mean_group_respectful",
"mean_group_knowledgeable", "mean_group_polarized",
"mean_discussion_enjoyable","mean_discussion_constructive", "mean_discussion_toxic")
predictor_labels <- c(
"Gender (male)", "Age", "Education", "Commenting online", "Comment karma", "Political orientation",
"Political interest", "Affective polarization", "Trust in politics","Trust in media",
"Trust in science", "Trust in others", "Issue distance",
"Group respectful", "Group knowledgeable", "Group polarized",
"Discussion enjoyable", "Discussion constructive","Discussion toxic"
)
models <- lapply(predictors, function(pred) {
formula <- as.formula(paste("active ~", pred, "+ (1 +", pred," | subreddit_w1)"))
glmer(formula, data = present_sample, family = binomial)
})
model_frames <- list()
for (i in seq_along(models)) {
# Extract fixed effects summary; skip the intercept (row 1)
coefs <- summary(models[[i]])$coefficients
model_frames[[i]] <- data.frame(
Coefficient = coefs[-1, "Estimate"], # exclude intercept
SE = coefs[-1, "Std. Error"],
pval = coefs[-1, "Pr(>|z|)"],
Predictor = predictor_labels[i]
)
}
combined_df <- do.call(rbind, model_frames) %>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Predictor) %>%
slice(1) # slice(1) because we have only one predictor per model
interval2 <- 1.96 # for 95% CI
comment_plot_pred_binary_random <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(Coefficient))) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE*interval2,
ymax = Coefficient + SE*interval2), colour = "black",
lwd = 1, position = position_dodge(width = 2/3)) +
geom_vline(xintercept = 7.5)+
guides(alpha = "none")+
theme_bw(base_size=10)+
coord_flip()+
ggtitle("Zero model | silent vs. active", subtitle = "Excluding join-only users, community random effects")+
xlab("")+
ylab("Change in log odds")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
comment_plot_pred_binary_random
ggsave("../output/comment_plot_pred_binary_random.pdf", comment_plot_pred_binary_random, width = 5, height = 5)
present_sample <- scaled_sample%>%filter(active == 1)
models <- list(
glm(comment_count ~ gender_male, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ age, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ education, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ comments_online_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ comment_karma, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ leftright_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ polinterest_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ affective_polarization_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ trust_general_1_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ trust_general_2_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ trust_general_3_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ trust_general_4_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_issue_distance_abs, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_group_respectful, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_group_knowledgeable, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_group_polarized, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_discussion_enjoyable, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_discussion_toxic, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_discussion_constructive, family = poisson(link = "log"), data = present_sample)
)
model_frames <- list()
for (i in seq_along(models)) {
model_summary <- summary(models[[i]])
model_frames[[i]] <- data.frame(
Coefficient = model_summary$coef[-1, 1],
pval = model_summary$coef[-1, 4],
SE = model_summary$coef[-1, 2],
Predictor = predictors[i]
)
}
combined_df <- do.call(rbind, model_frames)
combined_df <- combined_df %>%
mutate(Predictor = fct_inorder(rev(Predictor)))
comment_plot_pred_active <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(Coefficient))) +
geom_hline(yintercept = 0, colour = gray(1 / 2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "black",
lwd = 1 , position = position_dodge(width = 2 / 3)) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Comment activity", subtitle = "Poisson prediction of comment count") +
xlab("") +
ylab("Change in log number of comments")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
comment_plot_pred_active
ggsave("../output/comment_plot_pred_poisson.pdf", comment_plot_pred_active, width = 4, height = 4)
# Filter for active users
present_sample <- scaled_sample %>% filter(active == 1)
# Fit Poisson models with random effects
models <- lapply(predictors, function(pred) {
formula <- as.formula(paste("comment_count ~", pred, "+ (1 +", pred," | subreddit_w1)"))
glmer(formula, data = present_sample, family = poisson(link = "log"))
})
# Extract model summaries
model_frames <- list()
for (i in seq_along(models)) {
coefs <- summary(models[[i]])$coefficients
model_frames[[i]] <- data.frame(
Coefficient = coefs[-1, "Estimate"], # Exclude intercept
SE = coefs[-1, "Std. Error"],
pval = coefs[-1, "Pr(>|z|)"],
Predictor = predictor_labels[i]
)
}
# Combine results
combined_df <- do.call(rbind, model_frames) %>%
mutate(Predictor = fct_inorder(rev(Predictor)))
# Define interval for confidence intervals
interval2 <- 1.96 # 95% CI
# Create the plot
comment_plot_pred_active_random <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(Coefficient))) +
geom_hline(yintercept = 0, colour = gray(1 / 2), lty = 2) +
geom_vline(xintercept = 7.5)+
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "black",
lwd = 1 , position = position_dodge(width = 2 / 3)) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Count model | comment activity", subtitle = "Poisson model | subreddit random effects") +
xlab("") +
ylab("Change in log number of comments") +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
# Display and save the plot
comment_plot_pred_active_random
ggsave("../output/comment_plot_pred_poisson_random.pdf", comment_plot_pred_active_random, width = 5, height = 5)
present_sample <- scaled_sample%>%filter(active == 1)
mean(present_sample$comment_count)
## [1] 17.35347
sd(present_sample$comment_count)
## [1] 18.97236
max(present_sample$comment_count)
## [1] 114
models <- list(
lm_robust(comment_count ~ gender_male, data = present_sample),
lm_robust(comment_count ~ age, data = present_sample),
lm_robust(comment_count ~ education, data = present_sample),
lm_robust(comment_count ~ comments_online_w1, data = present_sample),
lm_robust(comment_count ~ comment_karma, data = present_sample),
lm_robust(comment_count ~ leftright_w1, data = present_sample),
lm_robust(comment_count ~ polinterest_w1, data = present_sample),
lm_robust(comment_count ~ affective_polarization_w1, data = present_sample),
lm_robust(comment_count ~ trust_general_1_w1, data = present_sample),
lm_robust(comment_count ~ trust_general_2_w1, data = present_sample),
lm_robust(comment_count ~ trust_general_3_w1, data = present_sample),
lm_robust(comment_count ~ trust_general_4_w1, data = present_sample),
lm_robust(comment_count ~ mean_issue_distance_abs, data = present_sample),
lm_robust(comment_count ~ mean_group_respectful, data = present_sample),
lm_robust(comment_count ~ mean_group_knowledgeable, data = present_sample),
lm_robust(comment_count ~ mean_group_polarized, data = present_sample),
lm_robust(comment_count ~ mean_discussion_enjoyable, data = present_sample),
lm_robust(comment_count ~ mean_discussion_toxic, data = present_sample),
lm_robust(comment_count ~ mean_discussion_constructive, data = present_sample)
)
model_frames <- list()
for (i in seq_along(models)) {
model_frames[[i]] <- data.frame(
Coefficient = summary(models[[i]])$coef[-1, 1],
pval = summary(models[[i]])$coef[,4],
SE = summary(models[[i]])$coef[-1, 2],
Predictor = predictors[i]
)
}
combined_df <- do.call(rbind, model_frames)
combined_df <- combined_df%>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor)))%>%
group_by(Predictor)%>%
slice(2)
comment_plot_pred_active_ex0 <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(Coefficient))) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE*interval2,
ymax = Coefficient + SE*interval2), colour = "black",
lwd = 1/2, position = position_dodge(width = 2/3)) +
guides(alpha = "none")+
theme_bw(base_size=10)+
coord_flip()+
ggtitle("Comment activity", subtitle = "Linear prediction of comment count")+
xlab("")+
ylab("Scaled beta coefficients, HC2 robust SE")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
comment_plot_pred_active_ex0
ggsave("../output/comment_plot_pred_active_excl0.pdf",comment_plot_pred_active_ex0, width = 4, height = 4)
present_sample <- scaled_sample%>%filter(active == 1)
mean(present_sample$comment_mean_score)
## [1] 2.607417
sd(present_sample$comment_mean_score)
## [1] 1.722391
max(present_sample$comment_mean_score)
## [1] 18
models <- list(
lm_robust(comment_mean_score ~ gender_male, data = present_sample),
lm_robust(comment_mean_score ~ age, data = present_sample),
lm_robust(comment_mean_score ~ education, data = present_sample),
lm_robust(comment_mean_score ~ comments_online_w1, data = present_sample),
lm_robust(comment_mean_score ~ comment_karma, data = present_sample),
lm_robust(comment_mean_score ~ leftright_w1, data = present_sample),
lm_robust(comment_mean_score ~ polinterest_w1, data = present_sample),
lm_robust(comment_mean_score ~ affective_polarization_w1, data = present_sample),
lm_robust(comment_mean_score ~ trust_general_1_w1, data = present_sample),
lm_robust(comment_mean_score ~ trust_general_2_w1, data = present_sample),
lm_robust(comment_mean_score ~ trust_general_3_w1, data = present_sample),
lm_robust(comment_mean_score ~ trust_general_4_w1, data = present_sample),
lm_robust(comment_mean_score ~ mean_issue_distance_abs, data = present_sample),
lm_robust(comment_mean_score ~ mean_group_respectful, data = present_sample),
lm_robust(comment_mean_score ~ mean_group_knowledgeable, data = present_sample),
lm_robust(comment_mean_score ~ mean_group_polarized, data = present_sample),
lm_robust(comment_mean_score ~ mean_discussion_enjoyable, data = present_sample),
lm_robust(comment_mean_score ~ mean_discussion_toxic, data = present_sample),
lm_robust(comment_mean_score ~ mean_discussion_constructive, data = present_sample)
)
model_frames <- list()
for (i in seq_along(models)) {
model_frames[[i]] <- data.frame(
Coefficient = summary(models[[i]])$coef[-1, 1],
pval = summary(models[[i]])$coef[,4],
SE = summary(models[[i]])$coef[-1, 2],
Predictor = predictors[i]
)
}
combined_df <- do.call(rbind, model_frames)
combined_df <- combined_df%>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor)))%>%
group_by(Predictor)%>%
slice(2)
comment_plot_pred_active_ex0_score <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(Coefficient))) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE*interval2,
ymax = Coefficient + SE*interval2), colour = "black",
lwd = 1/2, position = position_dodge(width = 2/3)) +
guides(alpha = "none")+
theme_bw(base_size=10)+
coord_flip()+
ggtitle("Comment scores", subtitle = "Linear prediction of comment scores")+
xlab("")+
ylab("Scaled beta coefficients, HC2 robust SE")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
comment_plot_pred_active_ex0_score
ggsave("../output/comment_plot_pred_active_excl0_score.pdf",comment_plot_pred_active_ex0_score, width = 4, height = 4)
present_sample <- scaled_sample%>%filter(active2 != "inactive")
# filtered data for control condition
scaled_sample_c <- present_sample %>% filter(condition == "control")
# models for control condition
models_control <- list(
hurdle(comment_count ~ gender_male, data = scaled_sample_c),
hurdle(comment_count ~ age, data = scaled_sample_c),
hurdle(comment_count ~ education, data = scaled_sample_c),
hurdle(comment_count ~ comments_online_w1, data = scaled_sample_c),
hurdle(comment_count ~ comment_karma, data = scaled_sample_c),
hurdle(comment_count ~ leftright_w1, data = scaled_sample_c),
hurdle(comment_count ~ polinterest_w1, data = scaled_sample_c),
hurdle(comment_count ~ affective_polarization_w1, data = scaled_sample_c),
hurdle(comment_count ~ trust_general_1_w1, data = scaled_sample_c),
hurdle(comment_count ~ trust_general_2_w1, data = scaled_sample_c),
hurdle(comment_count ~ trust_general_3_w1, data = scaled_sample_c),
hurdle(comment_count ~ trust_general_4_w1, data = scaled_sample_c),
hurdle(comment_count ~ mean_issue_distance_abs, data = scaled_sample_c),
hurdle(comment_count ~ mean_group_respectful, data = scaled_sample_c),
hurdle(comment_count ~ mean_group_knowledgeable, data = scaled_sample_c),
hurdle(comment_count ~ mean_group_polarized, data = scaled_sample_c),
hurdle(comment_count ~ mean_discussion_enjoyable, data = scaled_sample_c),
hurdle(comment_count ~ mean_discussion_toxic, data = scaled_sample_c),
hurdle(comment_count ~ mean_discussion_constructive, data = scaled_sample_c)
)
model_frames_control <- list()
for (i in seq_along(models_control)) {
model_summary <- summary(models_control[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames_control[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df_control <- bind_rows(model_frames_control) %>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_c <- ggplot(combined_df_control, aes(x = rev(Predictor), alpha = AlphaValue)) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "grey", lwd = 1) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
facet_wrap(~ Model, ncol = 2, scales = "free_x") +
ggtitle("Control condition", subtitle = "Prediction of user-level discussion participation") +
xlab("") +
ylab("Zero hurdle models for count data regression")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
# filtered data for moderation condition
scaled_sample_m <- present_sample %>% filter(condition == "moderation")
# Models for moderation condition
models_moderation <- list(
hurdle(comment_count ~ gender_male, data = scaled_sample_m),
hurdle(comment_count ~ age, data = scaled_sample_m),
hurdle(comment_count ~ education, data = scaled_sample_m),
hurdle(comment_count ~ comments_online_w1, data = scaled_sample_m),
hurdle(comment_count ~ comment_karma, data = scaled_sample_m),
hurdle(comment_count ~ leftright_w1, data = scaled_sample_m),
hurdle(comment_count ~ polinterest_w1, data = scaled_sample_m),
hurdle(comment_count ~ affective_polarization_w1, data = scaled_sample_m),
hurdle(comment_count ~ trust_general_1_w1, data = scaled_sample_m),
hurdle(comment_count ~ trust_general_2_w1, data = scaled_sample_m),
hurdle(comment_count ~ trust_general_3_w1, data = scaled_sample_m),
hurdle(comment_count ~ trust_general_4_w1, data = scaled_sample_m),
hurdle(comment_count ~ mean_issue_distance_abs, data = scaled_sample_m),
hurdle(comment_count ~ mean_group_respectful, data = scaled_sample_m),
hurdle(comment_count ~ mean_group_knowledgeable, data = scaled_sample_m),
hurdle(comment_count ~ mean_group_polarized, data = scaled_sample_m),
hurdle(comment_count ~ mean_discussion_enjoyable, data = scaled_sample_m),
hurdle(comment_count ~ mean_discussion_toxic, data = scaled_sample_m),
hurdle(comment_count ~ mean_discussion_constructive, data = scaled_sample_m)
)
model_frames_moderation <- list()
for (i in seq_along(models_moderation)) {
model_summary <- summary(models_moderation[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames_moderation[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df_moderation <- bind_rows(model_frames_moderation) %>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_m <- ggplot(combined_df_moderation, aes(x = rev(Predictor), alpha = AlphaValue)) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "#6699FF", lwd = 1) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
facet_wrap(~ Model, ncol = 2, scales = "free_x") +
ggtitle("Moderation condition", subtitle = "Prediction of user-level discussion participation") +
xlab("") +
ylab("Zero hurdle models for count data regression")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
# filtered data for incentives condition
scaled_sample_i <- present_sample %>% filter(condition == "incentives")
# Models for incentives condition
models_incentives <- list(
hurdle(comment_count ~ gender_male, data = scaled_sample_i),
hurdle(comment_count ~ age, data = scaled_sample_i),
hurdle(comment_count ~ education, data = scaled_sample_i),
hurdle(comment_count ~ comments_online_w1, data = scaled_sample_i),
hurdle(comment_count ~ comment_karma, data = scaled_sample_i),
hurdle(comment_count ~ leftright_w1, data = scaled_sample_i),
hurdle(comment_count ~ polinterest_w1, data = scaled_sample_i),
hurdle(comment_count ~ affective_polarization_w1, data = scaled_sample_i),
hurdle(comment_count ~ trust_general_1_w1, data = scaled_sample_i),
hurdle(comment_count ~ trust_general_2_w1, data = scaled_sample_i),
hurdle(comment_count ~ trust_general_3_w1, data = scaled_sample_i),
hurdle(comment_count ~ trust_general_4_w1, data = scaled_sample_i),
hurdle(comment_count ~ mean_issue_distance_abs, data = scaled_sample_i),
hurdle(comment_count ~ mean_group_respectful, data = scaled_sample_i),
hurdle(comment_count ~ mean_group_knowledgeable, data = scaled_sample_i),
hurdle(comment_count ~ mean_group_polarized, data = scaled_sample_i),
hurdle(comment_count ~ mean_discussion_enjoyable, data = scaled_sample_i),
hurdle(comment_count ~ mean_discussion_toxic, data = scaled_sample_i),
hurdle(comment_count ~ mean_discussion_constructive, data = scaled_sample_i)
)
model_frames_incentives <- list()
for (i in seq_along(models_incentives)) {
model_summary <- summary(models_incentives[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames_incentives[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df_incentives <- bind_rows(model_frames_incentives) %>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_i <- ggplot(combined_df_incentives, aes(x = rev(Predictor), alpha = AlphaValue)) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "#C5701A", lwd = 1) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
facet_wrap(~ Model, ncol = 2, scales = "free_x") +
ggtitle("Incentives condition", subtitle = "Prediction of user-level discussion participation") +
xlab("") +
ylab("Zero hurdle models for count data regression")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
combi_conditions <- grid.arrange(comment_plot_pred_c, comment_plot_pred_m, comment_plot_pred_i, nrow = 3)
ggsave("../output/combi_comment_plot_pred_conditions_hurdle.pdf", combi_conditions, width = 8, height = 12)
present_sample <- scaled_sample%>%filter(active2 != "inactive")
# filtered data for control condition
scaled_sample_c <- present_sample %>% filter(condition == "control")
models_control <- list(
hurdle(comment_count ~ gender_male, data = scaled_sample_c),
hurdle(comment_count ~ polinterest_w1, data = scaled_sample_c),
hurdle(comment_count ~ mean_group_knowledgeable, data = scaled_sample_c),
hurdle(comment_count ~ mean_discussion_toxic, data = scaled_sample_c)
)
predictors <- c("Gender (male)", "Political interest",
"Group knowledgeable", "Discussion toxic")
model_frames_control <- list()
for (i in seq_along(models_control)) {
model_summary <- summary(models_control[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames_control[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df_control <- bind_rows(model_frames_control) %>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_c <- ggplot(combined_df_control, aes(x = rev(Predictor), alpha = AlphaValue)) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "grey", lwd = 1) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Control condition", subtitle = "Prediction of discussion participation") +
xlab("") +
ylab("")+
facet_wrap(~ forcats::fct_rev(Model), ncol = 2, scales = "free_x")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
# filtered data for moderation condition
scaled_sample_m <- present_sample %>% filter(condition == "moderation")
models_moderation <- list(
hurdle(comment_count ~ gender_male, data = scaled_sample_m),
hurdle(comment_count ~ polinterest_w1, data = scaled_sample_m),
hurdle(comment_count ~ mean_group_knowledgeable, data = scaled_sample_m),
hurdle(comment_count ~ mean_discussion_toxic, data = scaled_sample_m)
)
model_frames_moderation <- list()
for (i in seq_along(models_moderation)) {
model_summary <- summary(models_moderation[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames_moderation[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df_moderation <- bind_rows(model_frames_moderation) %>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_m <- ggplot(combined_df_moderation, aes(x = rev(Predictor))) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "#6699FF", lwd = 1) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Moderation condition", subtitle = "Prediction of discussion participation") +
xlab("") +
ylab("")+
facet_wrap(~ forcats::fct_rev(Model), ncol = 2, scales = "free_x")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
scaled_sample_i <- present_sample %>% filter(condition == "incentives")
models_incentives <- list(
hurdle(comment_count ~ gender_male, data = scaled_sample_i),
hurdle(comment_count ~ polinterest_w1, data = scaled_sample_i),
hurdle(comment_count ~ mean_group_knowledgeable, data = scaled_sample_i),
hurdle(comment_count ~ mean_discussion_toxic, data = scaled_sample_i)
)
model_frames_incentives <- list()
for (i in seq_along(models_incentives)) {
model_summary <- summary(models_incentives[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames_incentives[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df_incentives <- bind_rows(model_frames_incentives) %>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_i <- ggplot(combined_df_incentives, aes(x = rev(Predictor))) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "#C5701A", lwd = 1) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Incentives condition",subtitle = "Prediction of discussion participation") +
xlab("") +
ylab("")+
facet_wrap(~ forcats::fct_rev(Model), ncol = 2, scales = "free_x")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
combi_conditions <- grid.arrange(comment_plot_pred_m, comment_plot_pred_i, nrow = 1)
ggsave("../output/combi_comment_plot_pred_conditions_hurdle_simple.pdf", combi_conditions, width = 10, height = 3)
# variables balance table
covariates <- c("age", "gender", "education","time_online_w1" , "social_media_w1" , "comments_online_w1" ,
"leftright_w1" , "polinterest_w1" , "affective_pol_1_w1" ,
"affective_pol_2_w1" , "issue_attitudes_loan_w1" , "issue_attitudes_airbnb_w1" ,
"issue_attitudes_minwage_w1" , "issue_attitudes_ukraine_w1" , "issue_attitudes_ubi_w1" ,
"issue_attitudes_climate_w1" , "issue_attitudes_fur_w1" , "issue_attitudes_renewable_w1" ,
"issue_attitudes_gaza_w1" , "issue_attitudes_vegetarian_w1" , "issue_attitudes_gender_w1" ,
"issue_attitudes_sexwork_w1" , "issue_attitudes_vaccine_w1" , "issue_attitudes_healthcare_w1" ,
"issue_attitudes_immigration_w1", "issue_attitudes_guns_w1" , "issue_attitudes_deathpenalty_w1",
"issue_attitudes_bodycams_w1" , "issue_attitudes_ai_w1" , "issue_attitudes_socialmedia_w1" ,
"issue_knowledge_loan_w1" , "issue_knowledge_airbnb_w1" , "issue_knowledge_minwage_w1" ,
"issue_knowledge_ukraine_w1" , "issue_knowledge_ubi_w1" , "issue_knowledge_climate_w1" ,
"issue_knowledge_fur_w1" , "issue_knowledge_renewable_w1" , "issue_knowledge_mideast_w1" ,
"issue_knowledge_vegetarian_w1" , "issue_knowledge_gender_w1" , "issue_knowledge_sexwork_w1" ,
"issue_knowledge_vaccine_w1" , "issue_knowledge_healthcare_w1" , "issue_knowledge_immigration_w1" ,
"issue_knowledge_guns_w1" , "issue_knowledge_deathpenalty_w1", "issue_knowledge_bodycams_w1" ,
"issue_knowledge_ai_w1" , "issue_knowledge_socialmedia_w1", "political_efficacy_1_w1" ,
"political_efficacy_2_w1" , "political_efficacy_3_w1" , "trust_general_1_w1" ,
"trust_general_2_w1" , "trust_general_3_w1" , "trust_general_4_w1" ,
"pol_cynicism_w1", "affective_polarization_w1" , "efficacy_w1" ,"comment_karma","comment_count" , "comment_mean_lenght" , "comment_mean_score" , "comment_mean_tox" , "ex_comment_count", "ex_comment_mean_score" , "ex_comment_mean_tox" , "ex_comment_mean_lenght" )
table1 <- CreateTableOne(vars = covariates, strata = "condition", data = full_data)
latex_table <- print(table1, smd = TRUE, printToggle = FALSE)
rownames(latex_table) <- gsub("\\(mean \\(SD\\)\\)", "", rownames(latex_table))
knitr::kable(latex_table[, -grep("test", colnames(latex_table))],
format = "html", booktabs = TRUE)
control | incentives | moderation | p | SMD | |
---|---|---|---|---|---|
n | 181 | 169 | 170 | ||
age | 43.46 (13.90) | 43.03 (13.69) | 43.77 (13.71) | 0.883 | 0.036 |
gender | 1.62 (0.59) | 1.54 (0.61) | 1.56 (0.60) | 0.442 | 0.089 |
education | 4.28 (1.35) | 4.25 (1.38) | 4.27 (1.31) | 0.982 | 0.013 |
time_online_w1 | 3.93 (0.50) | 3.89 (0.44) | 3.92 (0.53) | 0.743 | 0.056 |
social_media_w1 | 3.77 (0.64) | 3.81 (0.60) | 3.83 (0.60) | 0.628 | 0.066 |
comments_online_w1 | 3.48 (1.08) | 3.59 (1.00) | 3.68 (1.02) | 0.189 | 0.130 |
leftright_w1 | 3.73 (2.06) | 3.85 (2.04) | 4.16 (2.18) | 0.150 | 0.134 |
polinterest_w1 | 3.60 (0.62) | 3.60 (0.56) | 3.61 (0.55) | 0.971 | 0.017 |
affective_pol_1_w1 | 53.20 (25.74) | 50.31 (27.37) | 48.66 (27.65) | 0.278 | 0.113 |
affective_pol_2_w1 | 16.51 (21.50) | 15.16 (21.11) | 16.46 (19.67) | 0.795 | 0.043 |
issue_attitudes_loan_w1 | 1.96 (1.43) | 2.20 (1.55) | 2.41 (1.58) | 0.020 | 0.201 |
issue_attitudes_airbnb_w1 | 3.12 (1.46) | 3.15 (1.44) | 3.08 (1.49) | 0.904 | 0.033 |
issue_attitudes_minwage_w1 | 5.24 (1.19) | 5.20 (1.28) | 4.87 (1.51) | 0.019 | 0.179 |
issue_attitudes_ukraine_w1 | 4.49 (1.61) | 4.61 (1.48) | 4.36 (1.70) | 0.354 | 0.105 |
issue_attitudes_ubi_w1 | 1.98 (1.33) | 2.21 (1.50) | 2.45 (1.53) | 0.012 | 0.213 |
issue_attitudes_climate_w1 | 5.19 (1.24) | 5.02 (1.50) | 5.02 (1.42) | 0.425 | 0.084 |
issue_attitudes_fur_w1 | 3.08 (1.62) | 3.18 (1.57) | 2.91 (1.48) | 0.254 | 0.120 |
issue_attitudes_renewable_w1 | 1.41 (0.93) | 1.43 (0.90) | 1.61 (1.20) | 0.158 | 0.120 |
issue_attitudes_gaza_w1 | 4.81 (1.44) | 4.92 (1.45) | 4.68 (1.51) | 0.320 | 0.109 |
issue_attitudes_vegetarian_w1 | 1.75 (0.97) | 1.89 (1.16) | 1.83 (1.07) | 0.458 | 0.089 |
issue_attitudes_gender_w1 | 2.74 (1.63) | 2.92 (1.70) | 3.06 (1.73) | 0.208 | 0.126 |
issue_attitudes_sexwork_w1 | 2.40 (1.37) | 2.25 (1.30) | 2.49 (1.50) | 0.282 | 0.115 |
issue_attitudes_vaccine_w1 | 3.53 (1.55) | 3.62 (1.60) | 3.35 (1.68) | 0.279 | 0.113 |
issue_attitudes_healthcare_w1 | 1.69 (1.29) | 1.85 (1.44) | 2.03 (1.51) | 0.075 | 0.163 |
issue_attitudes_immigration_w1 | 3.31 (1.50) | 3.21 (1.51) | 3.48 (1.65) | 0.260 | 0.117 |
issue_attitudes_guns_w1 | 4.89 (1.50) | 4.63 (1.66) | 4.45 (1.73) | 0.042 | 0.179 |
issue_attitudes_deathpenalty_w1 | 2.12 (1.46) | 2.08 (1.49) | 2.26 (1.62) | 0.502 | 0.079 |
issue_attitudes_bodycams_w1 | 5.70 (0.65) | 5.59 (0.82) | 5.62 (0.70) | 0.385 | 0.097 |
issue_attitudes_ai_w1 | 2.44 (1.29) | 2.51 (1.26) | 2.44 (1.38) | 0.861 | 0.035 |
issue_attitudes_socialmedia_w1 | 3.24 (1.53) | 3.32 (1.50) | 3.34 (1.44) | 0.803 | 0.044 |
issue_knowledge_loan_w1 | 3.05 (0.81) | 3.11 (0.78) | 3.08 (0.80) | 0.764 | 0.052 |
issue_knowledge_airbnb_w1 | 2.53 (0.76) | 2.64 (0.92) | 2.58 (0.79) | 0.425 | 0.092 |
issue_knowledge_minwage_w1 | 3.18 (0.68) | 3.20 (0.72) | 3.11 (0.73) | 0.439 | 0.089 |
issue_knowledge_ukraine_w1 | 2.86 (0.79) | 3.01 (0.75) | 3.02 (0.74) | 0.086 | 0.140 |
issue_knowledge_ubi_w1 | 2.70 (0.75) | 2.85 (0.71) | 2.74 (0.76) | 0.126 | 0.142 |
issue_knowledge_climate_w1 | 3.24 (0.73) | 3.33 (0.71) | 3.30 (0.65) | 0.440 | 0.089 |
issue_knowledge_fur_w1 | 2.31 (0.80) | 2.28 (0.84) | 2.29 (0.94) | 0.962 | 0.020 |
issue_knowledge_renewable_w1 | 2.94 (0.72) | 3.07 (0.72) | 2.98 (0.77) | 0.261 | 0.115 |
issue_knowledge_mideast_w1 | 2.82 (0.72) | 2.95 (0.74) | 2.85 (0.75) | 0.257 | 0.112 |
issue_knowledge_vegetarian_w1 | 2.76 (0.79) | 2.79 (0.94) | 2.81 (0.85) | 0.857 | 0.039 |
issue_knowledge_gender_w1 | 3.13 (0.80) | 3.17 (0.81) | 3.11 (0.86) | 0.760 | 0.053 |
issue_knowledge_sexwork_w1 | 2.64 (0.80) | 2.62 (0.92) | 2.56 (0.82) | 0.713 | 0.056 |
issue_knowledge_vaccine_w1 | 2.92 (0.76) | 3.05 (0.76) | 2.98 (0.70) | 0.291 | 0.112 |
issue_knowledge_healthcare_w1 | 3.18 (0.70) | 3.28 (0.72) | 3.17 (0.69) | 0.304 | 0.101 |
issue_knowledge_immigration_w1 | 2.92 (0.74) | 2.93 (0.74) | 2.97 (0.76) | 0.793 | 0.048 |
issue_knowledge_guns_w1 | 3.16 (0.72) | 3.17 (0.71) | 3.08 (0.73) | 0.459 | 0.082 |
issue_knowledge_deathpenalty_w1 | 2.99 (0.78) | 3.07 (0.81) | 3.02 (0.73) | 0.604 | 0.070 |
issue_knowledge_bodycams_w1 | 2.86 (0.73) | 3.05 (0.77) | 2.85 (0.73) | 0.015 | 0.184 |
issue_knowledge_ai_w1 | 2.75 (0.71) | 2.94 (0.76) | 2.81 (0.74) | 0.050 | 0.170 |
issue_knowledge_socialmedia_w1 | 3.24 (0.62) | 3.25 (0.74) | 3.25 (0.71) | 0.988 | 0.011 |
political_efficacy_1_w1 | 2.08 (1.05) | 2.03 (0.90) | 2.08 (1.05) | 0.865 | 0.036 |
political_efficacy_2_w1 | 4.54 (1.10) | 4.67 (1.00) | 4.64 (0.98) | 0.429 | 0.089 |
political_efficacy_3_w1 | 4.76 (1.04) | 4.82 (0.96) | 4.78 (0.95) | 0.846 | 0.041 |
trust_general_1_w1 | 1.77 (0.62) | 1.84 (0.63) | 1.81 (0.69) | 0.629 | 0.069 |
trust_general_2_w1 | 2.08 (0.67) | 2.08 (0.68) | 2.06 (0.66) | 0.941 | 0.024 |
trust_general_3_w1 | 3.62 (0.55) | 3.63 (0.53) | 3.49 (0.68) | 0.058 | 0.152 |
trust_general_4_w1 | 2.40 (0.72) | 2.46 (0.66) | 2.32 (0.73) | 0.195 | 0.131 |
pol_cynicism_w1 | 4.17 (0.99) | 3.99 (1.05) | 4.16 (1.08) | 0.204 | 0.114 |
affective_polarization_w1 | 46.36 (28.59) | 45.01 (28.05) | 41.25 (30.20) | 0.267 | 0.117 |
efficacy_w1 | 3.03 (0.35) | 3.01 (0.30) | 3.03 (0.35) | 0.865 | 0.036 |
comment_karma | 17451.63 (42386.03) | 16819.56 (36107.33) | 25077.38 (65773.47) | 0.233 | 0.103 |
comment_count | 16.64 (20.16) | 17.38 (15.67) | 18.10 (20.91) | 0.850 | 0.050 |
comment_mean_lenght | 426.23 (261.86) | 432.31 (248.13) | 398.90 (286.95) | 0.618 | 0.083 |
comment_mean_score | 3.09 (1.70) | 2.37 (1.91) | 2.33 (1.41) | 0.001 | 0.303 |
comment_mean_tox | 0.09 (0.05) | 0.10 (0.06) | 0.11 (0.07) | 0.183 | 0.167 |
ex_comment_count | 136.05 (169.42) | 159.14 (164.32) | 165.42 (176.26) | 0.291 | 0.115 |
ex_comment_mean_score | 6.22 (6.61) | 7.64 (7.23) | 8.63 (11.69) | 0.059 | 0.187 |
ex_comment_mean_tox | 0.12 (0.07) | 0.13 (0.07) | 0.12 (0.06) | 0.672 | 0.071 |
ex_comment_mean_lenght | 229.17 (173.18) | 205.92 (136.93) | 192.37 (124.69) | 0.093 | 0.165 |
profile_sample <- scaled_sample %>%
mutate(group = case_when(comment_count == 0 ~ "Never Comment",
comment_count > 0 & comment_count <= quantile(comment_count[comment_count > 0], 0.3) ~ "Infrequent Commenter",
comment_count > quantile(comment_count[comment_count > 0], 0.7) ~ "Frequent Commenter",
TRUE ~ NA_character_))
table(profile_sample$group)
##
## Frequent Commenter Infrequent Commenter Never Comment
## 94 106 189
profile_sample <- profile_sample %>% filter(!is.na(group))%>%
mutate(gender_male = as.numeric(as.character(gender_male)))%>%
rename_with(~ gsub("_", ".", .x))
# Calculate means and confidence intervals for the specified variables by group
summary_table <- profile_sample %>%
group_by(group) %>%
summarise(across(c(gender.male, comments.online.w1, polinterest.w1,
mean.group.respectful, mean.group.knowledgeable, mean.group.polarized,
mean.discussion.enjoyable, mean.discussion.toxic, mean.discussion.constructive),
list(mean = ~mean(.x, na.rm = TRUE),
lower = ~mean(.x, na.rm = TRUE) - qt(0.975, df=n()-1) * (sd(.x, na.rm = TRUE)/sqrt(n())),
upper = ~mean(.x, na.rm = TRUE) + qt(0.975, df=n()-1) * (sd(.x, na.rm = TRUE)/sqrt(n())))))%>%
pivot_longer(-group, names_to = c("variable", "value"), names_sep = "_", names_repair = "unique")%>%
pivot_wider(names_from = value...3, values_from = value...4)
variable_labels <- c(
gender.male = "Male Gender",
comments.online.w1 = "Commenting Online",
polinterest.w1 = "Political Interest",
mean.group.respectful = "Group respectful",
mean.group.knowledgeable = "Group knowledgeable",
mean.group.polarized = "Group polarized",
mean.discussion.enjoyable = "Discussions enjoyable",
mean.discussion.toxic = "Discussions toxic",
mean.discussion.constructive = "Discussions constructive"
)
summary_table$variable <- factor(summary_table$variable,
levels = names(variable_labels), labels = variable_labels)
profiles <- ggplot(summary_table, aes(x = group, y = mean, fill = group)) +
geom_bar(stat = "identity", position = position_dodge(), alpha = 0.8) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, position = position_dodge(0.9)) +
facet_wrap(~variable, scales = "free_y", ncol = 3) + # Use facet_wrap with variable
labs(title = "Profiles of three user groups",
x = "",
y = "Mean Values by Group with 95% Confidence Intervals") +
theme_minimal() +
theme(legend.position = "bottom", axis.text.x=element_blank(),
strip.text = element_text(size = 10)) +
scale_fill_manual(values = c("#334c7f","#b2ccff","lightgrey"))
profiles
ggsave("../output/profiles_participation.pdf", profiles, width = 10, height = 6)