library(tidyverse)
library(readr)
library(gridExtra)
library(haven)
library(RColorBrewer)
library(estimatr)
library(ggridges)
library(DescTools)
library(lme4)
library(brms)
library(purrr)
library(tableone)
library(pscl)
library(ggcorrplot)
library(kableExtra)
Import anonymized data (no comment texts, open-ended comments, usernames, or email addresses)
# reddit data
user_data <- read_csv("../data/anon/users_anon.csv")
discussion_data <- read_csv("../data/anon/discussions_anon.csv")
external_data <- read_csv("../data/anon/external_reddit_anon.csv")
# survey data
pre_survey <- read_csv("../data/anon/pre_survey_anon.csv")
post_surveys <- read_csv("../data/anon/post_surveys_anon.csv")
# sample (combined data)
sample <- read_csv("../data/anon/sample_anon.csv")
Preprocessing: combining pre- and post-surveys
# average group toxicity
group_toxicity_dat <- discussion_data%>%
mutate(subreddit_w1 = subreddit)%>%
group_by(subreddit_w1)%>%
mutate(group_toxicity = mean(comment_toxicity, na.rm=T))%>%
select(subreddit_w1, group_toxicity)%>%
slice(1)%>%
ungroup()
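For reference, the per-subreddit average could also be computed in one step with summarise(); this is only an illustrative sketch of an equivalent formulation and produces the same two columns (subreddit_w1, group_toxicity), one row per subreddit.
# equivalent sketch: per-subreddit mean comment toxicity in one step
discussion_data %>%
  group_by(subreddit_w1 = subreddit) %>%
  summarise(group_toxicity = mean(comment_toxicity, na.rm = TRUE))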
# any check-in surveys answered
any_check <- post_surveys%>%
distinct(ParticipantID)%>%
pull(ParticipantID)
post_1 <- post_surveys%>%filter(survey_week == 1)
post_2 <- post_surveys%>%filter(survey_week == 2)
post_3 <- post_surveys%>%filter(survey_week == 3)
post_4 <- post_surveys%>%filter(survey_week == 4)
# pre-post data (combining pre and post surveys)
full_data <- left_join(sample, post_1, by = "ParticipantID", suffix = c("_w1" , "_w2"))%>%
left_join(., post_2, by = "ParticipantID", suffix = c("" , "_w3"))%>%
left_join(., post_3, by = "ParticipantID", suffix = c("" , "_w4"))%>%
left_join(., post_4, by = "ParticipantID", suffix = c("" , "_w5"))%>%
left_join(.,group_toxicity_dat, by = "subreddit_w1")%>%
mutate(active = ifelse(is.na(comment_count), 0, 1),
condition = factor(condition),
active2 = ifelse(!ParticipantID %in% any_check & active == 0, "inactive", active))
write_csv(full_data, file = "../data/anon/full_data_waves.csv")
Data scaling for models, following Gelman (2008): https://onlinelibrary.wiley.com/doi/10.1002/sim.3107
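For reference, the function below implements the two-standard-deviation rescaling from that paper, (x - mean(x)) / (2 * sd(x)), which puts continuous inputs on a scale roughly comparable to untransformed binary predictors. A quick check on toy values (hypothetical numbers, not study data) shows that the rescaled variable has standard deviation 0.5.
# illustrative check of the two-SD scaling on toy values
x_toy <- c(18, 25, 31, 44, 52, 60)
sd((x_toy - mean(x_toy)) / (2 * sd(x_toy)))  # equals 0.5 by construction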
# scale data for better comparison of coefficients
gelman_scale <- function(x){
( (x - mean(x, na.rm=T)) / (2*sd(x, na.rm=TRUE)))
}
scaled_sample <- full_data%>%
# generation of check-in variables (average over check-ins)
mutate(mean_issue_distance_abs = rowMeans(select(., starts_with("issue_distance_abs")), na.rm = TRUE),
mean_discussion_constructive = rowMeans(select(., starts_with("discussion_percep_2")), na.rm = TRUE),
mean_discussion_enjoyable = rowMeans(select(., starts_with("discussion_percep_3")), na.rm = TRUE),
mean_discussion_toxic = rowMeans(select(., starts_with("discussion_percep_1")), na.rm = TRUE),
mean_group_knowledgeable = rowMeans(select(., starts_with("group_percep_3")), na.rm = TRUE),
mean_group_polarized = rowMeans(select(., starts_with("group_percep_2")), na.rm = TRUE),
mean_group_respectful = rowMeans(select(., starts_with("group_percep_1")), na.rm = TRUE))%>%
# scale selected variables with Gelman (2008) method by dividing by two standard deviations
select(ParticipantID, condition, subreddit_w1, comment_count, comment_mean_tox,comment_mean_score,
ex_comment_mean_score,
active, active2, gender, age, education, comments_online_w1, group_toxicity,
leftright_w1, polinterest_w1, affective_polarization_w1,
affective_pol_1_w1,affective_pol_2_w1,comment_karma,
pol_cynicism_w1, efficacy_w1, trust_general_1_w1,
trust_general_2_w1,trust_general_3_w1,trust_general_4_w1,
mean_issue_distance_abs, mean_discussion_constructive,
mean_discussion_enjoyable, mean_discussion_toxic,
mean_group_knowledgeable, mean_group_polarized,
mean_group_respectful)%>%
mutate(across(c(age:mean_group_respectful), gelman_scale))%>%
mutate(comment_count = ifelse(is.na(comment_count),0,comment_count),
gender_male = factor(ifelse(gender == 1, 1, 0)))
write_csv(scaled_sample, file = "../data/anon/scaled_sample.csv")
Hurdle models for count data (pscl::hurdle): https://search.r-project.org/CRAN/refmans/pscl/html/hurdle.html
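As a brief orientation (illustrative only, using scaled_sample and one predictor from above): with pscl's defaults, hurdle() combines a binomial (logit) zero-hurdle part, modelling whether a user comments at all, with a truncated Poisson count part for how many comments are written given at least one. The coefficient blocks extracted in the loops below correspond to these two parts.
# minimal sketch of one hurdle model and its two coefficient blocks
m_sketch <- hurdle(comment_count ~ polinterest_w1, data = scaled_sample)
summary(m_sketch)$coefficients$zero   # zero hurdle: any comment vs. none
summary(m_sketch)$coefficients$count  # truncated count part: number of comments if > 0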
# correspondence between perceptions and external measures
cor.test(scaled_sample$mean_discussion_toxic, scaled_sample$group_toxicity)
##
## Pearson's product-moment correlation
##
## data: scaled_sample$mean_discussion_toxic and scaled_sample$group_toxicity
## t = 2.8732, df = 364, p-value = 0.004302
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.04712583 0.24764617
## sample estimates:
## cor
## 0.1489165
present_sample <- scaled_sample%>%
filter(active2 != "inactive")%>%
# re-scale the external measure of group toxicity within this subsample
mutate(group_toxicity = gelman_scale(group_toxicity))
models <- list(
hurdle(comment_count ~ gender_male, data = present_sample),
hurdle(comment_count ~ age, data = present_sample),
hurdle(comment_count ~ education, data = present_sample),
hurdle(comment_count ~ comments_online_w1, data = present_sample),
hurdle(comment_count ~ comment_karma, data = present_sample),
hurdle(comment_count ~ leftright_w1, data = present_sample),
hurdle(comment_count ~ polinterest_w1, data = present_sample),
hurdle(comment_count ~ affective_polarization_w1, data = present_sample),
hurdle(comment_count ~ trust_general_1_w1, data = present_sample),
hurdle(comment_count ~ trust_general_2_w1, data = present_sample),
hurdle(comment_count ~ trust_general_3_w1, data = present_sample),
hurdle(comment_count ~ trust_general_4_w1, data = present_sample),
hurdle(comment_count ~ mean_issue_distance_abs, data = present_sample),
hurdle(comment_count ~ mean_group_respectful, data = present_sample),
hurdle(comment_count ~ mean_group_knowledgeable, data = present_sample),
hurdle(comment_count ~ mean_group_polarized, data = present_sample),
hurdle(comment_count ~ mean_discussion_enjoyable, data = present_sample),
hurdle(comment_count ~ mean_discussion_constructive, data = present_sample),
hurdle(comment_count ~ mean_discussion_toxic, data = present_sample),
hurdle(comment_count ~ group_toxicity, data = present_sample)
)
predictors <- c(
"Gender (male)", "Age", "Education", "Commenting online", "Comment karma", "Political orientation",
"Political interest", "Affective polarization", "Trust in politics","Trust in media",
"Trust in science", "Trust in others", "Issue distance",
"Group respectful", "Group knowledgeable", "Group polarized",
"Discussion enjoyable", "Discussion constructive","Discussion toxic",
"Average group toxicity")
model_frames <- list()
for (i in seq_along(models)) {
model_summary <- summary(models[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df <- do.call(rbind, model_frames)
combined_df <- combined_df %>%
mutate(Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
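The same univariate model lists and coefficient-extraction loop are repeated for several specifications below. Purely as an illustrative sketch (the predictor subset and the tidy_hurdle() helper are hypothetical, not part of the workflow above), the list-and-loop pattern could also be expressed with purrr, which is already loaded:
# sketch: build univariate hurdle models programmatically and stack both coefficient blocks
rhs <- c("gender_male", "age", "polinterest_w1")  # illustrative subset of predictors
tidy_hurdle <- function(var, data) {
  m <- hurdle(reformulate(var, response = "comment_count"), data = data)
  s <- summary(m)$coefficients
  bind_rows(
    as.data.frame(s$zero)  %>% rownames_to_column("term") %>% mutate(Model = "Zero Hurdle Model"),
    as.data.frame(s$count) %>% rownames_to_column("term") %>% mutate(Model = "Count Model")
  ) %>%
    filter(term != "(Intercept)") %>%
    mutate(Predictor = var)
}
map_dfr(rhs, tidy_hurdle, data = present_sample)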
# multiplier for 95% confidence intervals (two-sided standard normal quantile, approx. 1.96)
interval2 <- -qnorm((1 - 0.95) / 2)
# note: rev() in aes() mirrors the rev() applied when setting factor levels above,
# keeping each coefficient paired with its own predictor label after coord_flip()
comment_plot_pred_active_hurdle_ext <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(AlphaValue))) +
geom_hline(yintercept = 0, colour = gray(1 / 2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "black",
lwd = 1, position = position_dodge(width = 2 / 3)) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Prediction of user-level discussion participation") +
xlab("") +
ylab("Zero hurdle models for count data regression") +
facet_wrap(~ forcats::fct_rev(Model), ncol = 2, scales = "free_x")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
ggsave("../output/comment_plot_pred_hurdle_with_external.pdf", comment_plot_pred_active_hurdle_ext, width = 6, height = 4)
comment_plot_pred_active_hurdle_ext
models <- list(
hurdle(comment_count ~ gender_male, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ age, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ education, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ comments_online_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ comment_karma, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ leftright_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ polinterest_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ affective_polarization_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ trust_general_1_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ trust_general_2_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ trust_general_3_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ trust_general_4_w1, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_issue_distance_abs, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_group_respectful, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_group_knowledgeable, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_group_polarized, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_discussion_enjoyable, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_discussion_toxic, data = present_sample, dist = "geometric"),
hurdle(comment_count ~ mean_discussion_constructive, data = present_sample, dist = "geometric")
)
predictors <- c(
"Gender (male)", "Age", "Education", "Commenting online", "Comment karma", "Political orientation",
"Political interest", "Affective polarization", "Trust in politics", "Trust in media",
"Trust in science", "Trust in others", "Issue distance",
"Group respectful", "Group knowledgeable", "Group polarized",
"Discussion enjoyable", "Discussion toxic", "Discussion constructive"
)
model_frames <- list()
for (i in seq_along(models)) {
model_summary <- summary(models[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df <- do.call(rbind, model_frames)
combined_df <- combined_df %>%
mutate(Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_active_hurdle_geometric <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(AlphaValue))) +
geom_hline(yintercept = 0, colour = gray(1 / 2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "black",
lwd = 1, position = position_dodge(width = 2 / 3)) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Prediction of user-level discussion participation") +
xlab("") +
ylab("Zero hurdle models for count data regression (geometric, binomial)") +
facet_wrap(~ Model, ncol = 2, scales = "free_x") +
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
ggsave("../output/comment_plot_pred_hurdle_geometric.pdf", comment_plot_pred_active_hurdle_geometric, width = 6, height = 4)
comment_plot_pred_active_hurdle_geometric
models <- list(
hurdle(comment_count ~ gender_male, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ age, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ education, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ comments_online_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ comment_karma, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ leftright_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ polinterest_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ affective_polarization_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ trust_general_1_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ trust_general_2_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ trust_general_3_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ trust_general_4_w1, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_issue_distance_abs, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_group_respectful, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_group_knowledgeable, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_group_polarized, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_discussion_enjoyable, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_discussion_toxic, data = present_sample, dist = "negbin"),
hurdle(comment_count ~ mean_discussion_constructive, data = present_sample, dist = "negbin")
)
model_frames <- list()
for (i in seq_along(models)) {
model_summary <- summary(models[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df <- do.call(rbind, model_frames)
combined_df <- combined_df %>%
rownames_to_column()%>%
filter(!str_detect(rowname,"theta"))%>%
mutate(Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_active_hurdle_negbin <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(AlphaValue))) +
geom_hline(yintercept = 0, colour = gray(1 / 2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "black",
lwd = 1, position = position_dodge(width = 2 / 3)) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Prediction of user-level discussion participation") +
xlab("") +
ylab("Zero hurdle models for count data regression (negbin, binomial)") +
facet_wrap(~ Model, ncol = 2, scales = "free_x") +
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
# Save the plot with "negbin" in the file name
ggsave("../output/comment_plot_pred_hurdle_negbin.pdf", comment_plot_pred_active_hurdle_negbin, width = 6, height = 4)
# Display the plot
comment_plot_pred_active_hurdle_negbin
# binomial (logit) models for any commenting activity, consistent with the log-odds scale reported below
models <- list(
glm(active ~ gender_male, family = binomial(link = "logit"), data = present_sample),
glm(active ~ age, family = binomial(link = "logit"), data = present_sample),
glm(active ~ education, family = binomial(link = "logit"), data = present_sample),
glm(active ~ comments_online_w1, family = binomial(link = "logit"), data = present_sample),
glm(active ~ comment_karma, family = binomial(link = "logit"), data = present_sample),
glm(active ~ leftright_w1, family = binomial(link = "logit"), data = present_sample),
glm(active ~ polinterest_w1, family = binomial(link = "logit"), data = present_sample),
glm(active ~ affective_polarization_w1, family = binomial(link = "logit"), data = present_sample),
glm(active ~ trust_general_1_w1, family = binomial(link = "logit"), data = present_sample),
glm(active ~ trust_general_2_w1, family = binomial(link = "logit"), data = present_sample),
glm(active ~ trust_general_3_w1, family = binomial(link = "logit"), data = present_sample),
glm(active ~ trust_general_4_w1, family = binomial(link = "logit"), data = present_sample),
glm(active ~ mean_issue_distance_abs, family = binomial(link = "logit"), data = present_sample),
glm(active ~ mean_group_respectful, family = binomial(link = "logit"), data = present_sample),
glm(active ~ mean_group_knowledgeable, family = binomial(link = "logit"), data = present_sample),
glm(active ~ mean_group_polarized, family = binomial(link = "logit"), data = present_sample),
glm(active ~ mean_discussion_enjoyable, family = binomial(link = "logit"), data = present_sample),
glm(active ~ mean_discussion_toxic, family = binomial(link = "logit"), data = present_sample),
glm(active ~ mean_discussion_constructive, family = binomial(link = "logit"), data = present_sample)
)
model_frames <- list()
for (i in seq_along(models)) {
model_frames[[i]] <- data.frame(
Coefficient = summary(models[[i]])$coef[-1, 1],
pval = summary(models[[i]])$coef[-1, 4],
SE = summary(models[[i]])$coef[-1, 2],
Predictor = predictors[i]
)
}
combined_df <- do.call(rbind, model_frames)
combined_df <- combined_df%>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor)))
comment_plot_pred_binary <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(Coefficient))) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE*interval2,
ymax = Coefficient + SE*interval2), colour = "black",
lwd = 1, position = position_dodge(width = 2/3)) +
guides(alpha = "none")+
theme_bw(base_size=10)+
coord_flip()+
ggtitle("Comment activity binary", subtitle = "Excluding inactive users")+
xlab("")+
ylab("Change in log odds")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
comment_plot_pred_binary
ggsave("../output/comment_plot_pred_binary.pdf",comment_plot_pred_binary, width = 4, height = 4)
present_sample <- scaled_sample%>%filter(active == 1)
models <- list(
glm(comment_count ~ gender_male, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ age, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ education, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ comments_online_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ comment_karma, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ leftright_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ polinterest_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ affective_polarization_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ trust_general_1_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ trust_general_2_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ trust_general_3_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ trust_general_4_w1, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_issue_distance_abs, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_group_respectful, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_group_knowledgeable, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_group_polarized, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_discussion_enjoyable, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_discussion_toxic, family = poisson(link = "log"), data = present_sample),
glm(comment_count ~ mean_discussion_constructive, family = poisson(link = "log"), data = present_sample)
)
model_frames <- list()
for (i in seq_along(models)) {
model_summary <- summary(models[[i]])
model_frames[[i]] <- data.frame(
Coefficient = model_summary$coef[-1, 1],
pval = model_summary$coef[-1, 4],
SE = model_summary$coef[-1, 2],
Predictor = predictors[i]
)
}
combined_df <- do.call(rbind, model_frames)
combined_df <- combined_df %>%
mutate(Predictor = fct_inorder(rev(Predictor)))
comment_plot_pred_active <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(Coefficient))) +
geom_hline(yintercept = 0, colour = gray(1 / 2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "black",
lwd = 1 , position = position_dodge(width = 2 / 3)) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Comment activity", subtitle = "Poisson prediction of comment count") +
xlab("") +
ylab("Change in log number of comments")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
comment_plot_pred_active
ggsave("../output/comment_plot_pred_poisson.pdf", comment_plot_pred_active, width = 4, height = 4)
present_sample <- scaled_sample%>%filter(active == 1)
mean(present_sample$comment_count)
## [1] 17.35347
sd(present_sample$comment_count)
## [1] 18.97236
max(present_sample$comment_count)
## [1] 114
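The variance of the comment counts is far larger than the mean, so the Poisson models above are likely overdispersed; as a quick illustrative sketch (not part of the original output), the usual variance-to-mean check makes this explicit before turning to the linear specifications.
# variance-to-mean ratio of comment counts among active users;
# values well above 1 indicate overdispersion relative to a Poisson model
var(present_sample$comment_count) / mean(present_sample$comment_count)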
models <- list(
lm_robust(comment_count ~ gender_male, data = present_sample),
lm_robust(comment_count ~ age, data = present_sample),
lm_robust(comment_count ~ education, data = present_sample),
lm_robust(comment_count ~ comments_online_w1, data = present_sample),
lm_robust(comment_count ~ comment_karma, data = present_sample),
lm_robust(comment_count ~ leftright_w1, data = present_sample),
lm_robust(comment_count ~ polinterest_w1, data = present_sample),
lm_robust(comment_count ~ affective_polarization_w1, data = present_sample),
lm_robust(comment_count ~ trust_general_1_w1, data = present_sample),
lm_robust(comment_count ~ trust_general_2_w1, data = present_sample),
lm_robust(comment_count ~ trust_general_3_w1, data = present_sample),
lm_robust(comment_count ~ trust_general_4_w1, data = present_sample),
lm_robust(comment_count ~ mean_issue_distance_abs, data = present_sample),
lm_robust(comment_count ~ mean_group_respectful, data = present_sample),
lm_robust(comment_count ~ mean_group_knowledgeable, data = present_sample),
lm_robust(comment_count ~ mean_group_polarized, data = present_sample),
lm_robust(comment_count ~ mean_discussion_enjoyable, data = present_sample),
lm_robust(comment_count ~ mean_discussion_toxic, data = present_sample),
lm_robust(comment_count ~ mean_discussion_constructive, data = present_sample)
)
model_frames <- list()
for (i in seq_along(models)) {
model_frames[[i]] <- data.frame(
Coefficient = summary(models[[i]])$coef[-1, 1],
pval = summary(models[[i]])$coef[-1, 4],
SE = summary(models[[i]])$coef[-1, 2],
Predictor = predictors[i]
)
}
combined_df <- do.call(rbind, model_frames)
combined_df <- combined_df%>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor)))
comment_plot_pred_active_ex0 <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(Coefficient))) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE*interval2,
ymax = Coefficient + SE*interval2), colour = "black",
lwd = 1/2, position = position_dodge(width = 2/3)) +
guides(alpha = "none")+
theme_bw(base_size=10)+
coord_flip()+
ggtitle("Comment activity", subtitle = "Linear prediction of comment count")+
xlab("")+
ylab("Scaled beta coefficients, HC2 robust SE")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
comment_plot_pred_active_ex0
ggsave("../output/comment_plot_pred_active_excl0.pdf",comment_plot_pred_active_ex0, width = 4, height = 4)
present_sample <- scaled_sample%>%filter(active == 1)
mean(present_sample$comment_mean_score)
## [1] 2.607417
sd(present_sample$comment_mean_score)
## [1] 1.722391
max(present_sample$comment_mean_score)
## [1] 18
models <- list(
lm_robust(comment_mean_score ~ gender_male, data = present_sample),
lm_robust(comment_mean_score ~ age, data = present_sample),
lm_robust(comment_mean_score ~ education, data = present_sample),
lm_robust(comment_mean_score ~ comments_online_w1, data = present_sample),
lm_robust(comment_mean_score ~ comment_karma, data = present_sample),
lm_robust(comment_mean_score ~ leftright_w1, data = present_sample),
lm_robust(comment_mean_score ~ polinterest_w1, data = present_sample),
lm_robust(comment_mean_score ~ affective_polarization_w1, data = present_sample),
lm_robust(comment_mean_score ~ trust_general_1_w1, data = present_sample),
lm_robust(comment_mean_score ~ trust_general_2_w1, data = present_sample),
lm_robust(comment_mean_score ~ trust_general_3_w1, data = present_sample),
lm_robust(comment_mean_score ~ trust_general_4_w1, data = present_sample),
lm_robust(comment_mean_score ~ mean_issue_distance_abs, data = present_sample),
lm_robust(comment_mean_score ~ mean_group_respectful, data = present_sample),
lm_robust(comment_mean_score ~ mean_group_knowledgeable, data = present_sample),
lm_robust(comment_mean_score ~ mean_group_polarized, data = present_sample),
lm_robust(comment_mean_score ~ mean_discussion_enjoyable, data = present_sample),
lm_robust(comment_mean_score ~ mean_discussion_toxic, data = present_sample),
lm_robust(comment_mean_score ~ mean_discussion_constructive, data = present_sample)
)
model_frames <- list()
for (i in seq_along(models)) {
model_frames[[i]] <- data.frame(
Coefficient = summary(models[[i]])$coef[-1, 1],
pval = summary(models[[i]])$coef[-1, 4],
SE = summary(models[[i]])$coef[-1, 2],
Predictor = predictors[i]
)
}
combined_df <- do.call(rbind, model_frames)
combined_df <- combined_df%>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor)))
comment_plot_pred_active_ex0_score <- ggplot(combined_df, aes(x = rev(Predictor), alpha = abs(Coefficient))) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE*interval2,
ymax = Coefficient + SE*interval2), colour = "black",
lwd = 1/2, position = position_dodge(width = 2/3)) +
guides(alpha = "none")+
theme_bw(base_size=10)+
coord_flip()+
ggtitle("Comment scores", subtitle = "Linear prediction of comment scores")+
xlab("")+
ylab("Scaled beta coefficients, HC2 robust SE")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
comment_plot_pred_active_ex0_score
ggsave("../output/comment_plot_pred_active_excl0_score.pdf",comment_plot_pred_active_ex0_score, width = 4, height = 4)
present_sample <- scaled_sample%>%filter(active2 != "inactive")
# filtered data for control condition
scaled_sample_c <- present_sample %>% filter(condition == "control")
# models for control condition
models_control <- list(
hurdle(comment_count ~ gender_male, data = scaled_sample_c),
hurdle(comment_count ~ age, data = scaled_sample_c),
hurdle(comment_count ~ education, data = scaled_sample_c),
hurdle(comment_count ~ comments_online_w1, data = scaled_sample_c),
hurdle(comment_count ~ comment_karma, data = scaled_sample_c),
hurdle(comment_count ~ leftright_w1, data = scaled_sample_c),
hurdle(comment_count ~ polinterest_w1, data = scaled_sample_c),
hurdle(comment_count ~ affective_polarization_w1, data = scaled_sample_c),
hurdle(comment_count ~ trust_general_1_w1, data = scaled_sample_c),
hurdle(comment_count ~ trust_general_2_w1, data = scaled_sample_c),
hurdle(comment_count ~ trust_general_3_w1, data = scaled_sample_c),
hurdle(comment_count ~ trust_general_4_w1, data = scaled_sample_c),
hurdle(comment_count ~ mean_issue_distance_abs, data = scaled_sample_c),
hurdle(comment_count ~ mean_group_respectful, data = scaled_sample_c),
hurdle(comment_count ~ mean_group_knowledgeable, data = scaled_sample_c),
hurdle(comment_count ~ mean_group_polarized, data = scaled_sample_c),
hurdle(comment_count ~ mean_discussion_enjoyable, data = scaled_sample_c),
hurdle(comment_count ~ mean_discussion_toxic, data = scaled_sample_c),
hurdle(comment_count ~ mean_discussion_constructive, data = scaled_sample_c)
)
model_frames_control <- list()
for (i in seq_along(models_control)) {
model_summary <- summary(models_control[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames_control[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df_control <- bind_rows(model_frames_control) %>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_c <- ggplot(combined_df_control, aes(x = rev(Predictor), alpha = AlphaValue)) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "grey", lwd = 1) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
facet_wrap(~ Model, ncol = 2, scales = "free_x") +
ggtitle("Control condition", subtitle = "Prediction of user-level discussion participation") +
xlab("") +
ylab("Zero hurdle models for count data regression")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
# filtered data for moderation condition
scaled_sample_m <- present_sample %>% filter(condition == "moderation")
# Models for moderation condition
models_moderation <- list(
hurdle(comment_count ~ gender_male, data = scaled_sample_m),
hurdle(comment_count ~ age, data = scaled_sample_m),
hurdle(comment_count ~ education, data = scaled_sample_m),
hurdle(comment_count ~ comments_online_w1, data = scaled_sample_m),
hurdle(comment_count ~ comment_karma, data = scaled_sample_m),
hurdle(comment_count ~ leftright_w1, data = scaled_sample_m),
hurdle(comment_count ~ polinterest_w1, data = scaled_sample_m),
hurdle(comment_count ~ affective_polarization_w1, data = scaled_sample_m),
hurdle(comment_count ~ trust_general_1_w1, data = scaled_sample_m),
hurdle(comment_count ~ trust_general_2_w1, data = scaled_sample_m),
hurdle(comment_count ~ trust_general_3_w1, data = scaled_sample_m),
hurdle(comment_count ~ trust_general_4_w1, data = scaled_sample_m),
hurdle(comment_count ~ mean_issue_distance_abs, data = scaled_sample_m),
hurdle(comment_count ~ mean_group_respectful, data = scaled_sample_m),
hurdle(comment_count ~ mean_group_knowledgeable, data = scaled_sample_m),
hurdle(comment_count ~ mean_group_polarized, data = scaled_sample_m),
hurdle(comment_count ~ mean_discussion_enjoyable, data = scaled_sample_m),
hurdle(comment_count ~ mean_discussion_toxic, data = scaled_sample_m),
hurdle(comment_count ~ mean_discussion_constructive, data = scaled_sample_m)
)
model_frames_moderation <- list()
for (i in seq_along(models_moderation)) {
model_summary <- summary(models_moderation[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames_moderation[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df_moderation <- bind_rows(model_frames_moderation) %>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_m <- ggplot(combined_df_moderation, aes(x = rev(Predictor), alpha = AlphaValue)) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "#6699FF", lwd = 1) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
facet_wrap(~ Model, ncol = 2, scales = "free_x") +
ggtitle("Moderation condition", subtitle = "Prediction of user-level discussion participation") +
xlab("") +
ylab("Zero hurdle models for count data regression")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
# filtered data for incentives condition
scaled_sample_i <- present_sample %>% filter(condition == "incentives")
# Models for incentives condition
models_incentives <- list(
hurdle(comment_count ~ gender_male, data = scaled_sample_i),
hurdle(comment_count ~ age, data = scaled_sample_i),
hurdle(comment_count ~ education, data = scaled_sample_i),
hurdle(comment_count ~ comments_online_w1, data = scaled_sample_i),
hurdle(comment_count ~ comment_karma, data = scaled_sample_i),
hurdle(comment_count ~ leftright_w1, data = scaled_sample_i),
hurdle(comment_count ~ polinterest_w1, data = scaled_sample_i),
hurdle(comment_count ~ affective_polarization_w1, data = scaled_sample_i),
hurdle(comment_count ~ trust_general_1_w1, data = scaled_sample_i),
hurdle(comment_count ~ trust_general_2_w1, data = scaled_sample_i),
hurdle(comment_count ~ trust_general_3_w1, data = scaled_sample_i),
hurdle(comment_count ~ trust_general_4_w1, data = scaled_sample_i),
hurdle(comment_count ~ mean_issue_distance_abs, data = scaled_sample_i),
hurdle(comment_count ~ mean_group_respectful, data = scaled_sample_i),
hurdle(comment_count ~ mean_group_knowledgeable, data = scaled_sample_i),
hurdle(comment_count ~ mean_group_polarized, data = scaled_sample_i),
hurdle(comment_count ~ mean_discussion_enjoyable, data = scaled_sample_i),
hurdle(comment_count ~ mean_discussion_toxic, data = scaled_sample_i),
hurdle(comment_count ~ mean_discussion_constructive, data = scaled_sample_i)
)
model_frames_incentives <- list()
for (i in seq_along(models_incentives)) {
model_summary <- summary(models_incentives[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames_incentives[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df_incentives <- bind_rows(model_frames_incentives) %>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_i <- ggplot(combined_df_incentives, aes(x = rev(Predictor), alpha = AlphaValue)) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "#C5701A", lwd = 1) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
facet_wrap(~ Model, ncol = 2, scales = "free_x") +
ggtitle("Incentives condition", subtitle = "Prediction of user-level discussion participation") +
xlab("") +
ylab("Zero hurdle models for count data regression")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
combi_conditions <- grid.arrange(comment_plot_pred_c, comment_plot_pred_m, comment_plot_pred_i, nrow = 3)
ggsave("../output/combi_comment_plot_pred_conditions_hurdle.pdf", combi_conditions, width = 8, height = 12)
present_sample <- scaled_sample%>%filter(active2 != "inactive")
# filtered data for control condition
scaled_sample_c <- present_sample %>% filter(condition == "control")
models_control <- list(
hurdle(comment_count ~ gender_male, data = scaled_sample_c),
hurdle(comment_count ~ polinterest_w1, data = scaled_sample_c),
hurdle(comment_count ~ mean_group_knowledgeable, data = scaled_sample_c),
hurdle(comment_count ~ mean_discussion_toxic, data = scaled_sample_c)
)
predictors <- c("Gender (male)", "Political interest",
"Group knowledgeable", "Discussion toxic")
model_frames_control <- list()
for (i in seq_along(models_control)) {
model_summary <- summary(models_control[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames_control[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df_control <- bind_rows(model_frames_control) %>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_c <- ggplot(combined_df_control, aes(x = rev(Predictor), alpha = AlphaValue)) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "grey", lwd = 1) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Control condition", subtitle = "Prediction of discussion participation") +
xlab("") +
ylab("")+
facet_wrap(~ forcats::fct_rev(Model), ncol = 2, scales = "free_x")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
# filtered data for moderation condition
scaled_sample_m <- present_sample %>% filter(condition == "moderation")
models_moderation <- list(
hurdle(comment_count ~ gender_male, data = scaled_sample_m),
hurdle(comment_count ~ polinterest_w1, data = scaled_sample_m),
hurdle(comment_count ~ mean_group_knowledgeable, data = scaled_sample_m),
hurdle(comment_count ~ mean_discussion_toxic, data = scaled_sample_m)
)
model_frames_moderation <- list()
for (i in seq_along(models_moderation)) {
model_summary <- summary(models_moderation[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames_moderation[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df_moderation <- bind_rows(model_frames_moderation) %>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_m <- ggplot(combined_df_moderation, aes(x = rev(Predictor))) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "#6699FF", lwd = 1) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Moderation condition", subtitle = "Prediction of discussion participation") +
xlab("") +
ylab("")+
facet_wrap(~ forcats::fct_rev(Model), ncol = 2, scales = "free_x")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
scaled_sample_i <- present_sample %>% filter(condition == "incentives")
models_incentives <- list(
hurdle(comment_count ~ gender_male, data = scaled_sample_i),
hurdle(comment_count ~ polinterest_w1, data = scaled_sample_i),
hurdle(comment_count ~ mean_group_knowledgeable, data = scaled_sample_i),
hurdle(comment_count ~ mean_discussion_toxic, data = scaled_sample_i)
)
model_frames_incentives <- list()
for (i in seq_along(models_incentives)) {
model_summary <- summary(models_incentives[[i]])
count_model_coef <- as.data.frame(model_summary$coefficients$count)
count_model_coef$Predictor <- predictors[i]
count_model_coef$Model <- "Count Model"
count_model_coef <- count_model_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
zero_hurdle_coef <- as.data.frame(model_summary$coefficients$zero)
zero_hurdle_coef$Predictor <- predictors[i]
zero_hurdle_coef$Model <- "Zero Hurdle Model"
zero_hurdle_coef <- zero_hurdle_coef %>%
filter(row.names(.) != "(Intercept)") %>%
rename(Coefficient = Estimate, SE = `Std. Error`, pval = `Pr(>|z|)`)
model_frames_incentives[[i]] <- rbind(zero_hurdle_coef, count_model_coef)
}
combined_df_incentives <- bind_rows(model_frames_incentives) %>%
mutate(Significance = ifelse(pval < 0.05, "sig.", "insig."),
Predictor = fct_inorder(rev(Predictor))) %>%
group_by(Model) %>%
mutate(AlphaValue = abs(Coefficient) / max(abs(Coefficient), na.rm = TRUE)) %>%
ungroup()
comment_plot_pred_i <- ggplot(combined_df_incentives, aes(x = rev(Predictor))) +
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_pointrange(aes(y = Coefficient, ymin = Coefficient - SE * interval2,
ymax = Coefficient + SE * interval2), colour = "#C5701A", lwd = 1) +
guides(alpha = "none") +
theme_bw(base_size = 10) +
coord_flip() +
ggtitle("Incentives condition",subtitle = "Prediction of discussion participation") +
xlab("") +
ylab("")+
facet_wrap(~ forcats::fct_rev(Model), ncol = 2, scales = "free_x")+
theme(#panel.grid.major = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank())
combi_conditions <- grid.arrange(comment_plot_pred_m, comment_plot_pred_i, nrow = 1)
ggsave("../output/combi_comment_plot_pred_conditions_hurdle_simple.pdf", combi_conditions, width = 10, height = 3)
# variables balance table
covariates <- c("age", "gender", "education","time_online_w1" , "social_media_w1" , "comments_online_w1" ,
"leftright_w1" , "polinterest_w1" , "affective_pol_1_w1" ,
"affective_pol_2_w1" , "issue_attitudes_loan_w1" , "issue_attitudes_airbnb_w1" ,
"issue_attitudes_minwage_w1" , "issue_attitudes_ukraine_w1" , "issue_attitudes_ubi_w1" ,
"issue_attitudes_climate_w1" , "issue_attitudes_fur_w1" , "issue_attitudes_renewable_w1" ,
"issue_attitudes_gaza_w1" , "issue_attitudes_vegetarian_w1" , "issue_attitudes_gender_w1" ,
"issue_attitudes_sexwork_w1" , "issue_attitudes_vaccine_w1" , "issue_attitudes_healthcare_w1" ,
"issue_attitudes_immigration_w1", "issue_attitudes_guns_w1" , "issue_attitudes_deathpenalty_w1",
"issue_attitudes_bodycams_w1" , "issue_attitudes_ai_w1" , "issue_attitudes_socialmedia_w1" ,
"issue_knowledge_loan_w1" , "issue_knowledge_airbnb_w1" , "issue_knowledge_minwage_w1" ,
"issue_knowledge_ukraine_w1" , "issue_knowledge_ubi_w1" , "issue_knowledge_climate_w1" ,
"issue_knowledge_fur_w1" , "issue_knowledge_renewable_w1" , "issue_knowledge_mideast_w1" ,
"issue_knowledge_vegetarian_w1" , "issue_knowledge_gender_w1" , "issue_knowledge_sexwork_w1" ,
"issue_knowledge_vaccine_w1" , "issue_knowledge_healthcare_w1" , "issue_knowledge_immigration_w1" ,
"issue_knowledge_guns_w1" , "issue_knowledge_deathpenalty_w1", "issue_knowledge_bodycams_w1" ,
"issue_knowledge_ai_w1" , "issue_knowledge_socialmedia_w1", "political_efficacy_1_w1" ,
"political_efficacy_2_w1" , "political_efficacy_3_w1" , "trust_general_1_w1" ,
"trust_general_2_w1" , "trust_general_3_w1" , "trust_general_4_w1" ,
"pol_cynicism_w1", "affective_polarization_w1" , "efficacy_w1" ,"comment_karma","comment_count" , "comment_mean_lenght" , "comment_mean_score" , "comment_mean_tox" , "ex_comment_count", "ex_comment_mean_score" , "ex_comment_mean_tox" , "ex_comment_mean_lenght" )
table1 <- CreateTableOne(vars = covariates, strata = "condition", data = full_data)
latex_table <- print(table1, smd = TRUE, printToggle = FALSE)
rownames(latex_table) <- gsub("\\(mean \\(SD\\)\\)", "", rownames(latex_table))
knitr::kable(latex_table[, -grep("test", colnames(latex_table))],
format = "html", booktabs = TRUE)
 | control | incentives | moderation | p | SMD |
---|---|---|---|---|---|
n | 181 | 169 | 170 | ||
age | 43.46 (13.90) | 43.03 (13.69) | 43.77 (13.71) | 0.883 | 0.036 |
gender | 1.62 (0.59) | 1.54 (0.61) | 1.56 (0.60) | 0.442 | 0.089 |
education | 4.28 (1.35) | 4.25 (1.38) | 4.27 (1.31) | 0.982 | 0.013 |
time_online_w1 | 3.93 (0.50) | 3.89 (0.44) | 3.92 (0.53) | 0.743 | 0.056 |
social_media_w1 | 3.77 (0.64) | 3.81 (0.60) | 3.83 (0.60) | 0.628 | 0.066 |
comments_online_w1 | 3.48 (1.08) | 3.59 (1.00) | 3.68 (1.02) | 0.189 | 0.130 |
leftright_w1 | 3.73 (2.06) | 3.85 (2.04) | 4.16 (2.18) | 0.150 | 0.134 |
polinterest_w1 | 3.60 (0.62) | 3.60 (0.56) | 3.61 (0.55) | 0.971 | 0.017 |
affective_pol_1_w1 | 53.20 (25.74) | 50.31 (27.37) | 48.66 (27.65) | 0.278 | 0.113 |
affective_pol_2_w1 | 16.51 (21.50) | 15.16 (21.11) | 16.46 (19.67) | 0.795 | 0.043 |
issue_attitudes_loan_w1 | 1.96 (1.43) | 2.20 (1.55) | 2.41 (1.58) | 0.020 | 0.201 |
issue_attitudes_airbnb_w1 | 3.12 (1.46) | 3.15 (1.44) | 3.08 (1.49) | 0.904 | 0.033 |
issue_attitudes_minwage_w1 | 5.24 (1.19) | 5.20 (1.28) | 4.87 (1.51) | 0.019 | 0.179 |
issue_attitudes_ukraine_w1 | 4.49 (1.61) | 4.61 (1.48) | 4.36 (1.70) | 0.354 | 0.105 |
issue_attitudes_ubi_w1 | 1.98 (1.33) | 2.21 (1.50) | 2.45 (1.53) | 0.012 | 0.213 |
issue_attitudes_climate_w1 | 5.19 (1.24) | 5.02 (1.50) | 5.02 (1.42) | 0.425 | 0.084 |
issue_attitudes_fur_w1 | 3.08 (1.62) | 3.18 (1.57) | 2.91 (1.48) | 0.254 | 0.120 |
issue_attitudes_renewable_w1 | 1.41 (0.93) | 1.43 (0.90) | 1.61 (1.20) | 0.158 | 0.120 |
issue_attitudes_gaza_w1 | 4.81 (1.44) | 4.92 (1.45) | 4.68 (1.51) | 0.320 | 0.109 |
issue_attitudes_vegetarian_w1 | 1.75 (0.97) | 1.89 (1.16) | 1.83 (1.07) | 0.458 | 0.089 |
issue_attitudes_gender_w1 | 2.74 (1.63) | 2.92 (1.70) | 3.06 (1.73) | 0.208 | 0.126 |
issue_attitudes_sexwork_w1 | 2.40 (1.37) | 2.25 (1.30) | 2.49 (1.50) | 0.282 | 0.115 |
issue_attitudes_vaccine_w1 | 3.53 (1.55) | 3.62 (1.60) | 3.35 (1.68) | 0.279 | 0.113 |
issue_attitudes_healthcare_w1 | 1.69 (1.29) | 1.85 (1.44) | 2.03 (1.51) | 0.075 | 0.163 |
issue_attitudes_immigration_w1 | 3.31 (1.50) | 3.21 (1.51) | 3.48 (1.65) | 0.260 | 0.117 |
issue_attitudes_guns_w1 | 4.89 (1.50) | 4.63 (1.66) | 4.45 (1.73) | 0.042 | 0.179 |
issue_attitudes_deathpenalty_w1 | 2.12 (1.46) | 2.08 (1.49) | 2.26 (1.62) | 0.502 | 0.079 |
issue_attitudes_bodycams_w1 | 5.70 (0.65) | 5.59 (0.82) | 5.62 (0.70) | 0.385 | 0.097 |
issue_attitudes_ai_w1 | 2.44 (1.29) | 2.51 (1.26) | 2.44 (1.38) | 0.861 | 0.035 |
issue_attitudes_socialmedia_w1 | 3.24 (1.53) | 3.32 (1.50) | 3.34 (1.44) | 0.803 | 0.044 |
issue_knowledge_loan_w1 | 3.05 (0.81) | 3.11 (0.78) | 3.08 (0.80) | 0.764 | 0.052 |
issue_knowledge_airbnb_w1 | 2.53 (0.76) | 2.64 (0.92) | 2.58 (0.79) | 0.425 | 0.092 |
issue_knowledge_minwage_w1 | 3.18 (0.68) | 3.20 (0.72) | 3.11 (0.73) | 0.439 | 0.089 |
issue_knowledge_ukraine_w1 | 2.86 (0.79) | 3.01 (0.75) | 3.02 (0.74) | 0.086 | 0.140 |
issue_knowledge_ubi_w1 | 2.70 (0.75) | 2.85 (0.71) | 2.74 (0.76) | 0.126 | 0.142 |
issue_knowledge_climate_w1 | 3.24 (0.73) | 3.33 (0.71) | 3.30 (0.65) | 0.440 | 0.089 |
issue_knowledge_fur_w1 | 2.31 (0.80) | 2.28 (0.84) | 2.29 (0.94) | 0.962 | 0.020 |
issue_knowledge_renewable_w1 | 2.94 (0.72) | 3.07 (0.72) | 2.98 (0.77) | 0.261 | 0.115 |
issue_knowledge_mideast_w1 | 2.82 (0.72) | 2.95 (0.74) | 2.85 (0.75) | 0.257 | 0.112 |
issue_knowledge_vegetarian_w1 | 2.76 (0.79) | 2.79 (0.94) | 2.81 (0.85) | 0.857 | 0.039 |
issue_knowledge_gender_w1 | 3.13 (0.80) | 3.17 (0.81) | 3.11 (0.86) | 0.760 | 0.053 |
issue_knowledge_sexwork_w1 | 2.64 (0.80) | 2.62 (0.92) | 2.56 (0.82) | 0.713 | 0.056 |
issue_knowledge_vaccine_w1 | 2.92 (0.76) | 3.05 (0.76) | 2.98 (0.70) | 0.291 | 0.112 |
issue_knowledge_healthcare_w1 | 3.18 (0.70) | 3.28 (0.72) | 3.17 (0.69) | 0.304 | 0.101 |
issue_knowledge_immigration_w1 | 2.92 (0.74) | 2.93 (0.74) | 2.97 (0.76) | 0.793 | 0.048 |
issue_knowledge_guns_w1 | 3.16 (0.72) | 3.17 (0.71) | 3.08 (0.73) | 0.459 | 0.082 |
issue_knowledge_deathpenalty_w1 | 2.99 (0.78) | 3.07 (0.81) | 3.02 (0.73) | 0.604 | 0.070 |
issue_knowledge_bodycams_w1 | 2.86 (0.73) | 3.05 (0.77) | 2.85 (0.73) | 0.015 | 0.184 |
issue_knowledge_ai_w1 | 2.75 (0.71) | 2.94 (0.76) | 2.81 (0.74) | 0.050 | 0.170 |
issue_knowledge_socialmedia_w1 | 3.24 (0.62) | 3.25 (0.74) | 3.25 (0.71) | 0.988 | 0.011 |
political_efficacy_1_w1 | 2.08 (1.05) | 2.03 (0.90) | 2.08 (1.05) | 0.865 | 0.036 |
political_efficacy_2_w1 | 4.54 (1.10) | 4.67 (1.00) | 4.64 (0.98) | 0.429 | 0.089 |
political_efficacy_3_w1 | 4.76 (1.04) | 4.82 (0.96) | 4.78 (0.95) | 0.846 | 0.041 |
trust_general_1_w1 | 1.77 (0.62) | 1.84 (0.63) | 1.81 (0.69) | 0.629 | 0.069 |
trust_general_2_w1 | 2.08 (0.67) | 2.08 (0.68) | 2.06 (0.66) | 0.941 | 0.024 |
trust_general_3_w1 | 3.62 (0.55) | 3.63 (0.53) | 3.49 (0.68) | 0.058 | 0.152 |
trust_general_4_w1 | 2.40 (0.72) | 2.46 (0.66) | 2.32 (0.73) | 0.195 | 0.131 |
pol_cynicism_w1 | 4.17 (0.99) | 3.99 (1.05) | 4.16 (1.08) | 0.204 | 0.114 |
affective_polarization_w1 | 46.36 (28.59) | 45.01 (28.05) | 41.25 (30.20) | 0.267 | 0.117 |
efficacy_w1 | 3.03 (0.35) | 3.01 (0.30) | 3.03 (0.35) | 0.865 | 0.036 |
comment_karma | 17451.63 (42386.03) | 16819.56 (36107.33) | 25077.38 (65773.47) | 0.233 | 0.103 |
comment_count | 16.64 (20.16) | 17.38 (15.67) | 18.10 (20.91) | 0.850 | 0.050 |
comment_mean_lenght | 426.23 (261.86) | 432.31 (248.13) | 398.90 (286.95) | 0.618 | 0.083 |
comment_mean_score | 3.09 (1.70) | 2.37 (1.91) | 2.33 (1.41) | 0.001 | 0.303 |
comment_mean_tox | 0.09 (0.05) | 0.10 (0.06) | 0.11 (0.07) | 0.183 | 0.167 |
ex_comment_count | 136.05 (169.42) | 159.14 (164.32) | 165.42 (176.26) | 0.291 | 0.115 |
ex_comment_mean_score | 6.22 (6.61) | 7.64 (7.23) | 8.63 (11.69) | 0.059 | 0.187 |
ex_comment_mean_tox | 0.12 (0.07) | 0.13 (0.07) | 0.12 (0.06) | 0.672 | 0.071 |
ex_comment_mean_lenght | 229.17 (173.18) | 205.92 (136.93) | 192.37 (124.69) | 0.093 | 0.165 |
profile_sample <- scaled_sample %>%
# classify users by commenting frequency: never commented; infrequent = bottom 30% of
# positive comment counts; frequent = top 30%; users in between are set to NA and dropped below
mutate(group = case_when(comment_count == 0 ~ "Never Comment",
comment_count > 0 & comment_count <= quantile(comment_count[comment_count > 0], 0.3) ~ "Infrequent Commenter",
comment_count > quantile(comment_count[comment_count > 0], 0.7) ~ "Frequent Commenter",
TRUE ~ NA_character_))
table(profile_sample$group)
##
## Frequent Commenter Infrequent Commenter Never Comment
## 94 106 189
profile_sample <- profile_sample %>% filter(!is.na(group))%>%
mutate(gender_male = as.numeric(as.character(gender_male)))%>%
# replace underscores with dots in column names so the pivot below can split
# the "variable_statistic" names on the remaining "_" separator
rename_with(~ gsub("_", ".", .x))
# Calculate means and confidence intervals for the specified variables by group
summary_table <- profile_sample %>%
group_by(group) %>%
summarise(across(c(gender.male, comments.online.w1, polinterest.w1,
mean.group.respectful, mean.group.knowledgeable, mean.group.polarized,
mean.discussion.enjoyable, mean.discussion.toxic, mean.discussion.constructive),
list(mean = ~mean(.x, na.rm = TRUE),
lower = ~mean(.x, na.rm = TRUE) - qt(0.975, df=n()-1) * (sd(.x, na.rm = TRUE)/sqrt(n())),
upper = ~mean(.x, na.rm = TRUE) + qt(0.975, df=n()-1) * (sd(.x, na.rm = TRUE)/sqrt(n())))))%>%
# names_to contains "value", which clashes with the default values column;
# names_repair = "unique" renames them to value...3 (statistic) and value...4 (number)
pivot_longer(-group, names_to = c("variable", "value"), names_sep = "_", names_repair = "unique")%>%
pivot_wider(names_from = value...3, values_from = value...4)
variable_labels <- c(
gender.male = "Male Gender",
comments.online.w1 = "Commenting Online",
polinterest.w1 = "Political Interest",
mean.group.respectful = "Group respectful",
mean.group.knowledgeable = "Group knowledgeable",
mean.group.polarized = "Group polarized",
mean.discussion.enjoyable = "Discussions enjoyable",
mean.discussion.toxic = "Discussions toxic",
mean.discussion.constructive = "Discussions constructive"
)
summary_table$variable <- factor(summary_table$variable,
levels = names(variable_labels), labels = variable_labels)
profiles <- ggplot(summary_table, aes(x = group, y = mean, fill = group)) +
geom_bar(stat = "identity", position = position_dodge(), alpha = 0.8) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, position = position_dodge(0.9)) +
facet_wrap(~variable, scales = "free_y", ncol = 3) + # Use facet_wrap with variable
labs(title = "Profiles of three user groups",
x = "",
y = "Mean Values by Group with 95% Confidence Intervals") +
theme_minimal() +
theme(legend.position = "bottom", axis.text.x=element_blank(),
strip.text = element_text(size = 10)) +
scale_fill_manual(values = c("#334c7f","#b2ccff","lightgrey"))
profiles
ggsave("../output/profiles_participation.pdf", profiles, width = 10, height = 6)