library(tidyverse)
library(readr)
library(gridExtra)
library(haven)
library(RColorBrewer)
library(igraph)
library(kableExtra)
library(ggridges)
library(lubridate)
Load anonymous data (no comment texts, open comments, usernames or email addresses)
# reddit data
discussion_data <- read_csv("../data/anon/discussions_anon.csv")
external_data <- read_csv("../data/anon/external_reddit_anon.csv")
user_data <- read_csv("../data/anon/users_anon.csv")
# survey data
pre_survey <- read_csv("../data/anon/pre_survey_anon.csv")
post_surveys <- read_csv("../data/anon/post_surveys_anon.csv")
Combine survey and reddit data
## Reddit metrics by user - add to sample
user_comments <- discussion_data%>%
group_by(ParticipantID)%>%
mutate(comment_count = n(),
comment_mean_score = round(mean(score_comment,na.rm=T),3),
comment_mean_tox = round(mean(comment_toxicity,na.rm=T),3),
comment_mean_lenght = round(mean(length_comment_char,na.rm=T),3))%>%
slice(1)%>%
select(ParticipantID,comment_count,comment_mean_lenght,comment_mean_score,comment_mean_tox)
user_comments_external <- external_data%>%
group_by(ParticipantID)%>%
mutate(ex_comment_count = n(),
ex_comment_mean_score = round(mean(score_comment,na.rm=T),3),
ex_comment_mean_tox = round(mean(comment_toxicity,na.rm=T),3),
ex_comment_mean_lenght = round(mean(length_comment_char,na.rm=T),3))%>%
slice(1)%>%
select(ParticipantID,ex_comment_count,ex_comment_mean_score,ex_comment_mean_tox,ex_comment_mean_lenght)
sample <- pre_survey%>%
filter(on_reddit == 1)%>%
left_join(., user_data, by = "ParticipantID")%>%
left_join(., user_comments, by = "ParticipantID")%>%
left_join(., user_comments_external, by = "ParticipantID")
write_csv(sample, file = "../data/anon/sample_anon.csv")
#(1 = Man, 2 = Woman, 3 = Other)
table(sample$gender)
##
## 1 2 3
## 251 239 30
table(sample$condition)
##
## control incentives moderation
## 181 169 170
table(sample$subreddit)
##
## DiscussPolitics1 DiscussPolitics2 DiscussPolitics3 DiscussPolitics4
## 80 85 87 82
## DiscussPolitics5 DiscussPolitics6
## 85 101
ageplot <- ggplot(sample)+
geom_histogram(aes(age), fill = "#6699FF")+
theme_bw()+
ylab("Sample: Participants who joined subreddit")+
xlab(paste0("Age in years (M = ",round(mean(sample$age),2),", SD = ",round(sd(sample$age),2),")"))+
annotate(geom="text", x=60, y=30,hjust = 0, label= paste("Male: ",table(sample$gender)[1]))+
annotate(geom="text", x=60, y=28,hjust = 0, label= paste("Female: ",table(sample$gender)[2]))+
annotate(geom="text", x=60, y=26,hjust = 0, label= paste("Other: ",table(sample$gender)[3]))+
annotate(geom="text", x=60, y=33,hjust = 0, label= paste("N: ", nrow(sample)),fontface =2)+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
educplot <- ggplot(sample)+
geom_bar(aes(education), fill = "#6699FF")+
coord_flip()+
ylab("")+
xlab("Level of formal education")+
theme_bw()+
annotate(geom="text", x = 1, y = 0, hjust = 0, label = "No degree")+
annotate(geom="text", x = 2, y = 0, hjust = 0, label = "High school")+
annotate(geom="text", x = 3, y = 0, hjust = 0, label = "Some college education, no degree")+
annotate(geom="text", x = 4, y = 0, hjust = 0, label = "Bachelor's degree")+
annotate(geom="text", x = 5, y = 0, hjust = 0, label = "Some postgraduate education, no degree")+
annotate(geom="text", x = 6, y = 0, hjust = 0, label = "Postgraduate degree")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
demo <- grid.arrange(ageplot, educplot, nrow = 1,
top = "Age and education across study sample")
ggsave("../output/demographics.pdf",demo, width = 10, height = 5)
int <- ggplot(sample, aes(polinterest))+
geom_bar(fill = "#6699FF")+
ggtitle("Political Interest")+
xlab("")+
ylab("")+
theme_bw()+
xlim("Not interested", "Sighly interested","Moderately interested", "Very interested")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
pol <- ggplot(sample)+
geom_bar(aes(x = leftright, fill = ..x..))+
theme_bw()+
scale_fill_gradient2(low='#6699ff', mid='lightgrey', high='#bc3455', midpoint=6)+
ggtitle("Political Orientation")+
xlab("")+
ylab("")+
theme(legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())+
xlim("far left", "", "", "", "", "", "", "",
"", "", "far right")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
polint <- grid.arrange(int, pol, nrow = 1)
ggsave("../output/pol_lefright.pdf", polint, width = 12 , height = 5)
ggsave("../output/lefright.pdf", pol, width = 4 , height = 4)
Issue | Toxicity | Comments | Views |
---|---|---|---|
The US should condemn Israel’s military actions in Gaza as acts of genocide and impose full sanctions. | 0.191 | 435 | 5535 |
Prostitution should be illegal. | 0.190 | 213 | 3134 |
Things like gender-neutral language and stating pronouns are silly issues. | 0.183 | 538 | 3926 |
The death penalty should be reestablished US-wide. | 0.149 | 388 | 4985 |
The US should provide financial and military aid to Ukraine. | 0.136 | 263 | 3892 |
We need stricter gun control laws. | 0.118 | 338 | 4472 |
Social media is a threat to democracy. | 0.108 | 233 | 3460 |
Immigration should be regulated more strictly. | 0.101 | 286 | 4235 |
Fur clothing should be banned. | 0.096 | 230 | 3311 |
Police officers should wear body cameras. | 0.092 | 313 | 3854 |
Climate change is one of the greatest threats to humanity. | 0.084 | 223 | 2863 |
Employers should mandate vaccination. | 0.073 | 258 | 3183 |
The government should not be responsible for providing universal health care. | 0.073 | 220 | 3326 |
The government should not forgive student loan debt. | 0.069 | 296 | 3850 |
Artificial intelligence should replace humans where possible. | 0.068 | 293 | 4832 |
There should only be vegetarian food in cantines. | 0.065 | 290 | 4683 |
A universal basic income would kill the economy. | 0.064 | 225 | 3185 |
The federal minimum wage should be increased. | 0.063 | 305 | 4321 |
The government should not invest in renewable energy. | 0.060 | 198 | 3576 |
Airbnb should be banned in cities. | 0.059 | 274 | 3043 |
issue_order <- discussion_data%>%
group_by(post_title)%>%
summarise(Toxicity = round(mean(comment_toxicity, na.rm = T),3))%>%
arrange(desc(Toxicity))%>%
pull(post_title)
discussion_data$issues <- factor(discussion_data$post_title, levels = issue_order)
colour_values <- c("#6699ff", "#729dee", "#7ca1dd", "#86a5cc", "#91a9bb", "#9bada9", "#a6b298", "#d3d3d3", "#bfc7c7", "#b5b1b1", "#ab9b9b", "#a18585", "#976f6f", "#8d5959", "#844444", "#7b3232", "#732525", "#6a1a1a", "#621010", "#bc3455")
issues <- c("US foreign policy toward Israel and Gaza","Legality of prostitution", "Use of gender-neutral language and pronouns", "Death penalty reestablishment in the US", "US financial and military aid to Ukraine", "Gun control laws", "Impact of social media on democracy", "Immigration regulation", "Ban on fur clothing","Police accountability and body cameras", "Climate change and environmental threats","Mandatory vaccination policies by employers", "Universal healthcare responsibility","Government forgiveness of student loan debt", "Artificial intelligence replacing human labor", "Vegetarian food policies in public cantines", "Economic impact of universal basic income", "Federal minimum wage increase", "Government investment in renewable energy", "Regulation of Airbnb in cities")
# colours, means
tox_ridge <- ggplot(discussion_data)+
geom_density_ridges(aes(x = comment_toxicity, y = fct_rev(issues), fill = fct_rev(issues),
alpha = 0.5))+
theme(legend.position = "bottom", legend.direction="vertical",
legend.title = element_blank())+
guides(alpha = "none")+
ylab("")+
xlab("")+
scale_y_discrete(labels = rev(issues))+
theme_ridges() +
scale_fill_manual(values = colour_values) +
scale_x_continuous(limits = c(0, 0.6))+
theme(legend.position = "none")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
tox_ridge
ggsave("../output/toxicity_issues_ridges.pdf", tox_ridge, width = 10, height = 6)
views <- discussion_data%>%
group_by(subreddit,date(created_post))%>%
slice(1)%>%
ggplot(aes(x = date(created_post), y = total_views, color = subreddit))+
geom_smooth(method = loess,alpha=0.1)+
geom_point(alpha = 0.3)+
theme_bw()+
scale_color_manual(values = c("#29c195","#6699ff","#424656","#a7aabd","#f96d86","#bc3455"))+
ylab("")+
guides(color = "none")+
ggtitle("Views per post")+
xlab("Date")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
comments <- discussion_data%>%
group_by(subreddit,date(created_post))%>%
slice(1)%>%
ggplot(aes(x = date(created_post), y = `Collected comments`, color = subreddit))+
geom_smooth(method = loess,alpha=0.1)+
geom_point(alpha = 0.3)+
theme_bw()+
scale_color_manual(values = c("#29c195","#6699ff","#424656","#a7aabd","#f96d86","#bc3455"))+
ylab("")+
guides(color = "none")+
ggtitle("Comments per post")+
xlab("Date")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
comments_views <- discussion_data%>%
mutate(comments_per_views = `Collected comments`/total_views)%>%
group_by(subreddit,date(created_post))%>%
slice(1)%>%
ggplot(aes(x = date(created_post), y = comments_per_views, color = subreddit))+
geom_smooth(method = loess,alpha=0.1)+
geom_point(alpha = 0.3)+
theme_bw()+
scale_color_manual(values = c("#29c195","#6699ff","#424656","#a7aabd","#f96d86","#bc3455"))+
ylab("")+
ggtitle("Comments/views ratio")+
xlab("Date")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
activity_combi <- grid.arrange(views, comments, comments_views, nrow=1, widths = c(3,3,4))
ggsave("../output/comments_views.pdf", activity_combi, width = 14, height = 5)
toxicity <- ggplot(discussion_data, aes(x = date(created_post), y = comment_toxicity, color = subreddit))+
geom_smooth(method = loess,alpha=0.1)+
geom_point(alpha = 0.1)+
theme_bw()+
scale_color_manual(values = c("#29c195","#6699ff","#424656","#a7aabd","#f96d86","#bc3455"))+
ylab("")+
guides(color = "none")+
ggtitle("Comment toxicity")+
xlab("Date")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
length <- ggplot(discussion_data, aes(x = date(created_post), y = length_comment_char, color = subreddit))+
geom_smooth(method = loess,alpha=0.1)+
geom_point(alpha = 0.1)+
theme_bw()+
scale_color_manual(values = c("#29c195","#6699ff","#424656","#a7aabd","#f96d86","#bc3455"))+
ylab("")+
guides(color = "none")+
ggtitle("Comment length")+
xlab("Date")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
score <- discussion_data%>%
group_by(subreddit,date(created_post))%>%
mutate(score_views = score_post/total_views)%>%
slice(1)%>%
ggplot(aes(x = date(created_post), y = score_views, color = subreddit))+
geom_smooth(method = loess,alpha=0.1)+
geom_point(alpha = 0.3)+
theme_bw()+
scale_color_manual(values = c("#29c195","#6699ff","#424656","#a7aabd","#f96d86","#bc3455"))+
ylab("")+
ggtitle("Post score/views ratio")+
xlab("Date")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
characteristics_combi <- grid.arrange(toxicity, length, score, nrow=1, widths = c(3,3,4))
ggsave("../output/characteristics_combi.pdf", characteristics_combi, width = 14, height = 5)
comments_tox <- ggplot(sample,aes(x = comment_mean_tox, y = ex_comment_mean_tox, size = comment_count))+
geom_point(alpha = 0.5, color = "#6699FF")+
theme_bw()+
xlab("Internal comment toxicity")+
ylab("Average external comment toxicity")+
guides(color = "none", size = "none")+
geom_abline()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
cor.test(sample$comment_mean_tox,sample$ex_comment_mean_tox)
##
## Pearson's product-moment correlation
##
## data: sample$comment_mean_tox and sample$ex_comment_mean_tox
## t = 3.8092, df = 284, p-value = 0.0001709
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1072295 0.3280655
## sample estimates:
## cor
## 0.220471
comments_length <- ggplot(sample,aes(x = comment_mean_lenght, y = ex_comment_mean_lenght,
size = comment_count))+
geom_point(alpha = 0.5, color = "#6699FF")+
theme_bw()+
xlab("Internal comment length")+
ylab("Average external comment length")+
guides(color = "none", size = "none")+
geom_abline()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
cor.test(sample$comment_mean_lenght,sample$ex_comment_mean_lenght)
##
## Pearson's product-moment correlation
##
## data: sample$comment_mean_lenght and sample$ex_comment_mean_lenght
## t = 10.417, df = 284, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4364171 0.6048829
## sample estimates:
## cor
## 0.5257867
cor.test(log(sample$comment_karma),sample$comment_count)
##
## Pearson's product-moment correlation
##
## data: log(sample$comment_karma) and sample$comment_count
## t = 2.481, df = 328, p-value = 0.0136
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02817168 0.24016730
## sample estimates:
## cor
## 0.1357227
comment_tox_length <- grid.arrange(comments_tox, comments_length, nrow = 1)
ggsave("../output/comment_tox_length_narrow.pdf", comment_tox_length, width = 5 , height = 3)
# Posting a comment on an existing post and receiving upvotes results in "comment" karma
k1 <- ggplot(sample)+
geom_histogram(aes(comment_karma), fill = "#6699FF")+
theme_bw()+
ylab("Number of users")+
xlab("Comment Karma")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
k2 <- ggplot(sample)+
geom_histogram(aes(log(comment_karma)), fill = "#6699FF")+
theme_bw()+
ylab("Number of users")+
xlab("Comment Karma (Log-scale)")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
create <- ggplot(sample)+
geom_histogram(aes(date(created)), fill = "lightgrey", bins = 100)+
annotate("text", x = min(date(sample$created), na.rm = TRUE), y = Inf, label = paste("First:", min(date(sample$created), na.rm = T)), vjust = 2, hjust = 0) +
annotate("text", x = max(date(sample$created), na.rm = TRUE), y = Inf, label = paste("Last:", max(date(sample$created), na.rm = T)), vjust = 2, hjust = 1)+
theme_bw()+
ylab("Number of accounts")+
xlab("Date joined Reddit")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
kplot <- grid.arrange(k2,create, nrow = 1,
top = paste0("Distributions of platform seniority (N = ",nrow(sample),") Start of recruitment: 2024-05-09"))
ggsave("../output/platform_seniority.pdf", kplot, width = 8, height = 4)
internal_comments <- discussion_data %>% mutate(source = "internal")%>%
select(ParticipantID,created_comment,comment_toxicity,length_comment_char, source)%>%
filter(created_comment > as_datetime("2024-06-10") & created_comment < as_datetime("2024-07-06"))
external_comments <- external_data %>% mutate(source = "external")%>%
select(ParticipantID,created_comment,comment_toxicity,length_comment_char, source)%>%
filter(created_comment > as_datetime("2024-05-13"))
all_data <- bind_rows(internal_comments, external_comments)
all_data$created_comment <- as.Date(all_data$created_comment)
all_data <- all_data %>%
mutate(week = floor_date(created_comment, unit = "week"))
daily_summary <- all_data %>%
group_by(source, ParticipantID, created_comment) %>%
dplyr::summarize(comment_volume = n(), # Count of comments per user per day
avg_toxicity = mean(comment_toxicity, na.rm = TRUE)) %>%
ungroup()
study_start <- as.Date("2024-06-10")
study_end <- as.Date("2024-07-05")
int_ext <- ggplot(daily_summary, aes(x = created_comment, y = avg_toxicity)) +
geom_point(data = subset(daily_summary, source == "external"),
aes(size = comment_volume), color = "grey", alpha = 0.2) +
geom_point(data = subset(daily_summary, source == "internal"),
aes(size = comment_volume), color = "#6699FF", alpha = 0.5) +
geom_smooth(data = subset(daily_summary, source == "external"),
aes(y = avg_toxicity), color = "darkgrey", method = "loess", se = FALSE) +
geom_smooth(data = subset(daily_summary, source == "internal"),
aes(y = avg_toxicity), color = "#3d5b99", method = "loess", se = FALSE) +
geom_vline(xintercept = as.numeric(study_start), linetype = "dashed", color = "black") +
geom_vline(xintercept = as.numeric(study_end), linetype = "dashed", color = "black") +
labs(x = "Date", y = "Average toxicity (per user per day)",
size = "Number of Comments \n per user per day",
title = "Daily Comment Toxicity and Volume per User (External vs Internal Data)") +
scale_size_continuous(range = c(1, 5)) +
theme_minimal() +
theme(legend.position = "right",
plot.title = element_text(hjust = 0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
int_ext
ggsave("../output/internal_external_toxicity.pdf", int_ext, width = 9, height = 4)
# Aggregate data by ParticipantID, day, and source
daily_summary_act <- all_data %>%
group_by(source, ParticipantID, created_comment) %>%
dplyr::summarize(comment_volume = n(),
avg_toxicity = mean(comment_toxicity, na.rm = TRUE)) %>%
ungroup()
int_ext_act <- ggplot(daily_summary, aes(x = created_comment, y = comment_volume)) +
geom_point(data = subset(daily_summary, source == "external"),
aes(size = avg_toxicity, alpha = avg_toxicity), color = "grey") +
geom_point(data = subset(daily_summary, source == "internal"),
aes(size = avg_toxicity, alpha = avg_toxicity), color = "#6699FF") +
geom_smooth(data = subset(daily_summary, source == "external"),
aes(y = comment_volume), color = "darkgrey", method = "loess", se = FALSE) +
geom_smooth(data = subset(daily_summary, source == "internal"),
aes(y = comment_volume), color = "#3d5b99", method = "loess", se = FALSE) +
geom_vline(xintercept = as.numeric(study_start), linetype = "dashed", color = "black") +
geom_vline(xintercept = as.numeric(study_end), linetype = "dashed", color = "black") +
labs(x = "Date", y = "Number of Comments per User per Day",
size = "Average Toxicity",
alpha = "Average Toxicity",
title = "Daily Comment Volume and Toxicity per User (External vs Internal Data)") +
scale_size_continuous(range = c(1, 5)) +
scale_alpha_continuous(range = c(0.2, 1)) +
theme_minimal() +
theme(legend.position = "right",
plot.title = element_text(hjust = 0.5))+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
int_ext_act
ggsave("../output/internal_external_activity.pdf", int_ext_act, width = 9, height = 3)
``
a <- ggplot(sample)+
geom_bar(aes(time_online), fill = "#6699FF")+
theme_bw()+
coord_flip()+
xlab("")+
ylab("")+
labs(title = "Time online")+
annotate(geom="text", x = 2, y = 0, hjust = 0, label = "Up to one hour per week")+
annotate(geom="text", x = 3, y = 0, hjust = 0, label = "Up to one hour per day")+
annotate(geom="text", x = 4, y = 0, hjust = 0, label = "Multiple hours per day")+
annotate(geom="text", x = 5, y = 0, hjust = 0, label = "Almost the entire day")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
b <- ggplot(sample)+
geom_bar(aes(social_media), fill = "#6699FF")+
theme_bw()+
coord_flip()+
xlab("")+
labs(title = "Social media use")+
annotate(geom="text", x = 2, y = 0, hjust = 0, label = "A couple of times per week")+
annotate(geom="text", x = 3, y = 0, hjust = 0, label = "About once per day")+
annotate(geom="text", x = 4, y = 0, hjust = 0, label = "Multiple times per day")+
annotate(geom="text", x = 5, y = 0, hjust = 0, label = "Almost constantly")+
ylab("")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
c <- ggplot(sample)+
geom_bar(aes(comments_online), fill = "#6699FF")+
theme_bw()+
coord_flip()+
xlab("")+
ylab("")+
labs(title = "Commenting online")+
annotate(geom="text", x = 1, y = 0, hjust = 0, label = "Never")+
annotate(geom="text", x = 2, y = 0, hjust = 0, label = "About once per month")+
annotate(geom="text", x = 3, y = 0, hjust = 0, label = "About once per week")+
annotate(geom="text", x = 4, y = 0, hjust = 0, label = "Almost daily")+
annotate(geom="text", x = 5, y = 0, hjust = 0, label = "Multiple times per day")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
abc <- grid.arrange(a, b, c, nrow = 1)
ggsave("../output/online_activity.pdf", abc, width = 10 , height = 3)
# affective polarization
parties <- ggplot(sample)+
geom_density(aes(affective_pol_1),fill = "#6699ff", alpha = 0.5)+
geom_density(aes(affective_pol_2),fill = "#bc3455", alpha = 0.5)+
theme_bw()+
ylab("")+
annotate(geom = "text", x = 10, y = 0.04, hjust = 0, label = "Rating of Republican Party")+
annotate(geom = "text", x = 60, y = 0.02, hjust = 0, label = "Rating of Democratic Party")+
xlab("Party Rating: 0 = negative, 100 = positive")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
dist <- ggplot(sample, aes(affective_polarization))+
geom_density(fill = '#a7aabd', alpha = 0.5)+
theme_bw()+
xlab("Affective Polarization (In party - out party)")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
polint <- grid.arrange(parties, dist, nrow = 1, top = "Affective Polarization")
ggsave("../output/polarization.pdf", polint, width = 10 , height = 5)
# trust matrix
trust <- sample %>%
dplyr::select(trust_general_1:trust_general_4)%>%
rename(`Politics` = trust_general_1,
`Media` = trust_general_2,
`Science` = trust_general_3,
`People generally` = trust_general_4)%>%
# change 1: convert haven_labelled variables to factors
as_factor() %>%
pivot_longer(
cols = 1:4,
names_to = "Variable",
values_to = "Agreement"
) %>%
count(Variable, Agreement) %>%
ggplot(aes(y = n, x = Agreement)) +
facet_wrap(. ~ Variable) +
geom_col(fill = "#6699FF")+
ylab("")+
xlab("1: Not at all, 2: A little, 3: Quite a bit, 4: Very much")+
ggtitle("Trust")+
theme_bw()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
trust
ggsave("../output/trust_matrix.pdf", trust, width = 4 , height = 5)
## issue knowledge
knowledge <- sample %>%
dplyr::select(issue_knowledge_loan:issue_knowledge_socialmedia)%>%
rename_at(1:20, list(~ substr(., 17, nchar(.))))%>%
as_factor() %>%
pivot_longer( cols = 1:20,
names_to = "Variable",
values_to = "Knowledge") %>%
count(Variable, Knowledge) %>%
ggplot(aes(y = n, x = Knowledge)) +
facet_wrap(. ~ Variable) +
geom_col(fill = "#6699FF")+
ylab("")+
xlab("1: Nothing, 2: A little, 3: A moderate amoung, 4: A lot")+
ggtitle("Issue Knowledge T1")+
theme_bw()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
knowledge
ggsave("../output/knowledge_matrix.pdf", knowledge, width = 10 , height = 8)
## issue attitudes
attitudes <- sample %>%
dplyr::select(issue_attitudes_loan:issue_attitudes_socialmedia)%>%
rename_at(1:20, list(~ substr(., 17, nchar(.))))%>%
as_factor() %>%
pivot_longer(cols = 1:20,
names_to = "Variable",
values_to = "Attitude") %>%
count(Variable, Attitude) %>%
ggplot(aes(y = n, x = Attitude)) +
facet_wrap(. ~ Variable) +
geom_col(fill = "#6699FF")+
ylab("")+
xlab("1: Strongly disagree - 6: Strongly agree")+
ggtitle("Issue Attitudes T1")+
theme_bw()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
attitudes
ggsave("../output/attitudes_matrix.pdf", attitudes, width = 10 , height = 8)
# exposure check
exp <- ggplot(post_surveys)+
geom_density(aes(exposure_correct, fill = as.factor(survey_week)), alpha = 0.7)+
theme_bw()+
ylab("")+
xlab("Exposure Check: TP - FP, 5 topics, 5 distractors")+
scale_fill_manual(name = "Discussion week", values = rev(c("#6699FF","#70facb","#29c195","#008a62")))+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggsave("../output/exposure_check.pdf", exp, width = 5, height = 3)
# issue distance
dist_abs <- ggplot(post_surveys)+
geom_density(aes(issue_distance_abs, fill = as.factor(survey_week)), alpha = 0.7)+
theme_bw()+
ylab("")+
xlab("Absolute Issue Distance: | self - others |, aggregate over 5 topics")+
scale_fill_manual(name = "Discussion week", values = rev(c("#6699FF","#70facb","#29c195","#008a62")))+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
plot <- grid.arrange(exp,dist_abs, nrow = 1)
ggsave("../output/issue_distance.pdf", dist_abs, width = 6, height = 4)
# discussion perception
perceptions <- post_surveys %>%
dplyr::select(discussion_percep_1:discussion_percep_3)%>%
rename(`toxic` = discussion_percep_1,
`constructive` = discussion_percep_2,
`enjoyable` = discussion_percep_3)%>%
as_factor() %>%
pivot_longer(cols = 1:3,
names_to = "Variable",
values_to = "Perception") %>%
count(Variable, Perception) %>%
ggplot(aes(y = n, x = Perception)) +
facet_wrap(. ~ Variable) +
geom_col(fill = "#6699FF")+
ylab("")+
xlab("1: Not at all, 2: A little, 3: Quite a bit, 4: Very much")+
ggtitle("Discussion Perception")+
theme_bw()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggsave("../output/discussion_perception.pdf", perceptions, width = 6 , height = 4)
# group perception
perceptions_g <- post_surveys %>%
dplyr::select(group_percep_1:group_percep_3)%>%
rename(`respectful` = group_percep_1,
`polarized` = group_percep_2,
`knowledgeable` = group_percep_3)%>%
as_factor() %>%
pivot_longer(cols = 1:3,
names_to = "Variable",
values_to = "Perception") %>%
count(Variable, Perception) %>%
ggplot(aes(y = n, x = Perception)) +
facet_wrap(. ~ Variable) +
geom_col(fill = "#6699FF")+
ylab("")+
xlab("1: Not at all, 2: A little, 3: Quite a bit, 4: Very much")+
ggtitle("Group Perception")+
theme_bw()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
plot <- grid.arrange(perceptions,perceptions_g, nrow = 1)
ggsave("../output/group_perception.pdf", perceptions_g, width = 6 , height = 4)
# participation
participation <- post_surveys %>%
dplyr::select(participation_4:participation_1)%>%
rename(`checked topics` = participation_4,
`read some discussions` = participation_5,
`followed discussions` = participation_6,
`voted some` = participation_7,
`voted many` = participation_8,
`wrote few comments` = participation_9,
`commented everything` = participation_10,
`did not participate` = participation_1)%>%
as_factor() %>%
pivot_longer(cols = 1:8,
names_to = "Variable",
values_to = "Participation") %>%
group_by(Variable)%>%
summarise(Participation = sum(Participation, na.rm = T))%>%
mutate(Variable = factor(Variable, levels = c("did not participate", "checked topics",
"read some discussions",
"followed discussions",
"voted some",
"voted many",
"wrote few comments",
"commented everything")))%>%
ggplot(aes(y = Participation, x = reorder(Variable,Participation))) +
geom_col(fill = "#6699FF")+
ylab("")+
xlab("")+
ggtitle("Self-reported participation")+
theme_bw()+
coord_flip()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggsave("../output/participation_check.pdf", participation, width = 6 , height = 4)
# motives
motives <- post_surveys %>%
dplyr::select(post_motives_20:post_motives_51)%>%
rename(`inform others` = post_motives_20,
`entertain others` = post_motives_39,
`express my opinion` = post_motives_40,
`provoke others` = post_motives_41,
`express my emotions` = post_motives_42,
`connect with others` = post_motives_43,
`deceive others` = post_motives_44,
`gain attention` = post_motives_45,
`prove a point` = post_motives_46,
`cause chaos` = post_motives_47,
`bring attention to a topic` = post_motives_48,
`pursuade others` = post_motives_49,
`surprise or shock others` = post_motives_50,
`help researchers` = post_motives_51)%>%
pivot_longer(cols = 1:14,
names_to = "Motives",
values_to = "Variable") %>%
group_by(Motives)%>%
summarise(Variable = sum(Variable, na.rm = T))%>%
mutate(Motives = factor(Motives))%>%
ggplot(aes(x = reorder(Motives,Variable), y = Variable)) +
geom_col(fill = "#6699FF")+
ylab("")+
xlab("")+
ggtitle("Motives for writing comments")+
theme_bw()+
coord_flip()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggsave("../output/motives.pdf", motives, width = 5 , height = 4)
# negative motives / barriers to participation
barriers <- post_surveys %>%
dplyr::select(non_participation_2:non_participation_5,non_participation_1)%>%
rename(`no barriers` = non_participation_1,
`personally triggered` = non_participation_2,
`could not add anything` = non_participation_3,
`intimidated by sophistication` = non_participation_4,
`afraid of backlash` = non_participation_5)%>%
pivot_longer(cols = 1:5,
names_to = "Barriers",
values_to = "Variable") %>%
group_by(Barriers)%>%
summarise(Variable = sum(Variable, na.rm = T))%>%
mutate(Barriers = factor(Barriers))%>%
ggplot(aes(y = Variable, x = reorder(Barriers,Variable))) +
geom_col(fill = "#6699FF")+
ylab("")+
xlab("")+
ggtitle("Barriers to writing comments")+
theme_bw()+
coord_flip()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
plot <- grid.arrange(participation,motives,barriers, nrow = 1)
ggsave("../output/barriers.pdf", barriers, width = 5 , height = 2)
## different to normal Reddit experience
differences <- post_surveys %>%
dplyr::select(different_to_real_1:different_to_real_6)%>%
na.omit()%>%
rename(`toxicity discussion` = different_to_real_1,
`constructiveness discussion` = different_to_real_2,
`enjoyability discussion` = different_to_real_3,
`knowleadgeability group` = different_to_real_4,
`polarization group` = different_to_real_5,
`respectfulness group` = different_to_real_6)%>%
as_factor() %>%
pivot_longer(cols = 1:6,
names_to = "Variable",
values_to = "Perception") %>%
count(Variable, Perception) %>%
mutate(Variable = factor(Variable, levels = c("toxicity discussion",
"constructiveness discussion",
"enjoyability discussion",
"knowleadgeability group",
"polarization group",
"respectfulness group")))%>%
ggplot(aes(y = n, x = Perception)) +
facet_wrap(. ~ Variable) +
geom_col(fill = "#6699FF")+
#coord_flip()+
ylab("")+
xlab("1: Very similar, 2: Somewhat similar, 3: Somewhat different, 4: Very different")+
ggtitle("Perceived difference to typical experience on Reddit")+
theme_bw()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
differences
ggsave("../output/differences_perception.pdf", differences, width = 6 , height = 5)
#colors
colors <- c(control = "gray", incentives = "#C5701A", moderation = "#6699FF")
#adjust alphas to .7
colors <- c(control = adjustcolor("gray", alpha.f = .7), incentives = adjustcolor("#C5701A", alpha.f = .7), moderation = adjustcolor("#6699FF", alpha.f = .7))
comment_data_ranges <- daily_summary_act %>% group_by(ParticipantID) %>% dplyr::summarize(earliest_comment = min(created_comment), latest_comment = max(created_comment))
#plot hist of earliest and latest comment dates
h1 <- hist(comment_data_ranges$earliest_comment, breaks = 20, main = "Earliest comment date", xlab = "Date")
h2 <- hist(comment_data_ranges$latest_comment, breaks = 20, main = "Latest comment date", xlab = "Date")
#calculate number of days between earliest comment and study start
days_prestudy_period <- study_start - min(comment_data_ranges$earliest_comment)
#calculate mean number of comments per day in the pre-study period
mean_prestudy_daily_comments <- daily_summary_act %>%
filter(created_comment < study_start) %>%
group_by(ParticipantID) %>%
dplyr::summarize(mean_daily_comments_prestudy = sum(comment_volume)/as.numeric(days_prestudy_period))
#calculate mean number of comments-per day in the study period, within study
mean_duringstudy_int_daily_comments <- daily_summary_act %>% filter(source == "internal") %>%
filter(created_comment >= study_start & created_comment <= study_end) %>%
group_by(ParticipantID) %>%
dplyr::summarize(mean_daily_comments_instudy = sum(comment_volume)/as.numeric(study_end - study_start))
#calculate mean number of comments-per day in the study period, outside study
mean_duringstudy_ext_daily_comments <- daily_summary_act %>% filter(source == "external") %>%
filter(created_comment >= study_start & created_comment <= study_end) %>%
group_by(ParticipantID) %>%
dplyr::summarize(mean_daily_comments_extstudy = sum(comment_volume)/as.numeric(study_end - study_start))
#join the two dataframes
mean_daily_comments <- left_join(mean_prestudy_daily_comments, mean_duringstudy_int_daily_comments, by = "ParticipantID")
mean_daily_comments <- left_join(mean_daily_comments, mean_duringstudy_ext_daily_comments, by = "ParticipantID")
daily_summary_act_complete <- daily_summary_act %>%
complete(source, ParticipantID, created_comment = seq.Date(min(created_comment), max(created_comment), by = "day"), fill = list(comment_volume = 0, avg_toxicity = NA))
mean_duringstudy_int_commentprob <- daily_summary_act_complete %>% filter(source == "internal") %>%
filter(created_comment >= study_start & created_comment <= study_end) %>%
group_by(ParticipantID) %>%
dplyr::summarize(duringstudy_int_daily_commentprob = sum(comment_volume!=0)/as.numeric(study_end - study_start))
mean_duringstudy_ext_commentprob <- daily_summary_act_complete %>% filter(source == "external") %>%
filter(created_comment >= study_start & created_comment <= study_end) %>%
group_by(ParticipantID) %>%
dplyr::summarize(duringstudy_ext_daily_commentprob = sum(comment_volume!=0)/as.numeric(study_end - study_start))
mean_prestudy_commentprob <- daily_summary_act_complete %>%
filter(created_comment < study_start) %>%
group_by(ParticipantID) %>%
dplyr::summarize(prestudy_daily_commentprob = sum(comment_volume!=0)/as.numeric(days_prestudy_period))
mean_daily_comments <- left_join(mean_daily_comments, mean_duringstudy_int_commentprob, by = "ParticipantID")
mean_daily_comments <- left_join(mean_daily_comments, mean_duringstudy_ext_commentprob, by = "ParticipantID")
mean_daily_comments <- left_join(mean_daily_comments, mean_prestudy_commentprob, by = "ParticipantID")
# join mean_daily_comments to sample
s <- left_join(sample, mean_daily_comments, by = "ParticipantID")
#come back and use S for this
daily_summary_act %>% pull(created_comment) %>% hist(breaks = "day")
# bar chart of comment count by day, stacked by source=="internal" vs source=="external"
barplot(table(daily_summary_act$source, daily_summary_act$created_comment), beside = T, col = c("grey50", "#6699FF"), legend = T, border = NA)
# break down by condition
control <- s %>% filter(condition == "control")
moderation <- s %>% filter(condition == "moderation")
incentives <- s %>% filter(condition == "incentives")
# copy of 3-gang version subset by condition, not logged
this_xlim = c(0,12)
this_ylim = c(0,5)
par(pty="s", mfrow = c(1,3), cex = .6)
plot((control$mean_daily_comments_prestudy), (control$mean_daily_comments_instudy), xlab = "Mean daily comments pre-study", ylab = "Mean daily comments in study", main = "Control", pch = 16, col = colors['control'], xlim = this_xlim, ylim = this_ylim)
abline(0,1)
plot((moderation$mean_daily_comments_prestudy), (moderation$mean_daily_comments_instudy), xlab = "Mean daily comments pre-study", ylab = "Mean daily comments in study", main = "Moderation", pch = 16, col = colors['moderation'], xlim = this_xlim, ylim = this_ylim)
abline(0,1)
plot((incentives$mean_daily_comments_prestudy), (incentives$mean_daily_comments_instudy), xlab = "Mean daily comments pre-study", ylab = "Mean daily comments in study", main = "Incentives", pch = 16, col = colors['incentives'], xlim = this_xlim, ylim = this_ylim)
abline(0,1)
#plot ratio by prestudy
s <- s %>% mutate(prestudy_instudy_comment_ratio = mean_daily_comments_instudy/mean_daily_comments_prestudy)
control <- s %>% filter(condition == "control")
moderation <- s %>% filter(condition == "moderation")
incentives <- s %>% filter(condition == "incentives")
par(cex = .8)
plot(control$mean_daily_comments_prestudy, control$prestudy_instudy_comment_ratio, xlab = "Mean daily comments pre-study", ylab = "In-Study/Pre-Study", main = "Mean daily comments in study / pre-study ", pch = 16, col = colors['control'])
points(moderation$mean_daily_comments_prestudy, moderation$prestudy_instudy_comment_ratio, pch = 16, col = colors['moderation'])
points(incentives$mean_daily_comments_prestudy, incentives$prestudy_instudy_comment_ratio, pch = 16, col = colors['incentives'])
#plot difference by prestudy
s <- s %>% mutate(prestudy_instudy_comment_difference = mean_daily_comments_instudy-mean_daily_comments_prestudy)
control <- s %>% filter(condition == "control")
moderation <- s %>% filter(condition == "moderation")
incentives <- s %>% filter(condition == "incentives")
par(cex = .8)
plot(control$mean_daily_comments_prestudy, control$prestudy_instudy_comment_difference, xlab = "Mean daily comments pre-study", ylab = "In-Study - Pre-Study", main = "Mean daily comments pre-study vs in study", pch = 16, col = colors['control'])
points(moderation$mean_daily_comments_prestudy, moderation$prestudy_instudy_comment_difference, pch = 16, col = colors['moderation'])
points(incentives$mean_daily_comments_prestudy, incentives$prestudy_instudy_comment_difference, pch = 16, col = colors['incentives'])
abline(h = 0, lty = 2)
abline(0,-1)
#### Make Raw non-diff versions
#keep
#pdf("ws_outputs/comment_raw_by_prestudy_quantile.pdf", width = 8, height = 10)
s <- s %>%
mutate(prestudy_quantile_bin = cut(mean_daily_comments_prestudy, breaks = quantile(mean_daily_comments_prestudy, probs = seq(0, 1, by = 0.25), na.rm = TRUE)))
# plot s subsets
control <- s %>% filter(condition == "control") %>%
select(prestudy_quantile_bin, mean_daily_comments_instudy) %>%
drop_na()
moderation <- s %>%
filter(condition == "moderation") %>%
select(prestudy_quantile_bin, mean_daily_comments_instudy) %>%
drop_na()
incentives <- s %>%
filter(condition == "incentives") %>%
select(prestudy_quantile_bin, mean_daily_comments_instudy) %>%
drop_na()
#fill out the missing bins in control
bins <- s %>% pull(prestudy_quantile_bin) %>% unique()
#remove NA values and put in order
bins <- bins[!is.na(bins)]
bins <- bins[order(bins)]
control <- control %>% complete(prestudy_quantile_bin = bins)
par(mfrow = c(3,1))
this_ylim = c(0,4)
#this_ylim = NULL
this_xlab = "Pre-Study Mean Daily Comments"
this_ylab = "In-Study Mean Daily Comments"
this_main = "Pre-Study vs In-Study Mean Daily Comments"
boxplot(mean_daily_comments_instudy ~ prestudy_quantile_bin, data = control, xlab =this_xlab, ylab = this_ylab, main = this_main, pch = 16, col = colors['control'], ylim =this_ylim)
#abline(h = 0, lty = 2)
#abline(0,-1)
plot(mean_daily_comments_instudy ~ prestudy_quantile_bin, data = moderation, xlab = this_xlab, ylab = this_ylab, main = this_main, pch = 16, col = colors['moderation'], ylim = this_ylim)
#abline(h = 0, lty = 2)
#abline(0,-1)
plot(mean_daily_comments_instudy ~ prestudy_quantile_bin, data = incentives, xlab = this_xlab, ylab = this_ylab, main = this_main, pch = 16, col = colors['incentives'], ylim = this_ylim)
#abline(h = 0, lty = 2)
#abline(0,-1)
legend("topright", legend = c("Control", "Moderation", "Incentives"), col = c(colors['control'], colors['moderation'], colors['incentives']), pch = 15, bty = "n")
#dev.off()
Comment activity per user