library(tidyverse)
library(readr)
library(gridExtra)
library(haven)
library(RColorBrewer)
library(igraph)
library(kableExtra)
library(ggridges)
library(lubridate)

Load anonymous data (no comment texts, open comments, usernames or email addresses)

# reddit data
# Three anonymised Reddit tables are read from ../data/anon: the comments from
# the study discussions, the participants' comments elsewhere on Reddit, and a
# user-level table.
# NOTE(review): the row granularity (one row per comment / per user) is
# inferred from how these tables are aggregated later in this script --
# confirm against the data documentation.
discussion_data <- read_csv("../data/anon/discussions_anon.csv")
external_data <- read_csv("../data/anon/external_reddit_anon.csv")
user_data <-  read_csv("../data/anon/users_anon.csv")

# survey data
# The pre-study survey and the repeated post-discussion surveys.
pre_survey <- read_csv("../data/anon/pre_survey_anon.csv")
post_surveys <- read_csv("../data/anon/post_surveys_anon.csv")

Combine survey and reddit data

## Reddit metrics by user - add to sample
# One row per participant: the number of rows (comments) in discussion_data
# and the mean score, toxicity and character length of those comments, each
# rounded to three decimals.
# summarise() collapses every participant directly, so no repeated per-row
# values have to be thrown away with slice(1) and the result is ungrouped.
# The column name `comment_mean_lenght` (sic) is kept because later code
# refers to it under that spelling.
user_comments <- discussion_data %>%
  group_by(ParticipantID) %>%
  summarise(
    comment_count = n(),
    comment_mean_lenght = round(mean(length_comment_char, na.rm = TRUE), 3),
    comment_mean_score = round(mean(score_comment, na.rm = TRUE), 3),
    comment_mean_tox = round(mean(comment_toxicity, na.rm = TRUE), 3),
    .groups = "drop"
  )

# Same per-participant summary as for the study discussions, computed on
# external_data; all columns carry an `ex_` prefix so both summaries can be
# joined onto the same table.
# summarise() replaces the mutate() + slice(1) + select() detour and returns
# an ungrouped tibble. The `lenght` spelling is kept for downstream code.
user_comments_external <- external_data %>%
  group_by(ParticipantID) %>%
  summarise(
    ex_comment_count = n(),
    ex_comment_mean_score = round(mean(score_comment, na.rm = TRUE), 3),
    ex_comment_mean_tox = round(mean(comment_toxicity, na.rm = TRUE), 3),
    ex_comment_mean_lenght = round(mean(length_comment_char, na.rm = TRUE), 3),
    .groups = "drop"
  )

# Analysis sample: pre-survey rows with on_reddit == 1, enriched with the user
# table and both per-user comment summaries. Left joins keep respondents that
# have no match in the joined tables (their added columns are NA).
sample <- pre_survey %>%
  filter(on_reddit == 1) %>%
  left_join(user_data, by = "ParticipantID") %>%
  left_join(user_comments, by = "ParticipantID") %>%
  left_join(user_comments_external, by = "ParticipantID")

# Store the merged sample alongside the other anonymised files.
write_csv(x = sample, file = "../data/anon/sample_anon.csv")

Comment activity per user

# comments per user
# Bar chart of the number of users per comment count. Users without a comment
# count (NA) are shown at zero; the min/max annotations are computed on the
# unmodified sample and therefore ignore those NA values.
part1 <- sample %>%
  mutate(comment_count = ifelse(is.na(comment_count), 0, comment_count)) %>%
  ggplot(aes(x = comment_count)) +
  geom_bar(fill = "#6699FF") +
  annotate(
    geom = "text", x = 20, y = 200, hjust = 0,
    label = paste0("min = ", min(sample$comment_count, na.rm = TRUE))
  ) +
  annotate(
    geom = "text", x = 20, y = 180, hjust = 0,
    label = paste0("max = ", max(sample$comment_count, na.rm = TRUE))
  ) +
  labs(
    title = "Comment activity",
    subtitle = paste0(
      "20 discussion issues | N(comments) = ", nrow(discussion_data),
      " | N(users) = ", nrow(sample)
    ),
    x = "Number of comments per user",
    y = "Number of users"
  ) +
  theme_bw()

# IDs of all participants that occur in post_surveys, one entry per
# participant (the grouping keys, in grouping order).
any_check <- post_surveys %>%
  group_by(ParticipantID) %>%
  group_keys() %>%
  pull(ParticipantID)

# Size of each participation group:
#   "active"    - has a comment count
#   "silent"    - no comment count, but present in post_surveys
#   "join only" - no comment count and absent from post_surveys
sample %>%
  mutate(active = case_when(
    !is.na(comment_count) ~ "active",
    ParticipantID %in% any_check ~ "silent",
    TRUE ~ "join only"
  )) %>%
  count(active)

## # A tibble: 3 × 2
##   active        n
##   <chr>     <int>
## 1 active      331
## 2 join only    97
## 3 silent       92

# Bar chart of the three participation groups, each bar labelled with its
# group size.
# The counts are computed once and used for both the bar heights and the
# labels, so the printed numbers can never drift from the data (they used to
# be typed in by hand). Labels sit 12 units below the bar top, which
# reproduces the previous manual placement. Fill colours are matched by group
# name instead of by position.
bars <- sample %>%
  mutate(
    active = ifelse(is.na(comment_count), "silent", "active"),
    active = ifelse(!ParticipantID %in% any_check & active == "silent",
                    "join only", active),
    active = factor(active, levels = c("join only", "silent", "active"))
  ) %>%
  count(active) %>%
  ggplot(aes(x = active, y = n)) +
  geom_col(aes(fill = active)) +
  geom_text(aes(y = n - 12, label = n)) +
  scale_fill_manual(
    values = c("join only" = "darkgrey",
               "silent" = "lightgrey",
               "active" = "#6699FF"),
    guide = "none"
  ) +
  xlab("Participation group") +
  ylab("") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
  
# Number of users for each comment count observed in the sample.
histog <- ggplot(sample, aes(x = comment_count)) +
  geom_bar(fill = "#6699FF") +
  labs(
    x = "Number of comments written in experiment",
    y = "Number of users"
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Scatter of study comment count against log(comment karma + 1); point colour
# encodes mean comment toxicity and point size mean comment length.
dots <- ggplot(
  sample,
  aes(x = comment_count, y = log(comment_karma + 1))
) +
  geom_point(
    aes(colour = comment_mean_tox, size = comment_mean_lenght),
    alpha = 0.8
  ) +
  # disabled layer:
  # geom_smooth(method = "lm", aes(comment_count, comment_mean_tox)) +
  scale_colour_gradient(low = "#6699FF", high = "black") +
  guides(
    colour = guide_legend(title = "Toxicity"),
    size = guide_legend(title = "Length")
  ) +
  labs(
    x = "Comments written in experiment",
    y = "Comment karma on Reddit (log-scale)"
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Draw the two participation panels next to each other (bar panel narrower
# than the histogram) and write both participation figures to ../output.
combi2 <- grid.arrange(
  bars, histog,
  top = "User-level discussion participation",
  widths = c(3, 4.5)
)

ggsave(filename = "../output/participation_dots.pdf", plot = dots,
       width = 5, height = 4)
ggsave(filename = "../output/participation_total2.pdf", plot = combi2,
       width = 6, height = 3)

Survey Sample

Demographics

# Frequency of each gender code in the sample.
# (1 = Man, 2 = Woman, 3 = Other)
table(sample$gender)

## 
##   1   2   3 
## 251 239  30

# Number of sample rows per value of `condition`.
table(sample$condition)

## 
##    control incentives moderation 
##        181        169        170

# Number of sample rows per value of `subreddit`.
table(sample$subreddit)

## 
## DiscussPolitics1 DiscussPolitics2 DiscussPolitics3 DiscussPolitics4 
##               80               85               87               82 
## DiscussPolitics5 DiscussPolitics6 
##               85              101

# Age histogram with the sample size and gender counts printed in the panel.
# - mean()/sd() use na.rm = TRUE so that a single missing age does not turn
#   the axis label into "M = NA, SD = NA".
# - Gender counts are selected by code (1 = Man, 2 = Woman, 3 = Other) rather
#   than by position in table(), which would shift silently if one category
#   were absent.
ageplot <- ggplot(sample) +
  geom_histogram(aes(age), fill = "#6699FF") +
  theme_bw() +
  ylab("Sample: Participants who joined subreddit") +
  xlab(paste0(
    "Age in years (M = ", round(mean(sample$age, na.rm = TRUE), 2),
    ", SD = ", round(sd(sample$age, na.rm = TRUE), 2), ")"
  )) +
  annotate(geom = "text", x = 60, y = 30, hjust = 0,
           label = paste("Male: ", sum(sample$gender == 1, na.rm = TRUE))) +
  annotate(geom = "text", x = 60, y = 28, hjust = 0,
           label = paste("Female: ", sum(sample$gender == 2, na.rm = TRUE))) +
  annotate(geom = "text", x = 60, y = 26, hjust = 0,
           label = paste("Other: ", sum(sample$gender == 3, na.rm = TRUE))) +
  annotate(geom = "text", x = 60, y = 33, hjust = 0,
           label = paste("N: ", nrow(sample)), fontface = 2) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

# Horizontal bar chart of the education codes; the meaning of codes 1-6 is
# written into the panel at the base of each bar.
educplot <- ggplot(sample, aes(x = education)) +
  geom_bar(fill = "#6699FF") +
  annotate(
    geom = "text", x = 1:6, y = 0, hjust = 0,
    label = c(
      "No degree",
      "High school",
      "Some college education, no degree",
      "Bachelor's degree",
      "Some postgraduate education, no degree",
      "Postgraduate degree"
    )
  ) +
  coord_flip() +
  labs(x = "Level of formal education", y = "") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Age and education panels in one row, saved as a single PDF.
demo <- grid.arrange(
  ageplot, educplot,
  top = "Age and education across study sample",
  nrow = 1
)

ggsave(filename = "../output/demographics.pdf", plot = demo,
       width = 10, height = 5)

Political variables pre-survey

# Bar chart of political interest with a four-category x axis.
int <- ggplot(sample, aes(x = polinterest)) +
  geom_bar(fill = "#6699FF") +
  # NOTE(review): "Sighly" looks like a typo for "Slightly". It is left
  # untouched because these strings are the scale limits -- confirm how
  # polinterest is coded before changing it.
  xlim("Not interested", "Sighly interested",
       "Moderately interested", "Very interested") +
  labs(title = "Political Interest", x = "", y = "") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Bar chart of left-right self-placement. Bars are coloured by their x
# position on a diverging blue-grey-red gradient centred on 6; only the two
# end points of the 11-point axis are labelled.
# after_stat(x) replaces the deprecated `..x..` notation, and the panel-grid
# settings that were given in two separate theme() calls are merged into one.
pol <- ggplot(sample) +
  geom_bar(aes(x = leftright, fill = after_stat(x))) +
  theme_bw() +
  scale_fill_gradient2(low = "#6699ff", mid = "lightgrey", high = "#bc3455",
                       midpoint = 6) +
  ggtitle("Political Orientation") +
  xlab("") +
  ylab("") +
  xlim("far left", "", "", "", "", "", "", "",
       "", "", "far right") +
  theme(legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

# Interest and orientation panels side by side; the orientation panel is also
# saved on its own.
polint <- grid.arrange(int, pol, nrow = 1)

ggsave(filename = "../output/pol_lefright.pdf", plot = polint,
       width = 12, height = 5)
ggsave(filename = "../output/lefright.pdf", plot = pol,
       width = 4, height = 4)

Reddit Data

Engagement metrics by political issue

Issue	Toxicity	Comments	Views
The US should condemn Israel’s military actions in Gaza as acts of genocide and impose full sanctions.	0.191	435	5535
Prostitution should be illegal.	0.190	213	3134
Things like gender-neutral language and stating pronouns are silly issues.	0.183	538	3926
The death penalty should be reestablished US-wide.	0.149	388	4985
The US should provide financial and military aid to Ukraine.	0.136	263	3892
We need stricter gun control laws.	0.118	338	4472
Social media is a threat to democracy.	0.108	233	3460
Immigration should be regulated more strictly.	0.101	286	4235
Fur clothing should be banned.	0.096	230	3311
Police officers should wear body cameras.	0.092	313	3854
Climate change is one of the greatest threats to humanity.	0.084	223	2863
Employers should mandate vaccination.	0.073	258	3183
The government should not be responsible for providing universal health care.	0.073	220	3326
The government should not forgive student loan debt.	0.069	296	3850
Artificial intelligence should replace humans where possible.	0.068	293	4832
There should only be vegetarian food in cantines.	0.065	290	4683
A universal basic income would kill the economy.	0.064	225	3185
The federal minimum wage should be increased.	0.063	305	4321
The government should not invest in renewable energy.	0.060	198	3576
Airbnb should be banned in cities.	0.059	274	3043

Toxicity distributions by political issue

# Post titles sorted from highest to lowest mean comment toxicity. The mean is
# rounded to three decimals before sorting, so titles with equal rounded means
# keep the order in which summarise() returns them.
issue_order <- discussion_data %>%
  group_by(post_title) %>%
  summarise(Toxicity = round(mean(comment_toxicity, na.rm = TRUE), 3)) %>%
  arrange(desc(Toxicity)) %>%
  pull(post_title)

# Factor version of the post title whose level order follows issue_order.
discussion_data[["issues"]] <- factor(
  discussion_data[["post_title"]],
  levels = issue_order
)

# Twenty fill colours, one per discussion issue.
colour_values <- c(
  "#6699ff", "#729dee", "#7ca1dd", "#86a5cc", "#91a9bb",
  "#9bada9", "#a6b298", "#d3d3d3", "#bfc7c7", "#b5b1b1",
  "#ab9b9b", "#a18585", "#976f6f", "#8d5959", "#844444",
  "#7b3232", "#732525", "#6a1a1a", "#621010", "#bc3455"
)

# Twenty short display labels for the discussion issues.
# NOTE(review): these are matched to the issues purely by position -- confirm
# that the order still agrees with the toxicity ranking whenever the data
# change.
issues <- c(
  "US foreign policy toward Israel and Gaza",
  "Legality of prostitution",
  "Use of gender-neutral language and pronouns",
  "Death penalty reestablishment in the US",
  "US financial and military aid to Ukraine",
  "Gun control laws",
  "Impact of social media on democracy",
  "Immigration regulation",
  "Ban on fur clothing",
  "Police accountability and body cameras",
  "Climate change and environmental threats",
  "Mandatory vaccination policies by employers",
  "Universal healthcare responsibility",
  "Government forgiveness of student loan debt",
  "Artificial intelligence replacing human labor",
  "Vegetarian food policies in public cantines",
  "Economic impact of universal basic income",
  "Federal minimum wage increase",
  "Government investment in renewable energy",
  "Regulation of Airbnb in cities"
)

# colours, means
# Ridgeline plot of comment toxicity, one ridge per discussion issue.
# Inside aes(), `issues` is the factor column of discussion_data; in
# scale_y_discrete(), `issues` is the character vector of display labels.
#
# Changes compared with the previous version:
# - alpha = 0.5 is a fixed setting and therefore passed outside aes(); inside
#   aes() it was treated as a data value and mapped through the alpha scale
#   instead of being used as the literal opacity. guides(alpha = "none") is
#   no longer needed.
# - The theme() call that preceded theme_ridges() is dropped: theme_ridges()
#   is a complete theme and replaced those settings, and the legend is
#   switched off afterwards anyway.
tox_ridge <- ggplot(discussion_data) +
  geom_density_ridges(
    aes(x = comment_toxicity, y = fct_rev(issues), fill = fct_rev(issues)),
    alpha = 0.5
  ) +
  ylab("") +
  xlab("") +
  scale_y_discrete(labels = rev(issues)) +
  theme_ridges() +
  scale_fill_manual(values = colour_values) +
  scale_x_continuous(limits = c(0, 0.6)) +
  theme(legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
tox_ridge

ggsave("../output/toxicity_issues_ridges.pdf", tox_ridge, width = 10, height = 6)

Reddit metrics over time

# Total views over time: the first row of every subreddit/day combination,
# drawn as points with a loess smoother per subreddit.
views <- discussion_data %>%
  group_by(subreddit, date(created_post)) %>%
  slice_head(n = 1) %>%
  ggplot(aes(x = date(created_post), y = total_views, color = subreddit)) +
  geom_smooth(method = loess, alpha = 0.1) +
  geom_point(alpha = 0.3) +
  scale_color_manual(
    values = c("#29c195", "#6699ff", "#424656", "#a7aabd", "#f96d86", "#bc3455")
  ) +
  guides(color = "none") +
  labs(title = "Views per post", x = "Date", y = "") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Collected comments over time: the first row of every subreddit/day
# combination, drawn as points with a loess smoother per subreddit.
comments <- discussion_data %>%
  group_by(subreddit, date(created_post)) %>%
  slice_head(n = 1) %>%
  ggplot(aes(x = date(created_post), y = `Collected comments`,
             color = subreddit)) +
  geom_smooth(method = loess, alpha = 0.1) +
  geom_point(alpha = 0.3) +
  scale_color_manual(
    values = c("#29c195", "#6699ff", "#424656", "#a7aabd", "#f96d86", "#bc3455")
  ) +
  guides(color = "none") +
  labs(title = "Comments per post", x = "Date", y = "") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Ratio of collected comments to total views over time, again using the first
# row of every subreddit/day combination. This panel keeps its colour legend.
comments_views <- discussion_data %>%
  mutate(comments_per_views = `Collected comments` / total_views) %>%
  group_by(subreddit, date(created_post)) %>%
  slice_head(n = 1) %>%
  ggplot(aes(x = date(created_post), y = comments_per_views,
             color = subreddit)) +
  geom_smooth(method = loess, alpha = 0.1) +
  geom_point(alpha = 0.3) +
  scale_color_manual(
    values = c("#29c195", "#6699ff", "#424656", "#a7aabd", "#f96d86", "#bc3455")
  ) +
  labs(title = "Comments/views ratio", x = "Date", y = "") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Three activity panels in one row (the last one is wider) and two exports of
# the same figure at different sizes.
activity_combi <- grid.arrange(
  views, comments, comments_views,
  widths = c(3, 3, 4),
  nrow = 1
)

ggsave(filename = "../output/comments_views.pdf", plot = activity_combi,
       width = 14, height = 5)

ggsave(filename = "../output/comments_views_larger.pdf", plot = activity_combi,
       width = 12, height = 4)

# Comment toxicity by post date, one point per row of discussion_data, with a
# loess smoother per subreddit.
toxicity <- ggplot(
  discussion_data,
  aes(x = date(created_post), y = comment_toxicity, color = subreddit)
) +
  geom_smooth(method = loess, alpha = 0.1) +
  geom_point(alpha = 0.1) +
  scale_color_manual(
    values = c("#29c195", "#6699ff", "#424656", "#a7aabd", "#f96d86", "#bc3455")
  ) +
  guides(color = "none") +
  labs(title = "Comment toxicity", x = "Date", y = "") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Comment length (characters) by post date, one point per row of
# discussion_data, with a loess smoother per subreddit.
length <- ggplot(
  discussion_data,
  aes(x = date(created_post), y = length_comment_char, color = subreddit)
) +
  geom_smooth(method = loess, alpha = 0.1) +
  geom_point(alpha = 0.1) +
  scale_color_manual(
    values = c("#29c195", "#6699ff", "#424656", "#a7aabd", "#f96d86", "#bc3455")
  ) +
  guides(color = "none") +
  labs(title = "Comment length", x = "Date", y = "") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Post score divided by total views over time, using the first row of every
# subreddit/day combination. This panel keeps its colour legend.
score <- discussion_data %>%
  group_by(subreddit, date(created_post)) %>%
  mutate(score_views = score_post / total_views) %>%
  slice_head(n = 1) %>%
  ggplot(aes(x = date(created_post), y = score_views, color = subreddit)) +
  geom_smooth(method = loess, alpha = 0.1) +
  geom_point(alpha = 0.3) +
  scale_color_manual(
    values = c("#29c195", "#6699ff", "#424656", "#a7aabd", "#f96d86", "#bc3455")
  ) +
  labs(title = "Post score/views ratio", x = "Date", y = "") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Toxicity, length and score panels in one row (the last one is wider), saved
# as one PDF.
characteristics_combi <- grid.arrange(
  toxicity, length, score,
  widths = c(3, 3, 4),
  nrow = 1
)

ggsave(filename = "../output/characteristics_combi.pdf",
       plot = characteristics_combi, width = 14, height = 5)

Relationship Comments, Views, Toxicity

### Relationship Comments, Views, Toxicity
# model
# Linear regression of `Collected comments` on comment toxicity, total views
# and their interaction, fitted on all rows of discussion_data.
# NOTE(review): discussion_data appears to hold one row per comment, while
# `Collected comments` and total_views look like post-level values repeated
# across those rows -- confirm that this unit of analysis is intended.
model <- lm(`Collected comments` ~ comment_toxicity * total_views, data = discussion_data)

# Coefficient table and fit statistics of the model above.
summary(model)

## 
## Call:
## lm(formula = `Collected comments` ~ comment_toxicity * total_views, 
##     data = discussion_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.731  -9.398  -1.133   6.358  28.899 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  4.043438   0.509471   7.937 2.48e-15 ***
## comment_toxicity             5.086754   2.879936   1.766   0.0774 .  
## total_views                  0.061168   0.000558 109.619  < 2e-16 ***
## comment_toxicity:total_views 0.001208   0.002936   0.412   0.6807    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.53 on 5693 degrees of freedom
##   (122 observations deleted due to missingness)
## Multiple R-squared:  0.8102, Adjusted R-squared:  0.8101 
## F-statistic:  8100 on 3 and 5693 DF,  p-value: < 2.2e-16

# Collected comments against total views, coloured by comment toxicity, with a
# linear fit and its confidence band on top of the points.
relationship_plot <- ggplot(
  discussion_data,
  aes(x = total_views, y = `Collected comments`, colour = comment_toxicity)
) +
  geom_point(alpha = 0.6) +
  geom_smooth(
    aes(fill = comment_toxicity),
    method = "lm", se = TRUE, alpha = 0.2
  ) +
  labs(
    title = "Relationship between Comments, Views, and Toxicity",
    x = "Total Views",
    y = "Number of comments",
    colour = "Comment Toxicity",
    fill = "Comment Toxicity"
  ) +
  theme_bw(base_size = 15) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    legend.position = "right"
  )

relationship_plot

Reddit activity by users - internal vs. external

# Per-user mean toxicity inside the study (x) against outside the study (y);
# point size follows the study comment count and the line is the identity
# line (geom_abline() defaults).
comments_tox <- ggplot(
  sample,
  aes(x = comment_mean_tox, y = ex_comment_mean_tox, size = comment_count)
) +
  geom_point(color = "#6699FF", alpha = 0.5) +
  geom_abline() +
  guides(color = "none", size = "none") +
  labs(
    x = "Internal comment toxicity",
    y = "Average external comment toxicity"
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Pearson correlation of the two per-user toxicity means. The call is kept
# verbatim because its text is echoed in the printed result.
cor.test(sample$comment_mean_tox,sample$ex_comment_mean_tox)

## 
##  Pearson's product-moment correlation
## 
## data:  sample$comment_mean_tox and sample$ex_comment_mean_tox
## t = 3.8092, df = 284, p-value = 0.0001709
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1072295 0.3280655
## sample estimates:
##      cor 
## 0.220471

# Per-user mean comment length inside the study (x) against outside the study
# (y); point size follows the study comment count and the line is the
# identity line (geom_abline() defaults).
comments_length <- ggplot(
  sample,
  aes(x = comment_mean_lenght, y = ex_comment_mean_lenght,
      size = comment_count)
) +
  geom_point(color = "#6699FF", alpha = 0.5) +
  geom_abline() +
  guides(color = "none", size = "none") +
  labs(
    x = "Internal comment length",
    y = "Average external comment length"
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Pearson correlation of the two per-user length means. The call is kept
# verbatim because its text is echoed in the printed result.
cor.test(sample$comment_mean_lenght,sample$ex_comment_mean_lenght)

## 
##  Pearson's product-moment correlation
## 
## data:  sample$comment_mean_lenght and sample$ex_comment_mean_lenght
## t = 10.417, df = 284, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4364171 0.6048829
## sample estimates:
##       cor 
## 0.5257867

# Pearson correlation between log comment karma and the study comment count.
# NOTE(review): this uses log(comment_karma), whereas the scatter plot of the
# same two variables uses log(comment_karma + 1); log() returns -Inf for a
# karma of 0 -- confirm which transformation is intended.
cor.test(log(sample$comment_karma),sample$comment_count)

## 
##  Pearson's product-moment correlation
## 
## data:  log(sample$comment_karma) and sample$comment_count
## t = 2.481, df = 328, p-value = 0.0136
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02817168 0.24016730
## sample estimates:
##       cor 
## 0.1357227

# Wide layout: toxicity and length scatter plots side by side.
comment_tox_length <- grid.arrange(comments_tox, comments_length, nrow = 1)

ggsave(filename = "../output/comment_tox_length_narrow.pdf",
       plot = comment_tox_length, width = 6, height = 3)

# Tall layout: the same two panels stacked; the object is overwritten.
comment_tox_length <- grid.arrange(comments_tox, comments_length, nrow = 2)

ggsave(filename = "../output/comment_tox_length_long.pdf",
       plot = comment_tox_length, width = 3, height = 6)

Interaction Models

# Linear model of the study comment count on condition, comment karma and
# their interaction; rows with missing values are dropped by lm().
summary(lm(comment_count ~ condition * comment_karma, data = sample))

## 
## Call:
## lm(formula = comment_count ~ condition * comment_karma, data = sample)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.914 -12.613  -5.504   5.976  96.532 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        1.650e+01  1.907e+00   8.652  2.4e-16 ***
## conditionincentives                5.963e-01  2.762e+00   0.216    0.829    
## conditionmoderation                7.316e-01  2.759e+00   0.265    0.791    
## comment_karma                      7.060e-06  3.592e-05   0.197    0.844    
## conditionincentives:comment_karma -1.305e-05  5.728e-05  -0.228    0.820    
## conditionmoderation:comment_karma  2.880e-05  4.725e-05   0.610    0.543    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.94 on 324 degrees of freedom
##   (190 observations deleted due to missingness)
## Multiple R-squared:  0.005447,   Adjusted R-squared:  -0.009901 
## F-statistic: 0.3549 on 5 and 324 DF,  p-value: 0.879

# Linear model of mean study-comment toxicity on condition, mean external
# comment toxicity and their interaction; rows with missing values are
# dropped by lm().
summary(lm(comment_mean_tox ~ condition * ex_comment_mean_tox, data = sample))

## 
## Call:
## lm(formula = comment_mean_tox ~ condition * ex_comment_mean_tox, 
##     data = sample)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.10119 -0.03710 -0.00997  0.02536  0.41584 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              0.065102   0.012662   5.142 5.12e-07
## conditionincentives                      0.008496   0.019258   0.441   0.6594
## conditionmoderation                      0.021513   0.020012   1.075   0.2833
## ex_comment_mean_tox                      0.226842   0.088516   2.563   0.0109
## conditionincentives:ex_comment_mean_tox  0.004706   0.133385   0.035   0.9719
## conditionmoderation:ex_comment_mean_tox -0.043295   0.144699  -0.299   0.7650
##                                            
## (Intercept)                             ***
## conditionincentives                        
## conditionmoderation                        
## ex_comment_mean_tox                     *  
## conditionincentives:ex_comment_mean_tox    
## conditionmoderation:ex_comment_mean_tox    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06376 on 280 degrees of freedom
##   (234 observations deleted due to missingness)
## Multiple R-squared:  0.05944,    Adjusted R-squared:  0.04264 
## F-statistic: 3.539 on 5 and 280 DF,  p-value: 0.004051

Comment karma and platform seniority

# Posting a comment on an existing post and receiving upvotes results in
# "comment" karma.
# Histogram of raw comment karma.
k1 <- ggplot(sample, aes(x = comment_karma)) +
  geom_histogram(fill = "#6699FF") +
  labs(x = "Comment Karma", y = "Number of users") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Histogram of log-transformed comment karma.
k2 <- ggplot(sample, aes(x = log(comment_karma))) +
  geom_histogram(fill = "#6699FF") +
  labs(x = "Comment Karma (Log-scale)", y = "Number of users") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Histogram of account creation dates (100 bins); the earliest and latest
# dates are printed at the top of the panel, anchored at their x positions.
create <- ggplot(sample, aes(x = date(created))) +
  geom_histogram(fill = "lightgrey", bins = 100) +
  annotate(
    "text",
    x = min(date(sample$created), na.rm = TRUE), y = Inf,
    vjust = 2, hjust = 0,
    label = paste("First:", min(date(sample$created), na.rm = TRUE))
  ) +
  annotate(
    "text",
    x = max(date(sample$created), na.rm = TRUE), y = Inf,
    vjust = 2, hjust = 1,
    label = paste("Last:", max(date(sample$created), na.rm = TRUE))
  ) +
  labs(x = "Date joined Reddit", y = "Number of accounts") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Log-karma histogram and account-age histogram in one row under a common
# title that reports the sample size.
kplot <- grid.arrange(
  k2, create,
  top = paste0("Distributions of platform seniority (N = ", nrow(sample),
               ") Start of recruitment: 2024-05-09"),
  nrow = 1
)

ggsave(filename = "../output/platform_seniority.pdf", plot = kplot,
       width = 8, height = 4)

External comments

# Study comments with a timestamp strictly between 2024-06-10 and 2024-07-06,
# reduced to the columns shared with the external data and tagged "internal".
internal_comments <- discussion_data %>%
  filter(created_comment > as_datetime("2024-06-10") &
           created_comment < as_datetime("2024-07-06")) %>%
  select(ParticipantID, created_comment, comment_toxicity,
         length_comment_char) %>%
  mutate(source = "internal")

# External comments with a timestamp after 2024-05-13, same columns, tagged
# "external".
external_comments <- external_data %>%
  filter(created_comment > as_datetime("2024-05-13")) %>%
  select(ParticipantID, created_comment, comment_toxicity,
         length_comment_char) %>%
  mutate(source = "external")

# Both sources stacked; the timestamp is reduced to a calendar date and the
# start of the corresponding week is added.
all_data <- bind_rows(internal_comments, external_comments) %>%
  mutate(
    created_comment = as.Date(created_comment),
    week = floor_date(created_comment, unit = "week")
  )

# One row per source, participant and day: number of comments and their mean
# toxicity.
daily_summary <- all_data %>%
  group_by(source, ParticipantID, created_comment) %>%
  dplyr::summarise(
    comment_volume = n(),
    avg_toxicity = mean(comment_toxicity, na.rm = TRUE)
  ) %>%
  ungroup()

# Dates marked with dashed vertical lines in the time-series figures.
study_start <- as.Date("2024-06-10")
study_end <- as.Date("2024-07-05")

# Daily mean toxicity per user over time. External activity is drawn in grey
# and study activity in blue; point size is the number of comments that day,
# each source gets its own loess trend, and dashed lines mark study_start and
# study_end.
int_ext <- ggplot(daily_summary, aes(x = created_comment, y = avg_toxicity)) +
  geom_point(
    data = dplyr::filter(daily_summary, source == "external"),
    aes(size = comment_volume), color = "grey", alpha = 0.2
  ) +
  geom_point(
    data = dplyr::filter(daily_summary, source == "internal"),
    aes(size = comment_volume), color = "#6699FF", alpha = 0.5
  ) +
  geom_smooth(
    data = dplyr::filter(daily_summary, source == "external"),
    color = "darkgrey", method = "loess", se = FALSE
  ) +
  geom_smooth(
    data = dplyr::filter(daily_summary, source == "internal"),
    color = "#3d5b99", method = "loess", se = FALSE
  ) +
  geom_vline(xintercept = as.numeric(study_start),
             linetype = "dashed", color = "black") +
  geom_vline(xintercept = as.numeric(study_end),
             linetype = "dashed", color = "black") +
  scale_size_continuous(range = c(1, 5)) +
  labs(
    title = "Daily Comment Toxicity and Volume per User (External vs Internal Data)",
    x = "Date",
    y = "Average toxicity (per user per day)",
    size = "Number of Comments \n per user per day"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "right",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )
int_ext

ggsave(filename = "../output/internal_external_toxicity.pdf", plot = int_ext,
       width = 9, height = 4)

# Aggregate data by ParticipantID, day, and source
# One row per source, participant and day with the number of comments and
# their mean toxicity.
daily_summary_act <- all_data %>%
  group_by(source, ParticipantID, created_comment) %>%
  dplyr::summarize(comment_volume = n(),
    avg_toxicity = mean(comment_toxicity, na.rm = TRUE)) %>%
  ungroup()

# Daily comment volume per user over time. External activity is drawn in grey
# and study activity in blue; point size and opacity follow the mean toxicity
# of that day, each source gets its own loess trend, and dashed lines mark
# study_start and study_end.
# The figure now reads daily_summary_act, the table computed directly above
# for it. Previously that table was never used and the plot silently depended
# on a separately built object holding the same aggregation.
int_ext_act <- ggplot(daily_summary_act, aes(x = created_comment, y = comment_volume)) +
  geom_point(data = subset(daily_summary_act, source == "external"),
             aes(size = avg_toxicity, alpha = avg_toxicity), color = "grey") +
  geom_point(data = subset(daily_summary_act, source == "internal"),
             aes(size = avg_toxicity, alpha = avg_toxicity), color = "#6699FF") +
  geom_smooth(data = subset(daily_summary_act, source == "external"),
              aes(y = comment_volume), color = "darkgrey", method = "loess", se = FALSE) +
  geom_smooth(data = subset(daily_summary_act, source == "internal"),
              aes(y = comment_volume), color = "#3d5b99", method = "loess", se = FALSE) +
  geom_vline(xintercept = as.numeric(study_start), linetype = "dashed", color = "black") +
  geom_vline(xintercept = as.numeric(study_end), linetype = "dashed", color = "black") +
  labs(x = "Date", y = "Number of Comments per User per Day",
       size = "Average Toxicity",
       alpha = "Average Toxicity",
       title = "Daily Comment Volume and Toxicity per User (External vs Internal Data)") +
  scale_size_continuous(range = c(1, 5)) +
  scale_alpha_continuous(range = c(0.2, 1)) +
  theme_minimal() +
  theme(legend.position = "right",
        plot.title = element_text(hjust = 0.5),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

int_ext_act

ggsave("../output/internal_external_activity.pdf", int_ext_act, width = 9, height = 3)

Other survey descriptives

Online activity

# Horizontal bar chart of time spent online; category texts are written at
# the base of bars 2 to 5.
# NOTE(review): no text is placed at x = 1 -- confirm whether a first
# category exists and needs a label.
a <- ggplot(sample, aes(x = time_online)) +
  geom_bar(fill = "#6699FF") +
  annotate(
    geom = "text", x = 2:5, y = 0, hjust = 0,
    label = c(
      "Up to one hour per week",
      "Up to one hour per day",
      "Multiple hours per day",
      "Almost the entire day"
    )
  ) +
  coord_flip() +
  labs(title = "Time online", x = "", y = "") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )


# Horizontal bar chart of social media use; category texts are written at the
# base of bars 2 to 5.
b <- ggplot(sample, aes(x = social_media)) +
  geom_bar(fill = "#6699FF") +
  annotate(
    geom = "text", x = 2:5, y = 0, hjust = 0,
    label = c(
      "A couple of times per week",
      "About once per day",
      "Multiple times per day",
      "Almost constantly"
    )
  ) +
  coord_flip() +
  labs(title = "Social media use", x = "", y = "") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Horizontal bar chart of how often respondents comment online; category
# texts are written at the base of bars 1 to 5.
c <- ggplot(sample, aes(x = comments_online)) +
  geom_bar(fill = "#6699FF") +
  annotate(
    geom = "text", x = 1:5, y = 0, hjust = 0,
    label = c(
      "Never",
      "About once per month",
      "About once per week",
      "Almost daily",
      "Multiple times per day"
    )
  ) +
  coord_flip() +
  labs(title = "Commenting online", x = "", y = "") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# The three online-activity panels in one row, saved as one PDF.
abc <- grid.arrange(a, b, c, nrow = 1)

ggsave(filename = "../output/online_activity.pdf", plot = abc,
       width = 10, height = 3)

# affective polarization
# Overlaid densities of the two party ratings (affective_pol_1 in blue,
# affective_pol_2 in red) with a text label for each party inside the panel.
parties <- ggplot(sample) +
  geom_density(aes(x = affective_pol_1), fill = "#6699ff", alpha = 0.5) +
  geom_density(aes(x = affective_pol_2), fill = "#bc3455", alpha = 0.5) +
  annotate(
    geom = "text", x = c(10, 60), y = c(0.04, 0.02), hjust = 0,
    label = c("Rating of Republican Party", "Rating of Democratic Party")
  ) +
  labs(x = "Party Rating: 0 = negative, 100 = positive", y = "") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Density of the affective polarization score.
dist <- ggplot(sample, aes(x = affective_polarization)) +
  geom_density(fill = "#a7aabd", alpha = 0.5) +
  labs(x = "Affective Polarization (In party - out party)") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# Party-rating and polarization panels side by side; `polint` is reused as
# the name of the combined figure.
polint <- grid.arrange(parties, dist, top = "Affective Polarization", nrow = 1)

ggsave(filename = "../output/polarization.pdf", plot = polint,
       width = 10, height = 5)

Attitude grids

# trust matrix
# Answer counts for the four general-trust items, one facet per item.
trust <- sample %>%
  dplyr::select(trust_general_1:trust_general_4) %>%
  rename(
    Politics = trust_general_1,
    Media = trust_general_2,
    Science = trust_general_3,
    `People generally` = trust_general_4
  ) %>%
  # convert haven_labelled variables to factors
  as_factor() %>%
  # long format: one row per respondent and item
  pivot_longer(cols = 1:4, names_to = "Variable", values_to = "Agreement") %>%
  count(Variable, Agreement) %>%
  ggplot(aes(x = Agreement, y = n)) +
  geom_col(fill = "#6699FF") +
  facet_wrap(vars(Variable)) +
  labs(
    title = "Trust",
    x = "1: Not at all, 2: A little, 3: Quite a bit, 4: Very much",
    y = ""
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )
trust

ggsave(filename = "../output/trust_matrix.pdf", plot = trust,
       width = 4, height = 5)

## issue knowledge
# Answer counts for the 20 issue-knowledge items, one facet per issue.
# - rename_with() replaces the superseded rename_at(); the renaming itself is
#   unchanged: the first 16 characters (the length of "issue_knowledge_") are
#   cut off so that only the issue name remains as facet title.
# - The axis label typo "amoung" is corrected to "amount".
knowledge <- sample %>%
  dplyr::select(issue_knowledge_loan:issue_knowledge_socialmedia) %>%
  rename_with(~ substr(.x, 17, nchar(.x)), .cols = 1:20) %>%
  as_factor() %>%
  # long format: one row per respondent and issue
  pivot_longer(cols = 1:20,
               names_to = "Variable",
               values_to = "Knowledge") %>%
  count(Variable, Knowledge) %>%
  ggplot(aes(y = n, x = Knowledge)) +
  facet_wrap(. ~ Variable) +
  geom_col(fill = "#6699FF") +
  ylab("") +
  xlab("1: Nothing, 2: A little, 3: A moderate amount, 4: A lot") +
  ggtitle("Issue Knowledge T1") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
knowledge

ggsave("../output/knowledge_matrix.pdf", knowledge, width = 10 , height = 8)

## issue attitudes
# Answer counts for the 20 issue-attitude items, one facet per issue.
# rename_with() replaces the superseded rename_at(); the renaming itself is
# unchanged: the first 16 characters (the length of "issue_attitudes_") are
# cut off so that only the issue name remains as facet title.
attitudes <- sample %>%
  dplyr::select(issue_attitudes_loan:issue_attitudes_socialmedia) %>%
  rename_with(~ substr(.x, 17, nchar(.x)), .cols = 1:20) %>%
  as_factor() %>%
  # long format: one row per respondent and issue
  pivot_longer(cols = 1:20,
               names_to = "Variable",
               values_to = "Attitude") %>%
  count(Variable, Attitude) %>%
  ggplot(aes(y = n, x = Attitude)) +
  facet_wrap(. ~ Variable) +
  geom_col(fill = "#6699FF") +
  ylab("") +
  xlab("1: Strongly disagree - 6: Strongly agree") +
  ggtitle("Issue Attitudes T1") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
attitudes

ggsave("../output/attitudes_matrix.pdf", attitudes, width = 10 , height = 8)

Check-in Surveys

# exposure check
# Density of the exposure-check score, one semi-transparent curve per survey
# week. The fill palette is written out in the order it is applied.
exp <- post_surveys %>%
  ggplot() +
  geom_density(
    aes(x = exposure_correct, fill = as.factor(survey_week)),
    alpha = 0.7
  ) +
  scale_fill_manual(
    name = "Discussion week",
    values = c("#008a62", "#29c195", "#70facb", "#6699FF")
  ) +
  labs(
    x = "Exposure Check: TP - FP, 5 topics, 5 distractors",
    y = ""
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

ggsave(
  filename = "../output/exposure_check.pdf",
  plot = exp,
  width = 5,
  height = 3
)

# issue distance
# Density of the absolute issue distance, one semi-transparent curve per
# survey week. The fill palette is written out in the order it is applied.
dist_abs <- post_surveys %>%
  ggplot() +
  geom_density(
    aes(x = issue_distance_abs, fill = as.factor(survey_week)),
    alpha = 0.7
  ) +
  scale_fill_manual(
    name = "Discussion week",
    values = c("#008a62", "#29c195", "#70facb", "#6699FF")
  ) +
  labs(
    x = "Absolute Issue Distance: | self - others |, aggregate over 5 topics",
    y = ""
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Draw the exposure and distance densities next to each other (display only;
# only the distance panel is saved to file below).
plot <- grid.arrange(exp, dist_abs, nrow = 1)

ggsave(
  filename = "../output/issue_distance.pdf",
  plot = dist_abs,
  width = 6,
  height = 4
)

# discussion perception
# Response counts for the three discussion-perception items
# (toxic / constructive / enjoyable), one bar-chart facet per item.
perceptions <- post_surveys %>%
  dplyr::select(discussion_percep_1:discussion_percep_3) %>%
  rename(
    toxic = discussion_percep_1,
    constructive = discussion_percep_2,
    enjoyable = discussion_percep_3
  ) %>%
  as_factor() %>%
  pivot_longer(cols = 1:3, names_to = "Variable", values_to = "Perception") %>%
  count(Variable, Perception) %>%
  ggplot(aes(x = Perception, y = n)) +
  geom_col(fill = "#6699FF") +
  facet_wrap(~ Variable) +
  labs(
    x = "1: Not at all, 2: A little, 3: Quite a bit, 4: Very much",
    y = "",
    title = "Discussion Perception"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

ggsave(
  filename = "../output/discussion_perception.pdf",
  plot = perceptions,
  width = 6,
  height = 4
)

# group perception
# Response counts for the three group-perception items
# (respectful / polarized / knowledgeable), one bar-chart facet per item.
perceptions_g <- post_surveys %>%
  dplyr::select(group_percep_1:group_percep_3) %>%
  rename(
    respectful = group_percep_1,
    polarized = group_percep_2,
    knowledgeable = group_percep_3
  ) %>%
  as_factor() %>%
  pivot_longer(cols = 1:3, names_to = "Variable", values_to = "Perception") %>%
  count(Variable, Perception) %>%
  ggplot(aes(x = Perception, y = n)) +
  geom_col(fill = "#6699FF") +
  facet_wrap(~ Variable) +
  labs(
    x = "1: Not at all, 2: A little, 3: Quite a bit, 4: Very much",
    y = "",
    title = "Group Perception"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Draw the discussion and group grids next to each other (display only; only
# the group grid is saved to file below).
plot <- grid.arrange(perceptions, perceptions_g, nrow = 1)

ggsave(
  filename = "../output/group_perception.pdf",
  plot = perceptions_g,
  width = 6,
  height = 4
)

# participation
# Total number of times each self-reported participation option was ticked
# across all check-in surveys, drawn as horizontal bars.
participation <- post_surveys %>%
  dplyr::select(participation_4:participation_1) %>%
  rename(`checked topics` = participation_4,
         `read some discussions` = participation_5,
         `followed discussions` = participation_6,
         `voted some` = participation_7,
         `voted many` = participation_8,
         `wrote few comments` = participation_9,
         `commented everything` = participation_10,
         `did not participate` = participation_1) %>%
  # No as_factor() here: the indicators are summed below, so they must stay
  # numeric. On the CSV input the call was a no-op; on labelled data it would
  # have produced factors and made sum() fail.
  pivot_longer(cols = 1:8,
               names_to = "Variable",
               values_to = "Participation") %>%
  group_by(Variable) %>%
  summarise(Participation = sum(Participation, na.rm = TRUE)) %>%
  mutate(Variable = factor(Variable, levels = c("did not participate",
                                                "checked topics",
                                                "read some discussions",
                                                "followed discussions",
                                                "voted some",
                                                "voted many",
                                                "wrote few comments",
                                                "commented everything"))) %>%
  # reorder() sorts the bars by their total
  ggplot(aes(y = Participation, x = reorder(Variable, Participation))) +
  geom_col(fill = "#6699FF") +
  ylab("") +
  xlab("") +
  ggtitle("Self-reported participation") +
  theme_bw() +
  coord_flip() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

ggsave("../output/participation_check.pdf", participation, width = 6, height = 4)

Motives for participation (and barriers)

# motives
# Total number of times each motive for writing comments was selected across
# all check-in surveys, drawn as horizontal bars sorted by total.
motives <- post_surveys %>%
  dplyr::select(post_motives_20:post_motives_51) %>%
  rename(`inform others` = post_motives_20,
         `entertain others` = post_motives_39,
         `express my opinion` = post_motives_40,
         `provoke others` = post_motives_41,
         `express my emotions` = post_motives_42,
         `connect with others` = post_motives_43,
         `deceive others` = post_motives_44,
         `gain attention` = post_motives_45,
         `prove a point` = post_motives_46,
         `cause chaos` = post_motives_47,
         `bring attention to a topic` = post_motives_48,
         # label typo fixed ("pursuade" -> "persuade")
         `persuade others` = post_motives_49,
         `surprise or shock others` = post_motives_50,
         `help researchers` = post_motives_51) %>%
  pivot_longer(cols = 1:14,
               names_to = "Motives",
               values_to = "Variable") %>%
  group_by(Motives) %>%
  # TRUE instead of T: T is an ordinary variable that can be reassigned
  summarise(Variable = sum(Variable, na.rm = TRUE)) %>%
  mutate(Motives = factor(Motives)) %>%
  ggplot(aes(x = reorder(Motives, Variable), y = Variable)) +
  geom_col(fill = "#6699FF") +
  ylab("") +
  xlab("") +
  ggtitle("Motives for writing comments") +
  theme_bw() +
  coord_flip() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

ggsave("../output/motives.pdf", motives, width = 5, height = 4)

# negative motives / barriers to participation
# Total number of times each barrier to writing comments was selected across
# all check-in surveys, drawn as horizontal bars sorted by total.
barriers <- post_surveys %>%
  dplyr::select(non_participation_2:non_participation_5, non_participation_1) %>%
  rename(`no barriers` = non_participation_1,
         `personally triggered` = non_participation_2,
         `could not add anything` = non_participation_3,
         `intimidated by sophistication` = non_participation_4,
         `afraid of backlash` = non_participation_5) %>%
  pivot_longer(cols = 1:5,
               names_to = "Barriers",
               values_to = "Variable") %>%
  group_by(Barriers) %>%
  # TRUE instead of T: T is an ordinary variable that can be reassigned
  summarise(Variable = sum(Variable, na.rm = TRUE)) %>%
  mutate(Barriers = factor(Barriers)) %>%
  ggplot(aes(y = Variable, x = reorder(Barriers, Variable))) +
  geom_col(fill = "#6699FF") +
  ylab("") +
  xlab("") +
  ggtitle("Barriers to writing comments") +
  theme_bw() +
  coord_flip() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

# Draw participation, motives and barriers next to each other (display only;
# only the barriers panel is saved to file below).
plot <- grid.arrange(participation, motives, barriers, nrow = 1)

ggsave("../output/barriers.pdf", barriers, width = 5, height = 2)

## different to normal Reddit experience
# Response counts for the six "different to real Reddit" items, one bar-chart
# facet per item. Rows with a missing answer on any of the six items are
# dropped before counting (na.omit on the selected columns).
differences <- post_surveys %>%
  dplyr::select(different_to_real_1:different_to_real_6) %>%
  na.omit() %>%
  rename(`toxicity discussion` = different_to_real_1,
         `constructiveness discussion` = different_to_real_2,
         `enjoyability discussion` = different_to_real_3,
         # label typo fixed ("knowleadgeability" -> "knowledgeability"); the
         # same spelling must be used in the factor levels below
         `knowledgeability group` = different_to_real_4,
         `polarization group` = different_to_real_5,
         `respectfulness group` = different_to_real_6) %>%
  as_factor() %>%
  pivot_longer(cols = 1:6,
               names_to = "Variable",
               values_to = "Perception") %>%
  count(Variable, Perception) %>%
  # fix the facet order: discussion items first, then group items
  mutate(Variable = factor(Variable, levels = c("toxicity discussion",
                                                "constructiveness discussion",
                                                "enjoyability discussion",
                                                "knowledgeability group",
                                                "polarization group",
                                                "respectfulness group"))) %>%
  ggplot(aes(y = n, x = Perception)) +
  facet_wrap(. ~ Variable) +
  geom_col(fill = "#6699FF") +
  #coord_flip()+
  ylab("") +
  xlab("1: Very similar, 2: Somewhat similar, 3: Somewhat different, 4: Very different") +
  ggtitle("Perceived difference to typical experience on Reddit") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
differences

ggsave("../output/differences_perception.pdf", differences, width = 6, height = 5)

Relative change in commenting activity

#colors
# One base colour per experimental condition ...
colors <- c(control = "gray", incentives = "#C5701A", moderation = "#6699FF")

# ... each made semi-transparent (alpha = .7); the condition names are kept
colors <- vapply(colors, adjustcolor, character(1), alpha.f = .7)

# First and last comment date found in daily_summary_act for each participant
comment_data_ranges <- daily_summary_act %>%
  group_by(ParticipantID) %>%
  dplyr::summarize(
    earliest_comment = min(created_comment),
    latest_comment = max(created_comment)
  )

#plot hist of earliest and latest comment dates
h1 <- hist(
  comment_data_ranges$earliest_comment,
  breaks = 20,
  main = "Earliest comment date",
  xlab = "Date"
)

h2 <- hist(
  comment_data_ranges$latest_comment,
  breaks = 20,
  main = "Latest comment date",
  xlab = "Date"
)

#calculate number of days between earliest comment and study start
# Kept as a difftime; every numeric conversion below asks for days explicitly
# instead of relying on whatever unit difftime picked automatically.
days_prestudy_period <- study_start - min(comment_data_ranges$earliest_comment)

#calculate mean number of comments per day in the pre-study period
# The same window length (earliest comment of ANY participant up to the study
# start) is used as the denominator for every participant.
mean_prestudy_daily_comments <- daily_summary_act %>%
  filter(created_comment < study_start) %>%
  group_by(ParticipantID) %>%
  dplyr::summarize(
    mean_daily_comments_prestudy =
      sum(comment_volume) / as.numeric(days_prestudy_period, units = "days")
  )

#calculate mean number of comments-per day in the study period, within study
# NOTE(review): the filter keeps both study_start and study_end, while the
# denominator is study_end - study_start; if study_end is the last study day
# the window is one day longer than the divisor - confirm which is intended.
mean_duringstudy_int_daily_comments <- daily_summary_act %>%
  filter(source == "internal") %>%
  filter(created_comment >= study_start & created_comment <= study_end) %>%
  group_by(ParticipantID) %>%
  dplyr::summarize(
    mean_daily_comments_instudy =
      sum(comment_volume) / as.numeric(study_end - study_start, units = "days")
  )

#calculate mean number of comments-per day in the study period, outside study
mean_duringstudy_ext_daily_comments <- daily_summary_act %>%
  filter(source == "external") %>%
  filter(created_comment >= study_start & created_comment <= study_end) %>%
  group_by(ParticipantID) %>%
  dplyr::summarize(
    mean_daily_comments_extstudy =
      sum(comment_volume) / as.numeric(study_end - study_start, units = "days")
  )

#join the two dataframes
# NOTE(review): the pre-study table is the join base, so participants without
# any pre-study rows do not appear in mean_daily_comments, and participants
# without in-study rows get NA (not 0) - confirm this is intended.
mean_daily_comments <- mean_prestudy_daily_comments %>%
  left_join(mean_duringstudy_int_daily_comments, by = "ParticipantID") %>%
  left_join(mean_duringstudy_ext_daily_comments, by = "ParticipantID")

# Expand to one row per source x participant x calendar day over the observed
# date range; days without activity get comment_volume = 0 (toxicity stays NA).
daily_summary_act_complete <- daily_summary_act %>%
  complete(
    source,
    ParticipantID,
    created_comment = seq.Date(min(created_comment), max(created_comment), by = "day"),
    fill = list(comment_volume = 0, avg_toxicity = NA)
  )

# Share of days with at least one comment ("comment probability").
# Window lengths are converted to days explicitly rather than relying on the
# unit difftime picked automatically.
# NOTE(review): the study-window filter keeps both end dates while the divisor
# is study_end - study_start; if study_end is the last study day the share can
# exceed 1 - confirm which is intended.
mean_duringstudy_int_commentprob <- daily_summary_act_complete %>%
  filter(source == "internal") %>%
  filter(created_comment >= study_start & created_comment <= study_end) %>%
  group_by(ParticipantID) %>%
  dplyr::summarize(
    duringstudy_int_daily_commentprob =
      sum(comment_volume != 0) / as.numeric(study_end - study_start, units = "days")
  )

mean_duringstudy_ext_commentprob <- daily_summary_act_complete %>%
  filter(source == "external") %>%
  filter(created_comment >= study_start & created_comment <= study_end) %>%
  group_by(ParticipantID) %>%
  dplyr::summarize(
    duringstudy_ext_daily_commentprob =
      sum(comment_volume != 0) / as.numeric(study_end - study_start, units = "days")
  )

# Pre-study: rows of both sources are counted together here.
mean_prestudy_commentprob <- daily_summary_act_complete %>%
  filter(created_comment < study_start) %>%
  group_by(ParticipantID) %>%
  dplyr::summarize(
    prestudy_daily_commentprob =
      sum(comment_volume != 0) / as.numeric(days_prestudy_period, units = "days")
  )

mean_daily_comments <- mean_daily_comments %>%
  left_join(mean_duringstudy_int_commentprob, by = "ParticipantID") %>%
  left_join(mean_duringstudy_ext_commentprob, by = "ParticipantID") %>%
  left_join(mean_prestudy_commentprob, by = "ParticipantID")

# join mean_daily_comments to sample
s <- left_join(sample, mean_daily_comments, by = "ParticipantID")

#come back and use S for this
# Histogram of the dates in daily_summary_act, one bin per day
daily_summary_act %>%
  pull(created_comment) %>%
  hist(breaks = "day")

# bar chart of rows per day, grouped (side by side, beside = TRUE) by source;
# the two colours follow the row order of the source x date table
# NOTE(review): table() counts rows of daily_summary_act, not the sum of
# comment_volume - confirm that rows per day is the intended quantity.
barplot(
  table(daily_summary_act$source, daily_summary_act$created_comment),
  beside = TRUE,
  col = c("grey50", "#6699FF"),
  legend = TRUE,
  border = NA
)

# break down by condition
control <- s %>% filter(condition == "control")
moderation <- s %>% filter(condition == "moderation")
incentives <- s %>% filter(condition == "incentives")

# copy of 3-gang version subset by condition, not logged
this_xlim <- c(0, 12)
this_ylim <- c(0, 5)

# Three square panels in a row, one per condition, each with the identity
# line (in-study rate == pre-study rate) for reference.
par(pty="s", mfrow = c(1,3), cex = .6)
invisible(Map(
  function(condition_df, panel_title, point_col) {
    plot(
      condition_df$mean_daily_comments_prestudy,
      condition_df$mean_daily_comments_instudy,
      xlab = "Mean daily comments pre-study",
      ylab = "Mean daily comments in study",
      main = panel_title,
      pch = 16,
      col = point_col,
      xlim = this_xlim,
      ylim = this_ylim
    )
    abline(0, 1)
  },
  list(control, moderation, incentives),
  c("Control", "Moderation", "Incentives"),
  colors[c("control", "moderation", "incentives")]
))

#plot ratio by prestudy
# Ratio of the in-study to the pre-study mean daily comment rate
s <- s %>%
  mutate(prestudy_instudy_comment_ratio = mean_daily_comments_instudy / mean_daily_comments_prestudy)

control <- s %>% filter(condition == "control")
moderation <- s %>% filter(condition == "moderation")
incentives <- s %>% filter(condition == "incentives")

# Axis limits are taken from all three conditions. Previously plot() derived
# them from the control subset only, so moderation/incentives points outside
# the control range were silently clipped. finite = TRUE ignores NA and Inf
# (the ratio is infinite when the pre-study rate is 0).
ratio_xlim <- range(
  control$mean_daily_comments_prestudy,
  moderation$mean_daily_comments_prestudy,
  incentives$mean_daily_comments_prestudy,
  finite = TRUE
)
ratio_ylim <- range(
  control$prestudy_instudy_comment_ratio,
  moderation$prestudy_instudy_comment_ratio,
  incentives$prestudy_instudy_comment_ratio,
  finite = TRUE
)

par(cex = .8)
plot(
  control$mean_daily_comments_prestudy,
  control$prestudy_instudy_comment_ratio,
  xlab = "Mean daily comments pre-study",
  ylab = "In-Study/Pre-Study",
  main = "Mean daily comments in study / pre-study ",
  pch = 16,
  col = colors['control'],
  xlim = ratio_xlim,
  ylim = ratio_ylim
)
points(moderation$mean_daily_comments_prestudy, moderation$prestudy_instudy_comment_ratio,
       pch = 16, col = colors['moderation'])
points(incentives$mean_daily_comments_prestudy, incentives$prestudy_instudy_comment_ratio,
       pch = 16, col = colors['incentives'])

#plot difference by prestudy
# In-study minus pre-study mean daily comment rate
s <- s %>%
  mutate(prestudy_instudy_comment_difference = mean_daily_comments_instudy - mean_daily_comments_prestudy)

control <- s %>% filter(condition == "control")
moderation <- s %>% filter(condition == "moderation")
incentives <- s %>% filter(condition == "incentives")

# Axis limits are taken from all three conditions. Previously plot() derived
# them from the control subset only, so moderation/incentives points outside
# the control range were silently clipped.
difference_xlim <- range(
  control$mean_daily_comments_prestudy,
  moderation$mean_daily_comments_prestudy,
  incentives$mean_daily_comments_prestudy,
  finite = TRUE
)
difference_ylim <- range(
  control$prestudy_instudy_comment_difference,
  moderation$prestudy_instudy_comment_difference,
  incentives$prestudy_instudy_comment_difference,
  finite = TRUE
)

par(cex = .8)
plot(
  control$mean_daily_comments_prestudy,
  control$prestudy_instudy_comment_difference,
  xlab = "Mean daily comments pre-study",
  ylab = "In-Study - Pre-Study",
  main = "Mean daily comments pre-study vs in study",
  pch = 16,
  col = colors['control'],
  xlim = difference_xlim,
  ylim = difference_ylim
)
points(moderation$mean_daily_comments_prestudy, moderation$prestudy_instudy_comment_difference,
       pch = 16, col = colors['moderation'])
points(incentives$mean_daily_comments_prestudy, incentives$prestudy_instudy_comment_difference,
       pch = 16, col = colors['incentives'])
# dashed line: no change between pre-study and in-study rate
abline(h = 0, lty = 2)
# y = -x: the difference when the in-study rate is 0
abline(0, -1)

#### Make Raw non-diff versions
#keep
#pdf("ws_outputs/comment_raw_by_prestudy_quantile.pdf", width = 8, height = 10)

# Bin participants into quartiles of their pre-study mean daily comment rate.
# include.lowest = TRUE keeps the participant(s) at the minimum in the first
# bin; without it cut() uses (lo, hi] intervals and assigns them NA, so they
# were dropped from the plots below.
s <- s %>%
  mutate(prestudy_quantile_bin = cut(
    mean_daily_comments_prestudy,
    breaks = quantile(mean_daily_comments_prestudy, probs = seq(0, 1, by = 0.25), na.rm = TRUE),
    include.lowest = TRUE
  ))

# plot s subsets
# (dplyr::select is qualified, as elsewhere in this file)
control <- s %>%
  filter(condition == "control") %>%
  dplyr::select(prestudy_quantile_bin, mean_daily_comments_instudy) %>%
  drop_na()
moderation <- s %>%
  filter(condition == "moderation") %>%
  dplyr::select(prestudy_quantile_bin, mean_daily_comments_instudy) %>%
  drop_na()
incentives <- s %>%
  filter(condition == "incentives") %>%
  dplyr::select(prestudy_quantile_bin, mean_daily_comments_instudy) %>%
  drop_na()

#fill out the missing bins in control
bins <- s %>% pull(prestudy_quantile_bin) %>% unique()
#remove NA values and put in order
bins <- bins[!is.na(bins)]
bins <- bins[order(bins)]
control <- control %>% complete(prestudy_quantile_bin = bins)

# three stacked panels: control, moderation, incentives
par(mfrow = c(3,1))
this_ylim <- c(0, 4)
#this_ylim = NULL

this_xlab <- "Pre-Study Mean Daily Comments"
this_ylab <- "In-Study Mean Daily Comments"
this_main <- "Pre-Study vs In-Study Mean Daily Comments"

# boxplot() is called directly for all three panels; plot() with a factor on
# the right-hand side of the formula draws the same box plot.
boxplot(mean_daily_comments_instudy ~ prestudy_quantile_bin, data = control,
        xlab = this_xlab, ylab = this_ylab, main = this_main,
        pch = 16, col = colors['control'], ylim = this_ylim)
#abline(h = 0, lty = 2)
#abline(0,-1)

boxplot(mean_daily_comments_instudy ~ prestudy_quantile_bin, data = moderation,
        xlab = this_xlab, ylab = this_ylab, main = this_main,
        pch = 16, col = colors['moderation'], ylim = this_ylim)
#abline(h = 0, lty = 2)
#abline(0,-1)

boxplot(mean_daily_comments_instudy ~ prestudy_quantile_bin, data = incentives,
        xlab = this_xlab, ylab = this_ylab, main = this_main,
        pch = 16, col = colors['incentives'], ylim = this_ylim)
#abline(h = 0, lty = 2)
#abline(0,-1)

# the legend is drawn on the last (incentives) panel only
legend("topright",
       legend = c("Control", "Moderation", "Incentives"),
       col = c(colors['control'], colors['moderation'], colors['incentives']),
       pch = 15, bty = "n")

#dev.off()

Sample & Descriptives

Reddit Communities Field Experiment

2025-07-02

Comment activity per user

Survey Sample

Demographics

Political variables pre-survey

Reddit Data

Engagement metrics by political issue

Toxicity distributions by political issue

Reddit metrics over time

Relationship Comments, Views, Toxicity

Reddit activity by users - internal vs. external

Interaction Models

Comment karma and platform seniority

External comments

Other survey descriptives

Online activity

Attitude grids

Check-in Surveys

Motives for participation (and barriers)

Relative change in commenting activity