library(tidyverse)
library(lubridate)
library(legislatoR)
# load packages
library(tidyverse)
library(legislatoR) # bug: typo in library(LegislatoR)
# Political data on German legislators: keep the rows whose session number is
# 18, then attach the core table by pageid.
# fixed: the legislature code is "deu" (not "ger").
# fixed: `session` is a bare column name inside filter(), not a quoted string.
political_df <-
  get_political(legislature = "deu") %>%
  filter(as.numeric(session) == 18) %>%
  left_join(get_core(legislature = "deu"), by = "pageid")
# Wiki traffic per legislator: mean and maximum of `traffic` over the rows
# dated 2013-10-22 through 2017-10-24 (both bounds inclusive), one row per
# pageid. Missing traffic values are ignored in both summaries.
# fixed: the legislature code is "deu" (not "ger").
traffic_df <-
  get_traffic(legislature = "deu") %>%
  filter(date >= "2013-10-22", date <= "2017-10-24") %>%
  group_by(pageid) %>%
  summarise(
    traffic_mean = mean(traffic, na.rm = TRUE),
    traffic_max = max(traffic, na.rm = TRUE)
  )
# Sessions served: number of rows each pageid has in the political table
# (all sessions, not only session 18).
# fixed: the pipe operator is "%>%" (the ">" was missing, leaving "%%").
sessions_served_df <-
  get_political(legislature = "deu") %>%
  group_by(pageid) %>%
  dplyr::summarise(sessions_served = n(), .groups = "drop")
# Merge: start from political_df and add the sessions-served count and the
# traffic summaries. Both are left joins on pageid, so every row of
# political_df is kept even when it has no match.
legislator_df <-
  political_df %>%
  left_join(sessions_served_df, by = "pageid") %>%
  left_join(traffic_df, by = "pageid")
# compute age
# Elapsed time between `birth` and `date_at`, expressed in (fractional) years.
#
# birth:   date(s) of birth, parsed with lubridate::ymd().
# date_at: reference date(s), parsed with lubridate::ymd().
# Returns a numeric vector; values are positive when `date_at` is after
# `birth`.
get_age <- function(birth, date_at) {
  # fixed: difftime() takes the later date first (date_at, then birth);
  # with the arguments swapped every age comes out negative.
  elapsed <- difftime(lubridate::ymd(date_at), lubridate::ymd(birth))
  # Namespace-qualified like the ymd() calls, so the function does not depend
  # on lubridate being attached.
  lubridate::time_length(elapsed, "years")
}
# Age as of 2017-10-24, rounded to a whole number of years.
legislator_df[["age_in_years"]] <- round(
  get_age(legislator_df[["birth"]], "2017-10-24")
)
# plot top 10 pageviews
# Sort by mean traffic (highest first) and number the rows in that order.
legislator_df <- arrange(legislator_df, desc(traffic_mean))
# seq_len() rather than 1:nrow(): 1:0 would give c(1, 0) on an empty data
# frame and make the assignment fail.
legislator_df$rank <- seq_len(nrow(legislator_df))
# Keep and rename the four display columns, then take the first ten rows.
# fixed: select() needs the data frame as its first argument.
legislator_df_table <- dplyr::select(
  legislator_df,
  Rank = rank,
  Representative = name,
  Mean = traffic_mean,
  Maximum = traffic_max
)
legislator_df_table <- head(legislator_df_table, 10)
# Rank is negated on the x aesthetic so that, after coord_flip(), rank 1 sits
# at the top; the axis breaks are relabelled with the positive ranks.
ggplot(legislator_df_table, aes(y = Mean, x = -Rank)) +
  xlab("Rank") +
  ylab("Avg. daily page views") +
  labs(title = "Top 10 representatives by average daily page views") +
  geom_bar(stat = "identity") + # fixed: the argument is `stat`, not `stats`
  scale_x_continuous(
    breaks = -rev(seq_len(nrow(legislator_df_table))),
    labels = rev(seq_len(nrow(legislator_df_table)))
  ) + # fixed: the "+" joining this layer to the next was missing
  geom_text(
    aes(y = 10, label = Representative),
    hjust = 0, color = "white", size = 2
  ) +
  coord_flip() +
  theme_minimal()
# Linear model of log mean page views on sessions served, party, sex, and age
# in years.
legislator_df[["traffic_log"]] <- log(legislator_df[["traffic_mean"]])
covars <- c("sessions_served", "party", "sex", "age_in_years")
# fixed: covariates are joined with " + " (not " - ") to form the right-hand
# side of the formula string.
fmla <- paste0("traffic_log ~ ", paste(covars, collapse = " + "))
# Fit first, then print the summary.
log_traffic_model <- lm(fmla, legislator_df)
summary(log_traffic_model)
##
## Call:
## lm(formula = fmla, data = legislator_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0877 -0.6216 -0.2203 0.3186 5.1325
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.002022 0.257355 11.665 < 2e-16 ***
## sessions_served 0.320763 0.023761 13.500 < 2e-16 ***
## partyCDU -0.422987 0.151756 -2.787 0.00547 **
## partyCSU -0.230528 0.197778 -1.166 0.24421
## partyDIE LINKE -0.090306 0.188201 -0.480 0.63150
## partynone 1.885639 1.081932 1.743 0.08184 .
## partySPD -0.474897 0.154138 -3.081 0.00215 **
## sexmale -0.109511 0.089501 -1.224 0.22156
## age_in_years -0.019132 0.004494 -4.258 2.37e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.067 on 648 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.2469, Adjusted R-squared: 0.2376
## F-statistic: 26.56 on 8 and 648 DF, p-value: < 2.2e-16
# plot table
# Show the fitted model (log_traffic_model) as a table via sjPlot::tab_model();
# sjPlot is called with `::`, so it is not attached with library().
sjPlot::tab_model(log_traffic_model)
## |                   | traffic log                          |
## | Predictors        | Estimates | CI            | p      |
## |-------------------|-----------|---------------|--------|
## | (Intercept)       | 3.00      | 2.50 – 3.51   | <0.001 |
## | sessions_served   | 0.32      | 0.27 – 0.37   | <0.001 |
## | party [CDU]       | -0.42     | -0.72 – -0.12 | 0.005  |
## | party [CSU]       | -0.23     | -0.62 – 0.16  | 0.244  |
## | party [DIE LINKE] | -0.09     | -0.46 – 0.28  | 0.632  |
## | party [none]      | 1.89      | -0.24 – 4.01  | 0.082  |
## | party [SPD]       | -0.47     | -0.78 – -0.17 | 0.002  |
## | sex [male]        | -0.11     | -0.29 – 0.07  | 0.222  |
## | age_in_years      | -0.02     | -0.03 – -0.01 | <0.001 |
## | Observations      | 657                                  |
## | R2 / R2 adjusted  | 0.247 / 0.238                        |