library(tidyverse)
library(lubridate)
library(legislatoR)
# load packages
library(tidyverse)
library(legislatoR) # bug: typo in library(LegislatoR)

# political records of German legislators for session 18, each row extended
# with the matching core record (joined on the shared `pageid` key)
political_df <-
  get_political(legislature = "deu") %>%
  filter(as.numeric(session) == 18) %>%
  left_join(get_core(legislature = "deu"), by = "pageid")
# bug (fixed): the country code is "deu", not "ger"
# bug (fixed): `session` is a column name and must not be put in quotation marks

# wiki traffic per legislator: mean and maximum of `traffic` over the window
# 2013-10-22 to 2017-10-24 (both bounds inclusive), one row per `pageid`
traffic_df <-
  get_traffic(legislature = "deu") %>%
  filter(date >= "2013-10-22", date <= "2017-10-24") %>%
  group_by(pageid) %>%
  summarise(
    traffic_mean = mean(traffic, na.rm = TRUE),
    traffic_max = max(traffic, na.rm = TRUE)
  )
# bug (fixed): the country code is "deu", not "ger"


# number of sessions served: count the rows each `pageid` has in the
# political table (across all sessions, not only session 18)
sessions_served_df <-
  get_political(legislature = "deu") %>%
  group_by(pageid) %>%
  tally(name = "sessions_served")
# bug (fixed): the pipe operator was mistyped as "%%" (missing ">")

# merge: start from the session-18 legislators and attach, by `pageid`,
# first the sessions-served counts and then the traffic summaries
legislator_df <-
  political_df %>%
  left_join(sessions_served_df, by = "pageid") %>%
  left_join(traffic_df, by = "pageid")

# compute age
#
# Returns the time elapsed between `birth` and `date_at`, in (fractional)
# years.
#
# birth:   birth date(s) in a year-month-day format that lubridate::ymd() can
#          parse; may be a vector
# date_at: reference date(s), in the same kind of format
# returns: numeric vector of years, positive when `date_at` is after `birth`
get_age <- function(birth, date_at) {
  # difftime(later, earlier) is positive, so `date_at` has to come first
  elapsed <- difftime(lubridate::ymd(date_at), lubridate::ymd(birth))
  # namespace-qualified like ymd() above, so the function does not depend on
  # lubridate being attached by the caller
  lubridate::time_length(elapsed, "years")
}
# bug (fixed): the arguments of difftime() were in the wrong order

# age on 2017-10-24, rounded to whole years (round() defaults to 0 digits)
legislator_df[["age_in_years"]] <- round(get_age(legislator_df[["birth"]], "2017-10-24"))

# plot top 10 pageviews
# order all legislators by average daily page views, highest first
legislator_df <- arrange(legislator_df, desc(traffic_mean))
# seq_len() stays correct for an empty data frame, where 1:nrow() yields c(1, 0)
legislator_df$rank <- seq_len(nrow(legislator_df))
# keep the plotted columns under display names, then take the ten top-ranked rows
legislator_df_table <- head(
  dplyr::select(
    legislator_df,
    Rank = rank,
    Representative = name,
    Mean = traffic_mean,
    Maximum = traffic_max
  ),
  10
)
# bug (fixed): select() was missing its data argument

# ranks are plotted negated so that rank 1 ends up at the top once the
# coordinates are flipped; the axis is relabelled with the positive ranks
ggplot(legislator_df_table, aes(y = Mean, x = -Rank)) +
  xlab("Rank") + ylab("Avg. daily page views") +
  labs(title = "Top 10 representatives by average daily page views") +
  geom_bar(stat = "identity") +  # bug (fixed): the argument is `stat`, not `stats`
  scale_x_continuous(
    breaks = -rev(seq_len(nrow(legislator_df_table))),
    labels = rev(seq_len(nrow(legislator_df_table)))
  ) + # bug (fixed): the plus sign was missing here
  # white name label, left-aligned at y = 10
  geom_text(aes(y = 10, label = Representative), hjust = 0, color = "white", size = 2) +
  coord_flip() +
  theme_minimal()

# linear model: log of mean page views explained by sessions served, party,
# sex, and age in years
legislator_df[["traffic_log"]] <- log(legislator_df[["traffic_mean"]])

covars <- c("sessions_served", "party", "sex", "age_in_years")
# formula text "traffic_log ~ <covariates>"; the covariates are joined with
# " + " (bug fixed: a plus sign, not a minus sign, separates them)
fmla <- paste0("traffic_log ~ ", paste(covars, collapse = " + "))
log_traffic_model <- lm(formula = fmla, data = legislator_df)
summary(log_traffic_model)
## 
## Call:
## lm(formula = fmla, data = legislator_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0877 -0.6216 -0.2203  0.3186  5.1325 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      3.002022   0.257355  11.665  < 2e-16 ***
## sessions_served  0.320763   0.023761  13.500  < 2e-16 ***
## partyCDU        -0.422987   0.151756  -2.787  0.00547 ** 
## partyCSU        -0.230528   0.197778  -1.166  0.24421    
## partyDIE LINKE  -0.090306   0.188201  -0.480  0.63150    
## partynone        1.885639   1.081932   1.743  0.08184 .  
## partySPD        -0.474897   0.154138  -3.081  0.00215 ** 
## sexmale         -0.109511   0.089501  -1.224  0.22156    
## age_in_years    -0.019132   0.004494  -4.258 2.37e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.067 on 648 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.2469, Adjusted R-squared:  0.2376 
## F-statistic: 26.56 on 8 and 648 DF,  p-value: < 2.2e-16
# plot table
# render the fitted model `log_traffic_model` as a formatted regression table
# via sjPlot (the output pasted below lists estimates, CIs and p-values)
sjPlot::tab_model(log_traffic_model)
##   traffic log
## Predictors Estimates CI p
## (Intercept) 3.00 2.50 – 3.51 <0.001
## sessions_served 0.32 0.27 – 0.37 <0.001
## party [CDU] -0.42 -0.72 – -0.12 0.005
## party [CSU] -0.23 -0.62 – 0.16 0.244
## party [DIE LINKE] -0.09 -0.46 – 0.28 0.632
## party [none] 1.89 -0.24 – 4.01 0.082
## party [SPD] -0.47 -0.78 – -0.17 0.002
## sex [male] -0.11 -0.29 – 0.07 0.222
## age_in_years -0.02 -0.03 – -0.01 <0.001
## Observations 657
## R2 / R2 adjusted 0.247 / 0.238