Using APIs

SICSS, 2022

Christopher Barrie

Introduction

  • Why count tweets?
    • Test your API call
    • Of substantive interest
      • But… design considerations (normalizing)

Counting tweets

library(academictwitteR)
library(dplyr)
library(lubridate)
library(ggplot2)

tweetcounts <- count_all_tweets(
  query = "Hogmanay",
  start_tweets = "2019-12-27T00:00:00Z",
  end_tweets = "2020-01-05T00:00:00Z",
  bearer_token = get_bearer(),
  granularity = "hour",
  n = 500
)
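
count_all_tweets() needs an Academic API bearer token. A minimal setup sketch, assuming the token is stored in the TWITTER_BEARER environment variable, which is where academictwitteR's get_bearer() looks for it (the package's set_bearer() helper stores it persistently in .Renviron):

# make the bearer token available for this session;
# get_bearer() reads it from the TWITTER_BEARER environment variable
Sys.setenv(TWITTER_BEARER = "REPLACE_WITH_YOUR_BEARER_TOKEN")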

Format date

tweetcounts$time <-
  parse_date_time(tweetcounts$start, 
                  orders = "ymd HMS")
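
Equivalently, lubridate's ymd_hms() parses these ISO 8601 timestamps directly (the trailing Z is read as UTC):

tweetcounts$time <- ymd_hms(tweetcounts$start)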

head(tweetcounts)
                       end                    start tweet_count
1 2019-12-27T01:00:00.000Z 2019-12-27T00:00:00.000Z          47
2 2019-12-27T02:00:00.000Z 2019-12-27T01:00:00.000Z          38
3 2019-12-27T03:00:00.000Z 2019-12-27T02:00:00.000Z          19
4 2019-12-27T04:00:00.000Z 2019-12-27T03:00:00.000Z          14
5 2019-12-27T05:00:00.000Z 2019-12-27T04:00:00.000Z          10
6 2019-12-27T06:00:00.000Z 2019-12-27T05:00:00.000Z          12
                 time
1 2019-12-27 00:00:00
2 2019-12-27 01:00:00
3 2019-12-27 02:00:00
4 2019-12-27 03:00:00
5 2019-12-27 04:00:00
6 2019-12-27 05:00:00
tweetcounts %>% ggplot() +
  geom_line(aes(time, tweet_count))
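
Hourly counts can be noisy. A short sketch, using the dplyr and lubridate functions already loaded, that aggregates the counts to days before plotting:

tweetcounts %>%
  mutate(day = floor_date(time, "day")) %>%  # bin each hour into its calendar day
  group_by(day) %>%
  summarise(tweet_count = sum(tweet_count)) %>%
  ggplot() +
  geom_line(aes(day, tweet_count))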

Adding arguments

tweetcounts <- count_all_tweets(
  query = "Hogmanay",
  place = "Edinburgh",
  is_retweet = FALSE,
  has_images = TRUE,
  start_tweets = "2019-12-27T00:00:00Z",
  end_tweets = "2020-01-05T00:00:00Z",
  bearer_token = get_bearer(),
  granularity = "hour",
  n = 500
)
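
Behind the scenes, academictwitteR's build_query() translates these extra arguments into Twitter API v2 query operators. A roughly equivalent call, passing the operators in the query string itself (the exact string build_query() constructs may differ slightly):

tweetcounts <- count_all_tweets(
  query = "Hogmanay place:Edinburgh -is:retweet has:images",
  start_tweets = "2019-12-27T00:00:00Z",
  end_tweets = "2020-01-05T00:00:00Z",
  bearer_token = get_bearer(),
  granularity = "hour",
  n = 500
)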

Adding user parameters

tweetcounts <- count_all_tweets(
  query = "Hogmanay",
  users = "edhogmanay",
  start_tweets = "2019-12-27T00:00:00Z",
  end_tweets = "2020-01-05T00:00:00Z",
  bearer_token = get_bearer(),
  granularity = "day",
  n = 500
)

tweetcounts
                       end                    start tweet_count
1 2019-12-28T00:00:00.000Z 2019-12-27T00:00:00.000Z           1
2 2019-12-29T00:00:00.000Z 2019-12-28T00:00:00.000Z           0
3 2019-12-30T00:00:00.000Z 2019-12-29T00:00:00.000Z           1
4 2019-12-31T00:00:00.000Z 2019-12-30T00:00:00.000Z           4
5 2020-01-01T00:00:00.000Z 2019-12-31T00:00:00.000Z           6
6 2020-01-02T00:00:00.000Z 2020-01-01T00:00:00.000Z           2
7 2020-01-03T00:00:00.000Z 2020-01-02T00:00:00.000Z           3
8 2020-01-04T00:00:00.000Z 2020-01-03T00:00:00.000Z           1
9 2020-01-05T00:00:00.000Z 2020-01-04T00:00:00.000Z           0
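
The users argument should also accept a character vector of handles, which build_query() combines with OR. A sketch along those lines (the second handle is purely illustrative):

tweetcounts <- count_all_tweets(
  query = "Hogmanay",
  users = c("edhogmanay", "anotherhandle"),  # "anotherhandle" is a placeholder
  start_tweets = "2019-12-27T00:00:00Z",
  end_tweets = "2020-01-05T00:00:00Z",
  bearer_token = get_bearer(),
  granularity = "day",
  n = 500
)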

Normalizing

Raw counts track overall platform activity as much as topic salience, so it helps to divide by a baseline. Here the counts for "immigration" are normalized by counts for a common word, "therefore", as a crude proxy for total tweet volume.

tweetcounts <- count_all_tweets(
  query = "immigration",
  start_tweets = "2015-12-27T00:00:00Z",
  end_tweets = "2020-01-05T00:00:00Z",
  bearer_token = get_bearer(),
  granularity = "day",
  n = 5000
)
baselinecounts <- count_all_tweets(
  query = "therefore",
  start_tweets = "2015-12-27T00:00:00Z",
  end_tweets = "2020-01-05T00:00:00Z",
  bearer_token = get_bearer(),
  granularity = "day",
  n = 5000
)
normalize_counts <- function(tweetcounts, baselinecounts) {
  # element-wise division: assumes both data frames cover the same
  # periods in the same row order
  tweetcounts <- tweetcounts$tweet_count
  baselinecounts <- baselinecounts$tweet_count
  normalized_counts <- tweetcounts/baselinecounts
  return(normalized_counts)
}

tweetcounts$normalized_count <-
  normalize_counts(tweetcounts = tweetcounts,
                   baselinecounts = baselinecounts)

head(tweetcounts$normalized_count)
[1] 0.8641441 1.4070588 1.8567669 2.3317991 1.1134310 1.0938708
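
The element-wise division above relies on the two data frames lining up row for row. A more defensive sketch joins them on the start timestamp before dividing, using dplyr (already loaded):

normalized <- tweetcounts %>%
  left_join(baselinecounts, by = "start",
            suffix = c("_topic", "_baseline")) %>%
  mutate(normalized_count = tweet_count_topic / tweet_count_baseline)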