# Load the packages used below. Each library() call must be its own
# statement; two calls on one line without a separator do not parse.
library(quanteda)
library(dplyr)
I met a traveller from an antique land,
Who said - “Two vast and trunkless legs of stone
Stand in the desert… Near them, on the sand,
Half sunk a shattered visage lies, whose frown,
And wrinkled lip, and sneer of cold command,
Tell that its sculptor well those passions read
Which yet survive, stamped on these lifeless things,
The hand that mocked them, and the heart that fed;
And on the pedestal, these words appear:
My name is Ozymandias, King of Kings;
Look on my Works, ye Mighty, and despair!
Nothing beside remains. Round the decay
Of that colossal Wreck, boundless and bare
The lone and level sands stretch far away.
Make the tokenized text lowercase.
Remove (English) stopwords from the tokenized text.
library(quanteda)
library(dplyr)
library(quanteda.textstats) # for textstat_frequency()
library(ggplot2)
# Tokenize Data
# Tokenize the inaugural-address corpus, dropping punctuation and symbols,
# then lowercase every token and remove English stopwords.
inaugural_token <- tokens(
  data_corpus_inaugural,
  remove_punct = TRUE,
  remove_symbols = TRUE
) %>%
  tokens_tolower() %>%
  tokens_remove(stopwords("en"))
# Build a document-feature matrix from the tokens, keep the 50 most
# frequent features, and draw them as a dot plot.
textstat_frequency(dfm(inaugural_token), n = 50) %>%
  # Ordering by -rank places the least frequent feature first and the
  # most frequent feature last along the x axis.
  ggplot(aes(x = reorder(feature, -rank), y = frequency)) +
  geom_point() +
  xlab("Feature") +
  ylab("Frequency") +
  # Rotate the feature labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Get the frequency of each feature per president.
# Despite its name, `president_dfm` holds the output of textstat_frequency()
# (one row per feature/group pair), not a dfm; the name is kept because the
# code below refers to it.
president_dfm <- textstat_frequency(
  dfm(inaugural_token),
  groups = inaugural_token$President
)
# Subset for just "freedom": keep only the rows whose feature is "freedom",
# then print the result for inspection.
freq_freedom <- subset(president_dfm, feature %in% "freedom")
freq_freedom
# Plot the frequency of "freedom" for each president.
ggplot(freq_freedom, aes(x = group, y = frequency)) +
  geom_point() +
  # Keep 0-14 as the minimum axis range, but extend the upper bound when a
  # group exceeds 14: values outside fixed limits are dropped from the plot
  # (with only a warning), which would hide the highest counts.
  scale_y_continuous(
    limits = c(0, max(14, freq_freedom$frequency)),
    breaks = seq(0, max(14, freq_freedom$frequency), by = 2)
  ) +
  # Rotate the president names so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  # A single labs() call sets all labels; the earlier xlab(NULL)/ylab() calls
  # were overridden by it and have been removed.
  labs(x = "President", y = "Frequency", title = "Frequency for use of 'freedom' by President")