# Read the marketing EDA extract into `df`, then print a compact
# column-by-column preview (types and first values) to sanity-check the load.
df <- fread(input = "data/marketing_eda.csv")
glimpse(df)
## Rows: 1,000
## Columns: 9
## $ CustomerID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
## $ Age <int> 54, 18, 42, 27, 53, 35, 64, 41, 24, 53, 42, 54, 63, 37, 43,…
## $ Gender <chr> "M", "F", "F", "F", "F", "M", "F", "M", "M", "F", "M", "F",…
## $ Device <chr> "Mobile", "Mobile", "Mobile", "Desktop", "Mobile", "Desktop…
## $ Channel <chr> "Social", "Search", "Search", "Social", "Social", "Video", …
## $ Ad_Spend <dbl> 718.60, 233.00, 122.51, 198.78, 145.19, 125.74, 421.93, 189…
## $ Clicks <int> 95, 34, 18, 19, 19, 9, 47, 25, 23, 15, 41, 2, 33, 66, 2, 17…
## $ Purchases <int> 6, 1, 0, 1, 4, 0, 0, 4, 2, 1, 1, 0, 2, 2, 0, 0, 0, 2, 4, 0,…
## $ Revenue <dbl> 149.16, 22.22, 0.00, 13.22, 150.48, 0.00, 0.00, 82.48, 177.…
# Histogram of ad spend with reference lines at the mean (dashed) and the
# median (solid), so the gap between the two shows the skew of the distribution.
#
# The reference lines are passed as constant `xintercept` values rather than
# mapped inside aes(): a scalar summary inside aes() is recycled across every
# row of the inherited data, which draws one overlapping line per observation.
# `na.rm = TRUE` keeps the summaries consistent with the histogram, which
# drops missing values before binning.
ggplot(df, aes(x = Ad_Spend)) +
  geom_histogram(bins = 30, fill = "grey70", color = "white") +
  geom_vline(xintercept = mean(df$Ad_Spend, na.rm = TRUE),
             linetype = "dashed") +
  geom_vline(xintercept = median(df$Ad_Spend, na.rm = TRUE)) +
  labs(subtitle = "Dashed = mean, solid = median",
       x = "Ad Spend ($)", y = "Count") +
  theme_minimal()
Questions:
# Same 30-bin histogram of ad spend, drawn on a log10 x-axis so the long
# right tail is compressed and the bulk of the distribution is easier to read.
ggplot(data = df, mapping = aes(x = Ad_Spend)) +
  geom_histogram(bins = 30, fill = "grey70", color = "white") +
  scale_x_log10() +
  theme_minimal() +
  labs(
    title = "Ad Spend on Log Scale",
    x = "Ad Spend ($, log10)",
    y = "Count"
  )
Questions:
# Bar chart of how many rows fall in each Channel, ordered by count, with
# each bar annotated by its share of all rows.
df %>%
  count(Channel) %>%
  # p = each channel's count as a fraction of the total
  mutate(p = prop.table(n)) %>%
  ggplot(mapping = aes(x = reorder(Channel, n), y = n)) +
  geom_col(fill = "steelblue") +
  # percentage label sits just above the top of each bar
  geom_text(
    mapping = aes(label = percent(p, accuracy = 0.1)),
    vjust = -0.3,
    size = 3.5
  ) +
  theme_minimal() +
  labs(
    title = "Channel Mix",
    x = "Channel",
    y = "Count"
  )
Questions:
# Single boxplot of raw ad spend; the y-axis tick labels are formatted as
# dollar amounts.
ggplot(data = df, mapping = aes(y = Ad_Spend)) +
  geom_boxplot(fill = "lightblue") +
  scale_y_continuous(labels = dollar_format()) +
  theme_minimal() +
  labs(
    title = "Boxplot of Ad Spend",
    y = "Ad Spend ($)"
  )
Questions:
# Boxplot of log-transformed ad spend, for comparison with the raw-scale
# boxplot.
#
# log1p(x) computes log(x + 1), which stays finite when spend is zero.
# The y-axis is in log units, so it is deliberately NOT formatted with a
# dollar formatter: labelling log values as "$5" would misstate the scale.
# The title and axis label name the transform so this plot cannot be
# mistaken for the raw-scale one.
ggplot(df, aes(y = log1p(Ad_Spend))) +
  geom_boxplot(fill = "lightblue") +
  labs(title = "Boxplot of Log Ad Spend",
       y = "log(Ad Spend + 1)") +
  theme_minimal()
Questions: