This blogpost explores suicide trends among youth (ages 10 - 24) in Florida between 2006 and 2017, examining the effects of race and gender. The post answers the following questions with respect to population in focus:
Q1 - What is the overall trajectory of youth suicides in FL between 2006 and 2017?
Q2 - What ages are most vulnerable to suicide?
Q3 - What are the most prevalent methods of youth suicide?
Q4 - How do trends compare across the methods of suicide?
Q5 - How do trends compare by race, sex, and methods of suicide?
This section contains technical information for deeper analysis and reproduction. Casual readers are invited to skip it.
Packages used in this report.
# Attach these packages so their functions don't need to be qualified
# see http://r-pkgs.had.co.nz/namespace.html#search-path
library(magrittr) # pipes %>%
library(ggplot2) # graphs
library(dplyr) # data wrangling
requireNamespace("tidyr") # data tidying
Script dependencies.
source("./scripts/modeling/model-basic.R")
source("./scripts/common-functions.R")
Definitions of global object (file paths, factor levels, object groups ) used throughout the report.
library(ggplot2)
# you will need to replace this path to the location where you stored your data file
path_file_input <- "data-unshared/derived/9-population-suicide.rds"
path_file_input2 <- "./data-unshared/derived/9-population-suicide-2.rds"
# to help with sorting the levels of the `age_group` factor
lvl_age_groups <- c(
"less_than_1" = "<1"
,"1_4" = "1-4"
,"5_9" = "5-9"
,"10_14" = "10-14"
,"15_19" = "15-19"
,"20_24" = "20-24"
,"25_34" = "25-34"
,"35_44" = "35-44"
,"45_54" = "45-54"
,"55_64" = "55-64"
,"65_74" = "65-74"
,"75_84" = "75-84"
,"85_plus" = "85+"
)
age_groups_in_focus <- lvl_age_groups[4:12]
age_groups_10_24 <- lvl_age_groups[4:6]
#set default ggplot theme
#set default ggplot theme
ggplot2::theme_set(
ggplot2::theme_bw(
)+
theme(
strip.background = element_rect(fill="grey90", color = NA)
)
)
Definitions of custom functions.
#updated to new compute rate function, that includes option for wide data
compute_rate <- function( d ,grouping_frame ,wide = FALSE ){
# d <- ds_population_suicide
# grouping_frame <- c("year")
#
d_wide <- d %>%
dplyr::group_by(.dots = grouping_frame) %>%
dplyr::summarize(
n_population = sum(n_population, na.rm = T)
,n_suicide = sum(n_suicides, na.rm = T)
,n_gun = sum(`Firearms Discharge`, na.rm=T)
,n_drug = sum(`Drugs & Biological Substances`, na.rm=T)
,n_hanging = sum(`Hanging, Strangulation, Suffocation`, na.rm=T)
,n_jump = sum(`Jump From High Place`, na.rm=T)
,n_other_seq = sum(`Other & Unspec & Sequelae`, na.rm = T)
,n_other_liq = sum(`Other & Unspec Sol/Liq & Vapor`, na.rm = T)
,n_other_gas = sum(`Other Gases & Vapors`, na.rm = T)
) %>%
dplyr::ungroup() %>%
dplyr::mutate(
# n_other = n_suicide - n_drug - n_gun -n_hanging - n_jump
# n_non_gun = n_suicide - n_gun
n_non_gun_hang = n_suicide - n_gun - n_hanging
,rate_suicide = (n_suicide/n_population)*100000
,rate_gun = (n_gun/n_population)*100000
,rate_drug = (n_drug/n_population)*100000
,rate_hanging = (n_hanging/n_population)*100000
,rate_jump = (n_jump/n_population)*100000
# ,rate_other = (n_other/n_population)*100000
,rate_other_seq = (n_other_seq/n_population)*100000
,rate_other_liq = (n_other_liq/n_population)*100000
,rate_other_gas = (n_other_gas/n_population)*100000
# ,rate_non_gun = (n_non_gun/n_population)*100000
,rate_non_gun_hang = (n_non_gun_hang/n_population)*100000
)
# d_wide %>% glimpse()
col_select <- c("n_suicide"
,"n_drug"
,"n_gun"
,"n_hanging"
,"n_jump"
,"n_other_seq"
,"n_other_liq"
,"n_other_gas"
# ,"n_non_gun"
,"n_non_gun_hang")
d_n <- d_wide %>% dplyr::select(c(grouping_frame , col_select)) %>%
tidyr::pivot_longer(
cols = col_select
,names_to = "suicide_cause"
,values_to = "n_suicides"
) %>%
# tidyr::gather("suicide_cause", "n_suicides", n_suicide, n_drug,n_gun, n_hanging, n_jump, n_other) %>%
dplyr::mutate(
suicide_cause = gsub("^n_","",suicide_cause)
)
d_rate <- d_wide %>% dplyr::select(
c(grouping_frame, gsub("^n_","rate_",col_select))
) %>%
tidyr::pivot_longer(
cols = gsub("^n_","rate_",col_select)
,names_to = "suicide_cause"
,values_to = "rate_suicides"
) %>%
# tidyr::gather("suicide_cause", "rate_per_100k", rate_suicide, rate_drug,rate_gun, rate_hanging, rate_jump, rate_other) %>%
dplyr::mutate(
suicide_cause = gsub("^rate_","",suicide_cause)
)
d_long <- d_wide %>% dplyr::select( c(grouping_frame,"n_population") ) %>%
dplyr::left_join(d_n) %>%
dplyr::left_join(d_rate)
d_out <- d_long
if(wide){
d_out <- d_wide
}
return(d_out)
}
#how to use
# ls_compute_rate <- ds0 %>% compute_rate("year")
The suicide data comes from Florida Health Charts and contains suicide mortality between 2006 and 2017, broken down by suicide means, county, sex, age, and race. Additionally, we use Bridged-Race Population Estimates to help with age-specific rate calculations.
The groomed data sets are available for download: FL Suicides and FL Population Estimates.
# data prepared by "./manipulation/9-aggregator.R" combining population estimates and suicide counts
ds_population_suicide <- readr::read_rds(path_file_input)
ds_population_suicide_2 <- readr::read_rds(path_file_input2)
# map of florida counties
florida_counties_map <- ggplot2::map_data("county") %>%
dplyr::filter(region == "florida") %>%
dplyr::mutate_at(
"subregion"
, ~stringr::str_replace_all(
.
,c(
"de soto" = "Desoto"
,"st johns" ="Saint Johns"
,"st lucie" = "Saint Lucie"
)
)
) %>% tibble::as_tibble()
#mutate and filter data to include only ages 10-24
ds1 <- ds_population_suicide %>%
dplyr::mutate(
county = forcats::as_factor(county)
,year = as.integer(year)
,sex = factor(sex,levels = c("Male", "Female"))
,race_ethnicity = factor(paste0(race, " + ", ethnicity))
,race = factor(race)
,ethnicity = factor(ethnicity)
,age_group = factor(age_group
,levels = names(lvl_age_groups)
,labels = lvl_age_groups
)
,n_population = as.integer(n_population)
,n_suicides = as.integer(n_suicides)
) %>%
filter(age_group %in% age_groups_10_24) %>% # !!! Only YOUTH
select(
county, year, sex, age_group, race, ethnicity,
n_population, n_suicides, everything()
)
ds2 <- ds_population_suicide_2 %>%
mutate(
race = forcats::fct_recode(race3,
"Black & Other" = "Black",
"Black & Other" = "Other"
)
) %>%
group_by(county, year, sex, age, race, ethnicity ) %>%
summarize(
n_population = sum(n_population, na.rm = T)
,n_suicides = sum(n_suicides, na.rm = T)
,n_firearms = sum(n_firearms, na.rm = T)
,n_other = sum(n_other, na.rm = T)
) %>%
ungroup() %>%
mutate(
race_ethnicity = factor(paste0(race, " + ", ethnicity))
)
The observation unit is defined by the first 6 variables: county
, year
, sex
, race
, ethnicity
, age_group
. For each unit, there are two measures: the number of people (n_population
) and the number of observed suicide events (n_suicides
). The latter is broken down by means of suicide.
ds1 %>% glimpse()
Rows: 19,296
Columns: 16
$ county <fct> Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Alachu...
$ year <int> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, 200...
$ sex <fct> Female, Female, Female, Female, Female, Female, Female, Female, Femal...
$ age_group <fct> 10-14, 15-19, 20-24, 10-14, 15-19, 20-24, 10-14, 15-19, 20-24, 10-14,...
$ race <fct> Black & Other, Black & Other, Black & Other, Black & Other, Black & O...
$ ethnicity <fct> Hispanic, Hispanic, Hispanic, Non-Hispanic, Non-Hispanic, Non-Hispani...
$ n_population <int> 50, 125, 250, 2410, 3763, 4695, 390, 1038, 2060, 3215, 6993, 14660, 5...
$ n_suicides <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Drugs & Biological Substances` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Other Gases & Vapors` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Hanging, Strangulation, Suffocation` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Firearms Discharge` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Jump From High Place` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Other & Unspec & Sequelae` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Other & Unspec Sol/Liq & Vapor` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ race_ethnicity <fct> Black & Other + Hispanic, Black & Other + Hispanic, Black & Other + H...
ds2 %>% glimpse()
Rows: 875,824
Columns: 11
$ county <fct> Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Al...
$ year <int> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 20...
$ sex <fct> Female, Female, Female, Female, Female, Female, Female, Female, Female, Female, Female, Fema...
$ age <int> 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7,...
$ race <fct> Black & Other, Black & Other, White, White, Black & Other, Black & Other, White, White, Blac...
$ ethnicity <fct> Hispanic, Non-Hispanic, Hispanic, Non-Hispanic, Hispanic, Non-Hispanic, Hispanic, Non-Hispan...
$ n_population <dbl> 9, 413, 61, 672, 6, 368, 66, 602, 11, 401, 57, 655, 9, 364, 76, 629, 8, 363, 51, 633, 3, 383...
$ n_suicides <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ n_firearms <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ n_other <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ race_ethnicity <fct> Black & Other + Hispanic, Black & Other + Non-Hispanic, White + Hispanic, White + Non-Hispan...
Another data set, ds2
, augmented with CDC population estimates, has very similar structure, but differs from ds1
in three respects:
Instead of age_group
it features age
, measured in whole years, providing higher resolution
Mortality cause is grouped into two categories fireamrs
and other
, as opposed to a more granular, 7-category classification of mortality cause in ds1
Covers years 2000-2018
as opposed to 2006-2018
in ds1
data set | suicide means | age resolution | years covered |
---|---|---|---|
ds1 |
7 | age group | 2006 - 2017 |
ds2 |
2 | 1 year | 2000 - 2018 |
These differences arose from certain limitations in data procurement. We use ds2
to explore age difference with higher resolution, at the expense of reducing resolution in mortality cause.
The combined variable race_ethinicity
is the product of the two respective components and was made to be idendical in both data sets:
ds1 %>% distinct(race, ethnicity, race_ethnicity)
# A tibble: 4 x 3
race ethnicity race_ethnicity
<fct> <fct> <fct>
1 Black & Other Hispanic Black & Other + Hispanic
2 Black & Other Non-Hispanic Black & Other + Non-Hispanic
3 White Hispanic White + Hispanic
4 White Non-Hispanic White + Non-Hispanic
all.equal(
ds1 %>% distinct(race, ethnicity, race_ethnicity) ,
ds2 %>% distinct(race, ethnicity, race_ethnicity)
)
[1] "Attributes: < Component \"class\": Lengths (4, 3) differ (string compare on first 3) >"
[2] "Attributes: < Component \"class\": 3 string mismatches >"
To begin, let us answer the most fundamental question:
Q1 - What is the overall trajectory of youth suicides in FL between 2006 and 2017?
To aid interpretation we have created an additional metric 1 out of
, which is a direct inverse of Rate per 100K
, but offers a different perspective on the magnitude of the estimate. The numerical value of this metric answer the question “One in how many individuals commit suicide?”
# > Q1 - What is the overall trajectory of youth suicides in FL between 2006 and 2017?
d1 <- ds1 %>%
compute_rate("year") %>%
filter(suicide_cause == "suicide") %>%
select(-suicide_cause) %>%
mutate(
n_out_of = round(n_population/n_suicides,0)
,n_population = n_population/1000000
) %>%
tidyr::pivot_longer(
cols = c("n_suicides","n_population", "rate_suicides","n_out_of")
,names_to = "metric"
,values_to = "value"
)
labels <- c(
"n_suicides" = "Number of Suicides"
,"n_population" = "Population (in Millions)"
,"rate_suicides" = "Rate per 100K"
,"n_out_of" = "1 out of"
)
g1 <- d1 %>%
ggplot(aes(x = year, y = value)) +
geom_line(alpha = 0.5) +
geom_point(shape = 21, size = 3, alpha = 0.8) +
geom_smooth(method = "lm", se = FALSE, color = "#1B9E77") +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = seq(2007,2017,3), minor_breaks = seq(2006,2017,1)) +
facet_wrap(~metric, scales = "free_y",nrow = 2 , labeller = as_labeller(labels)) +
ggpmisc::stat_poly_eq(formula = y ~ + x
,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
,parse = TRUE
,label.x = 0.9
,label.y = 0.05) +
geom_text(
data = d1 %>% dplyr::filter(year %in% c(2006, 2017))
, aes(label = round(value,2)), vjust =-0
)+
labs(
x = NULL
,y = NULL
)
g1
The number of youth suicides increased by 98
cases, rising from 203
in 2006 to 301
in 2017, constituting a 33% growth. If in 2006
one out every 17.5
thousands committed suicide, in 2017
one out of every 12
thousand individuals between age 10
and 24
took their own life. The demographic growth does not explain the increased number of suicide cases: the rate per 100,000
increased by 45% from 5.74
to 8.34
.
Given these general trends for the entire 10 - 24
group, we would like to understand how trends in population growth, suicide count and suicide rate compare among age group:
d2 <- ds1 %>%
compute_rate(c("year","age_group")) %>%
filter(suicide_cause == "suicide") %>%
select(-suicide_cause) %>%
mutate(
n_out_of = n_population/n_suicides
,n_population = n_population/1000000
) %>%
tidyr::pivot_longer(
cols = c("n_suicides","n_population", "rate_suicides" ,"n_out_of")
,names_to = "metric"
,values_to = "value"
)
labels <- c(
"n_suicides" = "Suicides"
,"n_population" = "Population \n (in Millions)"
,"rate_suicides" = "Rate per 100k"
,"n_out_of" = "1 Out of"
,"10-14" = "10-14"
,"15-19" = "15-19"
,"20-24" = "20-24"
)
g2 <- d2 %>%
ggplot(aes(x = year, y = value)) +
geom_line(alpha = 0.5) +
geom_point(shape = 21, size = 3, alpha = 0.8) +
geom_smooth(method = "lm", se = FALSE, color = "#1B9E77") +
facet_grid(metric ~ age_group, scales = "free_y", labeller = as_labeller(labels)) +
scale_y_continuous(labels = scales::comma, expand = expansion(mult=c(.2,.5))) +
# scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = seq(2007,2017,3), minor_breaks = seq(2006,2017,1)) +
ggpmisc::stat_poly_eq(formula = y ~ + x
,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
,parse = TRUE
,label.x = 0.05
,label.y = 1
,color = "#D95F02") +
labs(
x = NULL
,y = NULL
)
g2
The 10-14
age group exhibits quite a dramatic decrease in the One out of
measure, dwarfing the scale for the other two age groups. We recreate it separately:
g3 <- d2 %>%
dplyr::filter(age_group %in% c("15-19","20-24")) %>%
dplyr::filter(metric %in% c("n_out_of")) %>%
ggplot(aes(x = year, y = value)) +
geom_line(alpha = 0.5) +
geom_point(shape = 21, size = 3, alpha = 0.8) +
geom_smooth(method = "lm", se = FALSE, color = "#1B9E77") +
facet_wrap(~ age_group, scales = "free_y", labeller = as_labeller(labels)) +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = seq(2007,2017,3), minor_breaks = seq(2006,2017,1)) +
ggpmisc::stat_poly_eq(formula = y ~ + x
,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
,parse = TRUE
,label.x = 0.05
,label.y = 1
,color = "#D95F02") +
labs(
x = NULL
,y = NULL
)
g3+labs(title = "One ouf of how many commit suicide? ")
Demographic changes deserved to be examined seperately:
d3 <- ds1 %>%
compute_rate(c("year","age_group","race_ethnicity")) %>%
filter(suicide_cause == "suicide") %>%
select(-suicide_cause) %>%
mutate(
n_population = n_population/1000000
) %>%
distinct()
labels <- c(
"Black & Other + Hispanic" = "Black & Other \n Hispanic"
,"Black & Other + Non-Hispanic" = "Black & Other \n Non-Hispanic"
,"White + Hispanic" = "White \n Hispanic"
,"White + Non-Hispanic" = "White \n Non-Hispanic"
,"10-14" = "10-14"
,"15-19" = "15-19"
,"20-24" = "20-24"
)
g2 <- d3 %>%
ggplot(aes(x = year, y = n_population, color = race_ethnicity)) +
geom_smooth(method = "lm", se = FALSE, color = "grey70",alpha =.5) +
geom_line(alpha = 0.99) +
geom_point(shape = 21, size = 3, alpha = 1) +
facet_grid(race_ethnicity ~ age_group, scales = "free_y", labeller = as_labeller(labels)) +
# scale_color_viridis_d(begin = .0, end = .7, option = "viridis")+
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = seq(2007,2017,3), minor_breaks = seq(2006,2017,1)) +
ggpmisc::stat_poly_eq(formula = y ~ + x
,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
,parse = TRUE
,label.x = 0.05
,label.y = 1
,color = "#D95F02") +
labs(
x = NULL
,y = "Population in millions"
,color = NULL
,title = "Population growth among Florida youth (10-24)"
# scale_color_viridis_d(option = "volcano", begin = .0 , end = .8)
)+theme(legend.position = "bottom")
g2
Note the sharp change in population count from 2011
to 2013
. The 15-19
group dropped, while the 20-24
group grew quite dramatically. A closer examination, shows that while this demographic growth was present in all racial groups, it was particularly pronounced among Non-white + Non-Hispanic
youth. One possible explanation could be attributed to cohort effect: disproportionate amount of those in 15-19
group transitioned to 20-24
group during these years, but were not matched by the similar number of 10-14
year old transitioning into 15-19
group. Another possible explanation could reside in migration patterns.
Q2 - What ages are most vulnerable to suicide?
To explore whether certain age transitions are associated with increased suicidality, we plot the distribution of suicide events within each year of age among the observed years (2006 - 2018).
# Hypothesis: does entering high-school associated with increased suicide events?
# Can we see the spike in mortality at 13-14 years of age?
# For that we need to view by year mortality event?
# d4 <- ds_suicide_by_age %>%
d4 <- ds2 %>%
mutate(age = as.integer(age)) %>%
filter(age %in% c(10:85)) %>%
filter(year %in% 2006:2018) %>%
group_by(year, age) %>%
summarize(
n_suicide = sum(n_suicides, na.rm = T)
) %>%
ungroup()
# d4 %>% glimpse()
g4 <- d4 %>%
ggplot(aes(x = age, y = n_suicide))+
geom_smooth(method = "lm", se= F, size = 1,color = "salmon")+
geom_smooth(method = "loess", se= F, size = 1,color = "cyan3")+
ggpmisc::stat_poly_eq(
formula = y ~ + x
,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
,parse = TRUE, color = "salmon"
# , vjust = 7
)+
geom_boxplot(aes( group = age), fill = NA)+
scale_x_continuous(breaks = seq(10,85,5))+
# scale_y_continuous(breaks = seq(0,100,10))+
geom_vline(xintercept = 24.5, size = 4, alpha = .1)+
geom_vline(xintercept = 17.5, size = 1, linetype = "dashed", color = "grey80")+
theme(
panel.grid.minor = element_blank()
)+
labs(
title = "Suicide events among persons of the same age (2006-2018)"
,x = "Age in years", y = "Count of suicides (all causes)"
)
g4
There appears to be two plateaux across the lifespan, each preceeded by sharp increase: the first from ages 25
to 40
and another from ages 45
to 60
, after which the frequency of suicide declines monotonically.
We see a marked spike in deaths during the 17 to 18 transition. It appears the majority of deaths in 10-14 groups could be attributed to 13 and 14 year olds (vertial dashed line). The plateau following the age 25 makes theoretical sense: after the turmoils of coming out of age and young adulthood, there life acquires a more stable character.
g4 <- d4 %>%
dplyr::filter(age %in% 10:40) %>%
ggplot(aes(x = age, y = n_suicide))+
geom_smooth(method = "lm", se= F, size = 1,color = "salmon")+
geom_smooth(method = "loess", se= F, size = 1,color = "cyan3")+
ggpmisc::stat_poly_eq(
formula = y ~ + x
,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
,parse = TRUE, color = "salmon"
# , vjust = 7
)+
geom_boxplot(aes( group = age), fill = NA)+
scale_x_continuous(breaks = seq(10,40,1))+
scale_y_continuous(breaks = seq(0,100,10))+
geom_vline(xintercept = 24.5, size = 4, alpha = .1)+
geom_vline(xintercept = 17.5, size = 1, linetype = "dashed", color = "grey80")+
theme(
panel.grid.minor = element_blank()
)+
labs(
title = "Suicide events among person of the same age (2006-2018)"
,x = "Age in years", y = "Count of suicides (all causes)"
)
g4
Also note, that when examining the trends within the 10-24
age group, the relationship between the age and the suicide mortality is very well summarized by a linear model (\(R^2 = .85\)):
# among 10-24 the increase across age is very linear
g5 <- d4 %>%
filter(age %in% c(10:24)) %>%
ggplot(aes(x = age, y = n_suicide))+
geom_point(shape = 21, alpha = .4, size = 2, position = position_jitter(width = .1))+
geom_smooth(method = "lm", se= F, size = 1,color = "salmon")+
geom_smooth(method = "loess", se= F, size = 1,color = "cyan3")+
ggpmisc::stat_poly_eq(
formula = y ~ + x
,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
,parse = TRUE, color = "salmon"
# , vjust = 7
)+
geom_boxplot(aes( group = age), fill = NA, outlier.shape = NA)+
scale_x_continuous(breaks = seq(10,40,1))+
scale_y_continuous(breaks = seq(0,100,10))+
geom_vline(xintercept = 24.5, size = 4, alpha = .1)+
geom_vline(xintercept = 17.5, size = 1, linetype = "dashed", color = "grey80")+
theme(
panel.grid.minor = element_blank()
)+
labs(
title = "Suicide events among person of the same age (2006-2018)"
,x = "Age in years", y = "Count of suicides (all causes)"
)
g5
Q3 - What are the most prevalent methods of youth suicide?
Florida Health Charts captures the causes of suicide mortality using 7
categories. We examine the raw event count and rates per 100,000
rates in the following two graphs:
#yearly count of suicide means
suicide_cause_order <- c(
"drug" = "Drug"
,"gun" = "Gun"
,"hanging" = "Hanging"
,"jump" = "Jump"
,"other_seq" = "Other Sequelae"
,"other_liq" = "Other Liqud"
,"other_gas" = "Other Gas & Vapor"
)
d6 <- ds1 %>%
compute_rate("year")
g6 <- d6 %>%
filter(!suicide_cause %in% c("suicide","non_gun_hang")) %>%
mutate(
suicide_cause = factor(suicide_cause
,levels = names(suicide_cause_order)
,labels = suicide_cause_order)
) %>%
ggplot(aes(x = reorder(suicide_cause,desc(suicide_cause)), y = n_suicides)) +
geom_col(alpha = 0.4, fill = "#1B9E77", color = "#666666") +
geom_text(aes(label = n_suicides), hjust = 1) +
coord_flip() +
facet_wrap(~year) +
labs(
x = "Number of cases"
,y = NULL
,title = "Breakdown of Yearly Suicide Counts"
)
g6
Notice that two methods gun
and hanging
consistently explain the most cases of suicide in this age group. Going further, we will collapse the other 5
categories into a single non-gun/hang
, so that we have sufficient n
to compute reliable rates.
g7 <- d6 %>%
filter(!suicide_cause %in% c("suicide","non_gun_hang")) %>%
mutate(
suicide_cause = factor(suicide_cause
,levels = names(suicide_cause_order)
,labels = suicide_cause_order)
) %>%
ggplot(aes(x = reorder(suicide_cause,desc(suicide_cause)), y = rate_suicides)) +
geom_col(alpha = 0.4, fill = "#1B9E77", color = "#666666") +
geom_text(aes(label = round(rate_suicides,1)), hjust = 1) +
coord_flip() +
facet_wrap(~year) +
labs(
x = "Rate per 100,000"
,y = NULL
,title = "Breakdown of Yearly Suicide Rates"
)
g7
Q4 - How do trends compare across the methds of suicide?
major_causes <- c(
"gun" = "Gun"
,"hanging" = "Hanging"
,"non_gun_hang" = "Non Gun/Hang")
major_causes_colors <- c(
)
g8 <- d6 %>%
filter(suicide_cause %in% names(major_causes)) %>%
mutate(
suicide_cause = factor(suicide_cause
,levels = names(major_causes)
,labels = major_causes)
) %>%
ggplot(aes(x = year,y = n_suicides, color = suicide_cause)) +
geom_line() +
geom_point(shape = 21, size = 3) +
geom_smooth(method = "lm", se = F) +
ggpmisc::stat_poly_eq(
formula = y ~ + x
,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
,parse = TRUE
# , vjust = 7
) +
scale_x_continuous(breaks = seq(2006,2017,2), minor_breaks = seq(2006,2017,1)) +
scale_color_brewer(palette = "Dark2") +
labs(
x = NULL
,y = "Count"
, title = "Florida suicide mortality among youth (10-24) by means of death"
, color = "Method"
)
g8
When ignoring race and ethnicity, the average increase in the annual rate of suicide mortality from gun (+0.0996) is 1.5 times higher than that from Hanging (+.0672) and 2.6 times higher than other means (+.0377). Notice also, that mortality from firearms and hanging follow the trajectory closely described by a linear trend: \(R^2\) = .7 and \(R^2\) = .64, respectively.
g9 <- d6 %>%
filter(suicide_cause %in% names(major_causes)) %>%
mutate(
suicide_cause = factor(suicide_cause
,levels = names(major_causes)
,labels = major_causes)
) %>%
ggplot(aes(x = year,y = rate_suicides, color = suicide_cause)) +
geom_line() +
geom_point(shape = 21, size = 3) +
geom_smooth(method = "lm", se = F) +
ggpmisc::stat_poly_eq(
formula = y ~ + x
,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
,parse = TRUE
) +
scale_x_continuous(breaks = seq(2006,2017,2), minor_breaks = seq(2006,2017,1)) +
scale_color_brewer(palette = "Dark2") +
labs(
x = NULL
,y = "Rate per 100,000"
, title = "Florida suicide mortality among youth (10-24) by means of death"
, color = "Method"
)
g9
Q5 - How do trends compare by race, sex, and means of means of suicide?
d10 <- ds1 %>%
compute_rate(c("year","sex","race_ethnicity")) %>%
filter(suicide_cause %in% c("gun","hanging","non_gun_hang")) %>%
mutate(
suicide_cause = factor(suicide_cause
,levels = c("gun","hanging","non_gun_hang")
,labels = c("Gun","Hang","Non Gun/Hang"))
)
g10 <- d10 %>%
ggplot(aes(x = year, y = rate_suicides, color = sex)) +
geom_line() +
geom_point(shape = 21) +
geom_smooth(method = "lm", se = FALSE) +
scale_x_continuous(breaks = seq(2007,2017,5), minor_breaks = seq(2006,2017,1)) +
# scale_color_brewer(palette = "Dark2") +
scale_color_viridis_d(option = "magma",begin = .2, end = .65)+
facet_grid(suicide_cause ~race_ethnicity
# , scales = "free"
) +
ggpmisc::stat_poly_eq(
formula = y ~ + x
,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
,parse = TRUE
# , vjust = 7
) +
labs(
x = NULL
,y = "Rate per 100,000"
,title = "Rate of Suicides by Race and Sex"
,color = "Sex"
)
g10
The most prominent increase, dwarfing others, is observed among White Non-Hispanic men, rising from 6.8 in 2006 to 11.3 in 2017, constituting a 65% growth. Between these years, the rate per 100,000 in this category was increasing on average by .33
per year.
Among white women of both Hispanic and Non-Hispanic origin, this growth was substantially smaller, about +.06
per year, similar to the growth in the rate of suicide by means other than firearms or hanging. However, suicides by hanging was increasing by +.09
per year.
For Non-White Non-Hispanic women, the largest growth in suicide rates was observed from hanging (+.1
), while suicides by other means barely increased.
g11 <- d10 %>%
ggplot(aes(x = year, y = rate_suicides, color = suicide_cause)) +
geom_line() +
geom_point(shape = 21) +
geom_smooth(method = "lm", se = FALSE) +
scale_x_continuous(breaks = seq(2007,2017,5), minor_breaks = seq(2006,2017,1)) +
scale_color_brewer(palette = "Dark2") +
facet_grid(sex ~race_ethnicity
, scales = "free"
) +
ggpmisc::stat_poly_eq(
formula = y ~ + x
,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
,parse = TRUE
# , vjust = 7
) +
labs(
x = NULL
,y = "Rate per 100,000"
,title = "Rate of Suicides by Race and Sex"
,color = "Method"
)
g11
For the sake of documentation and reproducibility, the current report was rendered in the following environment. Click the line below to expand.
Environment
- Session info -------------------------------------------------------------------------------------------------------
setting value
version R version 4.0.2 (2020-06-22)
os Windows 10 x64
system x86_64, mingw32
ui RTerm
language (EN)
collate English_United States.1252
ctype English_United States.1252
tz America/New_York
date 2020-08-30
- Packages -----------------------------------------------------------------------------------------------------------
package * version date lib source
assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.2)
backports 1.1.7 2020-05-13 [1] CRAN (R 4.0.0)
callr 3.4.3 2020-03-28 [1] CRAN (R 4.0.2)
cli 2.0.2 2020-02-28 [1] CRAN (R 4.0.2)
colorspace 1.4-1 2019-03-18 [1] CRAN (R 4.0.2)
crayon 1.3.4 2017-09-16 [1] CRAN (R 4.0.2)
desc 1.2.0 2018-05-01 [1] CRAN (R 4.0.2)
devtools 2.3.1 2020-07-21 [1] CRAN (R 4.0.2)
digest 0.6.25 2020-02-23 [1] CRAN (R 4.0.2)
dplyr * 1.0.1 2020-07-31 [1] CRAN (R 4.0.2)
ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.2)
evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.2)
fansi 0.4.1 2020-01-08 [1] CRAN (R 4.0.2)
fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
generics 0.0.2 2018-11-29 [1] CRAN (R 4.0.2)
ggplot2 * 3.3.2 2020-06-19 [1] CRAN (R 4.0.2)
glue 1.4.1 2020-05-13 [1] CRAN (R 4.0.2)
gtable 0.3.0 2019-03-25 [1] CRAN (R 4.0.2)
htmltools 0.5.0 2020-06-16 [1] CRAN (R 4.0.2)
knitr * 1.29 2020-06-23 [1] CRAN (R 4.0.2)
lifecycle 0.2.0 2020-03-06 [1] CRAN (R 4.0.2)
magrittr * 1.5 2014-11-22 [1] CRAN (R 4.0.2)
memoise 1.1.0 2017-04-21 [1] CRAN (R 4.0.2)
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.0.2)
pillar 1.4.6 2020-07-10 [1] CRAN (R 4.0.2)
pkgbuild 1.1.0 2020-07-13 [1] CRAN (R 4.0.2)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.2)
pkgload 1.1.0 2020-05-29 [1] CRAN (R 4.0.2)
prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.2)
processx 3.4.3 2020-07-05 [1] CRAN (R 4.0.2)
ps 1.3.4 2020-08-11 [1] CRAN (R 4.0.2)
purrr 0.3.4 2020-04-17 [1] CRAN (R 4.0.2)
R6 2.4.1 2019-11-12 [1] CRAN (R 4.0.2)
remotes 2.2.0 2020-07-21 [1] CRAN (R 4.0.2)
rlang 0.4.7 2020-07-09 [1] CRAN (R 4.0.2)
rmarkdown 2.3 2020-06-18 [1] CRAN (R 4.0.2)
rprojroot 1.3-2 2018-01-03 [1] CRAN (R 4.0.2)
scales 1.1.1 2020-05-11 [1] CRAN (R 4.0.2)
sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.2)
stringi 1.4.6 2020-02-17 [1] CRAN (R 4.0.0)
stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.2)
testthat 2.3.2 2020-03-02 [1] CRAN (R 4.0.2)
tibble 3.0.3 2020-07-10 [1] CRAN (R 4.0.2)
tidyr 1.1.1 2020-07-31 [1] CRAN (R 4.0.2)
tidyselect 1.1.0 2020-05-11 [1] CRAN (R 4.0.2)
usethis 1.6.1 2020-04-29 [1] CRAN (R 4.0.2)
utf8 1.1.4 2018-05-24 [1] CRAN (R 4.0.2)
vctrs 0.3.2 2020-07-15 [1] CRAN (R 4.0.2)
withr 2.2.0 2020-04-20 [1] CRAN (R 4.0.2)
xfun 0.16 2020-07-24 [1] CRAN (R 4.0.2)
yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.2)
[1] C:/Users/an499583/OneDrive - University of Central Florida/Documents/R/win-library/4.0
[2] C:/Program Files/R/R-4.0.2/library