Abstract

This blogpost explores suicide trends among youth (ages 10 - 24) in Florida between 2006 and 2017, examining the effects of race and gender. The post answers the following questions with respect to population in focus:

Q1 - What is the overall trajectory of youth suicides in FL between 2006 and 2017?

Q2 - What ages are most vulnerable to suicide?

Q3 - What are the most prevalent methods of youth suicide?

Q4 - How do trends compare across the methods of suicide?

Q5 - How do trends compare by race, sex, and methods of suicide?

Environment

This section contains technical information for deeper analysis and reproduction. Casual readers are invited to skip it.

Packages used in this report.

# Attach these packages so their functions don't need to be qualified
# see http://r-pkgs.had.co.nz/namespace.html#search-path
library(magrittr) # pipes %>% 
library(ggplot2)  # graphs
library(dplyr)    # data wrangling
requireNamespace("tidyr")  # data tidying

Script dependencies.

source("./scripts/modeling/model-basic.R")  
source("./scripts/common-functions.R")

Definitions of global object (file paths, factor levels, object groups ) used throughout the report.

library(ggplot2)
# you will need to replace this path to the location where you stored your data file
path_file_input <- "data-unshared/derived/9-population-suicide.rds"
path_file_input2 <- "./data-unshared/derived/9-population-suicide-2.rds"

# to help with sorting the levels of the `age_group` factor
lvl_age_groups <- c(
  "less_than_1"         =   "<1"          
  ,"1_4"                =   "1-4"  
  ,"5_9"                =   "5-9"  
  ,"10_14"              =   "10-14"    
  ,"15_19"              =   "15-19"    
  ,"20_24"              =   "20-24"    
  ,"25_34"              =   "25-34"    
  ,"35_44"              =   "35-44"    
  ,"45_54"              =   "45-54"    
  ,"55_64"              =   "55-64"    
  ,"65_74"              =   "65-74"    
  ,"75_84"              =   "75-84"    
  ,"85_plus"            =   "85+"      
)
age_groups_in_focus <-   lvl_age_groups[4:12]
age_groups_10_24    <-   lvl_age_groups[4:6]

#set default ggplot theme
#set default ggplot theme
ggplot2::theme_set(
  ggplot2::theme_bw(
  )+
    theme(
      strip.background = element_rect(fill="grey90", color = NA)
    )
)

Definitions of custom functions.

#updated to new compute rate function, that includes option for wide data

compute_rate <- function( d  ,grouping_frame  ,wide = FALSE ){
  # d <- ds_population_suicide
  # grouping_frame <- c("year")
  # 
  d_wide <- d %>%
    dplyr::group_by(.dots = grouping_frame) %>%
    dplyr::summarize(
      n_population      = sum(n_population, na.rm = T)
      ,n_suicide        = sum(n_suicides, na.rm = T)
      ,n_gun            = sum(`Firearms Discharge`, na.rm=T)
      ,n_drug           = sum(`Drugs & Biological Substances`, na.rm=T)
      ,n_hanging        = sum(`Hanging, Strangulation, Suffocation`, na.rm=T)
      ,n_jump           = sum(`Jump From High Place`, na.rm=T)
      ,n_other_seq      = sum(`Other & Unspec & Sequelae`, na.rm = T)
      ,n_other_liq      = sum(`Other & Unspec Sol/Liq & Vapor`, na.rm = T)
      ,n_other_gas      = sum(`Other Gases & Vapors`, na.rm = T)
    ) %>% 
    dplyr::ungroup() %>% 
    dplyr::mutate(
      # n_other = n_suicide - n_drug - n_gun -n_hanging - n_jump
      # n_non_gun = n_suicide - n_gun
      n_non_gun_hang   = n_suicide - n_gun - n_hanging 
      
      ,rate_suicide                 = (n_suicide/n_population)*100000
      ,rate_gun                     = (n_gun/n_population)*100000
      ,rate_drug                    = (n_drug/n_population)*100000
      ,rate_hanging                 = (n_hanging/n_population)*100000
      ,rate_jump                    = (n_jump/n_population)*100000
      # ,rate_other                 = (n_other/n_population)*100000
      ,rate_other_seq               = (n_other_seq/n_population)*100000
      ,rate_other_liq               = (n_other_liq/n_population)*100000
      ,rate_other_gas               = (n_other_gas/n_population)*100000
      # ,rate_non_gun                 = (n_non_gun/n_population)*100000
      ,rate_non_gun_hang            = (n_non_gun_hang/n_population)*100000
      
    )
  # d_wide %>% glimpse()
  col_select <- c("n_suicide"
                  ,"n_drug"
                  ,"n_gun"
                  ,"n_hanging"
                  ,"n_jump"
                  ,"n_other_seq" 
                  ,"n_other_liq" 
                  ,"n_other_gas"
                  # ,"n_non_gun"
                  ,"n_non_gun_hang")
  d_n <- d_wide %>% dplyr::select(c(grouping_frame , col_select)) %>% 
    tidyr::pivot_longer(
      cols       = col_select
      ,names_to  = "suicide_cause"
      ,values_to = "n_suicides"
    ) %>% 
    # tidyr::gather("suicide_cause", "n_suicides", n_suicide, n_drug,n_gun, n_hanging, n_jump, n_other) %>% 
    dplyr::mutate(
      suicide_cause = gsub("^n_","",suicide_cause)
    )
  d_rate <- d_wide %>% dplyr::select(
    c(grouping_frame, gsub("^n_","rate_",col_select))
  ) %>%
    tidyr::pivot_longer(
      cols = gsub("^n_","rate_",col_select)
      ,names_to  = "suicide_cause"
      ,values_to = "rate_suicides"
    ) %>% 
    # tidyr::gather("suicide_cause", "rate_per_100k", rate_suicide, rate_drug,rate_gun, rate_hanging, rate_jump, rate_other) %>% 
    dplyr::mutate(
      suicide_cause = gsub("^rate_","",suicide_cause)
    )
  
  d_long <- d_wide %>% dplyr::select( c(grouping_frame,"n_population") ) %>% 
    dplyr::left_join(d_n) %>% 
    dplyr::left_join(d_rate)
  
  d_out <- d_long
  if(wide){
    d_out <- d_wide
  }
  
  return(d_out)
}


#how to use
# ls_compute_rate <- ds0 %>% compute_rate("year")

Data

The suicide data comes from Florida Health Charts and contains suicide mortality between 2006 and 2017, broken down by suicide means, county, sex, age, and race. Additionally, we use Bridged-Race Population Estimates to help with age-specific rate calculations.

The groomed data sets are available for download: FL Suicides and FL Population Estimates.

# data prepared by "./manipulation/9-aggregator.R" combining population estimates and suicide counts

ds_population_suicide <-   readr::read_rds(path_file_input)
ds_population_suicide_2 <-   readr::read_rds(path_file_input2)


# map of florida counties
florida_counties_map <- ggplot2::map_data("county") %>% 
  dplyr::filter(region == "florida") %>% 
  dplyr::mutate_at(
    "subregion"
    , ~stringr::str_replace_all(
      .
      ,c(
        "de soto" = "Desoto"
        ,"st johns" ="Saint Johns"
        ,"st lucie" = "Saint Lucie"
      )
    )
  ) %>% tibble::as_tibble()

#mutate and filter data to include only ages 10-24

ds1 <- ds_population_suicide %>%
  dplyr::mutate(
    county = forcats::as_factor(county)
    ,year          = as.integer(year)
    ,sex           = factor(sex,levels = c("Male", "Female"))
    ,race_ethnicity = factor(paste0(race, " + ", ethnicity))
    ,race          = factor(race)
    ,ethnicity     = factor(ethnicity)
    ,age_group     = factor(age_group
                            ,levels = names(lvl_age_groups)
                            ,labels = lvl_age_groups
    )
    ,n_population  = as.integer(n_population)
    ,n_suicides    = as.integer(n_suicides)
  ) %>% 
  filter(age_group %in% age_groups_10_24) %>% # !!! Only YOUTH
  select(
    county, year, sex, age_group, race, ethnicity, 
    n_population, n_suicides, everything()
  ) 

ds2 <- ds_population_suicide_2 %>% 
  mutate(
     race = forcats::fct_recode(race3,
                                 "Black & Other" = "Black",
                                 "Black & Other" = "Other"
    )
  ) %>% 
  group_by(county, year, sex, age, race, ethnicity ) %>% 
  summarize(
    n_population = sum(n_population, na.rm = T)
    ,n_suicides = sum(n_suicides, na.rm = T)
    ,n_firearms = sum(n_firearms, na.rm = T)
    ,n_other = sum(n_other, na.rm = T)
  ) %>% 
  ungroup() %>% 
  mutate(
    race_ethnicity = factor(paste0(race, " + ", ethnicity))
  )

The observation unit is defined by the first 6 variables: county, year, sex, race, ethnicity, age_group. For each unit, there are two measures: the number of people (n_population) and the number of observed suicide events (n_suicides). The latter is broken down by means of suicide.

ds1 %>% glimpse()

Rows: 19,296
Columns: 16
$ county                                <fct> Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Alachu...
$ year                                  <int> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, 200...
$ sex                                   <fct> Female, Female, Female, Female, Female, Female, Female, Female, Femal...
$ age_group                             <fct> 10-14, 15-19, 20-24, 10-14, 15-19, 20-24, 10-14, 15-19, 20-24, 10-14,...
$ race                                  <fct> Black & Other, Black & Other, Black & Other, Black & Other, Black & O...
$ ethnicity                             <fct> Hispanic, Hispanic, Hispanic, Non-Hispanic, Non-Hispanic, Non-Hispani...
$ n_population                          <int> 50, 125, 250, 2410, 3763, 4695, 390, 1038, 2060, 3215, 6993, 14660, 5...
$ n_suicides                            <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Drugs & Biological Substances`       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Other Gases & Vapors`                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Hanging, Strangulation, Suffocation` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Firearms Discharge`                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Jump From High Place`                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Other & Unspec & Sequelae`           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ `Other & Unspec Sol/Liq & Vapor`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ race_ethnicity                        <fct> Black & Other + Hispanic, Black & Other + Hispanic, Black & Other + H...

ds2 %>% glimpse()

Rows: 875,824
Columns: 11
$ county         <fct> Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Alachua, Al...
$ year           <int> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 20...
$ sex            <fct> Female, Female, Female, Female, Female, Female, Female, Female, Female, Female, Female, Fema...
$ age            <int> 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7,...
$ race           <fct> Black & Other, Black & Other, White, White, Black & Other, Black & Other, White, White, Blac...
$ ethnicity      <fct> Hispanic, Non-Hispanic, Hispanic, Non-Hispanic, Hispanic, Non-Hispanic, Hispanic, Non-Hispan...
$ n_population   <dbl> 9, 413, 61, 672, 6, 368, 66, 602, 11, 401, 57, 655, 9, 364, 76, 629, 8, 363, 51, 633, 3, 383...
$ n_suicides     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ n_firearms     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ n_other        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ race_ethnicity <fct> Black & Other + Hispanic, Black & Other + Non-Hispanic, White + Hispanic, White + Non-Hispan...

Another data set, ds2, augmented with CDC population estimates, has very similar structure, but differs from ds1 in three respects:

Instead of age_group it features age, measured in whole years, providing higher resolution
Mortality cause is grouped into two categories fireamrs and other, as opposed to a more granular, 7-category classification of mortality cause in ds1
Covers years 2000-2018 as opposed to 2006-2018 in ds1

Summary of differences in data sets used in the report:

data set	suicide means	age resolution	years covered
`ds1`	7	age group	2006 - 2017
`ds2`	2	1 year	2000 - 2018

These differences arose from certain limitations in data procurement. We use ds2 to explore age difference with higher resolution, at the expense of reducing resolution in mortality cause.

The combined variable race_ethinicity is the product of the two respective components and was made to be idendical in both data sets:

ds1 %>% distinct(race, ethnicity, race_ethnicity)

# A tibble: 4 x 3
  race          ethnicity    race_ethnicity              
  <fct>         <fct>        <fct>                       
1 Black & Other Hispanic     Black & Other + Hispanic    
2 Black & Other Non-Hispanic Black & Other + Non-Hispanic
3 White         Hispanic     White + Hispanic            
4 White         Non-Hispanic White + Non-Hispanic

all.equal(
ds1 %>% distinct(race, ethnicity, race_ethnicity) ,
ds2 %>% distinct(race, ethnicity, race_ethnicity) 
)

[1] "Attributes: < Component \"class\": Lengths (4, 3) differ (string compare on first 3) >"
[2] "Attributes: < Component \"class\": 3 string mismatches >"

Q1 - Overall Trend

To begin, let us answer the most fundamental question:

Q1 - What is the overall trajectory of youth suicides in FL between 2006 and 2017?

To aid interpretation we have created an additional metric 1 out of, which is a direct inverse of Rate per 100K, but offers a different perspective on the magnitude of the estimate. The numerical value of this metric answer the question “One in how many individuals commit suicide?”

# > Q1 - What is the overall trajectory of youth suicides in FL between 2006 and 2017? 
d1 <- ds1 %>% 
  compute_rate("year") %>% 
  filter(suicide_cause == "suicide") %>% 
  select(-suicide_cause) %>% 
  mutate(
    n_out_of      = round(n_population/n_suicides,0)
    ,n_population = n_population/1000000
  ) %>% 
  tidyr::pivot_longer(
    cols       = c("n_suicides","n_population", "rate_suicides","n_out_of")
    ,names_to  = "metric"
    ,values_to = "value"
  ) 

labels <- c(
  "n_suicides"     = "Number of Suicides"
  ,"n_population"  = "Population (in Millions)"
  ,"rate_suicides" = "Rate per 100K"
  ,"n_out_of"      = "1 out of"
)

g1 <- d1 %>% 
  ggplot(aes(x = year, y = value)) +
  geom_line(alpha = 0.5) +
  geom_point(shape = 21, size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, color = "#1B9E77") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = seq(2007,2017,3), minor_breaks = seq(2006,2017,1)) +
  facet_wrap(~metric, scales = "free_y",nrow = 2 , labeller = as_labeller(labels)) +
  ggpmisc::stat_poly_eq(formula = y ~ + x 
                        ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
                        ,parse = TRUE
                        ,label.x = 0.9
                        ,label.y = 0.05) +
  geom_text(
    data = d1 %>% dplyr::filter(year %in% c(2006, 2017))
    , aes(label = round(value,2)), vjust =-0
  )+
  labs(
    x  = NULL
    ,y = NULL
  )
g1

The number of youth suicides increased by 98 cases, rising from 203 in 2006 to 301 in 2017, constituting a 33% growth. If in 2006 one out every 17.5 thousands committed suicide, in 2017 one out of every 12 thousand individuals between age 10 and 24 took their own life. The demographic growth does not explain the increased number of suicide cases: the rate per 100,000 increased by 45% from 5.74 to 8.34.

Given these general trends for the entire 10 - 24 group, we would like to understand how trends in population growth, suicide count and suicide rate compare among age group:

d2 <- ds1 %>% 
  compute_rate(c("year","age_group")) %>% 
  filter(suicide_cause == "suicide") %>% 
  select(-suicide_cause) %>% 
  mutate(
    n_out_of     = n_population/n_suicides
    ,n_population = n_population/1000000
  ) %>% 
  tidyr::pivot_longer(
    cols       = c("n_suicides","n_population", "rate_suicides" ,"n_out_of")
    ,names_to  = "metric"
    ,values_to = "value"
  ) 

labels <- c(
  "n_suicides"     = "Suicides"
  ,"n_population"  = "Population \n (in Millions)"
  ,"rate_suicides" = "Rate per 100k"
  ,"n_out_of"      = "1 Out of"
  ,"10-14"         =   "10-14"    
  ,"15-19"         =   "15-19"    
  ,"20-24"         =   "20-24" 
  
)

g2 <- d2 %>% 
  ggplot(aes(x = year, y = value)) +
  geom_line(alpha = 0.5) +
  geom_point(shape = 21, size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, color = "#1B9E77") +
  facet_grid(metric ~ age_group, scales = "free_y", labeller = as_labeller(labels)) +
  scale_y_continuous(labels = scales::comma, expand = expansion(mult=c(.2,.5))) +
  # scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = seq(2007,2017,3), minor_breaks = seq(2006,2017,1)) +
  ggpmisc::stat_poly_eq(formula = y ~ + x 
                        ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
                        ,parse   = TRUE
                        ,label.x = 0.05
                        ,label.y = 1
                        ,color   = "#D95F02") +
  labs(
    x  = NULL
    ,y = NULL
  )

g2

The 10-14 age group exhibits quite a dramatic decrease in the One out of measure, dwarfing the scale for the other two age groups. We recreate it separately:

g3 <- d2 %>% 
  dplyr::filter(age_group %in% c("15-19","20-24")) %>% 
  dplyr::filter(metric %in% c("n_out_of")) %>% 
  ggplot(aes(x = year, y = value)) +
  geom_line(alpha = 0.5) +
  geom_point(shape = 21, size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, color = "#1B9E77") +
  facet_wrap(~ age_group, scales = "free_y", labeller = as_labeller(labels)) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = seq(2007,2017,3), minor_breaks = seq(2006,2017,1)) +
  ggpmisc::stat_poly_eq(formula = y ~ + x 
                        ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
                        ,parse   = TRUE
                        ,label.x = 0.05
                        ,label.y = 1
                        ,color   = "#D95F02") +
  labs(
    x  = NULL
    ,y = NULL
  )

g3+labs(title = "One ouf of how many commit suicide? ")

Demographic changes deserved to be examined seperately:

d3 <- ds1 %>% 
  compute_rate(c("year","age_group","race_ethnicity")) %>% 
  filter(suicide_cause == "suicide") %>% 
  select(-suicide_cause) %>% 
  mutate(
    n_population = n_population/1000000
  ) %>% 
  distinct()
 
labels <- c(
  "Black & Other + Hispanic"     = "Black & Other \n Hispanic"
  ,"Black & Other + Non-Hispanic"  = "Black & Other \n Non-Hispanic"
  ,"White + Hispanic" = "White \n Hispanic"
  ,"White + Non-Hispanic"      = "White \n Non-Hispanic"
  ,"10-14"         =   "10-14"   
  ,"15-19"         =   "15-19"    
  ,"20-24"         =   "20-24" 
  
)

g2 <- d3 %>% 
  ggplot(aes(x = year, y = n_population, color = race_ethnicity)) +
  geom_smooth(method = "lm", se = FALSE, color = "grey70",alpha =.5) +
  geom_line(alpha = 0.99) +
  geom_point(shape = 21, size = 3, alpha = 1) +
  facet_grid(race_ethnicity ~ age_group, scales = "free_y", labeller = as_labeller(labels)) +
  # scale_color_viridis_d(begin = .0, end = .7, option = "viridis")+
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = seq(2007,2017,3), minor_breaks = seq(2006,2017,1)) +
  ggpmisc::stat_poly_eq(formula = y ~ + x 
                        ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
                        ,parse   = TRUE
                        ,label.x = 0.05
                        ,label.y = 1
                        ,color   = "#D95F02") +
  labs(
    x  = NULL
    ,y = "Population in millions"
    ,color = NULL
    ,title = "Population growth among Florida youth (10-24)"
  # scale_color_viridis_d(option = "volcano", begin = .0 , end = .8)
  )+theme(legend.position = "bottom")

g2

Note the sharp change in population count from 2011 to 2013. The 15-19 group dropped, while the 20-24 group grew quite dramatically. A closer examination, shows that while this demographic growth was present in all racial groups, it was particularly pronounced among Non-white + Non-Hispanic youth. One possible explanation could be attributed to cohort effect: disproportionate amount of those in 15-19 group transitioned to 20-24 group during these years, but were not matched by the similar number of 10-14year old transitioning into 15-19 group. Another possible explanation could reside in migration patterns.

Q2 - Suicide across lifespan

Q2 - What ages are most vulnerable to suicide?

To explore whether certain age transitions are associated with increased suicidality, we plot the distribution of suicide events within each year of age among the observed years (2006 - 2018).

# Hypothesis: does entering high-school associated with increased suicide events?
# Can we see the spike in mortality at 13-14 years of age?
# For that we need to view by year mortality event? 

# d4 <- ds_suicide_by_age %>%

d4 <- ds2 %>% 
  mutate(age = as.integer(age)) %>% 
  filter(age %in% c(10:85)) %>%
  filter(year %in% 2006:2018) %>% 
  group_by(year, age) %>% 
  summarize(
    n_suicide        = sum(n_suicides, na.rm = T)
  ) %>% 
  ungroup()
# d4 %>% glimpse()



g4 <- d4 %>% 
  ggplot(aes(x = age, y = n_suicide))+
  geom_smooth(method = "lm", se= F, size = 1,color = "salmon")+
  geom_smooth(method = "loess", se= F, size = 1,color = "cyan3")+
  ggpmisc::stat_poly_eq(
    formula = y ~ + x
    ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
    ,parse = TRUE, color = "salmon"
    # , vjust = 7
  )+
  geom_boxplot(aes( group = age), fill = NA)+
  scale_x_continuous(breaks = seq(10,85,5))+
  # scale_y_continuous(breaks = seq(0,100,10))+
  geom_vline(xintercept = 24.5, size = 4, alpha = .1)+
  geom_vline(xintercept = 17.5, size = 1, linetype = "dashed", color = "grey80")+
  theme(
    panel.grid.minor = element_blank()
  )+
  labs(
    title = "Suicide events among persons of the same age (2006-2018)"
    ,x = "Age in years", y = "Count of suicides (all causes)"
  )
g4

There appears to be two plateaux across the lifespan, each preceeded by sharp increase: the first from ages 25 to 40 and another from ages 45 to 60, after which the frequency of suicide declines monotonically.

We see a marked spike in deaths during the 17 to 18 transition. It appears the majority of deaths in 10-14 groups could be attributed to 13 and 14 year olds (vertial dashed line). The plateau following the age 25 makes theoretical sense: after the turmoils of coming out of age and young adulthood, there life acquires a more stable character.

g4 <- d4 %>% 
  dplyr::filter(age %in% 10:40) %>% 
  ggplot(aes(x = age, y = n_suicide))+
  geom_smooth(method = "lm", se= F, size = 1,color = "salmon")+
  geom_smooth(method = "loess", se= F, size = 1,color = "cyan3")+
  ggpmisc::stat_poly_eq(
    formula = y ~ + x
    ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
    ,parse = TRUE, color = "salmon"
    # , vjust = 7
  )+
  geom_boxplot(aes( group = age), fill = NA)+
  scale_x_continuous(breaks = seq(10,40,1))+
  scale_y_continuous(breaks = seq(0,100,10))+
  geom_vline(xintercept = 24.5, size = 4, alpha = .1)+
  geom_vline(xintercept = 17.5, size = 1, linetype = "dashed", color = "grey80")+
  theme(
    panel.grid.minor = element_blank()
  )+
  labs(
    title = "Suicide events among person of the same age (2006-2018)"
    ,x = "Age in years", y = "Count of suicides (all causes)"
  )
g4

Also note, that when examining the trends within the 10-24 age group, the relationship between the age and the suicide mortality is very well summarized by a linear model (\(R^2 = .85\)):

# among 10-24 the increase across age is very linear

g5 <- d4 %>% 
  filter(age %in% c(10:24)) %>% 
  ggplot(aes(x = age, y = n_suicide))+
  geom_point(shape = 21, alpha = .4, size = 2, position = position_jitter(width = .1))+
  geom_smooth(method = "lm", se= F, size = 1,color = "salmon")+
  geom_smooth(method = "loess", se= F, size = 1,color = "cyan3")+
  ggpmisc::stat_poly_eq(
    formula = y ~ + x
    ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
    ,parse = TRUE, color = "salmon"
    # , vjust = 7
  )+
  geom_boxplot(aes( group = age), fill = NA, outlier.shape = NA)+
  scale_x_continuous(breaks = seq(10,40,1))+
  scale_y_continuous(breaks = seq(0,100,10))+
  geom_vline(xintercept = 24.5, size = 4, alpha = .1)+
  geom_vline(xintercept = 17.5, size = 1, linetype = "dashed", color = "grey80")+
  theme(
    panel.grid.minor = element_blank()
  )+
  labs(
    title = "Suicide events among person of the same age (2006-2018)"
    ,x = "Age in years", y = "Count of suicides (all causes)"
  )
g5

Q3 - Prevalent Methods

Q3 - What are the most prevalent methods of youth suicide?

Florida Health Charts captures the causes of suicide mortality using 7 categories. We examine the raw event count and rates per 100,000 rates in the following two graphs:

#yearly count of suicide means 

suicide_cause_order <- c(
  "drug"        = "Drug"
  ,"gun"          = "Gun"
  ,"hanging"      = "Hanging"
  ,"jump"         = "Jump"
  ,"other_seq"    = "Other Sequelae"
  ,"other_liq"    = "Other Liqud"
  ,"other_gas"    = "Other Gas & Vapor"
)




d6 <- ds1 %>% 
  compute_rate("year")


g6 <- d6 %>%  
  filter(!suicide_cause %in% c("suicide","non_gun_hang")) %>% 
  mutate(
    suicide_cause = factor(suicide_cause
                           ,levels = names(suicide_cause_order)
                           ,labels = suicide_cause_order)
  ) %>% 
  ggplot(aes(x = reorder(suicide_cause,desc(suicide_cause)), y = n_suicides)) +
  geom_col(alpha = 0.4, fill = "#1B9E77", color = "#666666") +
  geom_text(aes(label = n_suicides), hjust = 1) +
  coord_flip() +
  facet_wrap(~year) +
  labs(
    x        = "Number of cases"
    ,y       = NULL
    ,title   = "Breakdown of Yearly Suicide Counts"
  )
g6

Notice that two methods gun and hanging consistently explain the most cases of suicide in this age group. Going further, we will collapse the other 5 categories into a single non-gun/hang, so that we have sufficient n to compute reliable rates.

g7 <-  d6 %>%  
  filter(!suicide_cause %in% c("suicide","non_gun_hang")) %>% 
  mutate(
    suicide_cause = factor(suicide_cause
                           ,levels = names(suicide_cause_order)
                           ,labels = suicide_cause_order)
  ) %>% 
  ggplot(aes(x = reorder(suicide_cause,desc(suicide_cause)), y = rate_suicides)) +
  geom_col(alpha = 0.4, fill = "#1B9E77", color = "#666666") +
  geom_text(aes(label = round(rate_suicides,1)), hjust = 1) +
  coord_flip() +
  facet_wrap(~year) +
  labs(
    x        = "Rate per 100,000"
    ,y       = NULL
    ,title   = "Breakdown of Yearly Suicide Rates"
  )

g7

Q4 - Methods across Years

Q4 - How do trends compare across the methds of suicide?

major_causes <- c(
  "gun"           = "Gun"
  ,"hanging"      = "Hanging"
  ,"non_gun_hang" = "Non Gun/Hang")

major_causes_colors <- c(
  
)

g8 <- d6 %>% 
  filter(suicide_cause %in% names(major_causes)) %>%
  mutate(
    suicide_cause = factor(suicide_cause
                           ,levels = names(major_causes)
                           ,labels = major_causes)
  ) %>% 
  ggplot(aes(x = year,y = n_suicides, color = suicide_cause)) +
  geom_line() +
  geom_point(shape = 21, size = 3) +
  geom_smooth(method = "lm", se = F) +
  ggpmisc::stat_poly_eq(
    formula = y ~ + x
    ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
    ,parse = TRUE
    # , vjust = 7
  ) +
  scale_x_continuous(breaks = seq(2006,2017,2), minor_breaks = seq(2006,2017,1)) +
  scale_color_brewer(palette = "Dark2") +
  labs(
    x = NULL
    ,y = "Count"
    , title = "Florida suicide mortality among youth (10-24) by means of death"
    , color = "Method"
  ) 

g8

When ignoring race and ethnicity, the average increase in the annual rate of suicide mortality from gun (+0.0996) is 1.5 times higher than that from Hanging (+.0672) and 2.6 times higher than other means (+.0377). Notice also, that mortality from firearms and hanging follow the trajectory closely described by a linear trend: \(R^2\) = .7 and \(R^2\) = .64, respectively.

g9 <- d6 %>% 
  filter(suicide_cause %in% names(major_causes)) %>%
  mutate(
    suicide_cause = factor(suicide_cause
                           ,levels = names(major_causes)
                           ,labels = major_causes)
  ) %>% 
  ggplot(aes(x = year,y = rate_suicides, color = suicide_cause)) +
  geom_line() +
  geom_point(shape = 21, size = 3) +
  geom_smooth(method = "lm", se = F) +
  ggpmisc::stat_poly_eq(
    formula = y ~ + x
    ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
    ,parse = TRUE
  ) +
  scale_x_continuous(breaks = seq(2006,2017,2), minor_breaks = seq(2006,2017,1)) +
  scale_color_brewer(palette = "Dark2") +
  labs(
    x = NULL
    ,y = "Rate per 100,000"
    , title = "Florida suicide mortality among youth (10-24) by means of death"
    , color = "Method"
  )

g9

Q5 - Sex and Ethnicity

Q5 - How do trends compare by race, sex, and means of means of suicide?

d10 <- ds1 %>% 
  compute_rate(c("year","sex","race_ethnicity")) %>% 
  filter(suicide_cause %in% c("gun","hanging","non_gun_hang")) %>% 
  mutate(
    suicide_cause = factor(suicide_cause
                           ,levels = c("gun","hanging","non_gun_hang")
                           ,labels = c("Gun","Hang","Non Gun/Hang"))
  )
  

g10 <- d10 %>% 
  ggplot(aes(x = year, y = rate_suicides, color = sex)) +
  geom_line() +
  geom_point(shape = 21) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_x_continuous(breaks = seq(2007,2017,5), minor_breaks = seq(2006,2017,1)) +
  # scale_color_brewer(palette = "Dark2") +
  scale_color_viridis_d(option = "magma",begin = .2, end = .65)+
  facet_grid(suicide_cause ~race_ethnicity
             # , scales = "free"
  ) +
  ggpmisc::stat_poly_eq(
    formula = y ~ + x
    ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
    ,parse = TRUE
    # , vjust = 7
  ) +
  labs(
    x      = NULL
    ,y     = "Rate per 100,000"
    ,title = "Rate of Suicides by Race and Sex"
    ,color = "Sex"
  )
g10

The most prominent increase, dwarfing others, is observed among White Non-Hispanic men, rising from 6.8 in 2006 to 11.3 in 2017, constituting a 65% growth. Between these years, the rate per 100,000 in this category was increasing on average by .33 per year.

Among white women of both Hispanic and Non-Hispanic origin, this growth was substantially smaller, about +.06 per year, similar to the growth in the rate of suicide by means other than firearms or hanging. However, suicides by hanging was increasing by +.09 per year.

For Non-White Non-Hispanic women, the largest growth in suicide rates was observed from hanging (+.1), while suicides by other means barely increased.

g11 <- d10 %>% 
  ggplot(aes(x = year, y = rate_suicides, color = suicide_cause)) +
  geom_line() +
  geom_point(shape = 21) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_x_continuous(breaks = seq(2007,2017,5), minor_breaks = seq(2006,2017,1)) +
  scale_color_brewer(palette = "Dark2") +
  facet_grid(sex ~race_ethnicity
             , scales = "free"
  ) +
  ggpmisc::stat_poly_eq(
    formula = y ~ + x
    ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
    ,parse = TRUE
    # , vjust = 7
  ) +
  labs(
    x      = NULL
    ,y     = "Rate per 100,000"
    ,title = "Rate of Suicides by Race and Sex"
    ,color = "Method"
  )
g11

session information

For the sake of documentation and reproducibility, the current report was rendered in the following environment. Click the line below to expand.

Environment

- Session info -------------------------------------------------------------------------------------------------------
 setting  value                       
 version  R version 4.0.2 (2020-06-22)
 os       Windows 10 x64              
 system   x86_64, mingw32             
 ui       RTerm                       
 language (EN)                        
 collate  English_United States.1252  
 ctype    English_United States.1252  
 tz       America/New_York            
 date     2020-08-30                  

- Packages -----------------------------------------------------------------------------------------------------------
 package     * version date       lib source        
 assertthat    0.2.1   2019-03-21 [1] CRAN (R 4.0.2)
 backports     1.1.7   2020-05-13 [1] CRAN (R 4.0.0)
 callr         3.4.3   2020-03-28 [1] CRAN (R 4.0.2)
 cli           2.0.2   2020-02-28 [1] CRAN (R 4.0.2)
 colorspace    1.4-1   2019-03-18 [1] CRAN (R 4.0.2)
 crayon        1.3.4   2017-09-16 [1] CRAN (R 4.0.2)
 desc          1.2.0   2018-05-01 [1] CRAN (R 4.0.2)
 devtools      2.3.1   2020-07-21 [1] CRAN (R 4.0.2)
 digest        0.6.25  2020-02-23 [1] CRAN (R 4.0.2)
 dplyr       * 1.0.1   2020-07-31 [1] CRAN (R 4.0.2)
 ellipsis      0.3.1   2020-05-15 [1] CRAN (R 4.0.2)
 evaluate      0.14    2019-05-28 [1] CRAN (R 4.0.2)
 fansi         0.4.1   2020-01-08 [1] CRAN (R 4.0.2)
 fs            1.5.0   2020-07-31 [1] CRAN (R 4.0.2)
 generics      0.0.2   2018-11-29 [1] CRAN (R 4.0.2)
 ggplot2     * 3.3.2   2020-06-19 [1] CRAN (R 4.0.2)
 glue          1.4.1   2020-05-13 [1] CRAN (R 4.0.2)
 gtable        0.3.0   2019-03-25 [1] CRAN (R 4.0.2)
 htmltools     0.5.0   2020-06-16 [1] CRAN (R 4.0.2)
 knitr       * 1.29    2020-06-23 [1] CRAN (R 4.0.2)
 lifecycle     0.2.0   2020-03-06 [1] CRAN (R 4.0.2)
 magrittr    * 1.5     2014-11-22 [1] CRAN (R 4.0.2)
 memoise       1.1.0   2017-04-21 [1] CRAN (R 4.0.2)
 munsell       0.5.0   2018-06-12 [1] CRAN (R 4.0.2)
 pillar        1.4.6   2020-07-10 [1] CRAN (R 4.0.2)
 pkgbuild      1.1.0   2020-07-13 [1] CRAN (R 4.0.2)
 pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.0.2)
 pkgload       1.1.0   2020-05-29 [1] CRAN (R 4.0.2)
 prettyunits   1.1.1   2020-01-24 [1] CRAN (R 4.0.2)
 processx      3.4.3   2020-07-05 [1] CRAN (R 4.0.2)
 ps            1.3.4   2020-08-11 [1] CRAN (R 4.0.2)
 purrr         0.3.4   2020-04-17 [1] CRAN (R 4.0.2)
 R6            2.4.1   2019-11-12 [1] CRAN (R 4.0.2)
 remotes       2.2.0   2020-07-21 [1] CRAN (R 4.0.2)
 rlang         0.4.7   2020-07-09 [1] CRAN (R 4.0.2)
 rmarkdown     2.3     2020-06-18 [1] CRAN (R 4.0.2)
 rprojroot     1.3-2   2018-01-03 [1] CRAN (R 4.0.2)
 scales        1.1.1   2020-05-11 [1] CRAN (R 4.0.2)
 sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 4.0.2)
 stringi       1.4.6   2020-02-17 [1] CRAN (R 4.0.0)
 stringr       1.4.0   2019-02-10 [1] CRAN (R 4.0.2)
 testthat      2.3.2   2020-03-02 [1] CRAN (R 4.0.2)
 tibble        3.0.3   2020-07-10 [1] CRAN (R 4.0.2)
 tidyr         1.1.1   2020-07-31 [1] CRAN (R 4.0.2)
 tidyselect    1.1.0   2020-05-11 [1] CRAN (R 4.0.2)
 usethis       1.6.1   2020-04-29 [1] CRAN (R 4.0.2)
 utf8          1.1.4   2018-05-24 [1] CRAN (R 4.0.2)
 vctrs         0.3.2   2020-07-15 [1] CRAN (R 4.0.2)
 withr         2.2.0   2020-04-20 [1] CRAN (R 4.0.2)
 xfun          0.16    2020-07-24 [1] CRAN (R 4.0.2)
 yaml          2.2.1   2020-02-01 [1] CRAN (R 4.0.2)

[1] C:/Users/an499583/OneDrive - University of Central Florida/Documents/R/win-library/4.0
[2] C:/Program Files/R/R-4.0.2/library

Florida Youth Suicide

April 30, 2020

Abstract

Environment

Data

Summary of differences in data sets used in the report:

Q1 - Overall Trend

Q2 - Suicide across lifespan

Q3 - Prevalent Methods

Q4 - Methods across Years

Q5 - Sex and Ethnicity

session information