Abstract

This blogpost explores youth (ages 10 - 24) suicide trends in Florida between 2006 and 2017, examining the effects of race and gender. The post answers the following questions:

Q1 - What is the overall trajectory of youth suicides in FL between 2006 and 2017?

Q1b - How do trends in population growth, suicide count and suicide rate compare among age group?

Q2 - How do means of suicide compare in each year?

Q3 - How do trends compare across the means of suicide?

Q4 - How do trends compare by race, sex, and means of means of suicide?

Environment

# Attach these packages so their functions don't need to be qualified
# see http://r-pkgs.had.co.nz/namespace.html#search-path
library(magrittr) # pipes %>% 
library(ggplot2)  # graphs
library(dplyr)    # data wrangling
requireNamespace("tidyr")  # data tidying

# you will need to replace this path to the location where you stored your data file
path_file_input <- "data-unshared/derived/9-population-suicide.csv"

# to help with sorting the levels of the `age_group` factor
lvl_age_groups <- c(
  "less_than_1"         =   "<1"          
  ,"1_4"                =   "1-4"  
  ,"5_9"                =   "5-9"  
  ,"10_14"              =   "10-14"    
  ,"15_19"              =   "15-19"    
  ,"20_24"              =   "20-24"    
  ,"25_34"              =   "25-34"    
  ,"35_44"              =   "35-44"    
  ,"45_54"              =   "45-54"    
  ,"55_64"              =   "55-64"    
  ,"65_74"              =   "65-74"    
  ,"75_84"              =   "75-84"    
  ,"85_plus"            =   "85+"      
)
age_groups_in_focus <-   lvl_age_groups[4:12]
age_groups_10_24    <-   lvl_age_groups[4:6]

#set default ggplot theme
ggplot2::theme_set(ggplot2::theme_bw())

#updated to new compute rate function, that includes option for wide data

compute_rate <- function( d  ,grouping_frame  ,wide = FALSE ){
  # d <- ds_population_suicide
  # grouping_frame <- c("year")
  # 
  d_wide <- d %>%
    dplyr::group_by(.dots = grouping_frame) %>%
    dplyr::summarize(
      n_population      = sum(n_population, na.rm = T)
      ,n_suicide        = sum(n_suicides, na.rm = T)
      ,n_gun            = sum(`Firearms Discharge`, na.rm=T)
      ,n_drug           = sum(`Drugs & Biological Substances`, na.rm=T)
      ,n_hanging        = sum(`Hanging, Strangulation, Suffocation`, na.rm=T)
      ,n_jump           = sum(`Jump From High Place`, na.rm=T)
      ,n_other_seq      = sum(`Other & Unspec & Sequelae`, na.rm = T)
      ,n_other_liq      = sum(`Other & Unspec Sol/Liq & Vapor`, na.rm = T)
      ,n_other_gas      = sum(`Other Gases & Vapors`, na.rm = T)
    ) %>% 
    dplyr::ungroup() %>% 
    dplyr::mutate(
      # n_other = n_suicide - n_drug - n_gun -n_hanging - n_jump
      n_non_gun = n_suicide - n_gun
      ,n_non_gun_hang_drug   = n_suicide - n_gun - n_drug - n_hanging 
      
      ,rate_suicide                 = (n_suicide/n_population)*100000
      ,rate_gun                     = (n_gun/n_population)*100000
      ,rate_drug                    = (n_drug/n_population)*100000
      ,rate_hanging                 = (n_hanging/n_population)*100000
      ,rate_jump                    = (n_jump/n_population)*100000
      # ,rate_other                 = (n_other/n_population)*100000
      ,rate_other_seq               = (n_other_seq/n_population)*100000
      ,rate_other_liq               = (n_other_liq/n_population)*100000
      ,rate_other_gas               = (n_other_gas/n_population)*100000
      ,rate_non_gun                 = (n_non_gun/n_population)*100000
      ,rate_non_gun_hang_drug       = (n_non_gun_hang_drug/n_population)*100000
      
    )
  # d_wide %>% glimpse()
  col_select <- c("n_suicide"
                  ,"n_drug"
                  ,"n_gun"
                  ,"n_hanging"
                  ,"n_jump"
                  ,"n_other_seq" 
                  ,"n_other_liq" 
                  ,"n_other_gas"
                  ,"n_non_gun"
                  ,"n_non_gun_hang_drug")
  d_n <- d_wide %>% dplyr::select(c(grouping_frame , col_select)) %>% 
    tidyr::pivot_longer(
      cols       = col_select
      ,names_to  = "suicide_cause"
      ,values_to = "n_suicides"
    ) %>% 
    # tidyr::gather("suicide_cause", "n_suicides", n_suicide, n_drug,n_gun, n_hanging, n_jump, n_other) %>% 
    dplyr::mutate(
      suicide_cause = gsub("^n_","",suicide_cause)
    )
  d_rate <- d_wide %>% dplyr::select(
    c(grouping_frame, gsub("^n_","rate_",col_select))
  ) %>%
    tidyr::pivot_longer(
      cols = gsub("^n_","rate_",col_select)
      ,names_to  = "suicide_cause"
      ,values_to = "rate_suicides"
    ) %>% 
    # tidyr::gather("suicide_cause", "rate_per_100k", rate_suicide, rate_drug,rate_gun, rate_hanging, rate_jump, rate_other) %>% 
    dplyr::mutate(
      suicide_cause = gsub("^rate_","",suicide_cause)
    )
  
  d_long <- d_wide %>% dplyr::select( c(grouping_frame,"n_population") ) %>% 
    dplyr::left_join(d_n) %>% 
    dplyr::left_join(d_rate)
  
  d_out <- d_long
  if(wide){
    d_out <- d_wide
  }
  
  return(d_out)
}


#how to use
# ls_compute_rate <- ds0 %>% compute_rate("year")

Data

The data comes from Florida Health Charts and contains suicide mortality between 2006 and 2017, broken down by suicide means, county, sex, age, and race. The the dataset is available for download here.

# data prepared by "./manipulation/9-aggregator.R" combining population estimates and suicide counts
ds_population_suicide <-   readr::read_csv(path_file_input)

# map of florida counties
florida_counties_map <- ggplot2::map_data("county") %>% 
  dplyr::filter(region == "florida") %>% 
  dplyr::mutate_at(
    "subregion"
    , ~stringr::str_replace_all(
      .
      ,c(
        "de soto" = "desoto"
        ,"st johns" ="saint johns"
        ,"st lucie" = "saint lucie"
      )
    )
  ) %>% tibble::as_tibble()

Data Tweaks

This dataset already comes well-gromed, so only some minor housekeeping tweaks are necessary to make graphing more convenient. The data is also filter to only include individuals between ages 10 and 24.

#mutate and filter data to include only ages 10-24

ds0 <- ds_population_suicide %>%
  dplyr::mutate(
    year          = as.integer(year)
    ,sex           = factor(sex,levels = c("Male", "Female"))
    ,race_ethnicity = factor(paste0(race, " + ", ethnicity))
    ,race          = factor(race)
    ,ethnicity     = factor(ethnicity)
    ,age_group     = factor(age_group
                            ,levels = names(lvl_age_groups)
                            ,labels = lvl_age_groups
                            )
    ,n_population  = as.integer(n_population)
    ,n_suicides    = as.integer(n_suicides)
  ) %>% filter(age_group %in% age_groups_10_24)


ds0 %>% dplyr::glimpse(70)

Rows: 19,296
Columns: 16
$ county                                <chr> "Alachua", "Alachua...
$ year                                  <int> 2006, 2006, 2006, 2...
$ sex                                   <fct> Female, Female, Fem...
$ race                                  <fct> Black & Other, Blac...
$ ethnicity                             <fct> Hispanic, Hispanic,...
$ age_group                             <fct> 10-14, 15-19, 20-24...
$ n_population                          <int> 50, 125, 250, 2410,...
$ n_suicides                            <int> NA, NA, NA, NA, NA,...
$ `Drugs & Biological Substances`       <dbl> NA, NA, NA, NA, NA,...
$ `Other Gases & Vapors`                <dbl> NA, NA, NA, NA, NA,...
$ `Hanging, Strangulation, Suffocation` <dbl> NA, NA, NA, NA, NA,...
$ `Firearms Discharge`                  <dbl> NA, NA, NA, NA, NA,...
$ `Jump From High Place`                <dbl> NA, NA, NA, NA, NA,...
$ `Other & Unspec & Sequelae`           <dbl> NA, NA, NA, NA, NA,...
$ `Other & Unspec Sol/Liq & Vapor`      <dbl> NA, NA, NA, NA, NA,...
$ race_ethnicity                        <fct> Black & Other + His...

The unit of analysis of this dataset is defined by the first 6 variables: county, year, sex, race, ethnicity, age_group. For each unit, there are two measures: the number of people (n_population) and the number of observed suicide events (n_suicides). The latter is broken down by 7 means of suicide. For convenience, we create a combined variable race_ethnicity.

ds0 %>% group_by(race, ethnicity, race_ethnicity) %>% count() %>% select(-n)

# A tibble: 4 x 3
# Groups:   race, ethnicity, race_ethnicity [4]
  race          ethnicity    race_ethnicity              
  <fct>         <fct>        <fct>                       
1 Black & Other Hispanic     Black & Other + Hispanic    
2 Black & Other Non-Hispanic Black & Other + Non-Hispanic
3 White         Hispanic     White + Hispanic            
4 White         Non-Hispanic White + Non-Hispanic

Q1 - Overall Trend

To begin, let us answer the most fundamental question:

Q1 - What is the overall trajectory of youth suicides in FL between 2006 and 2017?

d <- ds0 %>% 
  compute_rate("year") %>% 
  filter(suicide_cause == "suicide") %>% 
  select(-suicide_cause) %>% 
  tidyr::pivot_longer(
    cols       = c("n_suicides","n_population", "rate_suicides")
    ,names_to  = "metric"
    ,values_to = "value"
  ) 

labels <- c(
  "n_suicides"     = "Suicides"
  ,"n_population"  = "Population"
  ,"rate_suicides" = "Rate per 100k"
)
  


d %>% 
  ggplot(aes(x = year, y = value)) +
  geom_line(alpha = 0.5) +
  geom_point(shape = 21, size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, color = "#1B9E77") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = seq(2007,2017,3)) +
  facet_wrap(~metric, scales = "free_y", labeller = as_labeller(labels)) +
  ggpmisc::stat_poly_eq(formula = y ~ + x 
                        ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
                        ,parse = TRUE
                        ,label.x = 0.9
                        ,label.y = 0.1) +
  labs(
    x  = NULL
    ,y = NULL
  )

Q1b - How do trends in population growth, suicide count and suicide rate compare among age group?

d <- ds0 %>% 
  compute_rate(c("year","age_group")) %>% 
  filter(suicide_cause == "suicide") %>% 
  select(-suicide_cause) %>% 
  mutate(
    one_out_of = n_population / n_suicides
  ) %>% 
  tidyr::pivot_longer(
    cols       = c("n_suicides","n_population", "rate_suicides", "one_out_of")
    ,names_to  = "metric"
    ,values_to = "value"
  ) 

labels <- c(
  "n_suicides"     = "Suicides"
  ,"n_population"  = "Population"
  ,"rate_suicides" = "Rate per 100k"
  ,"one_out_of"    = "One out of"
  ,"10-14"         =   "10-14"    
  ,"15-19"         =   "15-19"    
  ,"20-24"         =   "20-24" 
  
)

d %>% 
  filter(!metric == "one_out_of") %>% 
  ggplot(aes(x = year, y = value)) +
  geom_line(alpha = 0.5) +
  geom_point(shape = 21, size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, color = "#1B9E77") +
  facet_grid(metric ~ age_group, scales = "free_y", labeller = as_labeller(labels)) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = seq(2007,2017,3)) +
  ggpmisc::stat_poly_eq(formula = y ~ + x 
                        ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
                        ,parse   = TRUE
                        ,label.x = 0.05
                        ,label.y = 1
                        ,color   = "#D95F02") +
  labs(
    x  = NULL
    ,y = NULL
  )

d %>% 
  filter(metric == "one_out_of") %>% 
  mutate( value = value/1000) %>% 
  ggplot(aes(x = year, y = value)) +
  geom_line(alpha = 0.5) +
  geom_point(shape = 21, size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, color = "#1B9E77") +
  facet_wrap(metric ~ age_group, scales = "free_y", labeller = as_labeller(labels)) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = seq(2007,2017,3)) +
  ggpmisc::stat_poly_eq(formula = y ~ + x 
                        ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
                        ,parse   = TRUE
                        ,label.x = 0.05
                        ,label.y = 1
                        ,color   = "#D95F02") +
  labs(
    x  = NULL
    ,y = "Thousands"
  )

Q2 - Yearly Breakdowns

Q2 - How do means of suicide compare in each year?

major_causes <- c("gun","hanging","drug","non_gun","non_gun_hang_drug")

major_causes_order <- c(
  "gun"                = "Gun"
  ,"hanging"           = "Hanging"
  ,"drug"              = "Drug"
  ,"non_gun"           = "Non Gun"
  ,"non_gun_hang_drug" = "Non Major")

d <- ds0 %>% 
  compute_rate("year")
# d <- d$long

d %>%  
  filter(suicide_cause %in% major_causes) %>% 
  mutate(
    suicide_cause = factor(
      suicide_cause
      ,levels = names(major_causes_order)
      ,labels = major_causes_order
      )
  ) %>% 
  ggplot(aes(x = suicide_cause, y = n_suicides)) +
  geom_col(alpha = 0.4, fill = "#1B9E77", color = "#666666") +
  geom_text(aes(label = n_suicides), hjust = 1) +
  coord_flip() +
  facet_wrap(~year) +
  labs(
    x        = NULL
    ,y       = NULL
    ,caption = "Non-Major counts all types other then Gun, Hanging, and Drug"
    ,title   = "Breakdown of Yearly Suicide Counts"
  )

d %>%  
  filter(suicide_cause %in% major_causes) %>% 
  mutate(
    suicide_cause = factor(
      suicide_cause
      ,levels = names(major_causes_order)
      ,labels = major_causes_order
    )
  ) %>% 
  ggplot(aes(x = suicide_cause, y = rate_suicides)) +
  geom_col(alpha = 0.4, fill = "#1B9E77", color = "#666666") +
  geom_text(aes(label = round(rate_suicides,1)), hjust = 1.1) +
  coord_flip() +
  facet_wrap(~year) +
  labs(
    x        = NULL
    ,y       = NULL
    ,caption = "Non-Major counts all types other then Gun, Hanging, and Drug"
    ,title   = "Breakdown of Yearly Suicide Rates"
  )

Q3 - Means across Years

Q3 - How do trends compare across the means of suicide?

# g <- d %>% 
#   filter(suicide_cause %in% major_causes) %>% 
#   ggplot(aes(x = year, y = rate_suicides, color = suicide_cause)) +
#   geom_line() +
#   geom_point(shape = 21) +
#   geom_smooth(method = "lm", se = FALSE) +
#   ggpmisc::stat_poly_eq(
#     formula = y ~ + x
#     ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
#     ,parse = TRUE
#     # , vjust = 7
#   ) 
# g


g <- d %>% 
  filter(suicide_cause %in% major_causes) %>% 
  mutate(
    suicide_cause = factor(
      suicide_cause
      ,levels = names(major_causes_order)
      ,labels = major_causes_order
    )
  ) %>%
  ggplot(aes(x = year, y = rate_suicides)) +
  geom_line(alpha = 0.5) +
  geom_point(shape = 21, size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, color = "#1B9E77") +
  scale_x_continuous(breaks = seq(2007,2017,3)) +
  facet_wrap(~suicide_cause
             # , scales = "free_y"
             ) +
  ggpmisc::stat_poly_eq(
    formula = y ~ + x
    ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
    ,parse = TRUE
    ,label.x = 0.05
    ,label.y = 0.99
  ) +
  labs(
    x        = NULL
    ,y       = NULL
    ,caption = "Non-Major counts all types other then Gun, Hanging, and Drug"
    ,title = "Rates per 100,000"
  )
  
g

#when ignoring race and ethnicity for age group 10-24 average increase of suicide
#mortality from gun (+0.1) per year is simaliar to average 
#increase from non-gun means (+0.105)

g <- d %>% 
  filter(suicide_cause %in% major_causes) %>% 
  mutate(
    suicide_cause = factor(
      suicide_cause
      ,levels = names(major_causes_order)
      ,labels = major_causes_order
    )
  ) %>%
  ggplot(aes(x = year, y = n_suicides)) +
  geom_line(alpha = 0.5) +
  geom_point(shape = 21, size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, color = "#1B9E77") +
  scale_x_continuous(breaks = seq(2007,2017,3)) +
  facet_wrap(~suicide_cause
             # , scales = "free_y"
  ) +
  ggpmisc::stat_poly_eq(
    formula = y ~ + x
    ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
    ,parse = TRUE
    ,label.x = 0.05
    ,label.y = 0.99
  ) +
  labs(
    x        = NULL
    ,y       = NULL
    ,caption = "Non-Major counts all types other then Gun, Hanging, and Drug"
    ,title = "Counts of suicide events"
  )

g

When ignoring race and ethnicity for age group 10-24 average increase of suicide mortality from gun (+0.1) per year is simaliar to average increase from non-gun means (+0.105)

Q4 - Sex and Ethnicity