This report begins the analytic inquiry into the data stored in the data transfer object (`dto`) prepared by the `./manipulation/0-greeter.R` script.
# Attach these packages so their functions don't need to be qualified
library(magrittr) # pipes
library(ggplot2) # graphing
# for other packages, please use qualifiers (e.g. dplyr::select() )
requireNamespace("dplyr") # data wrangling
requireNamespace("ggpubr") # documents
82.4 MB
[1] "list"
[1] "sentence" "codebook" "inmate"
# Derive a coarser offense category, make sure the start date is a Date,
# and keep only the columns used in this report.
ds <- ds %>%
  dplyr::mutate(
    # first character of the offense code = less granular category
    offense_group = substr(offense_arrest_cd, 1, 1),
    # ensure proper date format
    begin_date = lubridate::as_date(begin_date)
  ) %>%
  dplyr::select(
    person_id,         # idnumbercomb, manually checked to represent a unique person
    begin_date,        # date the person began serving the aggregate sentence
    offense_group,     # one-letter code (we need "C" - drug-related felonies)
    offense_arrest_cd, # code for the offense committed
    offense_count,     # count of offenses in inmate's sentence
    offense_arrest     # standardized description of the offense committed
  )
# Compact overview of the trimmed data set, printed 50 characters wide
dplyr::glimpse(ds, width = 50)
Observations: 120,381
Variables: 6
$ person_id <dbl> 1, 1, 2, 2, 3, 5, 6...
$ begin_date <date> 1974-09-24, 1974-0...
$ offense_group <chr> "E", "E", "D", "D",...
$ offense_arrest_cd <chr> "E01", "E01", "D21"...
$ offense_count <dbl> 1, 2, 1, 2, 1, 1, 1...
$ offense_arrest <chr> "FORGERY 1ST DEGREE...
# Look up the codebook entries that describe the date and offense fields
ds_codebook %>%
  dplyr::filter(
    field_name %in% c("begin_date", "offense_arrest_cd", "offense_arrest")
  ) %>%
  # NOTE(review): neat() is defined outside this section; presumably it
  # formats the table for display - confirm against its definition
  neat()
field_name | description | sample_data |
---|---|---|
offense_arrest_cd | code for the offense committed | E01 |
offense_arrest | standardized description of the offense committed | FORGERY 1ST DEGREE |
begin_date | date the person began serving the aggregate sentence | 27296 |
# Attach two per-person totals to every row of ds:
#   n_convictions    - number of rows recorded for the person
#   n_offense_counts - sum of offense_count over those rows
# NOTE(review): the sample rows show offense_count values 1, 2 for the same
# person; verify that this column is a count and not a running index,
# otherwise the sum overstates the number of offenses.
ds <- ds %>%
  dplyr::group_by(person_id) %>%
  dplyr::mutate(
    n_convictions    = dplyr::n(),
    n_offense_counts = sum(offense_count)
  ) %>%
  dplyr::ungroup()
# Overview of the data set with the two new columns, 50 characters wide
dplyr::glimpse(ds, width = 50)
Observations: 120,381
Variables: 8
$ person_id <dbl> 1, 1, 2, 2, 3, 5, 6...
$ begin_date <date> 1974-09-24, 1974-0...
$ offense_group <chr> "E", "E", "D", "D",...
$ offense_arrest_cd <chr> "E01", "E01", "D21"...
$ offense_count <dbl> 1, 2, 1, 2, 1, 1, 1...
$ offense_arrest <chr> "FORGERY 1ST DEGREE...
$ n_convictions <int> 2, 2, 2, 2, 1, 1, 1...
$ n_offense_counts <dbl> 3, 3, 3, 3, 1, 1, 1...
# Frequency distribution of recidivism: how many distinct people fall at each
# conviction count? Any n_convictions above 1 indicates recidivism.
t2 <- ds %>%
  dplyr::group_by(n_convictions) %>%
  dplyr::summarize(
    # number of distinct people with that many convictions
    n_people = dplyr::n_distinct(person_id)
  )
# Bar chart: one bar per conviction count, bar height = number of people
ggplot2::ggplot(t2, ggplot2::aes(x = n_convictions, y = n_people)) +
  ggplot2::geom_bar(
    stat  = "identity",
    fill  = "salmon",
    alpha = .5,
    color = "black"
  ) +
  ggplot2::theme_minimal() +
  ggplot2::labs(title = "Frequency distribution of conviction count")
# Q. Distribution of offenses for people with a single conviction:
# among people with exactly one conviction, how many have 1, 2, 3, ...
# offense counts?
d1 <- ds %>%
  dplyr::filter(n_convictions == 1) %>%
  dplyr::group_by(n_offense_counts) %>%
  dplyr::summarise(
    # Qualified on purpose: dplyr is only loaded via requireNamespace() in
    # this report, so a bare n() is not guaranteed to be found.
    # After the filter each person contributes one row, so rows = people.
    n_people = dplyr::n()
  ) %>%
  dplyr::ungroup()

# Full distribution, including the people with a single offense count
d1 %>%
  ggplot2::ggplot(aes(x = n_offense_counts, y = n_people)) +
  geom_bar(stat = "identity", fill = "aquamarine3", alpha = .5, color = "black") +
  theme_minimal() +
  labs(title = "Frequency distribution of offense count for people with 1 conviction")

# Same distribution with the single-offense bar removed so that the remaining
# bars are readable. The population is still people with ONE conviction, so
# the title says so (it previously claimed "2+ conviction").
d1 %>%
  dplyr::filter(n_offense_counts != 1) %>%
  ggplot2::ggplot(aes(x = n_offense_counts, y = n_people)) +
  geom_bar(stat = "identity", fill = "aquamarine3", alpha = .5, color = "black") +
  theme_minimal() +
  labs(title = "Frequency distribution of offense count for people with 1 conviction (excluding single-offense cases)")
# Q. Among the people with a single conviction, for each offense group:
#   - how many distinct persons were registered (n_people)
#   - how many offenses were registered, as the sum of offense_count (n_offenses)
# NOTE(review): an earlier comment warned that people are double counted
# across offense groups. With n_convictions computed as rows per person_id,
# the n_convictions == 1 filter leaves one row per person, so each person
# should land in exactly one group - confirm before relying on either reading.
t3 <- ds %>%
  dplyr::filter(n_convictions == 1) %>%
  dplyr::group_by(offense_group) %>%
  dplyr::summarize(
    n_people   = dplyr::n_distinct(person_id),
    n_offenses = sum(offense_count)
  ) %>%
  dplyr::ungroup()
# Two overlaid bar layers per offense group:
# pale yellow = offenses, blue = people
ggplot2::ggplot(t3, ggplot2::aes(x = offense_group)) +
  ggplot2::geom_bar(
    ggplot2::aes(y = n_offenses),
    fill  = "yellow",
    color = "black",
    stat  = "identity",
    alpha = .3
  ) +
  ggplot2::geom_bar(
    ggplot2::aes(y = n_people),
    fill  = "blue",
    stat  = "identity",
    alpha = .6
  ) +
  ggplot2::theme_minimal() +
  ggplot2::labs(title = "Offense count by offense group for people with 1 conviction")
# Q. For every offense group and calendar year of begin_date,
# how many distinct individuals have at least one row in that group?
t4 <- ds %>%
  dplyr::mutate(year = lubridate::year(begin_date)) %>%
  dplyr::group_by(offense_group, year) %>%
  dplyr::summarize(n_people = length(unique(person_id))) %>%
  dplyr::ungroup()
# Yearly trend lines for offense groups A-D, restricted to 1970-2014
t4 %>%
  dplyr::filter(
    year %in% 1970:2014,
    offense_group %in% c("A", "B", "C", "D")
  ) %>%
  ggplot2::ggplot(
    ggplot2::aes(x = year, y = n_people, color = offense_group)
  ) +
  ggplot2::geom_point() +
  ggplot2::geom_line(ggplot2::aes(group = offense_group)) +
  # NOTE(review): vertical reference line at 1996; the significance of that
  # year is not stated in this section - document it
  ggplot2::geom_vline(xintercept = 1996) +
  ggplot2::theme_minimal() +
  ggplot2::labs(title = "Number of people with at least 1 conviction")
For the sake of documentation and reproducibility, the current report was rendered in the following environment. Click the line below to expand.
Environment
- Session info -------------------------------------------------------------------------------------------------------
setting value
version R version 3.5.2 (2018-12-20)
os Windows >= 8 x64
system x86_64, mingw32
ui RStudio
language (EN)
collate English_United States.1252
ctype English_United States.1252
tz America/Los_Angeles
date 2019-05-15
- Packages -----------------------------------------------------------------------------------------------------------
package * version date lib source
assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.5.3)
backports 1.1.4 2019-04-10 [1] CRAN (R 3.5.3)
callr 3.2.0 2019-03-15 [1] CRAN (R 3.5.3)
cellranger 1.1.0 2016-07-27 [1] CRAN (R 3.5.3)
cli 1.1.0 2019-03-19 [1] CRAN (R 3.5.3)
codetools 0.2-15 2016-10-05 [2] CRAN (R 3.5.2)
colorspace 1.4-1 2019-03-18 [1] CRAN (R 3.5.3)
crayon 1.3.4 2017-09-16 [1] CRAN (R 3.5.3)
crosstalk 1.0.0 2016-12-21 [1] CRAN (R 3.5.3)
desc 1.2.0 2018-05-01 [1] CRAN (R 3.5.3)
devtools 2.0.2 2019-04-08 [1] CRAN (R 3.5.3)
digest 0.6.18 2018-10-10 [1] CRAN (R 3.5.3)
dplyr 0.8.0.1 2019-02-15 [1] CRAN (R 3.5.3)
DT 0.6 2019-05-09 [1] CRAN (R 3.5.3)
evaluate 0.13 2019-02-12 [1] CRAN (R 3.5.3)
fansi 0.4.0 2018-10-05 [1] CRAN (R 3.5.3)
fs 1.3.1 2019-05-06 [1] CRAN (R 3.5.3)
ggplot2 * 3.1.1 2019-04-07 [1] CRAN (R 3.5.3)
ggpubr 0.2 2018-11-15 [1] CRAN (R 3.5.3)
glue 1.3.1 2019-03-12 [1] CRAN (R 3.5.3)
gtable 0.3.0 2019-03-25 [1] CRAN (R 3.5.3)
highr 0.8 2019-03-20 [1] CRAN (R 3.5.3)
hms 0.4.2 2018-03-10 [1] CRAN (R 3.5.3)
htmltools 0.3.6 2017-04-28 [1] CRAN (R 3.5.3)
htmlwidgets 1.3 2018-09-30 [1] CRAN (R 3.5.3)
httpuv 1.5.1 2019-04-05 [1] CRAN (R 3.5.3)
httr 1.4.0 2018-12-11 [1] CRAN (R 3.5.3)
jsonlite 1.6 2018-12-07 [1] CRAN (R 3.5.3)
kableExtra 1.1.0 2019-03-16 [1] CRAN (R 3.5.3)
knitr * 1.22 2019-03-08 [1] CRAN (R 3.5.3)
labeling 0.3 2014-08-23 [1] CRAN (R 3.5.2)
later 0.8.0 2019-02-11 [1] CRAN (R 3.5.3)
lazyeval 0.2.2 2019-03-15 [1] CRAN (R 3.5.3)
lubridate 1.7.4 2018-04-11 [1] CRAN (R 3.5.3)
magrittr * 1.5 2014-11-22 [1] CRAN (R 3.5.3)
memoise 1.1.0 2017-04-21 [1] CRAN (R 3.5.3)
mime 0.6 2018-10-05 [1] CRAN (R 3.5.2)
munsell 0.5.0 2018-06-12 [1] CRAN (R 3.5.3)
pillar 1.3.1 2018-12-15 [1] CRAN (R 3.5.3)
pkgbuild 1.0.3 2019-03-20 [1] CRAN (R 3.5.3)
pkgconfig 2.0.2 2018-08-16 [1] CRAN (R 3.5.3)
pkgload 1.0.2 2018-10-29 [1] CRAN (R 3.5.3)
plyr 1.8.4 2016-06-08 [1] CRAN (R 3.5.3)
prettyunits 1.0.2 2015-07-13 [1] CRAN (R 3.5.3)
processx 3.3.1 2019-05-08 [1] CRAN (R 3.5.2)
promises 1.0.1 2018-04-13 [1] CRAN (R 3.5.3)
pryr 0.1.4 2018-02-18 [1] CRAN (R 3.5.3)
ps 1.3.0 2018-12-21 [1] CRAN (R 3.5.3)
purrr 0.3.2 2019-03-15 [1] CRAN (R 3.5.3)
R6 2.4.0 2019-02-14 [1] CRAN (R 3.5.3)
Rcpp 1.0.1 2019-03-17 [1] CRAN (R 3.5.3)
readr 1.3.1 2018-12-21 [1] CRAN (R 3.5.3)
readxl 1.3.1 2019-03-13 [1] CRAN (R 3.5.3)
remotes 2.0.4 2019-04-10 [1] CRAN (R 3.5.3)
rlang 0.3.4 2019-04-07 [1] CRAN (R 3.5.3)
rmarkdown 1.12 2019-03-14 [1] CRAN (R 3.5.3)
rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.5.3)
rstudioapi 0.10 2019-03-19 [1] CRAN (R 3.5.3)
rvest 0.3.3 2019-04-11 [1] CRAN (R 3.5.3)
scales 1.0.0 2018-08-09 [1] CRAN (R 3.5.3)
sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.5.3)
shiny 1.3.2 2019-04-22 [1] CRAN (R 3.5.2)
stringi 1.4.3 2019-03-12 [1] CRAN (R 3.5.3)
stringr 1.4.0 2019-02-10 [1] CRAN (R 3.5.3)
tibble 2.1.1 2019-03-16 [1] CRAN (R 3.5.3)
tidyselect 0.2.5 2018-10-11 [1] CRAN (R 3.5.3)
usethis 1.5.0 2019-04-07 [1] CRAN (R 3.5.3)
utf8 1.1.4 2018-05-24 [1] CRAN (R 3.5.3)
viridisLite 0.3.0 2018-02-01 [1] CRAN (R 3.5.3)
webshot 0.5.1 2018-09-28 [1] CRAN (R 3.5.3)
withr 2.1.2 2018-03-15 [1] CRAN (R 3.5.3)
xfun 0.6 2019-04-02 [1] CRAN (R 3.5.3)
xml2 1.2.0 2018-01-24 [1] CRAN (R 3.5.3)
xtable 1.8-4 2019-04-21 [1] CRAN (R 3.5.3)
yaml 2.2.0 2018-07-25 [1] CRAN (R 3.5.2)
[1] C:/Users/an499583/Documents/R/win-library/3.5
[2] C:/Program Files/R/R-3.5.2/library
Report rendered by an499583 at 2019-05-15, 17:40 -0700 in 9 seconds.