This document is the annotation layer for the ./manipulation/0-greeter.R script,
which connects to the data sources used in the study, imports them into the RStudio environment, and prepares them for analytic enquiry.
# Attach these packages so their functions don't need to be qualified
library(magrittr) # pipes
library(ggplot2) # graphing
# other packages used in the script
requireNamespace("dplyr") # data wrangling
requireNamespace("readxl") # import from Excel
# import custom functions and scripts
base::source("./scripts/common-functions.R") # generic toolkit
# Paths to the two Excel workbooks read by this script.
# The `_small` variants are marked "for testing" (presumably reduced extracts
# -- confirm); to use them, uncomment these two lines and comment out the
# full-size paths below.
# path_file_input_sentences <- "./data-unshared/raw/2017-10-04 AllSentences_updated_small.xlsx" # for testing
# path_file_input_inmateDB <- "./data-unshared/raw/inmateDB_small.xlsx" # for testing
path_file_input_sentences <- "./data-unshared/raw/2017-10-04 AllSentences_updated.xlsx"
path_file_input_inmateDB <- "./data-unshared/raw/inmateDB.xlsx"
# source 1 : Nebraska Dept of Corrections workbook; two of its sheets are read
ds_sentence <- readxl::read_excel(
  path  = path_file_input_sentences,
  sheet = "AllSentences"
)
ds_codebook <- readxl::read_excel(
  path  = path_file_input_sentences,
  sheet = "Codebook",
  n_max = 50 # read at most 50 data rows from the codebook sheet
)
# source 2 : public inmate database (no sheet given, so the reader's default applies)
ds_inmate <- readxl::read_excel(path = path_file_input_inmateDB)
# Basic specs of the sentence table: memory footprint first, then rows x columns
pryr::object_size(ds_sentence)
dim(ds_sentence)
52.6 MB
[1] 120381 47
11.5 kB
[1] 44 3
20.1 MB
[1] 69300 32
Observations: 120,381
Variables: 47
$ `ID Number` <dbl> 53731, 44473, 54131, 62706, 47816, 46628, 46628, 46628, 92913...
$ Idnumbercomb <dbl> 44473, 44473, 54131, 62706, 47816, 46628, 46628, 46628, 92913...
$ `Previous ID` <dbl> 44473, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37142, 0, 0, 0, 0, 0, 0,...
$ `Inmate Name` <chr> "AARON BOBBY", "AARON ...
$ `Parole Eligibility Date` <dttm> 2003-05-09, 1994-03-20, 2000-08-18, 2007-04-11, 1996-08-11, ...
$ `Tentative Release Date` <dttm> 2006-02-10, 1994-07-20, 2001-02-15, 2008-04-11, 1997-09-26, ...
$ `Earn Dschrg Dt` <dttm> NA, NA, NA, 2008-03-21, 1997-04-10, NA, NA, NA, NA, NA, 1988...
$ `Actual Dschrg Dt` <dttm> NA, NA, NA, 2008-03-21, NA, NA, NA, NA, NA, NA, 1988-01-30, ...
$ `Good Time Law` <dbl> 6, 4, 6, 6, 4, 4, 4, 4, 1, 1, 1, 4, 7, 6, 7, 7, 7, 6, 1, 6, 6...
$ `Docket Count` <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ County <chr> "BUFFALO", "LANCASTER", "MADISON", "DOUGLAS", "DOUGLAS", "DOU...
$ `County CD` <dbl> 10, 55, 59, 28, 28, 28, 28, 28, 28, 28, 55, 55, 27, 72, 19, 4...
$ `Offense Begin Date` <chr> "36655", "1800-01-01", "36756", "38581", "1800-01-01", "1800-...
$ `Offense Count` <dbl> 1, 1, 1, 1, 1, 1, 2, 3, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1...
$ `Offense Attmpt CD` <chr> NA, "A", NA, NA, NA, NA, NA, NA, NA, NA, "A", NA, NA, NA, "A"...
$ `Offense Type CD` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, "A", NA, NA, NA, NA, NA, ...
$ `Offense Run CD` <chr> "CC", "CC", "CC", "CC", "CC", "CC", "CS", "CS", "CC", "CC", "...
$ `Offense Arrest CD` <chr> "C31", "D21", "B26", "C31", "D20", "B12", "K02", "C31", "E62"...
$ `Offense Arrest` <chr> "MANU/DIST/DEL/DISP OR POSS W/I", "THEFT", "SEXUAL ASSAULT ON...
$ `Cnvct Desc` <chr> "POSSESSION CON SUB W/I DELIVER", NA, NA, "POSSESSION W/I DEL...
$ `Felony Msdmnr CD` <chr> "3F", "F", "3AF", "3F", "4F", "4F", "3F", "2F", NA, NA, "4F",...
$ `Min Year` <dbl> 4, 1, 0, 4, 1, 2, 4, 4, 1, 0, 1, 1, 1, 4, 1, 2, 5, 4, 1, 0, 1...
$ `Min Month` <dbl> 0, 8, 0, 0, 6, 0, 0, 0, 4, 0, 0, 3, 8, 0, 6, 0, 0, 0, 8, 0, 0...
$ `Min Day` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Max Year` <dbl> 10, 2, 1, 6, 3, 4, 6, 6, 2, 1, 2, 2, 5, 7, 3, 3, 5, 6, 3, 1, ...
$ `Max Month` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Max Day` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Man Year` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Man Month` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Man Day` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Offense Jail Time Days` <dbl> 91, 0, 2, 125, 0, 0, 0, 0, 0, 0, 1, 86, 145, 134, 51, 3, 385,...
$ `Offense Dead Time Days` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 0, 0, 589, 0, 0...
$ `Habitual Criminal` <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "...
$ `Precedence IND` <chr> NA, NA, NA, "P", NA, NA, NA, NA, NA, NA, "P", "P", NA, "P", "...
$ `PE Date Chg CD` <dbl> 3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ `Minimum Yr` <dbl> 4, 1, 0, 4, 1, 10, 10, 10, 1, 1, 1, 1, 1, 4, 1, 2, 5, 4, 1, 1...
$ `Minimum Mo` <dbl> 0, 8, 0, 0, 6, 0, 0, 0, 4, 4, 0, 3, 8, 0, 6, 0, 0, 0, 8, 0, 0...
$ `Minimum Day` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Maximum Yr` <dbl> 10, 2, 1, 6, 3, 16, 16, 16, 2, 2, 2, 2, 5, 7, 3, 3, 5, 6, 3, ...
$ `Maximum Mo` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Maximum Day` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Begin Date` <dttm> 2000-05-09, 1993-05-21, 2000-08-18, 2005-08-17, 1996-01-23, ...
$ `Sentence Jail Credit` <dbl> 91, 0, 2, 125, 100, 71, 71, 71, 0, 0, 1, 86, 145, 134, 51, 3,...
$ `Man Min Yr` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Man Min Mo` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Man Min Day` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Man Min Term Date` <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
# Recode the date sentinel: '1800-01-01' indicates a conversion error.
# NOTE(review): the comparison is applied to every column of the table, not
# only to `offense_begin_date`; any cell equal to this value becomes NA.
ds_sentence[ds_sentence == "1800-01-01"] <- NA
# remove illegal characters in variable names
colnames(ds_sentence) <- gsub(" ", "_", colnames(ds_sentence)) %>% tolower()
colnames(ds_sentence) <- gsub("__", "_", colnames(ds_sentence)) # remove doubles
ds_sentence <- ds_sentence %>%
  # create an unambiguous label for the unique individual identifier;
  # `dplyr::rename()` is used in place of the deprecated `dplyr::rename_()`
  dplyr::rename(
    person_id = idnumbercomb # manually checked for duplicates
  ) %>%
  dplyr::mutate(
    # Person id joined with the offense code.
    # NOTE(review): the original comment says this discerns multiple
    # convictions on the same date, but the id does not include `begin_date`,
    # so repeated offense codes for one person share an id -- confirm intent.
    conviction_id  = paste0(person_id, "-", offense_arrest_cd)
    ,year          = lubridate::year(begin_date)      # for aggregation and graphing
    ,month         = lubridate::month(begin_date)     # for aggregation and graphing
    ,offense_group = substr(offense_arrest_cd, 1, 1)  # first character of the offense code
  ) %>%
  # order rows by person, then sentence start, then offense code
  dplyr::arrange(person_id, begin_date, offense_arrest_cd)
ds_sentence %>% dplyr::glimpse(100)
Observations: 120,381
Variables: 51
$ id_number <dbl> 1, 1, 2, 2, 3, 5, 6, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 10, 10, 11, ...
$ person_id <dbl> 1, 1, 2, 2, 3, 5, 6, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 10, 10, 11, ...
$ previous_id <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ inmate_name <chr> "ABNEY JAMES EDWARD", "ABNEY...
$ parole_eligibility_date <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ tentative_release_date <dttm> NA, NA, NA, NA, 1985-06-07, NA, 1980-08-05, NA, 1990-04-09, 19...
$ earn_dschrg_dt <dttm> 1982-05-14, 1982-05-14, 1980-10-05, 1980-10-05, 1988-08-28, 19...
$ actual_dschrg_dt <dttm> 1982-03-24, 1982-03-24, 1980-07-12, 1980-07-12, 1980-01-16, 19...
$ good_time_law <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ docket_count <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
$ county <chr> "OUT OF STATE", "OUT OF STATE", "OUT OF STATE", "OUT OF STATE",...
$ county_cd <dbl> 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,...
$ offense_begin_date <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ offense_count <dbl> 1, 2, 1, 2, 1, 1, 1, 1, 4, 2, 1, 3, 6, 5, 1, 2, 3, 1, 2, 2, 3, ...
$ offense_attmpt_cd <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ offense_type_cd <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ offense_run_cd <chr> "CC", "CC", "CC", "CC", "CC", "CC", "CC", "CC", "CC", "CC", "CC...
$ offense_arrest_cd <chr> "E01", "E01", "D21", "D21", "B41", "D21", "D11", "E01", "B11", ...
$ offense_arrest <chr> "FORGERY 1ST DEGREE", "FORGERY 1ST DEGREE", "THEFT", "THEFT", "...
$ cnvct_desc <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ felony_msdmnr_cd <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ min_year <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, ...
$ min_month <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ min_day <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ max_year <dbl> 10, 10, 5, 5, 20, 10, 3, 3, 35, 35, 10, 35, 15, 35, 3, 3, 3, 7,...
$ max_month <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ max_day <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_year <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_month <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_day <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ offense_jail_time_days <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ offense_dead_time_days <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ habitual_criminal <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N"...
$ precedence_ind <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ pe_date_chg_cd <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ minimum_yr <dbl> 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, ...
$ minimum_mo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ minimum_day <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ maximum_yr <dbl> 0, 0, 5, 5, 20, 10, 3, 3, 50, 50, 50, 50, 50, 50, 3, 3, 3, 7, 7...
$ maximum_mo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ maximum_day <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ begin_date <dttm> 1974-09-24, 1974-09-24, 1975-01-17, 1975-01-17, 1975-01-08, 19...
$ sentence_jail_credit <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_min_yr <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_min_mo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_min_day <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_min_term_date <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ conviction_id <chr> "1-E01", "1-E01", "2-D21", "2-D21", "3-B41", "5-D21", "6-D11", ...
$ year <dbl> 1974, 1974, 1975, 1975, 1975, 1976, 1978, 1978, 1964, 1964, 196...
$ month <dbl> 9, 9, 1, 1, 1, 10, 9, 1, 11, 11, 11, 11, 11, 11, 8, 8, 8, 8, 8,...
$ offense_group <chr> "E", "E", "D", "D", "B", "D", "D", "E", "B", "B", "B", "B", "B"...
Observations: 44
Variables: 3
$ `Field Name` <chr> "ID Number", "Previous ID Number", "Inmate Name", "Pa...
$ Description <chr> "unique identifier for each inmate", "ID number of th...
$ `Sample Data` <chr> "1", "0", "ABNEY JAMES ...
# Harmonize the codebook's column names: spaces -> underscores, lower case
names(ds_codebook) <- tolower(gsub(" ", "_", names(ds_codebook)))
names(ds_codebook) <- gsub("__", "_", names(ds_codebook)) # collapse doubled underscores
# Apply the same recipe to the values of `field_name`, so that they follow the
# naming used for the columns of `ds_sentence`
ds_codebook <- dplyr::mutate(
  ds_codebook,
  field_name = gsub("__", "_", tolower(gsub(" ", "_", field_name)))
)
dplyr::glimpse(ds_codebook, 80)
Observations: 44
Variables: 3
$ field_name <chr> "id_number", "previous_id_number", "inmate_name", "paro...
$ description <chr> "unique identifier for each inmate", "ID number of the ...
$ sample_data <chr> "1", "0", "ABNEY JAMES ...
Observations: 69,300
Variables: 32
$ `ID NUMBER` <dbl> 1702, 6145, 6452, 124...
$ `COMMITTED LAST NAME` <chr> "CLIFFORD", "KANE", "...
$ `FIRST NAME...3` <chr> "BRADLEY", "THOMAS", ...
$ `MIDDLE NAME...4` <chr> NA, NA, NA, NA, NA, N...
$ `NAME EXTENSION...5` <chr> NA, NA, NA, NA, NA, N...
$ `LEGAL LAST NAME` <chr> NA, NA, NA, NA, NA, N...
$ `FIRST NAME...7` <chr> NA, NA, NA, NA, NA, N...
$ `MIDDLE NAME...8` <chr> NA, NA, NA, NA, NA, N...
$ `NAME EXTENSION...9` <lgl> NA, NA, NA, NA, NA, N...
$ `DATE OF BIRTH` <dttm> NA, 1928-12-21, 1929...
$ `RACE DESC` <chr> NA, "WHITE", "WHITE",...
$ GENDER <chr> "MALE", "MALE", "MALE...
$ FACILITY <chr> NA, "NEBRASKA STATE P...
$ `CURRENT SENTENCE PARDONED OR COMMUTED DATE` <chr> NA, NA, NA, NA, NA, N...
$ `GUN CLAUSE` <chr> NA, NA, NA, NA, NA, N...
$ `SENTENCE BEGIN DATE` <dttm> NA, 1952-06-20, 1953...
$ `MIN TERM/YEAR` <chr> "0", "1", "2", "1", "...
$ `MIN MONTH` <chr> "0", "0", "0", "0", "...
$ `MIN DAY` <chr> "0", "0", "0", "0", "...
$ `MAX TERM/YEAR` <chr> "0", "3", "10", "9", ...
$ `MAX MONTH` <chr> "0", "0", "0", "0", N...
$ `MAX DAY` <chr> "0", "0", "0", "0", N...
$ `PAROLE ELIGIBILITY DATE` <chr> NA, "19165.0", NA, NA...
$ `EARLIEST POSSIBLE RELEASE DATE` <chr> NA, NA, NA, NA, "LFE"...
$ `GOOD TIME LAW` <chr> NA, "2926", NA, NA, "...
$ `INST RELEASE DATE` <dttm> 1986-01-06, 1952-08-...
$ `INST RELEASE TYPE` <chr> "MANDATORY DISCHARGE"...
$ `PAROLE BOARD NEXT REVIEW DATE(MONTH&YEAR)` <dttm> NA, NA, NA, NA, NA, ...
$ `PAROLE BOARD FINAL HEARING DATE(MONTH&YEAR)` <dttm> NA, NA, NA, NA, NA, ...
$ `PAROLE BOARD STATUS` <chr> NA, NA, "PAROLED", NA...
$ `PAROLE DATE` <dttm> NA, NA, 1980-12-09, ...
$ `PAROLE DISCHARGE DESC` <chr> NA, NA, "EARLY DISCHA...
# remove illegal characters in variable names
colnames(ds_inmate) <- gsub(" ", "_", colnames(ds_inmate)) %>% tolower() # remove spaces between words
colnames(ds_inmate) <- gsub("__", "_", colnames(ds_inmate)) # remove doubles
# Drop the literal "(month&year)" suffix. Matching with `fixed = TRUE` avoids
# the earlier pattern's reliance on the undefined escape `\m` and on character
# classes that also accepted quote characters.
colnames(ds_inmate) <- gsub("(month&year)", "", colnames(ds_inmate), fixed = TRUE)
colnames(ds_inmate) <- gsub("/", "_", colnames(ds_inmate)) # remove special characters
# Turn the "...<digits>" suffix left by the auto-rename of duplicated column
# names into "_<digits>". The dots are escaped (an unescaped `...` matches ANY
# three characters, which would also rewrite names that merely end in a digit)
# and `\\d+` accepts suffixes of more than one digit.
colnames(ds_inmate) <- gsub("\\.{3}(\\d+)$", "_\\1", colnames(ds_inmate))
# colnames(ds_inmate) <- gsub(" $","",colnames(ds_inmate)) # remove trailing space
ds_inmate %>% dplyr::glimpse(80)
Observations: 69,300
Variables: 32
$ id_number <dbl> 1702, 6145, 6452, 12444,...
$ committed_last_name <chr> "CLIFFORD", "KANE", "ATK...
$ first_name_3 <chr> "BRADLEY", "THOMAS", "LA...
$ middle_name_4 <chr> NA, NA, NA, NA, NA, NA, ...
$ name_extension_5 <chr> NA, NA, NA, NA, NA, NA, ...
$ legal_last_name <chr> NA, NA, NA, NA, NA, NA, ...
$ first_name_7 <chr> NA, NA, NA, NA, NA, NA, ...
$ middle_name_8 <chr> NA, NA, NA, NA, NA, NA, ...
$ name_extension_9 <lgl> NA, NA, NA, NA, NA, NA, ...
$ date_of_birth <dttm> NA, 1928-12-21, 1929-07...
$ race_desc <chr> NA, "WHITE", "WHITE", "W...
$ gender <chr> "MALE", "MALE", "MALE", ...
$ facility <chr> NA, "NEBRASKA STATE PENI...
$ current_sentence_pardoned_or_commuted_date <chr> NA, NA, NA, NA, NA, NA, ...
$ gun_clause <chr> NA, NA, NA, NA, NA, NA, ...
$ sentence_begin_date <dttm> NA, 1952-06-20, 1953-11...
$ min_term_year <chr> "0", "1", "2", "1", "10"...
$ min_month <chr> "0", "0", "0", "0", "0",...
$ min_day <chr> "0", "0", "0", "0", "0",...
$ max_term_year <chr> "0", "3", "10", "9", "LF...
$ max_month <chr> "0", "0", "0", "0", NA, ...
$ max_day <chr> "0", "0", "0", "0", NA, ...
$ parole_eligibility_date <chr> NA, "19165.0", NA, NA, "...
$ earliest_possible_release_date <chr> NA, NA, NA, NA, "LFE", N...
$ good_time_law <chr> NA, "2926", NA, NA, "292...
$ inst_release_date <dttm> 1986-01-06, 1952-08-31,...
$ inst_release_type <chr> "MANDATORY DISCHARGE", "...
$ parole_board_next_review_date <dttm> NA, NA, NA, NA, NA, NA,...
$ parole_board_final_hearing_date <dttm> NA, NA, NA, NA, NA, NA,...
$ parole_board_status <chr> NA, NA, "PAROLED", NA, "...
$ parole_date <dttm> NA, NA, 1980-12-09, NA,...
$ parole_discharge_desc <chr> NA, NA, "EARLY DISCHARGE...
# Print the records of three hand-picked persons from the sentence data
ds_sentence %>%
  dplyr::filter(person_id %in% c(46222, 65392, 50495)) %>% # for testing
  dplyr::select(
    person_id,         # idnumbercomb, manually checked to represent a unique person
    inmate_name,
    begin_date,        # date the person began serving the aggregate sentence
    offense_arrest_cd, # code for the offense committed
    offense_count,     # count of offenses in inmate's sentence
    offense_arrest     # standardized description of the offense committed
  ) %>%
  neat(caption = "Sample records from `Sentences` data set")
person_id | inmate_name | begin_date | offense_arrest_cd | offense_count | offense_arrest |
---|---|---|---|---|---|
46222 | STICKNEY JEREMY J | 1994-11-17 | D11 | 1 | BURGLARY |
46222 | STICKNEY JEREMY J | 1994-11-17 | D11 | 1 | BURGLARY |
46222 | STICKNEY JEREMY J | 1994-11-17 | D31 | 2 | CRIMINAL MISCHIEF |
46222 | STICKNEY JEREMY J | 2007-08-06 | B35 | 2 | VIOLATION OF PROTECTION ORDER |
46222 | STICKNEY JEREMY J | 2007-08-06 | C21 | 1 | POS CNTRL SUB EXCEPT MARIJUANA |
46222 | STICKNEY JEREMY J | 2007-08-06 | L22 | 1 | TELECOMMUNICATION VIOLATION |
46222 | STICKNEY JEREMY | 2012-08-20 | C21 | 1 | POS CNTRL SUB EXCEPT MARIJUANA |
46222 | STICKNEY JEREMY | 2012-08-20 | C21 | 1 | POS CNTRL SUB EXCEPT MARIJUANA |
46222 | STICKNEY JEREMY | 2012-08-20 | K01 | 1 | CARRY/POSS CONCEALED WEAPON |
50495 | ROBERTS DAVID A | 1998-01-07 | D12 | 1 | POSSESSION OF BURGLARY TOOLS |
50495 | ROBERTS DAVID ALLEN | 2004-04-06 | D41 | 1 | CRIMINAL TRESPASS |
50495 | ROBERTS DAVID A | 2007-05-04 | D11 | 1 | BURGLARY |
50495 | ROBERTS DAVID A | 2007-05-04 | D20 | 2 | THEFT BY RECEIVING STOLEN PROP |
50495 | ROBERTS DAVID A | 2007-05-04 | D20 | 3 | THEFT BY RECEIVING STOLEN PROP |
65392 | WENTLING ALAN L | 2007-01-25 | C21 | 1 | POS CNTRL SUB EXCEPT MARIJUANA |
65392 | WENTLING ALAN L | 2007-01-25 | D11 | 1 | BURGLARY |
65392 | WENTLING ALAN L | 2007-01-25 | D20 | 1 | THEFT BY RECEIVING STOLEN PROP |
65392 | WENTLING ALAN L | 2010-06-11 | C21 | 1 | POS CNTRL SUB EXCEPT MARIJUANA |
65392 | WENTLING ALAN L | 2010-06-11 | D11 | 1 | BURGLARY |
65392 | WENTLING ALAN L | 2014-05-21 | D43 | 1 | THEFT BY UNLWFL TAKING OR DISP |
field_name | description | sample_data |
---|---|---|
id_number | unique identifier for each inmate | 1 |
previous_id_number | ID number of the person’s previous NDCS sentence (0=no prior ID available) | 0 |
inmate_name | name of the inmate | ABNEY JAMES EDWARD |
parole_eligibility_date | date at which the person is eligible for parole (null value indicates the person’s sentence does not allow for the possibility of parole OR that data were improperly converted from the previous data system [pre-1980]) | NA |
tentative_release_date | date at which the person is expected to discharge (null value indicates the person received a life sentence OR that data were improperly converted from the previous data system [pre-1980]) | NA |
earn_dschrg_dt | date at which the person is expected to discharge from parole | 30085 |
actual_dschrg_dt | date the person actually discharged from his/her sentence | 30034 |
good_time_law | good time law that governs the application of good time (null = lost in data conversion) | NA |
docket_count | count of dockets in inmate’s sentence | 1 |
county | county of commitment | OUT OF STATE |
county_cd | code for county of commitment | 94 |
offense_begin_date | date the person began serving the sentence for an individual offense (1800-01-01 = conversion error) | 1800-01-01 |
offense_count | count of offenses in inmate’s sentence | 1 |
offense_attmpt_cd | indicate whether the offense was an attempt, abet, or conspiracy OR flags if a mandatory minimum is attached to a person’s sentence | NA |
offense_type_cd | alphabetical characters appear if the offense was committed after the person was originally committed to NDCS | NA |
offense_run_cd | indicates consecutive vs. concurrent | CC |
offense_arrest_cd | code for the offense committed | E01 |
offense_arrest | standardized description of the offense committed | FORGERY 1ST DEGREE |
cnvct_desc | narrative description of the offense committed | NA |
felony_msdmnr_cd | indicates the class of felony/misdemeanor | NA |
min_year | minimum years in sentence for a specific offense | 1 |
min_month | minimum months in sentence for a specific offense | 0 |
min_day | minimum days in sentence for a specific offense | 0 |
max_year | maximum years in sentence for a specific offense | 10 |
max_month | maximum months in sentence for a specific offense | 0 |
max_day | maximum days in sentence for a specific offense | 0 |
man_year | mandatory minimum years in sentence for a specific offense | 0 |
man_month | mandatory minimum months in sentence for a specific offense | 0 |
man_day | mandatory minimum days in sentence for a specific offense | 0 |
offense_jail_time_days | number of days jail credit awarded for a specific offense | 0 |
offense_dead_time_days | number of days applied to extend a specific sentence due to inmates released from their sentence due to bond, escape, or abscond | 0 |
habitual_criminal | flags whether the offender received a habitual criminal enhancement on a specific offense | N |
minimum_yr | minimum years in the aggregate sentence | 0 |
minimum_mo | minimum months in the aggregate sentence | 0 |
minimum_day | minimum days in the aggregate sentence | 0 |
maximum_yr | maximum years in the aggregate sentence | 0 |
maximum_mo | maximum months in the aggregate sentence | 0 |
maximum_day | maximum days in the aggregate sentence | 0 |
begin_date | date the person began serving the aggregate sentence | 27296 |
sentence_jail_credit | number of jail credit days applied to the aggregate sentence | 0 |
man_min_yr | mandatory minimum years in the aggregate sentence | 0 |
man_min_mo | mandatory minimum months in the aggregate sentence | 0 |
man_min_day | mandatory minimum days in the aggregate sentence | 0 |
man_min_term_date | an individual with a mandatory minimum sentence may not be released prior to this date | NA |
# Print the inmate-database records whose id_number matches the three ids above
ds_inmate %>%
  dplyr::filter(id_number %in% c(46222, 65392, 50495)) %>%
  dplyr::select(
    id_number,
    committed_last_name,
    date_of_birth,
    race_desc,
    gender,
    sentence_begin_date
  ) %>%
  neat(caption = "Sample records from `Inmate` data set")
id_number | committed_last_name | date_of_birth | race_desc | gender | sentence_begin_date |
---|---|---|---|---|---|
46222 | STICKNEY | 1976-04-23 | WHITE | MALE | 1994-11-17 |
50495 | ROBERTS | 1976-04-01 | WHITE | MALE | 1998-01-07 |
65392 | WENTLING | 1987-01-30 | WHITE | MALE | 2007-01-25 |
Let us create a data transfer object
(dto), a list object that stores all three data components of this project. This object (dto) will be the point of departure for all subsequent analytic efforts.
# Bundle the three data components into a single named list
dto <- list(
  sentence = ds_sentence, # each row is a conviction, multiple rows per person
  codebook = ds_codebook, # each row is a variable in `ds_sentence`
  inmate   = ds_inmate
)
pryr::object_size(dto) # memory footprint of the bundle
82.4 MB
# Persist the bundle so that downstream scripts can pick it up
saveRDS(dto, "./data-unshared/derived/0-dto.rds")
# Export the codebook as CSV for read-only inspection
readr::write_csv(ds_codebook, "./data-public/derived/all-sentences-codebook.csv")
# see our research journal at
# https://docs.google.com/document/d/1_EhkXgkBZTJ8nc02rr8Z4wrbzSbvvT6VZoQJi6DAhNQ/edit?usp=sharing
For the sake of documentation and reproducibility, the current report was rendered in the following environment. Click the line below to expand.
Environment
- Session info -------------------------------------------------------------------------------------------------------
setting value
version R version 3.5.2 (2018-12-20)
os Windows >= 8 x64
system x86_64, mingw32
ui RStudio
language (EN)
collate English_United States.1252
ctype English_United States.1252
tz America/Los_Angeles
date 2019-05-15
- Packages -----------------------------------------------------------------------------------------------------------
package * version date lib source
assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.5.3)
backports 1.1.4 2019-04-10 [1] CRAN (R 3.5.3)
callr 3.2.0 2019-03-15 [1] CRAN (R 3.5.3)
cellranger 1.1.0 2016-07-27 [1] CRAN (R 3.5.3)
cli 1.1.0 2019-03-19 [1] CRAN (R 3.5.3)
codetools 0.2-15 2016-10-05 [2] CRAN (R 3.5.2)
colorspace 1.4-1 2019-03-18 [1] CRAN (R 3.5.3)
crayon 1.3.4 2017-09-16 [1] CRAN (R 3.5.3)
crosstalk 1.0.0 2016-12-21 [1] CRAN (R 3.5.3)
desc 1.2.0 2018-05-01 [1] CRAN (R 3.5.3)
devtools 2.0.2 2019-04-08 [1] CRAN (R 3.5.3)
digest 0.6.18 2018-10-10 [1] CRAN (R 3.5.3)
dplyr 0.8.0.1 2019-02-15 [1] CRAN (R 3.5.3)
DT 0.6 2019-05-09 [1] CRAN (R 3.5.3)
evaluate 0.13 2019-02-12 [1] CRAN (R 3.5.3)
fansi 0.4.0 2018-10-05 [1] CRAN (R 3.5.3)
fs 1.3.1 2019-05-06 [1] CRAN (R 3.5.3)
ggplot2 * 3.1.1 2019-04-07 [1] CRAN (R 3.5.3)
ggpubr 0.2 2018-11-15 [1] CRAN (R 3.5.3)
glue 1.3.1 2019-03-12 [1] CRAN (R 3.5.3)
gtable 0.3.0 2019-03-25 [1] CRAN (R 3.5.3)
highr 0.8 2019-03-20 [1] CRAN (R 3.5.3)
hms 0.4.2 2018-03-10 [1] CRAN (R 3.5.3)
htmltools 0.3.6 2017-04-28 [1] CRAN (R 3.5.3)
htmlwidgets 1.3 2018-09-30 [1] CRAN (R 3.5.3)
httpuv 1.5.1 2019-04-05 [1] CRAN (R 3.5.3)
httr 1.4.0 2018-12-11 [1] CRAN (R 3.5.3)
jsonlite 1.6 2018-12-07 [1] CRAN (R 3.5.3)
kableExtra 1.1.0 2019-03-16 [1] CRAN (R 3.5.3)
knitr * 1.22 2019-03-08 [1] CRAN (R 3.5.3)
labeling 0.3 2014-08-23 [1] CRAN (R 3.5.2)
later 0.8.0 2019-02-11 [1] CRAN (R 3.5.3)
lazyeval 0.2.2 2019-03-15 [1] CRAN (R 3.5.3)
lubridate 1.7.4 2018-04-11 [1] CRAN (R 3.5.3)
magrittr * 1.5 2014-11-22 [1] CRAN (R 3.5.3)
memoise 1.1.0 2017-04-21 [1] CRAN (R 3.5.3)
mime 0.6 2018-10-05 [1] CRAN (R 3.5.2)
munsell 0.5.0 2018-06-12 [1] CRAN (R 3.5.3)
pillar 1.3.1 2018-12-15 [1] CRAN (R 3.5.3)
pkgbuild 1.0.3 2019-03-20 [1] CRAN (R 3.5.3)
pkgconfig 2.0.2 2018-08-16 [1] CRAN (R 3.5.3)
pkgload 1.0.2 2018-10-29 [1] CRAN (R 3.5.3)
plyr 1.8.4 2016-06-08 [1] CRAN (R 3.5.3)
prettyunits 1.0.2 2015-07-13 [1] CRAN (R 3.5.3)
processx 3.3.1 2019-05-08 [1] CRAN (R 3.5.2)
promises 1.0.1 2018-04-13 [1] CRAN (R 3.5.3)
pryr 0.1.4 2018-02-18 [1] CRAN (R 3.5.3)
ps 1.3.0 2018-12-21 [1] CRAN (R 3.5.3)
purrr 0.3.2 2019-03-15 [1] CRAN (R 3.5.3)
R6 2.4.0 2019-02-14 [1] CRAN (R 3.5.3)
Rcpp 1.0.1 2019-03-17 [1] CRAN (R 3.5.3)
readr 1.3.1 2018-12-21 [1] CRAN (R 3.5.3)
readxl 1.3.1 2019-03-13 [1] CRAN (R 3.5.3)
remotes 2.0.4 2019-04-10 [1] CRAN (R 3.5.3)
rlang 0.3.4 2019-04-07 [1] CRAN (R 3.5.3)
rmarkdown 1.12 2019-03-14 [1] CRAN (R 3.5.3)
rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.5.3)
rstudioapi 0.10 2019-03-19 [1] CRAN (R 3.5.3)
rvest 0.3.3 2019-04-11 [1] CRAN (R 3.5.3)
scales 1.0.0 2018-08-09 [1] CRAN (R 3.5.3)
sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.5.3)
shiny 1.3.2 2019-04-22 [1] CRAN (R 3.5.2)
stringi 1.4.3 2019-03-12 [1] CRAN (R 3.5.3)
stringr 1.4.0 2019-02-10 [1] CRAN (R 3.5.3)
tibble 2.1.1 2019-03-16 [1] CRAN (R 3.5.3)
tidyselect 0.2.5 2018-10-11 [1] CRAN (R 3.5.3)
usethis 1.5.0 2019-04-07 [1] CRAN (R 3.5.3)
utf8 1.1.4 2018-05-24 [1] CRAN (R 3.5.3)
viridisLite 0.3.0 2018-02-01 [1] CRAN (R 3.5.3)
webshot 0.5.1 2018-09-28 [1] CRAN (R 3.5.3)
withr 2.1.2 2018-03-15 [1] CRAN (R 3.5.3)
xfun 0.6 2019-04-02 [1] CRAN (R 3.5.3)
xml2 1.2.0 2018-01-24 [1] CRAN (R 3.5.3)
xtable 1.8-4 2019-04-21 [1] CRAN (R 3.5.3)
yaml 2.2.0 2018-07-25 [1] CRAN (R 3.5.2)
[1] C:/Users/an499583/Documents/R/win-library/3.5
[2] C:/Program Files/R/R-3.5.2/library
Report rendered by an499583 at 2019-05-15, 17:01 -0700 in 19 seconds.