This document is the annotation layer for the ./manipulation/0-greeter.R script, which connects to the data sources used in the study, imports them into the RStudio environment, and prepares them for analytic enquiry.

I. Environment

# Attach these packages so their functions don't need to be qualified
library(magrittr)          # pipes
library(ggplot2)           # graphing

# other packages used in the script
requireNamespace("dplyr")    # data wrangling
requireNamespace("readxl")   # import from Excel

# import custom functions and scripts
base::source("./scripts/common-functions.R") # generic toolkit

# Paths to the two Excel workbooks that the Input section reads.
# The first workbook is read twice below: once for the "AllSentences" sheet
# and once for the "Codebook" sheet.
# The commented-out "_small" variants are kept for testing (presumably reduced
# copies of the same workbooks -- TODO confirm); swap them in by commenting out
# the two active lines and un-commenting these.
# path_file_input_sentences  <- "./data-unshared/raw/2017-10-04  AllSentences_updated_small.xlsx" # for testing
# path_file_input_inmateDB   <- "./data-unshared/raw/inmateDB_small.xlsx"                         # for testing
path_file_input_sentences       <- "./data-unshared/raw/2017-10-04  AllSentences_updated.xlsx"
path_file_input_inmateDB   <- "./data-unshared/raw/inmateDB.xlsx"

II. Input

# Source 1: data from the Nebraska Dept of Corrections.
# The sentence records and their codebook are two sheets of the same workbook.
ds_sentence <- readxl::read_excel(
  path  = path_file_input_sentences,
  sheet = "AllSentences"
)
ds_codebook <- readxl::read_excel(
  path  = path_file_input_sentences,
  sheet = "Codebook",
  n_max = 50 # read at most 50 data rows from the codebook sheet
)

# Source 2: public inmate database (no sheet is named, so the default sheet is read)
ds_inmate <- readxl::read_excel(path = path_file_input_inmateDB)

# basic specs of the sentences table: memory footprint first, then rows x columns
pryr::object_size(ds_sentence); dim(ds_sentence)

52.6 MB

[1] 120381     47

pryr::object_size(ds_codebook); dim(ds_codebook) # same specs for the codebook

11.5 kB

[1] 44  3

pryr::object_size(ds_inmate); dim(ds_inmate) # same specs for the inmate table

20.1 MB

[1] 69300    32

III. Groom

Sentences

# SENTENCES: inspect the structure as imported, before any tweaks
dplyr::glimpse(ds_sentence, width = 100)

Observations: 120,381
Variables: 47
$ `ID Number`               <dbl> 53731, 44473, 54131, 62706, 47816, 46628, 46628, 46628, 92913...
$ Idnumbercomb              <dbl> 44473, 44473, 54131, 62706, 47816, 46628, 46628, 46628, 92913...
$ `Previous ID`             <dbl> 44473, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37142, 0, 0, 0, 0, 0, 0,...
$ `Inmate Name`             <chr> "AARON                     BOBBY", "AARON                    ...
$ `Parole Eligibility Date` <dttm> 2003-05-09, 1994-03-20, 2000-08-18, 2007-04-11, 1996-08-11, ...
$ `Tentative Release Date`  <dttm> 2006-02-10, 1994-07-20, 2001-02-15, 2008-04-11, 1997-09-26, ...
$ `Earn Dschrg Dt`          <dttm> NA, NA, NA, 2008-03-21, 1997-04-10, NA, NA, NA, NA, NA, 1988...
$ `Actual Dschrg Dt`        <dttm> NA, NA, NA, 2008-03-21, NA, NA, NA, NA, NA, NA, 1988-01-30, ...
$ `Good Time Law`           <dbl> 6, 4, 6, 6, 4, 4, 4, 4, 1, 1, 1, 4, 7, 6, 7, 7, 7, 6, 1, 6, 6...
$ `Docket Count`            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ County                    <chr> "BUFFALO", "LANCASTER", "MADISON", "DOUGLAS", "DOUGLAS", "DOU...
$ `County CD`               <dbl> 10, 55, 59, 28, 28, 28, 28, 28, 28, 28, 55, 55, 27, 72, 19, 4...
$ `Offense Begin Date`      <chr> "36655", "1800-01-01", "36756", "38581", "1800-01-01", "1800-...
$ `Offense Count`           <dbl> 1, 1, 1, 1, 1, 1, 2, 3, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1...
$ `Offense Attmpt CD`       <chr> NA, "A", NA, NA, NA, NA, NA, NA, NA, NA, "A", NA, NA, NA, "A"...
$ `Offense Type CD`         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, "A", NA, NA, NA, NA, NA, ...
$ `Offense Run CD`          <chr> "CC", "CC", "CC", "CC", "CC", "CC", "CS", "CS", "CC", "CC", "...
$ `Offense Arrest CD`       <chr> "C31", "D21", "B26", "C31", "D20", "B12", "K02", "C31", "E62"...
$ `Offense Arrest`          <chr> "MANU/DIST/DEL/DISP OR POSS W/I", "THEFT", "SEXUAL ASSAULT ON...
$ `Cnvct Desc`              <chr> "POSSESSION CON SUB W/I DELIVER", NA, NA, "POSSESSION W/I DEL...
$ `Felony Msdmnr CD`        <chr> "3F", "F", "3AF", "3F", "4F", "4F", "3F", "2F", NA, NA, "4F",...
$ `Min Year`                <dbl> 4, 1, 0, 4, 1, 2, 4, 4, 1, 0, 1, 1, 1, 4, 1, 2, 5, 4, 1, 0, 1...
$ `Min Month`               <dbl> 0, 8, 0, 0, 6, 0, 0, 0, 4, 0, 0, 3, 8, 0, 6, 0, 0, 0, 8, 0, 0...
$ `Min Day`                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Max Year`                <dbl> 10, 2, 1, 6, 3, 4, 6, 6, 2, 1, 2, 2, 5, 7, 3, 3, 5, 6, 3, 1, ...
$ `Max Month`               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Max Day`                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Man Year`                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Man Month`               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Man Day`                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Offense Jail Time Days`  <dbl> 91, 0, 2, 125, 0, 0, 0, 0, 0, 0, 1, 86, 145, 134, 51, 3, 385,...
$ `Offense Dead Time Days`  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 0, 0, 589, 0, 0...
$ `Habitual Criminal`       <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "...
$ `Precedence IND`          <chr> NA, NA, NA, "P", NA, NA, NA, NA, NA, NA, "P", "P", NA, "P", "...
$ `PE Date Chg CD`          <dbl> 3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ `Minimum Yr`              <dbl> 4, 1, 0, 4, 1, 10, 10, 10, 1, 1, 1, 1, 1, 4, 1, 2, 5, 4, 1, 1...
$ `Minimum Mo`              <dbl> 0, 8, 0, 0, 6, 0, 0, 0, 4, 4, 0, 3, 8, 0, 6, 0, 0, 0, 8, 0, 0...
$ `Minimum  Day`            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Maximum Yr`              <dbl> 10, 2, 1, 6, 3, 16, 16, 16, 2, 2, 2, 2, 5, 7, 3, 3, 5, 6, 3, ...
$ `Maximum Mo`              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Maximum Day`             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Begin Date`              <dttm> 2000-05-09, 1993-05-21, 2000-08-18, 2005-08-17, 1996-01-23, ...
$ `Sentence Jail Credit`    <dbl> 91, 0, 2, 125, 100, 71, 71, 71, 0, 0, 1, 86, 145, 134, 51, 3,...
$ `Man Min Yr`              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Man Min Mo`              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Man Min Day`             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ `Man Min Term Date`       <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...

# Replace coded values in date variables with NA.
# '1800-01-01' indicates a conversion error in the source data.
# NOTE(review): the comparison runs over every column of the table, not only
# the date columns. `offense_begin_date` is imported as character and also
# holds numeric-looking strings (e.g. "36655"); those are left untouched here.
ds_sentence[ds_sentence == "1800-01-01"] <- NA

# remove illegal characters in variable names
colnames(ds_sentence) <- gsub(" " ,"_",colnames(ds_sentence)) %>% tolower()
colnames(ds_sentence) <- gsub("_+","_",colnames(ds_sentence)) # collapse any run of underscores into one

ds_sentence <- ds_sentence %>% 
  # create an unambiguous label for the unique individual identifier
  # (`rename()` is used because the underscore verb `rename_()` is deprecated)
  dplyr::rename(
    person_id = idnumbercomb # manually checked for duplicates
  ) %>% 
  dplyr::mutate(
    conviction_id  = paste0(person_id,"-",offense_arrest_cd) # to discern multiple convictions on the same date
    ,year          = lubridate::year(begin_date)     # for aggregation and graphing
    ,month         = lubridate::month(begin_date)    # for aggregation and graphing 
    ,offense_group = substr(offense_arrest_cd, 1, 1) # first character of the offense code; for aggregation and graphing
  ) %>% 
  # order rows by person, then sentence start, then offense code
  dplyr::arrange(person_id, begin_date, offense_arrest_cd)
ds_sentence %>% dplyr::glimpse(100)

Observations: 120,381
Variables: 51
$ id_number               <dbl> 1, 1, 2, 2, 3, 5, 6, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 10, 10, 11, ...
$ person_id               <dbl> 1, 1, 2, 2, 3, 5, 6, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 10, 10, 11, ...
$ previous_id             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ inmate_name             <chr> "ABNEY                     JAMES                EDWARD", "ABNEY...
$ parole_eligibility_date <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ tentative_release_date  <dttm> NA, NA, NA, NA, 1985-06-07, NA, 1980-08-05, NA, 1990-04-09, 19...
$ earn_dschrg_dt          <dttm> 1982-05-14, 1982-05-14, 1980-10-05, 1980-10-05, 1988-08-28, 19...
$ actual_dschrg_dt        <dttm> 1982-03-24, 1982-03-24, 1980-07-12, 1980-07-12, 1980-01-16, 19...
$ good_time_law           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ docket_count            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
$ county                  <chr> "OUT OF STATE", "OUT OF STATE", "OUT OF STATE", "OUT OF STATE",...
$ county_cd               <dbl> 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,...
$ offense_begin_date      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ offense_count           <dbl> 1, 2, 1, 2, 1, 1, 1, 1, 4, 2, 1, 3, 6, 5, 1, 2, 3, 1, 2, 2, 3, ...
$ offense_attmpt_cd       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ offense_type_cd         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ offense_run_cd          <chr> "CC", "CC", "CC", "CC", "CC", "CC", "CC", "CC", "CC", "CC", "CC...
$ offense_arrest_cd       <chr> "E01", "E01", "D21", "D21", "B41", "D21", "D11", "E01", "B11", ...
$ offense_arrest          <chr> "FORGERY 1ST DEGREE", "FORGERY 1ST DEGREE", "THEFT", "THEFT", "...
$ cnvct_desc              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ felony_msdmnr_cd        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ min_year                <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, ...
$ min_month               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ min_day                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ max_year                <dbl> 10, 10, 5, 5, 20, 10, 3, 3, 35, 35, 10, 35, 15, 35, 3, 3, 3, 7,...
$ max_month               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ max_day                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_year                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_month               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_day                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ offense_jail_time_days  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ offense_dead_time_days  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ habitual_criminal       <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N"...
$ precedence_ind          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ pe_date_chg_cd          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ minimum_yr              <dbl> 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, ...
$ minimum_mo              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ minimum_day             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ maximum_yr              <dbl> 0, 0, 5, 5, 20, 10, 3, 3, 50, 50, 50, 50, 50, 50, 3, 3, 3, 7, 7...
$ maximum_mo              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ maximum_day             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ begin_date              <dttm> 1974-09-24, 1974-09-24, 1975-01-17, 1975-01-17, 1975-01-08, 19...
$ sentence_jail_credit    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_min_yr              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_min_mo              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_min_day             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ man_min_term_date       <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ conviction_id           <chr> "1-E01", "1-E01", "2-D21", "2-D21", "3-B41", "5-D21", "6-D11", ...
$ year                    <dbl> 1974, 1974, 1975, 1975, 1975, 1976, 1978, 1978, 1964, 1964, 196...
$ month                   <dbl> 9, 9, 1, 1, 1, 10, 9, 1, 11, 11, 11, 11, 11, 11, 8, 8, 8, 8, 8,...
$ offense_group           <chr> "E", "E", "D", "D", "B", "D", "D", "E", "B", "B", "B", "B", "B"...

Codebook

# CODEBOOK for sentences: inspect the structure as imported, before any tweaks
dplyr::glimpse(ds_codebook, width = 80)

Observations: 44
Variables: 3
$ `Field Name`  <chr> "ID Number", "Previous ID Number", "Inmate Name", "Pa...
$ Description   <chr> "unique identifier for each inmate", "ID number of th...
$ `Sample Data` <chr> "1", "0", "ABNEY                     JAMES           ...

# Standardize the codebook's own column names:
# spaces become "_", everything is lower-cased, then "__" is reduced to "_"
colnames(ds_codebook) <- tolower(gsub(" ", "_", colnames(ds_codebook)))
colnames(ds_codebook) <- gsub("__", "_", colnames(ds_codebook))
# Give the values of `field_name` the same treatment,
# to be consistent with the column names of `ds_sentence`
ds_codebook <- dplyr::mutate(
  ds_codebook,
  field_name = gsub("__", "_", tolower(gsub(" ", "_", field_name)))
)
dplyr::glimpse(ds_codebook, width = 80)

Observations: 44
Variables: 3
$ field_name  <chr> "id_number", "previous_id_number", "inmate_name", "paro...
$ description <chr> "unique identifier for each inmate", "ID number of the ...
$ sample_data <chr> "1", "0", "ABNEY                     JAMES             ...

Inmate DB

# INMATE DATABASE: inspect the structure as imported, before any tweaks
dplyr::glimpse(ds_inmate, width = 80)

Observations: 69,300
Variables: 32
$ `ID NUMBER`                                   <dbl> 1702, 6145, 6452, 124...
$ `COMMITTED LAST NAME`                         <chr> "CLIFFORD", "KANE", "...
$ `FIRST NAME...3`                              <chr> "BRADLEY", "THOMAS", ...
$ `MIDDLE NAME...4`                             <chr> NA, NA, NA, NA, NA, N...
$ `NAME EXTENSION...5`                          <chr> NA, NA, NA, NA, NA, N...
$ `LEGAL LAST NAME`                             <chr> NA, NA, NA, NA, NA, N...
$ `FIRST NAME...7`                              <chr> NA, NA, NA, NA, NA, N...
$ `MIDDLE NAME...8`                             <chr> NA, NA, NA, NA, NA, N...
$ `NAME EXTENSION...9`                          <lgl> NA, NA, NA, NA, NA, N...
$ `DATE OF BIRTH`                               <dttm> NA, 1928-12-21, 1929...
$ `RACE DESC`                                   <chr> NA, "WHITE", "WHITE",...
$ GENDER                                        <chr> "MALE", "MALE", "MALE...
$ FACILITY                                      <chr> NA, "NEBRASKA STATE P...
$ `CURRENT SENTENCE PARDONED OR COMMUTED DATE`  <chr> NA, NA, NA, NA, NA, N...
$ `GUN CLAUSE`                                  <chr> NA, NA, NA, NA, NA, N...
$ `SENTENCE BEGIN DATE`                         <dttm> NA, 1952-06-20, 1953...
$ `MIN TERM/YEAR`                               <chr> "0", "1", "2", "1", "...
$ `MIN MONTH`                                   <chr> "0", "0", "0", "0", "...
$ `MIN DAY`                                     <chr> "0", "0", "0", "0", "...
$ `MAX TERM/YEAR`                               <chr> "0", "3", "10", "9", ...
$ `MAX MONTH`                                   <chr> "0", "0", "0", "0", N...
$ `MAX DAY`                                     <chr> "0", "0", "0", "0", N...
$ `PAROLE ELIGIBILITY DATE`                     <chr> NA, "19165.0", NA, NA...
$ `EARLIEST POSSIBLE RELEASE DATE`              <chr> NA, NA, NA, NA, "LFE"...
$ `GOOD TIME LAW`                               <chr> NA, "2926", NA, NA, "...
$ `INST RELEASE DATE`                           <dttm> 1986-01-06, 1952-08-...
$ `INST RELEASE TYPE`                           <chr> "MANDATORY DISCHARGE"...
$ `PAROLE BOARD NEXT REVIEW DATE(MONTH&YEAR)`   <dttm> NA, NA, NA, NA, NA, ...
$ `PAROLE BOARD FINAL HEARING DATE(MONTH&YEAR)` <dttm> NA, NA, NA, NA, NA, ...
$ `PAROLE BOARD STATUS`                         <chr> NA, NA, "PAROLED", NA...
$ `PAROLE DATE`                                 <dttm> NA, NA, 1980-12-09, ...
$ `PAROLE DISCHARGE DESC`                       <chr> NA, NA, "EARLY DISCHA...

# remove illegal characters in variable names
colnames(ds_inmate) <- gsub(" " ,"_",colnames(ds_inmate)) %>% tolower()      # replace spaces between words, lower-case
colnames(ds_inmate) <- gsub("_+","_",colnames(ds_inmate))                    # collapse any run of underscores into one

# Drop the literal "(month&year)" suffix. `fixed = TRUE` matches the text as-is,
# so "(", "&" and ")" are not interpreted as regular-expression syntax.
colnames(ds_inmate) <- gsub("(month&year)","",colnames(ds_inmate), fixed = TRUE)
colnames(ds_inmate) <- gsub("/","_",colnames(ds_inmate), fixed = TRUE)       # replace slashes
# Turn the "...<n>" suffix left by the auto-rename of duplicated names
# (e.g. "first_name...3") into "_<n>". The dots are escaped so that only three
# literal dots match (an unescaped "..." matches ANY three characters), and
# \\d+ also covers suffixes of 10 and above.
colnames(ds_inmate) <- gsub("\\.{3}(\\d+)$","_\\1",colnames(ds_inmate))

# colnames(ds_inmate) <- gsub(" $","",colnames(ds_inmate))                   # remove trailing space
ds_inmate %>% dplyr::glimpse(80)

Observations: 69,300
Variables: 32
$ id_number                                  <dbl> 1702, 6145, 6452, 12444,...
$ committed_last_name                        <chr> "CLIFFORD", "KANE", "ATK...
$ first_name_3                               <chr> "BRADLEY", "THOMAS", "LA...
$ middle_name_4                              <chr> NA, NA, NA, NA, NA, NA, ...
$ name_extension_5                           <chr> NA, NA, NA, NA, NA, NA, ...
$ legal_last_name                            <chr> NA, NA, NA, NA, NA, NA, ...
$ first_name_7                               <chr> NA, NA, NA, NA, NA, NA, ...
$ middle_name_8                              <chr> NA, NA, NA, NA, NA, NA, ...
$ name_extension_9                           <lgl> NA, NA, NA, NA, NA, NA, ...
$ date_of_birth                              <dttm> NA, 1928-12-21, 1929-07...
$ race_desc                                  <chr> NA, "WHITE", "WHITE", "W...
$ gender                                     <chr> "MALE", "MALE", "MALE", ...
$ facility                                   <chr> NA, "NEBRASKA STATE PENI...
$ current_sentence_pardoned_or_commuted_date <chr> NA, NA, NA, NA, NA, NA, ...
$ gun_clause                                 <chr> NA, NA, NA, NA, NA, NA, ...
$ sentence_begin_date                        <dttm> NA, 1952-06-20, 1953-11...
$ min_term_year                              <chr> "0", "1", "2", "1", "10"...
$ min_month                                  <chr> "0", "0", "0", "0", "0",...
$ min_day                                    <chr> "0", "0", "0", "0", "0",...
$ max_term_year                              <chr> "0", "3", "10", "9", "LF...
$ max_month                                  <chr> "0", "0", "0", "0", NA, ...
$ max_day                                    <chr> "0", "0", "0", "0", NA, ...
$ parole_eligibility_date                    <chr> NA, "19165.0", NA, NA, "...
$ earliest_possible_release_date             <chr> NA, NA, NA, NA, "LFE", N...
$ good_time_law                              <chr> NA, "2926", NA, NA, "292...
$ inst_release_date                          <dttm> 1986-01-06, 1952-08-31,...
$ inst_release_type                          <chr> "MANDATORY DISCHARGE", "...
$ parole_board_next_review_date              <dttm> NA, NA, NA, NA, NA, NA,...
$ parole_board_final_hearing_date            <dttm> NA, NA, NA, NA, NA, NA,...
$ parole_board_status                        <chr> NA, NA, "PAROLED", NA, "...
$ parole_date                                <dttm> NA, NA, 1980-12-09, NA,...
$ parole_discharge_desc                      <chr> NA, NA, "EARLY DISCHARGE...

IV. Inspect

Sentences

# Print selected fields for three hand-picked persons (for testing)
ds_sentence %>%
  dplyr::filter(person_id %in% c(46222, 65392, 50495)) %>%
  dplyr::select(
    person_id,         # idnumbercomb, manually checked to represent a unique person
    inmate_name,
    begin_date,        # date the person began serving the aggregate sentence
    offense_arrest_cd, # code for the offense committed
    offense_count,     # count of offenses in inmate's sentence
    offense_arrest     # standardized description of the offense committed
  ) %>%
  neat(caption = "Sample records from `Sentences` data set")

Sample records from `Sentences` data set
person_id	inmate_name	begin_date	offense_arrest_cd	offense_count	offense_arrest
46222	STICKNEY JEREMY J	1994-11-17	D11	1	BURGLARY
46222	STICKNEY JEREMY J	1994-11-17	D11	1	BURGLARY
46222	STICKNEY JEREMY J	1994-11-17	D31	2	CRIMINAL MISCHIEF
46222	STICKNEY JEREMY J	2007-08-06	B35	2	VIOLATION OF PROTECTION ORDER
46222	STICKNEY JEREMY J	2007-08-06	C21	1	POS CNTRL SUB EXCEPT MARIJUANA
46222	STICKNEY JEREMY J	2007-08-06	L22	1	TELECOMMUNICATION VIOLATION
46222	STICKNEY JEREMY	2012-08-20	C21	1	POS CNTRL SUB EXCEPT MARIJUANA
46222	STICKNEY JEREMY	2012-08-20	C21	1	POS CNTRL SUB EXCEPT MARIJUANA
46222	STICKNEY JEREMY	2012-08-20	K01	1	CARRY/POSS CONCEALED WEAPON
50495	ROBERTS DAVID A	1998-01-07	D12	1	POSSESSION OF BURGLARY TOOLS
50495	ROBERTS DAVID ALLEN	2004-04-06	D41	1	CRIMINAL TRESPASS
50495	ROBERTS DAVID A	2007-05-04	D11	1	BURGLARY
50495	ROBERTS DAVID A	2007-05-04	D20	2	THEFT BY RECEIVING STOLEN PROP
50495	ROBERTS DAVID A	2007-05-04	D20	3	THEFT BY RECEIVING STOLEN PROP
65392	WENTLING ALAN L	2007-01-25	C21	1	POS CNTRL SUB EXCEPT MARIJUANA
65392	WENTLING ALAN L	2007-01-25	D11	1	BURGLARY
65392	WENTLING ALAN L	2007-01-25	D20	1	THEFT BY RECEIVING STOLEN PROP
65392	WENTLING ALAN L	2010-06-11	C21	1	POS CNTRL SUB EXCEPT MARIJUANA
65392	WENTLING ALAN L	2010-06-11	D11	1	BURGLARY
65392	WENTLING ALAN L	2014-05-21	D43	1	THEFT BY UNLWFL TAKING OR DISP

Codebook

neat(ds_codebook, caption = "Codebook for `Sentences` data set") # print the full codebook as a table

Codebook for `Sentences` data set
field_name	description	sample_data
id_number	unique identifier for each inmate	1
previous_id_number	ID number of the person’s previous NDCS sentence (0=no prior ID available)	0
inmate_name	name of the inmate	ABNEY JAMES EDWARD
parole_eligibility_date	date at which the person is eligible for parole (null value indicates the person’s sentence does not allow for the possibility of parole OR that data were improperly converted from the previous data system [pre-1980])	NA
tentative_release_date	date at which the person is expected to discharge (null value indicates the person received a life sentence OR that data were improperly converted from the previous data system [pre-1980])	NA
earn_dschrg_dt	date at which the person is expected to discharge from parole	30085
actual_dschrg_dt	date the person actually discharged from his/her sentence	30034
good_time_law	good time law that governs the application of good time (null = lost in data conversion)	NA
docket_count	count of dockets in inmate’s sentence	1
county	county of commitment	OUT OF STATE
county_cd	code for county of commitment	94
offense_begin_date	date the person began serving the sentence for an individual offense (1800-01-01 = conversion error)	1800-01-01
offense_count	count of offenses in inmate’s sentence	1
offense_attmpt_cd	indicate whether the offense was an attempt, abet, or conspiracy OR flags if a mandatory minimum is attached to a person’s sentence	NA
offense_type_cd	alphabetical characters appear if the offense was committed after the person was originally committed to NDCS	NA
offense_run_cd	indicates consecutive vs. concurrent	CC
offense_arrest_cd	code for the offense committed	E01
offense_arrest	standardized description of the offense committed	FORGERY 1ST DEGREE
cnvct_desc	narrative description of the offense committed	NA
felony_msdmnr_cd	indicates the class of felony/misdemeanor	NA
min_year	minimum years in sentence for a specific offense	1
min_month	minimum months in sentence for a specific offense	0
min_day	minimum days in sentence for a specific offense	0
max_year	maximum years in sentence for a specific offense	10
max_month	maximum months in sentence for a specific offense	0
max_day	maximum days in sentence for a specific offense	0
man_year	mandatory minimum years in sentence for a specific offense	0
man_month	mandatory minimum months in sentence for a specific offense	0
man_day	mandatory minimum days in sentence for a specific offense	0
offense_jail_time_days	number of days jail credit awarded for a specific offense	0
offense_dead_time_days	number of days applied to extend a specific sentence due to inmates released from their sentence due to bond, escape, or abscond	0
habitual_criminal	flags whether the offender received a habitual criminal enhancement on a specific offense	N
minimum_yr	minimum years in the aggregate sentence	0
minimum_mo	minimum months in the aggregate sentence	0
minimum_day	minimum days in the aggregate sentence	0
maximum_yr	maximum years in the aggregate sentence	0
maximum_mo	maximum months in the aggregate sentence	0
maximum_day	maximum days in the aggregate sentence	0
begin_date	date the person began serving the aggregate sentence	27296
sentence_jail_credit	number of jail credit days applied to the aggregate sentence	0
man_min_yr	mandatory minimum years in the aggregate sentence	0
man_min_mo	mandatory minimum months in the aggregate sentence	0
man_min_day	mandatory minimum days in the aggregate sentence	0
man_min_term_date	an individual with a mandatory minimum sentence may not be released prior to this date	NA

Inmate DB

# Print selected fields for the same three ids used in the `Sentences` sample
ds_inmate %>%
  dplyr::filter(id_number %in% c(46222, 65392, 50495)) %>%
  dplyr::select(
    id_number,
    committed_last_name,
    date_of_birth,
    race_desc,
    gender,
    sentence_begin_date
  ) %>%
  neat(caption = "Sample records from `Inmate` data set")

Sample records from `Inmate` data set
id_number	committed_last_name	date_of_birth	race_desc	gender	sentence_begin_date
46222	STICKNEY	1976-04-23	WHITE	MALE	1994-11-17
50495	ROBERTS	1976-04-01	WHITE	MALE	1998-01-07
65392	WENTLING	1987-01-30	WHITE	MALE	2007-01-25

V. Save to Disk

Let us create a data transfer object (dto), a list object that stores all three data components in this project. This object (dto) will be the point of departure for all subsequent analytic efforts.

# Bundle the three groomed tables into one named list
dto <- list(
  sentence = ds_sentence, # each row is a conviction, multiple rows per person
  codebook = ds_codebook, # each row is a variable in `ds_sentence`
  inmate   = ds_inmate    # records imported from the public inmate database
)

pryr::object_size(dto) # memory footprint of the combined object

82.4 MB

# the whole dto, for piping further
saveRDS(dto, file = "./data-unshared/derived/0-dto.rds")
# the codebook alone, as CSV for read-only inspection
readr::write_csv(ds_codebook, "./data-public/derived/all-sentences-codebook.csv")

# see our research journal at
# https://docs.google.com/document/d/1_EhkXgkBZTJ8nc02rr8Z4wrbzSbvvT6VZoQJi6DAhNQ/edit?usp=sharing

Session Information

For the sake of documentation and reproducibility, the current report was rendered in the following environment. Click the line below to expand.

Environment

- Session info -------------------------------------------------------------------------------------------------------
 setting  value                       
 version  R version 3.5.2 (2018-12-20)
 os       Windows >= 8 x64            
 system   x86_64, mingw32             
 ui       RStudio                     
 language (EN)                        
 collate  English_United States.1252  
 ctype    English_United States.1252  
 tz       America/Los_Angeles         
 date     2019-05-15                  

- Packages -----------------------------------------------------------------------------------------------------------
 package     * version date       lib source        
 assertthat    0.2.1   2019-03-21 [1] CRAN (R 3.5.3)
 backports     1.1.4   2019-04-10 [1] CRAN (R 3.5.3)
 callr         3.2.0   2019-03-15 [1] CRAN (R 3.5.3)
 cellranger    1.1.0   2016-07-27 [1] CRAN (R 3.5.3)
 cli           1.1.0   2019-03-19 [1] CRAN (R 3.5.3)
 codetools     0.2-15  2016-10-05 [2] CRAN (R 3.5.2)
 colorspace    1.4-1   2019-03-18 [1] CRAN (R 3.5.3)
 crayon        1.3.4   2017-09-16 [1] CRAN (R 3.5.3)
 crosstalk     1.0.0   2016-12-21 [1] CRAN (R 3.5.3)
 desc          1.2.0   2018-05-01 [1] CRAN (R 3.5.3)
 devtools      2.0.2   2019-04-08 [1] CRAN (R 3.5.3)
 digest        0.6.18  2018-10-10 [1] CRAN (R 3.5.3)
 dplyr         0.8.0.1 2019-02-15 [1] CRAN (R 3.5.3)
 DT            0.6     2019-05-09 [1] CRAN (R 3.5.3)
 evaluate      0.13    2019-02-12 [1] CRAN (R 3.5.3)
 fansi         0.4.0   2018-10-05 [1] CRAN (R 3.5.3)
 fs            1.3.1   2019-05-06 [1] CRAN (R 3.5.3)
 ggplot2     * 3.1.1   2019-04-07 [1] CRAN (R 3.5.3)
 ggpubr        0.2     2018-11-15 [1] CRAN (R 3.5.3)
 glue          1.3.1   2019-03-12 [1] CRAN (R 3.5.3)
 gtable        0.3.0   2019-03-25 [1] CRAN (R 3.5.3)
 highr         0.8     2019-03-20 [1] CRAN (R 3.5.3)
 hms           0.4.2   2018-03-10 [1] CRAN (R 3.5.3)
 htmltools     0.3.6   2017-04-28 [1] CRAN (R 3.5.3)
 htmlwidgets   1.3     2018-09-30 [1] CRAN (R 3.5.3)
 httpuv        1.5.1   2019-04-05 [1] CRAN (R 3.5.3)
 httr          1.4.0   2018-12-11 [1] CRAN (R 3.5.3)
 jsonlite      1.6     2018-12-07 [1] CRAN (R 3.5.3)
 kableExtra    1.1.0   2019-03-16 [1] CRAN (R 3.5.3)
 knitr       * 1.22    2019-03-08 [1] CRAN (R 3.5.3)
 labeling      0.3     2014-08-23 [1] CRAN (R 3.5.2)
 later         0.8.0   2019-02-11 [1] CRAN (R 3.5.3)
 lazyeval      0.2.2   2019-03-15 [1] CRAN (R 3.5.3)
 lubridate     1.7.4   2018-04-11 [1] CRAN (R 3.5.3)
 magrittr    * 1.5     2014-11-22 [1] CRAN (R 3.5.3)
 memoise       1.1.0   2017-04-21 [1] CRAN (R 3.5.3)
 mime          0.6     2018-10-05 [1] CRAN (R 3.5.2)
 munsell       0.5.0   2018-06-12 [1] CRAN (R 3.5.3)
 pillar        1.3.1   2018-12-15 [1] CRAN (R 3.5.3)
 pkgbuild      1.0.3   2019-03-20 [1] CRAN (R 3.5.3)
 pkgconfig     2.0.2   2018-08-16 [1] CRAN (R 3.5.3)
 pkgload       1.0.2   2018-10-29 [1] CRAN (R 3.5.3)
 plyr          1.8.4   2016-06-08 [1] CRAN (R 3.5.3)
 prettyunits   1.0.2   2015-07-13 [1] CRAN (R 3.5.3)
 processx      3.3.1   2019-05-08 [1] CRAN (R 3.5.2)
 promises      1.0.1   2018-04-13 [1] CRAN (R 3.5.3)
 pryr          0.1.4   2018-02-18 [1] CRAN (R 3.5.3)
 ps            1.3.0   2018-12-21 [1] CRAN (R 3.5.3)
 purrr         0.3.2   2019-03-15 [1] CRAN (R 3.5.3)
 R6            2.4.0   2019-02-14 [1] CRAN (R 3.5.3)
 Rcpp          1.0.1   2019-03-17 [1] CRAN (R 3.5.3)
 readr         1.3.1   2018-12-21 [1] CRAN (R 3.5.3)
 readxl        1.3.1   2019-03-13 [1] CRAN (R 3.5.3)
 remotes       2.0.4   2019-04-10 [1] CRAN (R 3.5.3)
 rlang         0.3.4   2019-04-07 [1] CRAN (R 3.5.3)
 rmarkdown     1.12    2019-03-14 [1] CRAN (R 3.5.3)
 rprojroot     1.3-2   2018-01-03 [1] CRAN (R 3.5.3)
 rstudioapi    0.10    2019-03-19 [1] CRAN (R 3.5.3)
 rvest         0.3.3   2019-04-11 [1] CRAN (R 3.5.3)
 scales        1.0.0   2018-08-09 [1] CRAN (R 3.5.3)
 sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 3.5.3)
 shiny         1.3.2   2019-04-22 [1] CRAN (R 3.5.2)
 stringi       1.4.3   2019-03-12 [1] CRAN (R 3.5.3)
 stringr       1.4.0   2019-02-10 [1] CRAN (R 3.5.3)
 tibble        2.1.1   2019-03-16 [1] CRAN (R 3.5.3)
 tidyselect    0.2.5   2018-10-11 [1] CRAN (R 3.5.3)
 usethis       1.5.0   2019-04-07 [1] CRAN (R 3.5.3)
 utf8          1.1.4   2018-05-24 [1] CRAN (R 3.5.3)
 viridisLite   0.3.0   2018-02-01 [1] CRAN (R 3.5.3)
 webshot       0.5.1   2018-09-28 [1] CRAN (R 3.5.3)
 withr         2.1.2   2018-03-15 [1] CRAN (R 3.5.3)
 xfun          0.6     2019-04-02 [1] CRAN (R 3.5.3)
 xml2          1.2.0   2018-01-24 [1] CRAN (R 3.5.3)
 xtable        1.8-4   2019-04-21 [1] CRAN (R 3.5.3)
 yaml          2.2.0   2018-07-25 [1] CRAN (R 3.5.2)

[1] C:/Users/an499583/Documents/R/win-library/3.5
[2] C:/Program Files/R/R-3.5.2/library

Report rendered by an499583 at 2019-05-15, 17:01 -0700 in 19 seconds.