This report provides demonstration of visualizing the model estimated by ./reports/technique-demonstration/technique-demonstration.R It does not require access to the raw data, instead it loads only estimated model results.
# Attach these packages so their functions don't need to be qualified: http://r-pkgs.had.co.nz/namespace.html#search-path
library(ggplot2) #For graphing
library(magrittr) # Pipes
library(dplyr)
requireNamespace("dplyr", quietly=TRUE)
requireNamespace("TabularManifest") # devtools::install_github("Melinae/TabularManifest")
requireNamespace("knitr")
requireNamespace("scales") #For formating values in graphs
requireNamespace("RColorBrewer")# link to the source of the location mapping
# This script works with model results data estimated during /technique-demonstration/
path_input_micro <- "./data-public/derived/technique-demonstration/ls_model.rds"
path_input_meta  <- "./data-unshared/derived/0-ls_guide.rds"
# test whether the file exists / the link is good
testit::assert("File does not exist", base::file.exists(path_input_micro))
testit::assert("File does not exist", base::file.exists(path_input_meta))$call
NULL
$summary
 [1] "call"           "terms"          "family"         "deviance"       "aic"           
 [6] "contrasts"      "df.residual"    "null.deviance"  "df.null"        "iter"          
[11] "deviance.resid" "coefficients"   "aliased"        "dispersion"     "df"            
[16] "cov.unscaled"   "cov.scaled"    
$coefficients
 [1] "PRQuebec"                   "PROntario"                  "PRAlberta"                 
 [4] "PRBritish Columbia"         "age_group25"                "age_group30"               
 [7] "age_group35"                "age_group40"                "age_group45"               
[10] "age_group50"                "age_group55"                "age_group60"               
[13] "age_group65"                "age_group70"                "age_group75"               
[16] "age_group80"                "age_group85"                "age_group90"               
[19] "femaleTRUE"                 "maritalwidowed"             "maritalsingle"             
[22] "maritalmar_cohab"           "educ3high school"           "educ3more than high school"
[25] "poor_healthFALSE"           "FOLFrench only"             "FOLEnglish only"           
[28] "FOLBoth English and French"
$predicted_values
[1] "PR"          "age_group"   "female"      "educ3"       "marital"     "poor_health"
[7] "FOL"         "dv_hat"      "dv_hat_p"   [1] "dv ~ -1 + PR + age_group + female + marital + educ3 + poor_health + FOL"
Call:
glm(formula = equation_formula, family = binomial(link = "logit"), 
    data = ds_for_modeling)
Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-3.6773   0.0872   0.1688   0.3635   1.8669  
Coefficients:
                           Estimate Std. Error z value Pr(>|z|)    
PRQuebec                    4.33434    0.46789   9.264  < 2e-16 ***
PROntario                   4.55186    0.46640   9.760  < 2e-16 ***
PRAlberta                   4.56119    0.46713   9.764  < 2e-16 ***
PRBritish Columbia          4.51707    0.46663   9.680  < 2e-16 ***
age_group25                -0.39125    0.58658  -0.667 0.504771    
age_group30                -0.72434    0.54078  -1.339 0.180431    
age_group35                -1.41586    0.48782  -2.902 0.003703 ** 
age_group40                -1.68424    0.47577  -3.540 0.000400 ***
age_group45                -2.53001    0.46166  -5.480 4.25e-08 ***
age_group50                -2.46218    0.46289  -5.319 1.04e-07 ***
age_group55                -3.43099    0.45591  -7.526 5.25e-14 ***
age_group60                -3.94645    0.45496  -8.674  < 2e-16 ***
age_group65                -4.02185    0.45571  -8.825  < 2e-16 ***
age_group70                -4.17885    0.45581  -9.168  < 2e-16 ***
age_group75                -4.42325    0.45615  -9.697  < 2e-16 ***
age_group80                -4.85780    0.45685 -10.633  < 2e-16 ***
age_group85                -5.25667    0.46192 -11.380  < 2e-16 ***
age_group90                -5.41861    0.47663 -11.369  < 2e-16 ***
femaleTRUE                  0.71318    0.04691  15.203  < 2e-16 ***
maritalwidowed             -0.62827    0.08306  -7.564 3.90e-14 ***
maritalsingle              -0.02683    0.10860  -0.247 0.804852    
maritalmar_cohab            0.26822    0.07122   3.766 0.000166 ***
educ3high school            0.13361    0.05605   2.384 0.017141 *  
educ3more than high school  0.52122    0.05378   9.692  < 2e-16 ***
poor_healthFALSE            1.09996    0.04500  24.441  < 2e-16 ***
FOLFrench only              0.17020    0.10869   1.566 0.117358    
FOLEnglish only            -0.06443    0.08020  -0.803 0.421786    
FOLBoth English and French  0.09699    0.14881   0.652 0.514568    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
    Null deviance: 55452  on 40000  degrees of freedom
Residual deviance: 15224  on 39972  degrees of freedom
AIC: 15280
Number of Fisher Scoring iterations: 8                  PRQuebec                  PROntario                  PRAlberta 
                      4.33                       4.55                       4.56 
        PRBritish Columbia                age_group25                age_group30 
                      4.52                      -0.39                      -0.72 
               age_group35                age_group40                age_group45 
                     -1.42                      -1.68                      -2.53 
               age_group50                age_group55                age_group60 
                     -2.46                      -3.43                      -3.95 
               age_group65                age_group70                age_group75 
                     -4.02                      -4.18                      -4.42 
               age_group80                age_group85                age_group90 
                     -4.86                      -5.26                      -5.42 
                femaleTRUE             maritalwidowed              maritalsingle 
                      0.71                      -0.63                      -0.03 
          maritalmar_cohab           educ3high school educ3more than high school 
                      0.27                       0.13                       0.52 
          poor_healthFALSE             FOLFrench only            FOLEnglish only 
                      1.10                       0.17                      -0.06 
FOLBoth English and French 
                      0.10 Observations: 3,873
Variables: 9
$ PR          <fct> Alberta, Alberta, Alberta...
$ age_group   <fct> 35, 45, 75, 55, 55, 50, 7...
$ female      <fct> TRUE, TRUE, TRUE, FALSE, ...
$ educ3       <fct> more than high school, le...
$ marital     <fct> single, sep_divorced, mar...
$ poor_health <fct> FALSE, FALSE, FALSE, FALS...
$ FOL         <fct> English only, English onl...
$ dv_hat      <dbl> 5.388433, 3.779889, 2.389...
$ dv_hat_p    <dbl> 0.9954517, 0.9776841, 0.9...where_to_store_graphs <- "./reports/graphing-phase-only/prints/1/" # female marital educ3 poor_health
# where_to_store_graphs = "./reports/graphing-phase-only/prints/2/" # educ3 poor_health first
# where_to_store_graphs = "./reports/graphing-phase-only/prints/3/", # other collection of predictors
# define a function to print a graph onto disk as an image
# because some aspects of appearances are easier to control during printing, not graphing
quick_save <- function(g,name){
  ggplot2::ggsave(
    filename = paste0(name,".png"), 
    plot   = g,
    device = png,
    path   = where_to_store_graphs, # female marital educ poor_healt
    width  = 1600,
    height = 1200,
    # units = "cm",
    dpi    = 200,
    limitsize = FALSE
  )
}# declare the dependent variable and define descriptive labels
dv_name            <- "S_DEAD"
dv_label_prob      <- "Alive in X years"
dv_label_odds      <- "Odds(Dead)"
# select the predictors to evaluate graphically
# becasue we typically will have more predictors then we want to display
# these will define rows in the printed matrix of graphs
covar_order_values <- c("female","marital","educ3","poor_health") #for /prints/1/
# covar_order_values <- c("marital", "educ3","poor_health", "FOL") # for /prints/2/# create a function that would assign color
# to the values of predictors based on informed expectation
assign_color <- function(color_group){
  
  if( color_group == "female") {
    palette_color <- c(
      "TRUE"   = reference_color
      ,"FALSE" = increased_risk_1
    ) 
  } else if( color_group %in% c("educ5") ) { 
    palette_color <- c(
      "less than high school" = increased_risk_2
      ,"high school"           = increased_risk_1
      , "college"              = reference_color
      , "graduate"             = descreased_risk_1
      , "Dr."                  = descreased_risk_2
    ) 
  } else if( color_group %in% c("educ3") ) { 
    palette_color <- c(
      "less than high school"  = increased_risk_1
      ,"high school"           = reference_color
      ,"more than high school" = descreased_risk_1
    ) 
  } else if( color_group %in% c("marital") ) {
    palette_color <- c(
      "mar_cohab"     = descreased_risk_1
      ,"sep_divorced" = increased_risk_2
      ,"single"       = reference_color
      ,"widowed"      = increased_risk_1
    )
  } else if( color_group %in% c("poor_health") ) {
    palette_color <- c(
      "FALSE" = reference_color
      ,"TRUE" = increased_risk_2
    )
  } else if( color_group %in% c("FOL") ) {
    palette_color <- c(
      "Both English and French"      = descreased_risk_1
      ,"English only"                = reference_color 
      ,"French only"                 = increased_risk_1
      ,"Neither English nor French"  = increased_risk_2
    )                       
  } else if( color_group %in% c("OLN") ) {
    palette_color <- c(
      "Both English and French"      = descreased_risk_2
      ,"English only"                = reference_color 
      ,"French only"                 = increased_risk_1
      ,"Neither English nor French"  = increased_risk_2
    )
  } else {
    stop("The palette for this variable is not defined.")
  }
  
}
# declare shared grahpical setting
common_alpha <- .7          # shared transparency
common_natural <- "grey90"  # the "no-color" color
y_low = .2 # to remove white space
y_high = 1 # to remove white space
# load the custom graphing function, isolated in this script
base::source("./scripts/graphing/graph-logistic.R")
# color definitions are picked from  
# http://colorbrewer2.org/#type=qualitative&scheme=Set1&n=7
list.files(where_to_store_graphs, full.names = TRUE)character(0)# 0 step : All colors are in
increased_risk_2  <- "#e41a1c"  # red     - further increased risk factor
increased_risk_1  <- "#ff7f00"  # organge - increased risk factor
reference_color   <- "#4daf4a"  # green   - REFERENCE  category
descreased_risk_1 <- "#377eb8"  # blue    - descreased risk factor
descreased_risk_2 <- "#984ea3"  # purple  - further descrease in risk factor
# color definitions are picked from  
# http://colorbrewer2.org/#type=qualitative&scheme=Set1&n=7
g0 <- ds_predicted %>% 
  graph_logistic_point_complex_4(
    x_name       = "age_group"
    ,y_name      = "dv_hat_p"
    ,covar_order = covar_order_values
    ,alpha_level = common_alpha
    ,y_title     = dv_label_prob
    ,y_range     = c(y_low, y_high)
  ) 
g0 %>% print() # inspect[1] "./reports/graphing-phase-only/prints/1/g0.png"# 1 step of color logic: no color is added
increased_risk_2 <- common_natural  # red     - further increased risk factor
increased_risk_1 <- common_natural  # organge - increased risk factor
reference_color <- common_natural   # green   - REFERENCE  category
descreased_risk_1 <-common_natural  # blue    - descreased risk factor
descreased_risk_2 <- common_natural # purple  - further descrease in risk factor
# increased_risk_2 <- "#e41a1c"  # red     - further increased risk factor
# increased_risk_1 <- "#ff7f00"  # organge - increased risk factor
# reference_color <- "#4daf4a"   # green   - REFERENCE  category
# descreased_risk_1 <-"#377eb8"  # blue    - descreased risk factor
# descreased_risk_2 <- "#984ea3" # purple  - further descrease in risk factor
g1 <- ds_predicted %>% 
  graph_logistic_point_complex_4(
    x_name       = "age_group"
    ,y_name      = "dv_hat_p"
    ,covar_order = covar_order_values
    ,alpha_level = common_alpha
    ,y_title     = dv_label_prob
    ,y_range     = c(y_low, y_high)
) 
g1 %>% print() # inspect[1] "./reports/graphing-phase-only/prints/1/g0.png" "./reports/graphing-phase-only/prints/1/g1.png"# 2 step of color logic: add only the reference group
increased_risk_2 <- common_natural  # red     - further increased risk factor
increased_risk_1 <- common_natural  # organge - increased risk factor
reference_color <- common_natural   # green   - REFERENCE  category
descreased_risk_1 <-common_natural  # blue    - descreased risk factor
descreased_risk_2 <- common_natural # purple  - further descrease in risk factor
# increased_risk_2 <- "#e41a1c"  # red     - further increased risk factor
# increased_risk_1 <- "#ff7f00"  # organge - increased risk factor
reference_color <- "#4daf4a"  # green   - REFERENCE  category
# descreased_risk_1 <-"#377eb8"  # blue    - descreased risk factor
# descreased_risk_2 <- "#984ea3" # purple  - further descrease in risk factor
g2 <- ds_predicted %>% 
  graph_logistic_point_complex_4(
    x_name       = "age_group"
    ,y_name      = "dv_hat_p"
    ,covar_order = covar_order_values
    ,alpha_level = common_alpha
    ,y_title     = dv_label_prob
    ,y_range     = c(y_low, y_high)
  ) 
g2 %>% print() # inspect[1] "./reports/graphing-phase-only/prints/1/g0.png" "./reports/graphing-phase-only/prints/1/g1.png"
[3] "./reports/graphing-phase-only/prints/1/g2.png"# 3 step of color logic: add moderately increased risk
increased_risk_2 <- common_natural  # red - further increased risk factor
increased_risk_1 <- common_natural  # organge - increased risk factor
reference_color <- common_natural   # green  - REFERENCE  category
descreased_risk_1 <-common_natural  # blue - descreased risk factor
descreased_risk_2 <- common_natural # purple - further descrease in risk factor
# increased_risk_2 <- "#e41a1c"  # red - further increased risk factor
increased_risk_1 <- "#ff7f00"  # organge - increased risk factor
# reference_color <- "#4daf4a"   # green  - REFERENCE  category
# descreased_risk_1 <-"#377eb8"  # blue - descreased risk factor
# descreased_risk_2 <- "#984ea3" # purple - further descrease in risk factor
g3 <- ds_predicted %>% 
  graph_logistic_point_complex_4(
    x_name       = "age_group"
    ,y_name      = "dv_hat_p"
    ,covar_order = covar_order_values
    ,alpha_level = common_alpha
    ,y_title     = dv_label_prob
    ,y_range     = c(y_low, y_high)
  ) 
g3 %>% print() # inspect[1] "./reports/graphing-phase-only/prints/1/g0.png" "./reports/graphing-phase-only/prints/1/g1.png"
[3] "./reports/graphing-phase-only/prints/1/g2.png" "./reports/graphing-phase-only/prints/1/g3.png"# 4 step of color logic: add moderately decreased risk
increased_risk_2 <- common_natural  # red - further increased risk factor
increased_risk_1 <- common_natural  # organge - increased risk factor
reference_color <- common_natural   # green  - REFERENCE  category
descreased_risk_1 <-common_natural  # blue - descreased risk factor
descreased_risk_2 <- common_natural # purple - further descrease in risk factor
# increased_risk_2 <- "#e41a1c"  # red - further increased risk factor
# increased_risk_1 <- "#ff7f00"  # organge - increased risk factor
# reference_color <- "#4daf4a"   # green  - REFERENCE  category
descreased_risk_1 <-"#377eb8"  # blue - descreased risk factor
# descreased_risk_2 <- "#984ea3" # purple - further descrease in risk factor
g4 <- ds_predicted %>% 
  graph_logistic_point_complex_4(
    x_name       = "age_group"
    ,y_name      = "dv_hat_p"
    ,covar_order = covar_order_values
    ,alpha_level = common_alpha
    ,y_title     = dv_label_prob
    ,y_range     = c(y_low, y_high)
  ) 
g4 %>% print() # inspect[1] "./reports/graphing-phase-only/prints/1/g0.png" "./reports/graphing-phase-only/prints/1/g1.png"
[3] "./reports/graphing-phase-only/prints/1/g2.png" "./reports/graphing-phase-only/prints/1/g3.png"
[5] "./reports/graphing-phase-only/prints/1/g4.png"# 5 step of color logic: add substantially increased risk
increased_risk_2 <- common_natural  # red - further increased risk factor
increased_risk_1 <- common_natural  # organge - increased risk factor
reference_color <- common_natural   # green  - REFERENCE  category
descreased_risk_1 <-common_natural  # blue - descreased risk factor
descreased_risk_2 <- common_natural # purple - further descrease in risk factor
increased_risk_2 <- "#e41a1c"  # red - further increased risk factor
# increased_risk_1 <- "#ff7f00"  # organge - increased risk factor
# reference_color <- "#4daf4a"   # green  - REFERENCE  category
# descreased_risk_1 <-"#377eb8"  # blue - descreased risk factor
# descreased_risk_2 <- "#984ea3" # purple - further descrease in risk factor
g5 <- ds_predicted %>% 
  graph_logistic_point_complex_4(
    x_name       = "age_group"
    ,y_name      = "dv_hat_p"
    ,covar_order = covar_order_values
    ,alpha_level = common_alpha
    ,y_title     = dv_label_prob
    ,y_range     = c(y_low, y_high)
  ) 
g5 %>% print() # inspect[1] "./reports/graphing-phase-only/prints/1/g0.png" "./reports/graphing-phase-only/prints/1/g1.png"
[3] "./reports/graphing-phase-only/prints/1/g2.png" "./reports/graphing-phase-only/prints/1/g3.png"
[5] "./reports/graphing-phase-only/prints/1/g4.png" "./reports/graphing-phase-only/prints/1/g5.png"# 6 step of color logic: add substantially decreased risk
increased_risk_2 <- common_natural  # red - further increased risk factor
increased_risk_1 <- common_natural  # organge - increased risk factor
reference_color <- common_natural   # green  - REFERENCE  category
descreased_risk_1 <-common_natural  # blue - descreased risk factor
descreased_risk_2 <- common_natural # purple - further descrease in risk factor
# increased_risk_2 <- "#e41a1c"  # red - further increased risk factor
# increased_risk_1 <- "#ff7f00"  # organge - increased risk factor
# reference_color <- "#4daf4a"   # green  - REFERENCE  category
# descreased_risk_1 <-"#377eb8"  # blue - descreased risk factor
descreased_risk_2 <- "#984ea3" # purple - further descrease in risk factor
g6 <- ds_predicted %>% 
  graph_logistic_point_complex_4(
    x_name       = "age_group"
    ,y_name      = "dv_hat_p"
    ,covar_order = covar_order_values
    ,alpha_level = common_alpha
    ,y_title     = dv_label_prob
    ,y_range     = c(y_low, y_high)
  ) 
g6 %>% print() # inspect[1] "./reports/graphing-phase-only/prints/1/g0.png" "./reports/graphing-phase-only/prints/1/g1.png"
[3] "./reports/graphing-phase-only/prints/1/g2.png" "./reports/graphing-phase-only/prints/1/g3.png"
[5] "./reports/graphing-phase-only/prints/1/g4.png" "./reports/graphing-phase-only/prints/1/g5.png"
[7] "./reports/graphing-phase-only/prints/1/g6.png"For the sake of documentation and reproducibility, the current report was rendered on a system using the following software.
Report rendered by an499583 at 2019-11-05, 12:06 -0500 in 63 seconds.R version 3.5.2 (2018-12-20)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows >= 8 x64 (build 9200)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    
attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods   base     
other attached packages:
[1] dplyr_0.8.3        magrittr_1.5       RColorBrewer_1.1-2 dichromat_2.0-0    ggplot2_3.1.1     
[6] extrafont_0.17     knitr_1.23        
loaded via a namespace (and not attached):
 [1] tidyselect_0.2.5            xfun_0.7                    reshape2_1.4.3             
 [4] purrr_0.3.2                 haven_2.1.0                 carData_3.0-2              
 [7] colorspace_1.4-1            vctrs_0.1.0                 htmltools_0.3.6            
[10] yaml_2.2.0                  utf8_1.1.4                  rlang_0.4.0                
[13] pillar_1.4.1                foreign_0.8-71              glue_1.3.1                 
[16] withr_2.1.2                 readxl_1.3.1                plyr_1.8.4                 
[19] stringr_1.4.0               munsell_0.5.0               gtable_0.3.0               
[22] cellranger_1.1.0            zip_2.0.2                   evaluate_0.14              
[25] labeling_0.3                rio_0.5.16                  forcats_0.4.0              
[28] curl_3.3                    fansi_0.4.0                 Rttf2pt1_1.3.7             
[31] Rcpp_1.0.1                  scales_1.0.0                backports_1.1.4            
[34] TabularManifest_0.1-16.9003 abind_1.4-5                 testit_0.9                 
[37] hms_0.4.2                   digest_0.6.19               stringi_1.4.3              
[40] openxlsx_4.1.0.1            cowplot_0.9.4               cli_1.1.0                  
[43] tools_3.5.2                 lazyeval_0.2.2              tibble_2.1.3               
[46] crayon_1.3.4                extrafontdb_1.0             car_3.0-3                  
[49] pkgconfig_2.0.2             zeallot_0.1.0               data.table_1.12.2          
[52] assertthat_0.2.1            rmarkdown_1.13              rstudioapi_0.10            
[55] R6_2.4.0                    compiler_3.5.2