Read data

The data originate from the Kaggle Star Wars dataset (https://www.kaggle.com/jsphyg/star-wars#characters.csv). Here they are read from a local Excel copy, `charactersStarWars.xlsx`.

# Packages for reading the Excel workbook and for the imputation step.
library(openxlsx)
library(missRanger)
library(randomForest)

# Load the character table from the local workbook.
charactersStarWars <- read.xlsx(xlsxFile = "charactersStarWars.xlsx")

Data Preprocessing

Convert character columns into factors and impute the missing values.

# Fix the RNG state before the imputation step further down.
set.seed(1)

# Use the first column as row names, then keep columns 2-6 and 8-11.
rownames(charactersStarWars) <- charactersStarWars[, 1]
charactersStarWars <- charactersStarWars[, c(2:6, 8:11)]

# Hair and skin colour: drop everything from the first comma onwards,
# then convert to factor.
charactersStarWars[c("hair_color", "skin_color")] <- lapply(
  charactersStarWars[c("hair_color", "skin_color")],
  function(values) factor(gsub(",.*", "", values))
)

# Eye colour: drop everything from the first comma or hyphen onwards.
charactersStarWars$eye_color <- factor(
  gsub("[,-].*", "", charactersStarWars$eye_color)
)

# The remaining categorical columns are converted to factors as they are.
charactersStarWars[c("gender", "homeworld", "species")] <- lapply(
  charactersStarWars[c("gender", "homeworld", "species")],
  factor
)

# Fill in missing values with missRanger.
charactersStarWarsFilled <- missRanger(data = charactersStarWars)
## 
## Missing value imputation by random forests
## 
##   Variables to impute:       height, mass, hair_color, skin_color, eye_color, gender, homeworld, species
##   Variables used to impute:  height, mass, hair_color, skin_color, eye_color, gender, homeworld, species, Jedi
## iter 1:  ........
## iter 2:  ........
## iter 3:  ........
## iter 4:  ........
## iter 5:  ........
# Preview the first six rows of the imputed data.
head(charactersStarWarsFilled, n = 6L)
##                height mass hair_color skin_color eye_color gender homeworld
## Luke Skywalker    172   77      blond       fair      blue   male  Tatooine
## C-3PO             167   75       none       gold    yellow   male  Tatooine
## R2-D2              96   32       none      white       red   male     Naboo
## Darth Vader       202  136       none      white    yellow   male  Tatooine
## Leia Organa       150   49      brown      light     brown female  Alderaan
## Owen Lars         178  120      brown      light      blue   male  Tatooine
##                species Jedi
## Luke Skywalker   Human    1
## C-3PO            Droid    0
## R2-D2            Droid    0
## Darth Vader      Human    1
## Leia Organa      Human    0
## Owen Lars        Human    0

Create a gbm model

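Let’s use the gbm library to create a gbm model. Note that the call below passes no tuning parameters, so gbm’s defaults apply (`n.trees = 100`, `interaction.depth = 1`); fitting 250 trees 3 levels deep would require passing `n.trees = 250, interaction.depth = 3`.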

library(gbm)
# Reset the seed immediately before fitting the boosted model.
set.seed(1)

# Model the Jedi indicator from six physical attributes. No distribution is
# passed, so gbm chooses one itself and reports its choice in a message.
model <- gbm(
  formula = Jedi ~ height + mass + hair_color + skin_color +
    eye_color + gender,
  data = charactersStarWarsFilled
)
## Distribution not specified, assuming bernoulli ...

Create a DALEX explainer

Let’s wrap the gbm model in a DALEX explainer.

library("DALEX")
# Wrap the fitted model in a DALEX explainer. The target column `Jedi` is
# excluded from `data` and supplied only through `y`: DALEX recommends
# passing `data` without the target variable, and leaving it in would make
# it appear as a spurious feature in downstream explanations.
model_explained <- explain(
  model,
  data = charactersStarWarsFilled[
    , setdiff(colnames(charactersStarWarsFilled), "Jedi")
  ],
  y = charactersStarWarsFilled$Jedi
)
## Preparation of a new explainer is initiated
##   -> model label       :  gbm  (  default  )
##   -> data              :  87  rows  9  cols 
##   -> target variable   :  87  values 
##   -> data              :  A column identical to the target variable `y` has been found in the `data`.  (  WARNING  )
##   -> data              :  It is highly recommended to pass `data` without the target variable column
##   -> predict function  :  yhat.gbm  will be used (  default  )
##   -> predicted values  :  numerical, min =  0.002511505 , mean =  0.2720332 , max =  0.9663875  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -0.5234405 , mean =  -0.00766543 , max =  0.7765105  
##   -> model_info        :  package gbm , ver. 2.1.5 , task classification (  default  ) 
##   A new explainer has been created! 

Feature Importance explainer

Calculate the Feature Importance explainer and plot the ROC curve of the model.

library(ingredients)
library(auditor)

# Variable importance of the explained model, using 1 - AUC as the loss.
plot(
  feature_importance(
    x = model_explained,
    loss_function = DALEX:::loss_one_minus_auc
  )
)

# ROC curve computed from the explainer's model evaluation.
plot_roc(
  model_evaluation(model_explained)
)

modelStudio app

Build the modelStudio dashboard and save it to `index.html`.

library(modelStudio)
# Dashboard options: only the subtitle is customised.
op <- modelStudioOptions(
  subtitle = "What makes a Jedi (or Sith)?"
)
# Build the dashboard from the explainer and the imputed data frame created
# earlier in this script. The previous names `mexp` and `charactersFilled`
# were never defined, so the call failed with "object not found".
# Columns 1:6 are the six predictors used in the model formula.
modelStudioStarWars <- modelStudio(model_explained,
                  new_observation = charactersStarWarsFilled[, 1:6],
                  options = op)
# Print the object to render the dashboard.
modelStudioStarWars

# Save the dashboard to index.html.
r2d3::save_d3_html(modelStudioStarWars, file = "index.html")