Problem Set 1

tidymodels example

1 Setup

First, let’s load the packages and data…

# Load necessary packages
library(pacman)
p_load(here, readr, skimr, fst, tidymodels, glmnet)
# Read the sales data (stored as fst) into a data.table
raw_df = read_fst(
  here('projects', 'project-000', 'sales-data.fst'),
  as.data.table = TRUE
)
# Faster alternatives while testing (swap in for the call above):
#   First 20,000 rows only:
#     read_fst(<path>, as.data.table = TRUE, from = 1, to = 2e4)
#   Random 5% sample of all rows:
#     read_fst(<path>, as.data.table = TRUE) |> sample_frac(0.05)

Note: When working with a relatively large dataset like this one, it can help to use a subset first. If you don’t care about representativeness, you could just read the first few rows of the dataset, e.g., the commented-out from = 1, to = 2e4 above. If you want a representative sample, you could sample_n or sample_frac from the full dataset (also commented out above). This approach can save you a lot of time when you’re testing out your code. For example, the prep step of the recipe was 20 times faster when I used a 5% sample.

2 Data cleaning and preprocessing

Before we write the official recipe, which is more about engineering features, I’m going to fix any strangeness in the dataset—e.g., the variables that have the wrong class. Examples include variables that should be numeric but were read as character due to $ or , and codes that should be treated as categorical but were read as numbers.

# Fix column classes and drop columns we will not use
clean_df =
  raw_df |>
  # Numbers stored as text: parse them into numeric columns
  mutate(
    across(
      c(
        sale_price, land_square_feet, building_square_feet,
        number_of_units, percent_ownership, other_improvements
      ),
      parse_number
    )
  ) |>
  # Code-like variables: store as character so they are treated as categories
  mutate(
    across(
      c(
        pin, use, type_of_residence, design_plan, multi_code,
        contains('class'),
        ends_with('finish'),
        ends_with('material')
      ),
      as.character
    )
  ) |>
  # Drop unwanted columns in a single pass: anything starting with
  # 'square_root' or 'estimate', anything containing 'squared', and the
  # individually listed columns
  select(!c(
    starts_with('square_root'),
    contains('squared'),
    starts_with('estimate'),
    deed_no, census_tract,
    town_code, town_and_neighborhood,
    neighborhood_code, neigborhood_code_mapping,
    total_building_square_feet, property_address,
    sale_date, pure_market_filter
  ))

I’m also going to remove observations with a sales price less than or equal to $1. Ideally, we would have a flag for “non-arm’s-length” sales. We don’t, so this crude filter is the best we can do to remove non-arm’s-length sales.

# Keep only sales above $1 (crude screen for non-arm's-length sales)
clean_df = filter(clean_df, sale_price > 1)

# Summarize every column of the cleaned data
skim(clean_df)
Data summary
Name clean_df
Number of rows 333615
Number of columns 64
Key NULL
_______________________
Column type frequency:
character 15
logical 1
numeric 48
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
pin 0 1 14 21 0 267980 0
property_class 0 1 3 3 0 14 0
type_of_residence 0 1 1 1 0 8 0
wall_material 0 1 1 1 0 4 0
roof_material 0 1 1 1 0 6 0
basement_finish 1 1 1 1 0 3 0
attic_finish 0 1 1 1 0 4 0
design_plan 0 1 1 1 0 3 0
garage_1_material 0 1 1 1 0 5 0
garage_2_material 0 1 1 1 0 4 0
multi_code 0 1 1 2 0 27 0
modeling_group 0 1 2 2 0 2 0
use 0 1 1 1 0 2 0
condo_class_factor 333615 0 NA NA 0 0 0
condition_desirability_and_utility 0 1 0 2 314723 17 0

Variable type: logical

skim_variable n_missing complete_rate mean count
large_lot 333615 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
land_square_feet 0 1.00 7018.69 13323.39 149.00 3720.00 5060.00 7980.00 2980767.00 ▇▁▁▁▁
apartments 0 1.00 0.39 1.05 -5.00 0.00 0.00 0.00 6.00 ▁▁▇▁▁
rooms 0 1.00 7.08 3.35 2.00 5.00 6.00 8.00 241.00 ▇▁▁▁▁
bedrooms 0 1.00 3.54 1.53 1.00 3.00 3.00 4.00 90.00 ▇▁▁▁▁
basement 0 1.00 1.72 0.97 1.00 1.00 1.00 3.00 4.00 ▇▂▁▃▁
central_heating 0 1.00 1.16 0.37 0.00 1.00 1.00 1.00 2.00 ▁▁▇▁▂
other_heating 0 1.00 4.89 0.56 2.00 5.00 5.00 5.00 5.00 ▁▁▁▁▇
central_air 0 1.00 0.47 0.50 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▇
fireplaces 0 1.00 0.31 0.54 0.00 0.00 0.00 1.00 2.00 ▇▁▂▁▁
attic_type 0 1.00 2.49 0.79 1.00 2.00 3.00 3.00 3.00 ▂▁▂▁▇
half_baths 0 1.00 0.46 0.57 0.00 0.00 0.00 1.00 9.00 ▇▁▁▁▁
cathedral_ceiling 0 1.00 0.88 0.97 0.00 0.00 0.00 2.00 2.00 ▇▁▁▁▆
construction_quality 0 1.00 1.99 0.11 1.00 2.00 2.00 2.00 3.00 ▁▁▇▁▁
renovation 333006 0.00 1.12 0.33 1.00 1.00 1.00 1.00 2.00 ▇▁▁▁▁
site_desirability 0 1.00 2.00 0.08 1.00 2.00 2.00 2.00 3.00 ▁▁▇▁▁
garage_1_size 0 1.00 2.27 1.36 0.00 1.00 3.00 3.00 7.00 ▅▁▇▁▁
garage_1_attachment 0 1.00 1.37 0.75 0.00 1.00 2.00 2.00 2.00 ▂▁▅▁▇
garage_1_area 0 1.00 1.58 0.76 0.00 1.00 2.00 2.00 2.00 ▂▁▁▁▇
garage_2_size 0 1.00 7.00 0.11 1.00 7.00 7.00 7.00 7.00 ▁▁▁▁▇
garage_2_attachment 0 1.00 0.00 0.04 0.00 0.00 0.00 0.00 2.00 ▇▁▁▁▁
garage_2_area 0 1.00 0.01 0.15 0.00 0.00 0.00 0.00 4.00 ▇▁▁▁▁
porch 0 1.00 2.66 0.75 1.00 3.00 3.00 3.00 3.00 ▂▁▁▁▇
other_improvements 0 1.00 15.27 517.79 0.00 0.00 0.00 0.00 84336.00 ▇▁▁▁▁
building_square_feet 0 1.00 1806.23 1046.84 392.00 1128.00 1473.00 2174.00 21796.00 ▇▁▁▁▁
repair_condition 0 1.00 2.00 0.12 1.00 2.00 2.00 2.00 3.00 ▁▁▇▁▁
number_of_commercial_units 0 1.00 0.01 0.12 0.00 0.00 0.00 0.00 5.00 ▇▁▁▁▁
sale_price 0 1.00 288388.40 343122.18 2.00 119000.00 214100.00 349000.00 71000000.00 ▇▁▁▁▁
longitude 3424 0.99 -87.78 0.15 -88.26 -87.84 -87.75 -87.68 -87.52 ▁▁▃▇▃
latitude 3424 0.99 41.85 0.17 41.47 41.73 41.87 42.00 42.15 ▃▅▇▇▆
multi_property_indicator 0 1.00 0.02 0.14 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
full_baths 0 1.00 1.69 0.92 1.00 1.00 1.00 2.00 42.00 ▇▁▁▁▁
age 0 1.00 65.80 32.20 1.00 46.00 61.00 91.00 181.00 ▃▇▅▂▁
number_of_units 333615 0.00 NaN NA NA NA NA NA NA
percent_ownership 332778 0.00 52.91 16.12 10.00 50.00 50.00 50.00 98.00 ▁▁▇▂▁
multi_family_indicator 282226 0.15 211.06 0.24 211.00 211.00 211.00 211.00 212.00 ▇▁▁▁▁
o_hare_noise 3424 0.99 0.01 0.12 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
floodplain 3424 0.99 0.02 0.15 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
road_proximity 3424 0.99 0.08 0.27 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
condo_strata 333615 0.00 NaN NA NA NA NA NA NA
sale_year 0 1.00 2016.06 2.00 2013.00 2014.00 2016.00 2018.00 2019.00 ▇▃▅▅▇
sale_quarter 0 1.00 78.80 7.99 65.00 72.00 79.00 86.00 92.00 ▇▆▇▇▇
sale_half_year 0 1.00 39.65 3.99 33.00 36.00 40.00 43.00 46.00 ▇▆▅▇▇
sale_quarter_of_year 0 1.00 2.56 1.07 1.00 2.00 3.00 3.00 4.00 ▆▇▁▇▆
sale_month_of_year 0 1.00 6.65 3.27 1.00 4.00 7.00 9.00 12.00 ▇▆▆▆▇
sale_half_of_year 0 1.00 1.53 0.50 1.00 1.00 2.00 2.00 2.00 ▇▁▁▁▇
most_recent_sale 214 1.00 0.76 0.43 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
age_decade 0 1.00 6.58 3.22 0.10 4.60 6.10 9.10 18.10 ▃▇▅▂▁
garage_indicator 0 1.00 0.83 0.37 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇

Let’s define a recipe for cleaning and preprocessing the data. As discussed in class, we’ll at least need to convert categorical predictors to dummies and standardize the numeric predictors.

# Define a recipe for preprocessing the data
# The steps run in the order written, so each selector (e.g.,
# all_numeric_predictors()) only sees the columns that exist at that point.
a_recipe =
  # Start the recipe: sale_price is the outcome, every other column a predictor
  recipe(sale_price ~ ., data = clean_df) |> 
  # Update ID variable role: pin stays in the data but is no longer a predictor
  update_role(pin, new_role = "id") |>
  # Drop variables with too many missing values (more than 1% missing)
  step_filter_missing(all_predictors(), threshold = 0.01) |>
  # Impute missing values in numeric predictors with the median
  step_impute_median(all_numeric_predictors()) |>
  # Interact numeric predictors with each other (all pairwise interactions)
  step_interact(~ all_numeric_predictors():all_numeric_predictors()) |>
  # Normalize numeric predictors (center and scale), including the
  # interactions created in the previous step
  # NOTE(review): prep() warns when an interaction column has zero variance;
  # adding step_zv() before this step would likely silence that warning
  step_normalize(all_numeric_predictors()) |>
  # Impute missing values in categorical predictors with the most common value
  step_impute_mode(all_nominal_predictors()) |>
  # Collapse infrequent factor levels (below 5% of observations) into 'other'
  step_other(all_nominal_predictors(), threshold = 0.05) |>
  # Create dummy variables for categorical predictors; one_hot = TRUE keeps an
  # indicator for every level rather than dropping a reference level
  step_dummy(all_nominal_predictors(), one_hot = TRUE) |>
  # Remove near-zero-variance predictors
  step_nzv(all_predictors()) |>
  # Remove predictors that are highly correlated with other predictors
  # (absolute correlation above 0.9)
  step_corr(all_predictors(), threshold = 0.9) |>
  # Remove variables that are linear combinations of other predictors
  step_lincomb(all_predictors())

# Let's see the recipe (prints the roles and the list of steps)
a_recipe
── Recipe ──────────────────────────────────────────────────────────────────────
── Inputs 
Number of variables by role
outcome:    1
predictor: 62
id:         1
── Operations 
• Missing value column filter on: all_predictors()
• Median imputation for: all_numeric_predictors()
• Interactions with: all_numeric_predictors():all_numeric_predictors()
• Centering and scaling for: all_numeric_predictors()
• Mode imputation for: all_nominal_predictors()
• Collapsing factor levels for: all_nominal_predictors()
• Dummy variables from: all_nominal_predictors()
• Sparse, unbalanced variable filter on: all_predictors()
• Correlation filter on: all_predictors()
• Linear combination filter on: all_predictors()

Notice that executing this recipe once takes quite some time.

# Start the timer, then estimate the recipe (prep) and return the processed
# training data (bake with new_data = NULL)
tictoc::tic()
baked_df = bake(prep(a_recipe), new_data = NULL)
Warning: !  The following column has zero variance so scaling cannot be used:
  garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
  before normalizing.
tictoc::toc()
198.264 sec elapsed
# Check out the baked data
baked_df |> skim()
Data summary
Name baked_df
Number of rows 333615
Number of columns 156
_______________________
Column type frequency:
character 1
numeric 155
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
pin 0 1 14 21 0 267980 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
sale_year 0 1 0.00 1.00 -1.53 -1.03 -0.03 0.97 1.4700e+00 ▇▃▅▅▇
sale_price 0 1 288388.40 343122.18 2.00 119000.00 214100.00 349000.00 7.1000e+07 ▇▁▁▁▁
land_square_feet_x_basement 0 1 0.00 1.00 -0.41 -0.30 -0.21 0.10 2.6801e+02 ▇▁▁▁▁
land_square_feet_x_central_heating 0 1 0.00 1.00 -0.46 -0.24 -0.10 0.05 3.4300e+02 ▇▁▁▁▁
land_square_feet_x_garage_1_size 0 1 0.00 1.00 -0.35 -0.26 -0.14 0.06 2.3421e+02 ▇▁▁▁▁
land_square_feet_x_garage_1_attachment 0 1 0.00 1.00 -0.46 -0.17 -0.07 0.10 2.9191e+02 ▇▁▁▁▁
land_square_feet_x_building_square_feet 0 1 0.00 1.00 -0.26 -0.17 -0.12 -0.02 3.0528e+02 ▇▁▁▁▁
land_square_feet_x_most_recent_sale 0 1 0.00 1.00 -0.44 -0.36 -0.12 0.14 2.3849e+02 ▇▁▁▁▁
land_square_feet_x_age_decade 0 1 0.00 1.00 -0.37 -0.15 -0.05 0.04 3.4956e+02 ▇▁▁▁▁
apartments_x_basement 0 1 0.00 1.00 -8.84 -0.32 -0.32 -0.32 1.3320e+01 ▁▇▁▁▁
apartments_x_central_heating 0 1 0.00 1.00 -6.38 -0.34 -0.34 -0.34 6.9000e+00 ▁▁▇▁▁
apartments_x_cathedral_ceiling 0 1 0.00 1.00 -6.18 -0.30 -0.30 -0.30 6.7600e+00 ▁▁▇▁▁
apartments_x_porch 0 1 0.00 1.00 -5.69 -0.34 -0.34 -0.34 6.0900e+00 ▁▁▇▁▁
apartments_x_full_baths 0 1 0.00 1.00 -8.08 -0.28 -0.28 -0.28 1.5760e+01 ▁▇▁▁▁
apartments_x_sale_half_of_year 0 1 0.00 1.00 -6.28 -0.35 -0.35 -0.35 6.7600e+00 ▁▁▇▁▁
apartments_x_most_recent_sale 0 1 0.00 1.00 -5.82 -0.31 -0.31 -0.31 6.3100e+00 ▁▁▇▁▁
apartments_x_garage_indicator 0 1 0.00 1.00 -6.71 -0.30 -0.30 -0.30 7.4000e+00 ▁▁▇▁▁
rooms_x_bedrooms 0 1 0.00 1.00 -0.55 -0.29 -0.23 0.05 3.2051e+02 ▇▁▁▁▁
rooms_x_full_baths 0 1 0.00 1.00 -0.62 -0.47 -0.32 0.09 7.1930e+01 ▇▁▁▁▁
bedrooms_x_basement 0 1 0.00 1.00 -1.26 -0.75 -0.49 0.79 2.1500e+01 ▇▁▁▁▁
bedrooms_x_central_heating 0 1 0.00 1.00 -1.40 -0.41 -0.41 -0.08 5.8020e+01 ▇▁▁▁▁
bedrooms_x_other_heating 0 1 0.00 1.00 -1.97 -0.30 -0.30 0.35 5.5590e+01 ▇▁▁▁▁
bedrooms_x_attic_type 0 1 0.00 1.00 -1.51 -0.55 0.02 0.60 5.0160e+01 ▇▁▁▁▁
bedrooms_x_half_baths 0 1 0.00 1.00 -0.56 -0.56 -0.56 0.44 5.8320e+01 ▇▁▁▁▁
bedrooms_x_cathedral_ceiling 0 1 0.00 1.00 -0.78 -0.78 -0.78 0.64 1.0570e+01 ▇▁▁▁▁
bedrooms_x_garage_1_size 0 1 0.00 1.00 -1.16 -0.73 0.13 0.56 3.7580e+01 ▇▁▁▁▁
bedrooms_x_garage_1_area 0 1 0.00 1.00 -1.56 -0.42 0.16 0.73 5.0110e+01 ▇▁▁▁▁
bedrooms_x_porch 0 1 0.00 1.00 -1.71 -0.68 -0.07 0.55 5.3620e+01 ▇▁▁▁▁
bedrooms_x_age 0 1 0.00 1.00 -1.24 -0.57 -0.29 0.26 4.3410e+01 ▇▁▁▁▁
bedrooms_x_sale_month_of_year 0 1 0.00 1.00 -1.39 -0.71 -0.16 0.52 4.3040e+01 ▇▁▁▁▁
bedrooms_x_most_recent_sale 0 1 0.00 1.00 -1.35 -0.34 0.16 0.67 1.9780e+01 ▇▁▁▁▁
basement_x_central_heating 0 1 0.00 1.00 -1.67 -0.80 0.06 0.93 5.2600e+00 ▇▇▁▁▁
basement_x_central_air 0 1 0.00 1.00 -0.75 -0.75 -0.75 0.12 2.7300e+00 ▇▃▁▂▁
basement_x_fireplaces 0 1 0.00 1.00 -0.51 -0.51 -0.51 0.41 6.7800e+00 ▇▁▁▁▁
basement_x_attic_type 0 1 0.00 1.00 -1.13 -0.46 -0.46 0.55 2.5600e+00 ▇▁▂▂▁
basement_x_half_baths 0 1 0.00 1.00 -0.68 -0.68 -0.68 0.13 2.1950e+01 ▇▁▁▁▁
basement_x_cathedral_ceiling 0 1 0.00 1.00 -0.71 -0.71 -0.71 0.25 3.1500e+00 ▇▃▁▁▁
basement_x_garage_1_size 0 1 0.00 1.00 -1.14 -0.56 -0.26 0.61 7.0600e+00 ▇▂▁▁▁
basement_x_garage_1_attachment 0 1 0.00 1.00 -1.26 -0.71 -0.15 0.40 3.1600e+00 ▅▇▁▁▁
basement_x_garage_1_area 0 1 0.00 1.00 -1.26 -0.34 -0.34 0.58 2.4100e+00 ▃▇▂▂▁
basement_x_building_square_feet 0 1 0.00 1.00 -1.12 -0.72 -0.29 0.38 2.3950e+01 ▇▁▁▁▁
basement_x_repair_condition 0 1 0.00 1.00 -1.24 -0.73 -0.73 1.32 4.3800e+00 ▇▂▃▁▁
basement_x_full_baths 0 1 0.00 1.00 -0.85 -0.85 -0.39 0.54 1.8230e+01 ▇▁▁▁▁
basement_x_age 0 1 0.00 1.00 -1.46 -0.66 -0.18 0.34 7.6800e+00 ▇▂▁▁▁
basement_x_sale_half_of_year 0 1 0.00 1.00 -0.91 -0.91 -0.35 0.21 3.0000e+00 ▇▁▁▁▁
basement_x_most_recent_sale 0 1 0.00 1.00 -1.16 -0.28 -0.28 0.61 2.3700e+00 ▅▇▂▃▁
central_heating_x_other_heating 0 1 0.00 1.00 -2.92 -0.34 -0.34 -0.34 2.2300e+00 ▁▁▇▁▂
central_heating_x_central_air 0 1 0.00 1.00 -0.90 -0.90 -0.90 0.95 2.7900e+00 ▇▁▇▁▁
central_heating_x_fireplaces 0 1 0.00 1.00 -0.54 -0.54 -0.54 0.98 5.5300e+00 ▇▂▁▁▁
central_heating_x_attic_type 0 1 0.00 1.00 -2.31 -0.68 0.13 0.13 2.5800e+00 ▂▂▇▁▁
central_heating_x_half_baths 0 1 0.00 1.00 -0.73 -0.73 -0.73 0.69 1.9160e+01 ▇▁▁▁▁
central_heating_x_garage_1_size 0 1 0.00 1.00 -1.41 -0.87 0.21 0.21 6.1400e+00 ▆▇▂▁▁
central_heating_x_porch 0 1 0.00 1.00 -2.53 -0.01 -0.01 -0.01 2.5100e+00 ▁▁▇▁▁
central_heating_x_building_square_feet 0 1 0.00 1.00 -1.22 -0.57 -0.32 0.18 1.8110e+01 ▇▁▁▁▁
central_heating_x_full_baths 0 1 0.00 1.00 -1.32 -0.66 -0.01 -0.01 2.6160e+01 ▇▁▁▁▁
central_heating_x_age 0 1 0.00 1.00 -1.41 -0.60 -0.32 0.40 4.9400e+00 ▇▃▁▁▁
central_heating_x_sale_month_of_year 0 1 0.00 1.00 -1.64 -0.79 -0.15 0.49 3.4700e+00 ▅▇▅▁▁
central_heating_x_most_recent_sale 0 1 0.00 1.00 -1.49 0.22 0.22 0.22 1.9200e+00 ▃▁▇▁▂
central_heating_x_garage_indicator 0 1 0.00 1.00 -1.78 0.07 0.07 0.07 1.9300e+00 ▂▁▇▁▂
other_heating_x_garage_1_attachment 0 1 0.00 1.00 -1.78 -0.45 0.88 0.88 8.8000e-01 ▂▁▅▁▇
other_heating_x_sale_year 0 1 0.00 1.00 -5.19 0.18 0.19 0.20 2.1000e-01 ▁▁▁▁▇
other_heating_x_sale_half_year 0 1 0.00 1.00 -4.32 -0.47 0.20 0.71 1.2200e+00 ▁▁▁▇▇
other_heating_x_age_decade 0 1 0.00 1.00 -1.98 -0.62 -0.12 0.81 3.6500e+00 ▃▇▃▂▁
central_air_x_fireplaces 0 1 0.00 1.00 -0.49 -0.49 -0.49 -0.49 3.5900e+00 ▇▁▂▁▁
central_air_x_half_baths 0 1 0.00 1.00 -0.61 -0.61 -0.61 1.35 1.7100e+01 ▇▁▁▁▁
central_air_x_cathedral_ceiling 0 1 0.00 1.00 -0.61 -0.61 -0.61 0.55 1.7200e+00 ▇▁▁▁▃
central_air_x_garage_1_attachment 0 1 0.00 1.00 -0.78 -0.78 -0.78 0.49 1.7700e+00 ▇▁▃▁▂
central_air_x_sale_month_of_year 0 1 0.00 1.00 -0.78 -0.78 -0.78 0.73 2.2300e+00 ▇▁▂▁▂
central_air_x_most_recent_sale 0 1 0.00 1.00 -0.76 -0.76 -0.76 1.31 1.3100e+00 ▇▁▁▁▅
fireplaces_x_half_baths 0 1 0.00 1.00 -0.42 -0.42 -0.42 -0.42 2.7080e+01 ▇▁▁▁▁
fireplaces_x_cathedral_ceiling 0 1 0.00 1.00 -0.40 -0.40 -0.40 -0.40 4.2700e+00 ▇▁▁▁▁
fireplaces_x_garage_1_attachment 0 1 0.00 1.00 -0.51 -0.51 -0.51 0.78 4.6600e+00 ▇▂▁▁▁
fireplaces_x_most_recent_sale 0 1 0.00 1.00 -0.50 -0.50 -0.50 -0.50 3.5000e+00 ▇▁▂▁▁
attic_type_x_garage_1_size 0 1 0.00 1.00 -1.41 -0.66 0.09 0.83 3.8200e+00 ▇▂▇▁▁
attic_type_x_garage_1_attachment 0 1 0.00 1.00 -1.56 -0.62 -0.15 1.25 1.2500e+00 ▅▅▆▂▇
attic_type_x_garage_1_area 0 1 0.00 1.00 -1.69 -0.82 0.05 0.92 9.2000e-01 ▃▃▁▂▇
attic_type_x_porch 0 1 0.00 1.00 -1.95 -1.28 0.76 0.76 7.6000e-01 ▁▂▁▂▇
attic_type_x_building_square_feet 0 1 0.00 1.00 -1.28 -0.62 -0.27 0.40 1.8600e+01 ▇▁▁▁▁
attic_type_x_full_baths 0 1 0.00 1.00 -1.13 -0.44 -0.44 0.60 1.3050e+01 ▇▁▁▁▁
attic_type_x_sale_month_of_year 0 1 0.00 1.00 -1.55 -0.85 -0.15 0.74 1.9400e+00 ▇▇▆▅▅
attic_type_x_sale_half_of_year 0 1 0.00 1.00 -1.58 -1.01 -0.45 1.24 1.2400e+00 ▆▁▇▂▇
attic_type_x_most_recent_sale 0 1 0.00 1.00 -1.49 -0.70 0.87 0.87 8.7000e-01 ▃▂▁▂▇
attic_type_x_age_decade 0 1 0.00 1.00 -1.72 -0.72 -0.15 0.41 4.0100e+00 ▅▇▂▁▁
half_baths_x_cathedral_ceiling 0 1 0.00 1.00 -0.50 -0.50 -0.50 -0.50 1.8690e+01 ▇▁▁▁▁
half_baths_x_garage_1_attachment 0 1 0.00 1.00 -0.68 -0.68 -0.68 0.48 1.5590e+01 ▇▁▁▁▁
half_baths_x_sale_month_of_year 0 1 0.00 1.00 -0.69 -0.69 -0.69 0.66 1.5480e+01 ▇▁▁▁▁
half_baths_x_most_recent_sale 0 1 0.00 1.00 -0.67 -0.67 -0.67 1.20 1.6200e+01 ▇▁▁▁▁
cathedral_ceiling_x_construction_quality 0 1 0.00 1.00 -0.90 -0.90 -0.90 1.16 2.1800e+00 ▇▁▁▆▁
cathedral_ceiling_x_garage_1_attachment 0 1 0.00 1.00 -0.71 -0.71 -0.71 0.50 1.7000e+00 ▇▁▂▁▃
cathedral_ceiling_x_most_recent_sale 0 1 0.00 1.00 -0.73 -0.73 -0.73 1.42 1.4200e+00 ▇▁▁▁▃
construction_quality_x_garage_1_area 0 1 0.00 1.00 -2.06 -0.75 0.57 0.57 1.8800e+00 ▂▁▁▇▁
construction_quality_x_porch 0 1 0.00 1.00 -2.84 0.47 0.47 0.47 2.4600e+00 ▂▁▁▇▁
construction_quality_x_building_square_feet 0 1 0.00 1.00 -1.50 -0.64 -0.32 0.35 3.0030e+01 ▇▁▁▁▁
construction_quality_x_full_baths 0 1 0.00 1.00 -1.29 -0.74 -0.74 0.35 4.4090e+01 ▇▁▁▁▁
construction_quality_x_sale_year 0 1 0.00 1.00 -9.38 0.07 0.09 0.11 9.5900e+00 ▁▁▇▁▁
construction_quality_x_sale_half_year 0 1 0.00 1.00 -5.10 -0.77 0.12 0.79 6.5600e+00 ▁▃▇▁▁
construction_quality_x_most_recent_sale 0 1 0.00 1.00 -1.76 -0.59 0.57 0.57 1.7400e+00 ▂▁▁▇▁
site_desirability_x_sale_year 0 1 0.00 1.00 -12.56 0.00 0.03 0.05 1.2660e+01 ▁▁▇▁▁
site_desirability_x_sale_half_of_year 0 1 0.00 1.00 -2.04 -1.05 0.94 0.94 2.9300e+00 ▇▁▇▁▁
garage_1_size_x_garage_1_attachment 0 1 0.00 1.00 -1.49 -0.69 -0.29 0.92 4.1200e+00 ▆▆▇▁▁
garage_1_size_x_garage_1_area 0 1 0.00 1.00 -1.62 -0.86 0.67 0.67 3.7200e+00 ▅▂▇▁▁
garage_1_size_x_porch 0 1 0.00 1.00 -1.46 -0.73 0.72 0.72 3.6200e+00 ▇▁▇▁▁
garage_1_size_x_full_baths 0 1 0.00 1.00 -1.01 -0.76 -0.25 0.52 3.1180e+01 ▇▁▁▁▁
garage_1_size_x_sale_month_of_year 0 1 0.00 1.00 -1.21 -0.89 -0.25 0.71 5.5100e+00 ▇▅▁▁▁
garage_1_size_x_most_recent_sale 0 1 0.00 1.00 -1.13 -1.13 0.17 0.82 3.4200e+00 ▇▁▇▁▁
garage_1_size_x_age_decade 0 1 0.00 1.00 -1.21 -0.79 -0.10 0.51 8.1400e+00 ▇▂▁▁▁
garage_1_attachment_x_porch 0 1 0.00 1.00 -1.60 -0.70 -0.26 1.08 1.0800e+00 ▃▂▆▁▇
garage_1_attachment_x_full_baths 0 1 0.00 1.00 -1.29 -0.70 -0.12 -0.12 4.7610e+01 ▇▁▁▁▁
garage_1_attachment_x_sale_month_of_year 0 1 0.00 1.00 -1.28 -0.86 -0.16 0.68 2.0800e+00 ▇▆▆▂▃
garage_1_attachment_x_most_recent_sale 0 1 0.00 1.00 -1.19 -1.19 -0.04 1.11 1.1100e+00 ▇▁▅▁▇
garage_1_area_x_porch 0 1 0.00 1.00 -1.75 -0.91 0.77 0.77 7.7000e-01 ▂▂▁▁▇
garage_2_size_x_sale_year 0 1 0.00 1.00 -53.96 -0.04 0.02 0.08 1.1000e-01 ▁▁▁▁▇
porch_x_building_square_feet 0 1 0.00 1.00 -1.37 -0.57 -0.28 0.35 1.6780e+01 ▇▁▁▁▁
porch_x_full_baths 0 1 0.00 1.00 -1.22 -0.52 -0.52 0.53 4.2580e+01 ▇▁▁▁▁
porch_x_sale_month_of_year 0 1 0.00 1.00 -1.62 -0.84 0.03 0.91 1.7800e+00 ▇▇▅▅▆
porch_x_sale_half_of_year 0 1 0.00 1.00 -1.71 -0.60 -0.60 1.08 1.0800e+00 ▃▇▁▁▇
porch_x_most_recent_sale 0 1 0.00 1.00 -1.54 -0.78 0.75 0.75 7.5000e-01 ▃▂▁▁▇
porch_x_age_decade 0 1 0.00 1.00 -1.79 -0.75 -0.08 0.40 4.0600e+00 ▅▇▂▁▁
building_square_feet_x_sale_month_of_year 0 1 0.00 1.00 -1.20 -0.65 -0.21 0.35 1.6890e+01 ▇▁▁▁▁
building_square_feet_x_age_decade 0 1 0.00 1.00 -1.10 -0.56 -0.34 0.21 1.8610e+01 ▇▁▁▁▁
repair_condition_x_sale_year 0 1 0.00 1.00 -8.03 0.01 0.03 0.04 8.1100e+00 ▁▁▇▁▁
repair_condition_x_sale_half_year 0 1 0.00 1.00 -4.92 -0.76 0.09 0.73 6.2700e+00 ▁▃▇▁▁
full_baths_x_age 0 1 0.00 1.00 -1.14 -0.58 -0.37 0.16 4.4820e+01 ▇▁▁▁▁
full_baths_x_sale_month_of_year 0 1 0.00 1.00 -1.17 -0.60 -0.25 0.43 5.1510e+01 ▇▁▁▁▁
full_baths_x_most_recent_sale 0 1 0.00 1.00 -1.19 -0.26 -0.26 0.67 9.9600e+00 ▇▁▁▁▁
age_x_sale_month_of_year 0 1 0.00 1.00 -1.36 -0.81 -0.18 0.57 4.8300e+00 ▇▅▂▁▁
sale_quarter_of_year_x_sale_month_of_year 0 1 0.00 1.00 -1.27 -0.81 0.04 0.43 1.8000e+00 ▇▂▆▁▅
sale_half_of_year_x_most_recent_sale 0 1 0.00 1.00 -1.48 -0.21 -0.21 1.05 1.0500e+00 ▅▁▇▁▇
most_recent_sale_x_age_decade 0 1 0.00 1.00 -1.25 -1.23 0.10 0.63 3.3600e+00 ▇▇▃▂▁
age_decade_x_garage_indicator 0 1 0.00 1.00 -1.42 -0.91 0.10 0.67 3.5000e+00 ▆▇▃▂▁
property_class_X202 0 1 0.11 0.31 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▁
property_class_X203 0 1 0.31 0.46 0.00 0.00 0.00 1.00 1.0000e+00 ▇▁▁▁▃
property_class_X205 0 1 0.06 0.25 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▁
property_class_X211 0 1 0.14 0.35 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▂
property_class_X278 0 1 0.08 0.28 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▁
property_class_X295 0 1 0.07 0.26 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▁
property_class_other 0 1 0.13 0.34 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▁
type_of_residence_X1 0 1 0.39 0.49 0.00 0.00 0.00 1.00 1.0000e+00 ▇▁▁▁▅
type_of_residence_X2 0 1 0.39 0.49 0.00 0.00 0.00 1.00 1.0000e+00 ▇▁▁▁▅
type_of_residence_X4 0 1 0.08 0.28 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▁
type_of_residence_X5 0 1 0.10 0.30 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▁
wall_material_X1 0 1 0.32 0.47 0.00 0.00 0.00 1.00 1.0000e+00 ▇▁▁▁▃
wall_material_X2 0 1 0.43 0.50 0.00 0.00 0.00 1.00 1.0000e+00 ▇▁▁▁▆
wall_material_X3 0 1 0.23 0.42 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▂
roof_material_X1 0 1 0.89 0.31 0.00 1.00 1.00 1.00 1.0000e+00 ▁▁▁▁▇
roof_material_X2 0 1 0.09 0.28 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▁
basement_finish_X3 0 1 0.68 0.46 0.00 0.00 1.00 1.00 1.0000e+00 ▃▁▁▁▇
attic_finish_X0 0 1 0.68 0.47 0.00 0.00 1.00 1.00 1.0000e+00 ▃▁▁▁▇
attic_finish_X1 0 1 0.12 0.33 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▁
attic_finish_X3 0 1 0.20 0.40 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▂
design_plan_X2 0 1 0.69 0.46 0.00 0.00 1.00 1.00 1.0000e+00 ▃▁▁▁▇
garage_1_material_X1 0 1 0.58 0.49 0.00 0.00 1.00 1.00 1.0000e+00 ▆▁▁▁▇
garage_1_material_X2 0 1 0.20 0.40 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▂
garage_1_material_X3 0 1 0.05 0.22 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▁
condition_desirability_and_utility_other 0 1 0.06 0.23 0.00 0.00 0.00 0.00 1.0000e+00 ▇▁▁▁▁

So if you are then asking R to run this recipe each time you fit a model (for each fold of cross-validation, for each value of the tuning parameters), that can add up to a lot of time.

One more note: I used step_other to collapse infrequent factor levels to an “other” category. This step will help you avoid creating a huge number of dummy variables that are mostly 0s. However, it could cause you to lose important information.

3 Elasticnet

Now we can set up our workflow. We will need to specify a model. Then we can add our recipe.

# Elasticnet specification: penalty and mixture are both left to be tuned,
# and the model is fit with the glmnet engine
net_model = set_engine(
  linear_reg(penalty = tune(), mixture = tune()),
  'glmnet'
)
# Bundle the model and the preprocessing recipe into one workflow
net_wf = workflow() |> add_model(net_model) |> add_recipe(a_recipe)

# Show the workflow's preprocessor steps and model specification
net_wf
══ Workflow ════════════════════════════════════════════════════════════════════
Preprocessor: Recipe
Model: linear_reg()

── Preprocessor ────────────────────────────────────────────────────────────────
10 Recipe Steps

• step_filter_missing()
• step_impute_median()
• step_interact()
• step_normalize()
• step_impute_mode()
• step_other()
• step_dummy()
• step_nzv()
• step_corr()
• step_lincomb()

── Model ───────────────────────────────────────────────────────────────────────
Linear Regression Model Specification (regression)

Main Arguments:
  penalty = tune()
  mixture = tune()

Computational engine: glmnet 

We’re finally ready to tune the model… after we create a resampling object (define the folds) and define the set of tuning parameters we wish to evaluate.

This may take some time. I’m going to use tictoc to time the execution of this code chunk again.

Note that parallelization can help here—to a degree. Tuning 5 levels of lambda took 99 seconds to run sequentially on my computer. When I parallelized (with 5 workers in future), the same code took approximately 30 seconds.

# Start timer
tictoc::tic()

# Define the resampling object: 5-fold cross validation on the cleaned data
# (the seed makes the fold assignment reproducible)
set.seed(12)
cv_folds = vfold_cv(clean_df, v = 5)
# Parallelize the tuning process
library(future)
# Leave one core free, but never request fewer than one worker: on a
# single-core machine `availableCores() - 1` is 0, which `plan()` rejects
plan(multisession, workers = max(1, parallelly::availableCores() - 1))
# Tune the model
tune_results =
  tune_grid(
    net_wf,
    resamples = cv_folds,
    # Full grid: 50 penalties (log-spaced from 1e-5 to 1e3) crossed with
    # 11 mixtures (0 = ridge, 1 = lasso), i.e., 550 combinations
    grid = expand_grid(
      penalty = 10^seq(-5, 3, length.out = 50),
      mixture = seq(0, 1, length.out = 11)
    ),
    # Performance metrics computed on each held-out fold
    metrics = metric_set(rmse, mae, rsq),
    # Parallelize over both the folds and the tuning-parameter combinations
    control = control_grid(parallel_over = 'everything')
  )
! Fold1: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold1: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold1: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold1: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold1: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold1: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold1: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold1: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold1: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold1: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold1: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold2: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold2: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold2: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold2: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold2: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold2: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold2: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold2: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold2: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold2: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold2: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold3: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold3: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold3: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold3: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold3: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold3: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold3: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold3: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold3: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold3: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold3: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold4: preprocessor 1/1:
  !  The following columns have zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units,
    garage_2_attachment_x_multi_property_indicator, and
    garage_2_area_x_multi_property_indicator.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold4: preprocessor 1/1:
  !  The following columns have zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units,
    garage_2_attachment_x_multi_property_indicator, and
    garage_2_area_x_multi_property_indicator.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold4: preprocessor 1/1:
  !  The following columns have zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units,
    garage_2_attachment_x_multi_property_indicator, and
    garage_2_area_x_multi_property_indicator.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold4: preprocessor 1/1:
  !  The following columns have zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units,
    garage_2_attachment_x_multi_property_indicator, and
    garage_2_area_x_multi_property_indicator.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold4: preprocessor 1/1:
  !  The following columns have zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units,
    garage_2_attachment_x_multi_property_indicator, and
    garage_2_area_x_multi_property_indicator.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold4: preprocessor 1/1:
  !  The following columns have zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units,
    garage_2_attachment_x_multi_property_indicator, and
    garage_2_area_x_multi_property_indicator.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold4: preprocessor 1/1:
  !  The following columns have zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units,
    garage_2_attachment_x_multi_property_indicator, and
    garage_2_area_x_multi_property_indicator.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold4: preprocessor 1/1:
  !  The following columns have zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units,
    garage_2_attachment_x_multi_property_indicator, and
    garage_2_area_x_multi_property_indicator.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold4: preprocessor 1/1:
  !  The following columns have zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units,
    garage_2_attachment_x_multi_property_indicator, and
    garage_2_area_x_multi_property_indicator.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold4: preprocessor 1/1:
  !  The following columns have zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units,
    garage_2_attachment_x_multi_property_indicator, and
    garage_2_area_x_multi_property_indicator.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold4: preprocessor 1/1:
  !  The following columns have zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units,
    garage_2_attachment_x_multi_property_indicator, and
    garage_2_area_x_multi_property_indicator.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold5: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold5: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold5: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold5: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold5: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold5: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold5: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold5: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold5: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold5: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
! Fold5: preprocessor 1/1:
  !  The following column has zero variance so scaling cannot be used:
    garage_2_attachment_x_number_of_commercial_units.
  ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
    before normalizing.
# Stop the timer and print the elapsed time
# NOTE(review): presumably pairs with a `tictoc::tic()` call made before the
# tuning step above; that call is outside this section -- confirm.
tictoc::toc()
2462.463 sec elapsed
# Tuning results: the 10 best-performing candidates, ranked by RMSE
show_best(tune_results, metric = 'rmse', n = 10)
# A tibble: 10 × 8
     penalty mixture .metric .estimator    mean     n std_err .config           
       <dbl>   <dbl> <chr>   <chr>        <dbl> <int>   <dbl> <chr>             
 1 0.00001       0.3 rmse    standard   261620.     5  23943. Preprocessor1_Mod…
 2 0.0000146     0.3 rmse    standard   261620.     5  23943. Preprocessor1_Mod…
 3 0.0000212     0.3 rmse    standard   261620.     5  23943. Preprocessor1_Mod…
 4 0.0000309     0.3 rmse    standard   261620.     5  23943. Preprocessor1_Mod…
 5 0.0000450     0.3 rmse    standard   261620.     5  23943. Preprocessor1_Mod…
 6 0.0000655     0.3 rmse    standard   261620.     5  23943. Preprocessor1_Mod…
 7 0.0000954     0.3 rmse    standard   261620.     5  23943. Preprocessor1_Mod…
 8 0.000139      0.3 rmse    standard   261620.     5  23943. Preprocessor1_Mod…
 9 0.000202      0.3 rmse    standard   261620.     5  23943. Preprocessor1_Mod…
10 0.000295      0.3 rmse    standard   261620.     5  23943. Preprocessor1_Mod…
# Tuning results: the 10 best-performing candidates, ranked by MAE
show_best(tune_results, metric = 'mae', n = 10)
# A tibble: 10 × 8
   penalty mixture .metric .estimator    mean     n std_err .config             
     <dbl>   <dbl> <chr>   <chr>        <dbl> <int>   <dbl> <chr>               
 1    687.     0.1 mae     standard   137617.     5    267. Preprocessor1_Model…
 2    471.     0.2 mae     standard   137618.     5    266. Preprocessor1_Model…
 3    324.     0.2 mae     standard   137623.     5    261. Preprocessor1_Model…
 4    471.     0.1 mae     standard   137625.     5    263. Preprocessor1_Model…
 5    324.     0.3 mae     standard   137625.     5    265. Preprocessor1_Model…
 6   1000      0.1 mae     standard   137626.     5    273. Preprocessor1_Model…
 7    222.     0.4 mae     standard   137627.     5    263. Preprocessor1_Model…
 8    222.     0.5 mae     standard   137628.     5    267. Preprocessor1_Model…
 9    324.     0.4 mae     standard   137631.     5    267. Preprocessor1_Model…
10    222.     0.3 mae     standard   137631.     5    261. Preprocessor1_Model…
# Visualize the tuning results
autoplot(tune_results)