# Load necessary packages
library(pacman)
p_load(here, readr, skimr, fst, tidymodels, glmnet)
# Load the data: read the full sales dataset from the project directory.
# `as.data.table = TRUE` asks fst to return the result as a data.table.
raw_df = read_fst(
  here('projects', 'project-000', 'sales-data.fst'),
  as.data.table = TRUE
)
# Alternatives for quicker testing (swap in for the call above):
#   Only rows 1 through 20,000:
#     read_fst(here(...), as.data.table = TRUE, from = 1, to = 2e4)
#   A random 5% sample of the rows:
#     read_fst(here(...), as.data.table = TRUE) |> sample_frac(0.05)
Problem Set 1
tidymodels example
1 Setup
First, let’s load the packages and data…
Note: When working with a relatively large dataset like this one, it can help to use a subset first. If you don’t care about representativeness, you could just read the first few rows of the dataset, e.g., the commented-out from = 1, to = 2e4 above. If you want a representative sample, you could sample_n or sample_frac from the full dataset (also commented out above). This approach can save you a lot of time when you’re testing out your code. For example, the prep step of the recipe was 20 times faster when I used a 5% sample.
2 Data cleaning and preprocessing
Before we write the official recipe, which is more about engineering features, I’m going to fix any strangeness in the dataset—e.g., the variables that are incorrectly class-ed. Examples include variables that should be numeric but were read as character due to $ or , and codes that should be factors but were read as character.
# Recode variables: fix column types and drop columns we will not use
clean_df =
  raw_df |>
  # Parse number-like text columns into numeric values
  mutate(
    across(
      c(
        sale_price, land_square_feet, building_square_feet,
        number_of_units, percent_ownership, other_improvements
      ),
      parse_number
    )
  ) |>
  # Store coded variables (materials, finishes, classes, and the listed
  # code columns) as character so they are not treated as numbers
  mutate(
    across(
      c(
        ends_with('material'),
        ends_with('finish'),
        contains('class'),
        pin, use, type_of_residence, design_plan, multi_code
      ),
      as.character
    )
  ) |>
  # Remove unwanted variables :(
  select(
    !c(
      starts_with('square_root'),
      contains('squared'),
      starts_with('estimate'),
      deed_no, census_tract,
      town_code, town_and_neighborhood,
      neighborhood_code, neigborhood_code_mapping,
      total_building_square_feet, property_address,
      sale_date, pure_market_filter
    )
  )
I’m also going to remove observations with a sales price less than or equal to $1. Ideally, we would have a flag for “non-arm’s-length” sales. We don’t, so this crude filter is the best we can do to remove non-arm’s-length sales.
# Keep only sales priced above $1 (crude screen for non-arm's-length sales)
clean_df = filter(clean_df, sale_price > 1)
# Summarize the cleaned data
skim(clean_df)
| Name | clean_df |
| Number of rows | 333615 |
| Number of columns | 64 |
| Key | NULL |
| _______________________ | |
| Column type frequency: | |
| character | 15 |
| logical | 1 |
| numeric | 48 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| pin | 0 | 1 | 14 | 21 | 0 | 267980 | 0 |
| property_class | 0 | 1 | 3 | 3 | 0 | 14 | 0 |
| type_of_residence | 0 | 1 | 1 | 1 | 0 | 8 | 0 |
| wall_material | 0 | 1 | 1 | 1 | 0 | 4 | 0 |
| roof_material | 0 | 1 | 1 | 1 | 0 | 6 | 0 |
| basement_finish | 1 | 1 | 1 | 1 | 0 | 3 | 0 |
| attic_finish | 0 | 1 | 1 | 1 | 0 | 4 | 0 |
| design_plan | 0 | 1 | 1 | 1 | 0 | 3 | 0 |
| garage_1_material | 0 | 1 | 1 | 1 | 0 | 5 | 0 |
| garage_2_material | 0 | 1 | 1 | 1 | 0 | 4 | 0 |
| multi_code | 0 | 1 | 1 | 2 | 0 | 27 | 0 |
| modeling_group | 0 | 1 | 2 | 2 | 0 | 2 | 0 |
| use | 0 | 1 | 1 | 1 | 0 | 2 | 0 |
| condo_class_factor | 333615 | 0 | NA | NA | 0 | 0 | 0 |
| condition_desirability_and_utility | 0 | 1 | 0 | 2 | 314723 | 17 | 0 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| large_lot | 333615 | 0 | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| land_square_feet | 0 | 1.00 | 7018.69 | 13323.39 | 149.00 | 3720.00 | 5060.00 | 7980.00 | 2980767.00 | ▇▁▁▁▁ |
| apartments | 0 | 1.00 | 0.39 | 1.05 | -5.00 | 0.00 | 0.00 | 0.00 | 6.00 | ▁▁▇▁▁ |
| rooms | 0 | 1.00 | 7.08 | 3.35 | 2.00 | 5.00 | 6.00 | 8.00 | 241.00 | ▇▁▁▁▁ |
| bedrooms | 0 | 1.00 | 3.54 | 1.53 | 1.00 | 3.00 | 3.00 | 4.00 | 90.00 | ▇▁▁▁▁ |
| basement | 0 | 1.00 | 1.72 | 0.97 | 1.00 | 1.00 | 1.00 | 3.00 | 4.00 | ▇▂▁▃▁ |
| central_heating | 0 | 1.00 | 1.16 | 0.37 | 0.00 | 1.00 | 1.00 | 1.00 | 2.00 | ▁▁▇▁▂ |
| other_heating | 0 | 1.00 | 4.89 | 0.56 | 2.00 | 5.00 | 5.00 | 5.00 | 5.00 | ▁▁▁▁▇ |
| central_air | 0 | 1.00 | 0.47 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
| fireplaces | 0 | 1.00 | 0.31 | 0.54 | 0.00 | 0.00 | 0.00 | 1.00 | 2.00 | ▇▁▂▁▁ |
| attic_type | 0 | 1.00 | 2.49 | 0.79 | 1.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▂▁▂▁▇ |
| half_baths | 0 | 1.00 | 0.46 | 0.57 | 0.00 | 0.00 | 0.00 | 1.00 | 9.00 | ▇▁▁▁▁ |
| cathedral_ceiling | 0 | 1.00 | 0.88 | 0.97 | 0.00 | 0.00 | 0.00 | 2.00 | 2.00 | ▇▁▁▁▆ |
| construction_quality | 0 | 1.00 | 1.99 | 0.11 | 1.00 | 2.00 | 2.00 | 2.00 | 3.00 | ▁▁▇▁▁ |
| renovation | 333006 | 0.00 | 1.12 | 0.33 | 1.00 | 1.00 | 1.00 | 1.00 | 2.00 | ▇▁▁▁▁ |
| site_desirability | 0 | 1.00 | 2.00 | 0.08 | 1.00 | 2.00 | 2.00 | 2.00 | 3.00 | ▁▁▇▁▁ |
| garage_1_size | 0 | 1.00 | 2.27 | 1.36 | 0.00 | 1.00 | 3.00 | 3.00 | 7.00 | ▅▁▇▁▁ |
| garage_1_attachment | 0 | 1.00 | 1.37 | 0.75 | 0.00 | 1.00 | 2.00 | 2.00 | 2.00 | ▂▁▅▁▇ |
| garage_1_area | 0 | 1.00 | 1.58 | 0.76 | 0.00 | 1.00 | 2.00 | 2.00 | 2.00 | ▂▁▁▁▇ |
| garage_2_size | 0 | 1.00 | 7.00 | 0.11 | 1.00 | 7.00 | 7.00 | 7.00 | 7.00 | ▁▁▁▁▇ |
| garage_2_attachment | 0 | 1.00 | 0.00 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 2.00 | ▇▁▁▁▁ |
| garage_2_area | 0 | 1.00 | 0.01 | 0.15 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| porch | 0 | 1.00 | 2.66 | 0.75 | 1.00 | 3.00 | 3.00 | 3.00 | 3.00 | ▂▁▁▁▇ |
| other_improvements | 0 | 1.00 | 15.27 | 517.79 | 0.00 | 0.00 | 0.00 | 0.00 | 84336.00 | ▇▁▁▁▁ |
| building_square_feet | 0 | 1.00 | 1806.23 | 1046.84 | 392.00 | 1128.00 | 1473.00 | 2174.00 | 21796.00 | ▇▁▁▁▁ |
| repair_condition | 0 | 1.00 | 2.00 | 0.12 | 1.00 | 2.00 | 2.00 | 2.00 | 3.00 | ▁▁▇▁▁ |
| number_of_commercial_units | 0 | 1.00 | 0.01 | 0.12 | 0.00 | 0.00 | 0.00 | 0.00 | 5.00 | ▇▁▁▁▁ |
| sale_price | 0 | 1.00 | 288388.40 | 343122.18 | 2.00 | 119000.00 | 214100.00 | 349000.00 | 71000000.00 | ▇▁▁▁▁ |
| longitude | 3424 | 0.99 | -87.78 | 0.15 | -88.26 | -87.84 | -87.75 | -87.68 | -87.52 | ▁▁▃▇▃ |
| latitude | 3424 | 0.99 | 41.85 | 0.17 | 41.47 | 41.73 | 41.87 | 42.00 | 42.15 | ▃▅▇▇▆ |
| multi_property_indicator | 0 | 1.00 | 0.02 | 0.14 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| full_baths | 0 | 1.00 | 1.69 | 0.92 | 1.00 | 1.00 | 1.00 | 2.00 | 42.00 | ▇▁▁▁▁ |
| age | 0 | 1.00 | 65.80 | 32.20 | 1.00 | 46.00 | 61.00 | 91.00 | 181.00 | ▃▇▅▂▁ |
| number_of_units | 333615 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| percent_ownership | 332778 | 0.00 | 52.91 | 16.12 | 10.00 | 50.00 | 50.00 | 50.00 | 98.00 | ▁▁▇▂▁ |
| multi_family_indicator | 282226 | 0.15 | 211.06 | 0.24 | 211.00 | 211.00 | 211.00 | 211.00 | 212.00 | ▇▁▁▁▁ |
| o_hare_noise | 3424 | 0.99 | 0.01 | 0.12 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| floodplain | 3424 | 0.99 | 0.02 | 0.15 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| road_proximity | 3424 | 0.99 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| condo_strata | 333615 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| sale_year | 0 | 1.00 | 2016.06 | 2.00 | 2013.00 | 2014.00 | 2016.00 | 2018.00 | 2019.00 | ▇▃▅▅▇ |
| sale_quarter | 0 | 1.00 | 78.80 | 7.99 | 65.00 | 72.00 | 79.00 | 86.00 | 92.00 | ▇▆▇▇▇ |
| sale_half_year | 0 | 1.00 | 39.65 | 3.99 | 33.00 | 36.00 | 40.00 | 43.00 | 46.00 | ▇▆▅▇▇ |
| sale_quarter_of_year | 0 | 1.00 | 2.56 | 1.07 | 1.00 | 2.00 | 3.00 | 3.00 | 4.00 | ▆▇▁▇▆ |
| sale_month_of_year | 0 | 1.00 | 6.65 | 3.27 | 1.00 | 4.00 | 7.00 | 9.00 | 12.00 | ▇▆▆▆▇ |
| sale_half_of_year | 0 | 1.00 | 1.53 | 0.50 | 1.00 | 1.00 | 2.00 | 2.00 | 2.00 | ▇▁▁▁▇ |
| most_recent_sale | 214 | 1.00 | 0.76 | 0.43 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▂▁▁▁▇ |
| age_decade | 0 | 1.00 | 6.58 | 3.22 | 0.10 | 4.60 | 6.10 | 9.10 | 18.10 | ▃▇▅▂▁ |
| garage_indicator | 0 | 1.00 | 0.83 | 0.37 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▂▁▁▁▇ |
Let’s define a recipe for cleaning and preprocessing the data. As discussed in class, we’ll at least need to convert categorical predictors to dummies and standardize the numeric predictors.
# Define a recipe for preprocessing the data.
# The outcome is sale_price; every other column starts out as a predictor.
a_recipe =
  # Start the recipe
  recipe(sale_price ~ ., data = clean_df) |>
  # Update ID variable role (pin is kept but not used as a predictor)
  update_role(pin, new_role = "id") |>
  # Drop variables with too many missing values (more than 1% missing)
  step_filter_missing(all_predictors(), threshold = 0.01) |>
  # Impute missing values in numeric predictors with the median
  step_impute_median(all_numeric_predictors()) |>
  # Interact numeric predictors with each other
  step_interact(~ all_numeric_predictors():all_numeric_predictors()) |>
  # Drop constant numeric columns before normalizing: some interactions
  # (e.g., garage_2_attachment_x_number_of_commercial_units) have zero
  # variance, which otherwise triggers a "scaling cannot be used" warning
  step_zv(all_numeric_predictors()) |>
  # Normalize numeric predictors
  step_normalize(all_numeric_predictors()) |>
  # Impute missing values in categorical predictors with the most common value
  step_impute_mode(all_nominal_predictors()) |>
  # Collapse infrequent factor levels (below 5%) into 'other'
  step_other(all_nominal_predictors(), threshold = 0.05) |>
  # Create dummy variables for categorical predictors (one column per level)
  step_dummy(all_nominal_predictors(), one_hot = TRUE) |>
  # Remove near-zero-variance predictors
  step_nzv(all_predictors()) |>
  # Remove predictors that are highly correlated with other predictors
  step_corr(all_predictors(), threshold = 0.9) |>
  # Remove variables that are linear combinations of other predictors
  step_lincomb(all_predictors())
# Let's see the recipe
a_recipe
── Recipe ──────────────────────────────────────────────────────────────────────
── Inputs
Number of variables by role
outcome: 1
predictor: 62
id: 1
── Operations
• Missing value column filter on: all_predictors()
• Median imputation for: all_numeric_predictors()
• Interactions with: all_numeric_predictors():all_numeric_predictors()
• Centering and scaling for: all_numeric_predictors()
• Mode imputation for: all_nominal_predictors()
• Collapsing factor levels for: all_nominal_predictors()
• Dummy variables from: all_nominal_predictors()
• Sparse, unbalanced variable filter on: all_predictors()
• Correlation filter on: all_predictors()
• Linear combination filter on: all_predictors()
Notice that executing this recipe once takes quite some time.
# Time one full run of the recipe: estimate it (prep), then apply it to the
# training data (bake with new_data = NULL)
tictoc::tic()
baked_df = bake(prep(a_recipe), new_data = NULL)
Warning: ! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
tictoc::toc()
198.264 sec elapsed
# Summarize the baked data
skim(baked_df)
| Name | baked_df |
| Number of rows | 333615 |
| Number of columns | 156 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| numeric | 155 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| pin | 0 | 1 | 14 | 21 | 0 | 267980 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| sale_year | 0 | 1 | 0.00 | 1.00 | -1.53 | -1.03 | -0.03 | 0.97 | 1.4700e+00 | ▇▃▅▅▇ |
| sale_price | 0 | 1 | 288388.40 | 343122.18 | 2.00 | 119000.00 | 214100.00 | 349000.00 | 7.1000e+07 | ▇▁▁▁▁ |
| land_square_feet_x_basement | 0 | 1 | 0.00 | 1.00 | -0.41 | -0.30 | -0.21 | 0.10 | 2.6801e+02 | ▇▁▁▁▁ |
| land_square_feet_x_central_heating | 0 | 1 | 0.00 | 1.00 | -0.46 | -0.24 | -0.10 | 0.05 | 3.4300e+02 | ▇▁▁▁▁ |
| land_square_feet_x_garage_1_size | 0 | 1 | 0.00 | 1.00 | -0.35 | -0.26 | -0.14 | 0.06 | 2.3421e+02 | ▇▁▁▁▁ |
| land_square_feet_x_garage_1_attachment | 0 | 1 | 0.00 | 1.00 | -0.46 | -0.17 | -0.07 | 0.10 | 2.9191e+02 | ▇▁▁▁▁ |
| land_square_feet_x_building_square_feet | 0 | 1 | 0.00 | 1.00 | -0.26 | -0.17 | -0.12 | -0.02 | 3.0528e+02 | ▇▁▁▁▁ |
| land_square_feet_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -0.44 | -0.36 | -0.12 | 0.14 | 2.3849e+02 | ▇▁▁▁▁ |
| land_square_feet_x_age_decade | 0 | 1 | 0.00 | 1.00 | -0.37 | -0.15 | -0.05 | 0.04 | 3.4956e+02 | ▇▁▁▁▁ |
| apartments_x_basement | 0 | 1 | 0.00 | 1.00 | -8.84 | -0.32 | -0.32 | -0.32 | 1.3320e+01 | ▁▇▁▁▁ |
| apartments_x_central_heating | 0 | 1 | 0.00 | 1.00 | -6.38 | -0.34 | -0.34 | -0.34 | 6.9000e+00 | ▁▁▇▁▁ |
| apartments_x_cathedral_ceiling | 0 | 1 | 0.00 | 1.00 | -6.18 | -0.30 | -0.30 | -0.30 | 6.7600e+00 | ▁▁▇▁▁ |
| apartments_x_porch | 0 | 1 | 0.00 | 1.00 | -5.69 | -0.34 | -0.34 | -0.34 | 6.0900e+00 | ▁▁▇▁▁ |
| apartments_x_full_baths | 0 | 1 | 0.00 | 1.00 | -8.08 | -0.28 | -0.28 | -0.28 | 1.5760e+01 | ▁▇▁▁▁ |
| apartments_x_sale_half_of_year | 0 | 1 | 0.00 | 1.00 | -6.28 | -0.35 | -0.35 | -0.35 | 6.7600e+00 | ▁▁▇▁▁ |
| apartments_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -5.82 | -0.31 | -0.31 | -0.31 | 6.3100e+00 | ▁▁▇▁▁ |
| apartments_x_garage_indicator | 0 | 1 | 0.00 | 1.00 | -6.71 | -0.30 | -0.30 | -0.30 | 7.4000e+00 | ▁▁▇▁▁ |
| rooms_x_bedrooms | 0 | 1 | 0.00 | 1.00 | -0.55 | -0.29 | -0.23 | 0.05 | 3.2051e+02 | ▇▁▁▁▁ |
| rooms_x_full_baths | 0 | 1 | 0.00 | 1.00 | -0.62 | -0.47 | -0.32 | 0.09 | 7.1930e+01 | ▇▁▁▁▁ |
| bedrooms_x_basement | 0 | 1 | 0.00 | 1.00 | -1.26 | -0.75 | -0.49 | 0.79 | 2.1500e+01 | ▇▁▁▁▁ |
| bedrooms_x_central_heating | 0 | 1 | 0.00 | 1.00 | -1.40 | -0.41 | -0.41 | -0.08 | 5.8020e+01 | ▇▁▁▁▁ |
| bedrooms_x_other_heating | 0 | 1 | 0.00 | 1.00 | -1.97 | -0.30 | -0.30 | 0.35 | 5.5590e+01 | ▇▁▁▁▁ |
| bedrooms_x_attic_type | 0 | 1 | 0.00 | 1.00 | -1.51 | -0.55 | 0.02 | 0.60 | 5.0160e+01 | ▇▁▁▁▁ |
| bedrooms_x_half_baths | 0 | 1 | 0.00 | 1.00 | -0.56 | -0.56 | -0.56 | 0.44 | 5.8320e+01 | ▇▁▁▁▁ |
| bedrooms_x_cathedral_ceiling | 0 | 1 | 0.00 | 1.00 | -0.78 | -0.78 | -0.78 | 0.64 | 1.0570e+01 | ▇▁▁▁▁ |
| bedrooms_x_garage_1_size | 0 | 1 | 0.00 | 1.00 | -1.16 | -0.73 | 0.13 | 0.56 | 3.7580e+01 | ▇▁▁▁▁ |
| bedrooms_x_garage_1_area | 0 | 1 | 0.00 | 1.00 | -1.56 | -0.42 | 0.16 | 0.73 | 5.0110e+01 | ▇▁▁▁▁ |
| bedrooms_x_porch | 0 | 1 | 0.00 | 1.00 | -1.71 | -0.68 | -0.07 | 0.55 | 5.3620e+01 | ▇▁▁▁▁ |
| bedrooms_x_age | 0 | 1 | 0.00 | 1.00 | -1.24 | -0.57 | -0.29 | 0.26 | 4.3410e+01 | ▇▁▁▁▁ |
| bedrooms_x_sale_month_of_year | 0 | 1 | 0.00 | 1.00 | -1.39 | -0.71 | -0.16 | 0.52 | 4.3040e+01 | ▇▁▁▁▁ |
| bedrooms_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -1.35 | -0.34 | 0.16 | 0.67 | 1.9780e+01 | ▇▁▁▁▁ |
| basement_x_central_heating | 0 | 1 | 0.00 | 1.00 | -1.67 | -0.80 | 0.06 | 0.93 | 5.2600e+00 | ▇▇▁▁▁ |
| basement_x_central_air | 0 | 1 | 0.00 | 1.00 | -0.75 | -0.75 | -0.75 | 0.12 | 2.7300e+00 | ▇▃▁▂▁ |
| basement_x_fireplaces | 0 | 1 | 0.00 | 1.00 | -0.51 | -0.51 | -0.51 | 0.41 | 6.7800e+00 | ▇▁▁▁▁ |
| basement_x_attic_type | 0 | 1 | 0.00 | 1.00 | -1.13 | -0.46 | -0.46 | 0.55 | 2.5600e+00 | ▇▁▂▂▁ |
| basement_x_half_baths | 0 | 1 | 0.00 | 1.00 | -0.68 | -0.68 | -0.68 | 0.13 | 2.1950e+01 | ▇▁▁▁▁ |
| basement_x_cathedral_ceiling | 0 | 1 | 0.00 | 1.00 | -0.71 | -0.71 | -0.71 | 0.25 | 3.1500e+00 | ▇▃▁▁▁ |
| basement_x_garage_1_size | 0 | 1 | 0.00 | 1.00 | -1.14 | -0.56 | -0.26 | 0.61 | 7.0600e+00 | ▇▂▁▁▁ |
| basement_x_garage_1_attachment | 0 | 1 | 0.00 | 1.00 | -1.26 | -0.71 | -0.15 | 0.40 | 3.1600e+00 | ▅▇▁▁▁ |
| basement_x_garage_1_area | 0 | 1 | 0.00 | 1.00 | -1.26 | -0.34 | -0.34 | 0.58 | 2.4100e+00 | ▃▇▂▂▁ |
| basement_x_building_square_feet | 0 | 1 | 0.00 | 1.00 | -1.12 | -0.72 | -0.29 | 0.38 | 2.3950e+01 | ▇▁▁▁▁ |
| basement_x_repair_condition | 0 | 1 | 0.00 | 1.00 | -1.24 | -0.73 | -0.73 | 1.32 | 4.3800e+00 | ▇▂▃▁▁ |
| basement_x_full_baths | 0 | 1 | 0.00 | 1.00 | -0.85 | -0.85 | -0.39 | 0.54 | 1.8230e+01 | ▇▁▁▁▁ |
| basement_x_age | 0 | 1 | 0.00 | 1.00 | -1.46 | -0.66 | -0.18 | 0.34 | 7.6800e+00 | ▇▂▁▁▁ |
| basement_x_sale_half_of_year | 0 | 1 | 0.00 | 1.00 | -0.91 | -0.91 | -0.35 | 0.21 | 3.0000e+00 | ▇▁▁▁▁ |
| basement_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -1.16 | -0.28 | -0.28 | 0.61 | 2.3700e+00 | ▅▇▂▃▁ |
| central_heating_x_other_heating | 0 | 1 | 0.00 | 1.00 | -2.92 | -0.34 | -0.34 | -0.34 | 2.2300e+00 | ▁▁▇▁▂ |
| central_heating_x_central_air | 0 | 1 | 0.00 | 1.00 | -0.90 | -0.90 | -0.90 | 0.95 | 2.7900e+00 | ▇▁▇▁▁ |
| central_heating_x_fireplaces | 0 | 1 | 0.00 | 1.00 | -0.54 | -0.54 | -0.54 | 0.98 | 5.5300e+00 | ▇▂▁▁▁ |
| central_heating_x_attic_type | 0 | 1 | 0.00 | 1.00 | -2.31 | -0.68 | 0.13 | 0.13 | 2.5800e+00 | ▂▂▇▁▁ |
| central_heating_x_half_baths | 0 | 1 | 0.00 | 1.00 | -0.73 | -0.73 | -0.73 | 0.69 | 1.9160e+01 | ▇▁▁▁▁ |
| central_heating_x_garage_1_size | 0 | 1 | 0.00 | 1.00 | -1.41 | -0.87 | 0.21 | 0.21 | 6.1400e+00 | ▆▇▂▁▁ |
| central_heating_x_porch | 0 | 1 | 0.00 | 1.00 | -2.53 | -0.01 | -0.01 | -0.01 | 2.5100e+00 | ▁▁▇▁▁ |
| central_heating_x_building_square_feet | 0 | 1 | 0.00 | 1.00 | -1.22 | -0.57 | -0.32 | 0.18 | 1.8110e+01 | ▇▁▁▁▁ |
| central_heating_x_full_baths | 0 | 1 | 0.00 | 1.00 | -1.32 | -0.66 | -0.01 | -0.01 | 2.6160e+01 | ▇▁▁▁▁ |
| central_heating_x_age | 0 | 1 | 0.00 | 1.00 | -1.41 | -0.60 | -0.32 | 0.40 | 4.9400e+00 | ▇▃▁▁▁ |
| central_heating_x_sale_month_of_year | 0 | 1 | 0.00 | 1.00 | -1.64 | -0.79 | -0.15 | 0.49 | 3.4700e+00 | ▅▇▅▁▁ |
| central_heating_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -1.49 | 0.22 | 0.22 | 0.22 | 1.9200e+00 | ▃▁▇▁▂ |
| central_heating_x_garage_indicator | 0 | 1 | 0.00 | 1.00 | -1.78 | 0.07 | 0.07 | 0.07 | 1.9300e+00 | ▂▁▇▁▂ |
| other_heating_x_garage_1_attachment | 0 | 1 | 0.00 | 1.00 | -1.78 | -0.45 | 0.88 | 0.88 | 8.8000e-01 | ▂▁▅▁▇ |
| other_heating_x_sale_year | 0 | 1 | 0.00 | 1.00 | -5.19 | 0.18 | 0.19 | 0.20 | 2.1000e-01 | ▁▁▁▁▇ |
| other_heating_x_sale_half_year | 0 | 1 | 0.00 | 1.00 | -4.32 | -0.47 | 0.20 | 0.71 | 1.2200e+00 | ▁▁▁▇▇ |
| other_heating_x_age_decade | 0 | 1 | 0.00 | 1.00 | -1.98 | -0.62 | -0.12 | 0.81 | 3.6500e+00 | ▃▇▃▂▁ |
| central_air_x_fireplaces | 0 | 1 | 0.00 | 1.00 | -0.49 | -0.49 | -0.49 | -0.49 | 3.5900e+00 | ▇▁▂▁▁ |
| central_air_x_half_baths | 0 | 1 | 0.00 | 1.00 | -0.61 | -0.61 | -0.61 | 1.35 | 1.7100e+01 | ▇▁▁▁▁ |
| central_air_x_cathedral_ceiling | 0 | 1 | 0.00 | 1.00 | -0.61 | -0.61 | -0.61 | 0.55 | 1.7200e+00 | ▇▁▁▁▃ |
| central_air_x_garage_1_attachment | 0 | 1 | 0.00 | 1.00 | -0.78 | -0.78 | -0.78 | 0.49 | 1.7700e+00 | ▇▁▃▁▂ |
| central_air_x_sale_month_of_year | 0 | 1 | 0.00 | 1.00 | -0.78 | -0.78 | -0.78 | 0.73 | 2.2300e+00 | ▇▁▂▁▂ |
| central_air_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -0.76 | -0.76 | -0.76 | 1.31 | 1.3100e+00 | ▇▁▁▁▅ |
| fireplaces_x_half_baths | 0 | 1 | 0.00 | 1.00 | -0.42 | -0.42 | -0.42 | -0.42 | 2.7080e+01 | ▇▁▁▁▁ |
| fireplaces_x_cathedral_ceiling | 0 | 1 | 0.00 | 1.00 | -0.40 | -0.40 | -0.40 | -0.40 | 4.2700e+00 | ▇▁▁▁▁ |
| fireplaces_x_garage_1_attachment | 0 | 1 | 0.00 | 1.00 | -0.51 | -0.51 | -0.51 | 0.78 | 4.6600e+00 | ▇▂▁▁▁ |
| fireplaces_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -0.50 | -0.50 | -0.50 | -0.50 | 3.5000e+00 | ▇▁▂▁▁ |
| attic_type_x_garage_1_size | 0 | 1 | 0.00 | 1.00 | -1.41 | -0.66 | 0.09 | 0.83 | 3.8200e+00 | ▇▂▇▁▁ |
| attic_type_x_garage_1_attachment | 0 | 1 | 0.00 | 1.00 | -1.56 | -0.62 | -0.15 | 1.25 | 1.2500e+00 | ▅▅▆▂▇ |
| attic_type_x_garage_1_area | 0 | 1 | 0.00 | 1.00 | -1.69 | -0.82 | 0.05 | 0.92 | 9.2000e-01 | ▃▃▁▂▇ |
| attic_type_x_porch | 0 | 1 | 0.00 | 1.00 | -1.95 | -1.28 | 0.76 | 0.76 | 7.6000e-01 | ▁▂▁▂▇ |
| attic_type_x_building_square_feet | 0 | 1 | 0.00 | 1.00 | -1.28 | -0.62 | -0.27 | 0.40 | 1.8600e+01 | ▇▁▁▁▁ |
| attic_type_x_full_baths | 0 | 1 | 0.00 | 1.00 | -1.13 | -0.44 | -0.44 | 0.60 | 1.3050e+01 | ▇▁▁▁▁ |
| attic_type_x_sale_month_of_year | 0 | 1 | 0.00 | 1.00 | -1.55 | -0.85 | -0.15 | 0.74 | 1.9400e+00 | ▇▇▆▅▅ |
| attic_type_x_sale_half_of_year | 0 | 1 | 0.00 | 1.00 | -1.58 | -1.01 | -0.45 | 1.24 | 1.2400e+00 | ▆▁▇▂▇ |
| attic_type_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -1.49 | -0.70 | 0.87 | 0.87 | 8.7000e-01 | ▃▂▁▂▇ |
| attic_type_x_age_decade | 0 | 1 | 0.00 | 1.00 | -1.72 | -0.72 | -0.15 | 0.41 | 4.0100e+00 | ▅▇▂▁▁ |
| half_baths_x_cathedral_ceiling | 0 | 1 | 0.00 | 1.00 | -0.50 | -0.50 | -0.50 | -0.50 | 1.8690e+01 | ▇▁▁▁▁ |
| half_baths_x_garage_1_attachment | 0 | 1 | 0.00 | 1.00 | -0.68 | -0.68 | -0.68 | 0.48 | 1.5590e+01 | ▇▁▁▁▁ |
| half_baths_x_sale_month_of_year | 0 | 1 | 0.00 | 1.00 | -0.69 | -0.69 | -0.69 | 0.66 | 1.5480e+01 | ▇▁▁▁▁ |
| half_baths_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -0.67 | -0.67 | -0.67 | 1.20 | 1.6200e+01 | ▇▁▁▁▁ |
| cathedral_ceiling_x_construction_quality | 0 | 1 | 0.00 | 1.00 | -0.90 | -0.90 | -0.90 | 1.16 | 2.1800e+00 | ▇▁▁▆▁ |
| cathedral_ceiling_x_garage_1_attachment | 0 | 1 | 0.00 | 1.00 | -0.71 | -0.71 | -0.71 | 0.50 | 1.7000e+00 | ▇▁▂▁▃ |
| cathedral_ceiling_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -0.73 | -0.73 | -0.73 | 1.42 | 1.4200e+00 | ▇▁▁▁▃ |
| construction_quality_x_garage_1_area | 0 | 1 | 0.00 | 1.00 | -2.06 | -0.75 | 0.57 | 0.57 | 1.8800e+00 | ▂▁▁▇▁ |
| construction_quality_x_porch | 0 | 1 | 0.00 | 1.00 | -2.84 | 0.47 | 0.47 | 0.47 | 2.4600e+00 | ▂▁▁▇▁ |
| construction_quality_x_building_square_feet | 0 | 1 | 0.00 | 1.00 | -1.50 | -0.64 | -0.32 | 0.35 | 3.0030e+01 | ▇▁▁▁▁ |
| construction_quality_x_full_baths | 0 | 1 | 0.00 | 1.00 | -1.29 | -0.74 | -0.74 | 0.35 | 4.4090e+01 | ▇▁▁▁▁ |
| construction_quality_x_sale_year | 0 | 1 | 0.00 | 1.00 | -9.38 | 0.07 | 0.09 | 0.11 | 9.5900e+00 | ▁▁▇▁▁ |
| construction_quality_x_sale_half_year | 0 | 1 | 0.00 | 1.00 | -5.10 | -0.77 | 0.12 | 0.79 | 6.5600e+00 | ▁▃▇▁▁ |
| construction_quality_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -1.76 | -0.59 | 0.57 | 0.57 | 1.7400e+00 | ▂▁▁▇▁ |
| site_desirability_x_sale_year | 0 | 1 | 0.00 | 1.00 | -12.56 | 0.00 | 0.03 | 0.05 | 1.2660e+01 | ▁▁▇▁▁ |
| site_desirability_x_sale_half_of_year | 0 | 1 | 0.00 | 1.00 | -2.04 | -1.05 | 0.94 | 0.94 | 2.9300e+00 | ▇▁▇▁▁ |
| garage_1_size_x_garage_1_attachment | 0 | 1 | 0.00 | 1.00 | -1.49 | -0.69 | -0.29 | 0.92 | 4.1200e+00 | ▆▆▇▁▁ |
| garage_1_size_x_garage_1_area | 0 | 1 | 0.00 | 1.00 | -1.62 | -0.86 | 0.67 | 0.67 | 3.7200e+00 | ▅▂▇▁▁ |
| garage_1_size_x_porch | 0 | 1 | 0.00 | 1.00 | -1.46 | -0.73 | 0.72 | 0.72 | 3.6200e+00 | ▇▁▇▁▁ |
| garage_1_size_x_full_baths | 0 | 1 | 0.00 | 1.00 | -1.01 | -0.76 | -0.25 | 0.52 | 3.1180e+01 | ▇▁▁▁▁ |
| garage_1_size_x_sale_month_of_year | 0 | 1 | 0.00 | 1.00 | -1.21 | -0.89 | -0.25 | 0.71 | 5.5100e+00 | ▇▅▁▁▁ |
| garage_1_size_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -1.13 | -1.13 | 0.17 | 0.82 | 3.4200e+00 | ▇▁▇▁▁ |
| garage_1_size_x_age_decade | 0 | 1 | 0.00 | 1.00 | -1.21 | -0.79 | -0.10 | 0.51 | 8.1400e+00 | ▇▂▁▁▁ |
| garage_1_attachment_x_porch | 0 | 1 | 0.00 | 1.00 | -1.60 | -0.70 | -0.26 | 1.08 | 1.0800e+00 | ▃▂▆▁▇ |
| garage_1_attachment_x_full_baths | 0 | 1 | 0.00 | 1.00 | -1.29 | -0.70 | -0.12 | -0.12 | 4.7610e+01 | ▇▁▁▁▁ |
| garage_1_attachment_x_sale_month_of_year | 0 | 1 | 0.00 | 1.00 | -1.28 | -0.86 | -0.16 | 0.68 | 2.0800e+00 | ▇▆▆▂▃ |
| garage_1_attachment_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -1.19 | -1.19 | -0.04 | 1.11 | 1.1100e+00 | ▇▁▅▁▇ |
| garage_1_area_x_porch | 0 | 1 | 0.00 | 1.00 | -1.75 | -0.91 | 0.77 | 0.77 | 7.7000e-01 | ▂▂▁▁▇ |
| garage_2_size_x_sale_year | 0 | 1 | 0.00 | 1.00 | -53.96 | -0.04 | 0.02 | 0.08 | 1.1000e-01 | ▁▁▁▁▇ |
| porch_x_building_square_feet | 0 | 1 | 0.00 | 1.00 | -1.37 | -0.57 | -0.28 | 0.35 | 1.6780e+01 | ▇▁▁▁▁ |
| porch_x_full_baths | 0 | 1 | 0.00 | 1.00 | -1.22 | -0.52 | -0.52 | 0.53 | 4.2580e+01 | ▇▁▁▁▁ |
| porch_x_sale_month_of_year | 0 | 1 | 0.00 | 1.00 | -1.62 | -0.84 | 0.03 | 0.91 | 1.7800e+00 | ▇▇▅▅▆ |
| porch_x_sale_half_of_year | 0 | 1 | 0.00 | 1.00 | -1.71 | -0.60 | -0.60 | 1.08 | 1.0800e+00 | ▃▇▁▁▇ |
| porch_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -1.54 | -0.78 | 0.75 | 0.75 | 7.5000e-01 | ▃▂▁▁▇ |
| porch_x_age_decade | 0 | 1 | 0.00 | 1.00 | -1.79 | -0.75 | -0.08 | 0.40 | 4.0600e+00 | ▅▇▂▁▁ |
| building_square_feet_x_sale_month_of_year | 0 | 1 | 0.00 | 1.00 | -1.20 | -0.65 | -0.21 | 0.35 | 1.6890e+01 | ▇▁▁▁▁ |
| building_square_feet_x_age_decade | 0 | 1 | 0.00 | 1.00 | -1.10 | -0.56 | -0.34 | 0.21 | 1.8610e+01 | ▇▁▁▁▁ |
| repair_condition_x_sale_year | 0 | 1 | 0.00 | 1.00 | -8.03 | 0.01 | 0.03 | 0.04 | 8.1100e+00 | ▁▁▇▁▁ |
| repair_condition_x_sale_half_year | 0 | 1 | 0.00 | 1.00 | -4.92 | -0.76 | 0.09 | 0.73 | 6.2700e+00 | ▁▃▇▁▁ |
| full_baths_x_age | 0 | 1 | 0.00 | 1.00 | -1.14 | -0.58 | -0.37 | 0.16 | 4.4820e+01 | ▇▁▁▁▁ |
| full_baths_x_sale_month_of_year | 0 | 1 | 0.00 | 1.00 | -1.17 | -0.60 | -0.25 | 0.43 | 5.1510e+01 | ▇▁▁▁▁ |
| full_baths_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -1.19 | -0.26 | -0.26 | 0.67 | 9.9600e+00 | ▇▁▁▁▁ |
| age_x_sale_month_of_year | 0 | 1 | 0.00 | 1.00 | -1.36 | -0.81 | -0.18 | 0.57 | 4.8300e+00 | ▇▅▂▁▁ |
| sale_quarter_of_year_x_sale_month_of_year | 0 | 1 | 0.00 | 1.00 | -1.27 | -0.81 | 0.04 | 0.43 | 1.8000e+00 | ▇▂▆▁▅ |
| sale_half_of_year_x_most_recent_sale | 0 | 1 | 0.00 | 1.00 | -1.48 | -0.21 | -0.21 | 1.05 | 1.0500e+00 | ▅▁▇▁▇ |
| most_recent_sale_x_age_decade | 0 | 1 | 0.00 | 1.00 | -1.25 | -1.23 | 0.10 | 0.63 | 3.3600e+00 | ▇▇▃▂▁ |
| age_decade_x_garage_indicator | 0 | 1 | 0.00 | 1.00 | -1.42 | -0.91 | 0.10 | 0.67 | 3.5000e+00 | ▆▇▃▂▁ |
| property_class_X202 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▁ |
| property_class_X203 | 0 | 1 | 0.31 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.0000e+00 | ▇▁▁▁▃ |
| property_class_X205 | 0 | 1 | 0.06 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▁ |
| property_class_X211 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▂ |
| property_class_X278 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▁ |
| property_class_X295 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▁ |
| property_class_other | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▁ |
| type_of_residence_X1 | 0 | 1 | 0.39 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.0000e+00 | ▇▁▁▁▅ |
| type_of_residence_X2 | 0 | 1 | 0.39 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.0000e+00 | ▇▁▁▁▅ |
| type_of_residence_X4 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▁ |
| type_of_residence_X5 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▁ |
| wall_material_X1 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.0000e+00 | ▇▁▁▁▃ |
| wall_material_X2 | 0 | 1 | 0.43 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.0000e+00 | ▇▁▁▁▆ |
| wall_material_X3 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▂ |
| roof_material_X1 | 0 | 1 | 0.89 | 0.31 | 0.00 | 1.00 | 1.00 | 1.00 | 1.0000e+00 | ▁▁▁▁▇ |
| roof_material_X2 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▁ |
| basement_finish_X3 | 0 | 1 | 0.68 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.0000e+00 | ▃▁▁▁▇ |
| attic_finish_X0 | 0 | 1 | 0.68 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.0000e+00 | ▃▁▁▁▇ |
| attic_finish_X1 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▁ |
| attic_finish_X3 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▂ |
| design_plan_X2 | 0 | 1 | 0.69 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.0000e+00 | ▃▁▁▁▇ |
| garage_1_material_X1 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.0000e+00 | ▆▁▁▁▇ |
| garage_1_material_X2 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▂ |
| garage_1_material_X3 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▁ |
| condition_desirability_and_utility_other | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0000e+00 | ▇▁▁▁▁ |
So if you are then asking R to run this recipe each time you fit a model (for each fold of cross-validation, for each value of the tuning parameters), that can add up to a lot of time.
One more note: I used step_other to collapse infrequent factor levels to an “other” category. This step will help you avoid creating a huge number of dummy variables that are mostly 0s. However, it could cause you to lose important information.
3 Elasticnet
Now we can set up our workflow. We will need to specify a model. Then we can add our recipe.
# Model specification: linear regression fit by the glmnet engine, with both
# the penalty and the mixture marked for tuning
net_model = set_engine(
  linear_reg(penalty = tune(), mixture = tune()),
  'glmnet'
)
# Workflow: bundle the preprocessing recipe with the model specification
net_wf =
  workflow() |>
  add_recipe(a_recipe) |>
  add_model(net_model)
# Print the workflow
net_wf
══ Workflow ════════════════════════════════════════════════════════════════════
Preprocessor: Recipe
Model: linear_reg()
── Preprocessor ────────────────────────────────────────────────────────────────
10 Recipe Steps
• step_filter_missing()
• step_impute_median()
• step_interact()
• step_normalize()
• step_impute_mode()
• step_other()
• step_dummy()
• step_nzv()
• step_corr()
• step_lincomb()
── Model ───────────────────────────────────────────────────────────────────────
Linear Regression Model Specification (regression)
Main Arguments:
penalty = tune()
mixture = tune()
Computational engine: glmnet
We’re finally ready to tune the model… after we create a resampling object (define the folds) and define the set of tuning parameters we wish to evaluate.
This may take some time. I’m going to use tictoc to time the execution of this code chunk again.
Note that parallelization can help here—to a degree. Tuning 5 levels of lambda took 99 seconds to run sequentially on my computer. When I parallelized (with 5 workers in future), the same code took approximately 30 seconds.
# Start timer
tictoc::tic()
# Define the resampling object (5-fold cross-validation on the cleaned data;
# the seed makes the fold assignment reproducible)
set.seed(12)
cv_folds = vfold_cv(clean_df, v = 5)
# Parallelize the tuning process: leave one core free, but always request at
# least one worker (availableCores() - 1 is 0 on a single-core machine, and
# plan() rejects a worker count of 0)
library(future)
plan(multisession, workers = max(1, parallelly::availableCores() - 1))
# Tune the model over a grid of 50 penalty values (1e-5 to 1e3, evenly spaced
# on the log10 scale) crossed with 11 mixture values (0 to 1)
tune_results =
  tune_grid(
    net_wf,
    resamples = cv_folds,
    grid = expand_grid(
      penalty = 10^seq(-5, 3, length.out = 50),
      mixture = seq(0, 1, length.out = 11)
    ),
    # Evaluate each candidate with RMSE, MAE, and R-squared
    metrics = metric_set(rmse, mae, rsq),
    control = control_grid(parallel_over = 'everything')
  )
! Fold1: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold1: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold1: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold1: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold1: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold1: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold1: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold1: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold1: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold1: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold1: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold2: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold2: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold2: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold2: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold2: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold2: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold2: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold2: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold2: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold2: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold2: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold3: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold3: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold3: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold3: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold3: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold3: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold3: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold3: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold3: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold3: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold3: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold4: preprocessor 1/1:
! The following columns have zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units,
garage_2_attachment_x_multi_property_indicator, and
garage_2_area_x_multi_property_indicator.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold4: preprocessor 1/1:
! The following columns have zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units,
garage_2_attachment_x_multi_property_indicator, and
garage_2_area_x_multi_property_indicator.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold4: preprocessor 1/1:
! The following columns have zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units,
garage_2_attachment_x_multi_property_indicator, and
garage_2_area_x_multi_property_indicator.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold4: preprocessor 1/1:
! The following columns have zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units,
garage_2_attachment_x_multi_property_indicator, and
garage_2_area_x_multi_property_indicator.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold4: preprocessor 1/1:
! The following columns have zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units,
garage_2_attachment_x_multi_property_indicator, and
garage_2_area_x_multi_property_indicator.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold4: preprocessor 1/1:
! The following columns have zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units,
garage_2_attachment_x_multi_property_indicator, and
garage_2_area_x_multi_property_indicator.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold4: preprocessor 1/1:
! The following columns have zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units,
garage_2_attachment_x_multi_property_indicator, and
garage_2_area_x_multi_property_indicator.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold4: preprocessor 1/1:
! The following columns have zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units,
garage_2_attachment_x_multi_property_indicator, and
garage_2_area_x_multi_property_indicator.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold4: preprocessor 1/1:
! The following columns have zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units,
garage_2_attachment_x_multi_property_indicator, and
garage_2_area_x_multi_property_indicator.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold4: preprocessor 1/1:
! The following columns have zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units,
garage_2_attachment_x_multi_property_indicator, and
garage_2_area_x_multi_property_indicator.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold4: preprocessor 1/1:
! The following columns have zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units,
garage_2_attachment_x_multi_property_indicator, and
garage_2_area_x_multi_property_indicator.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold5: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold5: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold5: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold5: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold5: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold5: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold5: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold5: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold5: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold5: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
! Fold5: preprocessor 1/1:
! The following column has zero variance so scaling cannot be used:
garage_2_attachment_x_number_of_commercial_units.
ℹ Consider using ?step_zv (`?recipes::step_zv()`) to remove those columns
before normalizing.
# Stop timer
tictoc::toc()
2462.463 sec elapsed
# Check out the tuning results: RMSE
tune_results |> show_best(metric = 'rmse', n = 10)
# A tibble: 10 × 8
penalty mixture .metric .estimator mean n std_err .config
<dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
1 0.00001 0.3 rmse standard 261620. 5 23943. Preprocessor1_Mod…
2 0.0000146 0.3 rmse standard 261620. 5 23943. Preprocessor1_Mod…
3 0.0000212 0.3 rmse standard 261620. 5 23943. Preprocessor1_Mod…
4 0.0000309 0.3 rmse standard 261620. 5 23943. Preprocessor1_Mod…
5 0.0000450 0.3 rmse standard 261620. 5 23943. Preprocessor1_Mod…
6 0.0000655 0.3 rmse standard 261620. 5 23943. Preprocessor1_Mod…
7 0.0000954 0.3 rmse standard 261620. 5 23943. Preprocessor1_Mod…
8 0.000139 0.3 rmse standard 261620. 5 23943. Preprocessor1_Mod…
9 0.000202 0.3 rmse standard 261620. 5 23943. Preprocessor1_Mod…
10 0.000295 0.3 rmse standard 261620. 5 23943. Preprocessor1_Mod…
# Check out the tuning results: MAE
tune_results |> show_best(metric = 'mae', n = 10)
# A tibble: 10 × 8
penalty mixture .metric .estimator mean n std_err .config
<dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
1 687. 0.1 mae standard 137617. 5 267. Preprocessor1_Model…
2 471. 0.2 mae standard 137618. 5 266. Preprocessor1_Model…
3 324. 0.2 mae standard 137623. 5 261. Preprocessor1_Model…
4 471. 0.1 mae standard 137625. 5 263. Preprocessor1_Model…
5 324. 0.3 mae standard 137625. 5 265. Preprocessor1_Model…
6 1000 0.1 mae standard 137626. 5 273. Preprocessor1_Model…
7 222. 0.4 mae standard 137627. 5 263. Preprocessor1_Model…
8 222. 0.5 mae standard 137628. 5 267. Preprocessor1_Model…
9 324. 0.4 mae standard 137631. 5 267. Preprocessor1_Model…
10 222. 0.3 mae standard 137631. 5 261. Preprocessor1_Model…
# Visualize the cross-validated metrics across the tuning grid
autoplot(tune_results)