# A tsibble: 99,013 x 5 [1M]
# Key: area_name, process [314]
date area_name process value index
<date> <chr> <chr> <int> <mth>
1 1989-01-01 Alabama VCS 3434 1989 Jan
2 1989-02-01 Alabama VCS 3514 1989 Feb
3 1989-03-01 Alabama VCS 3395 1989 Mar
4 1989-04-01 Alabama VCS 2369 1989 Apr
5 1989-05-01 Alabama VCS 1720 1989 May
6 1989-06-01 Alabama VCS 1215 1989 Jun
7 1989-07-01 Alabama VCS 1673 1989 Jul
8 1989-08-01 Alabama VCS 1117 1989 Aug
9 1989-09-01 Alabama VCS 1189 1989 Sep
10 1989-10-01 Alabama VCS 1382 1989 Oct
# ℹ 99,003 more rows
# Key table stored on the tsibble; its area_name and process columns
# identify each individual series in ts_obj.
keys <- attr(ts_obj, "key")
Feature Engineering
The next step is to collapse the time series into a feature table. This involves looping over each series to:
Check for missing values and impute if possible
Drop series that do not have a sufficient number of observations
Calculate a set of features for each series
We will use the tsfeatures library to create for each series a set of features such as:
Trend
Autocorrelation features
Arch stat features
Nonlinearity measurement feature
# Build the feature table: one row per (area_name, process) series in ts_obj.
#
# For every row of `keys` the matching series is filtered out of `ts_obj`,
# missing values are imputed when fewer than 10% of the observations are
# missing and the series contains no zeros, and the tsfeatures functions are
# applied to the resulting values. Series that cannot be used are kept as rows
# with `success = FALSE` (identified by area_name/process) so they can be
# inspected or filtered out later.

# Return x[idx] as a double, or NA when idx falls outside the series.
# Plain `x[idx]` returns a zero-length vector for idx == 0 and drops an
# element for idx < 0, which breaks the imputation arithmetic for gaps near
# the start or the end of a series.
value_or_na <- function(x, idx) {
  if (idx >= 1 && idx <= length(x)) {
    as.numeric(x[idx])
  } else {
    NA_real_
  }
}

# Average of the observations `lags` steps before position `n`. The result is
# NA when any of those observations is missing or lies before the start of x.
lagged_mean <- function(x, n, lags) {
  lagged <- vapply(lags, function(k) value_or_na(x, n - k), numeric(1))
  sum(lagged) / length(lags)
}

# Impute the missing values of a numeric vector `x`.
#
# 1. If the series has consecutive gaps, every missing position that is
#    directly followed by another missing position is filled with the mean of
#    the values 1, 2 and 3 periods earlier.
# 2. Every position that is still missing is filled with the mean of its two
#    neighbours, or, for the last observation, with the mean of the values 1
#    and 2 periods earlier.
#
# Positions that cannot be filled (their inputs are missing or out of range)
# stay NA; the caller decides what to do with them. `period` defaults to 12
# observations, i.e. one year for the monthly data used here.
impute_series_values <- function(x, period = 12L) {
  missing_idx <- which(is.na(x))

  if (any(diff(missing_idx) == 1)) {
    followed_by_gap <- missing_idx[which(diff(missing_idx) == 1)]
    for (n in followed_by_gap) {
      x[n] <- lagged_mean(x, n, period * 1:3)
    }
    missing_idx <- which(is.na(x))
  }

  # Gaps are filled in order, so a later gap can use an earlier imputed value.
  for (n in missing_idx) {
    if (n < length(x)) {
      x[n] <- (value_or_na(x, n - 1) + value_or_na(x, n + 1)) / 2
    } else {
      x[n] <- lagged_mean(x, n, period * 1:2)
    }
  }

  x
}

# seq_len() keeps the loop empty when `keys` has no rows (1:0 would not).
features_df <- lapply(seq_len(nrow(keys)), function(i) {
  series <- ts_obj |>
    dplyr::filter(
      area_name == keys$area_name[i],
      process == keys$process[i]
    )
  values <- series$value

  # Check for missing values and zeros
  n_missing <- sum(is.na(values))
  has_zero <- any(values == 0, na.rm = TRUE)

  usable <- length(values) > 0
  if (usable && n_missing > 0) {
    if (n_missing < length(values) * 0.1 && !has_zero) {
      values <- impute_series_values(values)
    } else {
      usable <- FALSE
    }
  }

  # Skip series that still contain gaps after imputation and constant series:
  # the latter have zero variance, which the feature functions cannot handle
  # (they fail with "zero-variance series").
  if (usable && (anyNA(values) || length(unique(values)) < 2)) {
    usable <- FALSE
  }

  if (!usable) {
    return(
      data.frame(
        area_name = keys$area_name[i],
        process = keys$process[i],
        success = FALSE
      )
    )
  }

  f <- tsfeatures::tsfeatures(values)
  f$arch_stat <- tsfeatures::arch_stat(values)
  f <- cbind(f, t(as.data.frame(tsfeatures::autocorr_features(values))))
  f$nonlinearity <- tsfeatures::nonlinearity(values)
  f <- cbind(f, t(as.data.frame(tsfeatures::pacf_features(values))))
  row.names(f) <- NULL
  f$area_name <- keys$area_name[i]
  f$process <- keys$process[i]
  # Drop the bookkeeping columns returned by tsfeatures()
  f$nperiods <- NULL
  f$frequency <- NULL
  f$seasonal_period <- NULL
  f$success <- TRUE
  f
}) |>
  dplyr::bind_rows()
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
Warning in tsfeatures::tsfeatures(d$value): Some series are constant and cannot
be scaled, so scaling has been disabled (`scale = FALSE`).
Error in ar.burg.default(x, aic = aic, order.max = order.max, na.action = na.action, :
zero-variance series
Warning in firstmin_ac(x, acfv): No minimum was found.
Warning in firstmin_ac(x, acfv): Some series are constant and cannot be scaled,
so scaling has been disabled (`scale = FALSE`).
Error in ar.burg.default(x, aic = aic, order.max = order.max, na.action = na.action, :
zero-variance series
Warning in firstmin_ac(x, acfv): No minimum was found.
Warning in firstmin_ac(x, acfv): Some series are constant and cannot be scaled,
so scaling has been disabled (`scale = FALSE`).
Error in ar.burg.default(x, aic = aic, order.max = order.max, na.action = na.action, :
zero-variance series
Warning in firstmin_ac(x, acfv): No minimum was found.
Warning in firstmin_ac(x, acfv): Some series are constant and cannot be scaled,
so scaling has been disabled (`scale = FALSE`).
Error in ar.burg.default(x, aic = aic, order.max = order.max, na.action = na.action, :
zero-variance series
Warning in firstmin_ac(x, acfv): No minimum was found.