Extending mlr3 to time series forecasting.
> [!IMPORTANT]
> This package is in an early stage of development and should be considered experimental. If you are interested in experimenting with it, we welcome your feedback!
Installation
Install the development version from GitHub:
# Uncomment the next line if the pak package is not installed yet.
# install.packages("pak")
# Install mlr3forecast from the GitHub repository mlr-org/mlr3forecast.
pak::pak("mlr-org/mlr3forecast")
Usage
Univariate
library(mlr3forecast)
library(mlr3learners)
# NOTE(review): no seed is set in this example, so the printed numbers
# presumably vary between runs -- confirm before relying on exact values.
# Load the "airpassengers" task.
task = tsk("airpassengers")
# Keep every feature except "date".
task$select(setdiff(task$feature_names, "date"))
# RMSE measure used to score the predictions below.
measure = msr("regr.rmse")
# Wrap a ranger regression learner in a Forecaster and train it on the task.
# NOTE(review): the second argument (1:3) is presumably the lags to use --
# confirm against the Forecaster documentation.
ff = Forecaster$new(lrn("regr.ranger"), 1:3)$train(task)
# Three new rows whose target value is NA, i.e. still to be predicted.
newdata = data.frame(passengers = rep(NA_real_, 3L))
prediction = ff$predict_newdata(newdata, task)
prediction
#> <PredictionRegr> for 3 observations:
#> row_ids truth response
#> 1 NA 448.8710
#> 2 NA 475.2456
#> 3 NA 480.5179
# Predict rows 142 to 144 of the task, for which the truth is known.
prediction = ff$predict(task, 142:144)
prediction
#> <PredictionRegr> for 3 observations:
#> row_ids truth response
#> 1 461 456.4968
#> 2 390 411.1712
#> 3 432 393.9585
# Score that prediction with the RMSE measure.
prediction$score(measure)
#> regr.rmse
#> 25.26957
# Resample a fresh, untrained Forecaster with "forecast_holdout" (ratio = 0.8).
ff = Forecaster$new(lrn("regr.ranger"), 1:3)
resampling = rsmp("forecast_holdout", ratio = 0.8)
rr = resample(task, ff, resampling)
rr$aggregate(measure)
#> regr.rmse
#> 105.8215
# Same evaluation with the "forecast_cv" resampling and its default settings.
resampling = rsmp("forecast_cv")
rr = resample(task, ff, resampling)
rr$aggregate(measure)
#> regr.rmse
#> 54.28352
Multivariate
library(mlr3learners)
library(mlr3pipelines)
# Reload the task; unlike the univariate example, no feature is removed here.
task = tsk("airpassengers")
# datefeatures currently requires POSIXct
# Pipeline: convert Date columns to POSIXct, then apply "datefeatures" with
# is_day, hour, minute and second switched off.
graph = ppl("convert_types", "Date", "POSIXct") %>>%
po("datefeatures",
param_vals = list(is_day = FALSE, hour = FALSE, minute = FALSE, second = FALSE)
)
# Train the graph on the task and take its first output as the new task.
new_task = graph$train(task)[[1L]]
ff = Forecaster$new(lrn("regr.ranger"), 1:3)$train(new_task)
prediction = ff$predict(new_task, 142:144)
# `measure` is the RMSE measure created in the univariate example.
prediction$score(measure)
#> regr.rmse
#> 17.0878
# Ids of the last three rows, in descending order: nrow, nrow - 1, nrow - 2.
# NOTE(review): the rows are therefore passed newest-first (the truth column
# below is reversed relative to rows 142:144) -- confirm this is intended.
row_ids = new_task$nrow - 0:2
# All columns are passed, so the printed prediction includes truth values.
ff$predict_newdata(new_task$data(rows = row_ids), new_task)
#> <PredictionRegr> for 3 observations:
#> row_ids truth response
#> 1 432 405.5814
#> 2 390 388.3657
#> 3 461 390.9778
# Only the feature columns are passed, so truth is printed as NA.
newdata = new_task$data(rows = row_ids, cols = new_task$feature_names)
ff$predict_newdata(newdata, new_task)
#> <PredictionRegr> for 3 observations:
#> row_ids truth response
#> 1 NA 405.5814
#> 2 NA 388.3657
#> 3 NA 390.9778
# Resample the Forecaster on the new task with "forecast_holdout" (ratio = 0.8).
resampling = rsmp("forecast_holdout", ratio = 0.8)
rr = resample(new_task, ff, resampling)
rr$aggregate(measure)
#> regr.rmse
#> 81.91252
# Same evaluation with the "forecast_cv" resampling and its default settings.
resampling = rsmp("forecast_cv")
rr = resample(new_task, ff, resampling)
rr$aggregate(measure)
#> regr.rmse
#> 41.87113
mlr3pipelines integration
# Chain the preprocessing graph and a Forecaster into a single learner and
# train it on the original task (not the preprocessed new_task).
# `graph`, `task` and `measure` are the objects created in the examples above.
ff = Forecaster$new(lrn("regr.ranger"), 1:3)
glrn = as_learner(graph %>>% ff)$train(task)
# Predict rows 142 to 144 and score the result with RMSE.
prediction = glrn$predict(task, 142:144)
prediction$score(measure)
#> regr.rmse
#> 33.74039
Example: Forecasting electricity demand
library(data.table)
library(mlr3learners)
library(mlr3pipelines)
# Build a forecasting task from tsibbledata::vic_elec: lower-case the column
# names, keep the rows from 2014 and aggregate by date into the summed demand
# (divided by 1e3), the maximum temperature and whether any row of that date
# is a holiday. The target is "demand" and the index is "date".
# NOTE: the `_[...]` pipe placeholder extraction needs R >= 4.3.
task = tsibbledata::vic_elec |>
as.data.table() |>
setnames(tolower) |>
_[
year(time) == 2014L,
.(demand = sum(demand) / 1e3, temperature = max(temperature), holiday = any(holiday)),
by = date
] |>
as_task_fcst(target = "demand", index = "date")
# Same preprocessing as in the multivariate example, with the year feature
# switched off as well.
graph = ppl("convert_types", "Date", "POSIXct") %>>%
po("datefeatures",
param_vals = list(year = FALSE, is_day = FALSE, hour = FALSE, minute = FALSE, second = FALSE)
)
ff = Forecaster$new(lrn("regr.ranger"), 1:3)
glrn = as_learner(graph %>>% ff)$train(task)
# Date in the last row of the task data.
max_date = task$data()[.N, date]
# The 14 dates after max_date: demand is NA (to be predicted), temperature is
# fixed at 26 and only the first date is marked as a holiday.
newdata = data.frame(
date = max_date + 1:14,
demand = rep(NA_real_, 14L),
temperature = 26,
holiday = c(TRUE, rep(FALSE, 13L))
)
prediction = glrn$predict_newdata(newdata, task)
prediction
#> <PredictionRegr> for 14 observations:
#> row_ids truth response
#> 1 NA 187.6208
#> 2 NA 191.8121
#> 3 NA 183.6753
#> --- --- ---
#> 12 NA 213.8759
#> 13 NA 218.4198
#> 14 NA 218.8139