Extending mlr3 to time series forecasting.
[!IMPORTANT] This package is in an early stage of development and should be considered experimental. If you are interested in experimenting with it, we welcome your feedback!
Installation
Install the development version from GitHub:
# install.packages("pak")
pak::pak("mlr-org/mlr3forecast")
Usage
Univariate
library(mlr3forecast)
library(mlr3learners)
task = tsk("airpassengers")
task$select(setdiff(task$feature_names, "date"))
measure = msr("regr.rmse")
ff = Forecaster$new(lrn("regr.ranger"), 1:3)$train(task)
newdata = data.frame(passengers = rep(NA_real_, 3L))
prediction = ff$predict_newdata(newdata, task)
prediction
#> <PredictionRegr> for 3 observations:
#> row_ids truth response
#> 1 NA 449.5651
#> 2 NA 476.3000
#> 3 NA 485.6691
prediction = ff$predict(task, 142:144)
prediction
#> <PredictionRegr> for 3 observations:
#> row_ids truth response
#> 1 461 457.6632
#> 2 390 414.3476
#> 3 432 405.2270
prediction$score(measure)
#> regr.rmse
#> 20.982
ff = Forecaster$new(lrn("regr.ranger"), 1:3)
resampling = rsmp("forecast_holdout", ratio = 0.8)
rr = resample(task, ff, resampling)
rr$aggregate(measure)
#> regr.rmse
#> 105.1238
resampling = rsmp("forecast_cv")
rr = resample(task, ff, resampling)
rr$aggregate(measure)
#> regr.rmse
#> 50.97585
Multivariate
library(mlr3learners)
library(mlr3pipelines)
task = tsk("airpassengers")
# datefeatures currently requires POSIXct
graph = ppl("convert_types", "Date", "POSIXct") %>>%
po("datefeatures",
param_vals = list(is_day = FALSE, hour = FALSE, minute = FALSE, second = FALSE)
)
new_task = graph$train(task)[[1L]]
ff = Forecaster$new(lrn("regr.ranger"), 1:3)$train(new_task)
prediction = ff$predict(new_task, 142:144)
prediction$score(measure)
#> regr.rmse
#> 19.31127
row_ids = new_task$nrow - 0:2
ff$predict_newdata(new_task$data(rows = row_ids), new_task)
#> <PredictionRegr> for 3 observations:
#> row_ids truth response
#> 1 432 402.8860
#> 2 390 389.8817
#> 3 461 385.0539
newdata = new_task$data(rows = row_ids, cols = new_task$feature_names)
ff$predict_newdata(newdata, new_task)
#> <PredictionRegr> for 3 observations:
#> row_ids truth response
#> 1 NA 402.8860
#> 2 NA 389.8817
#> 3 NA 385.0539
resampling = rsmp("forecast_holdout", ratio = 0.8)
rr = resample(new_task, ff, resampling)
rr$aggregate(measure)
#> regr.rmse
#> 83.11707
resampling = rsmp("forecast_cv")
rr = resample(new_task, ff, resampling)
rr$aggregate(measure)
#> regr.rmse
#> 45.38122
mlr3pipelines integration
ff = Forecaster$new(lrn("regr.ranger"), 1:3)
glrn = as_learner(graph %>>% ff)$train(task)
prediction = glrn$predict(task, 142:144)
prediction$score(measure)
#> regr.rmse
#> 17.79356
Example: Forecasting electricity demand
library(mlr3learners)
library(mlr3pipelines)
task = tsibbledata::vic_elec |>
as.data.table() |>
setnames(tolower) |>
_[
year(time) == 2014L,
.(demand = sum(demand) / 1e3, temperature = max(temperature), holiday = any(holiday)),
by = date
] |>
as_task_fcst(target = "demand", index = "date")
graph = ppl("convert_types", "Date", "POSIXct") %>>%
po("datefeatures",
param_vals = list(year = FALSE, is_day = FALSE, hour = FALSE, minute = FALSE, second = FALSE)
)
ff = Forecaster$new(lrn("regr.ranger"), 1:3)
glrn = as_learner(graph %>>% ff)$train(task)
max_date = task$data()[.N, date]
newdata = data.frame(
date = max_date + 1:14,
demand = rep(NA_real_, 14L),
temperature = 26,
holiday = c(TRUE, rep(FALSE, 13L))
)
prediction = glrn$predict_newdata(newdata, task)
prediction
#> <PredictionRegr> for 14 observations:
#> row_ids truth response
#> 1 NA 186.8597
#> 2 NA 191.5299
#> 3 NA 183.7704
#> --- --- ---
#> 12 NA 214.0419
#> 13 NA 217.9057
#> 14 NA 219.0338
Global Forecasting
library(mlr3learners)
library(mlr3pipelines)
library(tsibble)
task = tsibbledata::aus_livestock |>
as.data.table() |>
setnames(tolower) |>
_[, month := as.Date(month)] |>
_[, .(count = sum(count)), by = .(state, month)] |>
setorder(state, month) |>
as_task_fcst(target = "count", index = "month", key = "state")
graph = ppl("convert_types", "Date", "POSIXct") %>>%
po("datefeatures",
param_vals = list(
week_of_year = FALSE, day_of_week = FALSE, day_of_month = FALSE, day_of_year = FALSE,
is_day = FALSE, hour = FALSE, minute = FALSE, second = FALSE
)
)
task = graph$train(task)[[1L]]
ff = Forecaster$new(lrn("regr.ranger"), 1:3)$train(task)
prediction = ff$predict(task, 4460:4464)
prediction$score(measure)
#> regr.rmse
#> 21192.14
ff = Forecaster$new(lrn("regr.ranger"), 1:3)
resampling = rsmp("forecast_holdout", ratio = 0.8)
rr = resample(task, ff, resampling)
rr$aggregate(measure)
#> regr.rmse
#> 82821.13