Chapter 5 Forecasters Toolbox

Author
Affiliation

Tyler George

Cornell College
STA 364 Spring 2025 Block 5

A tidy forecasting workflow

Setup

library(fpp3)

A tidy forecasting workflow

The process of producing forecasts can be split up into a few fundamental steps.

  1. Preparing data
  2. Data visualization
  3. Specifying a model
  4. Model estimation
  5. Accuracy & performance evaluation
  6. Producing forecasts

A tidy forecasting workflow

Data preparation (tidy)

gdppc <- global_economy |>
  mutate(GDP_per_capita = GDP/Population) |>
  select(Year, Country, GDP, Population, GDP_per_capita)
gdppc
# A tsibble: 15,150 x 5 [1Y]
# Key:       Country [263]
    Year Country             GDP Population GDP_per_capita
   <dbl> <fct>             <dbl>      <dbl>          <dbl>
 1  1960 Afghanistan  537777811.    8996351           59.8
 2  1961 Afghanistan  548888896.    9166764           59.9
 3  1962 Afghanistan  546666678.    9345868           58.5
 4  1963 Afghanistan  751111191.    9533954           78.8
 5  1964 Afghanistan  800000044.    9731361           82.2
 6  1965 Afghanistan 1006666638.    9938414          101. 
 7  1966 Afghanistan 1399999967.   10152331          138. 
 8  1967 Afghanistan 1673333418.   10372630          161. 
 9  1968 Afghanistan 1373333367.   10604346          130. 
10  1969 Afghanistan 1408888922.   10854428          130. 
# ℹ 15,140 more rows

Data visualisation

gdppc |>
  filter(Country=="Sweden") |>
  autoplot(GDP_per_capita) +
    labs(title = "GDP per capita for Sweden", y = "$US")

Model estimation

The model() function trains models to data.

fit <- gdppc |>
  model(trend_model = TSLM(GDP_per_capita ~ trend())) # TSLM to be covered later
fit |> head(5)
# A mable: 5 x 2
# Key:     Country [5]
  Country        trend_model
  <fct>              <model>
1 Afghanistan         <TSLM>
2 Albania             <TSLM>
3 Algeria             <TSLM>
4 American Samoa      <TSLM>
5 Andorra             <TSLM>
Note

A is a model table, each cell corresponds to a fitted model.

Producing forecasts

fit |> forecast(h = "3 years")
# A fable: 789 x 5 [1Y]
# Key:     Country, .model [263]
   Country  .model  Year   GDP_per_capita
   <fct>    <chr>  <dbl>           <dist>
 1 Afghani… trend…  2018     N(526, 9653)
 2 Afghani… trend…  2019     N(534, 9689)
 3 Afghani… trend…  2020     N(542, 9727)
 4 Albania  trend…  2018  N(4716, 476419)
 5 Albania  trend…  2019  N(4867, 481086)
 6 Albania  trend…  2020  N(5018, 486012)
 7 Algeria  trend…  2018  N(4410, 643094)
 8 Algeria  trend…  2019  N(4489, 645311)
 9 Algeria  trend…  2020  N(4568, 647602)
10 America… trend…  2018 N(12491, 652926)
# ℹ 779 more rows
# ℹ 1 more variable: .mean <dbl>
Note

A is a forecast table with point forecasts and distributions.

Visualising forecasts

fit |> forecast(h = "3 years") |>
  filter(Country=="Sweden") |>
  autoplot(gdppc) +
    labs(title = "GDP per capita for Sweden", y = "$US")

Some simple forecasting methods

MEAN(y): Average method

  • Forecast of all future values is equal to mean of historical data \(\{y_1,\dots,y_T\}\).
  • Forecasts: \(\hat{y}_{T+h|T} = \bar{y} = (y_1+\dots+y_T)/T\)

. . .

MEAN(y): Average method Plot

bricks <- aus_production |>
  filter(!is.na(Bricks)) |>
  mutate(average = mean(Bricks))

fc <- bricks |>
  filter(row_number() == n()) |> as_tibble() |>
  unnest(Quarter = list(as.Date(Quarter) + months(c(0, 12*5))))

bricks |>
  ggplot(aes(x = Quarter, y = Bricks)) +
  geom_line() +
  geom_line(aes(y = average), colour = "#0072B2", linetype = "dashed") +
  geom_line(aes(y = average), data = fc, colour = "#0072B2") +
  labs(title = "Clay brick production in Australia")

NAIVE(y): Naïve method

  • Forecasts equal to last observed value.
  • Forecasts: \(\hat{y}_{T+h|T} =y_T\).
  • Consequence of efficient market hypothesis.

. . .

bricks |>
  filter(!is.na(Bricks)) |>
  model(NAIVE(Bricks)) |>
  forecast(h = "5 years") |>
  autoplot(filter(bricks, year(Quarter) > 1990), level = NULL) +
    geom_point(data = slice(bricks, n()), aes(y=Bricks), colour = "#0072B2") +
    labs(title = "Clay brick production in Australia")

SNAIVE(y ~ lag(m)): Seasonal naïve method

  • Forecasts equal to last value from same season.
  • Forecasts: \(\hat{y}_{T+h|T} =y_{T+h-m(k+1)}\), where \(m=\) seasonal period and \(k\) is the integer part of \((h-1)/m\).

. . .

bricks |>
  model(SNAIVE(Bricks ~ lag("year"))) |>
  forecast(h = "5 years") |>
  autoplot(filter(bricks, year(Quarter) > 1990), level = NULL) +
  geom_point(data = slice(bricks, (n()-3):n()), aes(y=Bricks), colour = "#0072B2") +
  labs(title = "Clay brick production in Australia")

RW(y ~ drift()): Drift method

  • Forecasts equal to last value plus average change.
  • Forecasts:

\[\begin{align*} \hat{y}_{T+h|T} & = y_{T} + \frac{h}{T-1}\sum_{t=2}^T (y_t-y_{t-1})\\ & = y_T + \frac{h}{T-1}(y_T -y_1). \end{align*}\]

  • Equivalent to extrapolating a line drawn between first and last observations.
aus_production |>
  filter(!is.na(Bricks)) |>
  model(RW(Bricks ~ drift())) |>
  forecast(h = "5 years") |>
  autoplot(aus_production, level = NULL) +
  geom_line(data = slice(aus_production, range(cumsum(!is.na(Bricks)))),
            aes(y=Bricks), linetype = "dashed", colour = "#0072B2") +
  labs(title = "Clay brick production in Australia")

Model fitting

The model() function trains models to data.

brick_fit <-  aus_production |>
  filter(!is.na(Bricks)) |>
  model(
    Seasonal_naive = SNAIVE(Bricks),
    Naive = NAIVE(Bricks),
    Drift = RW(Bricks ~ drift()),
    Mean = MEAN(Bricks)
  )
# A mable: 1 x 4
  Seasonal_naive   Naive         Drift    Mean
         <model> <model>       <model> <model>
1       <SNAIVE> <NAIVE> <RW w/ drift>  <MEAN>

Producing forecasts

brick_fc <- brick_fit |>
  forecast(h = "5 years")
# A fable: 80 x 4 [1Q]
# Key:     .model [4]
  .model   Quarter       Bricks .mean
  <chr>      <qtr>       <dist> <dbl>
1 Seasona… 2005 Q3 N(428, 2336)   428
2 Seasona… 2005 Q4 N(397, 2336)   397
3 Seasona… 2006 Q1 N(355, 2336)   355
4 Seasona… 2006 Q2 N(435, 2336)   435
# ℹ 76 more rows

Visualising forecasts

brick_fc |>
  autoplot(aus_production, level = NULL) +
  labs(title = "Clay brick production in Australia",
       y = "Millions of bricks") +
  guides(colour = guide_legend(title = "Forecast"))

Facebook closing stock price

# Extract training data
fb_stock <- gafa_stock |>
  filter(Symbol == "FB") |>
  mutate(trading_day = row_number()) |>
  update_tsibble(index=trading_day, regular=TRUE)

# Specify, estimate and forecast
fb_stock |>
  model(
    Mean = MEAN(Close),
    Naive = NAIVE(Close),
    Drift = RW(Close ~ drift())
  ) |>
  forecast(h=42) |>
  autoplot(fb_stock, level = NULL) +
  labs(title = "Facebook closing stock price", y="$US") +
  guides(colour=guide_legend(title="Forecast"))

Your turn

  • Produce forecasts using an appropriate benchmark method for household wealth (hh_budget). Plot the results using autoplot().
  • Produce forecasts using an appropriate benchmark method for Australian takeaway food turnover (aus_retail). Plot the results using autoplot().

Residual diagnostics

Fitted values

  • \(\hat{y}_{t|t-1}\) is the forecast of \(y_t\) based on observations \(y_1,\dots,y_{t-1}\).
  • We call these “fitted values”.
  • Sometimes drop the subscript: \(\hat{y}_t \equiv \hat{y}_{t|t-1}\).
  • Often not true forecasts since parameters are estimated on all data.

. . .

For example:

  • \(\hat{y}_{t} = \bar{y}\) for average method.
  • \(\hat{y}_{t} = y_{t-1} + (y_{T}-y_1)/(T-1)\) for drift method.

Forecasting residuals

Residuals in forecasting: difference between observed value and its fitted value: \(e_t = y_t-\hat{y}_{t|t-1}\).

. . .

Assumptions

  1. \(\{e_t\}\) uncorrelated. If they aren’t, then information left in residuals that should be used in computing forecasts.
  2. \(\{e_t\}\) have mean zero. If they don’t, then forecasts are biased.

. . .

Useful properties (for distributions & prediction intervals)

  1. \(\{e_t\}\) have constant variance.
  2. \(\{e_t\}\) are normally distributed.

Facebook closing stock price

fb_stock |> autoplot(Close)

Facebook closing stock price

fit <- fb_stock |> model(NAIVE(Close))
augment(fit) |> head(5)
# A tsibble: 5 x 7 [1]
# Key:       Symbol, .model [1]
  Symbol .model       trading_day Close .fitted .resid .innov
  <chr>  <chr>              <int> <dbl>   <dbl>  <dbl>  <dbl>
1 FB     NAIVE(Close)           1  54.7    NA   NA     NA    
2 FB     NAIVE(Close)           2  54.6    54.7 -0.150 -0.150
3 FB     NAIVE(Close)           3  57.2    54.6  2.64   2.64 
4 FB     NAIVE(Close)           4  57.9    57.2  0.720  0.720
5 FB     NAIVE(Close)           5  58.2    57.9  0.310  0.310

. . .

Naive forecasts:

\(\hat{y}_{t|t-1} = y_{t-1}\)

\(e_t = y_t - \hat{y}_{t|t-1} = y_t-y_{t-1}\)

Where .fitted = \(\hat{y}_{t|t-1}\) and .resid = \(\phantom{\hat{y}_{|}}{e}_{t}\phantom{\hat{y}_{|}}\)

Facebook closing stock price

augment(fit) |>
  ggplot(aes(x = trading_day)) +
  geom_line(aes(y = Close, colour = "Data")) +
  geom_line(aes(y = .fitted, colour = "Fitted"))

Facebook closing stock price

augment(fit) |>
  filter(trading_day > 1100) |>
  ggplot(aes(x = trading_day)) +
  geom_line(aes(y = Close, colour = "Data")) +
  geom_line(aes(y = .fitted, colour = "Fitted"))

Facebook closing stock price

augment(fit) |>
  autoplot(.resid) +
  labs(y = "$US",
       title = "Residuals from naïve method")

Facebook closing stock price

augment(fit) |>
  ggplot(aes(x = .resid)) +
  geom_histogram(bins = 150) +
  labs(title = "Histogram of residuals")

Facebook closing stock price

augment(fit) |>
  ACF(.resid) |>
  autoplot() + labs(title = "ACF of residuals")

gg_tsresiduals() function

gg_tsresiduals(fit)

ACF of residuals

  • We assume that the residuals are white noise (uncorrelated, mean zero, constant variance). If they aren’t, then there is information left in the residuals that should be used in computing forecasts.

  • So a standard residual diagnostic is to check the ACF of the residuals of a forecasting method.

  • We expect these to look like white noise.

Portmanteau tests: Box-Pierce test

\(r_k =\) autocorrelation of residual at lag \(k\)

. . .

Consider a whole set of \(r_{k}\) values, and develop a test to see whether the set is significantly different from a zero set.

Box-Pierce test \[Q = T \sum_{k=1}^\ell r_k^2\] where \(\ell\) is max lag being considered and \(T\) is number of observations.

  • If each \(r_k\) close to zero, \(Q\) will be small.
  • If some \(r_k\) values large (positive or negative), \(Q\) will be large.

Portmanteau tests: Ljung-Box test (more accurate) (1/2)

\(r_k =\) autocorrelation of residual at lag \(k\)

. . .

Consider a whole set of \(r_{k}\) values, and develop a test to see whether the set is significantly different from a zero set.

Ljung-Box test \[Q^* = T(T+2) \sum_{k=1}^\ell (T-k)^{-1}r_k^2\]$ where \(\ell\) is max lag being considered and \(T\) is number of observations.

Portmanteau tests: Ljung-Box test (more accurate) (2/2)

  • If each \(r_k\) close to zero, \(Q^*\) will be small.
  • If some \(r_k\) values large (positive or negative), \(Q\) will be large.
  • Use \(\ell=10\) for non-seasonal data, \(\ell=2m\) for seasonal data (where \(m\) is seasonal period).
  • Better performance, especially in small samples. Test is not good when \(\ell\) is large, so if these values are larger than \(T/5\), then use \(\ell=T/5\)
  • How large is too large?

Portmanteau tests

  • If data are WN, \(Q\) and \(Q^*\) have \(\chi^2\) distribution with \(\ell\) degrees of freedom.

  • lag \(= \ell\)

. . .

augment(fit) |>
  features(.resid, box_pierce, lag=10)
# A tibble: 1 × 4
  Symbol .model       bp_stat bp_pvalue
  <chr>  <chr>          <dbl>     <dbl>
1 FB     NAIVE(Close)    12.1     0.281

. . .

augment(fit) |>
  features(.resid, ljung_box, lag=10)
# A tibble: 1 × 4
  Symbol .model       lb_stat lb_pvalue
  <chr>  <chr>          <dbl>     <dbl>
1 FB     NAIVE(Close)    12.1     0.276

Distributional forecasts and prediction intervals

Forecast distributions

  • A forecast \(\hat{y}_{T+h|T}\) is (usually) the mean of the conditional distribution \(y_{T+h} \mid y_1, \dots, y_{T}\).
  • Most time series models produce normally distributed forecasts.
  • The forecast distribution describes the probability of observing any future value.

Forecast distributions

Assuming residuals are normal, uncorrelated, sd = \(\hat\sigma\):

. . .

Method Forecast Distribution
Mean: \(y_{T+h\|T} \sim N(\bar{y}, (1 + 1/T)\hat{\sigma}^2)\)
Naïve: \(y_{T+h\|T} \sim N(y_T, h\hat{\sigma}^2)\)
Seasonal naïve: \(y_{T+h\|T} \sim N(y_{T+h-m(k+1)}, (k+1)\hat{\sigma}^2)\)
Drift: \(y_{T+h\|T} \sim N(y_T + \frac{h}{T-1}(y_T - y_1),h\frac{T+h}{T}\hat{\sigma}^2)\)

where \(k\) is the integer part of \((h-1)/m\).

. . .

Note

When \(h=1\) and \(T\) is large, these all give the same approximate forecast variance: \(\hat{\sigma}^2\).

Prediction intervals

  • A prediction interval gives a region within which we expect \(y_{T+h}\) to lie with a specified probability.
  • Assuming forecast errors are normally distributed, then a 95% PI is

. . .

\[\hat{y}_{T+h|T} \pm 1.96 \hat\sigma_h\]

where \(\hat\sigma_h\) is the st dev of the \(h\)-step distribution.

. . .

Note

When \(h=1\), \(\hat\sigma_h\) can be estimated from the residuals.

Prediction intervals

brick_fc |> hilo(level = 95)
# A tsibble: 80 x 5 [1Q]
# Key:       .model [4]
   .model  Quarter       Bricks .mean
   <chr>     <qtr>       <dist> <dbl>
 1 Season… 2005 Q3 N(428, 2336)   428
 2 Season… 2005 Q4 N(397, 2336)   397
 3 Season… 2006 Q1 N(355, 2336)   355
 4 Season… 2006 Q2 N(435, 2336)   435
 5 Season… 2006 Q3 N(428, 4672)   428
 6 Season… 2006 Q4 N(397, 4672)   397
 7 Season… 2007 Q1 N(355, 4672)   355
 8 Season… 2007 Q2 N(435, 4672)   435
 9 Season… 2007 Q3 N(428, 7008)   428
10 Season… 2007 Q4 N(397, 7008)   397
# ℹ 70 more rows
# ℹ 1 more variable: `95%` <hilo>

Prediction intervals

  • Point forecasts often useless without a measure of uncertainty (such as prediction intervals).
  • Prediction intervals require a stochastic model (with random errors, etc).
  • For most models, prediction intervals get wider as the forecast horizon increases.
  • Use level argument to control coverage.
  • Check residual assumptions before believing them.
  • Usually too narrow due to unaccounted uncertainty.

Forecasting with transformations

Modelling with transformations

eggs <- prices |>
  filter(!is.na(eggs)) |> select(eggs)
eggs |> autoplot() +
  labs(title="Annual egg prices",
       y="$US (adjusted for inflation)")

Modelling with transformations

Transformations used in the left of the formula will be automatically back-transformed. To model log-transformed egg prices, you could use:

fit <- eggs |>
  model(RW(log(eggs) ~ drift()))
fit
# A mable: 1 x 1
  `RW(log(eggs) ~ drift())`
                    <model>
1             <RW w/ drift>

Forecasting with transformations

fc <- fit |>
  forecast(h = 50)
fc
# A fable: 50 x 4 [1Y]
# Key:     .model [1]
   .model                   year             eggs .mean
   <chr>                   <dbl>           <dist> <dbl>
 1 RW(log(eggs) ~ drift())  1994 t(N(4.1, 0.018))  61.8
 2 RW(log(eggs) ~ drift())  1995 t(N(4.1, 0.036))  61.4
 3 RW(log(eggs) ~ drift())  1996 t(N(4.1, 0.055))  61.0
 4 RW(log(eggs) ~ drift())  1997 t(N(4.1, 0.074))  60.6
 5 RW(log(eggs) ~ drift())  1998 t(N(4.1, 0.093))  60.2
 6 RW(log(eggs) ~ drift())  1999    t(N(4, 0.11))  59.8
 7 RW(log(eggs) ~ drift())  2000    t(N(4, 0.13))  59.4
 8 RW(log(eggs) ~ drift())  2001    t(N(4, 0.15))  59.0
 9 RW(log(eggs) ~ drift())  2002    t(N(4, 0.18))  58.6
10 RW(log(eggs) ~ drift())  2003     t(N(4, 0.2))  58.3
# ℹ 40 more rows

Forecasting with transformations

fc |> autoplot(eggs) +
  labs(title="Annual egg prices",
       y="US$ (adjusted for inflation)")

Bias adjustment

  • Back-transformed point forecasts are medians.
  • Back-transformed PI have the correct coverage.

. . .

Back-transformed means (if you are math inclined)

Let \(X\) be have mean \(\mu\) and variance \(\sigma^2\).

Let \(f(x)\) be back-transformation function, and \(Y=f(X)\).

Taylor series expansion about \(\mu\): \[f(X) = f(\mu) + (X-\mu)f'(\mu) + \frac{1}{2}(X-\mu)^2f''(\mu).\]

Note

\(E[Y] = E[f(X)] = f(\mu) + \frac12 \sigma^2 f''(\mu)\)

Bias adjustment

Box-Cox back-transformation (if math inclined): \[\begin{align*} y_t &= \left\{\begin{array}{ll} \exp(w_t) & \quad \lambda = 0; \\ (\lambda W_t+1)^{1/\lambda} & \quad \lambda \ne 0. \end{array}\right. \\ f(x) &= \begin{cases} e^x & \quad\lambda=0;\\ (\lambda x + 1)^{1/\lambda} & \quad\lambda\ne0. \end{cases}\\ f''(x) &= \begin{cases} e^x & \quad\lambda=0;\\ (1-\lambda)(\lambda x + 1)^{1/\lambda-2} & \quad\lambda\ne0. \end{cases} \end{align*}\]

Note

\(E[Y] = \begin{cases} e^\mu\left[1+\frac{\sigma^2}{2}\right] & \quad\lambda=0;\\ (\lambda \mu + 1)^{1/\lambda}\left[1+\frac{\sigma^2(1-\lambda)}{2(\lambda\mu+1)^2}\right] & \quad\lambda\ne0. \end{cases}\)

Bias adjustment

fc |>
  autoplot(eggs, level = 80, point_forecast = lst(mean, median)) +
  labs(title="Annual egg prices",
       y="US$ (adjusted for inflation)")

Forecasting and decomposition

Forecasting and decomposition

\[y_t = \hat{S}_t + \hat{A}_t\]

  • \(\hat{A}_t\) is seasonally adjusted component

  • \(\hat{S}_t\) is seasonal component.

    • Forecast \(\hat{S}_t\) using SNAIVE.
    • Forecast \(\hat{A}_t\) using non-seasonal time series method.
    • Combine forecasts of \(\hat{S}_t\) and \(\hat{A}_t\) to get forecasts of original data.

US Retail Employment

us_retail_employment <- us_employment |>
  filter(year(Month) >= 1990, Title == "Retail Trade") |>
  select(-Series_ID)
us_retail_employment
# A tsibble: 357 x 3 [1M]
      Month Title        Employed
      <mth> <chr>           <dbl>
 1 1990 Jan Retail Trade   13256.
 2 1990 Feb Retail Trade   12966.
 3 1990 Mar Retail Trade   12938.
 4 1990 Apr Retail Trade   13012.
 5 1990 May Retail Trade   13108.
 6 1990 Jun Retail Trade   13183.
 7 1990 Jul Retail Trade   13170.
 8 1990 Aug Retail Trade   13160.
 9 1990 Sep Retail Trade   13113.
10 1990 Oct Retail Trade   13185.
# ℹ 347 more rows

US Retail Employment

dcmp <- us_retail_employment |>
  model(STL(Employed)) |>
  components() |> select(-.model)
dcmp
# A tsibble: 357 x 6 [1M]
      Month Employed  trend season_year remainder season_adjust
      <mth>    <dbl>  <dbl>       <dbl>     <dbl>         <dbl>
 1 1990 Jan   13256. 13288.      -33.0      0.836        13289.
 2 1990 Feb   12966. 13269.     -258.     -44.6          13224.
 3 1990 Mar   12938. 13250.     -290.     -22.1          13228.
 4 1990 Apr   13012. 13231.     -220.       1.05         13232.
 5 1990 May   13108. 13211.     -114.      11.3          13223.
 6 1990 Jun   13183. 13192.      -24.3     15.5          13207.
 7 1990 Jul   13170. 13172.      -23.2     21.6          13193.
 8 1990 Aug   13160. 13151.       -9.52    17.8          13169.
 9 1990 Sep   13113. 13131.      -39.5     22.0          13153.
10 1990 Oct   13185. 13110.       61.6     13.2          13124.
# ℹ 347 more rows

US Retail Employment

dcmp_forecast<- dcmp |>
                model(NAIVE(season_adjust)) |>
                forecast()

  autoplot(dcmp_forecast,dcmp) +
  labs(title = "Naive forecasts of seasonally adjusted data")

US Retail Employment

us_retail_employment |>
  model(stlf = decomposition_model(
    STL(Employed ~ trend(window = 7), robust = TRUE),
    NAIVE(season_adjust)
  )) |>
  forecast() |>
  autoplot(us_retail_employment)

Decomposition models

decomposition_model() creates a decomposition model

  • You must provide a method for forecasting the season_adjust series.
  • A seasonal naive method is used by default for the seasonal components.
  • The variances from both the seasonally adjusted and seasonal forecasts are combined.

Evaluating forecast accuracy

Training and test sets (1/2)

Training and test sets (2/2)

  • A model which fits the training data well will not necessarily forecast well.
  • A perfect fit can always be obtained by using a model with enough parameters.
  • Over-fitting a model to data is just as bad as failing to identify a systematic pattern in the data.
  • The test set must not be used for any aspect of model development or calculation of forecasts.
  • Forecast accuracy is based only on the test set.

Forecast errors

Forecast “error”: the difference between an observed value and its forecast. \[ e_{T+h} = y_{T+h} - \hat{y}_{T+h|T}, \] where the training data is given by \(\{y_1,\dots,y_T\}\)

  • Unlike residuals, forecast errors on the test set involve multi-step forecasts.
  • These are true forecast errors as the test data is not used in computing \(\hat{y}_{T+h|T}\).

Measures of forecast accuracy

train <- aus_production |>
  filter(between(year(Quarter), 1992, 2007))
beer <- aus_production |>
  filter(year(Quarter) >= 1992)
beer_fc_plot <- train |>
  model(
    Mean = MEAN(Beer),
    Naive = NAIVE(Beer),
    Seasonal_naive = SNAIVE(Beer),
    Drift = RW(Beer ~ drift())
  ) |>
  forecast(h=11) |>
  autoplot(beer, level = NULL) +
    labs(title = "Forecasts for quarterly beer production",
         y = "Megalitres") +
    guides(colour=guide_legend(title="Forecast"))
beer_fc_plot

Measures of forecast accuracy

\(y_{T+h}= (T+h)\)th observation, \(h=1,\dots,H\)

\(\hat{y}_{T+h|T}=\) its forecast based on data up to time \(T\).

\(e_{T+h} = y_{T+h} - \hat{y}_{{T+h}|{T}}\)

. . .

Name Abb. Formula
Mean Abosolue Error MAE \(\text{mean}(\|e_{T+h}\|)\)
Mean Squared Error MSE \(\text{mean}(e_{T+h}^2)\)
Root MSE RMSE \(\sqrt{\text{mean}(e_{T+h}^2)\)
Mean Absolute Percent Error MAPE \(100\text{mean}(\|e_{T+h}\|/ \|y_{T+h}\|)\)
  • MAE, MSE, RMSE are all scale dependent.
  • MAPE is scale independent but is only sensible if \(y_t\gg 0\) for all \(t\), and \(y\) has a natural zero.

Mean Absolute Scaled Error (non-seasonal)

\[ \text{MASE} = \text{mean}(|e_{T+h}|/Q) \]

where \(Q\) is a stable measure of the scale of the time series \(\{y_t\}\).

Proposed by Hyndman and Koehler (IJF, 2006).

For non-seasonal time series,

\[ Q = (T-1)^{-1}\sum_{t=2}^T |y_t-y_{t-1}| \] works well. Then MASE is equivalent to MAE relative to a naïve method.

Mean Absolute Scaled Error (seasonal)

\[ \text{MASE} = \text{mean}(|e_{T+h}|/Q) \]

where \(Q\) is a stable measure of the scale of the time series \(\{y_t\}\).

Proposed by Hyndman and Koehler (IJF, 2006).

For seasonal time series, \[ Q = (T-m)^{-1}\sum_{t=m+1}^T |y_t-y_{t-m}| \] works well. Then MASE is equivalent to MAE relative to a seasonal naïve method.

Measures of forecast accuracy: Time Series

beer_fc_plot

Measures of forecast accuracy: Model Fits

recent_production <- aus_production |>
  filter(year(Quarter) >= 1992)
train <- recent_production |>
  filter(year(Quarter) <= 2007)
beer_fit <- train |>
  model(
    Mean = MEAN(Beer),
    Naive = NAIVE(Beer),
    Seasonal_naive = SNAIVE(Beer),
    Drift = RW(Beer ~ drift())
  )
beer_fc <- beer_fit |>
  forecast(h = 10)

Measures of forecast accuracy: On Training data (1/2)

  • Smaller is better
  • Use to evaluate models individually (based on training data, overfitting possible)

. . .

accuracy(beer_fit)
# A tibble: 4 × 10
  .model         .type           ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
  <chr>          <chr>        <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
1 Mean           Training  0         43.6  35.2 -0.937  7.89  2.46  2.60 -0.109
2 Naive          Training  4.76e- 1  65.3  54.7 -0.916 12.2   3.83  3.89 -0.241
3 Seasonal_naive Training -2.13e+ 0  16.8  14.3 -0.554  3.31  1     1    -0.288
4 Drift          Training -5.41e-15  65.3  54.8 -1.03  12.2   3.83  3.89 -0.241

Measures of forecast accuracy: On Training data (2/2)

accuracy(beer_fit) |>
  arrange(.model) |> # Sorts model names alphabetically
  select(.model, .type, RMSE, MAE, MAPE, MASE) #The metrics we defined
# A tibble: 4 × 6
  .model         .type     RMSE   MAE  MAPE  MASE
  <chr>          <chr>    <dbl> <dbl> <dbl> <dbl>
1 Drift          Training  65.3  54.8 12.2   3.83
2 Mean           Training  43.6  35.2  7.89  2.46
3 Naive          Training  65.3  54.7 12.2   3.83
4 Seasonal_naive Training  16.8  14.3  3.31  1   

Measures of forecast accuracy: On Testing data

  • Smaller is better
  • Can be used to compare models and evaluate models individually
  • Gives a realistic expectation of error if we rebuild model on all data for future forecasts

. . .

accuracy(beer_fc, recent_production) |>
  arrange(.model) |>
  select(.model, .type, RMSE, MAE, MAPE, MASE)
# A tibble: 4 × 6
  .model         .type  RMSE   MAE  MAPE  MASE
  <chr>          <chr> <dbl> <dbl> <dbl> <dbl>
1 Drift          Test   64.9  58.9 14.6  4.12 
2 Mean           Test   38.4  34.8  8.28 2.44 
3 Naive          Test   62.7  57.4 14.2  4.01 
4 Seasonal_naive Test   14.3  13.4  3.17 0.937