Copyright (c) Microsoft Corporation.
Licensed under the MIT License.

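This notebook builds on the output from “Basic models” by including regressor variables in the ARIMA model(s). We fit the following model types:

- ar_trend includes only a linear trend over time.
- ar_reg allows stepwise selection of independent regressors.
- ar_reg_price: rather than allowing the algorithm to select from the 11 price variables, we use only the price relevant to each brand. This is to guard against possible overfitting, something that classical stepwise procedures are wont to do.
- ar_reg_price_trend is the same as ar_reg_price, but including a linear trend.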

As part of the modelling, we also compute a new independent variable maxpricediff: the log of the ratio of the best (lowest) competing price to the price of this brand. A positive maxpricediff means this brand is cheaper than all the other brands, and a negative maxpricediff means at least one competing brand is cheaper.

# source every file found in the project's R_utils directory
srcdir <- here::here("R_utils")
for(src in list.files(srcdir, full.names=TRUE))
{
    source(src)
}

# load the saved objects for the grocery_sales example
# (oj_train and oj_test, used below, are presumably among them -- confirm against load_objects)
load_objects("grocery_sales", "data.Rdata")

# start a cluster, passing the packages the code run on it relies on
cl <- make_cluster(libs=c("tidyr", "dplyr", "fable", "tsibble", "feasts"))

# add extra regression variables to training and test datasets
#
# For every store/brand group in df, two columns are appended:
#   price        - the group's own price, copied from the price<brand> column
#   maxpricediff - log(lowest competing price / own price); positive when this
#                  brand is cheaper than every competing brand
#
# Arguments:
#   df: a data frame with store, brand and week columns plus one price<N>
#       column per brand (price1, price2, ...)
#
# Returns: a tsibble indexed by week and keyed by store and brand
add_regvars <- function(df)
{
    df %>%
        group_by(store, brand) %>%
        group_modify(~ {
            # Only the raw per-brand columns (price1, price2, ...) are candidates. Anchoring the
            # pattern keeps derived columns such as price and maxpricediff, or any other column
            # that merely contains "price" in its name, out of the competitor comparison.
            pricevars <- grep("^price[0-9]+$", names(.x), value=TRUE)

            # .y is the one-row key for this group, so this is a single column name
            thispricevar <- paste0("price", .y$brand)
            if(!thispricevar %in% pricevars)
                stop("no price column found for brand ", .y$brand, call.=FALSE)

            # row-wise minimum over all competing brands' prices
            best_other_price <- do.call(pmin, .x[setdiff(pricevars, thispricevar)])
            .x$price <- .x[[thispricevar]]
            .x$maxpricediff <- log(best_other_price/.x$price)
            .x
        }) %>%
        ungroup() %>%
        mutate(week=yearweek(week)) %>%  # need to recreate this variable because of tsibble/vctrs issues
        as_tsibble(week, key=c(store, brand))
}

# run add_regvars over every training and test dataset on the cluster
oj_trainreg <- parallel::parLapply(cl=cl, X=oj_train, fun=add_regvars)
oj_testreg <- parallel::parLapply(cl=cl, X=oj_test, fun=add_regvars)

# keep the augmented datasets alongside the other grocery_sales objects
save_objects(
    oj_trainreg, oj_testreg,
    example="grocery_sales",
    file="data_reg.Rdata"
)

# fit the four ARIMA specifications to each training dataset, one dataset per cluster task
oj_modelset_reg <- parallel::parLapply(cl, oj_trainreg, function(train)
{
    # every specification leaves pdq() unspecified and fixes the seasonal part at PDQ(0, 0, 0)
    model(
        train,

        # linear time trend only
        ar_trend = ARIMA(logmove ~ pdq() + PDQ(0, 0, 0) + trend()),

        # deal, feat, maxpricediff and all eleven brand price columns
        ar_reg = ARIMA(
            logmove ~ pdq() + PDQ(0, 0, 0) + deal + feat + maxpricediff +
                price1 + price2 + price3 + price4 + price5 + price6 +
                price7 + price8 + price9 + price10 + price11
        ),

        # deal, feat, maxpricediff and only the brand's own price
        ar_reg_price = ARIMA(
            logmove ~ pdq() + PDQ(0, 0, 0) + deal + feat + maxpricediff + price
        ),

        # as ar_reg_price, with a linear time trend added
        ar_reg_price_trend = ARIMA(
            logmove ~ pdq() + PDQ(0, 0, 0) + trend() + deal + feat + maxpricediff + price
        ),

        # let a fitting error propagate rather than be replaced by a null model
        .safely = FALSE
    )
})

# call get_forecasts on each (model set, test dataset) pair, in parallel
oj_fcast_reg <- parallel::clusterMap(cl=cl, fun=get_forecasts, oj_modelset_reg, oj_testreg)

# no further cluster work is done in this notebook
destroy_cluster(cl)

save_objects(
    oj_modelset_reg, oj_fcast_reg,
    example="grocery_sales",
    file="model_reg.Rdata"
)

# stack the per-dataset forecasts, apply exp() to every column after the first three
# (presumably undoing the log scale of logmove -- confirm against get_forecasts), then evaluate
oj_fcast_reg %>%
    do.call(rbind, .) %>%
    mutate_at(-(1:3), exp) %>%
    eval_forecasts()

This shows that the models incorporating price are a significant improvement over the previous naive models. The model that uses stepwise selection to choose the best price variable does worse than the one where we choose the price beforehand, confirming the suspicion that stepwise leads to overfitting in this case.

LS0tCnRpdGxlOiBBUklNQS1SZWdyZXNzaW9uIG1vZGVscwpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpfQ29weXJpZ2h0IChjKSBNaWNyb3NvZnQgQ29ycG9yYXRpb24uXzxici8+Cl9MaWNlbnNlZCB1bmRlciB0aGUgTUlUIExpY2Vuc2UuXwoKYGBge3IsIGVjaG89RkFMU0UsIHJlc3VsdHM9ImhpZGUiLCBtZXNzYWdlPUZBTFNFfQpsaWJyYXJ5KHRpZHlyKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHRzaWJibGUpCmxpYnJhcnkoZmVhc3RzKQpsaWJyYXJ5KGZhYmxlKQpgYGAKClRoaXMgbm90ZWJvb2sgYnVpbGRzIG9uIHRoZSBvdXRwdXQgZnJvbSAiQmFzaWMgbW9kZWxzIiBieSBpbmNsdWRpbmcgcmVncmVzc29yIHZhcmlhYmxlcyBpbiB0aGUgQVJJTUEgbW9kZWwocykuIFdlIGZpdCB0aGUgZm9sbG93aW5nIG1vZGVsIHR5cGVzOgoKLSBgYXJfdHJlbmRgIGluY2x1ZGVzIG9ubHkgYSBsaW5lYXIgdHJlbmQgb3ZlciB0aW1lLgotIGBhcl9yZWdgIGFsbG93cyBzdGVwd2lzZSBzZWxlY3Rpb24gb2YgaW5kZXBlbmRlbnQgcmVncmVzc29ycy4KLSBgYXJfcmVnX3ByaWNlYDogcmF0aGVyIHRoYW4gYWxsb3dpbmcgdGhlIGFsZ29yaXRobSB0byBzZWxlY3QgZnJvbSB0aGUgMTEgcHJpY2UgdmFyaWFibGVzLCB3ZSB1c2Ugb25seSB0aGUgcHJpY2UgcmVsZXZhbnQgdG8gZWFjaCBicmFuZC4gVGhpcyBpcyB0byBndWFyZCBhZ2FpbnN0IHBvc3NpYmxlIG92ZXJmaXR0aW5nLCBzb21ldGhpbmcgdGhhdCBjbGFzc2ljYWwgc3RlcHdpc2UgcHJvY2VkdXJlcyBhcmUgd29udCB0byBkby4KLSBgYXJfcmVnX3ByaWNlX3RyZW5kYCBpcyB0aGUgc2FtZSBhcyBgYXJfcmVnX3ByaWNlYCwgYnV0IGluY2x1ZGluZyBhIGxpbmVhciB0cmVuZC4KCkFzIHBhcnQgb2YgdGhlIG1vZGVsbGluZywgd2UgYWxzbyBjb21wdXRlIGEgbmV3IGluZGVwZW5kZW50IHZhcmlhYmxlIGBtYXhwcmljZWRpZmZgLCB0aGUgbG9nLXJhdGlvIG9mIHRoZSBwcmljZSBvZiB0aGlzIGJyYW5kIGNvbXBhcmVkIHRvIHRoZSBiZXN0IGNvbXBldGluZyBwcmljZS4gQSBwb3NpdGl2ZSBgbWF4cHJpY2VkaWZmYCBtZWFucyB0aGlzIGJyYW5kIGlzIGNoZWFwZXIgdGhhbiBhbGwgdGhlIG90aGVyIGJyYW5kcywgYW5kIGEgbmVnYXRpdmUgYG1heHByaWNlZGlmZmAgbWVhbnMgaXQgaXMgbW9yZSBleHBlbnNpdmUuCgpgYGB7cn0Kc3JjZGlyIDwtIGhlcmU6OmhlcmUoIlJfdXRpbHMiKQpmb3Ioc3JjIGluIGRpcihzcmNkaXIsIGZ1bGwubmFtZXM9VFJVRSkpIHNvdXJjZShzcmMpCgpsb2FkX29iamVjdHMoImdyb2Nlcnlfc2FsZXMiLCAiZGF0YS5SZGF0YSIpCgpjbCA8LSBtYWtlX2NsdXN0ZXIobGlicz1jKCJ0aWR5ciIsICJkcGx5ciIsICJmYWJsZSIsICJ0c2liYmxlIiwgImZlYXN0cyIpKQoKIyBhZGQgZXh0cmEgcmVncmVzc2lvbiB2YXJpYWJsZXMgdG8gdHJhaW5pbmcgYW5kIHRlc3QgZGF0YXNldHMKYWRkX3JlZ3ZhcnMgPC0gZnVuY3Rpb24oZGYpCnsKICAgIGRmICU+JQogICAgICAgIGdy
b3VwX2J5KHN0b3JlLCBicmFuZCkgJT4lCiAgICAgICAgZ3JvdXBfbW9kaWZ5KH4gewogICAgICAgICAgICBwcmljZXZhcnMgPC0gZ3JlcCgicHJpY2UiLCBuYW1lcygueCksIHZhbHVlPVRSVUUpCiAgICAgICAgICAgIHRoaXNwcmljZXZhciA8LSB1bmlxdWUocGFzdGUwKCJwcmljZSIsIC55JGJyYW5kKSkKICAgICAgICAgICAgYmVzdF9vdGhlcl9wcmljZSA8LSBkby5jYWxsKHBtaW4sIC54W3NldGRpZmYocHJpY2V2YXJzLCB0aGlzcHJpY2V2YXIpXSkKICAgICAgICAgICAgLngkcHJpY2UgPC0gLnhbW3RoaXNwcmljZXZhcl1dCiAgICAgICAgICAgIC54JG1heHByaWNlZGlmZiA8LSBsb2coYmVzdF9vdGhlcl9wcmljZS8ueCRwcmljZSkKICAgICAgICAgICAgLngKICAgICAgICB9KSAlPiUKICAgICAgICB1bmdyb3VwKCkgJT4lCiAgICAgICAgbXV0YXRlKHdlZWs9eWVhcndlZWsod2VlaykpICU+JSAgIyBuZWVkIHRvIHJlY3JlYXRlIHRoaXMgdmFyaWFibGUgYmVjYXVzZSBvZiB0c2liYmxlL3ZjdHJzIGlzc3VlcwogICAgICAgIGFzX3RzaWJibGUod2Vlaywga2V5PWMoc3RvcmUsIGJyYW5kKSkKfQoKb2pfdHJhaW5yZWcgPC0gcGFyYWxsZWw6OnBhckxhcHBseShjbCwgb2pfdHJhaW4sIGFkZF9yZWd2YXJzKQpval90ZXN0cmVnIDwtIHBhcmFsbGVsOjpwYXJMYXBwbHkoY2wsIG9qX3Rlc3QsIGFkZF9yZWd2YXJzKQoKc2F2ZV9vYmplY3RzKG9qX3RyYWlucmVnLCBval90ZXN0cmVnLAogICAgICAgICAgICAgZXhhbXBsZT0iZ3JvY2VyeV9zYWxlcyIsIGZpbGU9ImRhdGFfcmVnLlJkYXRhIikKCm9qX21vZGVsc2V0X3JlZyA8LSBwYXJhbGxlbDo6cGFyTGFwcGx5KGNsLCBval90cmFpbnJlZywgZnVuY3Rpb24oZGYpCnsKICAgIG1vZGVsKGRmLAogICAgICAgIGFyX3RyZW5kPUFSSU1BKGxvZ21vdmUgfiBwZHEoKSArIFBEUSgwLCAwLCAwKSArIHRyZW5kKCkpLAoKICAgICAgICBhcl9yZWc9QVJJTUEobG9nbW92ZSB+IHBkcSgpICsgUERRKDAsIDAsIDApICsgZGVhbCArIGZlYXQgKyBtYXhwcmljZWRpZmYgKwogICAgICAgICAgICBwcmljZTEgKyBwcmljZTIgKyBwcmljZTMgKyBwcmljZTQgKyBwcmljZTUgKyBwcmljZTYgKyBwcmljZTcgKyBwcmljZTggKyBwcmljZTkgKyBwcmljZTEwICsgcHJpY2UxMSksCgogICAgICAgIGFyX3JlZ19wcmljZT1BUklNQShsb2dtb3ZlIH4gcGRxKCkgKyBQRFEoMCwgMCwgMCkgKyBkZWFsICsgZmVhdCArIG1heHByaWNlZGlmZiArIHByaWNlKSwKCiAgICAgICAgYXJfcmVnX3ByaWNlX3RyZW5kPUFSSU1BKGxvZ21vdmUgfiBwZHEoKSArIFBEUSgwLCAwLCAwKSArIHRyZW5kKCkgKyBkZWFsICsgZmVhdCArIG1heHByaWNlZGlmZiArIHByaWNlKSwKCiAgICAgICAgLnNhZmVseT1GQUxTRQogICAgKQp9KQoKb2pfZmNhc3RfcmVnIDwtIHBhcmFsbGVsOjpjbHVzdGVyTWFwKGNsLCBnZXRfZm9yZWNhc3RzLCBval9tb2RlbHNldF9yZWcsIG9qX3Rlc3RyZWcpCgpkZXN0cm95X2NsdXN0ZXIoY2wpCgpzYXZlX29i
amVjdHMob2pfbW9kZWxzZXRfcmVnLCBval9mY2FzdF9yZWcsCiAgICAgICAgICAgICBleGFtcGxlPSJncm9jZXJ5X3NhbGVzIiwgZmlsZT0ibW9kZWxfcmVnLlJkYXRhIikKCmRvLmNhbGwocmJpbmQsIG9qX2ZjYXN0X3JlZykgJT4lCiAgICBtdXRhdGVfYXQoLSgxOjMpLCBleHApICU+JQogICAgZXZhbF9mb3JlY2FzdHMoKQpgYGAKClRoaXMgc2hvd3MgdGhhdCB0aGUgbW9kZWxzIGluY29ycG9yYXRpbmcgcHJpY2UgYXJlIGEgc2lnbmlmaWNhbnQgaW1wcm92ZW1lbnQgb3ZlciB0aGUgcHJldmlvdXMgbmFpdmUgbW9kZWxzLiBUaGUgbW9kZWwgdGhhdCB1c2VzIHN0ZXB3aXNlIHNlbGVjdGlvbiB0byBjaG9vc2UgdGhlIGJlc3QgcHJpY2UgdmFyaWFibGUgZG9lcyB3b3JzZSB0aGFuIHRoZSBvbmUgd2hlcmUgd2UgY2hvb3NlIHRoZSBwcmljZSBiZWZvcmVoYW5kLCBjb25maXJtaW5nIHRoZSBzdXNwaWNpb24gdGhhdCBzdGVwd2lzZSBsZWFkcyB0byBvdmVyZml0dGluZyBpbiB0aGlzIGNhc2UuCg==