library(tidyverse)
library(magrittr)
library(keras)
library(tidymodels) # Or load only 'rsample' and 'recipes' on their own
── Attaching packages ───────────────────────────────────────────── tidymodels 0.1.3 ──
✓ broom        0.7.9      ✓ rsample      0.1.0 
✓ dials        0.0.10     ✓ tune         0.1.6 
✓ infer        1.0.0      ✓ workflows    0.2.3 
✓ modeldata    0.1.1      ✓ workflowsets 0.1.0 
✓ parsnip      0.1.7      ✓ yardstick    0.0.8 
✓ recipes      0.1.16     
── Conflicts ──────────────────────────────────────────────── tidymodels_conflicts() ──
x scales::discard()        masks purrr::discard()
x magrittr::extract()      masks tidyr::extract()
x dplyr::filter()          masks stats::filter()
x xts::first()             masks dplyr::first()
x recipes::fixed()         masks stringr::fixed()
x yardstick::get_weights() masks keras::get_weights()
x dplyr::lag()             masks stats::lag()
x xts::last()              masks dplyr::last()
x dials::momentum()        masks TTR::momentum()
x magrittr::set_names()    masks purrr::set_names()
x yardstick::spec()        masks readr::spec()
x recipes::step()          masks stats::step()
• Use tidymodels_prefer() to resolve common conflicts.

3 Intro: Ways to create sequences…

Workshop Stock prediction

Task:

  1. Get some stock data (tip: Use tidyquant)
    • Limit yourself for now to one stock
    • Limit yourself to one variable (preferably some price data)
  2. Develop a one-step ahead prediction of prices (or their movements)

Load some data

Select a stock and load the data

  • We will use the tidyquant package to download stock data
library(tidyquant) # My favorite package to get stock data
Loading required package: lubridate

Attaching package: ‘lubridate’

The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union

Loading required package: PerformanceAnalytics
Loading required package: xts
Loading required package: zoo

Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric


Attaching package: ‘xts’

The following objects are masked from ‘package:dplyr’:

    first, last


Attaching package: ‘PerformanceAnalytics’

The following object is masked from ‘package:graphics’:

    legend

Loading required package: quantmod
Loading required package: TTR
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 
library(timetk) 
Registered S3 method overwritten by 'tune':
  method                   from   
  required_pkgs.model_spec parsnip
tickers = c("NVO") # We can also try AAPL etc
            
data_stocks <- tq_get(tickers,
               from = "2000-01-01",
               to = "2021-11-17",
               get = "stock.prices" # What we want to get.... here prices
               )
Warning: `type_convert()` only converts columns of type 'character'.
- `df` has no columns of type 'character'

Some plots for exploration…

data_stocks %>% glimpse()
Rows: 5,505
Columns: 8
$ symbol   <chr> "NVO", "NVO", "NVO", "NVO", "NVO", "NVO", "NVO", "NVO", "NVO", "NVO"…
$ date     <date> 2000-01-03, 2000-01-04, 2000-01-05, 2000-01-06, 2000-01-07, 2000-01…
$ open     <dbl> 2.7000, 2.6775, 2.6325, 2.6275, 2.7400, 2.9025, 2.9275, 2.9200, 2.91…
$ high     <dbl> 2.7050, 2.6900, 2.6350, 2.6950, 2.7525, 2.9750, 2.9300, 2.9250, 2.91…
$ low      <dbl> 2.6700, 2.6475, 2.6025, 2.6275, 2.7000, 2.9025, 2.8850, 2.9150, 2.88…
$ close    <dbl> 2.6900, 2.6775, 2.6275, 2.6700, 2.7400, 2.9650, 2.9150, 2.9250, 2.88…
$ volume   <dbl> 385000, 197500, 210000, 287500, 245000, 530000, 762500, 290000, 1650…
$ adjusted <dbl> 1.841958, 1.833398, 1.799161, 1.828262, 1.876194, 2.030262, 1.996025…
data_stocks %>% head()
data_stocks %>% 
  plot_time_series(date, adjusted)
# # ggplot alternative
# data_stocks %>%
#   ggplot(aes(x = date, y = adjusted,)) +
#   geom_line() +
#   labs(x = 'Date', y = "Adjusted Price") 

Preprocessing

# Limit data
data <- data_stocks %>%
  rename(index = date, value = adjusted) %>%
  select(index, value) %>%
  arrange(index) 
  • It is usually easier to model changes rather than absolute prices, so we create a variable measuring the percentage change in price instead
# Remodel value as percentage change
data %<>%
  distinct(index, .keep_all = TRUE) %>%
  tidyr::fill(value, .direction = "downup") %>%
  mutate(value = (value - lag(value,1)) / lag(value,1) ) %>%
  drop_na()
data %>%
  ggplot(aes(x = index, y = value)) +
  geom_line() +
  labs(x = 'Date', y = "Price change in pct") 

data %>%
    plot_acf_diagnostics(index, value)

Train & Test split

  • We do a time-series split which keeps the sequencing of the data
# We use time_splits here to maintain the sequences
data_split <- data %>% initial_time_split(prop = 0.75)
data_train <- data_split %>% training()
data_test <- data_split %>% testing()
  • Let's check the date ranges covered by the train and test samples
# See what we got
data_train %>% pull(index) %>% min()
[1] "2000-01-04"
data_train %>% pull(index) %>% max()
[1] "2016-06-01"
data_test %>% pull(index) %>% min()
[1] "2016-06-02"
data_test %>% pull(index) %>% max()
[1] "2021-11-16"
data_train %>% mutate(split = 'training') %>%
  bind_rows(data_test %>% mutate(split = 'testing')) %>%
  ggplot(aes(x = index, y = value, col = split)) +
  geom_line() 

Define a recipe

  • We only apply standardization (centering and scaling) here with step_normalize
data_recipe <- data_train %>%
  recipe(value ~ .) %>% 
  step_normalize(value) %>%
  step_arrange(index) %>%
  prep()
  • We save the mean and standard deviation to rescale the predictions later
# Preserve the values for later (to reconstruct original values)
prep_history <- tibble(
  mean = data_recipe$steps[[1]]$means,
  sds = data_recipe$steps[[1]]$sds
)
prep_history

Get processed train & test data

  • We now create an x and y split. Since we always predict the next observation, that’s easy: we just set y = lead(x, 1)
  • We fill the trailing missing observation (created by the lead) with the preceding value
# Number of lags
n_lag = 1

# Train data
x_train <- data_recipe %>% juice() 

y_train <- data_recipe %>%  juice() %>%
  mutate(value = value %>% lead(n_lag)) %>%
  tidyr::fill(value, .direction = "downup") 

# And the same for the test data
x_test <- data_recipe %>% bake(data_test) 

y_test <- data_recipe %>%  bake(data_test) %>%  
  mutate(value = value %>% lead(n_lag)) %>%
  tidyr::fill(value, .direction = "downup") 

Transform to a 3d tensor for keras

# Transforming the x sequence to a 3d tensor (necessary for LSTMs)
x_train_arr <- x_train %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1, 1))
x_test_arr <- x_test %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1, 1))

y_train_arr <- y_train %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1))
y_test_arr <- y_test %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1))
x_train_arr %>% dim()
[1] 4128    1    1
x_train_arr %>% glimpse()
 num [1:4128, 1, 1] -0.29 -1.011 0.781 1.298 4.173 ...

Setting up the LSTM

LSTM

Define model

model <- keras_model_sequential() %>%
  # LSTM layer
  layer_lstm(units = 32, 
             dropout=0.2, 
             recurrent_dropout=0.2,
             input_shape = c(1, 1), # dim(x_train_arr)[-1], # n timesteps, n features
             return_sequences = TRUE) %>%
  # LSTM layer
  layer_lstm(units = 32, 
             dropout=0.2, 
             recurrent_dropout=0.2,
             return_sequences = FALSE) %>%
  # A DENSE LAYER IN BETWEEN
  layer_dense(units = 32, activation = 'relu') %>%
  #Final prediction layer
  layer_dense(units = 1, activation = 'linear')
# Compile model
model %>% 
  compile(loss = "mse", 
          metric = 'mse', 
          optimizer = optimizer_adam())
model %>% summary()
Model: "sequential_2"
___________________________________________________________________________________________________
 Layer (type)                               Output Shape                            Param #        
===================================================================================================
 lstm_4 (LSTM)                              (None, 1, 32)                           4352           
                                                                                                   
 lstm_3 (LSTM)                              (None, 32)                              8320           
                                                                                                   
 dense_5 (Dense)                            (None, 32)                              1056           
                                                                                                   
 dense_4 (Dense)                            (None, 1)                               33             
                                                                                                   
===================================================================================================
Total params: 13,761
Trainable params: 13,761
Non-trainable params: 0
___________________________________________________________________________________________________

Fitting the model

  • Next, we fit our LSTM with fit(), holding out a validation split to monitor performance
  • We set shuffle = FALSE to preserve the order of the sequences
hist_model <- model %>% fit(x          = x_train_arr, 
                            y          = y_train_arr, 
                            epochs     = 10,
                            verbose    = TRUE, 
                            batch_size = 64,
                            validation_split = 0.25, 
                            shuffle    = FALSE)
Epoch 1/10
49/49 [==============================] - 6s 25ms/step - loss: 1.1273 - mse: 1.1273 - val_loss: 0.6197 - val_mse: 0.6197
Epoch 2/10
49/49 [==============================] - 0s 8ms/step - loss: 1.1257 - mse: 1.1257 - val_loss: 0.6196 - val_mse: 0.6196
Epoch 3/10
49/49 [==============================] - 0s 8ms/step - loss: 1.1238 - mse: 1.1238 - val_loss: 0.6194 - val_mse: 0.6194
Epoch 4/10
49/49 [==============================] - 0s 8ms/step - loss: 1.1244 - mse: 1.1244 - val_loss: 0.6195 - val_mse: 0.6195
Epoch 5/10
49/49 [==============================] - 0s 8ms/step - loss: 1.1226 - mse: 1.1226 - val_loss: 0.6202 - val_mse: 0.6202
Epoch 6/10
49/49 [==============================] - 0s 9ms/step - loss: 1.1245 - mse: 1.1245 - val_loss: 0.6199 - val_mse: 0.6199
Epoch 7/10
49/49 [==============================] - 0s 9ms/step - loss: 1.1201 - mse: 1.1201 - val_loss: 0.6203 - val_mse: 0.6203
Epoch 8/10
49/49 [==============================] - 0s 10ms/step - loss: 1.1229 - mse: 1.1229 - val_loss: 0.6203 - val_mse: 0.6203
Epoch 9/10
49/49 [==============================] - 0s 9ms/step - loss: 1.1212 - mse: 1.1212 - val_loss: 0.6205 - val_mse: 0.6205
Epoch 10/10
49/49 [==============================] - 0s 9ms/step - loss: 1.1253 - mse: 1.1253 - val_loss: 0.6205 - val_mse: 0.6205
hist_model %>% plot()
`geom_smooth()` using formula 'y ~ x'

model %>% evaluate(x_test_arr, y_test_arr)

43/43 [==============================] - 0s 2ms/step - loss: 0.6714 - mse: 0.6714
     loss       mse 
0.6713502 0.6713502 

Predicting Stock changes

  • We first predict the output of our test data
model_pred <- model %>% predict(x_test_arr) %>% as.numeric()
  • However, we need to rescale the output. To reverse the normalization, this small function will do the trick
reverse_norm<- function(x, mean, sds) {
  x_re <- (x * sds) + mean
  return(x_re)
  }
  • We apply it to the predictions, using the mean and standard deviation saved from the recipe
eval <- tibble(
  index = data_test %>% pull(index),
  truth = data_test %>% pull(value),
  pred = model_pred %>% reverse_norm(x = ., mean = prep_history$mean, sds = prep_history$sds)
) 
eval %>% 
  pivot_longer(-index) %>%
  ggplot(aes(x = index, y = value, col = name)) +
  geom_line()

Well… so-so.

Brief intro to working with time sequences and time series generators

Example timeseries:

  • Ok, let's take a brief look at how to work with sequential data in different ways and prepare it as input for an LSTM
  • For the sake of illustration, we just create a simple sequence with the numbers 1-100 (it’s easier to inspect; in reality we would obviously feed in real data)
# Generate an example sequence
data_example <- 1:100
data_example
  [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20  21  22  23
 [24]  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46
 [47]  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69
 [70]  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92
 [93]  93  94  95  96  97  98  99 100

Many-to-One predictions

  • In this setup, we use several periods to predict one subsequent observation.
n_timesteps <- 5  # Define that we would like to have 5 timesteps
batch_size <- 6 # Batch size (somewhat arbitrary)
n_features <- 1 # Number of features. Since we only predict the outcome based on its own sequence, it will be 1
  • We set up the Keras timeseries_generator, which feeds the LSTM (or another architecture) with sequences generated on the fly
train_gen <- 
  timeseries_generator(
    data = data_example, # The data we will use to create the sequences.
    targets = data_example, # The outcome data; here the same series, since we just predict the subsequent value
    length = n_timesteps, # How many previous steps in the sequence are used for each prediction
    sampling_rate = 1, # Use every observation within a window (2 would take every 2nd)
    stride = 2, # Offset between the starts of consecutive windows (2 = a new window every 2nd step)
    shuffle = FALSE, # In time-series prediction, we want to preserve the order of sequences, so always FALSE
    batch_size = batch_size # Size of the generated batches. Use this batch size also later in the LSTM
    )
  • Remember, this is a lazy function, meaning it will generate the sequences on-the-fly when they are needed.
  • Therefore, it cannot be inspected directly.
train_gen
<keras.preprocessing.sequence.TimeseriesGenerator>
  • However, we can extract single batches and inspect them.
  • This is helpful to get a feeling for what the different arguments of the generator do, and to test that they create the sequences you want.
  • Here, two arrays are returned: the first holds the generated input sequences, the second the corresponding outputs.
batch_0 <- train_gen[0]
batch_0
[[1]]
     [,1] [,2] [,3] [,4] [,5]
[1,]    1    2    3    4    5
[2,]    3    4    5    6    7
[3,]    5    6    7    8    9
[4,]    7    8    9   10   11
[5,]    9   10   11   12   13
[6,]   11   12   13   14   15

[[2]]
[1]  6  8 10 12 14 16
# create the model
model <- keras_model_sequential()  %>%
  # Add the layer. We will make it as simple as possible here with just one LSTM and an output layer.
  layer_lstm(
    units = 32, 
    batch_input_shape  = c(batch_size, n_timesteps, n_features), # the first layer in a model needs to know the shape of the input data
    #dropout = 0.1,
    #recurrent_dropout = 0.1,
    return_sequences = FALSE, # by default, an LSTM just returns the final state
    stateful = TRUE) %>% 
  # Final output layer
  layer_dense(units = 1)

model %>% compile(loss = 'mse', optimizer = optimizer_adam(), metrics = 'mse')
n_steps <- floor((length(data_example) - n_timesteps) / batch_size) # steps_per_epoch must be a whole number

hist <- model %>% fit_generator(
  generator = train_gen,
  steps_per_epoch = n_steps,
  epochs = 10
  )
Warning in fit_generator(., generator = train_gen, steps_per_epoch = n_steps,  :
  `fit_generator` is deprecated. Use `fit` instead, it now accept generators.
Epoch 1/10
15/15 [==============================] - 2s 41ms/step - loss: 3690.8801 - mse: 3690.8801
Epoch 2/10
15/15 [==============================] - 0s 12ms/step - loss: 3634.7844 - mse: 3634.7844
Epoch 3/10
15/15 [==============================] - 0s 12ms/step - loss: 3524.8340 - mse: 3524.8340
Epoch 4/10
15/15 [==============================] - 1s 39ms/step - loss: 3449.3716 - mse: 3449.3716
Epoch 5/10
15/15 [==============================] - 0s 12ms/step - loss: 3391.7959 - mse: 3391.7954
Epoch 6/10
15/15 [==============================] - 0s 13ms/step - loss: 3329.5364 - mse: 3329.5364
Epoch 7/10
15/15 [==============================] - 0s 11ms/step - loss: 3266.2383 - mse: 3266.2383
Epoch 8/10
15/15 [==============================] - 0s 12ms/step - loss: 3173.4714 - mse: 3173.4714
Epoch 9/10
15/15 [==============================] - 0s 13ms/step - loss: 3050.2288 - mse: 3050.2288
Epoch 10/10
15/15 [==============================] - 0s 13ms/step - loss: 2979.4160 - mse: 2979.4160

Your turn

  • Play around a bit with the arguments of the generator and see what outputs it produces. This will give you some intuition
  • For instance, what happens if you set stride to n_timesteps + 1?

Many-to-many predictions

  • Sometimes we want to predict a sequence of several timesteps.
  • Unfortunately, the generator has no option for that, so we have to prepare separate targets on our own.
  • I wrote a handy function that does so, which you can use.
# Define a function that outputs time_p timesteps for y
gen_timeseries_output <- function(data, timesteps = 1, stride = 1, timesteps_out = 1){
  
  target <- matrix(nrow = length(data), ncol = timesteps_out)
  
  data <- data %>% as.numeric()
  
  for (i in seq(1, length(data), by = (timesteps + stride - 1) )) {
    target[i,] <- data[(i+1):(i+timesteps_out)]
  }
  
  return(target)
}
  • Let’s try it
n_timesteps_out <- 5

outcome_sequence <- data_example %>%
  gen_timeseries_output(timesteps_out = n_timesteps_out)
  • Let's inspect
outcome_sequence %>% head(20)
  • Seems to produce what we want
  • Now we can feed that as the target into the generator
train_gen_seq <- 
  timeseries_generator(
    data = data_example,
    targets = outcome_sequence,
    length = 5,
  sampling_rate = 1,
  stride = 1,
  shuffle = FALSE,
  batch_size = 16
)
  • Let's inspect
batch_0_seq = train_gen_seq[0]
batch_0_seq
  • Looks about right, doesn't it?
n_x <- 1 # number of features
time_x <- 4 # 4 days
time_y <- 1 # ... to predict one day ahead
# Transforming the x sequence to a 3d tensor (necessary for LSTMs)
x_train_arr_n1 <- data_recipe %>% juice()  %>% pull(value) %>% as.matrix(ncol = time_x) %>% array_reshape(dim = c(nrow(.), ncol(.), 1))

x_test_arr_n1 <- x_test %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1, 1))

y_train_arr2 <- y_train %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1))
y_test_arr2 <- y_test %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1))
x <- data_recipe %>% juice()  %>% pull(value) %>% matrix(ncol = time_x)
dim(x)[2]
length(x)[2]
x_train_arr %>% dim()
x_train_arr %>% glimpse()

Multi-episode LSTM

Transform to a 3d tensor for keras

tsteps_x = 5
tsteps_y = 5
train_arr <- x_train %>% pull(value) %>% as.numeric() %>% matrix(ncol = (tsteps_x + tsteps_y))
x_train_arr <- train_arr[,1:tsteps_x] %>% array_reshape(dim = c(length(.), 1, 1))
#x_train %<>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1, 1))
#x_test %<>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1, 1))

#y_train %<>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1))
#y_test %<>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1))
---
title:  'Sequence-2-Sequence forecasting (R)'
author: "Daniel S. Hain (dsh@business.aau.dk)"
date: "Updated `r format(Sys.time(), '%B %d, %Y')`"
output:
  html_notebook:
    code_folding: show
    df_print: paged
    toc: true
    toc_depth: 2
    toc_float:
      collapsed: false
    theme: flatly
---

```{r setup, include=FALSE}
### Generic preamble
rm(list=ls())
Sys.setenv(LANG = "en") # For english language
options(scipen = 5) # To deactivate annoying scientific number notation
```


```{r}
library(tidyverse)
library(magrittr)
library(keras)
library(tidymodels) # Or load only 'rsample' and 'recipes' on their own
```

3 Intro: Ways to create sequences...

# Workshop Stock prediction

Task:

1. Get some stock data (tip: Use tidyquant)
    * Limit yourself for now to one stock
    * Limit yourself to one variable (preferably some price data)
2. Develop a one-step ahead prediction of prices (or their movements)

# Load some data

## Select a stock and load the data

* We will use the tidyquant package to download stock data

```{r}
library(tidyquant) # My favorite package to get stock data
library(timetk) 
```

```{r}
tickers = c("NVO") # We can also try AAPL etc
            
data_stocks <- tq_get(tickers,
               from = "2000-01-01",
               to = "2021-11-17",
               get = "stock.prices" # What we want to get.... here prices
               )
```


## Some plots for exploration...

```{r}
data_stocks %>% glimpse()
```

```{r}
data_stocks %>% head()
```

```{r}
data_stocks %>% 
  plot_time_series(date, adjusted)

# # ggplot alternative
# data_stocks %>%
#   ggplot(aes(x = date, y = adjusted,)) +
#   geom_line() +
#   labs(x = 'Date', y = "Adjusted Price") 
```

# Preprocessing

```{r}
# Limit data
data <- data_stocks %>%
  rename(index = date, value = adjusted) %>%
  select(index, value) %>%
  arrange(index) 
```

* It is usually easier to model changes rather than absolute prices, so we create a variable measuring the percentage change in price instead

```{r}
# Remodel value as percentage change
data %<>%
  distinct(index, .keep_all = TRUE) %>%
  tidyr::fill(value, .direction = "downup") %>%
  mutate(value = (value - lag(value,1)) / lag(value,1) ) %>%
  drop_na()
```

```{r}
data %>%
  ggplot(aes(x = index, y = value)) +
  geom_line() +
  labs(x = 'Date', y = "Price change in pct") 
```


```{r}
data %>%
    plot_acf_diagnostics(index, value)
```


## Train & Test split

* We do a time-series split which keeps the sequencing of the data

```{r}
# We use time_splits here to maintain the sequences
data_split <- data %>% initial_time_split(prop = 0.75)
```

```{r}
data_train <- data_split %>% training()
data_test <- data_split %>% testing()
```

* Let's check the date ranges covered by the train and test samples

```{r}
# See what we got
data_train %>% pull(index) %>% min()
data_train %>% pull(index) %>% max()
data_test %>% pull(index) %>% min()
data_test %>% pull(index) %>% max()
```

```{r}
data_train %>% mutate(split = 'training') %>%
  bind_rows(data_test %>% mutate(split = 'testing')) %>%
  ggplot(aes(x = index, y = value, col = split)) +
  geom_line() 
```

## Define a recipe

* We only apply standardization (centering and scaling) here with `step_normalize`

```{r}
data_recipe <- data_train %>%
  recipe(value ~ .) %>% 
  step_normalize(value) %>%
  step_arrange(index) %>%
  prep()
```

* We save the mean and standard deviation to rescale the predictions later

```{r}
# Preserve the values for later (to reconstruct original values)
prep_history <- tibble(
  mean = data_recipe$steps[[1]]$means,
  sds = data_recipe$steps[[1]]$sds
)
```

```{r}
prep_history
```
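
As a quick sanity check (a sketch, assuming the single `value` outcome used above), we can undo the normalization on the baked training data and compare it against the original values:

```{r}
# Sketch: reverse the normalization and compare with the original training values
data_recipe %>% 
  juice() %>% 
  mutate(value_rescaled = value * prep_history$sds + prep_history$mean) %>% 
  left_join(data_train %>% rename(value_orig = value), by = "index") %>% 
  head()
```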

## Get processed train & test data

* We now create an x and y split. Since we always predict the next observation, that's easy: we just set `y = lead(x, 1)`
* We fill the trailing missing observation (created by the lead) with the preceding value

```{r}
# Number of lags
n_lag = 1

# Train data
x_train <- data_recipe %>% juice() 

y_train <- data_recipe %>%  juice() %>%
  mutate(value = value %>% lead(n_lag)) %>%
  tidyr::fill(value, .direction = "downup") 

# And the same for the test data
x_test <- data_recipe %>% bake(data_test) 

y_test <- data_recipe %>%  bake(data_test) %>%  
  mutate(value = value %>% lead(n_lag)) %>%
  tidyr::fill(value, .direction = "downup") 
```

## Transform to a 3d tensor for keras

```{r}
# Transforming the x sequence to a 3d tensor (necessary for LSTMs)
x_train_arr <- x_train %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1, 1))
x_test_arr <- x_test %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1, 1))

y_train_arr <- y_train %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1))
y_test_arr <- y_test %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1))
```
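
Here every sample carries a single timestep. If you later want each sample to contain several past observations, one option (a sketch using base R's `embed()`, not part of the workflow above) is to build overlapping windows before reshaping:

```{r}
# Sketch: overlapping windows of 5 past values per sample, built with base R's embed()
n_steps_example <- 5
x_windows <- x_train %>% pull(value) %>% embed(n_steps_example)
x_windows <- x_windows[, n_steps_example:1] # embed() puts the newest value first; reverse to chronological order
x_windows_arr <- array_reshape(x_windows, dim = c(nrow(x_windows), n_steps_example, 1))
dim(x_windows_arr) # (samples, timesteps, features)
```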


```{r}
x_train_arr %>% dim()
```

```{r}
x_train_arr %>% glimpse()
```


# Setting up the LSTM

# LSTM

## Define model

```{r}
model <- keras_model_sequential() %>%
  # LSTM layer
  layer_lstm(units = 32, 
             dropout=0.2, 
             recurrent_dropout=0.2,
             input_shape = c(1, 1), # dim(x_train_arr)[-1], # n timesteps, n features
             return_sequences = TRUE) %>%
  # LSTM layer
  layer_lstm(units = 32, 
             dropout=0.2, 
             recurrent_dropout=0.2,
             return_sequences = FALSE) %>%
  # A DENSE LAYER IN BETWEEN
  layer_dense(units = 32, activation = 'relu') %>%
  #Final prediction layer
  layer_dense(units = 1, activation = 'linear')
```

```{r}
# Compile model
model %>% 
  compile(loss = "mse", 
          metric = 'mse', 
          optimizer = optimizer_adam())
```


```{r}
model %>% summary()
```

## Fitting the model

* Next, we fit our LSTM with `fit()`, holding out a validation split to monitor performance.
* We set `shuffle = FALSE` to preserve the order of the sequences

```{r}
hist_model <- model %>% fit(x          = x_train_arr, 
                            y          = y_train_arr, 
                            epochs     = 10,
                            verbose    = TRUE, 
                            batch_size = 64,
                            validation_split = 0.25, 
                            shuffle    = FALSE)
```
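
If you want training to stop once the validation loss stops improving, keras provides an early-stopping callback. A sketch of how it could be plugged into the `fit()` call above (commented out so it does not train on top of the model we just fit; the `patience` value is an arbitrary choice):

```{r}
# Sketch: the same fit() call with an early-stopping callback on the validation loss
# hist_model <- model %>% fit(
#   x = x_train_arr, y = y_train_arr,
#   epochs = 50, batch_size = 64,
#   validation_split = 0.25, shuffle = FALSE,
#   callbacks = list(callback_early_stopping(monitor = "val_loss", patience = 3))
# )
```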

```{r}
hist_model %>% plot()
```


```{r}
model %>% evaluate(x_test_arr, y_test_arr)
```

## Predicting Stock changes

* We first predict the output of our test data

```{r}
model_pred <- model %>% predict(x_test_arr) %>% as.numeric()
```

* However, we need to rescale the output. To reverse the normalization, this small function will do the trick

```{r}
reverse_norm<- function(x, mean, sds) {
  x_re <- (x * sds) + mean
  return(x_re)
  }
```

* We apply it to the predictions, using the mean and standard deviation saved from the recipe

```{r}
eval <- tibble(
  index = data_test %>% pull(index),
  truth = data_test %>% pull(value),
  pred = model_pred %>% reverse_norm(x = ., mean = prep_history$mean, sds = prep_history$sds)
) 
```

```{r, fig.width=7.5, fig.height=5}
eval %>% 
  pivot_longer(-index) %>%
  ggplot(aes(x = index, y = value, col = name)) +
  geom_line()
```
Well... so-so.
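
To put a number on that impression, a quick sketch using the yardstick functions loaded with tidymodels: compute standard error metrics on the `eval` tibble and compare against a naive "no change" forecast.

```{r}
# Sketch: numeric error metrics for the one-step-ahead predictions
eval %>% metrics(truth = truth, estimate = pred)

# Naive benchmark: always predict "no change"
eval %>% mutate(naive = 0) %>% rmse(truth = truth, estimate = naive)
```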


# Brief intro to working with time sequences and time series generators

## Example timeseries:

* Ok, let's take a brief look at how to work with sequential data in different ways and prepare it as input for an LSTM
* For the sake of illustration, we just create a simple sequence with the numbers 1-100 (it's easier to inspect; in reality we would obviously feed in real data)

```{r}
# Generate an example sequence
data_example <- 1:100  #%>% array_reshape(dim = c(length(.), 1, 1))
```

## Many-to-One predictions

* In this setup, we use several periods to predict one subsequent observation.

```{r}
n_timesteps <- 5  # Define that we would like to have 5 timesteps
batch_size <- 6 # Batch size (somewhat arbitrary)
n_features <- 1 # Number of features. Since we only predict the outcome based on its own sequence, it will be 1
```

* We set up the Keras `timeseries_generator`, which feeds the LSTM (or another architecture) with sequences generated on the fly 

```{r}
train_gen <- 
  timeseries_generator(
    data = data_example, # The data we will use to create the sequences.
    targets = data_example, # The outcome data; here the same series, since we just predict the subsequent value
    length = n_timesteps, # How many previous steps in the sequence are used for each prediction
    sampling_rate = 1, # Use every observation within a window (2 would take every 2nd)
    stride = 2, # Offset between the starts of consecutive windows (2 = a new window every 2nd step)
    shuffle = FALSE, # In time-series prediction, we want to preserve the order of sequences, so always FALSE
    batch_size = batch_size # Size of the generated batches. Use this batch size also later in the LSTM
    )

```

* Remember, this is a lazy function, meaning it will generate the sequences on-the-fly when they are needed.
* Therefore, it cannot be inspected directly.

```{r}
train_gen
```

* However, we can extract single batches and inspect them.
* This is helpful to get a feeling for what the different arguments of the generator do, and to test that they create the sequences you want.
* Here, two arrays are returned: the first holds the generated input sequences, the second the corresponding outputs.

```{r}
batch_0 <- train_gen[0]
batch_0
```

```{r}
# create the model
model <- keras_model_sequential()  %>%
  # Add the layer. We will make it as simple as possible here with just one LSTM and an output layer.
  layer_lstm(
    units = 32, 
    batch_input_shape  = c(batch_size, n_timesteps, n_features), # the first layer in a model needs to know the shape of the input data
    #dropout = 0.1,
    #recurrent_dropout = 0.1,
    return_sequences = FALSE, # by default, an LSTM just returns the final state
    stateful = TRUE) %>% 
  # Final output layer
  layer_dense(units = 1)

model %>% compile(loss = 'mse', optimizer = optimizer_adam(), metrics = 'mse')
```




```{r}
n_steps <- floor((length(data_example) - n_timesteps) / batch_size) # steps_per_epoch must be a whole number

hist <- model %>% fit_generator(
  generator = train_gen,
  steps_per_epoch = n_steps,
  epochs = 10
  )
```
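
Because the model was declared `stateful = TRUE`, the hidden state carries over between batches and epochs. Whether that is desirable depends on whether consecutive batches really continue each other. A sketch of a manual training loop that resets the state after every epoch (commented out so it does not train on top of the fit above):

```{r}
# Sketch: manual epoch loop with state resets for a stateful LSTM
# for (epoch in 1:10) {
#   model %>% fit(train_gen, steps_per_epoch = n_steps, epochs = 1, verbose = 0)
#   model %>% reset_states()
# }
```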

## Your turn

* Play around a bit with the arguments of the generator and see what outputs it produces. This will give you some intuition
* For instance, what happens if you set `stride` to `n_timesteps + 1`? (See the sketch below.)
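
A sketch of that experiment as a starting point (same example data, only the stride changed; purely to build intuition):

```{r}
# Sketch: non-overlapping windows by setting stride to n_timesteps + 1
gen_experiment <- timeseries_generator(
  data = data_example,
  targets = data_example,
  length = n_timesteps,
  sampling_rate = 1,
  stride = n_timesteps + 1,
  shuffle = FALSE,
  batch_size = batch_size
)
gen_experiment[0] # inspect the first batch: the input windows no longer overlap
```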


## Many-to-many predictions

* Sometimes we want to predict a sequence of several timesteps.
* Unfortunately, the generator has no option for that, so we have to prepare separate targets on our own.
* I wrote a handy function that does so, which you can use.


```{r}
# Define a function that outputs time_p timesteps for y
gen_timeseries_output <- function(data, timesteps = 1, stride = 1, timesteps_out = 1){
  
  target <- matrix(nrow = length(data), ncol = timesteps_out)
  
  data <- data %>% as.numeric()
  
  for (i in seq(1, length(data), by = (timesteps + stride - 1) )) {
    target[i,] <- data[(i+1):(i+timesteps_out)]
  }
  
  return(target)
}
```

* Let's try it

```{r}
n_timesteps_out <- 5

outcome_sequence <- data_example %>%
  gen_timeseries_output(timesteps_out = n_timesteps_out)
```

* Let's inspect

```{r}
outcome_sequence %>% head(20)
```

* Seems to produce what we want
* Now we can feed that as the target into the generator

```{r}
train_gen_seq <- 
  timeseries_generator(
    data = data_example,
    targets = outcome_sequence,
    length = 5,
  sampling_rate = 1,
  stride = 1,
  shuffle = FALSE,
  batch_size = 16
)

```

* Let's inspect

```{r}
batch_0_seq = train_gen_seq[0]
batch_0_seq
```

* Looks about right, doesn't it?
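
For completeness, a minimal sketch of a matching model: an LSTM over the input window followed by a dense layer with one unit per predicted timestep. (Depending on your keras/TensorFlow version you may need to reshape the generator's input data to a one-column matrix first, so the samples carry an explicit feature dimension.)

```{r}
# Sketch: a minimal many-to-many model mapping n_timesteps inputs to n_timesteps_out outputs
model_seq <- keras_model_sequential() %>%
  layer_lstm(units = 32, input_shape = c(n_timesteps, 1)) %>% # n_timesteps steps, 1 feature
  layer_dense(units = n_timesteps_out)                        # one prediction per future timestep

model_seq %>% compile(loss = "mse", optimizer = optimizer_adam())
model_seq %>% summary()
```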




<!----

# Multiple timesteps stock prediction

## Many-to-one

```{r}
n_x <- 1 # number of features
time_x <- 4 # 4 days
time_y <- 1 # ... to predict one day ahead
```


```{r}
# Transforming the x sequence to a 3d tensor (necessary for LSTMs)
x_train_arr_n1 <- data_recipe %>% juice()  %>% pull(value) %>% as.matrix(ncol = time_x) %>% array_reshape(dim = c(nrow(.), ncol(.), 1))

x_test_arr_n1 <- x_test %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1, 1))

y_train_arr2 <- y_train %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1))
y_test_arr2 <- y_test %>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1))
```

```{r}
x <- data_recipe %>% juice()  %>% pull(value) %>% matrix(ncol = time_x)
dim(x)[2]
length(x)[2]
```

```{r}
x_train_arr %>% dim()
```

```{r}
x_train_arr %>% glimpse()
```



# Multi-episode LSTM

## Transform to a 3d tensor for keras

```{r}
tsteps_x = 5
tsteps_y = 5
```


```{r}
train_arr <- x_train %>% pull(value) %>% as.numeric() %>% matrix(ncol = (tsteps_x + tsteps_y))
```

```{r}
x_train_arr <- train_arr[,1:tsteps_x] %>% array_reshape(dim = c(length(.), 1, 1))
```


```{r}
#x_train %<>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1, 1))
#x_test %<>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1, 1))

#y_train %<>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1))
#y_test %<>% pull(value) %>% as.numeric() %>% array_reshape(dim = c(length(.), 1))
```
