Model Step 3 - Model Metrics

Published

January 24, 2025

Background

This documents monitors the model performance. It is refreshed on a daily basis. The following metrics are monitored:

  • Root Mean Squared Error (RMSE),
  • R Squared (RSQ), and
  • Mean Absolute Error (MAE).

Setup

Load the required libraries and evaluation data. The table below is the first five rows of the evaluation data.

Code
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vetiver)
library(pins)
library(yardstick)
## 
## Attaching package: 'yardstick'
## 
## The following object is masked from 'package:readr':
## 
##     spec
library(glue)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

board <- pins::board_connect()
## Connecting to Posit Connect 2024.12.0 at <https://pub.current.posit.team>
v <- vetiver_pin_read(board, params$name, version = params$version)
v_meta <- pin_meta(board, params$name)

con <- DBI::dbConnect(
  odbc::odbc(),
  Driver      = "postgresql",
  Server      = Sys.getenv("DB_SERVER"),
  Port        = "5432",
  Database    = "soleng",
  UID         = Sys.getenv("DB_USER"),
  PWD         = Sys.getenv("DB_PASSWORD"),
  BoolsAsChar = "",
  timeout     = 10
)
bike_model_data <- tbl(con, DBI::Id(schema="content", name="bike_model_data"))

train_start_date <- lubridate::as_date(v$metadata$user$train_dates[1])
train_end_date <- lubridate::as_date(v$metadata$user$train_dates[2])
test_start_date <- lubridate::as_date(v$metadata$user$test_dates[1])
test_end_date <- lubridate::as_date(v$metadata$user$test_dates[2])

test_data <- bike_model_data %>%
  filter(
    date >= train_start_date,
    date <= train_end_date
  ) %>%
  collect()
  
test_data %>%
  head() %>%
  kable() %>%
  kable_material()
id hour date month dow n_bikes lat lon
0099b016-32c9-4536-ac4c-dcc1a117bd95 0 2025-01-11 1 Saturday 0 38.87035 -76.94528
0099b016-32c9-4536-ac4c-dcc1a117bd95 0 2025-01-12 1 Sunday 0 38.87035 -76.94528
0099b016-32c9-4536-ac4c-dcc1a117bd95 0 2025-01-13 1 Monday 0 38.87035 -76.94528
0099b016-32c9-4536-ac4c-dcc1a117bd95 0 2025-01-14 1 Tuesday 0 38.87035 -76.94528
0099b016-32c9-4536-ac4c-dcc1a117bd95 0 2025-01-15 1 Wednesday 0 38.87035 -76.94528
0099b016-32c9-4536-ac4c-dcc1a117bd95 0 2025-01-16 1 Thursday 0 38.87035 -76.94528

Compute metrics

Use vetiver to compute the latest evaluation metrics. The metrics are stored as a pin on Posit Connect. The table below is the first 5 rows of the evaluation metrics

Code
## compute predictions for your evaluation data
## `handler_startup` is designed to get the R process ready to make predictions
suppressPackageStartupMessages(handler_startup(v))

# Specifically load the packages required by the model. Check 
# `v$metadata$required_pkgs` to see the required pacakges. These packages must
# be specicially defined so that Posit Connect knows to install them when
# deploying this document.
library(parsnip)
library(ranger)
library(recipes)
## 
## Attaching package: 'recipes'
## The following object is masked from 'package:stringr':
## 
##     fixed
## The following object is masked from 'package:stats':
## 
##     step
library(workflows)
library(slider)

preds <- augment(v, test_data)

latest_metrics <- preds %>%
  arrange(date) %>%
  vetiver_compute_metrics(
    date_var = date,
    period = "day",
    truth = n_bikes,
    estimate = .pred
  )

pin_name <- "katie.masiello@posit.co/bike-predict-model-metrics"

if (pin_exists(board, pin_name)) {
  print("Pin already exists, updating existing pin...")
  vetiver_pin_metrics(board, latest_metrics, pin_name, overwrite = TRUE)
} else {
  print("Creating metrics pin for the first time...")
  pin_write(board, latest_metrics, pin_name)
}
## [1] "Pin already exists, updating existing pin..."
## ! Use a fully specified name including user name: "katie.masiello@posit.co/bike-predict-model-metrics", not "bike-predict-model-metrics".
## Writing to pin 'katie.masiello@posit.co/bike-predict-model-metrics'
## # A tibble: 219 × 5
##    .index        .n .metric .estimator .estimate
##    <date>     <int> <chr>   <chr>          <dbl>
##  1 2024-09-30  1548 rmse    standard       3.61 
##  2 2024-09-30  1548 rsq     standard       0.690
##  3 2024-09-30  1548 mae     standard       2.86 
##  4 2024-10-01  6192 rmse    standard       3.26 
##  5 2024-10-01  6192 rsq     standard       0.791
##  6 2024-10-01  6192 mae     standard       2.58 
##  7 2024-10-03   776 rmse    standard       3.19 
##  8 2024-10-03   776 rsq     standard       0.746
##  9 2024-10-03   776 mae     standard       2.46 
## 10 2024-10-04  1552 rmse    standard       2.73 
## # ℹ 209 more rows

all_time_metrics <- pin_read(board, pin_name)

all_time_metrics %>%
  head() %>%
  kable() %>%
  kable_material()
.index .n .metric .estimator .estimate
2024-09-30 1548 rmse standard 3.6100768
2024-09-30 1548 rsq standard 0.6896454
2024-09-30 1548 mae standard 2.8603761
2024-10-01 6192 rmse standard 3.2587760
2024-10-01 6192 rsq standard 0.7912248
2024-10-01 6192 mae standard 2.5755073

Visualize metrics

Use vetiver to visualize the all time model metrics.

Code
vetiver_plot_metrics(all_time_metrics) +
  labs(
    title = "Model Metrics",
    size = "Number of\nObservations"
  )