Model Step 3 - Model Metrics

Published

March 28, 2025

Background

This documents monitors the model performance. It is refreshed on a daily basis. The following metrics are monitored:

  • Root Mean Squared Error (RMSE),
  • R Squared (RSQ), and
  • Mean Absolute Error (MAE).

Setup

Load the required libraries and evaluation data. The table below is the first five rows of the evaluation data.

Code
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vetiver)
library(pins)
library(yardstick)
## 
## Attaching package: 'yardstick'
## 
## The following object is masked from 'package:readr':
## 
##     spec
library(glue)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

board <- pins::board_connect()
## Connecting to Posit Connect 2025.03.0 at <https://pub.current.posit.team>
v <- vetiver_pin_read(board, params$name, version = params$version)
v_meta <- pin_meta(board, params$name)

con <- DBI::dbConnect(
  odbc::odbc(),
  Driver      = "postgresql",
  Server      = Sys.getenv("DB_SERVER"),
  Port        = "5432",
  Database    = "soleng",
  UID         = Sys.getenv("DB_USER"),
  PWD         = Sys.getenv("DB_PASSWORD"),
  BoolsAsChar = "",
  timeout     = 10
)
bike_model_data <- tbl(con, DBI::Id(schema="content", name="bike_model_data"))

train_start_date <- lubridate::as_date(v$metadata$user$train_dates[1])
train_end_date <- lubridate::as_date(v$metadata$user$train_dates[2])
test_start_date <- lubridate::as_date(v$metadata$user$test_dates[1])
test_end_date <- lubridate::as_date(v$metadata$user$test_dates[2])

test_data <- bike_model_data %>%
  filter(
    date >= train_start_date,
    date <= train_end_date
  ) %>%
  collect()
  
test_data %>%
  head() %>%
  kable() %>%
  kable_material()
id hour date month dow n_bikes lat lon
0099b016-32c9-4536-ac4c-dcc1a117bd95 0 2023-12-10 12 Sunday 1 38.87035 -76.94528
0099b016-32c9-4536-ac4c-dcc1a117bd95 0 2023-12-11 12 Monday 1 38.87035 -76.94528
0099b016-32c9-4536-ac4c-dcc1a117bd95 0 2023-12-12 12 Tuesday 1 38.87035 -76.94528
0099b016-32c9-4536-ac4c-dcc1a117bd95 0 2023-12-13 12 Wednesday 0 38.87035 -76.94528
0099b016-32c9-4536-ac4c-dcc1a117bd95 0 2023-12-15 12 Friday 0 38.87035 -76.94528
0099b016-32c9-4536-ac4c-dcc1a117bd95 0 2023-12-16 12 Saturday 0 38.87035 -76.94528

Compute metrics

Use vetiver to compute the latest evaluation metrics. The metrics are stored as a pin on Posit Connect. The table below is the first 5 rows of the evaluation metrics

Code
## compute predictions for your evaluation data
## `handler_startup` is designed to get the R process ready to make predictions
suppressPackageStartupMessages(handler_startup(v))

# Specifically load the packages required by the model. Check 
# `v$metadata$required_pkgs` to see the required pacakges. These packages must
# be specicially defined so that Posit Connect knows to install them when
# deploying this document.
library(parsnip)
library(ranger)
library(recipes)
## 
## Attaching package: 'recipes'
## The following object is masked from 'package:stringr':
## 
##     fixed
## The following object is masked from 'package:stats':
## 
##     step
library(workflows)
library(slider)

preds <- augment(v, test_data)

latest_metrics <- preds %>%
  arrange(date) %>%
  vetiver_compute_metrics(
    date_var = date,
    period = "day",
    truth = n_bikes,
    estimate = .pred
  )

pin_name <- "katie.masiello@posit.co/bike-predict-model-metrics"

if (pin_exists(board, pin_name)) {
  print("Pin already exists, updating existing pin...")
  vetiver_pin_metrics(board, latest_metrics, pin_name, overwrite = TRUE)
} else {
  print("Creating metrics pin for the first time...")
  pin_write(board, latest_metrics, pin_name)
}
## [1] "Pin already exists, updating existing pin..."
## ! Use a fully specified name including user name: "katie.masiello@posit.co/bike-predict-model-metrics", not "bike-predict-model-metrics".
## Writing to pin 'katie.masiello@posit.co/bike-predict-model-metrics'
## # A tibble: 417 × 5
##    .index        .n .metric .estimator .estimate
##    <date>     <int> <chr>   <chr>          <dbl>
##  1 2023-12-10  8844 rmse    standard       2.97 
##  2 2023-12-10  8844 rsq     standard       0.860
##  3 2023-12-10  8844 mae     standard       2.34 
##  4 2023-12-11  9581 rmse    standard       3.03 
##  5 2023-12-11  9581 rsq     standard       0.878
##  6 2023-12-11  9581 mae     standard       2.43 
##  7 2023-12-12  8846 rmse    standard       2.85 
##  8 2023-12-12  8846 rsq     standard       0.881
##  9 2023-12-12  8846 mae     standard       2.27 
## 10 2023-12-13  8856 rmse    standard       2.67 
## # ℹ 407 more rows

all_time_metrics <- pin_read(board, pin_name)

all_time_metrics %>%
  head() %>%
  kable() %>%
  kable_material()
.index .n .metric .estimator .estimate
2023-12-10 8844 rmse standard 2.9707359
2023-12-10 8844 rsq standard 0.8604777
2023-12-10 8844 mae standard 2.3425498
2023-12-11 9581 rmse standard 3.0257613
2023-12-11 9581 rsq standard 0.8776588
2023-12-11 9581 mae standard 2.4257192

Visualize metrics

Use vetiver to visualize the all time model metrics.

Code
vetiver_plot_metrics(all_time_metrics) +
  labs(
    title = "Model Metrics",
    size = "Number of\nObservations"
  )