# Open an ODBC connection to the PostgreSQL database that holds the bike data.
# The server address and credentials are read from environment variables so
# that no secrets are written into the notebook itself.
con <- DBI::dbConnect(
  odbc::odbc(),
  Driver = "postgresql",
  Server = Sys.getenv("DB_SERVER"),
  Port = "5432",
  Database = "soleng",
  UID = Sys.getenv("DB_USER"),
  PWD = Sys.getenv("DB_PASSWORD"),
  BoolsAsChar = "",
  timeout = 10
)
Model Step 1 - Train and Deploy Model
This notebook trains a model to predict the number of bikes at a given bike docking station. The model is trained using the bike_model_data table from Content DB. The trained model is then:
- pinned to Posit Connect
- deployed as a plumber API to Posit Connect using vetiver.
Get data
Connect to the database:
Split the data into a train/test split:
# Reference the content.bike_model_data table through the open connection.
# Rows are only brought into R where collect() is called further down.
all_days <- tbl(con, DBI::Id(schema = "content", name = "bike_model_data"))

# Get a vector that contains all of the dates, sorted newest first.
dates <- all_days %>%
  distinct(date) %>%
  collect() %>%
  arrange(desc(date)) %>%
  pull(date) %>%
  as.Date()

# Split the data into test and train.
n_days_test <- 2
n_days_to_train <- 10

# The split indexes one position past the test days; stop with a clear
# message instead of propagating NA when there are too few distinct dates.
stopifnot(
  "Not enough distinct dates for the requested test window" =
    length(dates) > n_days_test
)

# `dates` is sorted newest first, so the entry right after the first
# `n_days_test` dates is the last day of the training window.
train_end_date <- dates[n_days_test + 1]
train_start_date <- train_end_date - n_days_to_train
# Training data split: keep rows whose date falls inside the training window
# (both bounds inclusive), drop duplicate rows, and pull the result into R.
train_data <- all_days %>%
  filter(
    date >= train_start_date,
    date <= train_end_date
  ) %>%
  distinct() %>%
  collect()

# Summarise the training window for the log message below.
start <- min(train_data$date)
end <- max(train_data$date)
num_obs <- scales::comma(nrow(train_data))

print(glue::glue(
  "The model will be trained on data from {start} to {end} ",
  "({num_obs} observations). "
))
## The model will be trained on data from 2025-01-24 to 2025-02-03 (8,630 observations).
# Test data split: every row dated strictly after the last training day,
# with duplicate rows dropped, pulled into R.
test_data <- all_days %>%
  filter(date > train_end_date) %>%
  distinct() %>%
  collect()

# Summarise the test window for the log message below.
start <- min(test_data$date)
end <- max(test_data$date)
num_obs <- scales::comma(nrow(test_data))

print(glue::glue(
  "The model will be tested on data from {start} to {end} ",
  "({num_obs} observations). "
))
## The model will be tested on data from 2025-02-04 to 2025-02-05 (1,572 observations).
Train the model
Data preprocessing
Define a recipe to clean the data.
# Define a recipe to clean the data: predict n_bikes from all other columns,
# expand day-of-week (`dow`) into dummy columns, and integer-encode `id` and
# `date`.
recipe_spec <-
  recipe(n_bikes ~ ., data = train_data) %>%
  step_dummy(dow) %>%
  step_integer(id, date)

# Preview the cleaned training data by applying the prepared recipe to the
# first few training rows.
recipe_spec %>%
  prep(train_data) %>%
  bake(head(train_data)) %>%
  glimpse()
## Rows: 6
## Columns: 13
## $ id <int> 1, 1, 1, 1, 1, 1
## $ hour <dbl> 0, 0, 0, 0, 0, 0
## $ date <int> 1, 2, 3, 4, 5, 6
## $ month <dbl> 1, 1, 1, 1, 1, 1
## $ lat <dbl> 38.87035, 38.87035, 38.87035, 38.87035, 38.87035, 38.870…
## $ lon <dbl> -76.94528, -76.94528, -76.94528, -76.94528, -76.94528, -…
## $ n_bikes <dbl> 1, 1, 1, 1, 2, 0
## $ dow_Monday <dbl> 0, 0, 0, 1, 0, 0
## $ dow_Saturday <dbl> 0, 1, 0, 0, 0, 0
## $ dow_Sunday <dbl> 0, 0, 1, 0, 0, 0
## $ dow_Thursday <dbl> 0, 0, 0, 0, 0, 0
## $ dow_Tuesday <dbl> 0, 0, 0, 0, 1, 0
## $ dow_Wednesday <dbl> 0, 0, 0, 0, 0, 1
Fit model
Fit a random forest model:
# Random forest regression model specification using the ranger engine.
model_spec <-
  rand_forest() %>%
  set_mode("regression") %>%
  set_engine("ranger")

# Bundle the preprocessing recipe and the model into a single workflow so
# both are fitted, and later deployed, together.
model_workflow <-
  workflow() %>%
  add_recipe(recipe_spec) %>%
  add_model(model_spec)

# Fit the workflow on the training data, then print the fitted object.
model_fit <- fit(model_workflow, data = train_data)
model_fit
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: rand_forest()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 2 Recipe Steps
##
## • step_dummy()
## • step_integer()
##
## ── Model ───────────────────────────────────────────────────────────────────────
## Ranger result
##
## Call:
## ranger::ranger(x = maybe_data_frame(x), y = y, num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1))
##
## Type: Regression
## Number of trees: 500
## Sample size: 8630
## Number of independent variables: 12
## Mtry: 3
## Target node size: 5
## Variable importance mode: none
## Splitrule: variance
## OOB prediction error (MSE): 16.87436
## R squared (OOB): 0.3083622
Model evaluation
# Predict on the held-out test data.
predictions <- predict(model_fit, test_data)

# Attach the predictions to the test rows so that actual and predicted
# values sit side by side.
results <- test_data %>%
  mutate(preds = predictions$.pred)

# Out-of-sample metrics for actual vs. predicted bike counts.
# NOTE(review): oos_metrics() is not defined in this notebook; it is
# presumably provided by a helper package loaded elsewhere. Confirm.
oos_metrics(results$n_bikes, results$preds)
## # A tibble: 1 × 4
## rmse mae ccc r2
## <dbl> <dbl> <dbl> <dbl>
## 1 4.56 3.53 0.302 0.257
Model deployment
vetiver
Create a vetiver model object.
# Name of the model, and the fully qualified pin name (owner/model_name).
model_name <- "bike_predict_model_r"
pin_name <- glue::glue("katie.masiello@posit.co/{model_name}")

# Get the train and test data ranges. This will be passed into the pin metadata
# so that other scripts can access this information.
date_metadata <- list(
  train_dates = c(
    as.character(min(train_data$date)),
    as.character(max(train_data$date))
  ),
  test_dates = c(
    as.character(min(test_data$date)),
    as.character(max(test_data$date))
  )
)
print(date_metadata)
## $train_dates
## [1] "2025-01-24" "2025-02-03"
##
## $test_dates
## [1] "2025-02-04" "2025-02-05"
# Create the vetiver model.
# The input prototype is one training row without the outcome column, and
# the train/test date ranges are attached as metadata.
# NOTE(review): newer vetiver releases document `save_prototype` as the
# replacement for `save_ptype`. Confirm against the installed version before
# renaming.
v <- vetiver_model(
  model_fit,
  model_name,
  versioned = TRUE,
  save_ptype = train_data %>%
    head(1) %>%
    select(-n_bikes),
  metadata = date_metadata
)
v
##
## ── bike_predict_model_r ─ <bundled_workflow> model for deployment
## A ranger regression modeling workflow using 7 features
pins
Save the model as a pin to Posit Connect:
# Use Posit Connect as a board. The server URL and API key come from
# environment variables.
board <- pins::board_connect(
  server = Sys.getenv("CONNECT_SERVER"),
  key = Sys.getenv("CONNECT_API_KEY"),
  versioned = TRUE
)

# Write the model to the board.
board %>%
  vetiver_pin_write(vetiver_model = v)
plumber
Then, deploy the model as a plumber API to Posit Connect.
# Add server
rsconnect::addServer(
  url = "https://pub.current.posit.team/__api__",
  name = "pub.current"
)

# Add account (authenticates with the API key from the environment).
rsconnect::connectApiUser(
  account = "katie.masiello@posit.co",
  server = "pub.current",
  apiKey = Sys.getenv("CONNECT_API_KEY")
)

# Deploy to Connect: publish the pinned model as a plumber API, updating the
# existing content item identified by `appId`.
vetiver_deploy_rsconnect(
  board = board,
  name = pin_name,
  appId = "442",
  launch.browser = FALSE,
  appTitle = "Bikeshare Prediction: 03b - Model - API",
  predict_args = list(debug = FALSE),
  account = "katie.masiello@posit.co",
  server = "pub.current"
)
## Building Plumber API...
## Bundle created with R version 4.4.1 is compatible with environment Kubernetes::654654567442.dkr.ecr.us-east-2.amazonaws.com/ptd-adhoc-pct:content-r4.4.1-py3.10.14-quarto1.4.557 with R version 4.4.1 from /opt/R/4.4.1/bin/R
## Bundle requested R version 4.4.1; using /opt/R/4.4.1/bin/R from Kubernetes::654654567442.dkr.ecr.us-east-2.amazonaws.com/ptd-adhoc-pct:content-r4.4.1-py3.10.14-quarto1.4.557 which has version 4.4.1
## Performing manifest.json to packrat transformation.
## Determining session server location ...
## Connecting to session server http://service-8318f7b4-a5c2-41e6-9b67-ab2c5744c3b7.posit-team:50734 ...
## Unable to connect to session server address http://service-8318f7b4-a5c2-41e6-9b67-ab2c5744c3b7.posit-team:50734: Get "http://service-8318f7b4-a5c2-41e6-9b67-ab2c5744c3b7.posit-team:50734": dial tcp: lookup service-8318f7b4-a5c2-41e6-9b67-ab2c5744c3b7.posit-team on 172.20.0.10:53: no such host
## [rsc-session] Content GUID: 6570e768-2118-4e5c-aee5-97b7027ab1b0
## [rsc-session] Content ID: 442
## 2025/02/06 16:55:58.633357477 [rsc-session] Bundle ID: 2267
## 2025/02/06 16:55:58.633360138 [rsc-session] Job Key: kukTZmPCNyptG0G7
## Running on host: packrat-restore-kgtnh-dprqp
## 2025/02/06 16:55:59.187973925 Process ID: 39
## Linux distribution: Ubuntu 22.04.5 LTS (jammy)
## Running as user: uid=999 gid=999 groups=999
## 2025/02/06 16:55:59.195791679 Connect version: 2024.12.0
## 2025/02/06 16:55:59.195815177 LANG: en_US.UTF-8
## 2025/02/06 16:55:59.195825713 Working directory: /opt/rstudio-connect/mnt/app
## Using R 4.4.1
## 2025/02/06 16:55:59.196148562 R.home(): /opt/R/4.4.1/lib/R
## Using user agent string: 'RStudio R (4.4.1 x86_64-pc-linux-gnu x86_64 linux-gnu)'
## Configuring packrat to use available credentials for private repository access.
## 2025/02/06 16:55:59.196998081 # Validating R library read / write permissions --------------------------------
## Using R library for packrat bootstrap: /opt/rstudio-connect/mnt/R/654654567442.dkr.ecr.us-east-2.amazonaws.com_ptd-adhoc-pct__content-r4.4.1-py3.10.14-quarto1.4.557/4.4.1
## 2025/02/06 16:55:59.210029343 # Validating managed packrat installation --------------------------------------
## Vendored packrat archive: /opt/rstudio-connect/ext/R/packrat_0.9.2.9000_70625806c44bda42a7f3aeaa92ee65542cc590be.tar.gz
## Vendored packrat SHA: 70625806c44bda42a7f3aeaa92ee65542cc590be
## Managed packrat SHA: 70625806c44bda42a7f3aeaa92ee65542cc590be
## Managed packrat version: 0.9.2.9000
## Managed packrat is up-to-date.
## 2025/02/06 16:55:59.233117562 # Validating packrat cache read / write permissions ----------------------------
## Using packrat cache directory: /opt/rstudio-connect/mnt/packrat/654654567442.dkr.ecr.us-east-2.amazonaws.com_ptd-adhoc-pct__content-r4.4.1-py3.10.14-quarto1.4.557/4.4.1
## 2025/02/06 16:55:59.455576727 # Setting packrat options and preparing lockfile -------------------------------
## Audited package hashes with local packrat installation.
## # Resolving R package repositories ---------------------------------------------
## Received repositories from Connect's configuration:
## - RSPM = "https://pkg.current.posit.team/cran/latest"
## 2025/02/06 16:55:59.967915143 - CRAN = "https://pkg.current.posit.team/cran/latest"
## Rewrote Posit Package Manager URLs to install binary packages:
## 2025/02/06 16:56:00.370381548 - Rewrote "RSPM" from "https://pkg.current.posit.team/cran/latest" to "https://pkg.current.posit.team/cran/__linux__/jammy/latest".
## - Rewrote "CRAN" from "https://pkg.current.posit.team/cran/latest" to "https://pkg.current.posit.team/cran/__linux__/jammy/latest".
## 2025/02/06 16:56:00.370566683 Received repositories from published content:
## 2025/02/06 16:56:00.370681169 - CRAN = "https://cloud.r-project.org"
## Combining repositories from configuration and content.
## 2025/02/06 16:56:00.372063049 Packages will be installed using the following repositories:
## 2025/02/06 16:56:00.372396095 - RSPM = "https://pkg.current.posit.team/cran/__linux__/jammy/latest"
## 2025/02/06 16:56:00.372438505 - CRAN = "https://pkg.current.posit.team/cran/__linux__/jammy/latest"
## 2025/02/06 16:56:00.372440598 - CRAN.1 = "https://cloud.r-project.org"
## # Installing required R packages with `packrat::restore()` ---------------------
## Installing KernSmooth (2.23-22) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.590252442 Installing MASS (7.3-61) ...
## OK (symlinked cache)
## Installing R6 (2.5.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.605396217 Installing RColorBrewer (1.1-3) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.612525770 Installing Rcpp (1.0.13) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.619617398 Installing SQUAREM (2021.1) ...
## OK (symlinked cache)
## Installing bit (4.5.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.634252726 Installing cli (3.6.3) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.641245120 Installing clipr (0.8.0) ...
## OK (symlinked cache)
## Installing codetools (0.2-20) ...
## OK (symlinked cache)
## Installing colorspace (2.1-1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.662730880 Installing cpp11 (0.5.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.669575168 Installing crayon (1.5.3) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.676432289 Installing curl (5.2.3) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.683431310 Installing data.table (1.16.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.690474871 Installing digest (0.6.36) ...
## OK (symlinked cache)
## Installing fansi (1.0.6) ...
## OK (symlinked cache)
## Installing farver (2.1.2) ...
## OK (symlinked cache)
## Installing fastmap (1.2.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.719232594 Installing fs (1.6.4) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.726619616 Installing generics (0.1.3) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.733783803 Installing glue (1.7.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.740746463 Installing gower (1.0.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.748108165 Installing isoband (0.2.7) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.755639812 Installing jsonlite (1.8.9) ...
## OK (symlinked cache)
## Installing labeling (0.4.3) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.769999903 Installing lattice (0.22-6) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.777271161 Installing listenv (0.9.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.784367187 Installing magrittr (2.0.3) ...
## OK (symlinked cache)
## Installing mime (0.12) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.798880111 Installing nnet (7.3-19) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.805992010 Installing numDeriv (2016.8-1.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.812962989 Installing parallelly (1.38.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.819999067 Installing pkgconfig (2.0.3) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.829893423 Installing prettyunits (1.2.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.836946649 Installing rapidoc (9.3.4) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.844255743 Installing rappdirs (0.3.3) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.851516722 Installing rlang (1.1.4) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.858798957 Installing rpart (4.1.23) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.865809883 Installing shape (1.4.6.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.873214172 Installing sodium (1.3.2) ...
## OK (symlinked cache)
## Installing stringi (1.8.4) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.887845692 Installing swagger (5.17.14.1) ...
## OK (symlinked cache)
## Installing sys (3.4.2) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.902564928 Installing timeDate (4041.110) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.910335553 Installing utf8 (1.2.4) ...
## OK (symlinked cache)
## Installing viridisLite (0.4.2) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.925183616 Installing whisker (0.4.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.932286968 Installing withr (3.0.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.939615301 Installing yaml (2.3.10) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.946737993 Installing class (7.3-22) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.953792444 Installing RcppEigen (0.3.4.0.2) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.960977008 Installing bit64 (4.5.2) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.967859122 Installing globals (0.16.3) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.974819257 Installing munsell (0.5.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.981795800 Installing timechange (0.3.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.988695707 Installing tzdb (0.4.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:00.995546376 Installing progressr (0.14.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.002527733 Installing webutils (1.2.2) ...
## OK (symlinked cache)
## Installing Matrix (1.7-0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.016682364 Installing nlme (3.1-166) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.023893238 Installing ellipsis (0.3.2) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.031332671 Installing later (1.3.2) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.038439903 Installing lifecycle (1.0.4) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.045725327 Installing lobstr (1.1.2) ...
## OK (symlinked cache)
## Installing diagram (1.6.5) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.060490266 Installing askpass (1.2.0) ...
## OK (symlinked cache)
## Installing future (1.34.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.074543464 Installing lubridate (1.9.3) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.081819729 Installing ranger (0.16.0) ...
## OK (symlinked cache)
## Installing survival (3.7-0) ...
## OK (symlinked cache)
## Installing mgcv (1.9-1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.103800135 Installing promises (1.3.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.110956300 Installing gtable (0.3.5) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.118086048 Installing scales (1.3.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.125360146 Installing vctrs (0.6.5) ...
## OK (symlinked cache)
## Installing openssl (2.2.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.139831467 Installing future.apply (1.11.2) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.146994715 Installing httpuv (1.6.15) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.154245364 Installing clock (0.7.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.161988755 Installing hms (1.1.3) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.169657754 Installing pillar (1.9.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.177528897 Installing purrr (1.0.2) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.185130440 Installing stringr (1.5.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.192789592 Installing tidyselect (1.2.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.200234608 Installing httr (1.4.7) ...
## OK (symlinked cache)
## Installing lava (1.8.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.214688824 Installing plumber (1.2.2) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.222196274 Installing progress (1.2.3) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.229427266 Installing tibble (3.2.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.236550097 Installing bundle (0.1.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.243767750 Installing prodlim (2024.06.25) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.251435896 Installing butcher (0.3.4) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.258926842 Installing cereal (0.1.0) ...
## OK (symlinked cache)
## Installing dplyr (1.1.4) ...
## OK (symlinked cache)
## Installing ggplot2 (3.5.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.280117458 Installing hardhat (1.4.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.287110813 Installing modelenv (0.1.1) ...
## OK (symlinked cache)
## Installing pins (1.3.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.301700069 Installing vroom (1.6.5) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.308941376 Installing ipred (0.9-15) ...
## OK (symlinked cache)
## Installing tidyr (1.3.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.323081155 Installing readr (2.1.5) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.330358263 Installing parsnip (1.2.1) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.337254923 Installing recipes (1.1.0) ...
## OK (symlinked cache)
## 2025/02/06 16:56:01.344087456 Installing vetiver (0.2.5) ...
## OK (symlinked cache)
## Installing workflows (1.1.4) ...
## OK (symlinked cache)
## Completed packrat build using Kubernetes::654654567442.dkr.ecr.us-east-2.amazonaws.com/ptd-adhoc-pct:content-r4.4.1-py3.10.14-quarto1.4.557 against R version: '4.4.1'
## Launching Plumber API...
# Close the database connection opened at the start of the notebook.
DBI::dbDisconnect(con)