| Function | Works |
|---|:---:|
| `tidypredict_fit()`, `tidypredict_sql()`, `parse_model()` | ✔ |
| `tidypredict_to_column()` | ✔ |
| `tidypredict_test()` | ✔ |
| `tidypredict_interval()`, `tidypredict_sql_interval()` | ✗ |
| `parsnip` | ✔ |

## `tidypredict_` functions
```r
library(catboost)

# Prepare data
X <- data.matrix(mtcars[, c("mpg", "cyl", "disp")])
y <- mtcars$hp

pool <- catboost.load_pool(
  X,
  label = y,
  feature_names = as.list(c("mpg", "cyl", "disp"))
)

model <- catboost.train(
  pool,
  params = list(
    iterations = 10L,
    depth = 3L,
    learning_rate = 0.5,
    loss_function = "RMSE",
    logging_level = "Silent",
    allow_writing_files = FALSE
  )
)
```

## Create the R formula
```r
tidypredict_fit(model)
```

## Add the prediction to the original table

```r
library(dplyr)

mtcars %>%
  tidypredict_to_column(model) %>%
  glimpse()
```
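The table above lists `tidypredict_sql()` as supported, so the same fitted formula can also be rendered as SQL. A minimal sketch, using `dbplyr`'s simulated connection in place of a real database:

```r
library(dbplyr)

# Translate the prediction formula into a SQL expression
tidypredict_sql(model, dbplyr::simulate_dbi())
```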
- Confirm that `tidypredict` results match the model's `predict()` results. The `xg_df` argument expects the matrix data set.

```r
tidypredict_test(model, xg_df = X)
```
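The same comparison can be made by hand. A sketch that evaluates the fitted formula with `tidypredict_to_column()` (which writes to a `fit` column by default) and checks it against `catboost.predict()`:

```r
library(dplyr)

# Formula-based predictions vs. CatBoost's native predictions
preds_formula <- mtcars %>%
  tidypredict_to_column(model) %>%
  pull(fit)
preds_native <- catboost.predict(model, pool)

# The maximum absolute difference should be near zero
max(abs(preds_formula - preds_native))
```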
## Supported objectives

CatBoost supports many objective functions. tidypredict supports a subset of them, including the objectives used in the examples below: `RMSE`, `Logloss`, and `MultiClass`.
## Binary classification example
```r
X_bin <- data.matrix(mtcars[, c("mpg", "cyl", "disp")])
y_bin <- mtcars$am

pool_bin <- catboost.load_pool(
  X_bin,
  label = y_bin,
  feature_names = as.list(c("mpg", "cyl", "disp"))
)

model_bin <- catboost.train(
  pool_bin,
  params = list(
    iterations = 10L,
    depth = 3L,
    learning_rate = 0.5,
    loss_function = "Logloss",
    logging_level = "Silent",
    allow_writing_files = FALSE
  )
)
```
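As with regression, the classification formula can be inspected before testing; a brief sketch (the shape of the returned formula depends on the objective):

```r
# Inspect the formula parsed from the binary classifier
tidypredict_fit(model_bin)
```

`tidypredict_test()` can then confirm the formula against CatBoost's own predictions: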
```r
tidypredict_test(model_bin, xg_df = X_bin)
```

## Multiclass classification example
```r
X_multi <- data.matrix(iris[, 1:4])
y_multi <- as.integer(iris$Species) - 1L

pool_multi <- catboost.load_pool(
  X_multi,
  label = y_multi,
  feature_names = as.list(colnames(iris)[1:4])
)

model_multi <- catboost.train(
  pool_multi,
  params = list(
    iterations = 10L,
    depth = 3L,
    learning_rate = 0.5,
    loss_function = "MultiClass",
    logging_level = "Silent",
    allow_writing_files = FALSE
  )
)
```
```r
# Multiclass returns a list of formulas, one per class
formulas <- tidypredict_fit(model_multi)
names(formulas)
```
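Because the list is named, the per-class formulas can be spliced straight into a `dplyr::mutate()` call. A sketch, assuming the formulas return raw (pre-softmax) scores so that a softmax step recovers class probabilities:

```r
library(dplyr)

# Evaluate every per-class formula; each becomes a column named
# after its class
scores <- iris %>%
  mutate(!!!formulas) %>%
  select(all_of(names(formulas)))

# Softmax normalization across classes (assumes raw scores)
softmax <- function(m) exp(m) / rowSums(exp(m))
head(softmax(as.matrix(scores)))
```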
Test multiclass predictions:

```r
tidypredict_test(model_multi, xg_df = X_multi)
```

## Categorical features
CatBoost models can use categorical features with one-hot encoding.
### With parsnip/bonsai (recommended)
When using parsnip/bonsai, categorical features are handled automatically:
```r
library(parsnip)
library(bonsai)

df_cat <- data.frame(
  num_feat = mtcars$mpg,
  cat_feat = factor(ifelse(mtcars$am == 1, "manual", "auto")),
  target = mtcars$hp
)

model_spec <- boost_tree(trees = 10, tree_depth = 3) |>
  set_engine("catboost", logging_level = "Silent", one_hot_max_size = 10) |>
  set_mode("regression")

model_fit <- fit(model_spec, target ~ num_feat + cat_feat, data = df_cat)

# Categorical features are handled automatically
tidypredict_fit(model_fit)
```
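The parsnip fit works with the other verbs as well; for instance, a usage sketch that appends the formula-based prediction to the original table:

```r
library(dplyr)

# Add the prediction as a new column next to the original data
df_cat %>%
  tidypredict_to_column(model_fit) %>%
  glimpse()
```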
### With raw CatBoost

For raw CatBoost models, you need to manually establish the hash-to-category mapping:
```r
pool_cat <- catboost.load_pool(
  df_cat[, c("num_feat", "cat_feat")],
  label = df_cat$target
)

model_cat <- catboost.train(
  pool_cat,
  params = list(
    iterations = 10L,
    depth = 3L,
    learning_rate = 0.5,
    loss_function = "RMSE",
    logging_level = "Silent",
    allow_writing_files = FALSE,
    one_hot_max_size = 10
  )
)

# Parse and set category mapping manually
pm_cat <- parse_model(model_cat)
pm_cat <- set_catboost_categories(pm_cat, model_cat, df_cat)

# Now use the parsed model
tidypredict_fit(pm_cat)
```
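An illustrative way to confirm the category mapping took effect is to compare the formula's output against CatBoost's native predictions:

```r
library(dplyr)

# Evaluate the parsed-model formula and compare with catboost.predict()
fit_expr <- tidypredict_fit(pm_cat)
preds_formula <- df_cat %>% mutate(pred = !!fit_expr) %>% pull(pred)
preds_native <- catboost.predict(model_cat, pool_cat)

# Should be near zero if the mapping is correct
max(abs(preds_formula - preds_native))
```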
## Parse model spec

Here is an example of the model spec:
```r
pm <- parse_model(model)
str(pm, 2)
str(pm$trees[1])
```
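The parsed spec is a plain R list, so ordinary list tools apply; for example:

```r
# Number of parsed trees (typically one per boosting iteration)
length(pm$trees)
```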