Convert decision tree objects into tidy data frames with broomstick.
The goal of broomstick is to extend the broom package to work with decision trees. It currently borrows heavily from the prototype package treezy.
You can install broomstick from GitHub with:
# install.packages("remotes")
remotes::install_github("njtierney/broomstick")
library(rpart)
library(broomstick)
fit_rpart <- rpart(Kyphosis ~ Age + Number + Start,
data = kyphosis)
tidy(fit_rpart)
#> # A tibble: 3 × 2
#> variable importance
#> <chr> <dbl>
#> 1 Start 8.20
#> 2 Age 3.10
#> 3 Number 1.52
augment(fit_rpart)
#> # A tibble: 81 × 6
#> Kyphosis Age Number Start .fitted[,"absent"] [,"present"] .resid[,"absent"]
#> <fct> <int> <int> <int> <dbl> <dbl> <dbl>
#> 1 absent 71 3 5 0.421 0.579 -0.579
#> 2 absent 158 3 14 0.857 0.143 -0.143
#> 3 present 128 4 5 0.421 0.579 -1.58
#> 4 absent 2 5 1 0.421 0.579 -0.579
#> 5 absent 1 4 15 1 0 0
#> 6 absent 1 2 16 1 0 0
#> 7 absent 61 2 17 1 0 0
#> 8 absent 37 3 16 1 0 0
#> 9 absent 113 2 16 1 0 0
#> 10 present 59 6 12 0.429 0.571 -1.57
#> # ℹ 71 more rows
#> # ℹ 1 more variable: .resid[2] <dbl>
library(gbm)
#> Loaded gbm 2.1.8.1
library(MASS)
fit_gbm <- gbm(calories ~., data = UScereal)
#> Distribution not specified, assuming gaussian ...
tidy(fit_gbm)
#> # A tibble: 10 × 2
#> variable importance
#> <chr> <dbl>
#> 1 1 25.4
#> 2 2 22.2
#> 3 3 17.5
#> 4 4 11.3
#> 5 5 8.36
#> 6 6 8.13
#> 7 7 4.99
#> 8 8 2.13
#> 9 9 0
#> 10 10 0
library(randomForest)
#> randomForest 4.7-1.1
#> Type rfNews() to see new features/changes/bug fixes.
ozone_rf <- randomForest(Ozone ~ .,
data = airquality,
importance = TRUE,
na.action = na.omit)
tidy(ozone_rf)
#> Warning: This function is deprecated as of broom 0.7.0 and will be removed from
#> a future release. Please see tibble::as_tibble().
#> # A tibble: 5 × 4
#> term X.IncMSE IncNodePurity imp_sd
#> <chr> <dbl> <dbl> <dbl>
#> 1 Solar.R 165. 18373. 10.8
#> 2 Wind 326. 31790. 17.2
#> 3 Temp 471. 35042. 17.6
#> 4 Month 109. 10771. 8.90
#> 5 Day 57.7 15353. 9.13
glance(ozone_rf)
#> mean_mse mean_rsq
#> 1 336.4239 0.6934116
augment(ozone_rf)
#> Warning in augment.randomForest.method(x, data, ...): casewise importance
#> measures are not available. Run randomForest(..., localImp = TRUE) for more
#> detailed results.
#> # A tibble: 153 × 8
#> Ozone Solar.R Wind Temp Month Day .oob_times .fitted
#> <int> <int> <dbl> <int> <int> <int> <int> <dbl>
#> 1 41 190 7.4 67 5 1 191 40.7
#> 2 36 118 8 72 5 2 177 24.0
#> 3 12 149 12.6 74 5 3 191 27.9
#> 4 18 313 11.5 62 5 4 200 24.3
#> 5 NA NA 14.3 56 5 5 NA NA
#> 6 28 NA 14.9 66 5 6 NA NA
#> 7 23 299 8.6 65 5 7 186 28.9
#> 8 19 99 13.8 59 5 8 201 19.6
#> 9 8 19 20.1 61 5 9 178 16.1
#> 10 NA 194 8.6 69 5 10 NA NA
#> # ℹ 143 more rows