Project.toml: 2 changes (1 addition, 1 deletion)
@@ -2,7 +2,7 @@ name = "SoleModels"
uuid = "4249d9c7-3290-4ddd-961c-e1d3ec2467f8"
license = "MIT"
authors = ["Michele GHIOTTI", "Giovanni PAGLIARINI", "Edoardo PONSANESI", "Eduard I. STAN"]
-version = "0.10.5"
+version = "0.10.6"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
ext/DecisionTreeExt.jl: 12 changes (6 additions, 6 deletions)
@@ -55,6 +55,7 @@ function SoleModels.solemodel(
classlabels = nothing,
featurenames = nothing,
keep_condensed = false,
+    dt_bestguess = false,
Review comment (Member):
Okay, good to have a kwarg for this. Can we name it differently? Like, I guess we could remove the "dt_" part, because we may want to have the same argument in other SoleModels.solemodel methods in other package extensions unrelated to "decision trees".

Maybe "alphanumeric_tiebreaker", or "argmax_tiebreaker", or a "tiebreaker::Symbol" that can be either :argmax or :alphanumeric.
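A minimal sketch of how the suggested `tiebreaker::Symbol` kwarg could map onto a parity function (the helper name and signature are hypothetical, just to illustrate the suggestion; it assumes the parity function receives label counts as a Dict, as the current closures do):

```julia
# Hypothetical helper mapping the suggested kwarg to a parity function:
function resolve_tiebreaker(tiebreaker::Symbol)
    tiebreaker === :argmax && return x -> argmax(x)                             # label with the highest count
    tiebreaker === :alphanumeric && return x -> first(sort(collect(keys(x))))   # first label in sort order
    throw(ArgumentError("tiebreaker must be :argmax or :alphanumeric, got :$tiebreaker"))
end
```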

kwargs...
) where {T,orig_O}
# TODO rewrite error according to orig_O
@@ -98,12 +99,11 @@ function SoleModels.solemodel(
# # O = nothing
# end

-if isnothing(weights)
-    m = DecisionEnsemble{O}(trees, info; parity_func=x->first(sort(collect(keys(x)))))
-else
-    m = DecisionEnsemble{O}(trees, weights, info; parity_func=x->first(sort(collect(keys(x)))))
-end
-return m
+parity_func = dt_bestguess ? x->first(sort(collect(keys(x)))) : x->argmax(x)
+
+return isnothing(weights) ?
+    DecisionEnsemble{O}(trees, info; parity_func) :
+    DecisionEnsemble{O}(trees, weights, info; parity_func)
end

function SoleModels.solemodel(
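A side note on the behavioral change in this hunk: the default parity function is now `argmax`, while `dt_bestguess=true` keeps the previous behavior of taking the first label in sort order (presumably to match DecisionTree.jl's best guess, which is why the comparison tests below pass it). A quick illustration, assuming the counts reach parity_func as a Dict (the exact input shape depends on DecisionEnsemble internals):

```julia
counts = Dict("virginica" => 4, "setosa" => 4, "versicolor" => 2)  # made-up vote counts

argmax(counts)                      # a label with the highest count; ties fall to Dict iteration order
first(sort(collect(keys(counts))))  # "setosa": the alphabetically first label, regardless of counts
```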
test/DecisionTreeExt/adaboost.jl: 20 changes (13 additions, 7 deletions)
@@ -1,3 +1,9 @@
+# using SoleModels
+# using MLJ
+# using DataFrames, Random
+# using DecisionTree
+# const DT = DecisionTree
+
Review comment (Member) on lines +1 to +6:
These seem useful actually?

X, y = @load_iris
X = DataFrame(X)

@@ -27,7 +33,7 @@ model = Stump(;
# Bind the model and data into a machine
mach = machine(model, X_train, y_train)
# Fit the model
-fit!(mach, verbosity=0)
+MLJ.fit!(mach, verbosity=0)

weights = mach.fitresult[2]
classlabels = sort(mach.fitresult[3])
Expand Down Expand Up @@ -72,7 +78,7 @@ ada_accuracy = sum(preds .== y_test)/length(y_test)
Tree = MLJ.@load DecisionTreeClassifier pkg=DecisionTree
dt_model = Tree(max_depth=-1, min_samples_leaf=1, min_samples_split=2)
dt_mach = machine(dt_model, X_train, y_train)
-fit!(dt_mach, verbosity=0)
+MLJ.fit!(dt_mach, verbosity=0)
dt_solem = solemodel(fitted_params(dt_mach).tree)
dt_preds = apply(dt_solem, X_test)
dt_accuracy = sum(dt_preds .== y_test)/length(y_test)
@@ -81,7 +87,7 @@ dt_accuracy = sum(dt_preds .== y_test)/length(y_test)
Forest = MLJ.@load RandomForestClassifier pkg=DecisionTree
rm_model = Forest(; max_depth=3, min_samples_leaf=1, min_samples_split=2, n_trees=10, rng)
rm_mach = machine(rm_model, X_train, y_train)
-fit!(rm_mach, verbosity=0)
+MLJ.fit!(rm_mach, verbosity=0)
classlabels = (rm_mach).fitresult[2]
classlabels = classlabels[sortperm((rm_mach).fitresult[3])]
featurenames = report(rm_mach).features
@@ -111,19 +117,19 @@ println("RandomForest accuracy: ", rm_accuracy)
# solemodel
model = Stump(; n_iter, rng=Xoshiro(seed))
mach = machine(model, X_train, y_train)
-fit!(mach, verbosity=0)
+MLJ.fit!(mach, verbosity=0)
weights = mach.fitresult[2]
classlabels = sort(mach.fitresult[3])
featurenames = MLJ.report(mach).features
-solem = solemodel(MLJ.fitted_params(mach).stumps; weights, classlabels, featurenames)
+solem = solemodel(MLJ.fitted_params(mach).stumps; weights, classlabels, featurenames, dt_bestguess=true)
preds = apply(solem, X_test)

# decisiontree
-yl_train = CategoricalArrays.levelcode.(y_train)
+yl_train = MLJ.levelcode.(y_train)
dt_model, dt_coeffs = DT.build_adaboost_stumps(yl_train, Matrix(X_train), n_iter; rng=Xoshiro(seed))
dt_preds = DT.apply_adaboost_stumps(dt_model, dt_coeffs, Matrix(X_test))

-code_preds = CategoricalArrays.levelcode.(preds)
+code_preds = MLJ.levelcode.(preds)
@test code_preds == dt_preds
end
end
test/DecisionTreeExt/forest.jl: 16 changes (8 additions, 8 deletions)
@@ -22,16 +22,16 @@ println("Test set type: ", typeof(X_test), " - ", typeof(y_test))
Forest = MLJ.@load RandomForestClassifier pkg=DecisionTree

model = Forest(
-    max_depth=3,
-    min_samples_leaf=1,
-    min_samples_split=2,
-    n_trees = 10,
+    max_depth=3,
+    min_samples_leaf=1,
+    min_samples_split=2,
+    n_trees = 10,
)

# Bind the model and data into a machine
mach = machine(model, X_train, y_train)
# Fit the model
-fit!(mach)
+MLJ.fit!(mach)


classlabels = (mach).fitresult[2]
@@ -73,10 +73,10 @@ printmodel(solem; max_depth = 7, show_intermediate_finals = true, show_metrics =
# solemodel
model = Forest(; n_trees, rng=Xoshiro(seed))
mach = machine(model, X_train, y_train)
-fit!(mach, verbosity=0)
+MLJ.fit!(mach, verbosity=0)
classlabels = (mach).fitresult[2][sortperm((mach).fitresult[3])]
featurenames = MLJ.report(mach).features
-solem = solemodel(MLJ.fitted_params(mach).forest; classlabels, featurenames)
+solem = solemodel(MLJ.fitted_params(mach).forest; classlabels, featurenames, dt_bestguess=true)
preds = apply!(solem, X_test, y_test)

# decisiontree
@@ -97,7 +97,7 @@ X_test, y_test = X[test, :], y[test]

model = Forest(; n_trees, rng=Xoshiro(seed))
mach = machine(model, X_train, y_train)
-fit!(mach, verbosity=0)
+MLJ.fit!(mach, verbosity=0)
classlabels = (mach).fitresult[2][sortperm((mach).fitresult[3])]
featurenames = MLJ.report(mach).features
solem = solemodel(MLJ.fitted_params(mach).forest; classlabels, featurenames)
test/DecisionTreeExt/tree.jl: 14 changes (10 additions, 4 deletions)
@@ -1,3 +1,9 @@
+# using SoleModels
+# using MLJ
+# using DataFrames, Random
+# using DecisionTree
+# const DT = DecisionTree
+
Review comment (Member) on lines +1 to +6:
Same

X, y = @load_iris
X = DataFrame(X)

@@ -24,7 +30,7 @@ model = Tree(
# Bind the model and data into a machine
mach = machine(model, X_train, y_train)
# Fit the model
-fit!(mach)
+MLJ.fit!(mach)


solem = solemodel(fitted_params(mach).tree)
@@ -80,16 +86,16 @@ printmodel.(sort(interesting_rules, by = readmetrics); show_metrics = (; round_d
# solemodel
model = Tree(; max_depth, rng=Xoshiro(seed))
mach = machine(model, X_train, y_train)
-fit!(mach, verbosity=0)
+MLJ.fit!(mach, verbosity=0)
solem = solemodel(MLJ.fitted_params(mach).tree)
preds = apply!(solem, X_test, y_test)

# decisiontree
-y_coded_train = @. CategoricalArrays.levelcode(y_train)
+y_coded_train = @. MLJ.levelcode(y_train)
dt_model = DT.build_tree(y_coded_train, Matrix(X_train), 0, max_depth; rng=Xoshiro(seed))
dt_preds = DT.apply_tree(dt_model, Matrix(X_test))

-preds_coded = CategoricalArrays.levelcode.(CategoricalArray(preds))
+preds_coded = MLJ.levelcode.(MLJ.CategoricalArray(preds))
@test preds_coded == dt_preds
end
end
test/XGBoostExt/xgboost_classifier.jl: 29 changes (18 additions, 11 deletions)
@@ -1,3 +1,10 @@
+using SoleModels
+using MLJ
+using DataFrames, Random
+import MLJModelInterface as MMI
+import XGBoost as XGB
+using JLD2
+
X, y = @load_iris
X = DataFrame(X)

@@ -27,7 +34,7 @@ model = XGTrees(;
# Bind the model and data into a machine
mach = machine(model, X_train, y_train)
# Fit the model
-fit!(mach; verbosity=0)
+MLJ.fit!(mach; verbosity=0)

get_encoding(classes_seen) = Dict(MMI.int(c) => c for c in MMI.classes(classes_seen))
get_classlabels(encoding) = [string(encoding[i]) for i in sort(keys(encoding) |> collect)]
@@ -44,16 +51,16 @@ solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, ke
# Make test instances flow into the model
X_test_f32 = mapcols(col -> Float32.(col), X_test)
preds = apply(solem, X_test_f32)
-predsl = CategoricalArrays.levelcode.(CategoricalArrays.categorical(preds)) .- 1
+predsl = MLJ.levelcode.(MLJ.categorical(preds)) .- 1

-apply!(solem, X_test, y_test)
-@test solem.info.supporting_predictions == preds
+preds! = apply!(solem, X_test, y_test)
+@test preds! == preds
@test solem.info.supporting_labels == y_test

# ---------------------------------------------------------------------------- #
# julia XGBoost #
# ---------------------------------------------------------------------------- #
-yl_train = CategoricalArrays.levelcode.(CategoricalArrays.categorical(y_train)) .- 1
+yl_train = MLJ.levelcode.(MLJ.categorical(y_train)) .- 1
# create and train a gradient boosted tree model of 5 trees
bst = XGB.xgboost(
(X_train, yl_train),
@@ -77,7 +84,7 @@ xg_accuracy = sum(preds .== y_test)/length(y_test)
Tree = MLJ.@load DecisionTreeClassifier pkg=DecisionTree
dt_model = Tree(max_depth=-1, min_samples_leaf=1, min_samples_split=2)
dt_mach = machine(dt_model, X_train, y_train)
-fit!(dt_mach, verbosity=0)
+MLJ.fit!(dt_mach, verbosity=0)
dt_solem = solemodel(fitted_params(dt_mach).tree)
dt_preds = apply(dt_solem, X_test)
dt_accuracy = sum(dt_preds .== y_test)/length(y_test)
@@ -86,7 +93,7 @@ dt_accuracy = sum(dt_preds .== y_test)/length(y_test)
Forest = MLJ.@load RandomForestClassifier pkg=DecisionTree
rm_model = Forest(;max_depth=3, min_samples_leaf=1, min_samples_split=2, n_trees=10, rng)
rm_mach = machine(rm_model, X_train, y_train)
-fit!(rm_mach, verbosity=0)
+MLJ.fit!(rm_mach, verbosity=0)
classlabels = (rm_mach).fitresult[2]
classlabels = classlabels[sortperm((rm_mach).fitresult[3])]
featurenames = report(rm_mach).features
@@ -121,7 +128,7 @@ println("RandomForest accuracy: ", rm_accuracy)
for eta in 0.1:0.1:0.3
model = XGTrees(; num_round, eta, objective="multi:softmax")
mach = machine(model, X_train, y_train)
-fit!(mach, verbosity=0)
+MLJ.fit!(mach, verbosity=0)
trees = XGB.trees(mach.fitresult[1])
encoding = get_encoding(mach.fitresult[2])
classlabels = get_classlabels(encoding)
@@ -130,9 +137,9 @@ println("RandomForest accuracy: ", rm_accuracy)
X_test_f32 = mapcols(col -> Float32.(col), X_test)
apply!(solem, X_test_f32, y_test)
preds = solem.info.supporting_predictions
-predsl = CategoricalArrays.levelcode.(CategoricalArrays.categorical(preds)) .- 1
+predsl = MLJ.levelcode.(MLJ.categorical(preds)) .- 1

-yl_train = CategoricalArrays.levelcode.(CategoricalArrays.categorical(y_train)) .- 1
+yl_train = MLJ.levelcode.(MLJ.categorical(y_train)) .- 1
bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softmax")
xg_preds = XGB.predict(bst, X_test)

@@ -165,7 +172,7 @@ y = MLJ.CategoricalArray{String,1,UInt32}(data["y"])
X_test, y_test = X[test, :], y[test]
model = XGTrees(; num_round, eta, seed)
mach = machine(model, X, y)
-fit!(mach, rows=train, verbosity=0)
+MLJ.fit!(mach, rows=train, verbosity=0)
trees = XGB.trees(mach.fitresult[1])
encoding = get_encoding(mach.fitresult[2])
classlabels = get_classlabels(encoding)
test/XGBoostExt/xgboost_regressor.jl: 10 changes (8 additions, 2 deletions)
@@ -1,3 +1,9 @@
+using SoleModels
+using MLJ
+using DataFrames, Random
+import MLJModelInterface as MMI
+import XGBoost as XGB
+
X, y = @load_boston
X = DataFrame(X)

@@ -27,7 +33,7 @@ model = XGTrees(;
# Bind the model and data into a machine
mach = machine(model, X_train, y_train)
# Fit the model
-fit!(mach; verbosity=0)
+MLJ.fit!(mach; verbosity=0)

trees = XGB.trees(mach.fitresult[1])
featurenames = mach.report.vals[1].features
@@ -69,7 +75,7 @@ preds = apply!(solem, X_test_f32, y_test; base_score)
model = XGTrees(; num_round, max_depth, objective="reg:squarederror")
mach = machine(model, X_train, y_train)
mach.model.base_score = base_score
-fit!(mach, verbosity=0)
+MLJ.fit!(mach, verbosity=0)
trees = XGB.trees(mach.fitresult[1])
featurenames = mach.report.vals[1].features
solem = solemodel(trees, Matrix(X_train), y_train; featurenames)
test/base.jl: 6 changes (3 additions, 3 deletions)
@@ -170,9 +170,9 @@ branch_r = @test_nowarn Branch(formula_r, (branch_r, "yes"))
rule_r = @test_nowarn Rule(formula_r, branch_r)
branch_r_mixed = @test_nowarn Branch(formula_r, (rule_r, "no"))

-dtmodel0 = @test_nowarn DecisionTree("1")
-dtmodel = @test_nowarn DecisionTree(branch_r)
-@test_nowarn DecisionTree(branch_r_mixed)
+dtmodel0 = @test_nowarn SoleModels.DecisionTree("1")
+dtmodel = @test_nowarn SoleModels.DecisionTree(branch_r)
+@test_nowarn SoleModels.DecisionTree(branch_r_mixed)
Review comment (Member) on lines -173 to +175:
Mh why is this needed? Is there a clash on "DecisionTree"? Maybe with MLJ?
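
A plausible reproduction of the clash, assuming it comes from runtests.jl now doing `using DecisionTree` instead of `import DecisionTree as DT` (see the runtests.jl hunk below; the snippet is only illustrative):

```julia
using SoleModels     # exports a `DecisionTree` model type
using DecisionTree   # also binds the module name `DecisionTree` in scope

# The module binding takes precedence over the exported type,
# so the unqualified call would try to call a Module and fail:
# DecisionTree("1")            # MethodError: objects of type Module are not callable
SoleModels.DecisionTree("1")   # qualifying resolves the ambiguity
```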

# msmodel = MixedModel(dtmodel)

complex_mixed_model = @test_nowarn Branch(formula_r, (dtmodel, dlmodel_integer))
test/juliacon2024.jl: 2 changes (1 addition, 1 deletion)
@@ -27,7 +27,7 @@ end;
mach = begin
Tree = MLJ.@load DecisionTreeClassifier pkg=DecisionTree
model = Tree(max_depth=-1, rng = Random.MersenneTwister(42))
-machine(model, X_train, y_train) |> fit!
+machine(model, X_train, y_train) |> MLJ.fit!
end

# Inspect the tree
test/misc.jl: 10 changes (5 additions, 5 deletions)
@@ -220,18 +220,18 @@ rule_r = @test_nowarn Rule(formula_r, branch_r)
branch_r_mixed = @test_nowarn Branch(formula_r, (rule_r, "no"))

############################### DecisionTree ###############################################
-dt1 = @test_nowarn DecisionTree(b_p)
-dt2 = @test_nowarn DecisionTree(b_fdx)
+dt1 = @test_nowarn SoleModels.DecisionTree(b_p)
+dt2 = @test_nowarn SoleModels.DecisionTree(b_fdx)

-dtmodel0 = @test_nowarn DecisionTree("1")
-dtmodel = @test_nowarn DecisionTree(branch_r)
+dtmodel0 = @test_nowarn SoleModels.DecisionTree("1")
+dtmodel = @test_nowarn SoleModels.DecisionTree(branch_r)

############################## DecisionForest ##############################################
df = @test_nowarn DecisionForest([dt1,dt2])

############################### MixedModel #########################################
b_msm = @test_nowarn Branch(st_q,outcome_int,outcome_float)
-dt_msm = @test_nowarn DecisionTree(b_msm)
+dt_msm = @test_nowarn SoleModels.DecisionTree(b_msm)
msm = @test_nowarn MixedModel(dt_msm)

msmodel = @test_nowarn MixedModel(dtmodel)
test/runtests.jl: 9 changes (6 additions, 3 deletions)
@@ -17,9 +17,12 @@ addprocs(2)
using InteractiveUtils
using MLJ
using MLJDecisionTreeInterface
-import DecisionTree as DT
-import MLJModelInterface as MMI
-import XGBoost as XGB
+using DecisionTree
+using MLJModelInterface
+using XGBoost
+const DT = DecisionTree
+const MMI = MLJModelInterface
+const XGB = XGBoost
using DataFrames
using Test
using Random