70 changes: 35 additions & 35 deletions src/classification/main.jl
@@ -23,7 +23,7 @@ end

# Applies `row_fun(X_row)::AbstractVector` to each row in X
# and returns a matrix containing the resulting vectors, stacked vertically
-function stack_function_results(row_fun::Function, X::AbstractMatrix)
+function stack_function_results(row_fun::Function, X::AbstractVecOrMat)
N = size(X, 1)
N_cols = length(row_fun(X[1, :])) # gets the number of columns
out = Array{Float64}(undef, N, N_cols)
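The widening from `AbstractMatrix` to `AbstractVecOrMat` works here because a `Vector` already behaves like a one-column matrix under the indexing this function uses: `size(v, 1)` is its length, and `v[i, :]` yields a 1-element slice. A quick sketch with made-up data:

```julia
v = [0.3, 0.7, 0.9]   # three samples of a single feature
size(v, 1)            # 3, the same row count a 3×1 matrix reports
v[2, :]               # == [0.7], a 1-element feature row, as row_fun expects
```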
@@ -52,7 +52,7 @@ end

function build_stump(
labels :: AbstractVector{T},
-features :: AbstractMatrix{S},
+features :: AbstractVecOrMat{S},
weights = nothing;
rng = Random.GLOBAL_RNG) where {S, T}

@@ -73,7 +73,7 @@ end

function build_tree(
labels :: AbstractVector{T},
-features :: AbstractMatrix{S},
+features :: AbstractVecOrMat{S},
n_subfeatures = 0,
max_depth = -1,
min_samples_leaf = 1,
@@ -138,23 +138,23 @@ function prune_tree(tree::LeafOrNode{S, T}, purity_thresh=1.0) where {S, T}
end


-apply_tree(leaf::Leaf{T}, feature::AbstractVector{S}) where {S, T} = leaf.majority
+_apply_tree(leaf::Leaf{T}, feature::AbstractVector{S}) where {S, T} = leaf.majority

-function apply_tree(tree::Node{S, T}, features::AbstractVector{S}) where {S, T}
+function _apply_tree(tree::Node{S, T}, features::AbstractVector{S}) where {S, T}
if tree.featid == 0
-return apply_tree(tree.left, features)
+return _apply_tree(tree.left, features)
elseif features[tree.featid] < tree.featval
-return apply_tree(tree.left, features)
+return _apply_tree(tree.left, features)
else
-return apply_tree(tree.right, features)
+return _apply_tree(tree.right, features)
end
end

-function apply_tree(tree::LeafOrNode{S, T}, features::AbstractMatrix{S}) where {S, T}
+function apply_tree(tree::LeafOrNode{S, T}, features::AbstractVecOrMat{S}) where {S, T}
N = size(features,1)
predictions = Array{T}(undef, N)
for i in 1:N
-predictions[i] = apply_tree(tree, features[i, :])
+predictions[i] = _apply_tree(tree, features[i, :])
end
if T <: Float64
return Float64.(predictions)
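After the rename, `apply_tree` remains the public batch entry point while `_apply_tree` walks one sample at a time. A usage sketch, assuming this is DecisionTree.jl with this change applied; the data is hypothetical:

```julia
using DecisionTree

labels   = ["a", "a", "b", "b"]
features = [0.1, 0.2, 0.8, 0.9]        # one feature per sample, as a plain Vector

tree  = build_tree(labels, features)   # build_tree now accepts AbstractVecOrMat
preds = apply_tree(tree, features)     # four predictions, one per sample
```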
@@ -171,25 +171,25 @@ n_labels` matrix of probabilities, each row summing up to 1.
`col_labels` is a vector containing the distinct labels
(e.g. ["versicolor", "virginica", "setosa"]). It specifies the column ordering
of the output matrix. """
-apply_tree_proba(leaf::Leaf{T}, features::AbstractVector{S}, labels) where {S, T} =
+_apply_tree_proba(leaf::Leaf{T}, features::AbstractVector{S}, labels) where {S, T} =
compute_probabilities(labels, leaf.values)

-function apply_tree_proba(tree::Node{S, T}, features::AbstractVector{S}, labels) where {S, T}
+function _apply_tree_proba(tree::Node{S, T}, features::AbstractVector{S}, labels) where {S, T}
if tree.featval === nothing
-return apply_tree_proba(tree.left, features, labels)
+return _apply_tree_proba(tree.left, features, labels)
elseif features[tree.featid] < tree.featval
-return apply_tree_proba(tree.left, features, labels)
+return _apply_tree_proba(tree.left, features, labels)
else
-return apply_tree_proba(tree.right, features, labels)
+return _apply_tree_proba(tree.right, features, labels)
end
end

-apply_tree_proba(tree::LeafOrNode{S, T}, features::AbstractMatrix{S}, labels) where {S, T} =
-    stack_function_results(row->apply_tree_proba(tree, row, labels), features)
+apply_tree_proba(tree::LeafOrNode{S, T}, features::AbstractVecOrMat{S}, labels) where {S, T} =
+    stack_function_results(row->_apply_tree_proba(tree, row, labels), features)

function build_forest(
labels :: AbstractVector{T},
-features :: AbstractMatrix{S},
+features :: AbstractVecOrMat{S},
n_subfeatures = -1,
n_trees = 10,
partial_sampling = 0.7,
@@ -254,11 +254,11 @@ function build_forest(
return Ensemble{S, T}(forest)
end

-function apply_forest(forest::Ensemble{S, T}, features::AbstractVector{S}) where {S, T}
+function _apply_forest(forest::Ensemble{S, T}, features::AbstractVector{S}) where {S, T}
n_trees = length(forest)
votes = Array{T}(undef, n_trees)
for i in 1:n_trees
-votes[i] = apply_tree(forest.trees[i], features)
+votes[i] = _apply_tree(forest.trees[i], features)
end

if T <: Float64
@@ -268,11 +268,11 @@ function apply_forest(forest::Ensemble{S, T}, features::AbstractVector{S}) where
end
end

-function apply_forest(forest::Ensemble{S, T}, features::AbstractMatrix{S}) where {S, T}
+function apply_forest(forest::Ensemble{S, T}, features::AbstractVecOrMat{S}) where {S, T}
N = size(features,1)
predictions = Array{T}(undef, N)
for i in 1:N
-predictions[i] = apply_forest(forest, features[i, :])
+predictions[i] = _apply_forest(forest, features[i, :])
end
return predictions
end
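Forests follow the same split: the public `apply_forest` iterates rows and delegates each one to the internal `_apply_forest`. Continuing the sketch above (hypothetical data, default forest parameters):

```julia
forest = build_forest(labels, features)   # 10 trees by default
preds  = apply_forest(forest, features)   # batch predictions for all samples
```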
@@ -285,18 +285,18 @@ n_labels` matrix of probabilities, each row summing up to 1.
`col_labels` is a vector containing the distinct labels
(e.g. ["versicolor", "virginica", "setosa"]). It specifies the column ordering
of the output matrix. """
-function apply_forest_proba(forest::Ensemble{S, T}, features::AbstractVector{S}, labels) where {S, T}
-    votes = [apply_tree(tree, features) for tree in forest.trees]
+function _apply_forest_proba(forest::Ensemble{S, T}, features::AbstractVector{S}, labels) where {S, T}
+    votes = [_apply_tree(tree, features) for tree in forest.trees]
return compute_probabilities(labels, votes)
end

-apply_forest_proba(forest::Ensemble{S, T}, features::AbstractMatrix{S}, labels) where {S, T} =
-    stack_function_results(row->apply_forest_proba(forest, row, labels),
+apply_forest_proba(forest::Ensemble{S, T}, features::AbstractVecOrMat{S}, labels) where {S, T} =
+    stack_function_results(row->_apply_forest_proba(forest, row, labels),
features)

function build_adaboost_stumps(
labels :: AbstractVector{T},
-features :: AbstractMatrix{S},
+features :: AbstractVecOrMat{S},
n_iterations :: Integer;
rng = Random.GLOBAL_RNG) where {S, T}
N = length(labels)
@@ -321,11 +321,11 @@ function build_adaboost_stumps(
return (Ensemble{S, T}(stumps), coeffs)
end

-function apply_adaboost_stumps(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64}, features::AbstractVector{S}) where {S, T}
+function _apply_adaboost_stumps(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64}, features::AbstractVector{S}) where {S, T}
n_stumps = length(stumps)
counts = Dict()
for i in 1:n_stumps
-prediction = apply_tree(stumps.trees[i], features)
+prediction = _apply_tree(stumps.trees[i], features)
counts[prediction] = get(counts, prediction, 0.0) + coeffs[i]
end
top_prediction = stumps.trees[1].left.majority
@@ -339,11 +339,11 @@ function apply_adaboost_stumps(stumps::Ensemble{S, T}, coeffs::AbstractVector{Fl
return top_prediction
end

-function apply_adaboost_stumps(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64}, features::AbstractMatrix{S}) where {S, T}
+function apply_adaboost_stumps(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64}, features::AbstractVecOrMat{S}) where {S, T}
n_samples = size(features, 1)
predictions = Array{T}(undef, n_samples)
for i in 1:n_samples
-predictions[i] = apply_adaboost_stumps(stumps, coeffs, features[i,:])
+predictions[i] = _apply_adaboost_stumps(stumps, coeffs, features[i,:])
end
return predictions
end
@@ -356,13 +356,13 @@ n_labels` matrix of probabilities, each row summing up to 1.
`col_labels` is a vector containing the distinct labels
(e.g. ["versicolor", "virginica", "setosa"]). It specifies the column ordering
of the output matrix. """
-function apply_adaboost_stumps_proba(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64},
+function _apply_adaboost_stumps_proba(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64},
features::AbstractVector{S}, labels::AbstractVector{T}) where {S, T}
-votes = [apply_tree(stump, features) for stump in stumps.trees]
+votes = [_apply_tree(stump, features) for stump in stumps.trees]
compute_probabilities(labels, votes, coeffs)
end

function apply_adaboost_stumps_proba(stumps::Ensemble{S, T}, coeffs::AbstractVector{Float64},
-features::AbstractMatrix{S}, labels::AbstractVector{T}) where {S, T}
-    stack_function_results(row->apply_adaboost_stumps_proba(stumps, coeffs, row, labels), features)
+features::AbstractVecOrMat{S}, labels::AbstractVector{T}) where {S, T}
+    stack_function_results(row->_apply_adaboost_stumps_proba(stumps, coeffs, row, labels), features)
end
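The underscore convention is not cosmetic. Once the batch methods accept `AbstractVecOrMat{S}`, a `Vector{S}` argument would be ambiguous between "one sample's feature row" and "N single-feature samples", so the per-sample methods move behind `_`-prefixed names, and a `Vector` passed to any public function always means one feature per sample. Continuing the earlier sketch:

```julia
apply_tree(tree, [0.2, 0.8])   # public: two single-feature samples, two predictions
_apply_tree(tree, [0.2])       # internal: one sample's feature row
```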
13 changes: 6 additions & 7 deletions src/classification/tree.jl
@@ -43,7 +43,7 @@ module treeclassifier
# find an optimal split that satisfies the given constraints
# (max_depth, min_samples_split, min_purity_increase)
function _split!(
-X :: AbstractMatrix{S}, # the feature array
+X :: AbstractVecOrMat{S}, # the feature array
Y :: AbstractVector{Int}, # the label array
W :: AbstractVector{U}, # the weight vector
purity_function :: Function,
@@ -226,7 +226,7 @@ module treeclassifier
end

function _fit(
-X :: AbstractMatrix{S},
+X :: AbstractVecOrMat{S},
Y :: AbstractVector{Int},
W :: AbstractVector{U},
loss :: Function,
@@ -237,9 +237,8 @@
min_samples_split :: Int,
min_purity_increase :: Float64,
rng=Random.GLOBAL_RNG :: Random.AbstractRNG) where {S, U}
-
-n_samples, n_features = size(X)
-
+
+n_samples, n_features = util.find_n_samples_and_n_features(X)
nc = Array{U}(undef, n_classes)
ncl = Array{U}(undef, n_classes)
ncr = Array{U}(undef, n_classes)
@@ -273,7 +272,7 @@ module treeclassifier
end

function fit(;
-X :: AbstractMatrix{S},
+X :: AbstractVecOrMat{S},
Y :: AbstractVector{T},
W :: Union{Nothing, AbstractVector{U}},
loss=util.entropy :: Function,
@@ -284,7 +283,7 @@
min_purity_increase :: Float64,
rng=Random.GLOBAL_RNG :: Random.AbstractRNG) where {S, T, U}

-n_samples, n_features = size(X)
+n_samples, n_features = util.find_n_samples_and_n_features(X)
list, Y_ = util.assign(Y)
if W == nothing
W = fill(1, n_samples)
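Both `_fit` methods now call `util.find_n_samples_and_n_features(X)` in place of `size(X)`, which on a `Vector` returns a 1-tuple and would make the two-way destructuring fail. The helper's definition sits outside the shown diff; a plausible sketch of what it must do (an assumption, not this PR's code):

```julia
# Hypothetical sketch -- the real helper lives in util and is not shown in this diff.
find_n_samples_and_n_features(X::AbstractMatrix) = size(X)          # true shape
find_n_samples_and_n_features(X::AbstractVector) = (length(X), 1)   # N samples, 1 feature
```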
14 changes: 7 additions & 7 deletions src/measures.jl
@@ -72,7 +72,7 @@ function confusion_matrix(actual::AbstractVector, predicted::AbstractVector)
return ConfusionMatrix(classes, CM, accuracy, kappa)
end

-function _nfoldCV(classifier::Symbol, labels::AbstractVector{T}, features::AbstractMatrix{S}, args...; verbose, rng) where {S, T}
+function _nfoldCV(classifier::Symbol, labels::AbstractVector{T}, features::AbstractVecOrMat{S}, args...; verbose, rng) where {S, T}
_rng = mk_rng(rng)::Random.AbstractRNG
nfolds = args[1]
if nfolds < 2
@@ -151,7 +151,7 @@ end

function nfoldCV_tree(
labels :: AbstractVector{T},
-features :: AbstractMatrix{S},
+features :: AbstractVecOrMat{S},
n_folds :: Integer,
pruning_purity :: Float64 = 1.0,
max_depth :: Integer = -1,
@@ -165,7 +165,7 @@ function nfoldCV_tree(
end
function nfoldCV_forest(
labels :: AbstractVector{T},
-features :: AbstractMatrix{S},
+features :: AbstractVecOrMat{S},
n_folds :: Integer,
n_subfeatures :: Integer = -1,
n_trees :: Integer = 10,
@@ -181,7 +181,7 @@ function nfoldCV_forest(
end
function nfoldCV_stumps(
labels ::AbstractVector{T},
-features ::AbstractMatrix{S},
+features ::AbstractVecOrMat{S},
n_folds ::Integer,
n_iterations ::Integer = 10;
verbose :: Bool = true,
@@ -203,7 +203,7 @@ function R2(actual, predicted)
return 1.0 - ss_residual/ss_total
end

-function _nfoldCV(regressor::Symbol, labels::AbstractVector{T}, features::AbstractMatrix, args...; verbose, rng) where T <: Float64
+function _nfoldCV(regressor::Symbol, labels::AbstractVector{T}, features::AbstractVecOrMat, args...; verbose, rng) where T <: Float64
_rng = mk_rng(rng)::Random.AbstractRNG
nfolds = args[1]
if nfolds < 2
@@ -279,7 +279,7 @@ end

function nfoldCV_tree(
labels :: AbstractVector{T},
-features :: AbstractMatrix{S},
+features :: AbstractVecOrMat{S},
n_folds :: Integer,
pruning_purity :: Float64 = 1.0,
max_depth :: Integer = -1,
@@ -293,7 +293,7 @@ _nfoldCV(:tree, labels, features, n_folds, pruning_purity, max_depth,
end
function nfoldCV_forest(
labels :: AbstractVector{T},
-features :: AbstractMatrix{S},
+features :: AbstractVecOrMat{S},
n_folds :: Integer,
n_subfeatures :: Integer = -1,
n_trees :: Integer = 10,
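The cross-validation entry points get the same widening, so a single-feature problem no longer needs reshaping into an N×1 matrix first. A sketch with synthetic data, assuming the classification defaults:

```julia
labels   = rand(["a", "b"], 100)
features = randn(100)                         # one feature, no reshape needed
acc      = nfoldCV_tree(labels, features, 3)  # 3-fold CV, returns per-fold accuracies
```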
6 changes: 3 additions & 3 deletions src/regression/main.jl
@@ -10,13 +10,13 @@ function _convert(node::treeregressor.NodeMeta{S}, labels::Array{T}) where {S, T
end
end

-function build_stump(labels::AbstractVector{T}, features::AbstractMatrix{S}; rng = Random.GLOBAL_RNG) where {S, T <: Float64}
+function build_stump(labels::AbstractVector{T}, features::AbstractVecOrMat{S}; rng = Random.GLOBAL_RNG) where {S, T <: Float64}
return build_tree(labels, features, 0, 1)
end

function build_tree(
labels :: AbstractVector{T},
-features :: AbstractMatrix{S},
+features :: AbstractVecOrMat{S},
n_subfeatures = 0,
max_depth = -1,
min_samples_leaf = 5,
@@ -48,7 +48,7 @@ end

function build_forest(
labels :: AbstractVector{T},
-features :: AbstractMatrix{S},
+features :: AbstractVecOrMat{S},
n_subfeatures = -1,
n_trees = 10,
partial_sampling = 0.7,
11 changes: 5 additions & 6 deletions src/regression/tree.jl
@@ -42,7 +42,7 @@ module treeregressor
# find an optimal split that satisfies the given constraints
# (max_depth, min_samples_split, min_purity_increase)
function _split!(
-X :: AbstractMatrix{S}, # the feature array
+X :: AbstractVecOrMat{S}, # the feature array
Y :: AbstractVector{Float64}, # the label array
W :: AbstractVector{U},
node :: NodeMeta{S}, # the node to split
@@ -229,7 +229,7 @@ module treeregressor
end

function _fit(
-X :: AbstractMatrix{S},
+X :: AbstractVecOrMat{S},
Y :: AbstractVector{Float64},
W :: AbstractVector{U},
max_features :: Int,
@@ -239,8 +239,7 @@
min_purity_increase :: Float64,
rng=Random.GLOBAL_RNG :: Random.AbstractRNG) where {S, U}

-n_samples, n_features = size(X)
-
+n_samples, n_features = util.find_n_samples_and_n_features(X)
Yf = Array{Float64}(undef, n_samples)
Xf = Array{S}(undef, n_samples)
Wf = Array{U}(undef, n_samples)
@@ -272,7 +271,7 @@ module treeregressor
end

function fit(;
-X :: AbstractMatrix{S},
+X :: AbstractVecOrMat{S},
Y :: AbstractVector{Float64},
W :: Union{Nothing, AbstractVector{U}},
max_features :: Int,
@@ -282,7 +281,7 @@
min_purity_increase :: Float64,
rng=Random.GLOBAL_RNG :: Random.AbstractRNG) where {S, U}

-n_samples, n_features = size(X)
+n_samples, n_features = util.find_n_samples_and_n_features(X)
if W == nothing
W = fill(1.0, n_samples)
end
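The regression side mirrors the classification change, down to the `util.find_n_samples_and_n_features` swap. A final sketch (hypothetical data; the regression `build_tree` method dispatches on `Float64` labels):

```julia
labels   = collect(0.0:0.1:9.9)        # 100 Float64 targets
features = collect(1.0:100.0)          # one numeric feature as a Vector

model = build_tree(labels, features)   # regression build_tree: T <: Float64
preds = apply_tree(model, features)    # 100 predictions
```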