From 58f26a16b684be73501cd3c9852d440f89dd7e45 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Mon, 4 Aug 2025 08:53:24 +0200 Subject: [PATCH 01/14] start LM update --- src/LMModel.jl | 55 ++++++++++ src/LM_alg.jl | 195 ++++++++++++++++++++++++++++++++- src/RegularizedOptimization.jl | 1 + 3 files changed, 249 insertions(+), 2 deletions(-) create mode 100644 src/LMModel.jl diff --git a/src/LMModel.jl b/src/LMModel.jl new file mode 100644 index 00000000..02d00fe6 --- /dev/null +++ b/src/LMModel.jl @@ -0,0 +1,55 @@ +export LMModel + +@doc raw""" + LMModel(J, F, v, σ, x0) + +Given the unconstrained optimization problem: +```math +\min \tfrac{1}{2} \| F(x) \|^2, +``` +this model represents the smooth LM subproblem: +```math +\min_s \ \tfrac{1}{2} \| F(x) + J(x)s \|^2 + \tfrac{1}{2} σ \|s\|^2 +``` +where `J` is the Jacobian of `F` at `x0` in sparse format or as a linear operator. +`σ > 0` is a regularization parameter and `v` is a vector of the same size as `F(x0)` used for intermediary computations. +""" +mutable struct LMModel{T <: Real, V <: AbstractVector{T}, G <: Union{AbstractMatrix{T}, AbstractLinearOperator{T}}} <: + AbstractNLPModel{T, V} + J::G + F::V + v::V + σ::T + meta::NLPModelMeta{T, V} + counters::Counters +end + +function LMModel(J::G, F::V, σ::T, x0::V) where {T, V, G} + @assert length(x0) == size(J, 2) + @assert length(F) == size(J, 1) + meta = NLPModelMeta( + length(x0), + x0 = x0, # Perhaps we should add lvar and uvar as well here. + ) + v = similar(F) + return LMModel(J::G, F::V, v::V, σ::T, meta, Counters()) +end + +function NLPModels.obj(nlp::LMModel, x::AbstractVector{T}) where{T} + @lencheck nlp.meta.nvar x + increment!(nlp, :neval_obj) + nlp.v .= nlp.F + mul!(nlp.v, nlp.J, x, one(T), one(T)) + return ( dot(nlp.v, nlp.v) + nlp.σ * dot(x, x) ) / 2 +end + +function NLPModels.grad!(nlp::LMModel, x::AbstractVector{T}, g::AbstractVector{T}) where{T} + @lencheck nlp.meta.nvar x + @lencheck nlp.meta.nvar g + increment!(nlp, :neval_grad) + nlp.v .= nlp.F + @. nlp.g = nlp.σ .* x + mul!(nlp.v, nlp.J, x, one(T), one(T)) + mul!(g, nlp.J', nlp.v, one(T), one(T)) + return g +end diff --git a/src/LM_alg.jl b/src/LM_alg.jl index d63df0db..1f031581 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -1,4 +1,195 @@ -export LM +export LM, LMSolver, solve! + +import SolverCore.solve! + +mutable struct TRDHSolver # FIXME +end + +mutable struct LMSolver{ + T <: Real, + G <: ShiftedProximableFunction, + V <: AbstractVector{T}, + M <: AbstractLinearOperator{T}, + ST <: AbstractOptimizationSolver, + PB <: AbstractRegularizedNLPModel, +} <: AbstractOptimizationSolver + xk::V + ∇fk::V + mν∇fk::V + Fk::V + Fkn::V + Jk::M + ψ::G + xkn::V + s::V + has_bnds::Bool + l_bound::V + u_bound::V + l_bound_m_x::V + u_bound_m_x::V + subsolver::ST + subpb::PB + substats::GenericExecutionStats{T, V, V, T} +end + +function LMSolver( + reg_nls::AbstractRegularizedNLPModel{T, V}; + subsolver = R2Solver, +) where{T, V} + x0 = reg_nls.model.meta.x0 + l_bound = reg_nls.model.meta.lvar + u_bound = reg_nls.model.meta.uvar + + xk = similar(x0) + ∇fk = similar(x0) + mν∇fk = similar(x0) + Fk = similar(x0, reg_nls.model.nls_meta.nequ) + Fkn = similar(Fk) + Jk = jac_op_residual(reg_nls.model, xk) + xkn = similar(x0) + s = similar(x0) + has_bnds = any(l_bound .!= T(-Inf)) || any(u_bound .!= T(Inf)) || subsolver == TRDHSolver + if has_bnds + l_bound_m_x = similar(xk) + u_bound_m_x = similar(xk) + @. l_bound_m_x = l_bound - x0 + @. u_bound_m_x = u_bound - x0 + else + l_bound_m_x = similar(xk, 0) + u_bound_m_x = similar(xk, 0) + end + + ψ = + has_bnds ? shifted(reg_nls.h, xk, l_bound_m_x, u_bound_m_x, reg_nls.selected) : + shifted(reg_nls.h, xk) + + sub_nlp = LMModel(Jk, Fk, T(1), x0) + subpb = RegularizedNLPModel(sub_nlp, ψ) + substats = RegularizedExecutionStats(subpb) + subsolver = subsolver(subpb) + + return LMSolver( + xk, + ∇fk, + mν∇fk, + Fk, + Fkn, + Jk, + ψ, + xkn, + s, + has_bnds, + l_bound, + u_bound, + l_bound_m_x, + u_bound_m_x, + subsolver, + subpb, + substats + ) +end + +function SolverCore.solve!( + solver::LMSolver{T, G, V}, + reg_nls::AbstractRegularizedNLPModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, + x::V = reg_nls.model.meta.x0, + atol::T = √eps(T), + rtol::T = √eps(T), + verbose::Int = 0, + max_iter::Int = 10000, + max_time::Float64 = 30.0, + max_eval::Int = -1, + σk::T = eps(T)^(1 / 5), + σmin::T = eps(T), + η1::T = √√eps(T), + η2::T = T(0.9), + γ::T = T(3), + θ::T = 1/(1 + eps(T)^(1 / 5)), +) where {T, V, G} + reset!(stats) + + # Retrieve workspace + selected = reg_nls.selected + h = reg_nls.h + nls = reg_nls.model + + xk = solver.xk .= x + + # Make sure ψ has the correct shift + shift!(solver.ψ, xk) + + Fk = solver.Fk + Fkn = solver.Fkn + Jk = solver.Jk + ∇fk = solver.∇fk + JdFk = solver.JdFk + Jt_Fk = solver.Jt_Fk + ψ = solver.ψ + xkn = solver.xkn + s = solver.s + + has_bnds = solver.has_bnds + if has_bnds + l_bound = solver.l_bound + u_bound = solver.u_bound + l_bound_m_x = solver.l_bound_m_x + u_bound_m_x = solver.u_bound_m_x + end + + # initialize parameters + improper = false + hk = @views h(xk[selected]) + if hk == Inf + verbose > 0 && @info "LM: finding initial guess where nonsmooth term is finite" + prox!(xk, h, xk, one(eltype(x0))) + hk = @views h(xk[selected]) + hk < Inf || error("prox computation must be erroneous") + verbose > 0 && @debug "LM: found point where h has value" hk + end + improper = (hk == -Inf) + improper == true && @warn "LM: Improper term detected" + improper == true && return stats + + if verbose > 0 + @info log_header( + [:outer, :inner, :fx, :hx, :xi, :ρ, :σ, :normx, :norms, :normJ, :arrow], + [Int, Int, T, T, T, T, T, T, T, T, Char], + hdr_override = Dict{Symbol, String}( + :fx => "f(x)", + :hx => "h(x)", + :xi => "√(ξ1/ν)", + :normx => "‖x‖", + :norms => "‖s‖", + :normB => "‖J‖²", + :arrow => "R2N", + ), + colsep = 1, + ) + end + + local ξ1::T + local ρk::T = zero(T) + + residual!(nls, xk, Fk) + Jk = jac_op_residual(nls, xk) + mul!(∇fk, Jk', Fk) + fk = dot(Fk, Fk) / 2 + + σmax, found_σ = opnorm(Jk) + found_σ || error("operator norm computation failed") + ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + sqrt_ξ1_νInv = one(T) + + @. mν∇fk = -ν * ∇fk + + φ1(d) = let Fk = Fk, Jk = Jk, + d -> dot(Fk, Fk) / 2 + end + + return +end """ LM(nls, h, options; kwargs...) @@ -143,7 +334,7 @@ function LM( Resid_hist[k] = nls.counters.neval_residual # model for first prox-gradient iteration - φ1(d) = begin + φ1(d) = begin # || Fk ||^2/2 + d*Jk'*Fk jtprod_residual!(nls, xk, Fk, Jt_Fk) dot(Fk, Fk) / 2 + dot(Jt_Fk, d) end diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index 01778e86..ac3740f0 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -19,6 +19,7 @@ include("splitting.jl") include("TR_alg.jl") include("TRDH_alg.jl") include("R2_alg.jl") +include("LMModel.jl") include("LM_alg.jl") include("LMTR_alg.jl") include("R2DH.jl") From 3ffc73b15cdccd33fa8827aa7132e48ff5c58e68 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Wed, 6 Aug 2025 11:28:20 +0200 Subject: [PATCH 02/14] JSO-compliant LM draft --- src/LMModel.jl | 2 +- src/LM_alg.jl | 221 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 211 insertions(+), 12 deletions(-) diff --git a/src/LMModel.jl b/src/LMModel.jl index 02d00fe6..454566e0 100644 --- a/src/LMModel.jl +++ b/src/LMModel.jl @@ -48,7 +48,7 @@ function NLPModels.grad!(nlp::LMModel, x::AbstractVector{T}, g::AbstractVector{T @lencheck nlp.meta.nvar g increment!(nlp, :neval_grad) nlp.v .= nlp.F - @. nlp.g = nlp.σ .* x + @. g = nlp.σ .* x mul!(nlp.v, nlp.J, x, one(T), one(T)) mul!(g, nlp.J', nlp.v, one(T), one(T)) return g diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 1f031581..ccf2068d 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -95,8 +95,10 @@ function SolverCore.solve!( stats::GenericExecutionStats{T, V}; callback = (args...) -> nothing, x::V = reg_nls.model.meta.x0, + nonlinear::Bool = true, atol::T = √eps(T), rtol::T = √eps(T), + neg_tol::T = zero(T), verbose::Int = 0, max_iter::Int = 10000, max_time::Float64 = 30.0, @@ -124,8 +126,7 @@ function SolverCore.solve!( Fkn = solver.Fkn Jk = solver.Jk ∇fk = solver.∇fk - JdFk = solver.JdFk - Jt_Fk = solver.Jt_Fk + mν∇fk = solver.mν∇fk ψ = solver.ψ xkn = solver.xkn s = solver.s @@ -162,8 +163,8 @@ function SolverCore.solve!( :xi => "√(ξ1/ν)", :normx => "‖x‖", :norms => "‖s‖", - :normB => "‖J‖²", - :arrow => "R2N", + :normJ => "‖J‖²", + :arrow => "LM", ), colsep = 1, ) @@ -174,7 +175,7 @@ function SolverCore.solve!( residual!(nls, xk, Fk) Jk = jac_op_residual(nls, xk) - mul!(∇fk, Jk', Fk) + jtprod_residual!(nls, xk, Fk, ∇fk) fk = dot(Fk, Fk) / 2 σmax, found_σ = opnorm(Jk) @@ -184,11 +185,208 @@ function SolverCore.solve!( @. mν∇fk = -ν * ∇fk - φ1(d) = let Fk = Fk, Jk = Jk, - d -> dot(Fk, Fk) / 2 + set_iter!(stats, 0) + start_time = time() + set_time!(stats, 0.0) + set_objective!(stats, fk + hk) + set_solver_specific!(stats, :smooth_obj, fk) + set_solver_specific!(stats, :nonsmooth_obj, hk) + + φ1 = let Fk = Fk, ∇fk = ∇fk + d -> dot(Fk, Fk) / 2 + dot(∇fk, d) # ∇fk = Jk^T Fk + end + + mk1 = let φ1 = φ1, ψ = ψ + d -> φ1(d) + ψ(d) + end + + mk = let ψ = ψ, solver = solver + d -> obj(solver.subpb.model, d) + ψ(d) + end + + prox!(s, ψ, mν∇fk, ν) + ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() + sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν) + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) + (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && + error("LM: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative + + set_status!( + stats, + get_status( + reg_nls, + elapsed_time = stats.elapsed_time, + iter = stats.iter, + optimal = solved, + improper = improper, + max_eval = max_eval, + max_time = max_time, + max_iter = max_iter, + ), + ) + + callback(nls, solver, stats) + + done = stats.status != :unknown + + while !done + + sub_atol = stats.iter == 0 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5), sqrt_ξ1_νInv * 1e-3) + solver.subpb.model.σ = σk + isa(solver.subsolver, R2DHSolver) && (solver.subsolver.D.d[1] = 1/ν) + if isa(solver.subsolver, R2Solver) #FIXME + solve!( + solver.subsolver, + solver.subpb, + solver.substats, + x = s, + atol = sub_atol, + ν = ν, + ) + else + solve!( + solver.subsolver, + solver.subpb, + solver.substats, + x = s, + atol = sub_atol, + σk = σk, #FIXME + ) + end + + s .= solver.substats.solution + + xkn .= xk .+ s + residual!(nls, xkn, Fkn) + fkn = dot(Fkn, Fkn) / 2 + hkn = @views h(xkn[selected]) + mks = mk(s) + ξ = fk + hk - mks + max(1, abs(hk)) * 10 * eps() + + if (ξ ≤ 0 || isnan(ξ)) + error("LM: failed to compute a step: ξ = $ξ") + end + + Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() + ρk = Δobj / ξ + + verbose > 0 && + stats.iter % verbose == 0 && + @info log_row( + Any[ + stats.iter, + solver.substats.iter, + fk, + hk, + sqrt_ξ1_νInv, + ρk, + σk, + norm(xk), + norm(s), + 1 / ν, + (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? "↗" : "="), + ], + colsep = 1, + ) + + if η1 ≤ ρk < Inf + xk .= xkn + + if has_bnds + @. l_bound_m_x = l_bound - xk + @. u_bound_m_x = u_bound - xk + set_bounds!(ψ, l_bound_m_x, u_bound_m_x) + end + + #update functions + Fk .= Fkn + fk = fkn + hk = hkn + + # update gradient & Hessian + shift!(ψ, xk) + Jk = jac_op_residual(nls, xk) + jtprod_residual!(nls, xk, Fk, ∇fk) + + # update opnorm if not linear least squares + if nonlinear == true + σmax, found_σ = opnorm(Jk) + found_σ || error("operator norm computation failed") + end + end + + if η2 ≤ ρk < Inf + σk = max(σk / γ, σmin) + end + + if ρk < η1 || ρk == Inf + σk = σk * γ + end + + set_objective!(stats, fk + hk) + set_solver_specific!(stats, :smooth_obj, fk) + set_solver_specific!(stats, :nonsmooth_obj, hk) + set_iter!(stats, stats.iter + 1) + set_time!(stats, time() - start_time) + + ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + + @. mν∇fk = - ν * ∇fk + prox!(s, ψ, mν∇fk, ν) + mks = mk1(s) + + ξ1 = fk + hk - mks + max(1, abs(hk)) * 10 * eps() + + sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν) + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) + + (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && + error("LM: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + + set_status!( + stats, + get_status( + reg_nls, + elapsed_time = stats.elapsed_time, + iter = stats.iter, + optimal = solved, + improper = improper, + max_eval = max_eval, + max_time = max_time, + max_iter = max_iter, + ), + ) + + callback(nls, solver, stats) + + done = stats.status != :unknown + + end + + if verbose > 0 && stats.status == :first_order + @info log_row( + Any[ + stats.iter, + 0, + fk, + hk, + sqrt_ξ1_νInv, + ρk, + σk, + norm(xk), + norm(s), + 1 / ν, + "", + ], + colsep = 1, + ) + @info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" end - return + set_solution!(stats, xk) + set_residuals!(stats, zero(T), sqrt_ξ1_νInv) + return stats end """ @@ -388,9 +586,10 @@ function LM( subsolver_options.ν = ν subsolver_args = subsolver == R2DH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : () @debug "setting inner stopping tolerance to" subsolver_options.optTol - s, iter, _ = with_logger(subsolver_logger) do - subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) - end + #s, iter, _ = with_logger(subsolver_logger) do + #subsolver_options.verbose = 1 + s, iter, _ = subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) + #end # restore initial subsolver_options here so that it is not modified if there is an error subsolver_options.ν = ν_subsolver subsolver_options.ϵa = ϵa_subsolver From 02b6cceb506bdfa76059d7493ad80db777fc698d Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 7 Aug 2025 13:38:42 +0200 Subject: [PATCH 03/14] make LMModel type stable --- src/LMModel.jl | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/src/LMModel.jl b/src/LMModel.jl index 454566e0..fe5314fa 100644 --- a/src/LMModel.jl +++ b/src/LMModel.jl @@ -1,7 +1,7 @@ export LMModel @doc raw""" - LMModel(J, F, v, σ, x0) + LMModel(j_prod!, jt_prod, F, v, σ, xk) Given the unconstrained optimization problem: ```math @@ -11,35 +11,37 @@ this model represents the smooth LM subproblem: ```math \min_s \ \tfrac{1}{2} \| F(x) + J(x)s \|^2 + \tfrac{1}{2} σ \|s\|^2 ``` -where `J` is the Jacobian of `F` at `x0` in sparse format or as a linear operator. -`σ > 0` is a regularization parameter and `v` is a vector of the same size as `F(x0)` used for intermediary computations. +where `J` is the Jacobian of `F` at `xk`, represented via matrix-free operations. +`j_prod!(xk, s, out)` computes `J(xk) * s`, and `jt_prod!(xk, r, out)` computes `J(xk)' * r`. + +`σ > 0` is a regularization parameter and `v` is a vector of the same size as `F(xk)` used for intermediary computations. """ -mutable struct LMModel{T <: Real, V <: AbstractVector{T}, G <: Union{AbstractMatrix{T}, AbstractLinearOperator{T}}} <: +mutable struct LMModel{T <: Real, V <: AbstractVector{T}, J <: Function , Jt <: Function} <: AbstractNLPModel{T, V} - J::G + j_prod!::J + jt_prod!::Jt F::V v::V + xk::V σ::T meta::NLPModelMeta{T, V} counters::Counters end -function LMModel(J::G, F::V, σ::T, x0::V) where {T, V, G} - @assert length(x0) == size(J, 2) - @assert length(F) == size(J, 1) +function LMModel(j_prod!::J, jt_prod!::Jt, F::V, σ::T, xk::V) where {T, V, J, Jt} meta = NLPModelMeta( - length(x0), - x0 = x0, # Perhaps we should add lvar and uvar as well here. + length(xk), + x0 = xk, # Perhaps we should add lvar and uvar as well here. ) v = similar(F) - return LMModel(J::G, F::V, v::V, σ::T, meta, Counters()) + return LMModel(j_prod!, jt_prod!, F, v, xk, σ, meta, Counters()) end function NLPModels.obj(nlp::LMModel, x::AbstractVector{T}) where{T} @lencheck nlp.meta.nvar x increment!(nlp, :neval_obj) - nlp.v .= nlp.F - mul!(nlp.v, nlp.J, x, one(T), one(T)) + nlp.j_prod!(nlp.xk, x, nlp.v) # v = J(xk)x + nlp.v .+= nlp.F return ( dot(nlp.v, nlp.v) + nlp.σ * dot(x, x) ) / 2 end @@ -47,9 +49,9 @@ function NLPModels.grad!(nlp::LMModel, x::AbstractVector{T}, g::AbstractVector{T @lencheck nlp.meta.nvar x @lencheck nlp.meta.nvar g increment!(nlp, :neval_grad) - nlp.v .= nlp.F - @. g = nlp.σ .* x - mul!(nlp.v, nlp.J, x, one(T), one(T)) - mul!(g, nlp.J', nlp.v, one(T), one(T)) + nlp.j_prod!(nlp.xk, x, nlp.v) # v = J(xk)x + F + nlp.v .+= nlp.F + nlp.jt_prod!(nlp.xk, nlp.v, g) # g = J^T(xk) v + @. g += nlp.σ .* x return g end From 458aae8b687734ce2b7ea05d5c4103356c943dfe Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 7 Aug 2025 13:39:37 +0200 Subject: [PATCH 04/14] improve type stability in LM --- src/LM_alg.jl | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/LM_alg.jl b/src/LM_alg.jl index ccf2068d..5aeaa435 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -9,7 +9,6 @@ mutable struct LMSolver{ T <: Real, G <: ShiftedProximableFunction, V <: AbstractVector{T}, - M <: AbstractLinearOperator{T}, ST <: AbstractOptimizationSolver, PB <: AbstractRegularizedNLPModel, } <: AbstractOptimizationSolver @@ -18,7 +17,6 @@ mutable struct LMSolver{ mν∇fk::V Fk::V Fkn::V - Jk::M ψ::G xkn::V s::V @@ -45,7 +43,6 @@ function LMSolver( mν∇fk = similar(x0) Fk = similar(x0, reg_nls.model.nls_meta.nequ) Fkn = similar(Fk) - Jk = jac_op_residual(reg_nls.model, xk) xkn = similar(x0) s = similar(x0) has_bnds = any(l_bound .!= T(-Inf)) || any(u_bound .!= T(Inf)) || subsolver == TRDHSolver @@ -63,18 +60,24 @@ function LMSolver( has_bnds ? shifted(reg_nls.h, xk, l_bound_m_x, u_bound_m_x, reg_nls.selected) : shifted(reg_nls.h, xk) - sub_nlp = LMModel(Jk, Fk, T(1), x0) + jprod! = let nls = reg_nls.model + (x, v, Jv) -> jprod_residual!(nls, x, v, Jv) + end + jt_prod! = let nls = reg_nls.model + (x, v, Jtv) -> jtprod_residual!(nls, x, v, Jtv) + end + + sub_nlp = LMModel(jprod!, jt_prod!, Fk, T(1), xk) subpb = RegularizedNLPModel(sub_nlp, ψ) substats = RegularizedExecutionStats(subpb) subsolver = subsolver(subpb) - return LMSolver( + return LMSolver{T, typeof(ψ), V, typeof(subsolver), typeof(subpb)}( xk, ∇fk, mν∇fk, Fk, Fkn, - Jk, ψ, xkn, s, @@ -124,13 +127,11 @@ function SolverCore.solve!( Fk = solver.Fk Fkn = solver.Fkn - Jk = solver.Jk ∇fk = solver.∇fk mν∇fk = solver.mν∇fk ψ = solver.ψ xkn = solver.xkn s = solver.s - has_bnds = solver.has_bnds if has_bnds l_bound = solver.l_bound @@ -144,7 +145,7 @@ function SolverCore.solve!( hk = @views h(xk[selected]) if hk == Inf verbose > 0 && @info "LM: finding initial guess where nonsmooth term is finite" - prox!(xk, h, xk, one(eltype(x0))) + prox!(xk, h, xk, one(T)) hk = @views h(xk[selected]) hk < Inf || error("prox computation must be erroneous") verbose > 0 && @debug "LM: found point where h has value" hk @@ -174,11 +175,12 @@ function SolverCore.solve!( local ρk::T = zero(T) residual!(nls, xk, Fk) - Jk = jac_op_residual(nls, xk) + #solver.subpb.model.J = jac_op_residual!(nls, xk, Jv, Jtv) jtprod_residual!(nls, xk, Fk, ∇fk) fk = dot(Fk, Fk) / 2 - σmax, found_σ = opnorm(Jk) + #σmax, found_σ = opnorm(solver.subpb.model.J) + σmax, found_σ = one(T), true found_σ || error("operator norm computation failed") ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ sqrt_ξ1_νInv = one(T) @@ -306,12 +308,13 @@ function SolverCore.solve!( # update gradient & Hessian shift!(ψ, xk) - Jk = jac_op_residual(nls, xk) + #solver.subpb.model.J = jac_op_residual!(nls, xk, Jv, Jtv) jtprod_residual!(nls, xk, Fk, ∇fk) # update opnorm if not linear least squares if nonlinear == true - σmax, found_σ = opnorm(Jk) + #σmax, found_σ = opnorm(solver.subpb.model.J) + σmax, found_σ = one(T), true found_σ || error("operator norm computation failed") end end From 5101a101e4e93355f9b19a429fbace5743f9251e Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 7 Aug 2025 13:51:55 +0200 Subject: [PATCH 05/14] add LM doc --- src/LM_alg.jl | 377 ++++++++++---------------------------------------- 1 file changed, 71 insertions(+), 306 deletions(-) diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 5aeaa435..b8a2a792 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -92,6 +92,74 @@ function LMSolver( ) end +""" + LM(reg_nls; kwargs...) + +A Levenberg-Marquardt method for the problem + + min ½ ‖F(x)‖² + h(x) + +where F: ℝⁿ → ℝᵐ and its Jacobian J are Lipschitz continuous and h: ℝⁿ → ℝ is +lower semi-continuous, proper and prox-bounded. + +At each iteration, a step s is computed as an approximate solution of + + min ½ ‖J(x) s + F(x)‖² + ½ σ ‖s‖² + ψ(s; x) + +where F(x) and J(x) are the residual and its Jacobian at x, respectively, ψ(s; xₖ) is either h(xₖ + s) or an approximation of h(xₖ + s), +‖⋅‖ is the ℓ₂ norm and σₖ > 0 is the regularization parameter. + +For advanced usage, first define a solver "LMSolver" to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = LMSolver(reg_nls; subsolver = R2Solver) + solve!(solver, reg_nls) + + stats = RegularizedExecutionStats(reg_nls) + solve!(solver, reg_nls, stats) + +# Arguments +* `reg_nls::AbstractRegularizedNLPModel{T, V}`: the problem to solve, see `RegularizedProblems.jl`, `NLPModels.jl`. + +# Keyword arguments +- `x::V = nlp.meta.x0`: the initial guess; +- `nonlinear::Bool = true`: whether the function `F` is nonlinear or not. +- `atol::T = √eps(T)`: absolute tolerance; +- `rtol::T = √eps(T)`: relative tolerance; +- `neg_tol::T = zero(T): negative tolerance; +- `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); +- `max_time::Float64 = 30.0`: maximum time limit in seconds; +- `max_iter::Int = 10000`: maximum number of iterations; +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration; +- `σmin::T = eps(T)`: minimum value of the regularization parameter; +- `σk::T = eps(T)^(1 / 5)`: initial value of the regularization parameter; +- `η1::T = √√eps(T)`: successful iteration threshold; +- `η2::T = T(0.9)`: very successful iteration threshold; +- `γ::T = T(3)`: regularization parameter multiplier, σ := σ/γ when the iteration is very successful and σ := σγ when the iteration is unsuccessful; +- `θ::T = 1/(1 + eps(T)^(1 / 5))`: is the model decrease fraction with respect to the decrease of the Cauchy model; +- `subsolver = R2Solver`: the solver used to solve the subproblems. + +The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure. + +# Output +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. + +# Callback +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. +In particular, setting `stats.status = :user` will stop the algorithm. +All relevant information should be available in `nlp` and `solver`. +Notably, you can access, and modify, the following: +- `solver.xk`: current iterate; +- `solver.∇fk`: current gradient; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.solver_specific[:smooth_obj]`: current value of the smooth part of the objective function; + - `stats.solver_specific[:nonsmooth_obj]`: current value of the nonsmooth part of the objective function; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything other than `:unknown` will stop the algorithm, but you should use `:user` to properly indicate the intention; + - `stats.elapsed_time`: elapsed time in seconds. +""" function SolverCore.solve!( solver::LMSolver{T, G, V}, reg_nls::AbstractRegularizedNLPModel{T, V}, @@ -179,8 +247,7 @@ function SolverCore.solve!( jtprod_residual!(nls, xk, Fk, ∇fk) fk = dot(Fk, Fk) / 2 - #σmax, found_σ = opnorm(solver.subpb.model.J) - σmax, found_σ = one(T), true + σmax, found_σ = opnorm(solver.subpb.model.J) found_σ || error("operator norm computation failed") ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ sqrt_ξ1_νInv = one(T) @@ -313,8 +380,7 @@ function SolverCore.solve!( # update opnorm if not linear least squares if nonlinear == true - #σmax, found_σ = opnorm(solver.subpb.model.J) - σmax, found_σ = one(T), true + σmax, found_σ = opnorm(solver.subpb.model.J) found_σ || error("operator norm computation failed") end end @@ -390,305 +456,4 @@ function SolverCore.solve!( set_solution!(stats, xk) set_residuals!(stats, zero(T), sqrt_ξ1_νInv) return stats -end - -""" - LM(nls, h, options; kwargs...) - -A Levenberg-Marquardt method for the problem - - min ½ ‖F(x)‖² + h(x) - -where F: ℝⁿ → ℝᵐ and its Jacobian J are Lipschitz continuous and h: ℝⁿ → ℝ is -lower semi-continuous, proper and prox-bounded. - -At each iteration, a step s is computed as an approximate solution of - - min ½ ‖J(x) s + F(x)‖² + ½ σ ‖s‖² + ψ(s; x) - -where F(x) and J(x) are the residual and its Jacobian at x, respectively, ψ(s; x) = h(x + s), -and σ > 0 is a regularization parameter. - -### Arguments - -* `nls::AbstractNLSModel`: a smooth nonlinear least-squares problem -* `h`: a regularizer such as those defined in ProximalOperators -* `options::ROSolverOptions`: a structure containing algorithmic parameters - -### Keyword arguments - -* `x0::AbstractVector`: an initial guess (default: `nls.meta.x0`) -* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver -* `subsolver`: the procedure used to compute a step (`PG`, `R2` or `TRDH`) -* `subsolver_options::ROSolverOptions`: default options to pass to the subsolver. -* `selected::AbstractVector{<:Integer}`: (default `1:nls.meta.nvar`). - -### Return values - -* `xk`: the final iterate -* `Fobj_hist`: an array with the history of values of the smooth objective -* `Hobj_hist`: an array with the history of values of the nonsmooth objective -* `Complex_hist`: an array with the history of number of inner iterations. -""" -function LM( - nls::AbstractNLSModel, - h::H, - options::ROSolverOptions; - x0::AbstractVector = nls.meta.x0, - subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), - subsolver = R2, - subsolver_options = ROSolverOptions(ϵa = options.ϵa), - selected::AbstractVector{<:Integer} = 1:(nls.meta.nvar), - nonlinear::Bool = true, -) where {H} - start_time = time() - elapsed_time = 0.0 - # initialize passed options - ϵ = options.ϵa - ϵ_subsolver = subsolver_options.ϵa - ϵr = options.ϵr - verbose = options.verbose - maxIter = options.maxIter - maxTime = options.maxTime - η1 = options.η1 - η2 = options.η2 - γ = options.γ - θ = options.θ - σmin = options.σmin - σk = options.σk - - # store initial values of the subsolver_options fields that will be modified - ν_subsolver = subsolver_options.ν - ϵa_subsolver = subsolver_options.ϵa - - local l_bound, u_bound - treats_bounds = has_bounds(nls) || subsolver == TRDH - if treats_bounds - l_bound = nls.meta.lvar - u_bound = nls.meta.uvar - end - - if verbose == 0 - ptf = Inf - elseif verbose == 1 - ptf = round(maxIter / 10) - elseif verbose == 2 - ptf = round(maxIter / 100) - else - ptf = 1 - end - - # initialize parameters - xk = copy(x0) - hk = h(xk[selected]) - if hk == Inf - verbose > 0 && @info "LM: finding initial guess where nonsmooth term is finite" - prox!(xk, h, x0, one(eltype(x0))) - hk = h(xk[selected]) - hk < Inf || error("prox computation must be erroneous") - verbose > 0 && @debug "LM: found point where h has value" hk - end - hk == -Inf && error("nonsmooth term is not proper") - ψ = treats_bounds ? shifted(h, xk, l_bound - xk, u_bound - xk, selected) : shifted(h, xk) - - xkn = similar(xk) - - local ξ1 - local sqrt_ξ1_νInv - k = 0 - Fobj_hist = zeros(maxIter) - Hobj_hist = zeros(maxIter) - Complex_hist = zeros(Int, maxIter) - Grad_hist = zeros(Int, maxIter) - Resid_hist = zeros(Int, maxIter) - - if verbose > 0 - #! format: off - @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√(ξ1/ν)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Jₖ‖²" "reg" - #! format: on - end - - # main algorithm initialization - Fk = residual(nls, xk) - Fkn = similar(Fk) - fk = dot(Fk, Fk) / 2 - Jk = jac_op_residual(nls, xk) - ∇fk = Jk' * Fk - JdFk = similar(Fk) # temporary storage - Jt_Fk = similar(∇fk) - - σmax, found_σ = opnorm(Jk) - found_σ || error("operator norm computation failed") - ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ - - s = zero(xk) - - optimal = false - tired = k ≥ maxIter || elapsed_time > maxTime - - while !(optimal || tired) - k = k + 1 - elapsed_time = time() - start_time - Fobj_hist[k] = fk - Hobj_hist[k] = hk - Grad_hist[k] = nls.counters.neval_jtprod_residual + nls.counters.neval_jprod_residual - Resid_hist[k] = nls.counters.neval_residual - - # model for first prox-gradient iteration - φ1(d) = begin # || Fk ||^2/2 + d*Jk'*Fk - jtprod_residual!(nls, xk, Fk, Jt_Fk) - dot(Fk, Fk) / 2 + dot(Jt_Fk, d) - end - - mk1(d) = φ1(d) + ψ(d) - - # TODO: reuse residual computation - # model for subsequent prox-gradient iterations - φ(d) = begin - jprod_residual!(nls, xk, d, JdFk) - JdFk .+= Fk - return dot(JdFk, JdFk) / 2 + σk * dot(d, d) / 2 - end - - ∇φ!(g, d) = begin - jprod_residual!(nls, xk, d, JdFk) - JdFk .+= Fk - jtprod_residual!(nls, xk, JdFk, g) - g .+= σk * d - return g - end - - mk(d) = begin - jprod_residual!(nls, xk, d, JdFk) - JdFk .+= Fk - return dot(JdFk, JdFk) / 2 + σk * dot(d, d) / 2 + ψ(d) - end - - # take first proximal gradient step s1 and see if current xk is nearly stationary - # s1 minimizes φ1(s) + ‖s‖² / 2 / ν + ψ(s) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)). - ∇fk .*= -ν # reuse gradient storage - prox!(s, ψ, ∇fk, ν) - ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() # TODO: isn't mk(s) returned by subsolver? - ξ1 > 0 || error("LM: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - sqrt_ξ1_νInv = sqrt(ξ1 / ν) - - if ξ1 ≥ 0 && k == 1 - ϵ_increment = ϵr * sqrt_ξ1_νInv - ϵ += ϵ_increment # make stopping test absolute and relative - ϵ_subsolver += ϵ_increment - end - - if sqrt_ξ1_νInv < ϵ - # the current xk is approximately first-order stationary - optimal = true - continue - end - - # subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) - subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv^(1.5), sqrt_ξ1_νInv * 1e-3) # 1.0e-5 default - subsolver_options.ν = ν - subsolver_args = subsolver == R2DH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : () - @debug "setting inner stopping tolerance to" subsolver_options.optTol - #s, iter, _ = with_logger(subsolver_logger) do - #subsolver_options.verbose = 1 - s, iter, _ = subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) - #end - # restore initial subsolver_options here so that it is not modified if there is an error - subsolver_options.ν = ν_subsolver - subsolver_options.ϵa = ϵa_subsolver - - Complex_hist[k] = iter - - xkn .= xk .+ s - residual!(nls, xkn, Fkn) - fkn = dot(Fkn, Fkn) / 2 - hkn = h(xkn[selected]) - hkn == -Inf && error("nonsmooth term is not proper") - mks = mk(s) - ξ = fk + hk - mks + max(1, abs(hk)) * 10 * eps() - - if (ξ ≤ 0 || isnan(ξ)) - error("LM: failed to compute a step: ξ = $ξ") - end - - Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() - ρk = Δobj / ξ - - σ_stat = (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? "↗" : "=") - - if (verbose > 0) && (k % ptf == 0) - #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) 1/ν σ_stat - #! format: off - end - - if η2 ≤ ρk < Inf - σk = max(σk / γ, σmin) - end - - if η1 ≤ ρk < Inf - xk .= xkn - treats_bounds && set_bounds!(ψ, l_bound - xk, u_bound - xk) - - # update functions - Fk .= Fkn - fk = fkn - hk = hkn - - # update gradient & Hessian - shift!(ψ, xk) - Jk = jac_op_residual(nls, xk) - jtprod_residual!(nls, xk, Fk, ∇fk) - - # update opnorm if not linear least squares - if nonlinear == true - σmax, found_σ = opnorm(Jk) - found_σ || error("operator norm computation failed") - end - - Complex_hist[k] += 1 - end - - if ρk < η1 || ρk == Inf - σk = σk * γ - end - ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ - tired = k ≥ maxIter || elapsed_time > maxTime - end - - if verbose > 0 - if k == 1 - @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk - elseif optimal - #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) 1/ν - #! format: on - @info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" - end - end - status = if optimal - :first_order - elseif elapsed_time > maxTime - :max_time - elseif tired - :max_iter - else - :exception - end - - stats = GenericExecutionStats(nls) - set_status!(stats, status) - set_solution!(stats, xk) - set_objective!(stats, fk + hk) - set_residuals!(stats, zero(eltype(xk)), ξ1 ≥ 0 ? sqrt_ξ1_νInv : ξ1) - set_iter!(stats, k) - set_time!(stats, elapsed_time) - set_solver_specific!(stats, :sigma, σk) - set_solver_specific!(stats, :Fhist, Fobj_hist[1:k]) - set_solver_specific!(stats, :Hhist, Hobj_hist[1:k]) - set_solver_specific!(stats, :NonSmooth, h) - set_solver_specific!(stats, :SubsolverCounter, Complex_hist[1:k]) - set_solver_specific!(stats, :NLSGradHist, Grad_hist[1:k]) - set_solver_specific!(stats, :ResidHist, Resid_hist[1:k]) - return stats -end +end \ No newline at end of file From 144f9a400ba7e3c230be1e2e09e18da6da5d3b20 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 7 Aug 2025 14:02:19 +0200 Subject: [PATCH 06/14] add LM function signatures --- src/LM_alg.jl | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/LM_alg.jl b/src/LM_alg.jl index b8a2a792..1f981ebc 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -160,6 +160,43 @@ Notably, you can access, and modify, the following: - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything other than `:unknown` will stop the algorithm, but you should use `:user` to properly indicate the intention; - `stats.elapsed_time`: elapsed time in seconds. """ +function LM( + nls::AbstractNLSModel, + h::H, + options::ROSolverOptions; + kwargs..., +) where {H} + kwargs_dict = Dict(kwargs...) + selected = pop!(kwargs_dict, :selected, 1:(nls.meta.nvar)) + x0 = pop!(kwargs_dict, :x0, nls.meta.x0) + reg_nls = RegularizedNLPModel(nls, h, selected) + return LM( + reg_nls, + x = x0, + atol = options.ϵa, + rtol = options.ϵr, + neg_tol = options.neg_tol, + verbose = options.verbose, + max_iter = options.maxIter, + max_time = options.maxTime, + σmin = options.σmin, + σk = options.σk, + η1 = options.η1, + η2 = options.η2, + γ = options.γ, + kwargs_dict..., + ) +end + +function LM(reg_nls::AbstractRegularizedNLPModel, kwargs...) + kwargs_dict = Dict(kwargs...) + subsolver = pop!(kwargs_dict, :subsolver, R2Solver) + solver = LMSolver(reg_nls, subsolver = subsolver) + stats = RegularizedExecutionStats(reg_nls.model) + solve!(solver, reg_nls, stats; kwargs_dict...) + return stats +end + function SolverCore.solve!( solver::LMSolver{T, G, V}, reg_nls::AbstractRegularizedNLPModel{T, V}, From 576bb885f2db2554b617f12b8e55794564ba7fea Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 7 Aug 2025 14:08:07 +0200 Subject: [PATCH 07/14] update tests --- test/runtests.jl | 14 ++------------ test/test_allocs.jl | 17 ++++++++++++++++- test/test_bounds.jl | 28 +++++----------------------- 3 files changed, 23 insertions(+), 36 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 1e8b28d4..59d0ee4e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -91,19 +91,9 @@ for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1"), (IndBallL0(10 * com x0[p[1:nz]] = sign.(randn(nz)) # initial guess with nz nonzeros (necessary for h = B0) args = solver_sym == :LM ? () : (NormLinf(1.0),) out = solver(bpdn_nls, h, args..., options, x0 = x0) - @test typeof(out.solution) == typeof(bpdn_nls.meta.x0) - @test length(out.solution) == bpdn_nls.meta.nvar - @test typeof(out.solver_specific[:Fhist]) == typeof(out.solution) - @test typeof(out.solver_specific[:Hhist]) == typeof(out.solution) - @test typeof(out.solver_specific[:SubsolverCounter]) == Array{Int, 1} + @test typeof(out.solution) == typeof(bpdn.meta.x0) + @test length(out.solution) == bpdn.meta.nvar @test typeof(out.dual_feas) == eltype(out.solution) - @test length(out.solver_specific[:Fhist]) == length(out.solver_specific[:Hhist]) - @test length(out.solver_specific[:Fhist]) == length(out.solver_specific[:SubsolverCounter]) - @test length(out.solver_specific[:Fhist]) == length(out.solver_specific[:NLSGradHist]) - @test out.solver_specific[:NLSGradHist][end] == - bpdn_nls.counters.neval_jprod_residual + bpdn_nls.counters.neval_jtprod_residual - 1 - @test obj(bpdn_nls, out.solution) == out.solver_specific[:Fhist][end] - @test h(out.solution) == out.solver_specific[:Hhist][end] @test out.status == :first_order end end diff --git a/test/test_allocs.jl b/test/test_allocs.jl index accf0ccc..5d1f95f9 100644 --- a/test/test_allocs.jl +++ b/test/test_allocs.jl @@ -40,7 +40,7 @@ macro wrappedallocs(expr) end # Test non allocating solve! -@testset "allocs" begin +@testset "NLP allocs" begin for (h, h_name) ∈ ((NormL0(λ), "l0"),) for (solver, solver_name) ∈ ((:R2Solver, "R2"), (:R2DHSolver, "R2DH"), (:R2NSolver, "R2N")) @testset "$(solver_name)" begin @@ -59,3 +59,18 @@ end end end end + +@testset "NLS allocs" begin + for (h, h_name) ∈ ((NormL0(λ), "l0"),) + for (solver, solver_name) ∈ ((:LMSolver, "LM"), ) + @testset "$(solver_name)" begin + solver_name == "LM" && continue #FIXME + reg_nlp = RegularizedNLPModel(bpdn_nls, h) + solver = eval(solver)(reg_nlp) + stats = RegularizedExecutionStats(reg_nlp) + @test @wrappedallocs(solve!(solver, reg_nlp, stats, σk = 1.0, atol = 1e-6, rtol = 1e-6)) == 0 + @test stats.status == :first_order + end + end + end +end diff --git a/test/test_bounds.jl b/test/test_bounds.jl index 14407d60..f13efb8d 100644 --- a/test/test_bounds.jl +++ b/test/test_bounds.jl @@ -54,16 +54,9 @@ for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1")) args = solver_sym == :LM ? () : (NormLinf(1.0),) @test has_bounds(bpdn_nls2) out = solver(bpdn_nls2, h, args..., options, x0 = x0) - @test typeof(out.solution) == typeof(bpdn_nls2.meta.x0) - @test length(out.solution) == bpdn_nls2.meta.nvar - @test typeof(out.solver_specific[:Fhist]) == typeof(out.solution) - @test typeof(out.solver_specific[:Hhist]) == typeof(out.solution) - @test typeof(out.solver_specific[:SubsolverCounter]) == Array{Int, 1} + @test typeof(out.solution) == typeof(bpdn.meta.x0) + @test length(out.solution) == bpdn.meta.nvar @test typeof(out.dual_feas) == eltype(out.solution) - @test length(out.solver_specific[:Fhist]) == length(out.solver_specific[:Hhist]) - @test length(out.solver_specific[:Fhist]) == length(out.solver_specific[:SubsolverCounter]) - @test obj(bpdn_nls2, out.solution) == out.solver_specific[:Fhist][end] - @test h(out.solution) == out.solver_specific[:Hhist][end] @test out.status == :first_order end end @@ -108,20 +101,9 @@ for (h, h_name) ∈ ((NormL0(λ), "l0"),) @test has_bounds(bpdn_nls2) LM_out = LM(bpdn_nls2, h, options, x0 = x0, subsolver = R2DH, subsolver_options = subsolver_options) - @test typeof(LM_out.solution) == typeof(bpdn_nls2.meta.x0) - @test length(LM_out.solution) == bpdn_nls2.meta.nvar - @test typeof(LM_out.solver_specific[:Fhist]) == typeof(LM_out.solution) - @test typeof(LM_out.solver_specific[:Hhist]) == typeof(LM_out.solution) - @test typeof(LM_out.solver_specific[:SubsolverCounter]) == Array{Int, 1} - @test typeof(LM_out.dual_feas) == eltype(LM_out.solution) - @test length(LM_out.solver_specific[:Fhist]) == length(LM_out.solver_specific[:Hhist]) - @test length(LM_out.solver_specific[:Fhist]) == - length(LM_out.solver_specific[:SubsolverCounter]) - @test length(LM_out.solver_specific[:Fhist]) == length(LM_out.solver_specific[:NLSGradHist]) - @test LM_out.solver_specific[:NLSGradHist][end] == - bpdn_nls2.counters.neval_jprod_residual + bpdn_nls2.counters.neval_jtprod_residual - 1 - @test obj(bpdn_nls2, LM_out.solution) == LM_out.solver_specific[:Fhist][end] - @test h(LM_out.solution) == LM_out.solver_specific[:Hhist][end] + @test typeof(LM_out.solution) == typeof(bpdn.meta.x0) + @test length(LM_out.solution) == bpdn.meta.nvar + @test typeof(LM_out.dual_feas) == eltype(out.solution) @test LM_out.status == :first_order end end From f85deacff4f84bea32f80ab5eab2524d049a3369 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 7 Aug 2025 15:11:17 +0200 Subject: [PATCH 08/14] solve function signature bugs --- src/LM_alg.jl | 18 +++++++++++++----- test/test_bounds.jl | 4 ++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 1f981ebc..527937b5 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -17,6 +17,8 @@ mutable struct LMSolver{ mν∇fk::V Fk::V Fkn::V + Jv::V + Jtv::V ψ::G xkn::V s::V @@ -43,6 +45,8 @@ function LMSolver( mν∇fk = similar(x0) Fk = similar(x0, reg_nls.model.nls_meta.nequ) Fkn = similar(Fk) + Jv = similar(Fk) + Jtv = similar(x0) xkn = similar(x0) s = similar(x0) has_bnds = any(l_bound .!= T(-Inf)) || any(u_bound .!= T(Inf)) || subsolver == TRDHSolver @@ -78,6 +82,8 @@ function LMSolver( mν∇fk, Fk, Fkn, + Jv, + Jtv, ψ, xkn, s, @@ -171,7 +177,7 @@ function LM( x0 = pop!(kwargs_dict, :x0, nls.meta.x0) reg_nls = RegularizedNLPModel(nls, h, selected) return LM( - reg_nls, + reg_nls; x = x0, atol = options.ϵa, rtol = options.ϵr, @@ -188,11 +194,11 @@ function LM( ) end -function LM(reg_nls::AbstractRegularizedNLPModel, kwargs...) +function LM(reg_nls::AbstractRegularizedNLPModel; kwargs...) kwargs_dict = Dict(kwargs...) subsolver = pop!(kwargs_dict, :subsolver, R2Solver) solver = LMSolver(reg_nls, subsolver = subsolver) - stats = RegularizedExecutionStats(reg_nls.model) + stats = RegularizedExecutionStats(reg_nls) solve!(solver, reg_nls, stats; kwargs_dict...) return stats end @@ -232,6 +238,8 @@ function SolverCore.solve!( Fk = solver.Fk Fkn = solver.Fkn + Jv = solver.Jv + Jtv = solver.Jtv ∇fk = solver.∇fk mν∇fk = solver.mν∇fk ψ = solver.ψ @@ -284,7 +292,7 @@ function SolverCore.solve!( jtprod_residual!(nls, xk, Fk, ∇fk) fk = dot(Fk, Fk) / 2 - σmax, found_σ = opnorm(solver.subpb.model.J) + σmax, found_σ = opnorm(jac_op_residual!(nls, xk, Jv, Jtv)) found_σ || error("operator norm computation failed") ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ sqrt_ξ1_νInv = one(T) @@ -417,7 +425,7 @@ function SolverCore.solve!( # update opnorm if not linear least squares if nonlinear == true - σmax, found_σ = opnorm(solver.subpb.model.J) + σmax, found_σ = opnorm(jac_op_residual!(nls, xk, Jv, Jtv)) found_σ || error("operator norm computation failed") end end diff --git a/test/test_bounds.jl b/test/test_bounds.jl index f13efb8d..7913f59d 100644 --- a/test/test_bounds.jl +++ b/test/test_bounds.jl @@ -100,10 +100,10 @@ for (h, h_name) ∈ ((NormL0(λ), "l0"),) x0 = zeros(bpdn_nls2.meta.nvar) @test has_bounds(bpdn_nls2) LM_out = - LM(bpdn_nls2, h, options, x0 = x0, subsolver = R2DH, subsolver_options = subsolver_options) + LM(bpdn_nls2, h, options, x0 = x0, subsolver = R2DHSolver)#, subsolver_options = subsolver_options) @test typeof(LM_out.solution) == typeof(bpdn.meta.x0) @test length(LM_out.solution) == bpdn.meta.nvar - @test typeof(LM_out.dual_feas) == eltype(out.solution) + @test typeof(LM_out.dual_feas) == eltype(LM_out.solution) @test LM_out.status == :first_order end end From 53c40b80560d9a278f03bd3adf16157ff9bac722 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 7 Aug 2025 15:19:46 +0200 Subject: [PATCH 09/14] remove dead code --- src/LM_alg.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 527937b5..ef7fc089 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -288,7 +288,6 @@ function SolverCore.solve!( local ρk::T = zero(T) residual!(nls, xk, Fk) - #solver.subpb.model.J = jac_op_residual!(nls, xk, Jv, Jtv) jtprod_residual!(nls, xk, Fk, ∇fk) fk = dot(Fk, Fk) / 2 @@ -420,7 +419,6 @@ function SolverCore.solve!( # update gradient & Hessian shift!(ψ, xk) - #solver.subpb.model.J = jac_op_residual!(nls, xk, Jv, Jtv) jtprod_residual!(nls, xk, Fk, ∇fk) # update opnorm if not linear least squares From 3c8e92edc76ca0f4895b84303ec472b0734acea7 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 8 Aug 2025 12:13:30 +0200 Subject: [PATCH 10/14] apply julia formatter --- src/LMModel.jl | 8 ++++---- src/LM_alg.jl | 51 +++++++++++--------------------------------------- 2 files changed, 15 insertions(+), 44 deletions(-) diff --git a/src/LMModel.jl b/src/LMModel.jl index fe5314fa..b655eae7 100644 --- a/src/LMModel.jl +++ b/src/LMModel.jl @@ -16,7 +16,7 @@ where `J` is the Jacobian of `F` at `xk`, represented via matrix-free operations `σ > 0` is a regularization parameter and `v` is a vector of the same size as `F(xk)` used for intermediary computations. """ -mutable struct LMModel{T <: Real, V <: AbstractVector{T}, J <: Function , Jt <: Function} <: +mutable struct LMModel{T <: Real, V <: AbstractVector{T}, J <: Function, Jt <: Function} <: AbstractNLPModel{T, V} j_prod!::J jt_prod!::Jt @@ -37,15 +37,15 @@ function LMModel(j_prod!::J, jt_prod!::Jt, F::V, σ::T, xk::V) where {T, V, J, J return LMModel(j_prod!, jt_prod!, F, v, xk, σ, meta, Counters()) end -function NLPModels.obj(nlp::LMModel, x::AbstractVector{T}) where{T} +function NLPModels.obj(nlp::LMModel, x::AbstractVector{T}) where {T} @lencheck nlp.meta.nvar x increment!(nlp, :neval_obj) nlp.j_prod!(nlp.xk, x, nlp.v) # v = J(xk)x nlp.v .+= nlp.F - return ( dot(nlp.v, nlp.v) + nlp.σ * dot(x, x) ) / 2 + return (dot(nlp.v, nlp.v) + nlp.σ * dot(x, x)) / 2 end -function NLPModels.grad!(nlp::LMModel, x::AbstractVector{T}, g::AbstractVector{T}) where{T} +function NLPModels.grad!(nlp::LMModel, x::AbstractVector{T}, g::AbstractVector{T}) where {T} @lencheck nlp.meta.nvar x @lencheck nlp.meta.nvar g increment!(nlp, :neval_grad) diff --git a/src/LM_alg.jl b/src/LM_alg.jl index ef7fc089..bf55c09e 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -32,10 +32,7 @@ mutable struct LMSolver{ substats::GenericExecutionStats{T, V, V, T} end -function LMSolver( - reg_nls::AbstractRegularizedNLPModel{T, V}; - subsolver = R2Solver, -) where{T, V} +function LMSolver(reg_nls::AbstractRegularizedNLPModel{T, V}; subsolver = R2Solver) where {T, V} x0 = reg_nls.model.meta.x0 l_bound = reg_nls.model.meta.lvar u_bound = reg_nls.model.meta.uvar @@ -63,7 +60,7 @@ function LMSolver( ψ = has_bnds ? shifted(reg_nls.h, xk, l_bound_m_x, u_bound_m_x, reg_nls.selected) : shifted(reg_nls.h, xk) - + jprod! = let nls = reg_nls.model (x, v, Jv) -> jprod_residual!(nls, x, v, Jv) end @@ -82,7 +79,7 @@ function LMSolver( mν∇fk, Fk, Fkn, - Jv, + Jv, Jtv, ψ, xkn, @@ -94,7 +91,7 @@ function LMSolver( u_bound_m_x, subsolver, subpb, - substats + substats, ) end @@ -166,12 +163,7 @@ Notably, you can access, and modify, the following: - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything other than `:unknown` will stop the algorithm, but you should use `:user` to properly indicate the intention; - `stats.elapsed_time`: elapsed time in seconds. """ -function LM( - nls::AbstractNLSModel, - h::H, - options::ROSolverOptions; - kwargs..., -) where {H} +function LM(nls::AbstractNLSModel, h::H, options::ROSolverOptions; kwargs...) where {H} kwargs_dict = Dict(kwargs...) selected = pop!(kwargs_dict, :selected, 1:(nls.meta.nvar)) x0 = pop!(kwargs_dict, :x0, nls.meta.x0) @@ -207,7 +199,7 @@ function SolverCore.solve!( solver::LMSolver{T, G, V}, reg_nls::AbstractRegularizedNLPModel{T, V}, stats::GenericExecutionStats{T, V}; - callback = (args...) -> nothing, + callback = (args...) -> nothing, x::V = reg_nls.model.meta.x0, nonlinear::Bool = true, atol::T = √eps(T), @@ -344,19 +336,11 @@ function SolverCore.solve!( done = stats.status != :unknown while !done - sub_atol = stats.iter == 0 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5), sqrt_ξ1_νInv * 1e-3) solver.subpb.model.σ = σk isa(solver.subsolver, R2DHSolver) && (solver.subsolver.D.d[1] = 1/ν) if isa(solver.subsolver, R2Solver) #FIXME - solve!( - solver.subsolver, - solver.subpb, - solver.substats, - x = s, - atol = sub_atol, - ν = ν, - ) + solve!(solver.subsolver, solver.subpb, solver.substats, x = s, atol = sub_atol, ν = ν) else solve!( solver.subsolver, @@ -402,7 +386,7 @@ function SolverCore.solve!( ], colsep = 1, ) - + if η1 ≤ ρk < Inf xk .= xkn @@ -473,24 +457,11 @@ function SolverCore.solve!( callback(nls, solver, stats) done = stats.status != :unknown - end - + if verbose > 0 && stats.status == :first_order @info log_row( - Any[ - stats.iter, - 0, - fk, - hk, - sqrt_ξ1_νInv, - ρk, - σk, - norm(xk), - norm(s), - 1 / ν, - "", - ], + Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, ρk, σk, norm(xk), norm(s), 1 / ν, ""], colsep = 1, ) @info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" @@ -499,4 +470,4 @@ function SolverCore.solve!( set_solution!(stats, xk) set_residuals!(stats, zero(T), sqrt_ξ1_νInv) return stats -end \ No newline at end of file +end From de6a62b50542842d135118eed541fbdec383a024 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Mon, 8 Sep 2025 10:05:27 -0400 Subject: [PATCH 11/14] implicit jacobian update --- src/LMModel.jl | 15 +++++++-------- src/LM_alg.jl | 14 ++++---------- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/LMModel.jl b/src/LMModel.jl index b655eae7..802ae752 100644 --- a/src/LMModel.jl +++ b/src/LMModel.jl @@ -16,10 +16,9 @@ where `J` is the Jacobian of `F` at `xk`, represented via matrix-free operations `σ > 0` is a regularization parameter and `v` is a vector of the same size as `F(xk)` used for intermediary computations. """ -mutable struct LMModel{T <: Real, V <: AbstractVector{T}, J <: Function, Jt <: Function} <: +mutable struct LMModel{T <: Real, V <: AbstractVector{T}, Jac <: Union{AbstractMatrix, AbstractLinearOperator}} <: AbstractNLPModel{T, V} - j_prod!::J - jt_prod!::Jt + J::Jac F::V v::V xk::V @@ -28,19 +27,19 @@ mutable struct LMModel{T <: Real, V <: AbstractVector{T}, J <: Function, Jt <: F counters::Counters end -function LMModel(j_prod!::J, jt_prod!::Jt, F::V, σ::T, xk::V) where {T, V, J, Jt} +function LMModel(J::Jac, F::V, σ::T, xk::V) where {T, V, Jac} meta = NLPModelMeta( length(xk), x0 = xk, # Perhaps we should add lvar and uvar as well here. ) v = similar(F) - return LMModel(j_prod!, jt_prod!, F, v, xk, σ, meta, Counters()) + return LMModel(J, F, v, xk, σ, meta, Counters()) end function NLPModels.obj(nlp::LMModel, x::AbstractVector{T}) where {T} @lencheck nlp.meta.nvar x increment!(nlp, :neval_obj) - nlp.j_prod!(nlp.xk, x, nlp.v) # v = J(xk)x + mul!(nlp.v, nlp.J, x) nlp.v .+= nlp.F return (dot(nlp.v, nlp.v) + nlp.σ * dot(x, x)) / 2 end @@ -49,9 +48,9 @@ function NLPModels.grad!(nlp::LMModel, x::AbstractVector{T}, g::AbstractVector{T @lencheck nlp.meta.nvar x @lencheck nlp.meta.nvar g increment!(nlp, :neval_grad) - nlp.j_prod!(nlp.xk, x, nlp.v) # v = J(xk)x + F + mul!(nlp.v, nlp.J, x) nlp.v .+= nlp.F - nlp.jt_prod!(nlp.xk, nlp.v, g) # g = J^T(xk) v + mul!(g, nlp.J', nlp.v) @. g += nlp.σ .* x return g end diff --git a/src/LM_alg.jl b/src/LM_alg.jl index bf55c09e..29cba866 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -61,14 +61,8 @@ function LMSolver(reg_nls::AbstractRegularizedNLPModel{T, V}; subsolver = R2Solv has_bnds ? shifted(reg_nls.h, xk, l_bound_m_x, u_bound_m_x, reg_nls.selected) : shifted(reg_nls.h, xk) - jprod! = let nls = reg_nls.model - (x, v, Jv) -> jprod_residual!(nls, x, v, Jv) - end - jt_prod! = let nls = reg_nls.model - (x, v, Jtv) -> jtprod_residual!(nls, x, v, Jtv) - end - - sub_nlp = LMModel(jprod!, jt_prod!, Fk, T(1), xk) + Jk = jac_op_residual(reg_nls.model, xk) + sub_nlp = LMModel(Jk, Fk, T(1), xk) subpb = RegularizedNLPModel(sub_nlp, ψ) substats = RegularizedExecutionStats(subpb) subsolver = subsolver(subpb) @@ -283,7 +277,7 @@ function SolverCore.solve!( jtprod_residual!(nls, xk, Fk, ∇fk) fk = dot(Fk, Fk) / 2 - σmax, found_σ = opnorm(jac_op_residual!(nls, xk, Jv, Jtv)) + σmax, found_σ = opnorm(solver.subpb.model.J) found_σ || error("operator norm computation failed") ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ sqrt_ξ1_νInv = one(T) @@ -407,7 +401,7 @@ function SolverCore.solve!( # update opnorm if not linear least squares if nonlinear == true - σmax, found_σ = opnorm(jac_op_residual!(nls, xk, Jv, Jtv)) + σmax, found_σ = opnorm(solver.subpb.model.J) found_σ || error("operator norm computation failed") end end From b4cb5b7345415a84136c9f8fc1fcfd003be81500 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Tue, 9 Sep 2025 15:54:18 -0400 Subject: [PATCH 12/14] remove temporary struct --- src/LM_alg.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 29cba866..3c4109d3 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -2,9 +2,6 @@ export LM, LMSolver, solve! import SolverCore.solve! -mutable struct TRDHSolver # FIXME -end - mutable struct LMSolver{ T <: Real, G <: ShiftedProximableFunction, From 77ddd784e0d52bf53e153a88dd05c30ff973d7b4 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sat, 13 Sep 2025 11:29:43 -0400 Subject: [PATCH 13/14] update callback docstring --- src/LM_alg.jl | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 3c4109d3..1a0905c0 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -138,21 +138,7 @@ The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. # Callback -The callback is called at each iteration. -The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. -Changing any of the input arguments will affect the subsequent iterations. -In particular, setting `stats.status = :user` will stop the algorithm. -All relevant information should be available in `nlp` and `solver`. -Notably, you can access, and modify, the following: -- `solver.xk`: current iterate; -- `solver.∇fk`: current gradient; -- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: - - `stats.iter`: current iteration counter; - - `stats.objective`: current objective function value; - - `stats.solver_specific[:smooth_obj]`: current value of the smooth part of the objective function; - - `stats.solver_specific[:nonsmooth_obj]`: current value of the nonsmooth part of the objective function; - - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything other than `:unknown` will stop the algorithm, but you should use `:user` to properly indicate the intention; - - `stats.elapsed_time`: elapsed time in seconds. +$(callback_docstring) """ function LM(nls::AbstractNLSModel, h::H, options::ROSolverOptions; kwargs...) where {H} kwargs_dict = Dict(kwargs...) From 7df41cc4d1b7719ec974cd62a424d97a16acdc78 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Tue, 16 Sep 2025 12:08:33 -0400 Subject: [PATCH 14/14] fix arrow prints --- src/LM_alg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 1a0905c0..bec67249 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -359,7 +359,7 @@ function SolverCore.solve!( norm(xk), norm(s), 1 / ν, - (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? "↗" : "="), + (η2 ≤ ρk < Inf) ? '↘' : (ρk < η1 ? '↗' : '='), ], colsep = 1, )