diff --git a/.cirrus.yml b/.cirrus.yml
deleted file mode 100644
index ad5fb1d9..00000000
--- a/.cirrus.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-task:
-  matrix:
-    - name: FreeBSD
-      freebsd_instance:
-        image_family: freebsd-14-2
-  env:
-    matrix:
-      - JULIA_VERSION: 1
-  install_script: |
-    URL="https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh"
-    set -x
-    if [ "$(uname -s)" = "Linux" ] && command -v apt; then
-      apt update
-      apt install -y curl
-    fi
-    if command -v curl; then
-      sh -c "$(curl ${URL})"
-    elif command -v wget; then
-      sh -c "$(wget ${URL} -q -O-)"
-    elif command -v fetch; then
-      sh -c "$(fetch ${URL} -o -)"
-    fi
-  build_script:
-    - cirrusjl build
-  test_script:
-    - cirrusjl test
diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml
index c0e82906..1f6fc01c 100644
--- a/.github/workflows/draft-pdf.yml
+++ b/.github/workflows/draft-pdf.yml
@@ -21,7 +21,7 @@ jobs:
           journal: joss
           # This should be the path to the paper within your repo.
           paper-path: paper/paper.md
-      - name: Upload
+      - name: Upload PDF artifact
        uses: actions/upload-artifact@v4
        with:
          name: paper
@@ -29,3 +29,24 @@
          # PDF. Note, this should be the same directory as the input
          # paper.md
          path: paper/paper.pdf
+      - name: Create release
+        if: github.event_name == 'push'
+        uses: rymndhng/release-on-push-action@master
+        id: release
+        with:
+          bump_version_scheme: patch
+          tag_prefix: v
+          release_body: ""
+          use_github_release_notes: true
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Upload PDF to release
+        if: github.event_name == 'push'
+        uses: svenstaro/upload-release-action@v2
+        with:
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          file: paper/paper.pdf
+          asset_name: joss-draft.pdf
+          tag: ${{ steps.release.outputs.tag_name }}
+          overwrite: true
+          body: ""
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..c68fff95
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+paper/examples/Manifest.toml
+paper/jats/paper.jats
+paper/jats/jso-packages.pdf
+paper/paper.pdf
diff --git a/paper/examples/Project.toml b/paper/examples/Project.toml
new file mode 100644
index 00000000..43bcfd97
--- /dev/null
+++ b/paper/examples/Project.toml
@@ -0,0 +1,18 @@
+[deps]
+ADNLPModels = "54578032-b7ea-4c30-94aa-7cbd1cce6c9a"
+DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
+NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
+NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f"
+ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537"
+RegularizedOptimization = "196f2941-2d58-45ba-9f13-43a2532b2fa8"
+RegularizedProblems = "ea076b23-609f-44d2-bb12-a4ae45328278"
+ShiftedProximalOperators = "d4fd37fa-580c-4e43-9b30-361c21aae263"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[compat]
+NLPModels = "0.19, 0.20, 0.21"
+NLPModelsModifiers = "0.7"
+ProximalOperators = "0.15"
+RegularizedProblems = "0.1"
+ShiftedProximalOperators = "0.2"
diff --git a/paper/examples/example1.jl b/paper/examples/example1.jl
new file mode 100644
index 00000000..b292d6c7
--- /dev/null
+++ b/paper/examples/example1.jl
@@ -0,0 +1,36 @@
+using LinearAlgebra, Random
+using ProximalOperators
+using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimization
+using MLDatasets
+using Test
+
+random_seed = 1234
+Random.seed!(random_seed)
+
+# Load MNIST from MLDatasets
+imgs, labels = MLDatasets.MNIST.traindata()
+
+# Use RegularizedProblems' preprocessing
+A, b = RegularizedProblems.generate_data(imgs, labels, (1, 7), false)
+
+# Build the models
+model, _, _ = RegularizedProblems.svm_model(A, b)
+
+# Define the Hessian approximation
+f = LBFGSModel(model)
+
+# Define the nonsmooth regularizer (L0 norm)
+λ = 1.0e-1
+h = NormL0(λ)
+
+# Define the regularized NLP model
+reg_nlp = RegularizedNLPModel(f, h)
+
+# Choose a solver (R2DH) and execution statistics tracker
+solver_r2dh = R2DHSolver(reg_nlp)
+stats = RegularizedExecutionStats(reg_nlp)
+
+# Solve the problem
+solve!(solver_r2dh, reg_nlp, stats, x = f.meta.x0, σk = 1e-6, atol = 2e-5, rtol = 2e-5, verbose = 1)
+
+@test stats.status == :first_order
diff --git a/paper/examples/example2.jl b/paper/examples/example2.jl
new file mode 100644
index 00000000..bb6302d3
--- /dev/null
+++ b/paper/examples/example2.jl
@@ -0,0 +1,28 @@
+## After merging the PRs on TR
+
+using LinearAlgebra
+using DifferentialEquations, ProximalOperators
+using ADNLPModels, NLPModels, NLPModelsModifiers, RegularizedOptimization, RegularizedProblems
+using Test
+
+# Define the Fitzhugh-Nagumo problem
+model, _, _ = RegularizedProblems.fh_model()
+
+# Define the Hessian approximation
+f = LBFGSModel(model)
+
+# Define the nonsmooth regularizer (L1 norm)
+λ = 0.1
+h = NormL1(λ)
+
+# Define the regularized NLP model
+reg_nlp = RegularizedNLPModel(f, h)
+
+# Choose a solver (TR) and execution statistics tracker
+solver_tr = TRSolver(reg_nlp)
+stats = RegularizedExecutionStats(reg_nlp)
+
+# Solve the problem
+solve!(solver_tr, reg_nlp, stats, x = f.meta.x0, atol = 1e-3, rtol = 1e-4, verbose = 10, ν = 1.0e+2)
+
+@test stats.status == :first_order
diff --git a/paper/jso-packages.pdf b/paper/jso-packages.pdf
new file mode 100644
index 00000000..77683eb6
Binary files /dev/null and b/paper/jso-packages.pdf differ
diff --git a/paper/paper.bib b/paper/paper.bib
index e69de29b..c962afe7 100644
--- a/paper/paper.bib
+++ b/paper/paper.bib
@@ -0,0 +1,121 @@
+@Article{aravkin-baraldi-orban-2022,
+  Author  = {A. Y. Aravkin and R. Baraldi and D. Orban},
+  Title   = {A Proximal Quasi-{N}ewton Trust-Region Method for Nonsmooth Regularized Optimization},
+  Journal = siopt,
+  Year    = 2022,
+  Volume  = 32,
+  Number  = 2,
+  Pages   = {900--929},
+  Doi     = {10.1137/21M1409536},
+}
+
+@Article{aravkin-baraldi-orban-2024,
+  Author  = {Aravkin, Aleksandr Y. and Baraldi, Robert and Orban, Dominique},
+  Title   = {A {L}evenberg--{M}arquardt Method for Nonsmooth Regularized Least Squares},
+  Journal = sisc,
+  Year    = 2024,
+  Volume  = 46,
+  Number  = 4,
+  Pages   = {A2557--A2581},
+  Doi     = {10.1137/22M1538971},
+}
+
+@Software{leconte_linearoperators_jl_linear_operators_2023,
+  Author  = {Leconte, Geoffroy and Orban, Dominique and Soares Siqueira, Abel and contributors},
+  License = {MPL-2.0},
+  Title   = {{LinearOperators.jl: Linear Operators for Julia}},
+  Url     = {https://github.com/JuliaSmoothOptimizers/LinearOperators.jl},
+  Version = {2.6.0},
+  Year    = 2023,
+}
+
+@Article{leconte-orban-2023,
+  Author  = {G. Leconte and D. Orban},
+  Title   = {The Indefinite Proximal Gradient Method},
+  Journal = coap,
+  Year    = 2025,
+  Volume  = 91,
+  Number  = 2,
+  Pages   = {861--903},
+  Doi     = {10.1007/s10589-024-00604-5},
+}
+
+@TechReport{leconte-orban-2023-2,
+  Author      = {Leconte, Geoffroy and Orban, Dominique},
+  Title       = {Complexity of trust-region methods with unbounded {H}essian approximations for smooth and nonsmooth optimization},
+  Institution = gerad,
+  Year        = 2023,
+  Type        = {Cahier},
+  Number      = {G-2023-65},
+  Address     = gerad-address,
+  Url         = {https://www.gerad.ca/fr/papers/G-2023-65},
+}
+
+@TechReport{diouane-habiboullah-orban-2024,
+  Author      = {Youssef Diouane and Mohamed Laghdaf Habiboullah and Dominique Orban},
+  Title       = {A proximal modified quasi-{N}ewton method for nonsmooth regularized optimization},
+  Institution = {GERAD},
+  Year        = 2024,
+  Type        = {Cahier},
+  Number      = {G-2024-64},
+  Address     = {Montr\'eal, Canada},
+  Doi         = {10.48550/arxiv.2409.19428},
+  Url         = {https://www.gerad.ca/fr/papers/G-2024-64},
+}
+
+@TechReport{diouane-gollier-orban-2024,
+  Author      = {Youssef Diouane and Maxence Gollier and Dominique Orban},
+  Title       = {A nonsmooth exact penalty method for equality-constrained optimization: complexity and implementation},
+  Institution = {GERAD},
+  Year        = 2024,
+  Type        = {Cahier},
+  Number      = {G-2024-65},
+  Address     = {Montr\'eal, Canada},
+  Doi         = {10.13140/RG.2.2.16095.47527},
+}
+
+@Article{bezanson-edelman-karpinski-shah-2017,
+  Author    = {Bezanson, Jeff and Edelman, Alan and Karpinski, Stefan and Shah, Viral B.},
+  Title     = {Julia: A Fresh Approach to Numerical Computing},
+  Journal   = {SIAM Review},
+  Volume    = {59},
+  Number    = {1},
+  Pages     = {65--98},
+  Year      = {2017},
+  Doi       = {10.1137/141000671},
+  Publisher = {SIAM},
+}
+
+@Misc{orban-siqueira-cutest-2020,
+  Author = {D. Orban and A. S. Siqueira and {contributors}},
+  Title  = {{CUTEst.jl}: {J}ulia's {CUTEst} interface},
+  Month  = {October},
+  Url    = {https://github.com/JuliaSmoothOptimizers/CUTEst.jl},
+  Year   = {2020},
+  Doi    = {10.5281/zenodo.1188851},
+}
+
+@Misc{orban-siqueira-nlpmodels-2020,
+  Author = {D. Orban and A. S. Siqueira and {contributors}},
+  Title  = {{NLPModels.jl}: Data Structures for Optimization Models},
+  Month  = {July},
+  Url    = {https://github.com/JuliaSmoothOptimizers/NLPModels.jl},
+  Year   = {2020},
+  Doi    = {10.5281/zenodo.2558627},
+}
+
+@Misc{jso,
+  Author = {T. Migot and D. Orban and A. S. Siqueira},
+  Title  = {The {JuliaSmoothOptimizers} Ecosystem for Linear and Nonlinear Optimization},
+  Year   = {2021},
+  Url    = {https://juliasmoothoptimizers.github.io/},
+  Doi    = {10.5281/zenodo.2655082},
+}
+
+@Misc{migot-orban-siqueira-optimizationproblems-2023,
+  Author = {T. Migot and D. Orban and A. S. Siqueira},
+  Title  = {{OptimizationProblems.jl}: A collection of optimization problems in {J}ulia},
+  Year   = {2023},
+  Doi    = {10.5281/zenodo.3672094},
+  Url    = {https://github.com/JuliaSmoothOptimizers/OptimizationProblems.jl},
+}
diff --git a/paper/paper.md b/paper/paper.md
index 03c2d9a0..45a170e2 100644
--- a/paper/paper.md
+++ b/paper/paper.md
@@ -1,5 +1,5 @@
 ---
-title: 'RegularizedOptimization.jl: A Julia framework for regularization-based nonlinear optimization'
+title: 'RegularizedOptimization.jl: A Julia framework for regularized and nonsmooth optimization'
 tags:
   - Julia
   - nonsmooth optimization
@@ -30,4 +30,166 @@ header-includes: |
   \setmonofont[Path = ./, Scale=0.68]{JuliaMono-Regular.ttf}
 ---
 
-# References
\ No newline at end of file
+# Summary
+
+[RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) is a Julia [@bezanson-edelman-karpinski-shah-2017] package that implements a family of quadratic regularization and trust-region algorithms for solving nonsmooth optimization problems of the form
+\begin{equation}\label{eq:nlp}
+  \underset{x \in \mathbb{R}^n}{\text{minimize}} \quad f(x) + h(x),
+\end{equation}
+where $f: \mathbb{R}^n \to \mathbb{R}$ is continuously differentiable on $\mathbb{R}^n$, and $h: \mathbb{R}^n \to \mathbb{R} \cup \{+\infty\}$ is lower semi-continuous.
+Both $f$ and $h$ may be nonconvex.
+
+The library provides a modular and extensible framework for experimenting with nonsmooth and nonconvex optimization algorithms, including:
+
+- **Trust-region methods (TR, TRDH)** [@aravkin-baraldi-orban-2022; @leconte-orban-2023],
+- **Quadratic regularization methods (R2, R2N)** [@diouane-habiboullah-orban-2024; @aravkin-baraldi-orban-2022],
+- **Levenberg-Marquardt methods (LM, LMTR)** [@aravkin-baraldi-orban-2024].
+
+These methods rely solely on the gradient and Hessian(-vector) information of the smooth part $f$ and on the proximal mapping of the nonsmooth part $h$ to compute steps; the objective $f + h$ itself is used only to accept or reject trial points.
+Moreover, they can handle cases where Hessian approximations are unbounded [@diouane-habiboullah-orban-2024; @leconte-orban-2023-2], making the package particularly suited to large-scale, ill-conditioned, and nonsmooth problems.
+
+# Statement of need
+
+## Model-based framework for nonsmooth methods
+
+One way to solve \eqref{eq:nlp} in Julia is [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl), which implements in-place, first-order, line-search-based methods.
+Most of these methods are splitting schemes that alternate between steps along the gradient of the smooth part $f$ (or quasi-Newton directions) and proximal steps on the nonsmooth part $h$.
+Currently, **ProximalAlgorithms.jl** provides only L-BFGS as a quasi-Newton option.
+By contrast, **RegularizedOptimization.jl** focuses on model-based approaches such as trust-region and regularization algorithms.
+As shown in [@aravkin-baraldi-orban-2022], model-based methods typically require fewer evaluations of the objective and its gradient than first-order line-search methods, at the expense of solving more involved subproblems.
+Although these subproblems may require many proximal iterations, each proximal computation is inexpensive, making the overall approach efficient for large-scale problems.
+
+Building on this perspective, **RegularizedOptimization.jl** implements state-of-the-art regularization-based algorithms for solving problems of the form \eqref{eq:nlp}.
+The package provides a consistent API to formulate optimization problems and apply different regularization methods.
+It integrates seamlessly with the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) ecosystem, an academic organization for the development, testing, and benchmarking of nonlinear optimization software.
+
+On the one hand, smooth problems $f$ can be defined via [NLPModels.jl](https://github.com/JuliaSmoothOptimizers/NLPModels.jl) [@orban-siqueira-nlpmodels-2020], which provides a standardized Julia API for representing nonlinear programming (NLP) problems.
+Large collections of such problems are available in [CUTEst.jl](https://github.com/JuliaSmoothOptimizers/CUTEst.jl) [@orban-siqueira-cutest-2020] and [OptimizationProblems.jl](https://github.com/JuliaSmoothOptimizers/OptimizationProblems.jl) [@migot-orban-siqueira-optimizationproblems-2023].
+Another option is [RegularizedProblems.jl](https://github.com/JuliaSmoothOptimizers/RegularizedProblems.jl), which provides problem instances commonly used in the nonsmooth optimization literature.
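+
+For instance, a smooth $f$ can also be written in a few lines with [ADNLPModels.jl](https://github.com/JuliaSmoothOptimizers/ADNLPModels.jl), which provides derivatives through automatic differentiation.
+The sketch below is illustrative only; the Rosenbrock-type instance and starting point are our own choices rather than problems shipped with the package:
+
+```julia
+using ADNLPModels, NLPModels
+
+# A smooth, nonconvex objective: a two-variable Rosenbrock function
+rosenbrock(x) = 100 * (x[2] - x[1]^2)^2 + (1 - x[1])^2
+nlp = ADNLPModel(rosenbrock, [-1.2; 1.0])
+
+# Standard NLPModels API: gradients and Hessian-vector products via AD
+gx = grad(nlp, nlp.meta.x0)
+Hv = hprod(nlp, nlp.meta.x0, gx)
+```
+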
+On the other hand, Hessian approximations of these functions, including quasi-Newton and diagonal schemes, can be specified through [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl) [@leconte_linearoperators_jl_linear_operators_2023], which represents Hessians as linear operators and implements efficient Hessian–vector products.
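+
+As an illustration, the sketch below (with synthetic data of our choosing) builds a limited-memory BFGS operator and applies it to a vector without ever forming a matrix:
+
+```julia
+using LinearOperators
+
+n = 100
+B = LBFGSOperator(n)   # limited-memory BFGS approximation of a Hessian
+
+# Update the approximation with a correction pair (s, y); y = 2s guarantees positive curvature
+s = randn(n)
+y = 2.0 .* s
+push!(B, s, y)
+
+Bv = B * randn(n)      # Hessian-vector product
+```
+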
+Finally, nonsmooth terms $h$ can be modeled using [ProximalOperators.jl](https://github.com/JuliaFirstOrder/ProximalOperators.jl), which provides a broad collection of nonsmooth functions, together with [ShiftedProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ShiftedProximalOperators.jl), which provides shifted proximal mappings for nonsmooth functions.
+
+This modularity makes it easy to benchmark the solvers implemented in the package [@diouane-habiboullah-orban-2024; @aravkin-baraldi-orban-2022; @aravkin-baraldi-orban-2024; @leconte-orban-2023-2].
+
+## Support for Hessians
+
+In contrast to first-order packages such as [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl), **RegularizedOptimization.jl** enables the use of second-order information, which can significantly improve convergence, especially on ill-conditioned problems.
+One way to obtain Hessians is via automatic differentiation tools such as [ADNLPModels.jl](https://github.com/JuliaSmoothOptimizers/ADNLPModels.jl).
+
+## Requirements of the ShiftedProximalOperators.jl package
+
+The nonsmooth part $h$ must have a computable proximal mapping, defined as
+$$\text{prox}_{h}(v) = \underset{x \in \mathbb{R}^n}{\arg\min} \left( h(x) + \frac{1}{2} \|x - v\|^2 \right).$$
+This requirement is satisfied by a wide range of nonsmooth functions commonly used in practice, such as the $\ell_1$ norm, the $\ell_0$ "norm", indicator functions of convex sets, and group sparsity-inducing norms.
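+
+For example, the proximal mapping of the $\ell_1$ norm (soft thresholding) is available directly from ProximalOperators.jl; the inputs below are arbitrary:
+
+```julia
+using ProximalOperators
+
+h = NormL1(1.0)
+v = [1.5, -0.3, 0.8]
+y, hy = prox(h, v, 0.5)   # y = prox_{0.5h}(v), hy = h(y)
+```
+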
+The package [ProximalOperators.jl](https://github.com/JuliaFirstOrder/ProximalOperators.jl) provides a comprehensive collection of such functions, along with their proximal mappings.
+The main difference with the proximal operators implemented in [ProximalOperators.jl](https://github.com/JuliaFirstOrder/ProximalOperators.jl) is that those of [ShiftedProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ShiftedProximalOperators.jl) involve a translation of the nonsmooth term.
+Specifically, the latter package considers proximal operators defined as
+$$\underset{t \in \mathbb{R}^n}{\arg\min} \; \left( \tfrac{1}{2} \|t - q\|_2^2 + \nu h(x + s + t) + \chi(s + t; \Delta \mathbb{B}) \right),$$
+where $q$ is given, $x$ and $s$ are fixed shifts, $h$ is the nonsmooth term with respect to which we compute the proximal operator, and $\chi(\cdot; \Delta \mathbb{B})$ is the indicator function of a ball of radius $\Delta$ defined by a certain norm.
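+
+In the unconstrained case ($\Delta = \infty$), the shifted operator reduces to a translation of the standard one: substituting $u = x + s + t$ gives $t^\star = \text{prox}_{\nu h}(x + s + q) - (x + s)$.
+The sketch below, which uses arbitrary data and plain ProximalOperators.jl only, illustrates that identity:
+
+```julia
+using ProximalOperators
+
+h = NormL1(0.5)
+x, s, q = randn(5), randn(5), randn(5)
+ν = 0.8
+
+# prox of the translated function t -> h(x + s + t) via the change of variables u = x + s + t
+u, _ = prox(h, x + s + q, ν)
+t = u - (x + s)
+```
+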
+![Composition of JSO packages](jso-packages.pdf){ width=70% }
+
+## Testing and documentation
+
+The package includes a comprehensive suite of unit tests that covers all functionalities, ensuring reliability and correctness.
+Extensive documentation is provided, including a user guide, an API reference, and examples to help users get started quickly.
+Aqua.jl is used to test the package dependencies.
+Documentation is built using Documenter.jl.
+
+## Hyperparameter tuning
+
+The solvers in **RegularizedOptimization.jl** do not require extensive hyperparameter tuning.
+
+## Non-monotone strategies
+
+The solvers in **RegularizedOptimization.jl** implement non-monotone strategies to accept trial points, which can enhance convergence properties.
+
+## Application studies
+
+The package is used in the exact penalty work of [@diouane-gollier-orban-2024] to solve a problem where the model of the nonsmooth part differs from the function $h$ itself.
+This use case is not covered by the current version of the competing package [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl).
+
+## Support for inexact subproblem solves
+
+Solvers in **RegularizedOptimization.jl** allow inexact solves of the trust-region and quadratic-regularization subproblems using first-order methods implemented in the package itself, such as the quadratic regularization methods R2 [@aravkin-baraldi-orban-2022] and R2DH [@diouane-habiboullah-orban-2024], and the trust-region variant TRDH [@leconte-orban-2023-2].
+This is crucial for large-scale problems, where exact subproblem solutions are prohibitively expensive.
+
+## Support for Hessians as Linear Operators
+
+The second-order methods in **RegularizedOptimization.jl** can use Hessian approximations represented as linear operators via [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl).
+Explicitly forming Hessians as dense or sparse matrices is often prohibitively expensive, both computationally and in terms of memory, especially in high-dimensional settings.
+In contrast, many problems admit efficient implementations of Hessian–vector or Jacobian–vector products, either through automatic differentiation tools or limited-memory quasi-Newton updates, making the linear-operator approach more scalable and practical.
+
+## In-place methods
+
+All solvers in **RegularizedOptimization.jl** are implemented in an in-place fashion, minimizing memory allocations during the solve.
+
+# Examples
+
+We consider two examples where the smooth part $f$ is nonconvex and the nonsmooth part $h$ is either the $\ell_0$ or the $\ell_1$ norm.
+
+A first example is the FitzHugh-Nagumo inverse problem with an $\ell_1$ penalty, as described in [@aravkin-baraldi-orban-2022] and [@aravkin-baraldi-orban-2024].
+
+```julia
+using LinearAlgebra
+using ProximalOperators
+using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimization
+using DifferentialEquations, ADNLPModels
+
+# Define the Fitzhugh-Nagumo problem
+model, _, _ = RegularizedProblems.fh_model()
+
+# Define the Hessian approximation
+f = LBFGSModel(model)
+
+# Define the nonsmooth regularizer (L1 norm)
+λ = 0.1
+h = NormL1(λ)
+
+# Define the regularized NLP model
+reg_nlp = RegularizedNLPModel(f, h)
+
+# Choose a solver (TR) and execution statistics tracker
+solver_tr = TRSolver(reg_nlp)
+stats = RegularizedExecutionStats(reg_nlp)
+
+# Solve the problem
+solve!(solver_tr, reg_nlp, stats, x = f.meta.x0, atol = 1e-3, rtol = 1e-4, verbose = 10)
+```
+
+````
+=== Comparison of PANOC vs TR (FH_smooth_term) ===
+PANOC:
+  iterations        = 81
+  # f evaluations   = 188
+  # ∇f evaluations  = 188
+  # prox calls (g)  = 107
+  solution (≈)      = [-0.0, 0.19071674721048656, 1.037084478194805, -0.0, -0.0]
+
+TR:
+  status            = first_order
+  # f evaluations   = 65
+  # ∇f evaluations  = 52
+  solution (≈)      = [0.0, 0.1910326406395867, 1.0357773976471938, 0.0, 0.0]
+````
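+
+A second example, provided as `paper/examples/example1.jl` in this repository, is a sparse support-vector machine on the MNIST digits 1 and 7 with an $\ell_0$ regularizer, solved with R2DH:
+
+```julia
+using LinearAlgebra, Random
+using ProximalOperators
+using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimization
+using MLDatasets
+
+Random.seed!(1234)
+
+# Load MNIST and build the SVM model
+imgs, labels = MLDatasets.MNIST.traindata()
+A, b = RegularizedProblems.generate_data(imgs, labels, (1, 7), false)
+model, _, _ = RegularizedProblems.svm_model(A, b)
+
+# Define the Hessian approximation
+f = LBFGSModel(model)
+
+# Define the nonsmooth regularizer (L0 norm)
+λ = 1.0e-1
+h = NormL0(λ)
+
+# Define the regularized NLP model
+reg_nlp = RegularizedNLPModel(f, h)
+
+# Choose a solver (R2DH) and execution statistics tracker
+solver_r2dh = R2DHSolver(reg_nlp)
+stats = RegularizedExecutionStats(reg_nlp)
+
+# Solve the problem
+solve!(solver_r2dh, reg_nlp, stats, x = f.meta.x0, σk = 1e-6, atol = 2e-5, rtol = 2e-5, verbose = 1)
+```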
+
+# Acknowledgements
+
+Mohamed Laghdaf Habiboullah is supported by an excellence FRQNT grant.
+Youssef Diouane and Dominique Orban are partially supported by an NSERC Discovery Grant.
+
+# References