From ba05ce3f1f1f1b104701abb615f84861324ad981 Mon Sep 17 00:00:00 2001 From: mpetrosian Date: Sat, 17 Aug 2024 11:36:55 +0200 Subject: [PATCH 01/28] work on thoery --- .../how_to/how_to_algorithm_selection.ipynb | 69 ++++++++++++++++--- .../how_to/how_to_algorithm_selection.md | 44 ++++++++++++ 2 files changed, 104 insertions(+), 9 deletions(-) create mode 100644 docs/source/how_to/how_to_algorithm_selection.md diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index f476bae88..6f564f13a 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -6,14 +6,32 @@ "source": [ "# Which optimizer to use\n", "\n", - "This is a very very very short and oversimplifying guide on selecting an optimization algorithm based on a minimum of information. \n", + "This is a short and a simplified guide on selecting an optimization algorithm based on the properties of your problem.\n", "\n", + "Knowledge of those properties can significantly narrow the set of algorithms best suited for your problem. \n", "\n", - "To select an optimizer, you need to answer two questions:\n", + "### Choosing a local optimizer\n", + "```mermaid\n", + "graph LR\n", + " classDef highlight fill:#FF4500;\n", + " A[\"Do you have
nonlinear constraints?\"] -- yes --> B[\"differentiable?\"]\n", + " B[\"differentiable?\"] -- yes --> C[\"'ipopt', 'nlopt_slsqp', 'scipy_trust_constr'\"]\n", + " B[\"differentiable?\"] -- no --> D[\"'scipy_cobyla', 'nlopt_cobyla'\"]\n", "\n", - "1. Is your criterion function differentiable?\n", + " A[\"Do you have
nonlinear constraints?\"] -- no --> E[\"Can you exploit
a least-squares
structure?\"]\n", + " E[\"Can you exploit
a least-squares
structure?\"] -- yes --> F[\"differentiable?\"]\n", + " E[\"Can you exploit
a least-squares
structure?\"] -- no --> G[\"differentiable?\"]\n", "\n", - "2. Do you have a nonlinear least squares structure (i.e. do you sum some kind of squared residuals at the end of your criterion function)?" + " F[\"differentiable?\"] -- yes --> H[\"'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg'\"]\n", + " F[\"differentiable?\"] -- no --> I[\"'nag_dflos', 'pounders', 'tao_pounders'\"]\n", + "\n", + " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'fides'\"]\n", + " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel'\"]\n", + "```\n", + "\n", + "Almost always, you will have more than one algorithm to try out choose the best by comparing them via the criterion plots [insert link]. \n", + "\n", + "Remember, no amount of theory can replace experimentation!\n" ] }, { @@ -27,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -37,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -69,7 +87,18 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mFailed to start the Kernel. \n", + "\u001b[1;31mAttributeError: module 'collections' has no attribute 'MutableMapping'. \n", + "\u001b[1;31mView Jupyter log for further details." + ] + } + ], "source": [ "res = om.minimize(\n", " fun=sphere,\n", @@ -111,7 +140,18 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mFailed to start the Kernel. \n", + "\u001b[1;31mAttributeError: module 'collections' has no attribute 'MutableMapping'. \n", + "\u001b[1;31mView Jupyter log for further details." + ] + } + ], "source": [ "res = om.minimize(\n", " fun=sphere,\n", @@ -139,7 +179,18 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mFailed to start the Kernel. \n", + "\u001b[1;31mAttributeError: module 'collections' has no attribute 'MutableMapping'. \n", + "\u001b[1;31mView Jupyter log for further details." + ] + } + ], "source": [ "res = om.minimize(\n", " fun=sphere,\n", diff --git a/docs/source/how_to/how_to_algorithm_selection.md b/docs/source/how_to/how_to_algorithm_selection.md new file mode 100644 index 000000000..9f09c18be --- /dev/null +++ b/docs/source/how_to/how_to_algorithm_selection.md @@ -0,0 +1,44 @@ +# Which optimizer to use + +This is a short and a simplified guide on selecting an optimization algorithm based on the properties of your problem. + +Knowledge of those properties can significantly narrow the set of algorithms best suited for your problem. + +### Choosing a local optimizer +```mermaid +graph LR + classDef highlight fill:#FF4500; + A["Do you have
nonlinear constraints?"] -- yes --> B["differentiable?"] + B["differentiable?"] -- yes --> C["'ipopt', 'nlopt_slsqp', 'scipy_trust_constr'"] + B["differentiable?"] -- no --> D["'scipy_cobyla', 'nlopt_cobyla'"] + + A["Do you have
nonlinear constraints?"] -- no --> E["Can you exploit
a least-squares
structure?"] + E["Can you exploit
a least-squares
structure?"] -- yes --> F["differentiable?"] + E["Can you exploit
a least-squares
structure?"] -- no --> G["differentiable?"] + + F["differentiable?"] -- yes --> H["'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg'"] + F["differentiable?"] -- no --> I["'nag_dflos', 'pounders', 'tao_pounders'"] + + G["differentiable?"] -- yes --> J["'scipy_lbfgsb', 'fides'"] + G["differentiable?"] -- no --> K["'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel'"] +``` + +Almost always, you will have more than one algorithm to try out choose the best by comparing them via the `criterion_plot` [insert link]. + +Remember, no amount of theory can replace experimentation! + +```{eval-rst} +.. tabbed:: Differentiable Scalar Function + As an example of unconstrained optimization problem with a scalar differentiable objective function, consider the minimization of the sphere function: + .. code-block:: python + import numpy as np + + def sphere(params): + return params@params + + def sphere_gradient(params): + return params*2 + + start_params = np.arange(5) + + For this problem, the algorithm choice lies between ``'scipy_lbfgs'`` and ``'fides'``. \ No newline at end of file From 1fb4120d44fcfcede8b79e07c93274c720482d6f Mon Sep 17 00:00:00 2001 From: mpetrosian Date: Fri, 4 Oct 2024 14:33:24 +0200 Subject: [PATCH 02/28] Add myst extensions for mermaid blocks --- docs/rtd_environment.yml | 1 + docs/source/conf.py | 6 + .../how_to/how_to_algorithm_selection.ipynb | 168 +----------------- .../how_to/how_to_algorithm_selection.md | 44 ----- docs/source/how_to/index.md | 1 + environment.yml | 1 + 6 files changed, 11 insertions(+), 210 deletions(-) delete mode 100644 docs/source/how_to/how_to_algorithm_selection.md diff --git a/docs/rtd_environment.yml b/docs/rtd_environment.yml index 8407ede25..a7b455bcc 100644 --- a/docs/rtd_environment.yml +++ b/docs/rtd_environment.yml @@ -38,3 +38,4 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs diff --git a/docs/source/conf.py b/docs/source/conf.py index 38a4c6bb4..45638f54a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -48,6 +48,8 @@ "sphinxcontrib.bibtex", "sphinx_panels", "sphinx_design", + "sphinxcontrib.mermaid", + ] myst_enable_extensions = [ @@ -55,6 +57,10 @@ "dollarmath", "html_image", ] +myst_fence_as_directive = ["mermaid"] + + + copybutton_prompt_text = ">>> " copybutton_only_copy_prompt_lines = False diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index aceee77f4..19c280066 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -13,7 +13,7 @@ "Knowledge of those properties can significantly narrow the set of algorithms best suited for your problem. \n", "\n", "### Choosing a local optimizer\n", - "```mermaid\n", + "```{mermaid}\n", "graph LR\n", " classDef highlight fill:#FF4500;\n", " A[\"Do you have
nonlinear constraints?\"] -- yes --> B[\"differentiable?\"]\n", @@ -35,170 +35,6 @@ "\n", "Remember, no amount of theory can replace experimentation!\n" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define some inputs\n", - "\n", - "Again, we use versions of the sphere function to illustrate how to select these algorithms in practice" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "import optimagic as om" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "@om.mark.least_squares\n", - "def sphere(params):\n", - " return params\n", - "\n", - "\n", - "def sphere_gradient(params):\n", - " return params * 2\n", - "\n", - "\n", - "start_params = np.arange(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Differentiable criterion function\n", - "\n", - "Use `scipy_lbfsgsb` as optimizer and provide the closed form derivative if you can. If you do not provide a derivative, optimagic will calculate it numerically. However, this is less precise and slower. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mFailed to start the Kernel. \n", - "\u001b[1;31mAttributeError: module 'collections' has no attribute 'MutableMapping'. \n", - "\u001b[1;31mView Jupyter log for further details." - ] - } - ], - "source": [ - "res = om.minimize(\n", - " fun=sphere,\n", - " params=start_params,\n", - " algorithm=\"scipy_lbfgsb\",\n", - " jac=sphere_gradient,\n", - ")\n", - "res.n_fun_evals" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that this solves a 5 dimensional problem with just 3 criterion evaluations. For higher dimensions, you will need more, but it scales very well to dozens and hundreds of parameters. \n", - "\n", - "If you are worried about being stuck in a local optimum, use multistart optimization." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Not differentiable, only scalar output" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use `nag_pybobyqa`. Note that for this you need to install the `PyBOBYQA` package if you do not already have it:\n", - " \n", - "`pip install Py-BOBYQA`\n", - "\n", - "Then you select the algorithm as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mFailed to start the Kernel. \n", - "\u001b[1;31mAttributeError: module 'collections' has no attribute 'MutableMapping'. \n", - "\u001b[1;31mView Jupyter log for further details." - ] - } - ], - "source": [ - "res = om.minimize(\n", - " fun=sphere,\n", - " params=start_params,\n", - " algorithm=\"nag_pybobyqa\",\n", - ")\n", - "res.n_fun_evals" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Not differentiable, least squares structure\n", - "\n", - "Use `nag_dfols`. To use `nag_dfols`, you need to install it via:\n", - "\n", - "`pip install DFO-LS`\n", - "\n", - "\n", - "This optimizer will only work if your criterion function returns a dictionary that contains the entry `root_contributions`. This needs to be a numpy array or pytree that contains the residuals of the least squares problem. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mFailed to start the Kernel. \n", - "\u001b[1;31mAttributeError: module 'collections' has no attribute 'MutableMapping'. \n", - "\u001b[1;31mView Jupyter log for further details." - ] - } - ], - "source": [ - "res = om.minimize(\n", - " fun=sphere,\n", - " params=start_params,\n", - " algorithm=\"nag_dfols\",\n", - ")\n", - "res.n_fun_evals" - ] } ], "metadata": { @@ -217,7 +53,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.10.15" } }, "nbformat": 4, diff --git a/docs/source/how_to/how_to_algorithm_selection.md b/docs/source/how_to/how_to_algorithm_selection.md deleted file mode 100644 index 9f09c18be..000000000 --- a/docs/source/how_to/how_to_algorithm_selection.md +++ /dev/null @@ -1,44 +0,0 @@ -# Which optimizer to use - -This is a short and a simplified guide on selecting an optimization algorithm based on the properties of your problem. - -Knowledge of those properties can significantly narrow the set of algorithms best suited for your problem. - -### Choosing a local optimizer -```mermaid -graph LR - classDef highlight fill:#FF4500; - A["Do you have
nonlinear constraints?"] -- yes --> B["differentiable?"] - B["differentiable?"] -- yes --> C["'ipopt', 'nlopt_slsqp', 'scipy_trust_constr'"] - B["differentiable?"] -- no --> D["'scipy_cobyla', 'nlopt_cobyla'"] - - A["Do you have
nonlinear constraints?"] -- no --> E["Can you exploit
a least-squares
structure?"] - E["Can you exploit
a least-squares
structure?"] -- yes --> F["differentiable?"] - E["Can you exploit
a least-squares
structure?"] -- no --> G["differentiable?"] - - F["differentiable?"] -- yes --> H["'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg'"] - F["differentiable?"] -- no --> I["'nag_dflos', 'pounders', 'tao_pounders'"] - - G["differentiable?"] -- yes --> J["'scipy_lbfgsb', 'fides'"] - G["differentiable?"] -- no --> K["'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel'"] -``` - -Almost always, you will have more than one algorithm to try out choose the best by comparing them via the `criterion_plot` [insert link]. - -Remember, no amount of theory can replace experimentation! - -```{eval-rst} -.. tabbed:: Differentiable Scalar Function - As an example of unconstrained optimization problem with a scalar differentiable objective function, consider the minimization of the sphere function: - .. code-block:: python - import numpy as np - - def sphere(params): - return params@params - - def sphere_gradient(params): - return params*2 - - start_params = np.arange(5) - - For this problem, the algorithm choice lies between ``'scipy_lbfgs'`` and ``'fides'``. \ No newline at end of file diff --git a/docs/source/how_to/index.md b/docs/source/how_to/index.md index 8b7cdd37d..81b86fd02 100644 --- a/docs/source/how_to/index.md +++ b/docs/source/how_to/index.md @@ -23,4 +23,5 @@ how_to_logging how_to_errors_during_optimization how_to_slice_plot how_to_benchmarking +how_to_algorithm_selection_ ``` diff --git a/environment.yml b/environment.yml index d321d992e..d669546f6 100644 --- a/environment.yml +++ b/environment.yml @@ -50,3 +50,4 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs From ca369c728484ef7aada33536f3b461ec3835cbf1 Mon Sep 17 00:00:00 2001 From: mpetrosian Date: Fri, 4 Oct 2024 14:40:12 +0200 Subject: [PATCH 03/28] Run pre commit hooks --- .tools/envs/testenv-linux.yml | 1 + .tools/envs/testenv-others.yml | 1 + .tools/envs/testenv-pandas.yml | 1 + docs/rtd_environment.yml | 2 +- docs/source/conf.py | 2 -- environment.yml | 2 +- 6 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.tools/envs/testenv-linux.yml b/.tools/envs/testenv-linux.yml index fa5ece402..b6db3fe68 100644 --- a/.tools/envs/testenv-linux.yml +++ b/.tools/envs/testenv-linux.yml @@ -37,4 +37,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs - -e ../../ diff --git a/.tools/envs/testenv-others.yml b/.tools/envs/testenv-others.yml index 4467a19b0..9c39a841e 100644 --- a/.tools/envs/testenv-others.yml +++ b/.tools/envs/testenv-others.yml @@ -35,4 +35,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs - -e ../../ diff --git a/.tools/envs/testenv-pandas.yml b/.tools/envs/testenv-pandas.yml index 757d5f39a..af016394f 100644 --- a/.tools/envs/testenv-pandas.yml +++ b/.tools/envs/testenv-pandas.yml @@ -34,4 +34,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs - -e ../../ diff --git a/docs/rtd_environment.yml b/docs/rtd_environment.yml index a7b455bcc..57e16de01 100644 --- a/docs/rtd_environment.yml +++ b/docs/rtd_environment.yml @@ -38,4 +38,4 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests - - sphinxcontrib-mermaid # dev, tests, docs + - sphinxcontrib-mermaid # 
dev, tests, docs diff --git a/docs/source/conf.py b/docs/source/conf.py index 45638f54a..bf50593c3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -49,7 +49,6 @@ "sphinx_panels", "sphinx_design", "sphinxcontrib.mermaid", - ] myst_enable_extensions = [ @@ -60,7 +59,6 @@ myst_fence_as_directive = ["mermaid"] - copybutton_prompt_text = ">>> " copybutton_only_copy_prompt_lines = False diff --git a/environment.yml b/environment.yml index d669546f6..00fb56e5e 100644 --- a/environment.yml +++ b/environment.yml @@ -50,4 +50,4 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests - - sphinxcontrib-mermaid # dev, tests, docs + - sphinxcontrib-mermaid # dev, tests, docs From 1813ba761b9ac8b3ea88d0cbd96ee23e0ab7353a Mon Sep 17 00:00:00 2001 From: mpetrosian Date: Fri, 4 Oct 2024 14:46:39 +0200 Subject: [PATCH 04/28] Edit intro text in how_to_algorithm_selection --- docs/source/how_to/how_to_algorithm_selection.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index 19c280066..f6b32e1e9 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -8,7 +8,7 @@ "\n", "# Which optimizer to use\n", "\n", - "This is a short and a simplified guide on selecting an optimization algorithm based on the properties of your problem.\n", + "This is a practical guide on selecting an optimization algorithm based on the properties of your problem.\n", "\n", "Knowledge of those properties can significantly narrow the set of algorithms best suited for your problem. \n", "\n", @@ -31,7 +31,7 @@ " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel'\"]\n", "```\n", "\n", - "Almost always, you will have more than one algorithm to try out choose the best by comparing them via the criterion plots [insert link]. \n", + "Almost always, you will have more than one algorithm to try out choose the best by comparing them via the criterion plots. \n", "\n", "Remember, no amount of theory can replace experimentation!\n" ] From eef2c22bf767c7bd1654c0bd64096023276724bf Mon Sep 17 00:00:00 2001 From: mpetrosian Date: Fri, 4 Oct 2024 16:30:37 +0200 Subject: [PATCH 05/28] Finish intro for how to select algorithm --- .../how_to/how_to_algorithm_selection.ipynb | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index f6b32e1e9..1fd160495 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -5,14 +5,32 @@ "metadata": {}, "source": [ "(how-to-select-algorithms)=\n", + "# How to select an optimization alogrithm\n", + "## Why optimization is difficult\n", "\n", - "# Which optimizer to use\n", + "Optimization without the knowldge of the properties of the problem we are trying to solve is difficult. \n", "\n", - "This is a practical guide on selecting an optimization algorithm based on the properties of your problem.\n", + "The only algorithms that are guaranteed to solve all problems are grid search or other algorithms that evaluate the criterion function almost everywhere in the parameter space.\n", "\n", - "Knowledge of those properties can significantly narrow the set of algorithms best suited for your problem. 
\n", + "If you have more than a hand full of parameters, these methods would take too long.\n", "\n", - "### Choosing a local optimizer\n", + "Thus, you have to know the properties of your optimization problem and have knowledge about different optimization algorithms in order to choose the right algorithm for your problem.\n", + "\n", + "## How to approach algorithm selection\n", + "\n", + "Knowledge of theoretical properties of your problem can significantly narrow the set of the suitable algorihtms. \n", + "\n", + "Almost always, you will have more than one algorithm to try out choose the best.\n", + "\n", + "We recommend the following workflow for most efficiency:\n", + "- Based on the theoretical properties of your problem, narrow the set of algorithms to experiment with to 2-3 algorithms.\n", + "- Run the algorithms for smaller number of iterations. As a rule of thumb, use 10*`n_params` iterations or function evaluations.\n", + "- Plot the results in a criterion plot.\n", + "- Re run the optimization algorithm with the best results until a convergence criterion is achieved.\n", + " \n", + "## How to narrow down algorithm sets\n", + "\n", + "The below decision tree offers a practical guide on how to narrow down the set of algorithms to experiment with, based on the theoretical properties of your problem:\n", "```{mermaid}\n", "graph LR\n", " classDef highlight fill:#FF4500;\n", @@ -30,10 +48,7 @@ " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'fides'\"]\n", " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel'\"]\n", "```\n", - "\n", - "Almost always, you will have more than one algorithm to try out choose the best by comparing them via the criterion plots. \n", - "\n", - "Remember, no amount of theory can replace experimentation!\n" + "\n" ] } ], From c13f3b30822b65971c344edc282434ab1ed0ee3e Mon Sep 17 00:00:00 2001 From: mpetrosian Date: Fri, 4 Oct 2024 19:11:31 +0200 Subject: [PATCH 06/28] start example optimization in how to select algorithm --- .../how_to/how_to_algorithm_selection.ipynb | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index 1fd160495..a66e3caa3 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -5,7 +5,7 @@ "metadata": {}, "source": [ "(how-to-select-algorithms)=\n", - "# How to select an optimization alogrithm\n", + "# How to select a local optimization alogrithm\n", "## Why optimization is difficult\n", "\n", "Optimization without the knowldge of the properties of the problem we are trying to solve is difficult. 
\n", @@ -50,6 +50,49 @@ "```\n", "\n" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Choosing optimization algorithm for unconcstrained, least-squares, non-differentiable function:\n", + "consider the Rosenbrock function [insert link] from the More-Wild benchmark set [insert link]\n", + "set of algorithms to consider: `'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg'`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import optimagic as om" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@om.mark.least_squares\n", + "def rosenbrock(x):\n", + " fvec = np.zeros(2)\n", + " fvec[0] = 10 * (x[1] - x[0] ** 2)\n", + " fvec[1] = 1.0 - x[0]\n", + " return fvec\n", + "\n", + "\n", + "results = {}\n", + "for algo in [\"scipy_ls_lm\", \"scipy_ls_trf\", \"scipy_ls_dogbox\"]:\n", + " results[algo] = om.minimize(\n", + " rosenbrock,\n", + " params=np.arange(2),\n", + " algorithm=algo,\n", + " )" + ] } ], "metadata": { From 878802fab3fa53de5b8bf1f0337c62158302084e Mon Sep 17 00:00:00 2001 From: mpetrosian Date: Sat, 5 Oct 2024 13:11:33 +0200 Subject: [PATCH 07/28] Add example with criterion plot --- .../how_to/how_to_algorithm_selection.ipynb | 90 ++++++++++++------- 1 file changed, 60 insertions(+), 30 deletions(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index a66e3caa3..c0fdcb21b 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -5,8 +5,8 @@ "metadata": {}, "source": [ "(how-to-select-algorithms)=\n", - "# How to select a local optimization alogrithm\n", - "## Why optimization is difficult\n", + "# How to select a local optimizer\n", + "## Why choosing the right optimizer is difficult\n", "\n", "Optimization without the knowldge of the properties of the problem we are trying to solve is difficult. \n", "\n", @@ -18,17 +18,13 @@ "\n", "## How to approach algorithm selection\n", "\n", - "Knowledge of theoretical properties of your problem can significantly narrow the set of the suitable algorihtms. \n", - "\n", - "Almost always, you will have more than one algorithm to try out choose the best.\n", - "\n", - "We recommend the following workflow for most efficiency:\n", - "- Based on the theoretical properties of your problem, narrow the set of algorithms to experiment with to 2-3 algorithms.\n", - "- Run the algorithms for smaller number of iterations. As a rule of thumb, use 10*`n_params` iterations or function evaluations.\n", - "- Plot the results in a criterion plot.\n", - "- Re run the optimization algorithm with the best results until a convergence criterion is achieved.\n", + "We recommend the following workflow to choose algorithms:\n", + " 1. Based on the theoretical properties of your problem, narrow the set of algorithms to experiment with to 2-3 algorithms.\n", + " 2. Run the algorithms for smaller number of iterations. As a rule of thumb, use 10*`n_params` iterations or function evaluations.\n", + " 3. Compare the results in a criterion plot.\n", + " 4. 
Re run the optimization algorithm with the best results until a convergence criterion is achieved.\n", " \n", - "## How to narrow down algorithm sets\n", + "## Step 1: Choosing candidates\n", "\n", "The below decision tree offers a practical guide on how to narrow down the set of algorithms to experiment with, based on the theoretical properties of your problem:\n", "```{mermaid}\n", @@ -51,15 +47,6 @@ "\n" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Choosing optimization algorithm for unconcstrained, least-squares, non-differentiable function:\n", - "consider the Rosenbrock function [insert link] from the More-Wild benchmark set [insert link]\n", - "set of algorithms to consider: `'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg'`" - ] - }, { "cell_type": "code", "execution_count": null, @@ -77,22 +64,65 @@ "metadata": {}, "outputs": [], "source": [ - "@om.mark.least_squares\n", - "def rosenbrock(x):\n", - " fvec = np.zeros(2)\n", - " fvec[0] = 10 * (x[1] - x[0] ** 2)\n", - " fvec[1] = 1.0 - x[0]\n", - " return fvec\n", + "def trid_scalar(x):\n", + " \"\"\"Implement Trid function: https://www.sfu.ca/~ssurjano/trid.html.\"\"\"\n", + " return ((x - 1) ** 2).sum() - (x[1:] * x[:-1]).sum()\n", + "\n", + "\n", + "def trid_gradient(x):\n", + " \"\"\"Calculate gradient of trid function.\"\"\"\n", + " l1 = np.insert(x, 0, 0)\n", + " l1 = np.delete(l1, [-1])\n", + " l2 = np.append(x, 0)\n", + " l2 = np.delete(l2, [0])\n", + " return 2 * (x - 1) - l1 - l2\n", "\n", "\n", "results = {}\n", - "for algo in [\"scipy_ls_lm\", \"scipy_ls_trf\", \"scipy_ls_dogbox\"]:\n", + "for algo in [\"scipy_lbfgsb\", \"nlopt_lbfgsb\", \"fides\"]:\n", " results[algo] = om.minimize(\n", - " rosenbrock,\n", - " params=np.arange(2),\n", + " fun=trid_scalar,\n", + " jac=trid_gradient,\n", + " params=np.arange(20),\n", " algorithm=algo,\n", + " algo_options={\"stopping_maxfun\": 8, \"stopping_maxiter\": 8},\n", " )" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = om.criterion_plot(results, max_evaluations=8)\n", + "fig.show(renderer=\"png\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_x = results[\"nlopt_lbfgsb\"].params\n", + "results[\"nlopt_lbfgsb_complete\"] = om.minimize(\n", + " fun=trid_scalar,\n", + " jac=trid_gradient,\n", + " params=best_x,\n", + " algorithm=\"nlopt_lbfgsb\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = om.criterion_plot(results)\n", + "fig.show(renderer=\"png\")" + ] } ], "metadata": { From 8d6a7a10358b1eb38e4e9f3d1f571f5daf584e10 Mon Sep 17 00:00:00 2001 From: mpetrosian Date: Sat, 5 Oct 2024 13:14:49 +0200 Subject: [PATCH 08/28] Add nlopt_lbfgsb to the algorithm selection tree --- docs/source/how_to/how_to_algorithm_selection.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index c0fdcb21b..4b0bb650c 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -41,7 +41,7 @@ " F[\"differentiable?\"] -- yes --> H[\"'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg'\"]\n", " F[\"differentiable?\"] -- no --> I[\"'nag_dflos', 'pounders', 'tao_pounders'\"]\n", "\n", - " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'fides'\"]\n", + " 
G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides'\"]\n", " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel'\"]\n", "```\n", "\n" From f66ca51d7a47fa3a1b095924e3dfdfe8d819e0a5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Oct 2024 11:57:33 +0000 Subject: [PATCH 09/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .tools/envs/testenv-numpy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.tools/envs/testenv-numpy.yml b/.tools/envs/testenv-numpy.yml index ef75ece28..9c96854fa 100644 --- a/.tools/envs/testenv-numpy.yml +++ b/.tools/envs/testenv-numpy.yml @@ -34,4 +34,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs - -e ../../ From 7f0b314c0df296ce6f2942cc3e665f5b122fe10b Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Thu, 31 Oct 2024 13:58:47 +0100 Subject: [PATCH 10/28] Polish how to guide for local algorithm selection. --- .tools/envs/testenv-numpy.yml | 1 + .../how_to/how_to_algorithm_selection.ipynb | 168 ++++++++++++++---- docs/source/how_to/how_to_globalization.ipynb | 20 +++ docs/source/how_to/index.md | 2 +- 4 files changed, 152 insertions(+), 39 deletions(-) create mode 100644 docs/source/how_to/how_to_globalization.ipynb diff --git a/.tools/envs/testenv-numpy.yml b/.tools/envs/testenv-numpy.yml index ef75ece28..9c96854fa 100644 --- a/.tools/envs/testenv-numpy.yml +++ b/.tools/envs/testenv-numpy.yml @@ -34,4 +34,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs - -e ../../ diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index 4b0bb650c..2f1ee35dd 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -6,27 +6,76 @@ "source": [ "(how-to-select-algorithms)=\n", "# How to select a local optimizer\n", - "## Why choosing the right optimizer is difficult\n", "\n", - "Optimization without the knowldge of the properties of the problem we are trying to solve is difficult. \n", + "This guide explains how to choose a local optimizer that works well for your problem. \n", + "Depending on your [strategy for global optimization](how_to_globalization.ipynb) it \n", + "is also relevant for global optimization problems. \n", "\n", - "The only algorithms that are guaranteed to solve all problems are grid search or other algorithms that evaluate the criterion function almost everywhere in the parameter space.\n", + "## Important facts \n", "\n", - "If you have more than a hand full of parameters, these methods would take too long.\n", + "- There is no optimizer that works well for all problems \n", + "- Making the right choice can lead to enormous speedups\n", + "- Making the wrong choice can mean that you cannot solve your problem at all\n", "\n", - "Thus, you have to know the properties of your optimization problem and have knowledge about different optimization algorithms in order to choose the right algorithm for your problem.\n", "\n", - "## How to approach algorithm selection\n", + "## The four steps for selecting algorithms\n", "\n", - "We recommend the following workflow to choose algorithms:\n", - " 1. 
Based on the theoretical properties of your problem, narrow the set of algorithms to experiment with to 2-3 algorithms.\n", - " 2. Run the algorithms for smaller number of iterations. As a rule of thumb, use 10*`n_params` iterations or function evaluations.\n", - " 3. Compare the results in a criterion plot.\n", - " 4. Re run the optimization algorithm with the best results until a convergence criterion is achieved.\n", - " \n", - "## Step 1: Choosing candidates\n", + "Algorithm selection is a mix of theory and experimentation. We recommend the following \n", + "for steps:\n", + "\n", + "1. Theory: Select three to 5 candidate algorithms based on the properties \n", + "of your problem. Below we provide a simple decision tree for this step.\n", + "2. Experiments: Run the candidate algorithms fo a small number of function \n", + "evaluations. As a rule of thumb, use between `n_params` and `10 * n_params`\n", + "evaluations. \n", + "3. Comparison: Compare the results in a criterion plot.\n", + "4. Optimization: Re-run the optimization algorithm with the best results until \n", + "convergence. Use the best parameter vector from the experiments as starting point.\n", + "\n", + "These steps work well for most problems. Sometimes you need [variations](four-steps-variations).\n", + "\n", + "## An example problem\n", + "\n", + "As an example we use the Trid function. The Trid function has no local minimum except \n", + "the global one. It is defined for any number of dimensions, we will pick 20. As starting \n", + "values we will pick the vector [0, 1, ..., 19]. \n", + "\n", + "A Python implementation of the function and its gradient looks like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import optimagic as om\n", + "\n", + "\n", + "def trid_scalar(x):\n", + " \"\"\"Implement Trid function: https://www.sfu.ca/~ssurjano/trid.html.\"\"\"\n", + " return ((x - 1) ** 2).sum() - (x[1:] * x[:-1]).sum()\n", + "\n", + "\n", + "def trid_gradient(x):\n", + " \"\"\"Calculate gradient of trid function.\"\"\"\n", + " l1 = np.insert(x, 0, 0)\n", + " l1 = np.delete(l1, [-1])\n", + " l2 = np.append(x, 0)\n", + " l2 = np.delete(l2, [0])\n", + " return 2 * (x - 1) - l1 - l2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Theory\n", "\n", "The below decision tree offers a practical guide on how to narrow down the set of algorithms to experiment with, based on the theoretical properties of your problem:\n", + "\n", "```{mermaid}\n", "graph LR\n", " classDef highlight fill:#FF4500;\n", @@ -44,18 +93,24 @@ " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides'\"]\n", " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel'\"]\n", "```\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", "\n", - "import optimagic as om" + "Let's go through the steps for the Trid function:\n", + "\n", + "1. There are no nonlinear constraints our solution needs to satisfy\n", + "2. There is no least-squares structure we can exploit \n", + "3. The function is differentiable and we have a closed form gradient that we would like \n", + "to use. 
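
Since the rest of the tree hinges on the differentiability answer, it can be worth verifying a closed-form gradient against finite differences before trusting it in the experiments. A minimal sketch of such a check, reusing the `trid_scalar` and `trid_gradient` defined above (the `numerical_gradient` helper is hypothetical, not part of this patch):

```python
import numpy as np


def numerical_gradient(f, x, eps=1e-6):
    """Central finite-difference gradient of a scalar function f at x."""
    grad = np.zeros_like(x, dtype=float)
    for i in range(x.size):
        step = np.zeros_like(x, dtype=float)
        step[i] = eps
        grad[i] = (f(x + step) - f(x - step)) / (2 * eps)
    return grad


x = np.arange(20, dtype=float)
# The closed-form and numerical gradients should agree up to finite-difference
# error; if they do not, answer "no" to the differentiability question.
assert np.allclose(trid_gradient(x), numerical_gradient(trid_scalar, x), atol=1e-5)
```
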
\n", + "\n", + "We therefore end up with the candidate algorithms `scipy_lbfgsb`, `nlopt_lbfgsb`, and \n", + "`fides`.\n", + "\n", + "\n", + "## Step 2: Experiments\n", + "\n", + "Below, we simply run optimizations with all algorithms in a loop and store the result \n", + "in a dictionary. We limit the number of function evaluations to 8. Since some algorithms \n", + "only support a maximum number of iterations as stopping criterion we also limit the \n", + "number of iterations to 8." ] }, { @@ -64,20 +119,6 @@ "metadata": {}, "outputs": [], "source": [ - "def trid_scalar(x):\n", - " \"\"\"Implement Trid function: https://www.sfu.ca/~ssurjano/trid.html.\"\"\"\n", - " return ((x - 1) ** 2).sum() - (x[1:] * x[:-1]).sum()\n", - "\n", - "\n", - "def trid_gradient(x):\n", - " \"\"\"Calculate gradient of trid function.\"\"\"\n", - " l1 = np.insert(x, 0, 0)\n", - " l1 = np.delete(l1, [-1])\n", - " l2 = np.append(x, 0)\n", - " l2 = np.delete(l2, [0])\n", - " return 2 * (x - 1) - l1 - l2\n", - "\n", - "\n", "results = {}\n", "for algo in [\"scipy_lbfgsb\", \"nlopt_lbfgsb\", \"fides\"]:\n", " results[algo] = om.minimize(\n", @@ -89,6 +130,15 @@ " )" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Comparison\n", + "\n", + "Next we plot the optimizer histories to find out which optimizer worked best:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -99,6 +149,21 @@ "fig.show(renderer=\"png\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All optimizers work pretty well here and since this is a very simple problem, any of them \n", + "would probably find the optimum in a reasonable time. However, `nlopt_lbfgsb` is a bit \n", + "better than the others, so we will select it for the next step. \n", + "\n", + "## Step 4: Optimization \n", + "\n", + "All that is left to do is to run the optimization until convergence with the best \n", + "optimizer. To avoid duplicated calculations, we can already start from the previously \n", + "best parameter vector:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -114,6 +179,14 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Looking at the result in a criterion plot we can see that the optimizer converges after \n", + "a bit more than 30 function evaluations. " + ] + }, { "cell_type": "code", "execution_count": null, @@ -123,6 +196,25 @@ "fig = om.criterion_plot(results)\n", "fig.show(renderer=\"png\")" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "(four-steps-variations)=\n", + "\n", + "## Variations\n", + "\n", + "The four steps described above work very well in most situations. However, sometimes \n", + "it makes sense to deviate: \n", + "\n", + "- If you are unsure about some of the questions in step 1, select more algorithms for \n", + "the experimentation phase and run more than 1 algorithm until convergence. \n", + "- If it is very important to find a precise optimum, run more than 1 algorithm until \n", + "convergence. \n", + "- If you have a very fast objective function, simply run all candidate algorithms until \n", + "convergence. 
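
When more than one candidate is run until convergence, the algorithm and start values for the final run can be picked programmatically instead of by eye. A hypothetical snippet, not part of this patch, assuming `results` is the dictionary of optimagic `OptimizeResult` objects collected in the experiments (it relies on the `fun` and `params` attributes used elsewhere in this guide):

```python
# Warm-start the final optimization from the best experiment so far.
best_algo = min(results, key=lambda algo: results[algo].fun)
res = om.minimize(
    fun=trid_scalar,
    jac=trid_gradient,
    params=results[best_algo].params,
    algorithm=best_algo,
)
```
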
" + ] } ], "metadata": { diff --git a/docs/source/how_to/how_to_globalization.ipynb b/docs/source/how_to/how_to_globalization.ipynb new file mode 100644 index 000000000..1ddf45cbc --- /dev/null +++ b/docs/source/how_to/how_to_globalization.ipynb @@ -0,0 +1,20 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to choose a strategy for global optimization\n", + "\n", + "(to be written)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/how_to/index.md b/docs/source/how_to/index.md index 81b86fd02..412b0f252 100644 --- a/docs/source/how_to/index.md +++ b/docs/source/how_to/index.md @@ -15,6 +15,7 @@ how_to_derivatives how_to_algorithm_selection how_to_bounds how_to_constraints +how_to_globalization how_to_multistart how_to_visualize_histories how_to_specify_algorithm_and_algo_options @@ -23,5 +24,4 @@ how_to_logging how_to_errors_during_optimization how_to_slice_plot how_to_benchmarking -how_to_algorithm_selection_ ``` From 4046a888fb4816b25829fe1087773954b1fc0cc1 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Fri, 1 Nov 2024 11:04:23 +0100 Subject: [PATCH 11/28] Polishing. --- .../how_to/how_to_algorithm_selection.ipynb | 102 ++++++++++++++---- 1 file changed, 84 insertions(+), 18 deletions(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index 2f1ee35dd..7321cf566 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -15,7 +15,8 @@ "\n", "- There is no optimizer that works well for all problems \n", "- Making the right choice can lead to enormous speedups\n", - "- Making the wrong choice can mean that you cannot solve your problem at all\n", + "- Making the wrong choice can mean that you don't solve your problem at all; Sometimes, \n", + "optimizers fail silently!\n", "\n", "\n", "## The four steps for selecting algorithms\n", @@ -23,13 +24,13 @@ "Algorithm selection is a mix of theory and experimentation. We recommend the following \n", "for steps:\n", "\n", - "1. Theory: Select three to 5 candidate algorithms based on the properties \n", + "1. **Theory**: Select three to 5 candidate algorithms based on the properties \n", "of your problem. Below we provide a simple decision tree for this step.\n", - "2. Experiments: Run the candidate algorithms fo a small number of function \n", + "2. **Experiments**: Run the candidate algorithms for a small number of function \n", "evaluations. As a rule of thumb, use between `n_params` and `10 * n_params`\n", "evaluations. \n", - "3. Comparison: Compare the results in a criterion plot.\n", - "4. Optimization: Re-run the optimization algorithm with the best results until \n", + "3. **Comparison**: Compare the results in a *criterion plot*.\n", + "4. **Optimization**: Re-run the algorithm with the best results until \n", "convergence. Use the best parameter vector from the experiments as starting point.\n", "\n", "These steps work well for most problems. 
Sometimes you need [variations](four-steps-variations).\n", @@ -43,6 +44,17 @@ "A Python implementation of the function and its gradient looks like this:" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -80,18 +92,18 @@ "graph LR\n", " classDef highlight fill:#FF4500;\n", " A[\"Do you have
nonlinear constraints?\"] -- yes --> B[\"differentiable?\"]\n", - " B[\"differentiable?\"] -- yes --> C[\"'ipopt', 'nlopt_slsqp', 'scipy_trust_constr'\"]\n", - " B[\"differentiable?\"] -- no --> D[\"'scipy_cobyla', 'nlopt_cobyla'\"]\n", + " B[\"differentiable?\"] -- yes --> C[\"'ipopt', 'nlopt_slsqp', 'scipy_trust_constr', ...\"]\n", + " B[\"differentiable?\"] -- no --> D[\"'scipy_cobyla', 'nlopt_cobyla', ...\"]\n", "\n", " A[\"Do you have
nonlinear constraints?\"] -- no --> E[\"Can you exploit
a least-squares
structure?\"]\n", " E[\"Can you exploit
a least-squares
structure?\"] -- yes --> F[\"differentiable?\"]\n", " E[\"Can you exploit
a least-squares
structure?\"] -- no --> G[\"differentiable?\"]\n", "\n", - " F[\"differentiable?\"] -- yes --> H[\"'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg'\"]\n", - " F[\"differentiable?\"] -- no --> I[\"'nag_dflos', 'pounders', 'tao_pounders'\"]\n", + " F[\"differentiable?\"] -- yes --> H[\"'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg', ...\"]\n", + " F[\"differentiable?\"] -- no --> I[\"'nag_dflos', 'pounders', 'tao_pounders', ...\"]\n", "\n", - " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides'\"]\n", - " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel'\"]\n", + " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides', ...\"]\n", + " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel', ...\"]\n", "```\n", "\n", "Let's go through the steps for the Trid function:\n", @@ -107,10 +119,10 @@ "\n", "## Step 2: Experiments\n", "\n", - "Below, we simply run optimizations with all algorithms in a loop and store the result \n", - "in a dictionary. We limit the number of function evaluations to 8. Since some algorithms \n", - "only support a maximum number of iterations as stopping criterion we also limit the \n", - "number of iterations to 8." + "To find out which algorithms work well for our problem, we simply run optimizations with\n", + "all algorithms in a loop and store the result in a dictionary. We limit the number of \n", + "function evaluations to 8. Since some algorithms only support a maximum number of iterations \n", + "as stopping criterion we also limit the number of iterations to 8.\n" ] }, { @@ -155,7 +167,8 @@ "source": [ "All optimizers work pretty well here and since this is a very simple problem, any of them \n", "would probably find the optimum in a reasonable time. However, `nlopt_lbfgsb` is a bit \n", - "better than the others, so we will select it for the next step. \n", + "better than the others, so we will select it for the next step. In more difficult\n", + "examples, the difference between optimizers can be much more pronounced.\n", "\n", "## Step 4: Optimization \n", "\n", @@ -203,7 +216,7 @@ "source": [ "(four-steps-variations)=\n", "\n", - "## Variations\n", + "## Variations of the four steps\n", "\n", "The four steps described above work very well in most situations. However, sometimes \n", "it makes sense to deviate: \n", @@ -213,7 +226,60 @@ "- If it is very important to find a precise optimum, run more than 1 algorithm until \n", "convergence. \n", "- If you have a very fast objective function, simply run all candidate algorithms until \n", - "convergence. " + "convergence. \n", + "- If you have a differentiable objective function but no closed form derivative, use \n", + "at least one gradient based optimizer and one gradient free optimizer in the \n", + "experiments. See [here](how_to_derivatives.ipynb) to learn more about derivatives.\n", + "\n", + "\n", + "## How important was it?\n", + "\n", + "The Trid function is differentiable and very well behaved in almost every aspect. \n", + "Moreover, it has a very short runtime. One would think that any optimizer can find its \n", + "optimum. 
So let's compare the selected optimizer with a few others:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results = {}\n", + "for algo in [\"nlopt_lbfgsb\", \"scipy_neldermead\", \"scipy_cobyla\"]:\n", + " results[algo] = om.minimize(\n", + " fun=trid_scalar,\n", + " jac=trid_gradient,\n", + " params=np.arange(20),\n", + " algorithm=algo,\n", + " )\n", + "\n", + "fig = om.criterion_plot(results)\n", + "fig.show(renderer=\"png\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that our chosen optimizer solves the problem with less than 35 function \n", + "evaluations. At this time, the two gradient free optimizers have not even started to \n", + "make significant progress. Cobyla gets reasonably close to an optimum after about 4k \n", + "evaluations. Neldermead gets stuck after 8k evaluations and fails to solve the problem. \n", + "\n", + "This example shows not only that the choice of optimizer is important but that the commonly \n", + "held belief that gradient free optimizers are generally more robust than gradient based \n", + "ones is dangerous! The Neldermead algorithm did \"converge\" and reports success, but\n", + "did not find the optimum. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results[\"scipy_neldermead\"].success" ] } ], From 60df51a8bf69cb51509b317c4fc4da5cb1213d46 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Fri, 1 Nov 2024 11:12:30 +0100 Subject: [PATCH 12/28] Add a comment. --- docs/source/how_to/how_to_algorithm_selection.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index 7321cf566..2f9bbb66e 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -270,7 +270,9 @@ "This example shows not only that the choice of optimizer is important but that the commonly \n", "held belief that gradient free optimizers are generally more robust than gradient based \n", "ones is dangerous! The Neldermead algorithm did \"converge\" and reports success, but\n", - "did not find the optimum. " + "did not find the optimum. It did not even get stuck in a local optimum because we know \n", + "that the Trid function does not have local optima except the global one. It just got \n", + "stuck somewhere. " ] }, { From f99d483b9c239a1458a2bf9774277c29384503cf Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 09:45:44 +0100 Subject: [PATCH 13/28] Add nlopt to rtd environment. 
--- .tools/envs/testenv-linux.yml | 2 +- .tools/envs/testenv-numpy.yml | 2 +- .tools/envs/testenv-others.yml | 2 +- .tools/envs/testenv-pandas.yml | 2 +- docs/rtd_environment.yml | 1 + environment.yml | 2 +- 6 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.tools/envs/testenv-linux.yml b/.tools/envs/testenv-linux.yml index ba95d1fbd..1e1c0846f 100644 --- a/.tools/envs/testenv-linux.yml +++ b/.tools/envs/testenv-linux.yml @@ -8,7 +8,7 @@ dependencies: - jax - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - - nlopt # dev, tests + - nlopt # dev, tests, docs - pip # dev, tests, docs - pytest # dev, tests - pytest-cov # tests diff --git a/.tools/envs/testenv-numpy.yml b/.tools/envs/testenv-numpy.yml index 9c96854fa..34681b9ba 100644 --- a/.tools/envs/testenv-numpy.yml +++ b/.tools/envs/testenv-numpy.yml @@ -8,7 +8,7 @@ dependencies: - numpy<2 - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - - nlopt # dev, tests + - nlopt # dev, tests, docs - pip # dev, tests, docs - pytest # dev, tests - pytest-cov # tests diff --git a/.tools/envs/testenv-others.yml b/.tools/envs/testenv-others.yml index b4e303382..444205593 100644 --- a/.tools/envs/testenv-others.yml +++ b/.tools/envs/testenv-others.yml @@ -6,7 +6,7 @@ channels: dependencies: - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - - nlopt # dev, tests + - nlopt # dev, tests, docs - pip # dev, tests, docs - pytest # dev, tests - pytest-cov # tests diff --git a/.tools/envs/testenv-pandas.yml b/.tools/envs/testenv-pandas.yml index fad83dc3a..ff4996dc5 100644 --- a/.tools/envs/testenv-pandas.yml +++ b/.tools/envs/testenv-pandas.yml @@ -8,7 +8,7 @@ dependencies: - numpy<2 - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - - nlopt # dev, tests + - nlopt # dev, tests, docs - pip # dev, tests, docs - pytest # dev, tests - pytest-cov # tests diff --git a/docs/rtd_environment.yml b/docs/rtd_environment.yml index 57e16de01..535f595d2 100644 --- a/docs/rtd_environment.yml +++ b/docs/rtd_environment.yml @@ -27,6 +27,7 @@ dependencies: - patsy - joblib - plotly + - nlopt - annotated-types - pip: - ../ diff --git a/environment.yml b/environment.yml index f2645ab0d..cfd6bf6eb 100644 --- a/environment.yml +++ b/environment.yml @@ -8,7 +8,7 @@ dependencies: - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - jupyterlab # dev, docs - - nlopt # dev, tests + - nlopt # dev, tests, docs - pdbpp # dev - pip # dev, tests, docs - pytest # dev, tests From 7cb889d0ec5fd3f2930ede91b2bdf25a4125fd76 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 09:59:10 +0100 Subject: [PATCH 14/28] Apply suggestions from code review by HM Co-authored-by: Hans-Martin von Gaudecker --- .../how_to/how_to_algorithm_selection.ipynb | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index 2f9bbb66e..d6a04ee54 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -15,7 +15,7 @@ "\n", "- There is no optimizer that works well for all problems \n", "- Making the right choice can lead to enormous speedups\n", - "- Making the wrong choice can mean that you don't solve your problem at all; Sometimes, \n", + "- Making the wrong choice can mean that you don't solve your problem at all. 
Sometimes, \n", "optimizers fail silently!\n", "\n", "\n", @@ -24,20 +24,20 @@ "Algorithm selection is a mix of theory and experimentation. We recommend the following \n", "for steps:\n", "\n", - "1. **Theory**: Select three to 5 candidate algorithms based on the properties \n", - "of your problem. Below we provide a simple decision tree for this step.\n", + "1. **Theory**: Based on the properties of your problem, start with 3 to 5 candidate algorithms. \n", + "You may use the [decision tree below](link)\n", "2. **Experiments**: Run the candidate algorithms for a small number of function \n", "evaluations. As a rule of thumb, use between `n_params` and `10 * n_params`\n", "evaluations. \n", "3. **Comparison**: Compare the results in a *criterion plot*.\n", "4. **Optimization**: Re-run the algorithm with the best results until \n", - "convergence. Use the best parameter vector from the experiments as starting point.\n", + "convergence. Use the best parameter vector from the experiments as start parameters.\n", "\n", "These steps work well for most problems. Sometimes you need [variations](four-steps-variations).\n", "\n", "## An example problem\n", "\n", - "As an example we use the Trid function. The Trid function has no local minimum except \n", + "As an example we use the [Trid function](https://www.sfu.ca/~ssurjano/trid.html). The Trid function has no local minimum except \n", "the global one. It is defined for any number of dimensions, we will pick 20. As starting \n", "values we will pick the vector [0, 1, ..., 19]. \n", "\n", @@ -86,13 +86,13 @@ "source": [ "## Step 1: Theory\n", "\n", - "The below decision tree offers a practical guide on how to narrow down the set of algorithms to experiment with, based on the theoretical properties of your problem:\n", + "This is a practical guide for narrowing down the set of algorithms to experiment with:\n", "\n", "```{mermaid}\n", "graph LR\n", " classDef highlight fill:#FF4500;\n", " A[\"Do you have
nonlinear constraints?\"] -- yes --> B[\"differentiable?\"]\n", - " B[\"differentiable?\"] -- yes --> C[\"'ipopt', 'nlopt_slsqp', 'scipy_trust_constr', ...\"]\n", + " B[\"Is your objective function differentiable?\"] -- yes --> C[\"'ipopt', 'nlopt_slsqp', 'scipy_trust_constr', ...\"]\n", " B[\"differentiable?\"] -- no --> D[\"'scipy_cobyla', 'nlopt_cobyla', ...\"]\n", "\n", " A[\"Do you have
nonlinear constraints?\"] -- no --> E[\"Can you exploit
a least-squares
structure?\"]\n", @@ -108,9 +108,9 @@ "\n", "Let's go through the steps for the Trid function:\n", "\n", - "1. There are no nonlinear constraints our solution needs to satisfy\n", - "2. There is no least-squares structure we can exploit \n", - "3. The function is differentiable and we have a closed form gradient that we would like \n", + "1. **No** nonlinear constraints our solution needs to satisfy\n", + "2. **No** no least-squares structure we can exploit \n", + "3. **Yes**, the function is differentiable and we have a closed form gradient that we would like \n", "to use. \n", "\n", "We therefore end up with the candidate algorithms `scipy_lbfgsb`, `nlopt_lbfgsb`, and \n", @@ -263,13 +263,13 @@ "metadata": {}, "source": [ "We can see that our chosen optimizer solves the problem with less than 35 function \n", - "evaluations. At this time, the two gradient free optimizers have not even started to \n", - "make significant progress. Cobyla gets reasonably close to an optimum after about 4k \n", - "evaluations. Neldermead gets stuck after 8k evaluations and fails to solve the problem. \n", + "evaluations. At this point, the two gradient-free optimizers have not yet made \n", + "significant progress. CoByLA gets reasonably close to an optimum after about 4k \n", + "evaluations. Nelder-Mead gets stuck after 8k evaluations and fails to solve the problem. \n", "\n", "This example shows not only that the choice of optimizer is important but that the commonly \n", "held belief that gradient free optimizers are generally more robust than gradient based \n", - "ones is dangerous! The Neldermead algorithm did \"converge\" and reports success, but\n", + "ones is dangerous! The Nelder-Mead algorithm did \"converge\" and reports success, but\n", "did not find the optimum. It did not even get stuck in a local optimum because we know \n", "that the Trid function does not have local optima except the global one. It just got \n", "stuck somewhere. " From fbd678fd67c20171de73e6efd7f35ab057732ef1 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 10:00:20 +0100 Subject: [PATCH 15/28] Apply suggestions from code review by Christian Co-authored-by: Christian Zimpelmann --- docs/source/how_to/how_to_algorithm_selection.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index d6a04ee54..e04c4c937 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -22,7 +22,7 @@ "## The four steps for selecting algorithms\n", "\n", "Algorithm selection is a mix of theory and experimentation. We recommend the following \n", - "for steps:\n", + "four steps:\n", "\n", "1. **Theory**: Based on the properties of your problem, start with 3 to 5 candidate algorithms. \n", "You may use the [decision tree below](link)\n", From f5dfe44a22922258069f59120370866ca534ae8e Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 10:44:30 +0100 Subject: [PATCH 16/28] Polishing and add fides to docs environment. 
--- docs/rtd_environment.yml | 1 + .../how_to/how_to_algorithm_selection.ipynb | 67 ++++++++++++------- 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/docs/rtd_environment.yml b/docs/rtd_environment.yml index 535f595d2..1929ec914 100644 --- a/docs/rtd_environment.yml +++ b/docs/rtd_environment.yml @@ -40,3 +40,4 @@ dependencies: - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests - sphinxcontrib-mermaid # dev, tests, docs + - fides==0.7.4 # dev, tests diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index e04c4c937..aa7ed7360 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -35,6 +35,45 @@ "\n", "These steps work well for most problems. Sometimes you need [variations](four-steps-variations).\n", "\n", + "\n", + "## A decision tree \n", + "\n", + "This is a practical guide for narrowing down the set of algorithms to experiment with:\n", + "\n", + "```{mermaid}\n", + "graph LR\n", + " classDef highlight fill:#FF4500;\n", + " A[\"Do you have
nonlinear constraints?\"] -- yes --> B[\"differentiable?\"]\n", + " B[\"Is your objective function differentiable?\"] -- yes --> C[\"'ipopt', 'nlopt_slsqp', 'scipy_trust_constr', ...\"]\n", + " B[\"differentiable?\"] -- no --> D[\"'scipy_cobyla', 'nlopt_cobyla', ...\"]\n", + "\n", + " A[\"Do you have
nonlinear constraints?\"] -- no --> E[\"Can you exploit
a least-squares
structure?\"]\n", + " E[\"Can you exploit
a least-squares
structure?\"] -- yes --> F[\"differentiable?\"]\n", + " E[\"Can you exploit
a least-squares
structure?\"] -- no --> G[\"differentiable?\"]\n", + "\n", + " F[\"differentiable?\"] -- yes --> H[\"'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg', ...\"]\n", + " F[\"differentiable?\"] -- no --> I[\"'nag_dflos', 'pounders', 'tao_pounders', ...\"]\n", + "\n", + " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides', ...\"]\n", + " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel', ...\"]\n", + "```\n", + "\n", + "Going through the different questions will give you a list of candidate algorithms. \n", + "All algorithms in that list are designed for the same problem class but use different \n", + "approaches to solve the problem. Which of them works best for your problem can only be \n", + "found out through experimentation.\n", + "\n", + "```{note}\n", + "Many books on numerical optimization focus strongly on the inner workings of algorithms.\n", + "They will, for example, describe the difference between a trust-region algorithm and a \n", + "line-search algorithm in a lot of detail. We have an [intuitive explanation](../explanation/explanation_of_numerical_optimizers.md) of this too. However, these details are not \n", + "very relevant for algorithm selection. For example, If you have a scalar, differentiable \n", + "problem without nonlinear constraints, the decision tree suggests `fides` and `lbfgsb`.\n", + "`fides` is a trust-region algorithm, `lbfgsb` is a line-search algorithm. Both are \n", + "designed to solve the same kinds of problems and which one works best needs to be \n", + "found out through experimentation.\n", + "```\n", + "\n", "## An example problem\n", "\n", "As an example we use the [Trid function](https://www.sfu.ca/~ssurjano/trid.html). The Trid function has no local minimum except \n", @@ -84,29 +123,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Step 1: Theory\n", + "### Step 1: Theory\n", "\n", - "This is a practical guide for narrowing down the set of algorithms to experiment with:\n", - "\n", - "```{mermaid}\n", - "graph LR\n", - " classDef highlight fill:#FF4500;\n", - " A[\"Do you have
nonlinear constraints?\"] -- yes --> B[\"differentiable?\"]\n", - " B[\"Is your objective function differentiable?\"] -- yes --> C[\"'ipopt', 'nlopt_slsqp', 'scipy_trust_constr', ...\"]\n", - " B[\"differentiable?\"] -- no --> D[\"'scipy_cobyla', 'nlopt_cobyla', ...\"]\n", "\n", - " A[\"Do you have
nonlinear constraints?\"] -- no --> E[\"Can you exploit
a least-squares
structure?\"]\n", - " E[\"Can you exploit
a least-squares
structure?\"] -- yes --> F[\"differentiable?\"]\n", - " E[\"Can you exploit
a least-squares
structure?\"] -- no --> G[\"differentiable?\"]\n", - "\n", - " F[\"differentiable?\"] -- yes --> H[\"'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg', ...\"]\n", - " F[\"differentiable?\"] -- no --> I[\"'nag_dflos', 'pounders', 'tao_pounders', ...\"]\n", - "\n", - " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides', ...\"]\n", - " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel', ...\"]\n", - "```\n", "\n", - "Let's go through the steps for the Trid function:\n", + "Let's go through the decision tree for the Trid function:\n", "\n", "1. **No** nonlinear constraints our solution needs to satisfy\n", "2. **No** no least-squares structure we can exploit \n", @@ -117,7 +138,7 @@ "`fides`.\n", "\n", "\n", - "## Step 2: Experiments\n", + "### Step 2: Experiments\n", "\n", "To find out which algorithms work well for our problem, we simply run optimizations with\n", "all algorithms in a loop and store the result in a dictionary. We limit the number of \n", @@ -146,7 +167,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Step 3: Comparison\n", + "### Step 3: Comparison\n", "\n", "Next we plot the optimizer histories to find out which optimizer worked best:" ] @@ -170,7 +191,7 @@ "better than the others, so we will select it for the next step. In more difficult\n", "examples, the difference between optimizers can be much more pronounced.\n", "\n", - "## Step 4: Optimization \n", + "### Step 4: Optimization \n", "\n", "All that is left to do is to run the optimization until convergence with the best \n", "optimizer. To avoid duplicated calculations, we can already start from the previously \n", From de76d5cd33f9629d29d22ecda4eafc46e24ef423 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 11:18:17 +0100 Subject: [PATCH 17/28] Polishing. --- .../how_to/how_to_algorithm_selection.ipynb | 48 ++++++++----------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index aa7ed7360..e7dc6ee71 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -19,7 +19,7 @@ "optimizers fail silently!\n", "\n", "\n", - "## The four steps for selecting algorithms\n", + "## The three steps for selecting algorithms\n", "\n", "Algorithm selection is a mix of theory and experimentation. We recommend the following \n", "four steps:\n", @@ -27,13 +27,14 @@ "1. **Theory**: Based on the properties of your problem, start with 3 to 5 candidate algorithms. \n", "You may use the [decision tree below](link)\n", "2. **Experiments**: Run the candidate algorithms for a small number of function \n", - "evaluations. As a rule of thumb, use between `n_params` and `10 * n_params`\n", - "evaluations. \n", - "3. **Comparison**: Compare the results in a *criterion plot*.\n", - "4. **Optimization**: Re-run the algorithm with the best results until \n", + "evaluations and compare the results in a *criterion plot*. As a rule of thumb, use \n", + "between `n_params` and `10 * n_params` evaluations. \n", + "3. **Optimization**: Re-run the algorithm with the best results until \n", "convergence. Use the best parameter vector from the experiments as start parameters.\n", "\n", - "These steps work well for most problems. 
Sometimes you need [variations](four-steps-variations).\n", + "We will walk you through the steps in an [example](algo-selection-example-problem)\n", + "below. These steps work well for most problems but sometimes you need \n", + "[variations](algo-selection-steps-variations).\n", "\n", "\n", "## A decision tree \n", @@ -74,6 +75,8 @@ "found out through experimentation.\n", "```\n", "\n", + "(algo-selection-example-problem)=\n", + "\n", "## An example problem\n", "\n", "As an example we use the [Trid function](https://www.sfu.ca/~ssurjano/trid.html). The Trid function has no local minimum except \n", @@ -131,12 +134,19 @@ "\n", "1. **No** nonlinear constraints our solution needs to satisfy\n", "2. **No** no least-squares structure we can exploit \n", - "3. **Yes**, the function is differentiable and we have a closed form gradient that we would like \n", - "to use. \n", + "3. **Yes**, the function is differentiable. We even have a closed form gradient that \n", + "we would like to use. \n", "\n", "We therefore end up with the candidate algorithms `scipy_lbfgsb`, `nlopt_lbfgsb`, and \n", "`fides`.\n", "\n", + "```{note}\n", + "If your function is differentiable but you do not have a closed form gradient (yet), \n", + "we suggest to use at least one gradient based optimizer and one gradient free optimizer.\n", + "in your experiments. Optimagic will use numerical gradients in that case. For details, \n", + "see [here](how_to_derivatives.ipynb).\n", + "```\n", + "\n", "\n", "### Step 2: Experiments\n", "\n", @@ -160,24 +170,8 @@ " params=np.arange(20),\n", " algorithm=algo,\n", " algo_options={\"stopping_maxfun\": 8, \"stopping_maxiter\": 8},\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 3: Comparison\n", + " )\n", "\n", - "Next we plot the optimizer histories to find out which optimizer worked best:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ "fig = om.criterion_plot(results, max_evaluations=8)\n", "fig.show(renderer=\"png\")" ] @@ -191,7 +185,7 @@ "better than the others, so we will select it for the next step. In more difficult\n", "examples, the difference between optimizers can be much more pronounced.\n", "\n", - "### Step 4: Optimization \n", + "### Step 3: Optimization \n", "\n", "All that is left to do is to run the optimization until convergence with the best \n", "optimizer. To avoid duplicated calculations, we can already start from the previously \n", @@ -235,7 +229,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "(four-steps-variations)=\n", + "(algo-selection-steps-variations)=\n", "\n", "## Variations of the four steps\n", "\n", From f9d1c2e1b9ed88b24341ec476db200f786472735 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 11:47:01 +0100 Subject: [PATCH 18/28] Try workaround for missing text in terminal nodes. 
--- docs/source/how_to/how_to_algorithm_selection.ipynb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index e7dc6ee71..3612a6c45 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -57,6 +57,14 @@ "\n", " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides', ...\"]\n", " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel', ...\"]\n", + "\n", + " C[\"'ipopt', 'nlopt_slsqp', 'scipy_trust_constr', ...\"] ~~~ L[\" \"]:::transparent\n", + " H[\"'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg', ...\"] ~~~ M[\" \"]:::transparent\n", + " I[\"'nag_dflos', 'pounders', 'tao_pounders', ...\"] ~~~ N[\" \"]:::transparent\n", + " J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides', ...\"] ~~~ O[\" \"]:::transparent\n", + " K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel', ...\"] ~~~ P[\" \"]:::transparent\n", + "\n", + " classDef transparent stroke:#FFFFFF, fill:#FFFFFF\n", "```\n", "\n", "Going through the different questions will give you a list of candidate algorithms. \n", From ff76d3a3542652520568e8fe7fbbed6851897702 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 12:03:02 +0100 Subject: [PATCH 19/28] Undo the workaround. --- docs/source/how_to/how_to_algorithm_selection.ipynb | 7 ------- 1 file changed, 7 deletions(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index 3612a6c45..ebab07e21 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -58,13 +58,6 @@ " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides', ...\"]\n", " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel', ...\"]\n", "\n", - " C[\"'ipopt', 'nlopt_slsqp', 'scipy_trust_constr', ...\"] ~~~ L[\" \"]:::transparent\n", - " H[\"'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg', ...\"] ~~~ M[\" \"]:::transparent\n", - " I[\"'nag_dflos', 'pounders', 'tao_pounders', ...\"] ~~~ N[\" \"]:::transparent\n", - " J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides', ...\"] ~~~ O[\" \"]:::transparent\n", - " K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel', ...\"] ~~~ P[\" \"]:::transparent\n", - "\n", - " classDef transparent stroke:#FFFFFF, fill:#FFFFFF\n", "```\n", "\n", "Going through the different questions will give you a list of candidate algorithms. \n", From b305a01f0b3f269f6a846984b23b1d7718f23ca7 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 12:05:56 +0100 Subject: [PATCH 20/28] Fix mermaid? 
--- docs/source/how_to/how_to_algorithm_selection.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index ebab07e21..dbfa77ddb 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -56,7 +56,7 @@ " F[\"differentiable?\"] -- no --> I[\"'nag_dflos', 'pounders', 'tao_pounders', ...\"]\n", "\n", " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides', ...\"]\n", - " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel', ...\"]\n", + " G[\"differentiable?\"] -- no --> K[\"test\"]\n", "\n", "```\n", "\n", From 1594e1ebbdedf6052c683901e48c4158befd460f Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 12:51:14 +0100 Subject: [PATCH 21/28] Fix mermaid? --- docs/source/how_to/how_to_algorithm_selection.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index dbfa77ddb..1f5aa7172 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -45,17 +45,17 @@ "graph LR\n", " classDef highlight fill:#FF4500;\n", " A[\"Do you have
nonlinear constraints?\"] -- yes --> B[\"differentiable?\"]\n", - " B[\"Is your objective function differentiable?\"] -- yes --> C[\"'ipopt', 'nlopt_slsqp', 'scipy_trust_constr', ...\"]\n", - " B[\"differentiable?\"] -- no --> D[\"'scipy_cobyla', 'nlopt_cobyla', ...\"]\n", + " B[\"Is your objective function differentiable?\"] -- yes --> C[\"'ipopt', 'nlopt_slsqp', 'scipy_trust_constr'\"]\n", + " B[\"differentiable?\"] -- no --> D[\"'scipy_cobyla', 'nlopt_cobyla'\"]\n", "\n", " A[\"Do you have
nonlinear constraints?\"] -- no --> E[\"Can you exploit
a least-squares
structure?\"]\n", " E[\"Can you exploit
a least-squares
structure?\"] -- yes --> F[\"differentiable?\"]\n", " E[\"Can you exploit
a least-squares
structure?\"] -- no --> G[\"differentiable?\"]\n", "\n", - " F[\"differentiable?\"] -- yes --> H[\"'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg', ...\"]\n", - " F[\"differentiable?\"] -- no --> I[\"'nag_dflos', 'pounders', 'tao_pounders', ...\"]\n", + " F[\"differentiable?\"] -- yes --> H[\"'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg'\"]\n", + " F[\"differentiable?\"] -- no --> I[\"'nag_dflos', 'pounders', 'tao_pounders'\"]\n", "\n", - " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides', ...\"]\n", + " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides'\"]\n", " G[\"differentiable?\"] -- no --> K[\"test\"]\n", "\n", "```\n", From 8b3fbddd796de744edf25b0b84726a8bfcafdb70 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 14:46:24 +0100 Subject: [PATCH 22/28] Fix mermaid? --- docs/source/how_to/how_to_algorithm_selection.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index 1f5aa7172..3871379df 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -45,7 +45,7 @@ "graph LR\n", " classDef highlight fill:#FF4500;\n", " A[\"Do you have
nonlinear constraints?\"] -- yes --> B[\"differentiable?\"]\n", - " B[\"Is your objective function differentiable?\"] -- yes --> C[\"'ipopt', 'nlopt_slsqp', 'scipy_trust_constr'\"]\n", + " B[\"Is your objective function differentiable?\"] -- yes --> C[\"ipopt, nlopt_slsqp, scipy_trust_constr\"]\n", " B[\"differentiable?\"] -- no --> D[\"'scipy_cobyla', 'nlopt_cobyla'\"]\n", "\n", " A[\"Do you have
nonlinear constraints?\"] -- no --> E[\"Can you exploit
a least-squares
structure?\"]\n", @@ -56,7 +56,7 @@ " F[\"differentiable?\"] -- no --> I[\"'nag_dflos', 'pounders', 'tao_pounders'\"]\n", "\n", " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides'\"]\n", - " G[\"differentiable?\"] -- no --> K[\"test\"]\n", + " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel'\"]\n", "\n", "```\n", "\n", From 2db7f7d3ad24393b2ed531ac70e23109ff406d9b Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 15:06:19 +0100 Subject: [PATCH 23/28] Fix mermaid? --- docs/source/how_to/how_to_algorithm_selection.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index 3871379df..8bcb5ff6a 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -45,18 +45,18 @@ "graph LR\n", " classDef highlight fill:#FF4500;\n", " A[\"Do you have
nonlinear constraints?\"] -- yes --> B[\"differentiable?\"]\n", - " B[\"Is your objective function differentiable?\"] -- yes --> C[\"ipopt, nlopt_slsqp, scipy_trust_constr\"]\n", - " B[\"differentiable?\"] -- no --> D[\"'scipy_cobyla', 'nlopt_cobyla'\"]\n", + " B[\"Is your objective function differentiable?\"] -- yes --> C[\"ipopt nlopt_slsqp scipy_trust_constr\"]\n", + " B[\"differentiable?\"] -- no --> D[\"scipy_cobyla , nlopt_cobyla\"]\n", "\n", " A[\"Do you have
nonlinear constraints?\"] -- no --> E[\"Can you exploit
a least-squares
structure?\"]\n", " E[\"Can you exploit
a least-squares
structure?\"] -- yes --> F[\"differentiable?\"]\n", " E[\"Can you exploit
a least-squares
structure?\"] -- no --> G[\"differentiable?\"]\n", "\n", - " F[\"differentiable?\"] -- yes --> H[\"'scipy_ls_lm', 'scipy_ls_trf', 'scipy_ls_dogleg'\"]\n", - " F[\"differentiable?\"] -- no --> I[\"'nag_dflos', 'pounders', 'tao_pounders'\"]\n", + " F[\"differentiable?\"] -- yes --> H[\"scipy_ls_lm; scipy_ls_trf; scipy_ls_dogleg\"]\n", + " F[\"differentiable?\"] -- no --> I[\"nag_dflos, pounders, tao_pounders\"]\n", "\n", - " G[\"differentiable?\"] -- yes --> J[\"'scipy_lbfgsb', 'nlopt_lbfgsb', 'fides'\"]\n", - " G[\"differentiable?\"] -- no --> K[\"'nlopt_bobyqa', 'nlopt_neldermead', 'neldermead_parallel'\"]\n", + " G[\"differentiable?\"] -- yes --> J[\"scipy_lbfgsb, nlopt_lbfgsb, fides\"]\n", + " G[\"differentiable?\"] -- no --> K[\"nlopt_bobyqa, nlopt_neldermead, neldermead_parallel\"]\n", "\n", "```\n", "\n", From 57b8f45d94ad2b13e648a9a9146255241ba64479 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 15:15:38 +0100 Subject: [PATCH 24/28] Fix mermaid? --- docs/source/how_to/how_to_algorithm_selection.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index 8bcb5ff6a..9a5c237f6 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -53,9 +53,9 @@ " E[\"Can you exploit
a least-squares
structure?\"] -- no --> G[\"differentiable?\"]\n", "\n", " F[\"differentiable?\"] -- yes --> H[\"scipy_ls_lm; scipy_ls_trf; scipy_ls_dogleg\"]\n", - " F[\"differentiable?\"] -- no --> I[\"nag_dflos, pounders, tao_pounders\"]\n", + " F[\"differentiable?\"] -- no --> I[\"nag_dflos/pounders/tao_pounders\"]\n", "\n", - " G[\"differentiable?\"] -- yes --> J[\"scipy_lbfgsb, nlopt_lbfgsb, fides\"]\n", + " G[\"differentiable?\"] -- yes --> J[\"scipy_lbfgsb / nlopt_lbfgsb / fides\"]\n", " G[\"differentiable?\"] -- no --> K[\"nlopt_bobyqa, nlopt_neldermead, neldermead_parallel\"]\n", "\n", "```\n", From 374b9d387e7cdc2225b92b38c708be1bf2297043 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 15:26:51 +0100 Subject: [PATCH 25/28] Fix mermaid? --- docs/source/how_to/how_to_algorithm_selection.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index 9a5c237f6..fb9f26fb7 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -42,21 +42,21 @@ "This is a practical guide for narrowing down the set of algorithms to experiment with:\n", "\n", "```{mermaid}\n", - "graph LR\n", + "graph TD\n", " classDef highlight fill:#FF4500;\n", " A[\"Do you have
nonlinear constraints?\"] -- yes --> B[\"differentiable?\"]\n", - " B[\"Is your objective function differentiable?\"] -- yes --> C[\"ipopt nlopt_slsqp scipy_trust_constr\"]\n", + " B[\"Is your objective function differentiable?\"] -- yes --> C[\"ipopt , nlopt_slsqp , scipy_trust_constr\"]\n", " B[\"differentiable?\"] -- no --> D[\"scipy_cobyla , nlopt_cobyla\"]\n", "\n", " A[\"Do you have
nonlinear constraints?\"] -- no --> E[\"Can you exploit
a least-squares
structure?\"]\n", " E[\"Can you exploit
a least-squares
structure?\"] -- yes --> F[\"differentiable?\"]\n", " E[\"Can you exploit
a least-squares
structure?\"] -- no --> G[\"differentiable?\"]\n", "\n", - " F[\"differentiable?\"] -- yes --> H[\"scipy_ls_lm; scipy_ls_trf; scipy_ls_dogleg\"]\n", - " F[\"differentiable?\"] -- no --> I[\"nag_dflos/pounders/tao_pounders\"]\n", + " F[\"differentiable?\"] -- yes --> H[\"scipy_ls_lm , scipy_ls_trf , scipy_ls_dogleg\"]\n", + " F[\"differentiable?\"] -- no --> I[\"nag_dflos , pounders , tao_pounders\"]\n", "\n", - " G[\"differentiable?\"] -- yes --> J[\"scipy_lbfgsb / nlopt_lbfgsb / fides\"]\n", - " G[\"differentiable?\"] -- no --> K[\"nlopt_bobyqa, nlopt_neldermead, neldermead_parallel\"]\n", + " G[\"differentiable?\"] -- yes --> J[\"scipy_lbfgsb , nlopt_lbfgsb , fides\"]\n", + " G[\"differentiable?\"] -- no --> K[\"nlopt_bobyqa , nlopt_neldermead , neldermead_parallel\"]\n", "\n", "```\n", "\n", From 56cfe9b741d588d956496b939ce133c9374874f6 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 4 Nov 2024 15:41:29 +0100 Subject: [PATCH 26/28] Fix mermaid? --- .../how_to/how_to_algorithm_selection.ipynb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index fb9f26fb7..5b15c8352 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -42,21 +42,21 @@ "This is a practical guide for narrowing down the set of algorithms to experiment with:\n", "\n", "```{mermaid}\n", - "graph TD\n", + "graph LR\n", " classDef highlight fill:#FF4500;\n", - " A[\"Do you have
nonlinear constraints?\"] -- yes --> B[\"differentiable?\"]\n", - " B[\"Is your objective function differentiable?\"] -- yes --> C[\"ipopt , nlopt_slsqp , scipy_trust_constr\"]\n", - " B[\"differentiable?\"] -- no --> D[\"scipy_cobyla , nlopt_cobyla\"]\n", + " A[\"Do you have
nonlinear
constraints?\"] -- yes --> B[\"differentiable?\"]\n", + " B[\"Is your objective function differentiable?\"] -- yes --> C[\"ipopt
nlopt_slsqp
scipy_trust_constr\"]\n", + " B[\"differentiable?\"] -- no --> D[\"scipy_cobyla
nlopt_cobyla\"]\n", "\n", " A[\"Do you have
nonlinear constraints?\"] -- no --> E[\"Can you exploit
a least-squares
structure?\"]\n", " E[\"Can you exploit
a least-squares
structure?\"] -- yes --> F[\"differentiable?\"]\n", " E[\"Can you exploit
a least-squares
structure?\"] -- no --> G[\"differentiable?\"]\n", "\n", - " F[\"differentiable?\"] -- yes --> H[\"scipy_ls_lm , scipy_ls_trf , scipy_ls_dogleg\"]\n", - " F[\"differentiable?\"] -- no --> I[\"nag_dflos , pounders , tao_pounders\"]\n", + " F[\"differentiable?\"] -- yes --> H[\"scipy_ls_lm
scipy_ls_trf
scipy_ls_dogleg\"]\n",
+    "    F[\"differentiable?\"] -- no --> I[\"nag_dfols
pounders
tao_pounders\"]\n", "\n", - " G[\"differentiable?\"] -- yes --> J[\"scipy_lbfgsb , nlopt_lbfgsb , fides\"]\n", - " G[\"differentiable?\"] -- no --> K[\"nlopt_bobyqa , nlopt_neldermead , neldermead_parallel\"]\n", + " G[\"differentiable?\"] -- yes --> J[\"scipy_lbfgsb
nlopt_lbfgsb
fides\"]\n", + " G[\"differentiable?\"] -- no --> K[\"nlopt_bobyqa
nlopt_neldermead
neldermead_parallel\"]\n",
     "\n",
     "```\n",
     "\n",
From 863c063018416c5395091f48ac919f1d88954d80 Mon Sep 17 00:00:00 2001
From: Janos Gabler
Date: Tue, 5 Nov 2024 07:56:37 +0100
Subject: [PATCH 27/28] Last polishing.

---
 .../how_to/how_to_algorithm_selection.ipynb   | 28 ++++++++++---------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb
index 5b15c8352..fceb44038 100644
--- a/docs/source/how_to/how_to_algorithm_selection.ipynb
+++ b/docs/source/how_to/how_to_algorithm_selection.ipynb
@@ -15,17 +15,17 @@
     "\n",
     "- There is no optimizer that works well for all problems \n",
     "- Making the right choice can lead to enormous speedups\n",
-    "- Making the wrong choice can mean that you don't solve your problem at all. Sometimes, \n",
+    "- Making the wrong choice can mean that you [don't solve your problem at all](algo-selection-how-important). Sometimes,\n",
     "optimizers fail silently!\n",
     "\n",
     "\n",
     "## The three steps for selecting algorithms\n",
     "\n",
     "Algorithm selection is a mix of theory and experimentation. We recommend the following \n",
     "steps:\n",
     "\n",
     "1. **Theory**: Based on the properties of your problem, start with 3 to 5 candidate algorithms. \n",
-    "You may use the [decision tree below](link)\n",
+    "You may use the decision tree below.\n",
     "2. **Experiments**: Run the candidate algorithms for a small number of function \n",
     "evaluations and compare the results in a *criterion plot*. As a rule of thumb, use \n",
     "between `n_params` and `10 * n_params` evaluations. \n",
@@ -68,12 +68,12 @@
     "```{note}\n",
     "Many books on numerical optimization focus strongly on the inner workings of algorithms.\n",
     "They will, for example, describe the difference between a trust-region algorithm and a \n",
-    "line-search algorithm in a lot of detail. We have an [intuitive explanation](../explanation/explanation_of_numerical_optimizers.md) of this too. However, these details are not \n",
-    "very relevant for algorithm selection. For example, If you have a scalar, differentiable \n",
-    "problem without nonlinear constraints, the decision tree suggests `fides` and `lbfgsb`.\n",
-    "`fides` is a trust-region algorithm, `lbfgsb` is a line-search algorithm. Both are \n",
-    "designed to solve the same kinds of problems and which one works best needs to be \n",
-    "found out through experimentation.\n",
+    "line-search algorithm in a lot of detail. We have an [intuitive explanation](../explanation/explanation_of_numerical_optimizers.md) of this too. Understanding these details is important for configuring and\n",
+    "troubleshooting optimizations, but not for algorithm selection. For example, if you have\n",
+    "a scalar, differentiable problem without nonlinear constraints, the decision tree \n",
+    "suggests `fides` and two variants of `lbfgsb`. `fides` is a trust-region algorithm, \n",
+    "`lbfgsb` is a line-search algorithm. Both are designed to solve the same kinds of \n",
+    "problems and which one works best needs to be found out through experimentation.\n",
     "```\n",
     "\n",
@@ -134,7 +134,7 @@
     "Let's go through the decision tree for the Trid function:\n",
     "\n",
     "1. **No** nonlinear constraints our solution needs to satisfy\n",
-    "2. **No** no least-squares structure we can exploit \n",
+    "2. **No** least-squares structure we can exploit \n",
     "3. **Yes**, the function is differentiable. 
We even have a closed form gradient that \n", "we would like to use. \n", "\n", @@ -152,9 +152,9 @@ "### Step 2: Experiments\n", "\n", "To find out which algorithms work well for our problem, we simply run optimizations with\n", - "all algorithms in a loop and store the result in a dictionary. We limit the number of \n", - "function evaluations to 8. Since some algorithms only support a maximum number of iterations \n", - "as stopping criterion we also limit the number of iterations to 8.\n" + "all candidate algorithms in a loop and store the result in a dictionary. We limit the \n", + "number of function evaluations to 8. Since some algorithms only support a maximum number\n", + "of iterations as stopping criterion we also limit the number of iterations to 8.\n" ] }, { @@ -248,6 +248,8 @@ "experiments. See [here](how_to_derivatives.ipynb) to learn more about derivatives.\n", "\n", "\n", + "(algo-selection-how-important)=\n", + "\n", "## How important was it?\n", "\n", "The Trid function is differentiable and very well behaved in almost every aspect. \n", From 3652038ada575cbdb2bc6ed8d0d85b8fbebf1e87 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Nov 2024 07:57:41 +0100 Subject: [PATCH 28/28] [pre-commit.ci] pre-commit autoupdate (#546) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f5a8c66a3..aacd207c6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -68,7 +68,7 @@ repos: - --blank exclude: src/optimagic/optimization/algo_options.py - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.1 + rev: v0.7.2 hooks: # Run the linter. - id: ruff @@ -108,7 +108,7 @@ repos: - '88' files: (docs/.) - repo: https://github.com/kynan/nbstripout - rev: 0.7.1 + rev: 0.8.0 hooks: - id: nbstripout exclude: |
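
A condensed, self-contained sketch of the three-step workflow documented above (theory, experiments, optimization) can serve as a quick reference. It is illustrative rather than part of the patched notebook: the Trid implementation is written out from its standard definition, the candidate list and `algo_options` mirror the notebook, and `res.fun` / `res.params` are assumed to be the attributes of the result object returned by `om.minimize`. Running `nlopt_lbfgsb` and `fides` requires the optional `nlopt` and `fides` packages.

```python
import numpy as np
import optimagic as om


def trid(x):
    # Trid function: sum((x_i - 1)^2) - sum(x_i * x_{i-1}). It has a unique
    # global optimum and no other local optima.
    return ((x - 1) ** 2).sum() - (x[1:] * x[:-1]).sum()


# Step 1 (theory): candidates for a differentiable scalar problem without
# nonlinear constraints, taken from the decision tree.
candidates = ["scipy_lbfgsb", "nlopt_lbfgsb", "fides"]

# Step 2 (experiments): short runs of roughly n_params to 10 * n_params
# function evaluations each, compared in a criterion plot.
results = {}
for algo in candidates:
    results[algo] = om.minimize(
        fun=trid,
        params=np.arange(20),
        algorithm=algo,
        algo_options={"stopping_maxfun": 8, "stopping_maxiter": 8},
    )

fig = om.criterion_plot(results, max_evaluations=8)
fig.show(renderer="png")

# Step 3 (optimization): re-run the most promising candidate until
# convergence, warm-starting from the best parameters found so far.
best = min(results, key=lambda algo: results[algo].fun)
res = om.minimize(fun=trid, params=results[best].params, algorithm=best)
```

Using the dictionary keys as algorithm names means they double as legend entries in the criterion plot, which makes the step-2 comparison immediate.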
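The "least-squares structure" question in the decision tree is easiest to see with a minimal sketch. The decorator `om.mark.least_squares` is assumed here from recent optimagic releases; if it is absent in your version, consult the optimagic documentation for the equivalent mechanism. The same fitting problem can be posed in scalar form or in residual form, but only the residual form lets solvers such as `scipy_ls_lm` or `pounders` exploit the structure.

```python
import numpy as np
import optimagic as om

data = np.array([1.0, 2.0, 3.0, 4.0, 5.0])


def sum_of_squares(params):
    # Scalar form: the least-squares structure is hidden inside the sum,
    # so only general-purpose optimizers apply.
    return ((params - data) ** 2).sum()


# Residual form: return the residual vector itself instead of its squared
# sum. The decorator (assumed API, see lead-in) declares the structure.
@om.mark.least_squares
def residuals(params):
    return params - data


res = om.minimize(
    fun=residuals,
    params=np.zeros(5),
    algorithm="scipy_ls_lm",
)
```

Both functions describe the same problem; exposing the residuals simply gives least-squares solvers the extra information they are designed to use.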