summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEdoardo Pasca <edo.paskino@gmail.com>2019-11-19 10:00:25 +0000
committerGitHub <noreply@github.com>2019-11-19 10:00:25 +0000
commita1341271589a5bd3dad0def8198c49c37d1c197d (patch)
tree744476f5e9bb0d3b8219cdc689114a16e4fa5ab8
parentaa80145534d4fe325fbfbae6569bcb6709006d33 (diff)
downloadframework-a1341271589a5bd3dad0def8198c49c37d1c197d.tar.gz
framework-a1341271589a5bd3dad0def8198c49c37d1c197d.tar.bz2
framework-a1341271589a5bd3dad0def8198c49c37d1c197d.tar.xz
framework-a1341271589a5bd3dad0def8198c49c37d1c197d.zip
added armijo rule (#390)
* added armijo rule * added info and kwarg for alpha * updated Armijo rule * modified stop criterion to GD * added kmax to limit the while True loop * fix raise comment * change default stopping criterion * updated test * added Rosenbrock function and test Gradient descent on that * removed test code * added missing import
-rwxr-xr-xWrappers/Python/ccpi/optimisation/algorithms/Algorithm.py4
-rwxr-xr-xWrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py134
-rwxr-xr-xWrappers/Python/ccpi/optimisation/functions/Rosenbrock.py78
-rw-r--r--Wrappers/Python/ccpi/optimisation/functions/__init__.py1
-rwxr-xr-xWrappers/Python/test/test_algorithms.py57
-rw-r--r--Wrappers/Python/test/test_functions.py11
6 files changed, 254 insertions, 31 deletions
diff --git a/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py b/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py
index df8dbf5..408a904 100755
--- a/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py
+++ b/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py
@@ -197,6 +197,10 @@ class Algorithm(object):
if verbose:
print (self.verbose_output(very_verbose))
break
+ if verbose:
+ if self.update_objective_interval != 1:
+ print (self.verbose_output(very_verbose))
+ print ("Stop criterion has been reached.")
def verbose_output(self, verbose=False):
'''Creates a nice tabulated output'''
diff --git a/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py b/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py
index 8f9c958..5e7284a 100755
--- a/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py
+++ b/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py
@@ -24,7 +24,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
-
+import numpy
from ccpi.optimisation.algorithms import Algorithm
class GradientDescent(Algorithm):
@@ -35,7 +35,7 @@ class GradientDescent(Algorithm):
'''
- def __init__(self, x_init=None, objective_function=None, rate=None, **kwargs):
+ def __init__(self, x_init=None, objective_function=None, step_size =None, **kwargs):
'''GradientDescent algorithm creator
initialisation can be done at creation time if all
@@ -43,51 +43,135 @@ class GradientDescent(Algorithm):
:param x_init: initial guess
:param objective_function: objective function to be minimised
- :param rate: step rate
+ :param step_size: step size for gradient descent iteration
+ :param alpha: optional parameter to start the backtracking algorithm, default 1e6
+ :param beta: optional parameter defining the reduction of step, default 0.5.
+ It's value can be in (0,1)
+ :param rtol: optional parameter defining the relative tolerance comparing the
+ current objective function to 0, default 1e-5, see numpy.isclose
+ :param atol: optional parameter defining the absolute tolerance comparing the
+ current objective function to 0, default 1e-8, see numpy.isclose
'''
super(GradientDescent, self).__init__(**kwargs)
-
- if x_init is not None and objective_function is not None and rate is not None:
- self.set_up(x_init=x_init, objective_function=objective_function, rate=rate)
-
- def should_stop(self):
- '''stopping criterion, currently only based on number of iterations'''
- return self.iteration >= self.max_iteration
+ self.alpha = kwargs.get('alpha' , 1e6)
+ self.beta = kwargs.get('beta', 0.5)
+ self.rtol = kwargs.get('rtol', 1e-5)
+ self.atol = kwargs.get('atol', 1e-8)
+ if x_init is not None and objective_function is not None :
+ self.set_up(x_init=x_init, objective_function=objective_function, step_size=step_size)
- def set_up(self, x_init, objective_function, rate):
+ def set_up(self, x_init, objective_function, step_size):
'''initialisation of the algorithm
:param x_init: initial guess
:param objective_function: objective function to be minimised
- :param rate: step rate'''
+ :param step_size: step size'''
print("{} setting up".format(self.__class__.__name__, ))
self.x = x_init.copy()
self.objective_function = objective_function
- self.rate = rate
+
- self.loss.append(objective_function(x_init))
+ if step_size is None:
+ self.k = 0
+ self.update_step_size = True
+ self.x_armijo = x_init.copy()
+ # self.rate = self.armijo_rule() * 2
+ # print (self.rate)
+ else:
+ self.step_size = step_size
+ self.update_step_size = False
+
+
+ self.update_objective()
self.iteration = 0
- try:
- self.memopt = self.objective_function.memopt
- except AttributeError as ae:
- self.memopt = False
- if self.memopt:
- self.x_update = x_init.copy()
+ self.x_update = x_init.copy()
self.configured = True
print("{} configured".format(self.__class__.__name__, ))
def update(self):
'''Single iteration'''
- if self.memopt:
- self.objective_function.gradient(self.x, out=self.x_update)
- self.x_update *= -self.rate
- self.x += self.x_update
+
+ self.objective_function.gradient(self.x, out=self.x_update)
+
+ if self.update_step_size:
+ # the next update and solution are calculated within the armijo_rule
+ self.step_size = self.armijo_rule()
else:
- self.x += -self.rate * self.objective_function.gradient(self.x)
+ self.x_update *= -self.step_size
+ self.x += self.x_update
+
+
def update_objective(self):
self.loss.append(self.objective_function(self.x))
+
+ def armijo_rule(self):
+ '''Applies the Armijo rule to calculate the step size (step_size)
+
+ https://projecteuclid.org/download/pdf_1/euclid.pjm/1102995080
+
+ The Armijo rule runs a while loop to find the appropriate step_size by starting
+ from a very large number (alpha). The step_size is found by dividing alpha by 2
+ in an iterative way until a certain criterion is met. To avoid infinite loops, we
+ add a maximum number of times the while loop is run.
+
+ This rule would allow to reach a minimum step_size of 10^-alpha.
+
+ if
+ alpha = numpy.power(10,gamma)
+ delta = 3
+ step_size = numpy.power(10, -delta)
+ with armijo rule we can get to step_size from initial alpha by repeating the while loop k times
+ where
+ alpha / 2^k = step_size
+ 10^gamma / 2^k = 10^-delta
+ 2^k = 10^(gamma+delta)
+ k = gamma+delta / log10(2) \\approx 3.3 * (gamma+delta)
+
+ if we would take by default delta = gamma
+ kmax = numpy.ceil ( 2 * gamma / numpy.log10(2) )
+ kmax = numpy.ceil (2 * numpy.log10(alpha) / numpy.log10(2))
+
+ '''
+ f_x = self.objective_function(self.x)
+ if not hasattr(self, 'x_update'):
+ self.x_update = self.objective_function.gradient(self.x)
+
+ while self.k < self.kmax:
+ # self.x - alpha * self.x_update
+ self.x_update.multiply(self.alpha, out=self.x_armijo)
+ self.x.subtract(self.x_armijo, out=self.x_armijo)
+
+ f_x_a = self.objective_function(self.x_armijo)
+ sqnorm = self.x_update.squared_norm()
+ if f_x_a - f_x <= - ( self.alpha/2. ) * sqnorm:
+ self.x.fill(self.x_armijo)
+ break
+ else:
+ self.k += 1.
+ # we don't want to update kmax
+ self._alpha *= self.beta
+
+ if self.k == self.kmax:
+ raise ValueError('Could not find a proper step_size in {} loops. Consider increasing alpha.'.format(self.kmax))
+ return self.alpha
+
+ @property
+ def alpha(self):
+ return self._alpha
+ @alpha.setter
+ def alpha(self, alpha):
+ self._alpha = alpha
+ self.kmax = numpy.ceil (2 * numpy.log10(alpha) / numpy.log10(2))
+ self.k = 0
+
+ def should_stop(self):
+ return self.max_iteration_stop_cryterion() or \
+ numpy.isclose(self.get_last_objective(), 0., rtol=self.rtol,
+ atol=self.atol, equal_nan=False)
+
+
diff --git a/Wrappers/Python/ccpi/optimisation/functions/Rosenbrock.py b/Wrappers/Python/ccpi/optimisation/functions/Rosenbrock.py
new file mode 100755
index 0000000..03abf7a
--- /dev/null
+++ b/Wrappers/Python/ccpi/optimisation/functions/Rosenbrock.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+#========================================================================
+# Copyright 2019 Science Technology Facilities Council
+# Copyright 2019 University of Manchester
+#
+# This work is part of the Core Imaging Library developed by Science Technology
+# Facilities Council and University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#=========================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import numpy
+from ccpi.optimisation.functions import Function
+from ccpi.framework import VectorData, VectorGeometry
+
+class Rosenbrock(Function):
+ r'''Rosenbrock function
+
+ .. math::
+
+ F(x,y) = (\alpha - x)^2 + \beta(y-x^2)^2
+
+ The function has a global minimum at .. math:: (x,y)=(\alpha, \alpha^2)
+
+ '''
+ def __init__(self, alpha, beta):
+ super(Rosenbrock, self).__init__()
+
+ self.alpha = alpha
+ self.beta = beta
+
+ def __call__(self, x):
+ if not isinstance(x, VectorData):
+ raise TypeError('Rosenbrock function works on VectorData only')
+ vec = x.as_array()
+ a = (self.alpha - vec[0])
+ b = (vec[1] - (vec[0]*vec[0]))
+ return a * a + self.beta * b * b
+
+ def gradient(self, x, out=None):
+ r'''Gradient of the Rosenbrock function
+
+ .. math::
+
+ \nabla f(x,y) = \left[ 2*((x-\alpha) - 2\beta x(y-x^2)) ; 2\beta (y - x^2) \right]
+
+ '''
+ if not isinstance(x, VectorData):
+ raise TypeError('Rosenbrock function works on VectorData only')
+
+ vec = x.as_array()
+ a = (vec[0] - self.alpha)
+ b = (vec[1] - (vec[0]*vec[0]))
+
+ res = numpy.empty_like(vec)
+ res[0] = 2 * ( a - 2 * self.beta * vec[0] * b)
+ res[1] = 2 * self.beta * b
+
+ if out is not None:
+ out.fill (res)
+ else:
+ return VectorData(res)
+
diff --git a/Wrappers/Python/ccpi/optimisation/functions/__init__.py b/Wrappers/Python/ccpi/optimisation/functions/__init__.py
index 67abeef..d968539 100644
--- a/Wrappers/Python/ccpi/optimisation/functions/__init__.py
+++ b/Wrappers/Python/ccpi/optimisation/functions/__init__.py
@@ -26,3 +26,4 @@ from .MixedL21Norm import MixedL21Norm
from .IndicatorBox import IndicatorBox
from .KullbackLeibler import KullbackLeibler
from .Norm2Sq import Norm2Sq
+from .Rosenbrock import Rosenbrock
diff --git a/Wrappers/Python/test/test_algorithms.py b/Wrappers/Python/test/test_algorithms.py
index db13b97..7a312f9 100755
--- a/Wrappers/Python/test/test_algorithms.py
+++ b/Wrappers/Python/test/test_algorithms.py
@@ -66,24 +66,73 @@ class TestAlgorithms(unittest.TestCase):
identity = Identity(ig)
norm2sq = Norm2Sq(identity, b)
- rate = 0.3
rate = norm2sq.L / 3.
alg = GradientDescent(x_init=x_init,
objective_function=norm2sq,
- rate=rate)
+ rate=rate, atol=1e-9, rtol=1e-6)
+ alg.max_iteration = 1000
+ alg.run()
+ self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array())
+ alg = GradientDescent(x_init=x_init,
+ objective_function=norm2sq,
+ rate=rate, max_iteration=20,
+ update_objective_interval=2,
+ atol=1e-9, rtol=1e-6)
alg.max_iteration = 20
+ self.assertTrue(alg.max_iteration == 20)
+ self.assertTrue(alg.update_objective_interval==2)
alg.run(20, verbose=True)
self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array())
+ def test_GradientDescentArmijo(self):
+ print ("Test GradientDescent")
+ ig = ImageGeometry(12,13,14)
+ x_init = ig.allocate()
+ # b = x_init.copy()
+ # fill with random numbers
+ # b.fill(numpy.random.random(x_init.shape))
+ b = ig.allocate('random')
+ identity = Identity(ig)
+
+ norm2sq = Norm2Sq(identity, b)
+ rate = None
+
+ alg = GradientDescent(x_init=x_init,
+ objective_function=norm2sq, rate=rate)
+ alg.max_iteration = 100
+ alg.run()
+ self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array())
alg = GradientDescent(x_init=x_init,
objective_function=norm2sq,
- rate=rate, max_iteration=20,
+ max_iteration=20,
update_objective_interval=2)
- alg.max_iteration = 20
+ #alg.max_iteration = 20
self.assertTrue(alg.max_iteration == 20)
self.assertTrue(alg.update_objective_interval==2)
alg.run(20, verbose=True)
self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array())
+ def test_GradientDescentArmijo2(self):
+ from ccpi.optimisation.functions import Rosenbrock
+ from ccpi.framework import VectorData, VectorGeometry
+
+ f = Rosenbrock (alpha = 1., beta=100.)
+ vg = VectorGeometry(2)
+ x = vg.allocate('random_int', seed=2)
+ # x = vg.allocate('random', seed=1)
+ x.fill(numpy.asarray([10.,-3.]))
+
+ max_iter = 1000000
+ update_interval = 100000
+
+ alg = GradientDescent(x, f, max_iteration=max_iter, update_objective_interval=update_interval, alpha=1e6)
+
+ alg.run()
+
+ print (alg.get_output().as_array(), alg.step_size, alg.kmax, alg.k)
+
+ numpy.testing.assert_array_almost_equal(alg.get_output().as_array(), [1,1], decimal = 1)
+ numpy.testing.assert_array_almost_equal(alg.get_output().as_array(), [0.982744, 0.965725], decimal = 6)
+
def test_CGLS(self):
print ("Test CGLS")
#ig = ImageGeometry(124,153,154)
diff --git a/Wrappers/Python/test/test_functions.py b/Wrappers/Python/test/test_functions.py
index 550f1de..d295234 100644
--- a/Wrappers/Python/test/test_functions.py
+++ b/Wrappers/Python/test/test_functions.py
@@ -17,8 +17,8 @@
# limitations under the License.
import numpy as np
-from ccpi.optimisation.functions import Function, KullbackLeibler
-from ccpi.framework import DataContainer, ImageData, ImageGeometry
+from ccpi.optimisation.functions import Function, KullbackLeibler, Rosenbrock
+from ccpi.framework import DataContainer, ImageData, ImageGeometry , VectorData
from ccpi.optimisation.operators import Identity
from ccpi.optimisation.operators import BlockOperator
from ccpi.framework import BlockDataContainer
@@ -371,3 +371,10 @@ class TestFunction(unittest.TestCase):
proxc = f.proximal_conjugate(x,1.2)
f.proximal_conjugate(x, 1.2, out=out)
numpy.testing.assert_array_equal(proxc.as_array(), out.as_array())
+
+ def test_Rosenbrock(self):
+ f = Rosenbrock (alpha = 1, beta=100)
+ x = VectorData(numpy.asarray([1,1]))
+ assert f(x) == 0.
+ numpy.testing.assert_array_almost_equal( f.gradient(x).as_array(), numpy.zeros(shape=(2,), dtype=numpy.float32))
+