agd.AutomaticDifferentiation.Optimization

# Copyright 2020 Jean-Marie Mirebeau, University Paris-Sud, CNRS, University Paris-Saclay
# Distributed WITHOUT ANY WARRANTY. Licensed under the Apache License, Version 2.0, see http://www.apache.org/licenses/LICENSE-2.0

import itertools
import math
import numpy as np
import copy
from . import Dense
from . import Sparse
from . import ad_generic
from .ad_generic import remove_ad,is_ad

# Newton minimize
from . import Dense2
from . import Sparse2

def norm(arr,ord=2,axis=None,keepdims=False,averaged=False):
	"""
	Returns the L^p norm of an array, seen as a flat vector.
	Defined as : (sum_i |x[i]|^p)^(1/p)

	Remark : this is not a matrix operator norm.

	Inputs:
	 - ord : exponent p
	 - axis : int or None, axis along which to compute the norm.
	 - keepdims : whether to keep singleton dimensions.
	 - averaged : whether to introduce a normalization factor, so that norm(ones(...))=1

	Compatible with automatic differentiation.
	"""
	arr = ad_generic.array(arr)
	if ord==np.inf or ord%2!=0:
		try: arr = np.abs(arr)
		except TypeError: arr = np.vectorize(np.abs)(arr)
	if ord==np.inf: return np.max(arr,axis=axis,keepdims=keepdims)
	sum_pow = np.sum(arr**ord,axis=axis,keepdims=keepdims)

	if averaged:
		size = arr.size if axis is None else arr.shape[axis]
		sum_pow/=size

	return sum_pow**(1./ord)

def norm_infinity(arr,*args,**kwargs):
	"""
	L-Infinity norm (largest absolute value)
	"""
	return norm(arr,np.inf,*args,**kwargs)

def norm_average(arr,*args,**kwargs):
	"""
	Averaged L1 norm (sum of absolute values divided by array size)
	"""
	return norm(arr,1,*args,**kwargs,averaged=True)

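For instance, on a plain numpy vector (a small usage sketch; the array v is illustrative, and the module is imported from the path given in the header above):

import numpy as np
from agd.AutomaticDifferentiation.Optimization import norm, norm_infinity, norm_average

v = np.array([3.,-4.])
norm(v,ord=2)     # (3**2 + 4**2)**(1/2) = 5.0
norm_infinity(v)  # max(|3|,|-4|)        = 4.0
norm_average(v)   # (|3|+|-4|)/2         = 3.5
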
class stop_default:
	"""
	Default stopping criterion for the Newton method, used via the method __call__.
	Parameters :
	* residue_tol : target tolerance on the residue infinity norm
	* niter_max : max iterations before aborting
	* raise_on_abort : whether to raise an exception if aborting
	* niter_print : generator for which iterations to print the state
	"""
	def __init__(
		self,residue_tol=1e-8,niter_max=50,raise_on_abort=True,
		niter_print="Default",
		verbosity=3
		):
		self.residue_tol	=residue_tol
		self.niter_max 		=niter_max
		self.raise_on_abort	=raise_on_abort

		if niter_print=="Default":
			niter_print = itertools.chain(range(1,6),range(6,16,2),itertools.count(16,4))
		self.niter_print_iter = iter(copy.deepcopy(niter_print))
		self.niter_print_next = next(self.niter_print_iter) # Next iteration to print
		self.niter_print_last = None # Last iteration printed

		self.residue_norms = []
		self.verbosity=verbosity

	def abort(self):
		if self.raise_on_abort: raise ValueError("Newton solver did not reach convergence")
		return True

	def __call__(self,residue,niter):
		"""
		Decides whether to stop the Newton solver, and which information to print.
		Input :
		 - residue : current error residual
		 - niter : current iteration number
		Output :
		 - a boolean : whether to stop the Newton solver at the current iteration
		"""
		residue_norm = norm_infinity(remove_ad(residue))
		self.residue_norms.append(residue_norm)

		def print_state():
			if niter!=self.niter_print_last:
				self.niter_print_last = niter
				if self.verbosity>=3: print("Iteration:",niter," Residue norm:",residue_norm)

		if niter>=self.niter_print_next:
			print_state()
			self.niter_print_next = next(self.niter_print_iter)

		if residue_norm<self.residue_tol:
			print_state()
			if self.verbosity>=2: print("Target residue reached. Terminating.")
			return True

		if np.isnan(residue_norm):
			print_state()
			if self.verbosity>=1: print("Residue has NaNs. Aborting.")
			return self.abort()

		if niter>=self.niter_max:
			print_state()
			if self.verbosity>=1: print("Max iterations exceeded. Aborting.")
			return self.abort()

		return False

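The criterion can be configured directly, or handed to newton_root below as a dict of the same keyword arguments (a minimal sketch; the tolerance and iteration budget are illustrative):

stop = stop_default(residue_tol=1e-10,niter_max=100,raise_on_abort=False)
# Equivalently, newton_root builds the criterion from a dict:
#   newton_root(f,x0,stop={'residue_tol':1e-10,'niter_max':100,'raise_on_abort':False})
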
class damping_default:
	"""
	Damping of the Newton steps : the step is repeatedly divided by refine_factor
	while the criterion rejects the tentative point, until step_min is undershot.
	"""
	def __init__(self,criterion=None,refine_factor=2.,step_min=2.e-3,raise_on_abort=False):
		self.criterion=criterion
		self.refine_factor=refine_factor
		self.step_min=step_min
		self.raise_on_abort=raise_on_abort
		self.steps = []

	def __call__(self,x,direction,*cargs):
		step = 1.
		while self.criterion(x+step*direction,*cargs):
			step/=self.refine_factor
			if step<self.step_min:
				print("Minimal damping undershot. Aborting.")
				if self.raise_on_abort:
					raise ValueError
				break
		self.steps.append(step)
		return step

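The criterion is a callable returning True when a tentative point must be rejected; for instance (a sketch with a hypothetical positivity constraint, the name leaves_domain being chosen here for illustration):

def leaves_domain(x,*cargs):
	# Reject tentative iterates having a non-positive entry.
	return np.any(x<=0)

damping = damping_default(criterion=leaves_domain)
# Passed as newton_root(f,x0,damping=damping), this divides each Newton step
# by refine_factor until x + step*direction remains admissible.
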
def newton_root(f,x0,fargs=tuple(),stop="Default",
	relax=None,damping=None,ad="Sparse",solver=None,in_place=False):
	"""
	Newton's method, for finding a root of a given function.
	f : function whose root is sought
	x0 : initial guess for the root
	stop : stopping criterion, presented as either
	   - keyword "Default" for using the stop_default class
	   - a dict for using the stop_default class with specified initialization arguments
	   - a callable
	relax : added to the jacobian before inversion
	damping : criterion for step reduction
	ad : is either
	   - keyword "Sparse" for using Sparse AD (default)
	   - keyword "Dense" for using Dense AD
	   - a shape_bound given as a tuple, for Dense AD with few independent variables
	"""

	if stop == "Default": 	stop = stop_default()
	elif isinstance(stop,dict): stop = stop_default(**stop)
	if ad == "Dense":		ad = tuple() # Tuple represents shape_bound

	# Create a variable featuring AD information
	def ad_var(x0):
		if not in_place: x0=np.copy(x0)

		if ad == "Sparse":		   return Sparse.identity(constant=x0)
		elif isinstance(ad,tuple): return Dense.identity( constant=x0,shape_bound=ad)
		assert False

	# Find Newton's descent direction
	def descent_direction(residue):
		if relax is not None: residue+=relax # relax is usually a multiple of the identity

		if solver is not None:     return solver(residue)
		elif ad == "Sparse":       return residue.solve() # Sparse ad
		elif isinstance(ad,tuple): return residue.solve(shape_bound=ad) # Dense ad

	x=ad_var(x0)
	for niter in itertools.count():
		residue = f(x,*fargs)
		if stop(residue,niter): return remove_ad(x)

		direction = descent_direction(residue)

		step = 1. if damping is None else damping(remove_ad(x),direction,*fargs)
		x += step*direction

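A usage sketch with the default Sparse AD (the quadratic f below is illustrative, chosen so that the exact roots are known):

import numpy as np
from agd.AutomaticDifferentiation.Optimization import newton_root

def f(x,c): return x**2 - c  # elementwise roots: sqrt(c)

root = newton_root(f,np.ones(3),fargs=(np.array([1.,4.,9.]),))
# root ≈ [1.,2.,3.]
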
def newton_minimize(f,x0,fargs=tuple(),
	f_value_gradient_direction="Sparse2",
	step_min=0.01,maxiter=50,verbosity=1,
	δ_atol=1e-9,δ_rtol=1e-9,δ_ntol=3,δ_nneg=3):
	"""
	Inputs
	- f : function to minimize
	- x0 : initial guess
	- fargs (optional) : additional arguments for f
	- f_value_gradient_direction (optional) : method for computing the value, gradient,
	   and descent direction of f, which can be either of
	   - "Sparse2" or "Dense2" : automatic differentiation is applied to f
	   - a callable, with the same arguments as f, returning a Sparse2 or Dense2 result
	   - a callable, with the same arguments as f, returning a tuple (value,gradient,direction)
	- step_min (optional) : minimum admissible step for the damped Newton method
	- maxiter (optional) : max number of iterations
	- verbosity (optional) : amount of information displayed
	- δ_atol,δ_rtol,δ_ntol,δ_nneg (optional) : parameters of the stopping criterion
	"""
	if verbosity<=0: kiter_print=iter([-1,-1])
	elif verbosity==1: kiter_print = itertools.chain(range(1,6),range(6,16,2),itertools.count(16,4))
	elif verbosity>=2: kiter_print = itertools.count(1)
	kiter_print_next=next(kiter_print)

	step_min = 2.**np.ceil(np.log2(step_min)) # Round to a power of two
	def tol(v): return δ_atol+np.abs(v)*δ_rtol

	x = x0.copy() # Updated variable throughout the iterations
	f_vgd = f_value_gradient_direction
	if isinstance(f_vgd,str):
		if   f_vgd=="Sparse2": x_ad = Sparse2.identity(x.shape)
		elif f_vgd=="Dense2":  x_ad = Dense2.identity(x.shape)
		else: raise ValueError(f"Unrecognized value {f_vgd} of f_value_gradient_direction")
		def f_vgd(x,*fargs): # Overrides previous definition
			x_ad.value=x
			return f(x_ad,*fargs)

	y = f_vgd(x,*fargs)
	if isinstance(y,tuple):
		assert len(y)==3 # Directly get value,gradient,direction
		vgd = lambda x : f_vgd(x,*fargs)
	else:
		def split(y):
			if not is_ad(y): # Allow returning np.nan or np.inf in case of error
				assert not np.isfinite(y)
				return y,None,None
			return y.value,y.to_first().to_dense().coef,y.solve_stationnary()
		y = split(y)
		def vgd(x): return split(f_vgd(x,*fargs))
	if verbosity>=0: print(f"Initialization, objective {y[0]}")

	δ_ktol = 0 # Number of consecutive improvements below tolerance
	δ_kneg = 0 # Number of consecutive deteriorations of the solution
	exit=False

	s = 1. # Newton step size, can be damped, always a power of two
	for i in range(1,maxiter):
		v,g,d = y
		# Try with current step
		x += s*d
		y = vgd(x)
		δ = v - y[0]

		# Adjust damping for this step, if needed
		δtol = tol(v)
		if not δ > -δtol and s>step_min:
			while s>step_min:
				s/=2.
				x-=s*d
				δ = v - f(x,*fargs)
				if δ > -δtol: break
			else:
				if verbosity>=-1: print(f"Warning : minimal step size {s} deteriorates the objective")
			y = vgd(x)

		# Adjust damping for the next step
		gd = np.dot(g,d) # Expected to be non-negative
		δ0 = gd*(s-s**2/2.) # Expected improvement if the function was quadratic
		if δ>=δ0/2 and s<1: s*=2. # Looks quadratic, increase step
		if δ<=δ0/4 and s>step_min: s/=2. # Looks non-linear, decrease step

		# Stopping criteria
		v=y[0]
		δtol = tol(v)
		if not np.isfinite(v):
			if verbosity>=-2: print("Infinite objective value. Aborting.")
			exit=True

		if np.abs(δ)<δtol:
			δ_ktol+=1
			if δ_ktol==δ_ntol:
				if verbosity>=0: print("Convergence criterion satisfied. Terminating.")
				exit=True
		else: δ_ktol=0

		if δ<=-δtol:
			δ_kneg+=1
			if δ_kneg==δ_nneg:
				if verbosity>=-2: print("Stalling. Aborting.")
				exit=True
		else: δ_kneg=0

		# Display
		if i==kiter_print_next or exit:
			kiter_print_next = next(kiter_print)
			if verbosity>=0: print(f"Iteration {i}, Newton step {s}, objective {v}.")

		if exit: break

	return x
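
Finally, a sketch of newton_minimize with the default second-order Sparse AD (a minimal example, assuming the objective is built from AD-compatible numpy operations; the target vector a is illustrative, and since f is quadratic a single full Newton step reaches the minimizer):

import numpy as np
from agd.AutomaticDifferentiation.Optimization import newton_minimize

a = np.array([1.,2.,3.])
def f(x): return np.sum((x-a)**2)

xmin = newton_minimize(f,np.zeros(3))
# xmin ≈ a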