agd.AutomaticDifferentiation.Dense

# Copyright 2020 Jean-Marie Mirebeau, University Paris-Sud, CNRS, University Paris-Saclay
# Distributed WITHOUT ANY WARRANTY. Licensed under the Apache License, Version 2.0, see http://www.apache.org/licenses/LICENSE-2.0

import functools
import numpy as np
from . import functional
from . import Base
from . import ad_generic
from . import misc


_add_dim = misc._add_dim; _add_coef = misc._add_coef

class denseAD(Base.baseAD):
	"""
	A class for dense forward automatic differentiation
	"""

	def __init__(self,value,coef=None,broadcast_ad=False):
		if self.is_ad(value): # Case where one should just reproduce value
			assert coef is None
			self.value,self.coef = value.value,value.coef
			return
		if ad_generic.is_ad(value):
			raise Base.ADCastError(f"Attempting to cast {type(value)} to incompatible type denseAD")
		self.value = ad_generic.asarray(value)
		self.coef = (np.zeros_like(value,shape=self.shape+(0,)) if coef is None
			else misc._test_or_broadcast_ad(coef,self.shape,broadcast_ad))

	@classmethod
	def order(cls): return 1
	def copy(self,order='C'):
		return self.new(self.value.copy(order=order),self.coef.copy(order=order))
	def as_tuple(self): return self.value,self.coef

	# Representation
	def __iter__(self):
		for value,coef in zip(self.value,self.coef):
			yield self.new(value,coef)
	def __str__(self):
		return "denseAD("+str(self.value)+","+misc._prep_nl(str(self.coef))+")"
	def __repr__(self):
		return "denseAD("+repr(self.value)+","+misc._prep_nl(repr(self.coef))+")"

	# Operators
	def as_func(self,h):
		"""Replaces the symbolic perturbation with h"""
		value,coef = (misc.add_ndim(e,h.ndim-1) for e in (self.value,self.coef))
		return value+(coef*h).sum(axis=self.ndim)

	def __add__(self,other):
		if self.is_ad(other):
			return self.new(self.value+other.value, _add_coef(self.coef,other.coef))
		else:
			return self.new(self.value+other, self.coef, broadcast_ad=True)

	def __sub__(self,other):
		if self.is_ad(other):
			return self.new(self.value-other.value, _add_coef(self.coef,-other.coef))
		else:
			return self.new(self.value-other, self.coef, broadcast_ad=True)

	def __mul__(self,other):
		if self.is_ad(other):
			return self.new(self.value*other.value,_add_coef(_add_dim(other.value)*self.coef,
				_add_dim(self.value)*other.coef))
		elif self.isndarray(other):
			return self.new(self.value*other,_add_dim(other)*self.coef)
		else:
			return self.new(self.value*other,other*self.coef)

	def __truediv__(self,other):
		if self.is_ad(other):
			return self.new(self.value/other.value,
				_add_coef(_add_dim(1/other.value)*self.coef,
					_add_dim(-self.value/other.value**2)*other.coef))
		elif self.isndarray(other):
			return self.new(self.value/other,_add_dim(1./other)*self.coef)
		else:
			return self.new(self.value/other,(1./other)*self.coef)

	__rmul__ = __mul__
	__radd__ = __add__
	def __rsub__(self,other):     return -(self-other)
	def __rtruediv__(self,other): return self.new(other/self.value,
		_add_dim(-other/self.value**2)*self.coef)

	def __neg__(self): return self.new(-self.value,-self.coef)

	# Math functions
	def _math_helper(self,deriv):
		a,b = deriv
		return self.new(a,_add_dim(b)*self.coef)

	@classmethod
	def compose(cls,a,t):
		assert cls.is_ad(a) and all(cls.is_ad(b) for b in t)
		b = np.moveaxis(cls.concatenate(t,axis=0),0,-1)
		coef = (_add_dim(a.coef)*b.coef).sum(axis=-2)
		return cls(a.value,coef)

	# Indexing
	@property
	def size_ad(self): return self.coef.shape[-1]

	def gradient(self,i=None):
		"""Returns the gradient, or the i-th component of the gradient if specified."""
		grad = np.moveaxis(self.coef,-1,0)
		return grad if i is None else grad[i]

	def __getitem__(self,key):
		ekey = misc.key_expand(key)
		return self.new(self.value[key], self.coef[ekey])

	def __setitem__(self,key,other):
		ekey = misc.key_expand(key)
		if self.is_ad(other):
			if other.size_ad==0: return self.__setitem__(key,other.value)
			elif self.size_ad==0:
				self.coef = np.zeros_like(self.value,shape=self.shape+(other.size_ad,))
			self.value[key] = other.value
			self.coef[ekey] = other.coef
		else:
			self.value[key] = other
			self.coef[ekey] = 0.

	def reshape(self,shape,order='C'):
		value = self.value.reshape(shape,order=order)
		return self.new(value,self.coef.reshape(value.shape+(self.size_ad,),order=order))

	def broadcast_to(self,shape):
		shape2 = shape+(self.size_ad,)
		return self.new(np.broadcast_to(self.value,shape), np.broadcast_to(self.coef,shape2))

	def pad(self,pad_width,*args,constant_values=0,**kwargs):
		return self.new(
			np.pad(self.value,pad_width,*args,constant_values=constant_values,**kwargs),
			np.pad(self.coef,pad_width+((0,0),),*args,constant_values=0,**kwargs))

	def transpose(self,axes=None):
		if axes is None: axes = tuple(reversed(range(self.ndim)))
		axes2 = tuple(axes)+(self.ndim,)
		return self.new(self.value.transpose(axes),self.coef.transpose(axes2))

	def allclose(self,other,*args,**kwargs):
		return np.allclose(self.value,other.value,*args,**kwargs) and \
		       np.allclose(self.coef, other.coef, *args,**kwargs)

	# Reductions
	def sum(self,axis=None,out=None,**kwargs):
		if axis is None: return self.flatten().sum(axis=0,out=out,**kwargs)
		axis = misc.normalize_axis(axis,self.ndim)
		out = self.new(self.value.sum(axis,**kwargs), self.coef.sum(axis,**kwargs))
		return out

	# Numerical
	def solve(self,shape_free=None,shape_bound=None):
		shape_free,shape_bound = ad_generic._set_shape_free_bound(self.shape,shape_free,shape_bound)
		assert np.prod(shape_free)==self.size_ad
		v = np.moveaxis(np.reshape(self.value,(self.size_ad,)+shape_bound),0,-1)
		a = np.moveaxis(np.reshape(self.coef,(self.size_ad,)+shape_bound+(self.size_ad,)),0,-2)
		return -np.reshape(np.moveaxis(np.linalg.solve(a,v),-1,0),self.shape)
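
	# Added note (not in the original source): for y = f(x) with x = identity(...)
	# defined below, y.coef holds the Jacobian of f, and y.solve() returns the
	# increment d such that y.value + y.coef @ d = 0, i.e. one Newton step.
	# >>> x = identity(constant=np.array([1.,2.]))
	# >>> (x*x-2.).solve()   # Newton step for x*x = 2 : gives [0.5,-0.5]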

	@classmethod
	def concatenate(cls,elems,axis=0):
		axis1 = axis if axis>=0 else axis-1
		elems2 = tuple(cls(e) for e in elems)
		size_ad = max(e.size_ad for e in elems2)
		assert all((e.size_ad==size_ad or e.size_ad==0) for e in elems2)
		return cls(
			np.concatenate(tuple(e.value for e in elems2), axis=axis),
			np.concatenate(tuple(e.coef if e.size_ad==size_ad else
				np.zeros_like(e.value,shape=e.shape+(size_ad,)) for e in elems2),axis=axis1))

	def associate(self,squeeze_free_dims=-1,squeeze_bound_dims=-1):
		from . import associate
		sq_free = squeeze_free_dims
		sq_free1 = sq_free if sq_free>=0 else (sq_free-1)
		value = associate(self.value,sq_free, squeeze_bound_dims)
		coef  = associate(self.coef, sq_free1,squeeze_bound_dims)
		coef = np.moveaxis(coef,self.ndim if sq_free is None else (self.ndim-1),-1)
		return self.new(value,coef)

	def apply_linear_operator(self,op):
		if isinstance(op,denseAD_Lin): return op(self)
		val = op(self.value)
		# The next case is also for denseAD_Lin, but when it is hidden within a function.
		# Note: this incurs a recomputation of op(self.value).
		if self.is_ad(val): return op(self)
		return self.new(val,misc.apply_linear_operator(op,self.coef,flatten_ndim=1))

# -------- End of class denseAD -------
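
# Usage sketch (added comment, not in the original source): a denseAD variable pairs
# a value array with a trailing axis of derivative coefficients, and the overloaded
# operators above propagate first derivatives through arithmetic.
# >>> x = denseAD(np.array([1.,2.]), np.eye(2))   # two independent perturbations
# >>> y = x[0]*x[1] + x[0]
# >>> y.value, y.gradient()   # 3.0 and [3.,1.]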

# -------- Factory methods -----


def identity(shape=None,shape_free=None,shape_bound=None,constant=None,shift=(0,0)):
	"""
	Creates a dense AD variable with independent symbolic perturbations for each coordinate
	(unless some are tied together as specified by shape_free and shape_bound)
	"""
	shape,constant = misc._set_shape_constant(shape,constant)
	shape_free,shape_bound = ad_generic._set_shape_free_bound(shape,shape_free,shape_bound)

	ndim_elem = len(shape)-len(shape_bound)
	shape_elem = shape[:ndim_elem]
	size_elem = int(np.prod(shape_elem))
	size_ad = shift[0]+size_elem+shift[1]
	coef1 = np.zeros_like(constant,shape=(size_elem,size_ad))
	for i in range(size_elem):
		coef1[i,shift[0]+i] = 1.
	coef1 = coef1.reshape(shape_elem+(1,)*len(shape_bound)+(size_ad,))
	if coef1.shape[:-1]!=constant.shape:
		coef1 = np.broadcast_to(coef1,shape+(size_ad,))
	return denseAD(constant,coef1)
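
# Example sketch (added comment, not in the original source):
# >>> x = identity(constant=np.array([3.,4.]))   # x.coef is the 2x2 identity matrix
# >>> (x[0]*x[1]).gradient()   # [4.,3.]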

def register(inputs,iterables=None,shape_bound=None,shift=(0,0),ident=identity,considered=None):
	"""
	Creates a series of dense AD variables with independent symbolic perturbations for each coordinate,
	and adequate intermediate shifts.
	"""
	if iterables is None:
		iterables = (tuple,)
	boundsize = 1 if shape_bound is None else np.prod(shape_bound,dtype=int)
	def is_considered(a):
		return considered is None or a in considered

	start = shift[0]
	starts = []
	def setstart(a):
		nonlocal start,starts
		if considered is None or any(a is val for val in considered):
			a,to_ad = misc.ready_ad(a)
			if to_ad:
				starts.append(start)
				start += a.size//boundsize
				return a
		starts.append(None)
		return a
	inputs = functional.map_iterables(setstart,inputs,iterables)

	end = start+shift[1]

	starts_it = iter(starts)
	def setad(a):
		start = next(starts_it)
		if start is None: return a
		else: return ident(constant=a,shift=(start,end-start-a.size//boundsize),
			shape_bound=shape_bound)
	return functional.map_iterables(setad,inputs,iterables)
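
# Example sketch (added comment, not in the original source): register gives each
# input its own slice of a shared set of perturbations, so that mixed expressions
# carry derivatives with respect to all inputs at once.
# >>> a,b = register((np.array([1.,2.]), np.array([3.])))
# >>> a.size_ad, b.size_ad   # both equal 3 = a.size + b.size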


# ------- class denseAD_Lin --------
class denseAD_Lin:
	"""
	A class implementing a linear operator L with an AD part δL, and the rule
	(L+δL)(u+δu) = L(u) + (δL(u) + L(δu)) + O(δ^2).
	"""

	def __init__(self,value,coef):
		"""
		- value : some linear operator L
		- coef : a list of linear operators δL
		"""
		self.value = value
		self.coef = coef

	def __str__(self):  return "denseAD_Lin" + str((self.value,self.coef))
	def __repr__(self): return "denseAD_Lin" + repr((self.value,self.coef))

	@property
	def size_ad(self): return len(self.coef)
	@property
	def _value(self): return as_callable(self.value)
	@property
	def _coef(self): return [as_callable(δL) for δL in self.coef]

	def __call__(self,other):
		if isinstance(other,denseAD):
			if other.size_ad==0: return self.__call__(other.value)
			res = other.apply_linear_operator(self._value)
			res.coef += np.stack([δL(other.value) for δL in self._coef],axis=-1)
			return res
		else:
			return denseAD(self._value(other),
				np.stack([δL(other) for δL in self._coef],axis=-1))

	def __mul__(self,other): return self.__call__(other)

def as_callable(L):
	"""Make matrices and sparse matrices callable"""
	if callable(L): return L
	# An ndarray acts via np.dot; a sparse matrix acts via the * operator.
	return (lambda x: np.dot(L,x)) if isinstance(L,np.ndarray) else (lambda x: L*x)
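
# Usage sketch (added comment, not in the original source): a perturbed matrix applied
# to a plain vector yields a denseAD result whose coefficients stack the δL(u).
# >>> op = denseAD_Lin(np.eye(2), [np.array([[0.,1.],[1.,0.]])])
# >>> op(np.array([1.,2.]))   # value [1.,2.], coef [[2.],[1.]]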