agd.AutomaticDifferentiation.Sparse

# Copyright 2020 Jean-Marie Mirebeau, University Paris-Sud, CNRS, University Paris-Saclay
# Distributed WITHOUT ANY WARRANTY. Licensed under the Apache License, Version 2.0, see http://www.apache.org/licenses/LICENSE-2.0

import numpy as np
import functools
from . import functional
from . import Base
from . import cupy_support as cps
from . import ad_generic
from . import cupy_generic
from . import misc
from . import Dense

_add_dim = misc._add_dim; _pad_last = misc._pad_last; _concatenate = misc._concatenate

class spAD(Base.baseAD):
	"""
	A class for sparse forward automatic differentiation
	"""

	def __init__(self,value,coef=None,index=None,broadcast_ad=False):
		if self.is_ad(value): # Case where one should just reproduce value
			assert coef is None and index is None
			self.value,self.coef,self.index = value.value,value.coef,value.index
			return
		if ad_generic.is_ad(value):
			raise Base.ADCastError(f"Attempting to cast {type(value)} to incompatible type spAD")

		assert ((coef is None) and (index is None)) or (coef.shape==index.shape)

		self.value = ad_generic.asarray(value)
		shape = self.shape
		shape2 = shape+(0,)
		self.coef  = (np.zeros_like(value,shape=shape2) if coef is None
			else misc._test_or_broadcast_ad(coef,shape,broadcast_ad) )
		self.index = (np.zeros_like(value,shape=shape2,
			dtype=cupy_generic.samesize_int_t(value.dtype))
			if index is None else misc._test_or_broadcast_ad(index,shape,broadcast_ad) )

	@classmethod
	def order(cls): return 1
	def copy(self,order='C'):
		return self.new(self.value.copy(order=order),
			self.coef.copy(order=order),self.index.copy(order=order))
	def as_tuple(self): return self.value,self.coef,self.index

	def __copy__(self): return self.copy(order='K')
	def __deepcopy__(self,*args):
		return self.new(self.value.__deepcopy__(*args),
			self.coef.__deepcopy__(*args),self.index.__deepcopy__(*args))

	# Representation
	def __iter__(self):
		for value,coef,index in zip(self.value,self.coef,self.index):
			yield self.new(value,coef,index)

	def __str__(self):
		return "spAD"+str((self.value,self.coef,self.index))
	def __repr__(self):
		return "spAD"+repr((self.value,self.coef,self.index))

	# Operators
	def as_func(self,h=None):
		"""Replaces the symbolic perturbation with h, if specified."""
		if h is None:
			lin = self.tangent_operator()
			return lambda h : (lin*h).reshape(self.shape) + misc.add_ndim(self.value,h.ndim-1)
		value,coef = (misc.add_ndim(e,h.ndim-1) for e in (self.value,self.coef))
		return value+(coef*h[self.index]).sum(axis=self.ndim)

	def __add__(self,other):
		if self.is_ad(other):
			value = self.value+other.value
			return self.new(value, _concatenate(self.coef,other.coef,value.shape),
				_concatenate(self.index,other.index,value.shape))
		else:
			return self.new(self.value+other, self.coef, self.index, broadcast_ad=True)

	def __sub__(self,other):
		if self.is_ad(other):
			value = self.value-other.value
			return self.new(value, _concatenate(self.coef,-other.coef,value.shape),
				_concatenate(self.index,other.index,value.shape))
		else:
			return self.new(self.value-other, self.coef, self.index, broadcast_ad=True)

	def __mul__(self,other):
		if self.is_ad(other):
			value = self.value*other.value
			coef1,coef2 = _add_dim(other.value)*self.coef,_add_dim(self.value)*other.coef
			index1,index2 = np.broadcast_to(self.index,coef1.shape),np.broadcast_to(other.index,coef2.shape)
			return self.new(value,_concatenate(coef1,coef2),_concatenate(index1,index2))
		elif self.isndarray(other):
			value = self.value*other
			coef = _add_dim(other)*self.coef
			index = np.broadcast_to(self.index,coef.shape)
			return self.new(value,coef,index)
		else:
			return self.new(self.value*other,other*self.coef,self.index)

	def __truediv__(self,other):
		if self.is_ad(other):
			return self.new(self.value/other.value,
				_concatenate(self.coef*_add_dim(1/other.value),other.coef*_add_dim(-self.value/other.value**2)),
				_concatenate(self.index,other.index))
		elif self.isndarray(other):
			return self.new(self.value/other,self.coef*_add_dim(1./other),self.index)
		else:
			return self.new(self.value/other,self.coef/other,self.index)

	__rmul__ = __mul__
	__radd__ = __add__
	def __rsub__(self,other): return -(self-other)
	def __rtruediv__(self,other):
		value = other/self.value
		coef = self.coef*_add_dim(-other/self.value**2)
		index = np.broadcast_to(self.index,coef.shape)
		return self.new(value,coef,index)

	def __neg__(self): return self.new(-self.value,-self.coef,self.index)

	# Math functions
	def _math_helper(self,deriv):
		a,b = deriv
		return self.new(a,_add_dim(b)*self.coef,self.index)

	@classmethod
	def compose(cls,a,t):
		assert isinstance(a,Dense.denseAD) and all(cls.is_ad(b) for b in t)
		lens = tuple(len(b) for b in t)
		assert a.size_ad == sum(lens)
		t = tuple(np.moveaxis(b,0,-1) for b in t)
		a_coefs = np.split(a.coef,np.cumsum(lens[:-1]),axis=-1)
		def FlattenLast2(arr): return arr.reshape(arr.shape[:-2]+(np.prod(arr.shape[-2:],dtype=int),))
		coef = tuple(_add_dim(c)*b.coef for c,b in zip(a_coefs,t))
		coef = np.concatenate(tuple(FlattenLast2(c) for c in coef), axis=-1)
		index = np.broadcast_to(np.concatenate(tuple(FlattenLast2(b.index)
			for b in t), axis=-1),coef.shape)
		return cls.new(a.value,coef,index)

	# Indexing
	@property
	def size_ad(self): return self.coef.shape[-1]

	def __getitem__(self,key):
		ekey = misc.key_expand(key)
		return self.new(self.value[key], self.coef[ekey], self.index[ekey])

	def __setitem__(self,key,other):
		ekey = misc.key_expand(key)
		if self.is_ad(other):
			self.value[key] = other.value
			pad_size = max(self.coef.shape[-1],other.coef.shape[-1])
			if pad_size>self.coef.shape[-1]:
				self.coef = _pad_last(self.coef,pad_size)
				self.index = _pad_last(self.index,pad_size)
			self.coef[ekey] = _pad_last(other.coef,pad_size)
			self.index[ekey] = _pad_last(other.index,pad_size)
		else:
			self.value[key] = other
			self.coef[ekey] = 0.

	def reshape(self,shape,order='C'):
		value = self.value.reshape(shape,order=order)
		shape2 = value.shape+(self.size_ad,)
		return self.new(value,self.coef.reshape(shape2,order=order),
			self.index.reshape(shape2,order=order))

	def broadcast_to(self,shape):
		shape2 = shape+(self.size_ad,)
		return self.new(np.broadcast_to(self.value,shape),
			np.broadcast_to(self.coef,shape2), np.broadcast_to(self.index,shape2))

	def pad(self,pad_width,*args,constant_values=0,**kwargs):
		return self.new(
			np.pad(self.value,pad_width,*args,constant_values=constant_values,**kwargs),
			np.pad(self.coef, pad_width+((0,0),),*args,constant_values=0,**kwargs),
			np.pad(self.index,pad_width+((0,0),),*args,constant_values=0,**kwargs))

	def transpose(self,axes=None):
		if axes is None: axes = tuple(reversed(range(self.ndim)))
		axes2 = tuple(axes)+(self.ndim,)
		return self.new(self.value.transpose(axes),
			self.coef.transpose(axes2),self.index.transpose(axes2))

	def allclose(self,other,*args,**kwargs):
		raise ValueError("Unsupported, sorry, please try allclose(a.to_dense(),b.to_dense())")

	# Reductions
	def sum(self,axis=None,out=None,**kwargs):
		if axis is None: return self.flatten().sum(axis=0,out=out,**kwargs)
		value = self.value.sum(axis,**kwargs)
		shape = value.shape+(self.size_ad * self.shape[axis],)
		coef = np.moveaxis(self.coef, axis,-1).reshape(shape)
		index = np.moveaxis(self.index,axis,-1).reshape(shape)
		out = self.new(value,coef,index)
		return out

	# Conversion
	def bound_ad(self):
		return 1+int(cps.max(self.index,initial=-1))

	def to_dense(self,dense_size_ad=None):
		if self.ndim!=1:
			return self.reshape(-1).to_dense(dense_size_ad=dense_size_ad).reshape(self.shape)
		if dense_size_ad is None: dense_size_ad = self.bound_ad()
		index = self.index+dense_size_ad*np.arange(self.size)[:,None]
		coef = np.bincount(index.reshape(-1),self.coef.reshape(-1),dense_size_ad*self.size)
		return Dense.denseAD(self.value,coef.reshape(self.size,dense_size_ad))

	# Linear algebra
	def triplets(self,tol=0):
		"""
		Returns the triplets defining the sparse linear operator.
		- tol : remove coefficients whose magnitude is below this threshold
		"""
		xp = cupy_generic.get_array_module(self.value)
		coef = self.coef.reshape(-1)
		row = np.broadcast_to(_add_dim(xp.arange(self.size).reshape(self.shape)), self.index.shape).reshape(-1)
		column = self.index.reshape(-1)

		pos = np.logical_or(np.abs(coef)>tol,np.isnan(coef))
		return (coef[pos],(row[pos],column[pos]))

	def tangent_operator(self,bound_ad=None,**kwargs):
		"""
		The tangent linear operator as a sparse matrix.
		- **kwargs : passed to triplets
		"""
		if bound_ad is None: bound_ad = self.bound_ad()
		return misc.tocsr(self.triplets(**kwargs),shape=(self.size,bound_ad))

	def adjoint_operator(self,bound_ad=None,**kwargs):
		"""
		The adjoint of the tangent linear operator as a sparse matrix.
		- **kwargs : passed to triplets
		"""
		if bound_ad is None: bound_ad = self.bound_ad()
		coef,(row,column) = self.triplets(**kwargs)
		return misc.tocsr((coef,(column,row)),shape=(bound_ad,self.size))

	def solve(self,raw=False):
		"""
		Assume that the spAD instance represents the variable y = x + A*delta,
		where delta is a symbolic perturbation.
		Solves the system x + A*delta = 0, assuming compatible shapes.
		"""
		mat = self.triplets()
		rhs = -self.value.flatten()
		return (mat,rhs) if raw else misc.spsolve(mat,rhs).reshape(self.shape)

	def is_elliptic(self,tol=None,identity_var=None):
		"""
		Tests whether the variable encodes a (linear) degenerate elliptic scheme.
		Output :
		- sum of the coefficients at each position (must be non-negative for
		degenerate ellipticity, positive for strict ellipticity)
		- maximum of off-diagonal coefficients at each position (must be non-positive)
		Output (if tol is specified) :
		- min_sum >= -tol and max_off <= tol
		Side effect warning : AD simplification, which is also possibly costly
		"""
		self.simplify_ad()
		min_sum = self.coef.sum(axis=-1)

		rg = (np.arange(self.size).reshape(self.shape+(1,))
			if identity_var is None else identity_var.index)
		coef = self.coef.copy()
		coef[self.index==rg] = -np.inf
		coef[coef==0.] = -np.inf
		max_off = coef.max(axis=-1)

		if tol is None: return min_sum,max_off
		return min_sum.min()>=-tol and max_off.max()<=tol

	@classmethod
	def concatenate(cls,elems,axis=0):
		axis1 = axis if axis>=0 else axis-1
		elems2 = tuple(cls(e) for e in elems)
		size_ad = max(e.size_ad for e in elems2)
		return cls(
			np.concatenate(tuple(e.value for e in elems2), axis=axis),
			np.concatenate(tuple(_pad_last(e.coef,size_ad)  for e in elems2),axis=axis1),
			np.concatenate(tuple(_pad_last(e.index,size_ad) for e in elems2),axis=axis1))

	# Memory optimization
	def simplify_ad(self,atol=None,rtol=None):
		"""
		Compresses the AD information by merging suitable coefficients, and optionally
		removing negligible ones.
		- atol : absolute tolerance to discard a coefficient. (True -> sensible default.)
		- rtol : relative tolerance to discard a coefficient (compared to largest in row)
		Operates in place, but also returns itself.
		"""
		# TODO : investigate possible optimizations using np.bincount or scipy.ndimage.sum
		if atol is True: atol = np.finfo(self.value.dtype).resolution
		if rtol is True: rtol = np.finfo(self.value.dtype).resolution
		if atol is None and rtol is not None: atol = 0.
		if rtol is None and atol is not None: rtol = 0.

		if self.size_ad==0: # Nothing to simplify
			return self
		if len(self.shape)==0: # Add a dimension to scalar-like arrays
			other = self.reshape((1,))
			other.simplify_ad(atol=atol,rtol=rtol)
			other = other.reshape(tuple())
			self.coef,self.index = other.coef,other.index
			return self

		if self.cupy_based():
			from .AD_CUDA.simplify_ad import simplify_ad
			return simplify_ad(self,atol=atol,rtol=rtol)

		if not self.index.flags['WRITEABLE']:
			self.index = self.index.copy()

		# Sort indices by increasing values
		bad_index = np.iinfo(self.index.dtype).max
		bad_pos = self.coef==0
		self.index[bad_pos] = bad_index
		ordering = self.index.argsort(axis=-1)
		self.coef  = np.take_along_axis(self.coef, ordering,axis=-1)
		self.index = np.take_along_axis(self.index,ordering,axis=-1)

		# Accumulate coefficients associated with identical indices
		cum_coef = np.zeros_like(self.value)
		indices  = np.zeros_like(self.index,shape=self.shape)
		size_ad = self.size_ad
		self.coef  = np.moveaxis(self.coef, -1,0)
		self.index = np.moveaxis(self.index,-1,0)
		prev_index = np.copy(self.index[0])

		for i in range(size_ad):
			# Note : self.index, self.coef change during the iterations
			ind,co = self.index[i],self.coef[i]
			pos_new_index = np.logical_and(prev_index!=ind, ind!=bad_index)
			pos_old_index = np.logical_not(pos_new_index)
			prev_index[pos_new_index] = ind[pos_new_index]
			cum_coef[pos_new_index]  = co[pos_new_index]
			cum_coef[pos_old_index] += co[pos_old_index]
			indices[pos_new_index] += 1
			indices_exp = np.expand_dims(indices,axis=0)
			cps.put_along_axis(self.index,indices_exp,prev_index,axis=0)
			cps.put_along_axis(self.coef, indices_exp,cum_coef,axis=0)

		# Eliminate the meaningless coefficients, beyond the largest index
		indices[self.index[0]==bad_index] = -1
		indices_max = int(np.max(indices,axis=None))
		size_ad_new = indices_max+1
		self.coef  = self.coef[:size_ad_new]
		self.index = self.index[:size_ad_new]
		if size_ad_new==0:
			self.coef  = np.moveaxis(self.coef,0,-1)
			self.index = np.moveaxis(self.index,0,-1)
			return self

		coef_end  = self.coef[ np.maximum(indices_max,0)]
		index_end = self.index[np.maximum(indices_max,0)]
		coef_end[ indices<indices_max] = 0.
		index_end[indices<indices_max] = -1
		while np.min(indices,axis=None)<indices_max:
			indices = np.minimum(indices_max,1+indices)
			indices_exp = np.expand_dims(indices,axis=0)
			cps.put_along_axis(self.coef, indices_exp,coef_end,axis=0)
			cps.put_along_axis(self.index,indices_exp,index_end,axis=0)

		self.coef  = np.moveaxis(self.coef,0,-1)
		self.index = np.moveaxis(self.index,0,-1)
		self.coef  = self.coef.reshape( self.shape+(size_ad_new,))
		self.index = self.index.reshape(self.shape+(size_ad_new,))

		self.index[self.index==-1] = 0 # The corresponding coefficient is zero anyway.

		# Optionally remove the coefficients below the tolerance threshold
		if atol is not None:
			tol = atol
			if rtol!=0:
				max_coef = np.max(np.abs(self.coef),axis=-1)
				tol = np.expand_dims(tol + max_coef*rtol, axis=-1)

			bad_pos = np.abs(self.coef) <= tol
			self.index[bad_pos] = bad_index
			self.coef[ bad_pos] = 0.
			ordering = self.index.argsort(axis=-1)
			self.coef  = np.take_along_axis(self.coef, ordering,axis=-1)
			self.index = np.take_along_axis(self.index,ordering,axis=-1)

			new_size_ad = self.size_ad - np.min(np.sum(bad_pos,axis=-1))
			self.coef  = self.coef[...,:new_size_ad]
			self.index = self.index[...,:new_size_ad]
			self.index[self.index==bad_index] = 0

		return self

# -------- End of class spAD -------

# -------- Factory methods -----

def identity(shape=None,constant=None,shift=0):
	shape,constant = misc._set_shape_constant(shape,constant)
	shape2 = shape+(1,)
	xp = cupy_generic.get_array_module(constant)
	int_t = cupy_generic.samesize_int_t(constant.dtype)
	return spAD(constant,np.ones_like(constant,shape=shape2),
		xp.arange(shift,shift+np.prod(shape,dtype=int),dtype=int_t).reshape(shape2))

def register(inputs,iterables=None,shift=0,ident=identity):
	if iterables is None:
		iterables = (tuple,)
	def reg(a):
		nonlocal shift
		a,to_ad = misc.ready_ad(a)
		if to_ad:
			result = ident(constant=a,shift=shift)
			shift += result.size
			return result
		else:
			return a
	return functional.map_iterables(reg,inputs,iterables)

class spAD(agd.AutomaticDifferentiation.Base.baseAD):

A class for sparse forward automatic differentiation

spAD(value, coef=None, index=None, broadcast_ad=False)
Instance variables : value, coef, index
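
A minimal usage sketch (assuming the agd package is installed; the values in the comments are for illustration):

import numpy as np
from agd.AutomaticDifferentiation import Sparse

x = Sparse.identity(constant=np.array([1.,2.,3.]))  # three independent AD variables
y = x[0]*x[1] + x[2]     # arithmetic propagates the sparse (coef,index) pairs
print(y.value)           # 5.0
print(y.coef, y.index)   # [2. 1. 1.] [0 1 2] : dy/dx_i, and the variables they attach to
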
@classmethod
def order(cls):
def copy(self, order='C'):
def as_tuple(self):
def as_func(self, h=None):

Replaces the symbolic perturbation with h, if specified.
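
A small sketch (same assumptions as above): with h given, as_func evaluates the underlying affine map value + <coef, h[index]> at a concrete perturbation h.

import numpy as np
from agd.AutomaticDifferentiation import Sparse

x = Sparse.identity(constant=np.array([1.,2.,3.]))
y = x[0]*x[1] + x[2]
h = np.array([0.1, 0., -0.2])  # one entry per independent AD variable
print(y.as_func(h))            # 5.0 + (2*0.1 + 1*0. + 1*(-0.2)) = 5.0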

@classmethod
def compose(cls, a, t):
size_ad : the number of AD coefficients stored along the last axis (property)
def reshape(self, shape, order='C'):
def broadcast_to(self, shape):
def pad(self, pad_width, *args, constant_values=0, **kwargs):
def transpose(self, axes=None):
def allclose(self, other, *args, **kwargs):
def sum(self, axis=None, out=None, **kwargs):
def bound_ad(self):
def to_dense(self, dense_size_ad=None):
def triplets(self, tol=0):

Returns the triplets defining the sparse linear operator.

  • tol : remove coefficients whose magnitude is below this threshold
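
Sketch: assembling a matrix from the triplets (assuming scipy is installed; note that coo_matrix sums duplicate entries, which is the intended semantics here).

import numpy as np, scipy.sparse
from agd.AutomaticDifferentiation import Sparse

x = Sparse.identity(constant=np.array([1.,2.,3.]))
y = x*x                       # Jacobian is diag(2*x)
coef,(row,col) = y.triplets()
J = scipy.sparse.coo_matrix((coef,(row,col)), shape=(y.size,y.bound_ad()))
print(J.toarray())            # duplicates summed : diag(2.,4.,6.)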
def tangent_operator(self, bound_ad=None, **kwargs):

The tangent linear operator as a sparse matrix.

  • **kwargs : passed to triplets
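
Sketch: the tangent operator maps a perturbation of the independent variables to the first-order perturbation of the outputs (scipy assumed installed).

import numpy as np
from agd.AutomaticDifferentiation import Sparse

x = Sparse.identity(constant=np.array([1.,2.,3.]))
y = x*x
lin = y.tangent_operator()       # sparse matrix of shape (y.size, y.bound_ad())
print(lin*np.array([1.,0.,0.]))  # directional derivative : [2. 0. 0.]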
def adjoint_operator(self, bound_ad=None, **kwargs):

The adjoint of the tangent linear operator as a sparse matrix.

  • **kwargs : passed to triplets
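
Sketch: the adjoint pulls a covector on the outputs back onto the inputs, which is one way gradients of scalar objectives can be assembled (scipy assumed installed).

import numpy as np
from agd.AutomaticDifferentiation import Sparse

x = Sparse.identity(constant=np.array([1.,2.,3.]))
y = x*x                           # Jacobian J = diag(2*x)
w = np.ones(3)                    # covector on the outputs
print(y.adjoint_operator()*w)     # J^T w = [2. 4. 6.]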
def solve(self, raw=False):

Assume that the spAD instance represents the variable y = x + A*delta, where delta is a symbolic perturbation. Solves the system x + A*delta = 0, assuming compatible shapes.
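
Sketch of a Newton iteration built on solve, for the hypothetical scalar system F(x) = x*x - 2 = 0 (scipy assumed installed for the linear solve):

import numpy as np
from agd.AutomaticDifferentiation import Sparse

x = np.array([1.5])
for _ in range(4):
	x_ad = Sparse.identity(constant=x)
	residual = x_ad*x_ad - 2.   # encodes F(x) + F'(x)*delta symbolically
	x = x + residual.solve()    # delta solves F(x) + F'(x)*delta = 0
print(x)                        # converges to [sqrt(2)] ~ [1.41421356]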

def is_elliptic(self, tol=None, identity_var=None):

Tests whether the variable encodes a (linear) degenerate elliptic scheme.

Output :
  • sum of the coefficients at each position (must be non-negative for degenerate ellipticity, positive for strict ellipticity)
  • maximum of off-diagonal coefficients at each position (must be non-positive)

Output (if tol is specified) :
  • min_sum >= -tol and max_off <= tol

Side effect warning : AD simplification, which is also possibly costly.
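
Sketch: a periodic discrete Laplacian is degenerate elliptic, since each row sums to zero and the off-diagonal coefficients are non-positive.

import numpy as np
from agd.AutomaticDifferentiation import Sparse

u = Sparse.identity(constant=np.zeros(5))
i = np.arange(5)
residual = 2.*u - u[i-1] - u[(i+1)%5]   # periodic 1D Laplacian scheme
print(residual.is_elliptic(tol=1e-12))  # True : min_sum = 0, max_off = -1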
@classmethod
def concatenate(cls, elems, axis=0):
def simplify_ad(self, atol=None, rtol=None):

Compresses the AD information by merging suitable coefficients, and optionally removing negligible ones.

  • atol : absolute tolerance to discard a coefficient. (True -> sensible default.)
  • rtol : relative tolerance to discard a coefficient (compared to the largest in the row)

Operates in place, but also returns itself.
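
Sketch: arithmetic can produce several entries attached to the same AD index; simplify_ad merges them in place.

import numpy as np
from agd.AutomaticDifferentiation import Sparse

x = Sparse.identity(constant=np.array([1.,2.]))
y = x + x
print(y.size_ad)     # 2 : two (coef,index) pairs per entry, both attached to x_i
y.simplify_ad()
print(y.size_ad)     # 1 : merged, with coefficient 2.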
def identity(shape=None, constant=None, shift=0):
def register(inputs, iterables=None, shift=0, ident=identity):
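
Sketch: register promotes the arrays of a container to independent sparse AD variables with consecutive AD indices (assuming misc.ready_ad accepts the given float arrays as-is).

import numpy as np
from agd.AutomaticDifferentiation import Sparse

a,b = Sparse.register((np.array([1.,2.]), np.array([3.])))
print(a.index.flatten(), b.index.flatten())  # [0 1] [2]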