agd.AutomaticDifferentiation.Sparse2

  1# Copyright 2020 Jean-Marie Mirebeau, University Paris-Sud, CNRS, University Paris-Saclay
  2# Distributed WITHOUT ANY WARRANTY. Licensed under the Apache License, Version 2.0, see http://www.apache.org/licenses/LICENSE-2.0
  3
  4import numpy as np
  5import functools
  6from . import cupy_generic
  7from . import ad_generic
  8from . import cupy_support as cps
  9from . import misc
 10from . import Sparse
 11from . import Dense
 12from . import Dense2
 13from . import Base
 14
 15_add_dim = misc._add_dim; _pad_last = misc._pad_last; _concatenate=misc._concatenate;
 16
 17
 18class spAD2(Base.baseAD):
 19	"""
 20	A class for sparse forward second order automatic differentiation
 21	"""
 22
 23	def __init__(self,value,coef1=None,index=None,coef2=None,index_row=None,index_col=None,broadcast_ad=False):
 24		if self.is_ad(value):
 25			assert (coef1 is None and index is None 
 26				and coef2 is None and index_row is None and index_col is None)
 27			self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col \
 28				= value.value,value.coef1,value.index,value.coef2,value.index_row,value.index_col
 29			return
 30		if ad_generic.is_ad(value):
 31			raise Base.ADCastError(f"Attempting to cast {type(value)} to incompatible type spAD2")
 32
 33		# Create instance 
 34		self.value = ad_generic.asarray(value)
 35		shape = self.shape
 36		shape2 = shape+(0,)
 37		int_t = cupy_generic.samesize_int_t(value.dtype)
 38		assert ((coef1 is None) and (index is None)) or (coef1.shape==index.shape)
 39		self.coef1 = (np.zeros_like(value,shape=shape2) if coef1  is None 
 40			else misc._test_or_broadcast_ad(coef1,shape,broadcast_ad) )
 41		self.index = (np.zeros_like(value,shape=shape2,dtype=int_t)  if index is None  
 42			else misc._test_or_broadcast_ad(index,shape,broadcast_ad) )
 43		
 44		assert (((coef2 is None) and (index_row is None) and (index_col is None)) 
 45			or ((coef2.shape==index_row.shape) and (coef2.shape==index_col.shape)))
 46		self.coef2 = (np.zeros_like(value,shape=shape2) if coef2  is None 
 47			else misc._test_or_broadcast_ad(coef2,shape,broadcast_ad) )
 48		self.index_row = (np.zeros_like(value,shape=shape2,dtype=int_t)  if index_row is None 
 49			else misc._test_or_broadcast_ad(index_row,shape,broadcast_ad) )
 50		self.index_col = (np.zeros_like(value,shape=shape2,dtype=int_t)  if index_col is None 
 51			else misc._test_or_broadcast_ad(index_col,shape,broadcast_ad) )
 52
 53	@classmethod
 54	def order(cls): return 2
 55	def copy(self,order='C'):
 56		return self.new(self.value.copy(order=order),
 57			self.coef1.copy(order=order),self.index.copy(order=order),
 58			self.coef2.copy(order=order),self.index_row.copy(order=order),self.index_col.copy(order=order))
 59	def as_tuple(self): return self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col
 60
 61	# Representation 
 62	def __iter__(self):
 63		for value,coef1,index,coef2,index_row,index_col in zip(self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col):
 64			yield self.new(value,coef1,index,coef2,index_row,index_col)
 65
 66	def __str__(self):
 67		return "spAD2"+str((self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col))
 68	def __repr__(self):
 69		return "spAD2"+repr((self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col))	
 70
 71	# Operators
 72	def as_func(self,h):
 73		"""Replaces the symbolic perturbation with h"""
 74		value,coef1,coef2 = (misc.add_ndim(e,h.ndim-1) for e in (self.value,self.coef1,self.coef2))
 75		return (value+(coef1*h[self.index]).sum(axis=self.ndim)
 76			+0.5*(coef2*h[self.index_row]*h[self.index_col]).sum(axis=self.ndim))
 77
 78	def __add__(self,other):
 79		if self.is_ad(other):
 80			value = self.value+other.value; shape = value.shape
 81			return self.new(value, 
 82				_concatenate(self.coef1,other.coef1,shape), _concatenate(self.index,other.index,shape),
 83				_concatenate(self.coef2,other.coef2,shape), _concatenate(self.index_row,other.index_row,shape), _concatenate(self.index_col,other.index_col,shape))
 84		else:
 85			return self.new(self.value+other, self.coef1, self.index, 
 86				self.coef2, self.index_row, self.index_col, broadcast_ad=True)
 87
 88	def __sub__(self,other):
 89		if self.is_ad(other):
 90			value = self.value-other.value; shape = value.shape
 91			return self.new(value, 
 92				_concatenate(self.coef1,-other.coef1,shape), _concatenate(self.index,other.index,shape),
 93				_concatenate(self.coef2,-other.coef2,shape), _concatenate(self.index_row,other.index_row,shape), _concatenate(self.index_col,other.index_col,shape))
 94		else:
 95			return self.new(self.value-other, self.coef1, self.index, 
 96				self.coef2, self.index_row, self.index_col, broadcast_ad=True)
 97
 98	def __mul__(self,other):
 99		if self.is_ad(other):
100			value = self.value*other.value
101			coef1_a,coef1_b = _add_dim(other.value)*self.coef1,_add_dim(self.value)*other.coef1
102			index_a,index_b = np.broadcast_to(self.index,coef1_a.shape),np.broadcast_to(other.index,coef1_b.shape)
103			coef2_a,coef2_b = _add_dim(other.value)*self.coef2,_add_dim(self.value)*other.coef2
104			index_row_a,index_row_b = np.broadcast_to(self.index_row,coef2_a.shape),np.broadcast_to(other.index_row,coef2_b.shape)
105			index_col_a,index_col_b = np.broadcast_to(self.index_col,coef2_a.shape),np.broadcast_to(other.index_col,coef2_b.shape)
106
107			len_a,len_b = self.coef1.shape[-1],other.coef1.shape[-1]
108			coef2_ab = np.repeat(self.coef1,len_b,axis=-1) * np.tile(other.coef1,len_a) 
109			index2_a = np.broadcast_to(np.repeat(self.index,len_b,axis=-1),coef2_ab.shape)
110			index2_b = np.broadcast_to(np.tile(other.index,len_a),coef2_ab.shape)
111
112			return self.new(value,_concatenate(coef1_a,coef1_b),_concatenate(index_a,index_b),
113				np.concatenate((coef2_a,coef2_b,coef2_ab,coef2_ab),axis=-1),
114				np.concatenate((index_row_a,index_row_b,index2_a,index2_b),axis=-1),
115				np.concatenate((index_col_a,index_col_b,index2_b,index2_a),axis=-1))
116		elif self.isndarray(other):
117			value = self.value*other
118			coef1 = _add_dim(other)*self.coef1
119			index = np.broadcast_to(self.index,coef1.shape)
120			coef2 = _add_dim(other)*self.coef2
121			index_row = np.broadcast_to(self.index_row,coef2.shape)
122			index_col = np.broadcast_to(self.index_col,coef2.shape)
123			return self.new(value,coef1,index,coef2,index_row,index_col)
124		else:
125			return self.new(self.value*other,other*self.coef1,self.index,
126				other*self.coef2,self.index_row,self.index_col)
127
128	def __truediv__(self,other):
129		if self.is_ad(other):
130			return self.__mul__(other.__pow__(-1))
131		elif self.isndarray(other):
132			inv = 1./other
133			return self.new(self.value*inv,self.coef1*_add_dim(inv),self.index,
134				self.coef2*_add_dim(inv),self.index_row,self.index_col)
135		else:
136			inv = 1./other
137			return self.new(self.value*inv,self.coef1*inv,self.index,
138				self.coef2*inv,self.index_row,self.index_col)
139
140	__rmul__ = __mul__
141	__radd__ = __add__
142	def __rsub__(self,other): 		return -(self-other)
143	def __rtruediv__(self,other):
144		return self.__pow__(-1).__mul__(other)
145
146	def __neg__(self):		return self.new(-self.value,-self.coef1,self.index,
147		-self.coef2,self.index_row,self.index_col)
148
149	# Math functions
150	def _math_helper(self,deriv): # Inputs : a=f(x), b=f'(x), c=f''(x), where x=self.value
151		a,b,c=deriv
152		len_1 = self.coef1.shape[-1]
153		coef1_r,index_r = np.repeat(self.coef1,len_1,axis=-1),np.repeat(self.index,len_1,axis=-1)
154		coef1_t,index_t = np.tile(self.coef1,len_1),np.tile(self.index,len_1) 
155		return self.new(a,_add_dim(b)*self.coef1,self.index,
156			_concatenate(_add_dim(b)*self.coef2,_add_dim(c)*(coef1_r*coef1_t)),
157			_concatenate(self.index_row, index_r),_concatenate(self.index_col, index_t))
158	
159	@classmethod
160	def compose(cls,a,t):
161		assert isinstance(a,Dense2.denseAD2) and all(cls.is_ad(b) for b in t)
162		b = np.moveaxis(cls.concatenate(t,axis=0),0,-1) # Possible performance hit if ad sizes are inhomogeneous
163		coef1 = _add_dim(a.coef1)*b.coef1
164		index1 = np.broadcast_to(b.index,coef1.shape)
165		coef2_pure = _add_dim(a.coef1)*b.coef2
166		index_row_pure = np.broadcast_to(b.index_row,coef2_pure.shape)
167		index_col_pure = np.broadcast_to(b.index_col,coef2_pure.shape)
168
169		s = b.shape[:-1]; na = a.size_ad; nb = b.size_ad1;
170		coef2_mixed = misc._add_dim2(a.coef2)*np.reshape(b.coef1,s+(na,1,nb,1))*np.reshape(b.coef1,s+(1,na,1,nb))
171		s2 = coef2_mixed.shape
172		index_row_mixed = np.broadcast_to(b.index.reshape(s+(na,1,nb,1)),s2)
173		index_col_mixed = np.broadcast_to(b.index.reshape(s+(1,na,1,nb)),s2)
174		#s3 = s2[:-4]+(na*na*nb*nb,) a.reshape(s3)
175
176		coef1,index1,coef2_pure,index_row_pure,index_col_pure = (
177			_flatten_nlast(a,2) for a in (coef1,index1,coef2_pure,index_row_pure,index_col_pure))
178		coef2_mixed,index_row_mixed,index_col_mixed = (
179			_flatten_nlast(a,4) for a in (coef2_mixed,index_row_mixed,index_col_mixed))
180		
181		return cls.new(a.value,coef1,index1,
182			_concatenate(coef2_pure,coef2_mixed),_concatenate(index_row_pure,index_row_mixed),
183			_concatenate(index_col_pure,index_col_mixed))
184
185	#Indexing
186	@property
187	def size_ad1(self):  return self.coef1.shape[-1]
188	@property
189	def size_ad2(self):  return self.coef2.shape[-1]
190
191	def __getitem__(self,key):
192		ekey = misc.key_expand(key)
193		try:
194			return self.new(self.value[key], 
195				self.coef1[ekey], self.index[ekey], 
196				self.coef2[ekey], self.index_row[ekey], self.index_col[ekey])
197		except ZeroDivisionError: # Some cupy versions fail indexing correctly if size==0
198			value = self.value[ekey]
199			shape = value.shape
200			def take(arr): 
201				size_ad = arr.shape[-1]
202				return arr[ekey] if size_ad>0 else np.zeros_like(arr,shape=(*shape,size_ad))
203			return self.new(self.value[key], 
204				take(self.coef1), take(self.index), 
205				take(self.coef2), take(self.index_row), take(self.index_col))
206
207	def __setitem__(self,key,other):
208		ekey = misc.key_expand(key)
209		if self.is_ad(other):
210			self.value[key] = other.value
211
212			pad_size = max(self.coef1.shape[-1],other.coef1.shape[-1])
213			if pad_size>self.coef1.shape[-1]:
214				self.coef1 = _pad_last(self.coef1,pad_size)
215				self.index = _pad_last(self.index,pad_size)
216			self.coef1[ekey] = _pad_last(other.coef1,pad_size)
217			self.index[ekey] = _pad_last(other.index,pad_size)
218
219			pad_size = max(self.coef2.shape[-1],other.coef2.shape[-1])
220			if pad_size>self.coef2.shape[-1]:
221				self.coef2 = _pad_last(self.coef2,pad_size)
222				self.index_row = _pad_last(self.index_row,pad_size)
223				self.index_col = _pad_last(self.index_col,pad_size)
224			self.coef2[ekey] = _pad_last(other.coef2,pad_size)
225			self.index_row[ekey] = _pad_last(other.index_row,pad_size)
226			self.index_col[ekey] = _pad_last(other.index_col,pad_size)
227		else:
228			self.value[key] = other
229			self.coef1[ekey] = 0.
230			self.coef2[ekey] = 0.
231
232	def reshape(self,shape,order='C'):
233		value = self.value.reshape(shape,order=order)
234		shape1 = value.shape+(self.size_ad1,)
235		shape2 = value.shape+(self.size_ad2,)
236		return self.new(value,
237			self.coef1.reshape(shape1,order=order), self.index.reshape(shape1,order=order),
238			self.coef2.reshape(shape2,order=order),self.index_row.reshape(shape2,order=order),
239			self.index_col.reshape(shape2,order=order))
240
241	def broadcast_to(self,shape):
242		shape1 = shape+(self.size_ad1,)
243		shape2 = shape+(self.size_ad2,)
244		return self.new(np.broadcast_to(self.value,shape), 
245			np.broadcast_to(self.coef1,shape1), np.broadcast_to(self.index,shape1),
246			np.broadcast_to(self.coef2,shape2), np.broadcast_to(self.index_row,shape2), 
247			np.broadcast_to(self.index_col,shape2))
248
249	def pad(self,pad_width,*args,constant_values=0,**kwargs):
250		def _pad(arr):return np.pad(arr,pad_width+((0,0),),*args,constant_values=0,**kwargs)
251		return self.new(
252			np.pad(self.value,pad_width,*args,constant_values=constant_values,**kwargs),
253			_pad(self.coef1),_pad(self.index),
254			_pad(self.coef2),_pad(self.index_row),_pad(self.index_col))
255	
256	def transpose(self,axes=None):
257		if axes is None: axes = tuple(reversed(range(self.ndim)))
258		axes2 = tuple(axes) +(self.ndim,)
259		return self.new(self.value.transpose(axes),
260			self.coef1.transpose(axes2),self.index.transpose(axes2),
261			self.coef2.transpose(axes2),self.index_row.transpose(axes2),
262			self.index_col.transpose(axes2))
263
264	def allclose(self,other,*args,**kwargs):
265		raise ValueError("Unsupported, sorry, please try allclose(a.to_dense(),b.to_dense())")
266
267	# Reductions
268	def sum(self,axis=None,out=None,**kwargs):
269		if axis is None: return self.flatten().sum(axis=0,out=out,**kwargs)
270		value = self.value.sum(axis,**kwargs)
271
272		shape1 = value.shape + (self.size_ad1 * self.shape[axis],)
273		coef1 = np.moveaxis(self.coef1, axis,-1).reshape(shape1)
274		index = np.moveaxis(self.index, axis,-1).reshape(shape1)
275
276		shape2 = value.shape + (self.size_ad2 * self.shape[axis],)
277		coef2 = np.moveaxis(self.coef2, axis,-1).reshape(shape2)
278		index_row = np.moveaxis(self.index_row, axis,-1).reshape(shape2)
279		index_col = np.moveaxis(self.index_col, axis,-1).reshape(shape2)
280
281		out = self.new(value,coef1,index,coef2,index_row,index_col)
282		return out
283
284	# Conversion
285	def bound_ad(self):
286		def maxi(a): return int(cps.max(a,initial=-1))
287		return 1+np.max((maxi(self.index),maxi(self.index_row),maxi(self.index_col)))
288	def to_dense(self,dense_size_ad=None):
289		# Can be accelerated using np.bincount, similarly to spAD.to_dense
290		def mvax(arr): return np.moveaxis(arr,-1,0)
291		dsad = self.bound_ad() if dense_size_ad is None else dense_size_ad
292		coef1 = np.zeros_like(self.value,shape=self.shape+(dsad,))
293		for c,i in zip(mvax(self.coef1),mvax(self.index)):
294			np.put_along_axis(coef1,_add_dim(i),np.take_along_axis(coef1,_add_dim(i),axis=-1)+_add_dim(c),axis=-1)
295		coef2 = np.zeros_like(self.value,shape=self.shape+(dsad*dsad,))
296		for c,i in zip(mvax(self.coef2),mvax(self.index_row*dsad+self.index_col)):
297			np.put_along_axis(coef2,_add_dim(i),np.take_along_axis(coef2,_add_dim(i),axis=-1)+_add_dim(c),axis=-1)
298		return Dense2.denseAD2(self.value,coef1,np.reshape(coef2,self.shape+(dsad,dsad)))
299
300	def to_first(self):
301		return Sparse.spAD(self.value,self.coef1,self.index)
302
303	#Linear algebra
304	def triplets(self):
305		"""The hessian operator, presented as triplets"""
306		return (self.coef2,(self.index_row,self.index_col))
307
308	def hessian_operator(self,shape=None):
309		"""
310		The hessian operator, presented as an opaque matrix class, supporting mul.
311		Implicitly sums over all axes. Recommendation : apply simplify_ad before call.
312		"""
313		return misc.tocsr(self.sum().triplets(),shape=shape)
314
315	def tangent_operator(self): return self.to_first().tangent_operator()
316	def adjoint_operator(self): return self.to_first().adjoint_operator()
317
318	def solve_stationnary(self,raw=False):
319		"""
320		Finds a stationnary point to a quadratic function, provided as a spAD2 array scalar. 
321		Use "raw = True" to obtain the raw linear system and use your own solver.
322		"""
323		mat = self.triplets()
324		rhs = - self.to_first().to_dense(self.bound_ad()).coef
325		return (mat,rhs) if raw else misc.spsolve(mat,rhs)
326
327	def solve_weakform(self,raw=False):
328		"""
329		Assume that a spAD2 array scalar represents the quadratic function
330		Q(u,v) = a0 + a1.(u,v) + (u,v).a2.(u,v) of the variable (u,v).
331		Finds u such that Q(u,v) is independent of v.
332		Use "raw = True" to obtain the raw linear system and use your own solver.
333		"""
334		(coef,(row,col)),rhs = self.solve_stationnary(raw=True)
335		n = rhs.size//2
336		rhs = rhs[n:]
337		pos = np.logical_and(row>=n,col<n)
338		mat = (coef[pos],(row[pos]-n,col[pos]))
339		return (mat,rhs) if raw else misc.spsolve(mat,rhs)
340	
341	# Static methods
342	@classmethod
343	def concatenate(cls,elems,axis=0):
344		axis1 = axis if axis>=0 else axis-1
345		elems2 = tuple(cls(e) for e in elems)
346		size_ad1 = max(e.size_ad1 for e in elems2)
347		size_ad2 = max(e.size_ad2 for e in elems2)
348		return cls( 
349		np.concatenate(tuple(e.value for e in elems2), axis=axis), 
350		np.concatenate(tuple(_pad_last(e.coef1,size_ad1)  for e in elems2),axis=axis1),
351		np.concatenate(tuple(_pad_last(e.index,size_ad1)  for e in elems2),axis=axis1),
352		np.concatenate(tuple(_pad_last(e.coef2,size_ad2)  for e in elems2),axis=axis1),
353		np.concatenate(tuple(_pad_last(e.index_row,size_ad2)  for e in elems2),axis=axis1),
354		np.concatenate(tuple(_pad_last(e.index_col,size_ad2)  for e in elems2),axis=axis1))
355
356	def simplify_ad(self,*args,**kwargs):
357		spHelper1 = Sparse.spAD(self.value,self.coef1,self.index)
358		spHelper1.simplify_ad(*args,**kwargs)
359		self.coef1,self.index = spHelper1.coef,spHelper1.index
360
361		if self.size_ad2>0: # Otherwise empty max
362			n_col = 1+np.max(self.index_col)
363			index = self.index_row.astype(np.int64)*n_col + self.index_col.astype(np.int64)
364			spHelper2 = Sparse.spAD(self.value,self.coef2,index)
365			spHelper2.simplify_ad(*args,**kwargs)
366			self.coef2 = spHelper2.coef
367			int_t = self.index_row.dtype.type
368			self.index_row,self.index_col = spHelper2.index//n_col, spHelper2.index%n_col
369			if int_t!=np.int64: self.index_row,self.index_col = (
370				e.astype(int_t) for e in (self.index_row,self.index_col))
371			
372		return self
373
374# -------- End of class spAD2 -------
375
376# -------- Utilities ------
377
378def _flatten_nlast(a,n):
379	assert n>0
380	s=a.shape
381	return a.reshape(s[:-n]+(np.prod(s[-n:]),))
382
383# -------- Factory method -----
384
385def identity(*args,**kwargs):
386	arr = Sparse.identity(*args,**kwargs)
387	shape2 = arr.shape+(0,)
388	return spAD2(arr.value,arr.coef,arr.index,
389		np.zeros_like(arr.coef,shape=shape2),
390		np.zeros_like(arr.index,shape=shape2),
391		np.zeros_like(arr.index,shape=shape2))
392
393def register(*args,**kwargs):
394	return Sparse.register(*args,**kwargs,ident=identity)
395
396# ---------- Sparse hessian extraction ---------
397
398def _hessian_operator_noAD(f,x,simplify_ad=None,fargs=tuple()):
399	"""Same as Hessian operator, but does not support f with AD values"""
400	x_ad = identity(constant=x)
401	try: f_ad = f(x_ad,*fargs)
402	except Base.ADCastError: return hessian_operator_denseAD(f,x,simplify_ad=simplify_ad,fargs=fargs)
403	if simplify_ad or simplify_ad is None and f_ad.ndim > 0: f_ad.simplify_ad(atol=True,rtol=True)
404	return f_ad.hessian_operator( shape=(x_ad.size,x_ad.size) )
405
406def hessian_operator(f,x,simplify_ad=None,fargs=tuple(),rev_ad=False):
407	"""
408	Returns the sparse matrix associated to the hessian of f at x,
409	generated using automatic differentiation.
410	Typically used to obtain the sparse matrix of a quadratic form.
411	Output of f is summed, if non-scalar.
412	- simplify_ad (optional): wether to simplify the ad information 
413	   before generating the sparse matrix
414
415
416	*Autodiff support* 
417	Consider the functional D * u**2, written with the following convention
418	>>> def f(u,D,ad_channel=lambda x:x): return ad_channel(D) * u**2
419	See Eikonal.cuda.AnisotropicWave, classes AcousticHamiltonian_Sparse and 
420	ElasticHamiltonian_Sparse for non-trivial examples.
421
422	- Foward autodiff. Returns an denseAD_Lin class (operator plus perturbation), 
423	if f(x) is a first order dense forward AD variable, with the following convention/example :   
424	>>> ad.Sparse2.hessian_operator(f,np.zeros(10),fargs=(ad.Dense.identity(constant=2),))
425
426	- Reverse autodiff support (rev_ad=True). The components of f(x,ad_channel=ones_like)
427	are regarded as independent contributions w.r.t which to compute the sensitivity of the result.
428	"""
429	f_dense = f(x,*fargs)
430	if simplify_ad is None: simplify_ad = f_dense.ndim>0
431	if isinstance(f_dense,Dense.denseAD): # ---- Forward autodiff support ----
432		size_ad = f_dense.size_ad
433		x_ad = identity(constant=x)
434		shape = x_ad.size,x_ad.size
435		ops = []
436		for i in range(-1,size_ad):
437			f_sparse = f(x_ad,*fargs,ad_channel=lambda x:x.value if i==-1 else x.coef[...,i])
438			if simplify_ad: f_sparse.simplify_ad(atol=True,rtol=True)
439			ops.append(f_sparse.hessian_operator(shape=shape))
440		return Dense.denseAD_Lin(ops[0],ops[1:])
441
442	op = _hessian_operator_noAD(f,x,simplify_ad,fargs)
443	if not rev_ad : return op #  ---- No autodiff ----
444	
445	# ---- Reverse autodiff support ----
446	x_ad = identity(constant=x)
447	f_sparse = f(x_ad,*fargs,ad_channel=lambda x:np.ones_like(x))
448	if simplify_ad: f_sparse.simplify_ad(atol=True,rtol=True)
449	coef2,row,col = f_sparse.coef2,f_sparse.index_row,f_sparse.index_col
450	f_sparse = None # deallocate first and zero-th order coefficients
451	
452	def sensitivity(x):
453		x=x.reshape((x_ad.size,*x.shape[x_ad.ndim:]))
454		return np.sum(coef2.reshape(coef2.shape+(1,)*x.ndim)
455			*x.value[row][...,None]*x.coef[col],axis=coef2.ndim-1)
456
457	return op, sensitivity
class spAD2(agd.AutomaticDifferentiation.Base.baseAD):
 19class spAD2(Base.baseAD):
 20	"""
 21	A class for sparse forward second order automatic differentiation
 22	"""
 23
 24	def __init__(self,value,coef1=None,index=None,coef2=None,index_row=None,index_col=None,broadcast_ad=False):
 25		if self.is_ad(value):
 26			assert (coef1 is None and index is None 
 27				and coef2 is None and index_row is None and index_col is None)
 28			self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col \
 29				= value.value,value.coef1,value.index,value.coef2,value.index_row,value.index_col
 30			return
 31		if ad_generic.is_ad(value):
 32			raise Base.ADCastError(f"Attempting to cast {type(value)} to incompatible type spAD2")
 33
 34		# Create instance 
 35		self.value = ad_generic.asarray(value)
 36		shape = self.shape
 37		shape2 = shape+(0,)
 38		int_t = cupy_generic.samesize_int_t(value.dtype)
 39		assert ((coef1 is None) and (index is None)) or (coef1.shape==index.shape)
 40		self.coef1 = (np.zeros_like(value,shape=shape2) if coef1  is None 
 41			else misc._test_or_broadcast_ad(coef1,shape,broadcast_ad) )
 42		self.index = (np.zeros_like(value,shape=shape2,dtype=int_t)  if index is None  
 43			else misc._test_or_broadcast_ad(index,shape,broadcast_ad) )
 44		
 45		assert (((coef2 is None) and (index_row is None) and (index_col is None)) 
 46			or ((coef2.shape==index_row.shape) and (coef2.shape==index_col.shape)))
 47		self.coef2 = (np.zeros_like(value,shape=shape2) if coef2  is None 
 48			else misc._test_or_broadcast_ad(coef2,shape,broadcast_ad) )
 49		self.index_row = (np.zeros_like(value,shape=shape2,dtype=int_t)  if index_row is None 
 50			else misc._test_or_broadcast_ad(index_row,shape,broadcast_ad) )
 51		self.index_col = (np.zeros_like(value,shape=shape2,dtype=int_t)  if index_col is None 
 52			else misc._test_or_broadcast_ad(index_col,shape,broadcast_ad) )
 53
 54	@classmethod
 55	def order(cls): return 2
 56	def copy(self,order='C'):
 57		return self.new(self.value.copy(order=order),
 58			self.coef1.copy(order=order),self.index.copy(order=order),
 59			self.coef2.copy(order=order),self.index_row.copy(order=order),self.index_col.copy(order=order))
 60	def as_tuple(self): return self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col
 61
 62	# Representation 
 63	def __iter__(self):
 64		for value,coef1,index,coef2,index_row,index_col in zip(self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col):
 65			yield self.new(value,coef1,index,coef2,index_row,index_col)
 66
 67	def __str__(self):
 68		return "spAD2"+str((self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col))
 69	def __repr__(self):
 70		return "spAD2"+repr((self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col))	
 71
 72	# Operators
 73	def as_func(self,h):
 74		"""Replaces the symbolic perturbation with h"""
 75		value,coef1,coef2 = (misc.add_ndim(e,h.ndim-1) for e in (self.value,self.coef1,self.coef2))
 76		return (value+(coef1*h[self.index]).sum(axis=self.ndim)
 77			+0.5*(coef2*h[self.index_row]*h[self.index_col]).sum(axis=self.ndim))
 78
 79	def __add__(self,other):
 80		if self.is_ad(other):
 81			value = self.value+other.value; shape = value.shape
 82			return self.new(value, 
 83				_concatenate(self.coef1,other.coef1,shape), _concatenate(self.index,other.index,shape),
 84				_concatenate(self.coef2,other.coef2,shape), _concatenate(self.index_row,other.index_row,shape), _concatenate(self.index_col,other.index_col,shape))
 85		else:
 86			return self.new(self.value+other, self.coef1, self.index, 
 87				self.coef2, self.index_row, self.index_col, broadcast_ad=True)
 88
 89	def __sub__(self,other):
 90		if self.is_ad(other):
 91			value = self.value-other.value; shape = value.shape
 92			return self.new(value, 
 93				_concatenate(self.coef1,-other.coef1,shape), _concatenate(self.index,other.index,shape),
 94				_concatenate(self.coef2,-other.coef2,shape), _concatenate(self.index_row,other.index_row,shape), _concatenate(self.index_col,other.index_col,shape))
 95		else:
 96			return self.new(self.value-other, self.coef1, self.index, 
 97				self.coef2, self.index_row, self.index_col, broadcast_ad=True)
 98
 99	def __mul__(self,other):
100		if self.is_ad(other):
101			value = self.value*other.value
102			coef1_a,coef1_b = _add_dim(other.value)*self.coef1,_add_dim(self.value)*other.coef1
103			index_a,index_b = np.broadcast_to(self.index,coef1_a.shape),np.broadcast_to(other.index,coef1_b.shape)
104			coef2_a,coef2_b = _add_dim(other.value)*self.coef2,_add_dim(self.value)*other.coef2
105			index_row_a,index_row_b = np.broadcast_to(self.index_row,coef2_a.shape),np.broadcast_to(other.index_row,coef2_b.shape)
106			index_col_a,index_col_b = np.broadcast_to(self.index_col,coef2_a.shape),np.broadcast_to(other.index_col,coef2_b.shape)
107
108			len_a,len_b = self.coef1.shape[-1],other.coef1.shape[-1]
109			coef2_ab = np.repeat(self.coef1,len_b,axis=-1) * np.tile(other.coef1,len_a) 
110			index2_a = np.broadcast_to(np.repeat(self.index,len_b,axis=-1),coef2_ab.shape)
111			index2_b = np.broadcast_to(np.tile(other.index,len_a),coef2_ab.shape)
112
113			return self.new(value,_concatenate(coef1_a,coef1_b),_concatenate(index_a,index_b),
114				np.concatenate((coef2_a,coef2_b,coef2_ab,coef2_ab),axis=-1),
115				np.concatenate((index_row_a,index_row_b,index2_a,index2_b),axis=-1),
116				np.concatenate((index_col_a,index_col_b,index2_b,index2_a),axis=-1))
117		elif self.isndarray(other):
118			value = self.value*other
119			coef1 = _add_dim(other)*self.coef1
120			index = np.broadcast_to(self.index,coef1.shape)
121			coef2 = _add_dim(other)*self.coef2
122			index_row = np.broadcast_to(self.index_row,coef2.shape)
123			index_col = np.broadcast_to(self.index_col,coef2.shape)
124			return self.new(value,coef1,index,coef2,index_row,index_col)
125		else:
126			return self.new(self.value*other,other*self.coef1,self.index,
127				other*self.coef2,self.index_row,self.index_col)
128
129	def __truediv__(self,other):
130		if self.is_ad(other):
131			return self.__mul__(other.__pow__(-1))
132		elif self.isndarray(other):
133			inv = 1./other
134			return self.new(self.value*inv,self.coef1*_add_dim(inv),self.index,
135				self.coef2*_add_dim(inv),self.index_row,self.index_col)
136		else:
137			inv = 1./other
138			return self.new(self.value*inv,self.coef1*inv,self.index,
139				self.coef2*inv,self.index_row,self.index_col)
140
141	__rmul__ = __mul__
142	__radd__ = __add__
143	def __rsub__(self,other): 		return -(self-other)
144	def __rtruediv__(self,other):
145		return self.__pow__(-1).__mul__(other)
146
147	def __neg__(self):		return self.new(-self.value,-self.coef1,self.index,
148		-self.coef2,self.index_row,self.index_col)
149
150	# Math functions
151	def _math_helper(self,deriv): # Inputs : a=f(x), b=f'(x), c=f''(x), where x=self.value
152		a,b,c=deriv
153		len_1 = self.coef1.shape[-1]
154		coef1_r,index_r = np.repeat(self.coef1,len_1,axis=-1),np.repeat(self.index,len_1,axis=-1)
155		coef1_t,index_t = np.tile(self.coef1,len_1),np.tile(self.index,len_1) 
156		return self.new(a,_add_dim(b)*self.coef1,self.index,
157			_concatenate(_add_dim(b)*self.coef2,_add_dim(c)*(coef1_r*coef1_t)),
158			_concatenate(self.index_row, index_r),_concatenate(self.index_col, index_t))
159	
160	@classmethod
161	def compose(cls,a,t):
162		assert isinstance(a,Dense2.denseAD2) and all(cls.is_ad(b) for b in t)
163		b = np.moveaxis(cls.concatenate(t,axis=0),0,-1) # Possible performance hit if ad sizes are inhomogeneous
164		coef1 = _add_dim(a.coef1)*b.coef1
165		index1 = np.broadcast_to(b.index,coef1.shape)
166		coef2_pure = _add_dim(a.coef1)*b.coef2
167		index_row_pure = np.broadcast_to(b.index_row,coef2_pure.shape)
168		index_col_pure = np.broadcast_to(b.index_col,coef2_pure.shape)
169
170		s = b.shape[:-1]; na = a.size_ad; nb = b.size_ad1;
171		coef2_mixed = misc._add_dim2(a.coef2)*np.reshape(b.coef1,s+(na,1,nb,1))*np.reshape(b.coef1,s+(1,na,1,nb))
172		s2 = coef2_mixed.shape
173		index_row_mixed = np.broadcast_to(b.index.reshape(s+(na,1,nb,1)),s2)
174		index_col_mixed = np.broadcast_to(b.index.reshape(s+(1,na,1,nb)),s2)
175		#s3 = s2[:-4]+(na*na*nb*nb,) a.reshape(s3)
176
177		coef1,index1,coef2_pure,index_row_pure,index_col_pure = (
178			_flatten_nlast(a,2) for a in (coef1,index1,coef2_pure,index_row_pure,index_col_pure))
179		coef2_mixed,index_row_mixed,index_col_mixed = (
180			_flatten_nlast(a,4) for a in (coef2_mixed,index_row_mixed,index_col_mixed))
181		
182		return cls.new(a.value,coef1,index1,
183			_concatenate(coef2_pure,coef2_mixed),_concatenate(index_row_pure,index_row_mixed),
184			_concatenate(index_col_pure,index_col_mixed))
185
186	#Indexing
187	@property
188	def size_ad1(self):  return self.coef1.shape[-1]
189	@property
190	def size_ad2(self):  return self.coef2.shape[-1]
191
192	def __getitem__(self,key):
193		ekey = misc.key_expand(key)
194		try:
195			return self.new(self.value[key], 
196				self.coef1[ekey], self.index[ekey], 
197				self.coef2[ekey], self.index_row[ekey], self.index_col[ekey])
198		except ZeroDivisionError: # Some cupy versions fail indexing correctly if size==0
199			value = self.value[ekey]
200			shape = value.shape
201			def take(arr): 
202				size_ad = arr.shape[-1]
203				return arr[ekey] if size_ad>0 else np.zeros_like(arr,shape=(*shape,size_ad))
204			return self.new(self.value[key], 
205				take(self.coef1), take(self.index), 
206				take(self.coef2), take(self.index_row), take(self.index_col))
207
208	def __setitem__(self,key,other):
209		ekey = misc.key_expand(key)
210		if self.is_ad(other):
211			self.value[key] = other.value
212
213			pad_size = max(self.coef1.shape[-1],other.coef1.shape[-1])
214			if pad_size>self.coef1.shape[-1]:
215				self.coef1 = _pad_last(self.coef1,pad_size)
216				self.index = _pad_last(self.index,pad_size)
217			self.coef1[ekey] = _pad_last(other.coef1,pad_size)
218			self.index[ekey] = _pad_last(other.index,pad_size)
219
220			pad_size = max(self.coef2.shape[-1],other.coef2.shape[-1])
221			if pad_size>self.coef2.shape[-1]:
222				self.coef2 = _pad_last(self.coef2,pad_size)
223				self.index_row = _pad_last(self.index_row,pad_size)
224				self.index_col = _pad_last(self.index_col,pad_size)
225			self.coef2[ekey] = _pad_last(other.coef2,pad_size)
226			self.index_row[ekey] = _pad_last(other.index_row,pad_size)
227			self.index_col[ekey] = _pad_last(other.index_col,pad_size)
228		else:
229			self.value[key] = other
230			self.coef1[ekey] = 0.
231			self.coef2[ekey] = 0.
232
233	def reshape(self,shape,order='C'):
234		value = self.value.reshape(shape,order=order)
235		shape1 = value.shape+(self.size_ad1,)
236		shape2 = value.shape+(self.size_ad2,)
237		return self.new(value,
238			self.coef1.reshape(shape1,order=order), self.index.reshape(shape1,order=order),
239			self.coef2.reshape(shape2,order=order),self.index_row.reshape(shape2,order=order),
240			self.index_col.reshape(shape2,order=order))
241
242	def broadcast_to(self,shape):
243		shape1 = shape+(self.size_ad1,)
244		shape2 = shape+(self.size_ad2,)
245		return self.new(np.broadcast_to(self.value,shape), 
246			np.broadcast_to(self.coef1,shape1), np.broadcast_to(self.index,shape1),
247			np.broadcast_to(self.coef2,shape2), np.broadcast_to(self.index_row,shape2), 
248			np.broadcast_to(self.index_col,shape2))
249
250	def pad(self,pad_width,*args,constant_values=0,**kwargs):
251		def _pad(arr):return np.pad(arr,pad_width+((0,0),),*args,constant_values=0,**kwargs)
252		return self.new(
253			np.pad(self.value,pad_width,*args,constant_values=constant_values,**kwargs),
254			_pad(self.coef1),_pad(self.index),
255			_pad(self.coef2),_pad(self.index_row),_pad(self.index_col))
256	
257	def transpose(self,axes=None):
258		if axes is None: axes = tuple(reversed(range(self.ndim)))
259		axes2 = tuple(axes) +(self.ndim,)
260		return self.new(self.value.transpose(axes),
261			self.coef1.transpose(axes2),self.index.transpose(axes2),
262			self.coef2.transpose(axes2),self.index_row.transpose(axes2),
263			self.index_col.transpose(axes2))
264
265	def allclose(self,other,*args,**kwargs):
266		raise ValueError("Unsupported, sorry, please try allclose(a.to_dense(),b.to_dense())")
267
268	# Reductions
269	def sum(self,axis=None,out=None,**kwargs):
270		if axis is None: return self.flatten().sum(axis=0,out=out,**kwargs)
271		value = self.value.sum(axis,**kwargs)
272
273		shape1 = value.shape + (self.size_ad1 * self.shape[axis],)
274		coef1 = np.moveaxis(self.coef1, axis,-1).reshape(shape1)
275		index = np.moveaxis(self.index, axis,-1).reshape(shape1)
276
277		shape2 = value.shape + (self.size_ad2 * self.shape[axis],)
278		coef2 = np.moveaxis(self.coef2, axis,-1).reshape(shape2)
279		index_row = np.moveaxis(self.index_row, axis,-1).reshape(shape2)
280		index_col = np.moveaxis(self.index_col, axis,-1).reshape(shape2)
281
282		out = self.new(value,coef1,index,coef2,index_row,index_col)
283		return out
284
285	# Conversion
286	def bound_ad(self):
287		def maxi(a): return int(cps.max(a,initial=-1))
288		return 1+np.max((maxi(self.index),maxi(self.index_row),maxi(self.index_col)))
289	def to_dense(self,dense_size_ad=None):
290		# Can be accelerated using np.bincount, similarly to spAD.to_dense
291		def mvax(arr): return np.moveaxis(arr,-1,0)
292		dsad = self.bound_ad() if dense_size_ad is None else dense_size_ad
293		coef1 = np.zeros_like(self.value,shape=self.shape+(dsad,))
294		for c,i in zip(mvax(self.coef1),mvax(self.index)):
295			np.put_along_axis(coef1,_add_dim(i),np.take_along_axis(coef1,_add_dim(i),axis=-1)+_add_dim(c),axis=-1)
296		coef2 = np.zeros_like(self.value,shape=self.shape+(dsad*dsad,))
297		for c,i in zip(mvax(self.coef2),mvax(self.index_row*dsad+self.index_col)):
298			np.put_along_axis(coef2,_add_dim(i),np.take_along_axis(coef2,_add_dim(i),axis=-1)+_add_dim(c),axis=-1)
299		return Dense2.denseAD2(self.value,coef1,np.reshape(coef2,self.shape+(dsad,dsad)))
300
301	def to_first(self):
302		return Sparse.spAD(self.value,self.coef1,self.index)
303
304	#Linear algebra
305	def triplets(self):
306		"""The hessian operator, presented as triplets"""
307		return (self.coef2,(self.index_row,self.index_col))
308
309	def hessian_operator(self,shape=None):
310		"""
311		The hessian operator, presented as an opaque matrix class, supporting mul.
312		Implicitly sums over all axes. Recommendation : apply simplify_ad before call.
313		"""
314		return misc.tocsr(self.sum().triplets(),shape=shape)
315
316	def tangent_operator(self): return self.to_first().tangent_operator()
317	def adjoint_operator(self): return self.to_first().adjoint_operator()
318
319	def solve_stationnary(self,raw=False):
320		"""
321		Finds a stationnary point to a quadratic function, provided as a spAD2 array scalar. 
322		Use "raw = True" to obtain the raw linear system and use your own solver.
323		"""
324		mat = self.triplets()
325		rhs = - self.to_first().to_dense(self.bound_ad()).coef
326		return (mat,rhs) if raw else misc.spsolve(mat,rhs)
327
328	def solve_weakform(self,raw=False):
329		"""
330		Assume that a spAD2 array scalar represents the quadratic function
331		Q(u,v) = a0 + a1.(u,v) + (u,v).a2.(u,v) of the variable (u,v).
332		Finds u such that Q(u,v) is independent of v.
333		Use "raw = True" to obtain the raw linear system and use your own solver.
334		"""
335		(coef,(row,col)),rhs = self.solve_stationnary(raw=True)
336		n = rhs.size//2
337		rhs = rhs[n:]
338		pos = np.logical_and(row>=n,col<n)
339		mat = (coef[pos],(row[pos]-n,col[pos]))
340		return (mat,rhs) if raw else misc.spsolve(mat,rhs)
341	
342	# Static methods
343	@classmethod
344	def concatenate(cls,elems,axis=0):
345		axis1 = axis if axis>=0 else axis-1
346		elems2 = tuple(cls(e) for e in elems)
347		size_ad1 = max(e.size_ad1 for e in elems2)
348		size_ad2 = max(e.size_ad2 for e in elems2)
349		return cls( 
350		np.concatenate(tuple(e.value for e in elems2), axis=axis), 
351		np.concatenate(tuple(_pad_last(e.coef1,size_ad1)  for e in elems2),axis=axis1),
352		np.concatenate(tuple(_pad_last(e.index,size_ad1)  for e in elems2),axis=axis1),
353		np.concatenate(tuple(_pad_last(e.coef2,size_ad2)  for e in elems2),axis=axis1),
354		np.concatenate(tuple(_pad_last(e.index_row,size_ad2)  for e in elems2),axis=axis1),
355		np.concatenate(tuple(_pad_last(e.index_col,size_ad2)  for e in elems2),axis=axis1))
356
357	def simplify_ad(self,*args,**kwargs):
358		spHelper1 = Sparse.spAD(self.value,self.coef1,self.index)
359		spHelper1.simplify_ad(*args,**kwargs)
360		self.coef1,self.index = spHelper1.coef,spHelper1.index
361
362		if self.size_ad2>0: # Otherwise empty max
363			n_col = 1+np.max(self.index_col)
364			index = self.index_row.astype(np.int64)*n_col + self.index_col.astype(np.int64)
365			spHelper2 = Sparse.spAD(self.value,self.coef2,index)
366			spHelper2.simplify_ad(*args,**kwargs)
367			self.coef2 = spHelper2.coef
368			int_t = self.index_row.dtype.type
369			self.index_row,self.index_col = spHelper2.index//n_col, spHelper2.index%n_col
370			if int_t!=np.int64: self.index_row,self.index_col = (
371				e.astype(int_t) for e in (self.index_row,self.index_col))
372			
373		return self

A class for sparse forward second order automatic differentiation

spAD2( value, coef1=None, index=None, coef2=None, index_row=None, index_col=None, broadcast_ad=False)
24	def __init__(self,value,coef1=None,index=None,coef2=None,index_row=None,index_col=None,broadcast_ad=False):
25		if self.is_ad(value):
26			assert (coef1 is None and index is None 
27				and coef2 is None and index_row is None and index_col is None)
28			self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col \
29				= value.value,value.coef1,value.index,value.coef2,value.index_row,value.index_col
30			return
31		if ad_generic.is_ad(value):
32			raise Base.ADCastError(f"Attempting to cast {type(value)} to incompatible type spAD2")
33
34		# Create instance 
35		self.value = ad_generic.asarray(value)
36		shape = self.shape
37		shape2 = shape+(0,)
38		int_t = cupy_generic.samesize_int_t(value.dtype)
39		assert ((coef1 is None) and (index is None)) or (coef1.shape==index.shape)
40		self.coef1 = (np.zeros_like(value,shape=shape2) if coef1  is None 
41			else misc._test_or_broadcast_ad(coef1,shape,broadcast_ad) )
42		self.index = (np.zeros_like(value,shape=shape2,dtype=int_t)  if index is None  
43			else misc._test_or_broadcast_ad(index,shape,broadcast_ad) )
44		
45		assert (((coef2 is None) and (index_row is None) and (index_col is None)) 
46			or ((coef2.shape==index_row.shape) and (coef2.shape==index_col.shape)))
47		self.coef2 = (np.zeros_like(value,shape=shape2) if coef2  is None 
48			else misc._test_or_broadcast_ad(coef2,shape,broadcast_ad) )
49		self.index_row = (np.zeros_like(value,shape=shape2,dtype=int_t)  if index_row is None 
50			else misc._test_or_broadcast_ad(index_row,shape,broadcast_ad) )
51		self.index_col = (np.zeros_like(value,shape=shape2,dtype=int_t)  if index_col is None 
52			else misc._test_or_broadcast_ad(index_col,shape,broadcast_ad) )
value
coef1
index
coef2
index_row
index_col
@classmethod
def order(cls):
54	@classmethod
55	def order(cls): return 2
def copy(self, order='C'):
56	def copy(self,order='C'):
57		return self.new(self.value.copy(order=order),
58			self.coef1.copy(order=order),self.index.copy(order=order),
59			self.coef2.copy(order=order),self.index_row.copy(order=order),self.index_col.copy(order=order))
def as_tuple(self):
60	def as_tuple(self): return self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col
def as_func(self, h):
73	def as_func(self,h):
74		"""Replaces the symbolic perturbation with h"""
75		value,coef1,coef2 = (misc.add_ndim(e,h.ndim-1) for e in (self.value,self.coef1,self.coef2))
76		return (value+(coef1*h[self.index]).sum(axis=self.ndim)
77			+0.5*(coef2*h[self.index_row]*h[self.index_col]).sum(axis=self.ndim))

Replaces the symbolic perturbation with h

@classmethod
def compose(cls, a, t):
160	@classmethod
161	def compose(cls,a,t):
162		assert isinstance(a,Dense2.denseAD2) and all(cls.is_ad(b) for b in t)
163		b = np.moveaxis(cls.concatenate(t,axis=0),0,-1) # Possible performance hit if ad sizes are inhomogeneous
164		coef1 = _add_dim(a.coef1)*b.coef1
165		index1 = np.broadcast_to(b.index,coef1.shape)
166		coef2_pure = _add_dim(a.coef1)*b.coef2
167		index_row_pure = np.broadcast_to(b.index_row,coef2_pure.shape)
168		index_col_pure = np.broadcast_to(b.index_col,coef2_pure.shape)
169
170		s = b.shape[:-1]; na = a.size_ad; nb = b.size_ad1;
171		coef2_mixed = misc._add_dim2(a.coef2)*np.reshape(b.coef1,s+(na,1,nb,1))*np.reshape(b.coef1,s+(1,na,1,nb))
172		s2 = coef2_mixed.shape
173		index_row_mixed = np.broadcast_to(b.index.reshape(s+(na,1,nb,1)),s2)
174		index_col_mixed = np.broadcast_to(b.index.reshape(s+(1,na,1,nb)),s2)
175		#s3 = s2[:-4]+(na*na*nb*nb,) a.reshape(s3)
176
177		coef1,index1,coef2_pure,index_row_pure,index_col_pure = (
178			_flatten_nlast(a,2) for a in (coef1,index1,coef2_pure,index_row_pure,index_col_pure))
179		coef2_mixed,index_row_mixed,index_col_mixed = (
180			_flatten_nlast(a,4) for a in (coef2_mixed,index_row_mixed,index_col_mixed))
181		
182		return cls.new(a.value,coef1,index1,
183			_concatenate(coef2_pure,coef2_mixed),_concatenate(index_row_pure,index_row_mixed),
184			_concatenate(index_col_pure,index_col_mixed))
size_ad1
187	@property
188	def size_ad1(self):  return self.coef1.shape[-1]
size_ad2
189	@property
190	def size_ad2(self):  return self.coef2.shape[-1]
def reshape(self, shape, order='C'):
233	def reshape(self,shape,order='C'):
234		value = self.value.reshape(shape,order=order)
235		shape1 = value.shape+(self.size_ad1,)
236		shape2 = value.shape+(self.size_ad2,)
237		return self.new(value,
238			self.coef1.reshape(shape1,order=order), self.index.reshape(shape1,order=order),
239			self.coef2.reshape(shape2,order=order),self.index_row.reshape(shape2,order=order),
240			self.index_col.reshape(shape2,order=order))
def broadcast_to(self, shape):
242	def broadcast_to(self,shape):
243		shape1 = shape+(self.size_ad1,)
244		shape2 = shape+(self.size_ad2,)
245		return self.new(np.broadcast_to(self.value,shape), 
246			np.broadcast_to(self.coef1,shape1), np.broadcast_to(self.index,shape1),
247			np.broadcast_to(self.coef2,shape2), np.broadcast_to(self.index_row,shape2), 
248			np.broadcast_to(self.index_col,shape2))
def pad(self, pad_width, *args, constant_values=0, **kwargs):
250	def pad(self,pad_width,*args,constant_values=0,**kwargs):
251		def _pad(arr):return np.pad(arr,pad_width+((0,0),),*args,constant_values=0,**kwargs)
252		return self.new(
253			np.pad(self.value,pad_width,*args,constant_values=constant_values,**kwargs),
254			_pad(self.coef1),_pad(self.index),
255			_pad(self.coef2),_pad(self.index_row),_pad(self.index_col))
def transpose(self, axes=None):
257	def transpose(self,axes=None):
258		if axes is None: axes = tuple(reversed(range(self.ndim)))
259		axes2 = tuple(axes) +(self.ndim,)
260		return self.new(self.value.transpose(axes),
261			self.coef1.transpose(axes2),self.index.transpose(axes2),
262			self.coef2.transpose(axes2),self.index_row.transpose(axes2),
263			self.index_col.transpose(axes2))
def allclose(self, other, *args, **kwargs):
265	def allclose(self,other,*args,**kwargs):
266		raise ValueError("Unsupported, sorry, please try allclose(a.to_dense(),b.to_dense())")
def sum(self, axis=None, out=None, **kwargs):
269	def sum(self,axis=None,out=None,**kwargs):
270		if axis is None: return self.flatten().sum(axis=0,out=out,**kwargs)
271		value = self.value.sum(axis,**kwargs)
272
273		shape1 = value.shape + (self.size_ad1 * self.shape[axis],)
274		coef1 = np.moveaxis(self.coef1, axis,-1).reshape(shape1)
275		index = np.moveaxis(self.index, axis,-1).reshape(shape1)
276
277		shape2 = value.shape + (self.size_ad2 * self.shape[axis],)
278		coef2 = np.moveaxis(self.coef2, axis,-1).reshape(shape2)
279		index_row = np.moveaxis(self.index_row, axis,-1).reshape(shape2)
280		index_col = np.moveaxis(self.index_col, axis,-1).reshape(shape2)
281
282		out = self.new(value,coef1,index,coef2,index_row,index_col)
283		return out
def bound_ad(self):
286	def bound_ad(self):
287		def maxi(a): return int(cps.max(a,initial=-1))
288		return 1+np.max((maxi(self.index),maxi(self.index_row),maxi(self.index_col)))
def to_dense(self, dense_size_ad=None):
289	def to_dense(self,dense_size_ad=None):
290		# Can be accelerated using np.bincount, similarly to spAD.to_dense
291		def mvax(arr): return np.moveaxis(arr,-1,0)
292		dsad = self.bound_ad() if dense_size_ad is None else dense_size_ad
293		coef1 = np.zeros_like(self.value,shape=self.shape+(dsad,))
294		for c,i in zip(mvax(self.coef1),mvax(self.index)):
295			np.put_along_axis(coef1,_add_dim(i),np.take_along_axis(coef1,_add_dim(i),axis=-1)+_add_dim(c),axis=-1)
296		coef2 = np.zeros_like(self.value,shape=self.shape+(dsad*dsad,))
297		for c,i in zip(mvax(self.coef2),mvax(self.index_row*dsad+self.index_col)):
298			np.put_along_axis(coef2,_add_dim(i),np.take_along_axis(coef2,_add_dim(i),axis=-1)+_add_dim(c),axis=-1)
299		return Dense2.denseAD2(self.value,coef1,np.reshape(coef2,self.shape+(dsad,dsad)))
def to_first(self):
301	def to_first(self):
302		return Sparse.spAD(self.value,self.coef1,self.index)
def triplets(self):
305	def triplets(self):
306		"""The hessian operator, presented as triplets"""
307		return (self.coef2,(self.index_row,self.index_col))

The hessian operator, presented as triplets

def hessian_operator(self, shape=None):
309	def hessian_operator(self,shape=None):
310		"""
311		The hessian operator, presented as an opaque matrix class, supporting mul.
312		Implicitly sums over all axes. Recommendation : apply simplify_ad before call.
313		"""
314		return misc.tocsr(self.sum().triplets(),shape=shape)

The hessian operator, presented as an opaque matrix class, supporting mul. Implicitly sums over all axes. Recommendation : apply simplify_ad before call.

def tangent_operator(self):
316	def tangent_operator(self): return self.to_first().tangent_operator()
def adjoint_operator(self):
317	def adjoint_operator(self): return self.to_first().adjoint_operator()
def solve_stationnary(self, raw=False):
319	def solve_stationnary(self,raw=False):
320		"""
321		Finds a stationnary point to a quadratic function, provided as a spAD2 array scalar. 
322		Use "raw = True" to obtain the raw linear system and use your own solver.
323		"""
324		mat = self.triplets()
325		rhs = - self.to_first().to_dense(self.bound_ad()).coef
326		return (mat,rhs) if raw else misc.spsolve(mat,rhs)

Finds a stationnary point to a quadratic function, provided as a spAD2 array scalar. Use "raw = True" to obtain the raw linear system and use your own solver.

def solve_weakform(self, raw=False):
328	def solve_weakform(self,raw=False):
329		"""
330		Assume that a spAD2 array scalar represents the quadratic function
331		Q(u,v) = a0 + a1.(u,v) + (u,v).a2.(u,v) of the variable (u,v).
332		Finds u such that Q(u,v) is independent of v.
333		Use "raw = True" to obtain the raw linear system and use your own solver.
334		"""
335		(coef,(row,col)),rhs = self.solve_stationnary(raw=True)
336		n = rhs.size//2
337		rhs = rhs[n:]
338		pos = np.logical_and(row>=n,col<n)
339		mat = (coef[pos],(row[pos]-n,col[pos]))
340		return (mat,rhs) if raw else misc.spsolve(mat,rhs)

Assume that a spAD2 array scalar represents the quadratic function Q(u,v) = a0 + a1.(u,v) + (u,v).a2.(u,v) of the variable (u,v). Finds u such that Q(u,v) is independent of v. Use "raw = True" to obtain the raw linear system and use your own solver.

@classmethod
def concatenate(cls, elems, axis=0):
343	@classmethod
344	def concatenate(cls,elems,axis=0):
345		axis1 = axis if axis>=0 else axis-1
346		elems2 = tuple(cls(e) for e in elems)
347		size_ad1 = max(e.size_ad1 for e in elems2)
348		size_ad2 = max(e.size_ad2 for e in elems2)
349		return cls( 
350		np.concatenate(tuple(e.value for e in elems2), axis=axis), 
351		np.concatenate(tuple(_pad_last(e.coef1,size_ad1)  for e in elems2),axis=axis1),
352		np.concatenate(tuple(_pad_last(e.index,size_ad1)  for e in elems2),axis=axis1),
353		np.concatenate(tuple(_pad_last(e.coef2,size_ad2)  for e in elems2),axis=axis1),
354		np.concatenate(tuple(_pad_last(e.index_row,size_ad2)  for e in elems2),axis=axis1),
355		np.concatenate(tuple(_pad_last(e.index_col,size_ad2)  for e in elems2),axis=axis1))
def simplify_ad(self, *args, **kwargs):
357	def simplify_ad(self,*args,**kwargs):
358		spHelper1 = Sparse.spAD(self.value,self.coef1,self.index)
359		spHelper1.simplify_ad(*args,**kwargs)
360		self.coef1,self.index = spHelper1.coef,spHelper1.index
361
362		if self.size_ad2>0: # Otherwise empty max
363			n_col = 1+np.max(self.index_col)
364			index = self.index_row.astype(np.int64)*n_col + self.index_col.astype(np.int64)
365			spHelper2 = Sparse.spAD(self.value,self.coef2,index)
366			spHelper2.simplify_ad(*args,**kwargs)
367			self.coef2 = spHelper2.coef
368			int_t = self.index_row.dtype.type
369			self.index_row,self.index_col = spHelper2.index//n_col, spHelper2.index%n_col
370			if int_t!=np.int64: self.index_row,self.index_col = (
371				e.astype(int_t) for e in (self.index_row,self.index_col))
372			
373		return self
def identity(*args, **kwargs):
386def identity(*args,**kwargs):
387	arr = Sparse.identity(*args,**kwargs)
388	shape2 = arr.shape+(0,)
389	return spAD2(arr.value,arr.coef,arr.index,
390		np.zeros_like(arr.coef,shape=shape2),
391		np.zeros_like(arr.index,shape=shape2),
392		np.zeros_like(arr.index,shape=shape2))
def register(*args, **kwargs):
394def register(*args,**kwargs):
395	return Sparse.register(*args,**kwargs,ident=identity)
def hessian_operator(f, x, simplify_ad=None, fargs=(), rev_ad=False):
407def hessian_operator(f,x,simplify_ad=None,fargs=tuple(),rev_ad=False):
408	"""
409	Returns the sparse matrix associated to the hessian of f at x,
410	generated using automatic differentiation.
411	Typically used to obtain the sparse matrix of a quadratic form.
412	Output of f is summed, if non-scalar.
413	- simplify_ad (optional): wether to simplify the ad information 
414	   before generating the sparse matrix
415
416
417	*Autodiff support* 
418	Consider the functional D * u**2, written with the following convention
419	>>> def f(u,D,ad_channel=lambda x:x): return ad_channel(D) * u**2
420	See Eikonal.cuda.AnisotropicWave, classes AcousticHamiltonian_Sparse and 
421	ElasticHamiltonian_Sparse for non-trivial examples.
422
423	- Foward autodiff. Returns an denseAD_Lin class (operator plus perturbation), 
424	if f(x) is a first order dense forward AD variable, with the following convention/example :   
425	>>> ad.Sparse2.hessian_operator(f,np.zeros(10),fargs=(ad.Dense.identity(constant=2),))
426
427	- Reverse autodiff support (rev_ad=True). The components of f(x,ad_channel=ones_like)
428	are regarded as independent contributions w.r.t which to compute the sensitivity of the result.
429	"""
430	f_dense = f(x,*fargs)
431	if simplify_ad is None: simplify_ad = f_dense.ndim>0
432	if isinstance(f_dense,Dense.denseAD): # ---- Forward autodiff support ----
433		size_ad = f_dense.size_ad
434		x_ad = identity(constant=x)
435		shape = x_ad.size,x_ad.size
436		ops = []
437		for i in range(-1,size_ad):
438			f_sparse = f(x_ad,*fargs,ad_channel=lambda x:x.value if i==-1 else x.coef[...,i])
439			if simplify_ad: f_sparse.simplify_ad(atol=True,rtol=True)
440			ops.append(f_sparse.hessian_operator(shape=shape))
441		return Dense.denseAD_Lin(ops[0],ops[1:])
442
443	op = _hessian_operator_noAD(f,x,simplify_ad,fargs)
444	if not rev_ad : return op #  ---- No autodiff ----
445	
446	# ---- Reverse autodiff support ----
447	x_ad = identity(constant=x)
448	f_sparse = f(x_ad,*fargs,ad_channel=lambda x:np.ones_like(x))
449	if simplify_ad: f_sparse.simplify_ad(atol=True,rtol=True)
450	coef2,row,col = f_sparse.coef2,f_sparse.index_row,f_sparse.index_col
451	f_sparse = None # deallocate first and zero-th order coefficients
452	
453	def sensitivity(x):
454		x=x.reshape((x_ad.size,*x.shape[x_ad.ndim:]))
455		return np.sum(coef2.reshape(coef2.shape+(1,)*x.ndim)
456			*x.value[row][...,None]*x.coef[col],axis=coef2.ndim-1)
457
458	return op, sensitivity

Returns the sparse matrix associated to the hessian of f at x, generated using automatic differentiation. Typically used to obtain the sparse matrix of a quadratic form. Output of f is summed, if non-scalar.

  • simplify_ad (optional): wether to simplify the ad information before generating the sparse matrix

Autodiff support Consider the functional D * u**2, written with the following convention

>>> def f(u,D,ad_channel=lambda x:x): return ad_channel(D) * u**2
See Eikonal.cuda.AnisotropicWave, classes AcousticHamiltonian_Sparse and 
ElasticHamiltonian_Sparse for non-trivial examples.
  • Foward autodiff. Returns an denseAD_Lin class (operator plus perturbation), if f(x) is a first order dense forward AD variable, with the following convention/example :

    >>> ad.Sparse2.hessian_operator(f,np.zeros(10),fargs=(ad.Dense.identity(constant=2),))
    
  • Reverse autodiff support (rev_ad=True). The components of f(x,ad_channel=ones_like) are regarded as independent contributions w.r.t which to compute the sensitivity of the result.