agd.AutomaticDifferentiation.Sparse2
1# Copyright 2020 Jean-Marie Mirebeau, University Paris-Sud, CNRS, University Paris-Saclay 2# Distributed WITHOUT ANY WARRANTY. Licensed under the Apache License, Version 2.0, see http://www.apache.org/licenses/LICENSE-2.0 3 4import numpy as np 5import functools 6from . import cupy_generic 7from . import ad_generic 8from . import cupy_support as cps 9from . import misc 10from . import Sparse 11from . import Dense 12from . import Dense2 13from . import Base 14 15_add_dim = misc._add_dim; _pad_last = misc._pad_last; _concatenate=misc._concatenate; 16 17 18class spAD2(Base.baseAD): 19 """ 20 A class for sparse forward second order automatic differentiation 21 """ 22 23 def __init__(self,value,coef1=None,index=None,coef2=None,index_row=None,index_col=None,broadcast_ad=False): 24 if self.is_ad(value): 25 assert (coef1 is None and index is None 26 and coef2 is None and index_row is None and index_col is None) 27 self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col \ 28 = value.value,value.coef1,value.index,value.coef2,value.index_row,value.index_col 29 return 30 if ad_generic.is_ad(value): 31 raise Base.ADCastError(f"Attempting to cast {type(value)} to incompatible type spAD2") 32 33 # Create instance 34 self.value = ad_generic.asarray(value) 35 shape = self.shape 36 shape2 = shape+(0,) 37 int_t = cupy_generic.samesize_int_t(value.dtype) 38 assert ((coef1 is None) and (index is None)) or (coef1.shape==index.shape) 39 self.coef1 = (np.zeros_like(value,shape=shape2) if coef1 is None 40 else misc._test_or_broadcast_ad(coef1,shape,broadcast_ad) ) 41 self.index = (np.zeros_like(value,shape=shape2,dtype=int_t) if index is None 42 else misc._test_or_broadcast_ad(index,shape,broadcast_ad) ) 43 44 assert (((coef2 is None) and (index_row is None) and (index_col is None)) 45 or ((coef2.shape==index_row.shape) and (coef2.shape==index_col.shape))) 46 self.coef2 = (np.zeros_like(value,shape=shape2) if coef2 is None 47 else misc._test_or_broadcast_ad(coef2,shape,broadcast_ad) ) 48 self.index_row = (np.zeros_like(value,shape=shape2,dtype=int_t) if index_row is None 49 else misc._test_or_broadcast_ad(index_row,shape,broadcast_ad) ) 50 self.index_col = (np.zeros_like(value,shape=shape2,dtype=int_t) if index_col is None 51 else misc._test_or_broadcast_ad(index_col,shape,broadcast_ad) ) 52 53 @classmethod 54 def order(cls): return 2 55 def copy(self,order='C'): 56 return self.new(self.value.copy(order=order), 57 self.coef1.copy(order=order),self.index.copy(order=order), 58 self.coef2.copy(order=order),self.index_row.copy(order=order),self.index_col.copy(order=order)) 59 def as_tuple(self): return self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col 60 61 # Representation 62 def __iter__(self): 63 for value,coef1,index,coef2,index_row,index_col in zip(self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col): 64 yield self.new(value,coef1,index,coef2,index_row,index_col) 65 66 def __str__(self): 67 return "spAD2"+str((self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col)) 68 def __repr__(self): 69 return "spAD2"+repr((self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col)) 70 71 # Operators 72 def as_func(self,h): 73 """Replaces the symbolic perturbation with h""" 74 value,coef1,coef2 = (misc.add_ndim(e,h.ndim-1) for e in (self.value,self.coef1,self.coef2)) 75 return (value+(coef1*h[self.index]).sum(axis=self.ndim) 76 +0.5*(coef2*h[self.index_row]*h[self.index_col]).sum(axis=self.ndim)) 77 78 def __add__(self,other): 79 if self.is_ad(other): 80 value = self.value+other.value; shape = value.shape 81 return self.new(value, 82 _concatenate(self.coef1,other.coef1,shape), _concatenate(self.index,other.index,shape), 83 _concatenate(self.coef2,other.coef2,shape), _concatenate(self.index_row,other.index_row,shape), _concatenate(self.index_col,other.index_col,shape)) 84 else: 85 return self.new(self.value+other, self.coef1, self.index, 86 self.coef2, self.index_row, self.index_col, broadcast_ad=True) 87 88 def __sub__(self,other): 89 if self.is_ad(other): 90 value = self.value-other.value; shape = value.shape 91 return self.new(value, 92 _concatenate(self.coef1,-other.coef1,shape), _concatenate(self.index,other.index,shape), 93 _concatenate(self.coef2,-other.coef2,shape), _concatenate(self.index_row,other.index_row,shape), _concatenate(self.index_col,other.index_col,shape)) 94 else: 95 return self.new(self.value-other, self.coef1, self.index, 96 self.coef2, self.index_row, self.index_col, broadcast_ad=True) 97 98 def __mul__(self,other): 99 if self.is_ad(other): 100 value = self.value*other.value 101 coef1_a,coef1_b = _add_dim(other.value)*self.coef1,_add_dim(self.value)*other.coef1 102 index_a,index_b = np.broadcast_to(self.index,coef1_a.shape),np.broadcast_to(other.index,coef1_b.shape) 103 coef2_a,coef2_b = _add_dim(other.value)*self.coef2,_add_dim(self.value)*other.coef2 104 index_row_a,index_row_b = np.broadcast_to(self.index_row,coef2_a.shape),np.broadcast_to(other.index_row,coef2_b.shape) 105 index_col_a,index_col_b = np.broadcast_to(self.index_col,coef2_a.shape),np.broadcast_to(other.index_col,coef2_b.shape) 106 107 len_a,len_b = self.coef1.shape[-1],other.coef1.shape[-1] 108 coef2_ab = np.repeat(self.coef1,len_b,axis=-1) * np.tile(other.coef1,len_a) 109 index2_a = np.broadcast_to(np.repeat(self.index,len_b,axis=-1),coef2_ab.shape) 110 index2_b = np.broadcast_to(np.tile(other.index,len_a),coef2_ab.shape) 111 112 return self.new(value,_concatenate(coef1_a,coef1_b),_concatenate(index_a,index_b), 113 np.concatenate((coef2_a,coef2_b,coef2_ab,coef2_ab),axis=-1), 114 np.concatenate((index_row_a,index_row_b,index2_a,index2_b),axis=-1), 115 np.concatenate((index_col_a,index_col_b,index2_b,index2_a),axis=-1)) 116 elif self.isndarray(other): 117 value = self.value*other 118 coef1 = _add_dim(other)*self.coef1 119 index = np.broadcast_to(self.index,coef1.shape) 120 coef2 = _add_dim(other)*self.coef2 121 index_row = np.broadcast_to(self.index_row,coef2.shape) 122 index_col = np.broadcast_to(self.index_col,coef2.shape) 123 return self.new(value,coef1,index,coef2,index_row,index_col) 124 else: 125 return self.new(self.value*other,other*self.coef1,self.index, 126 other*self.coef2,self.index_row,self.index_col) 127 128 def __truediv__(self,other): 129 if self.is_ad(other): 130 return self.__mul__(other.__pow__(-1)) 131 elif self.isndarray(other): 132 inv = 1./other 133 return self.new(self.value*inv,self.coef1*_add_dim(inv),self.index, 134 self.coef2*_add_dim(inv),self.index_row,self.index_col) 135 else: 136 inv = 1./other 137 return self.new(self.value*inv,self.coef1*inv,self.index, 138 self.coef2*inv,self.index_row,self.index_col) 139 140 __rmul__ = __mul__ 141 __radd__ = __add__ 142 def __rsub__(self,other): return -(self-other) 143 def __rtruediv__(self,other): 144 return self.__pow__(-1).__mul__(other) 145 146 def __neg__(self): return self.new(-self.value,-self.coef1,self.index, 147 -self.coef2,self.index_row,self.index_col) 148 149 # Math functions 150 def _math_helper(self,deriv): # Inputs : a=f(x), b=f'(x), c=f''(x), where x=self.value 151 a,b,c=deriv 152 len_1 = self.coef1.shape[-1] 153 coef1_r,index_r = np.repeat(self.coef1,len_1,axis=-1),np.repeat(self.index,len_1,axis=-1) 154 coef1_t,index_t = np.tile(self.coef1,len_1),np.tile(self.index,len_1) 155 return self.new(a,_add_dim(b)*self.coef1,self.index, 156 _concatenate(_add_dim(b)*self.coef2,_add_dim(c)*(coef1_r*coef1_t)), 157 _concatenate(self.index_row, index_r),_concatenate(self.index_col, index_t)) 158 159 @classmethod 160 def compose(cls,a,t): 161 assert isinstance(a,Dense2.denseAD2) and all(cls.is_ad(b) for b in t) 162 b = np.moveaxis(cls.concatenate(t,axis=0),0,-1) # Possible performance hit if ad sizes are inhomogeneous 163 coef1 = _add_dim(a.coef1)*b.coef1 164 index1 = np.broadcast_to(b.index,coef1.shape) 165 coef2_pure = _add_dim(a.coef1)*b.coef2 166 index_row_pure = np.broadcast_to(b.index_row,coef2_pure.shape) 167 index_col_pure = np.broadcast_to(b.index_col,coef2_pure.shape) 168 169 s = b.shape[:-1]; na = a.size_ad; nb = b.size_ad1; 170 coef2_mixed = misc._add_dim2(a.coef2)*np.reshape(b.coef1,s+(na,1,nb,1))*np.reshape(b.coef1,s+(1,na,1,nb)) 171 s2 = coef2_mixed.shape 172 index_row_mixed = np.broadcast_to(b.index.reshape(s+(na,1,nb,1)),s2) 173 index_col_mixed = np.broadcast_to(b.index.reshape(s+(1,na,1,nb)),s2) 174 #s3 = s2[:-4]+(na*na*nb*nb,) a.reshape(s3) 175 176 coef1,index1,coef2_pure,index_row_pure,index_col_pure = ( 177 _flatten_nlast(a,2) for a in (coef1,index1,coef2_pure,index_row_pure,index_col_pure)) 178 coef2_mixed,index_row_mixed,index_col_mixed = ( 179 _flatten_nlast(a,4) for a in (coef2_mixed,index_row_mixed,index_col_mixed)) 180 181 return cls.new(a.value,coef1,index1, 182 _concatenate(coef2_pure,coef2_mixed),_concatenate(index_row_pure,index_row_mixed), 183 _concatenate(index_col_pure,index_col_mixed)) 184 185 #Indexing 186 @property 187 def size_ad1(self): return self.coef1.shape[-1] 188 @property 189 def size_ad2(self): return self.coef2.shape[-1] 190 191 def __getitem__(self,key): 192 ekey = misc.key_expand(key) 193 try: 194 return self.new(self.value[key], 195 self.coef1[ekey], self.index[ekey], 196 self.coef2[ekey], self.index_row[ekey], self.index_col[ekey]) 197 except ZeroDivisionError: # Some cupy versions fail indexing correctly if size==0 198 value = self.value[ekey] 199 shape = value.shape 200 def take(arr): 201 size_ad = arr.shape[-1] 202 return arr[ekey] if size_ad>0 else np.zeros_like(arr,shape=(*shape,size_ad)) 203 return self.new(self.value[key], 204 take(self.coef1), take(self.index), 205 take(self.coef2), take(self.index_row), take(self.index_col)) 206 207 def __setitem__(self,key,other): 208 ekey = misc.key_expand(key) 209 if self.is_ad(other): 210 self.value[key] = other.value 211 212 pad_size = max(self.coef1.shape[-1],other.coef1.shape[-1]) 213 if pad_size>self.coef1.shape[-1]: 214 self.coef1 = _pad_last(self.coef1,pad_size) 215 self.index = _pad_last(self.index,pad_size) 216 self.coef1[ekey] = _pad_last(other.coef1,pad_size) 217 self.index[ekey] = _pad_last(other.index,pad_size) 218 219 pad_size = max(self.coef2.shape[-1],other.coef2.shape[-1]) 220 if pad_size>self.coef2.shape[-1]: 221 self.coef2 = _pad_last(self.coef2,pad_size) 222 self.index_row = _pad_last(self.index_row,pad_size) 223 self.index_col = _pad_last(self.index_col,pad_size) 224 self.coef2[ekey] = _pad_last(other.coef2,pad_size) 225 self.index_row[ekey] = _pad_last(other.index_row,pad_size) 226 self.index_col[ekey] = _pad_last(other.index_col,pad_size) 227 else: 228 self.value[key] = other 229 self.coef1[ekey] = 0. 230 self.coef2[ekey] = 0. 231 232 def reshape(self,shape,order='C'): 233 value = self.value.reshape(shape,order=order) 234 shape1 = value.shape+(self.size_ad1,) 235 shape2 = value.shape+(self.size_ad2,) 236 return self.new(value, 237 self.coef1.reshape(shape1,order=order), self.index.reshape(shape1,order=order), 238 self.coef2.reshape(shape2,order=order),self.index_row.reshape(shape2,order=order), 239 self.index_col.reshape(shape2,order=order)) 240 241 def broadcast_to(self,shape): 242 shape1 = shape+(self.size_ad1,) 243 shape2 = shape+(self.size_ad2,) 244 return self.new(np.broadcast_to(self.value,shape), 245 np.broadcast_to(self.coef1,shape1), np.broadcast_to(self.index,shape1), 246 np.broadcast_to(self.coef2,shape2), np.broadcast_to(self.index_row,shape2), 247 np.broadcast_to(self.index_col,shape2)) 248 249 def pad(self,pad_width,*args,constant_values=0,**kwargs): 250 def _pad(arr):return np.pad(arr,pad_width+((0,0),),*args,constant_values=0,**kwargs) 251 return self.new( 252 np.pad(self.value,pad_width,*args,constant_values=constant_values,**kwargs), 253 _pad(self.coef1),_pad(self.index), 254 _pad(self.coef2),_pad(self.index_row),_pad(self.index_col)) 255 256 def transpose(self,axes=None): 257 if axes is None: axes = tuple(reversed(range(self.ndim))) 258 axes2 = tuple(axes) +(self.ndim,) 259 return self.new(self.value.transpose(axes), 260 self.coef1.transpose(axes2),self.index.transpose(axes2), 261 self.coef2.transpose(axes2),self.index_row.transpose(axes2), 262 self.index_col.transpose(axes2)) 263 264 def allclose(self,other,*args,**kwargs): 265 raise ValueError("Unsupported, sorry, please try allclose(a.to_dense(),b.to_dense())") 266 267 # Reductions 268 def sum(self,axis=None,out=None,**kwargs): 269 if axis is None: return self.flatten().sum(axis=0,out=out,**kwargs) 270 value = self.value.sum(axis,**kwargs) 271 272 shape1 = value.shape + (self.size_ad1 * self.shape[axis],) 273 coef1 = np.moveaxis(self.coef1, axis,-1).reshape(shape1) 274 index = np.moveaxis(self.index, axis,-1).reshape(shape1) 275 276 shape2 = value.shape + (self.size_ad2 * self.shape[axis],) 277 coef2 = np.moveaxis(self.coef2, axis,-1).reshape(shape2) 278 index_row = np.moveaxis(self.index_row, axis,-1).reshape(shape2) 279 index_col = np.moveaxis(self.index_col, axis,-1).reshape(shape2) 280 281 out = self.new(value,coef1,index,coef2,index_row,index_col) 282 return out 283 284 # Conversion 285 def bound_ad(self): 286 def maxi(a): return int(cps.max(a,initial=-1)) 287 return 1+np.max((maxi(self.index),maxi(self.index_row),maxi(self.index_col))) 288 def to_dense(self,dense_size_ad=None): 289 # Can be accelerated using np.bincount, similarly to spAD.to_dense 290 def mvax(arr): return np.moveaxis(arr,-1,0) 291 dsad = self.bound_ad() if dense_size_ad is None else dense_size_ad 292 coef1 = np.zeros_like(self.value,shape=self.shape+(dsad,)) 293 for c,i in zip(mvax(self.coef1),mvax(self.index)): 294 np.put_along_axis(coef1,_add_dim(i),np.take_along_axis(coef1,_add_dim(i),axis=-1)+_add_dim(c),axis=-1) 295 coef2 = np.zeros_like(self.value,shape=self.shape+(dsad*dsad,)) 296 for c,i in zip(mvax(self.coef2),mvax(self.index_row*dsad+self.index_col)): 297 np.put_along_axis(coef2,_add_dim(i),np.take_along_axis(coef2,_add_dim(i),axis=-1)+_add_dim(c),axis=-1) 298 return Dense2.denseAD2(self.value,coef1,np.reshape(coef2,self.shape+(dsad,dsad))) 299 300 def to_first(self): 301 return Sparse.spAD(self.value,self.coef1,self.index) 302 303 #Linear algebra 304 def triplets(self): 305 """The hessian operator, presented as triplets""" 306 return (self.coef2,(self.index_row,self.index_col)) 307 308 def hessian_operator(self,shape=None): 309 """ 310 The hessian operator, presented as an opaque matrix class, supporting mul. 311 Implicitly sums over all axes. Recommendation : apply simplify_ad before call. 312 """ 313 return misc.tocsr(self.sum().triplets(),shape=shape) 314 315 def tangent_operator(self): return self.to_first().tangent_operator() 316 def adjoint_operator(self): return self.to_first().adjoint_operator() 317 318 def solve_stationnary(self,raw=False): 319 """ 320 Finds a stationnary point to a quadratic function, provided as a spAD2 array scalar. 321 Use "raw = True" to obtain the raw linear system and use your own solver. 322 """ 323 mat = self.triplets() 324 rhs = - self.to_first().to_dense(self.bound_ad()).coef 325 return (mat,rhs) if raw else misc.spsolve(mat,rhs) 326 327 def solve_weakform(self,raw=False): 328 """ 329 Assume that a spAD2 array scalar represents the quadratic function 330 Q(u,v) = a0 + a1.(u,v) + (u,v).a2.(u,v) of the variable (u,v). 331 Finds u such that Q(u,v) is independent of v. 332 Use "raw = True" to obtain the raw linear system and use your own solver. 333 """ 334 (coef,(row,col)),rhs = self.solve_stationnary(raw=True) 335 n = rhs.size//2 336 rhs = rhs[n:] 337 pos = np.logical_and(row>=n,col<n) 338 mat = (coef[pos],(row[pos]-n,col[pos])) 339 return (mat,rhs) if raw else misc.spsolve(mat,rhs) 340 341 # Static methods 342 @classmethod 343 def concatenate(cls,elems,axis=0): 344 axis1 = axis if axis>=0 else axis-1 345 elems2 = tuple(cls(e) for e in elems) 346 size_ad1 = max(e.size_ad1 for e in elems2) 347 size_ad2 = max(e.size_ad2 for e in elems2) 348 return cls( 349 np.concatenate(tuple(e.value for e in elems2), axis=axis), 350 np.concatenate(tuple(_pad_last(e.coef1,size_ad1) for e in elems2),axis=axis1), 351 np.concatenate(tuple(_pad_last(e.index,size_ad1) for e in elems2),axis=axis1), 352 np.concatenate(tuple(_pad_last(e.coef2,size_ad2) for e in elems2),axis=axis1), 353 np.concatenate(tuple(_pad_last(e.index_row,size_ad2) for e in elems2),axis=axis1), 354 np.concatenate(tuple(_pad_last(e.index_col,size_ad2) for e in elems2),axis=axis1)) 355 356 def simplify_ad(self,*args,**kwargs): 357 spHelper1 = Sparse.spAD(self.value,self.coef1,self.index) 358 spHelper1.simplify_ad(*args,**kwargs) 359 self.coef1,self.index = spHelper1.coef,spHelper1.index 360 361 if self.size_ad2>0: # Otherwise empty max 362 n_col = 1+np.max(self.index_col) 363 index = self.index_row.astype(np.int64)*n_col + self.index_col.astype(np.int64) 364 spHelper2 = Sparse.spAD(self.value,self.coef2,index) 365 spHelper2.simplify_ad(*args,**kwargs) 366 self.coef2 = spHelper2.coef 367 int_t = self.index_row.dtype.type 368 self.index_row,self.index_col = spHelper2.index//n_col, spHelper2.index%n_col 369 if int_t!=np.int64: self.index_row,self.index_col = ( 370 e.astype(int_t) for e in (self.index_row,self.index_col)) 371 372 return self 373 374# -------- End of class spAD2 ------- 375 376# -------- Utilities ------ 377 378def _flatten_nlast(a,n): 379 assert n>0 380 s=a.shape 381 return a.reshape(s[:-n]+(np.prod(s[-n:]),)) 382 383# -------- Factory method ----- 384 385def identity(*args,**kwargs): 386 arr = Sparse.identity(*args,**kwargs) 387 shape2 = arr.shape+(0,) 388 return spAD2(arr.value,arr.coef,arr.index, 389 np.zeros_like(arr.coef,shape=shape2), 390 np.zeros_like(arr.index,shape=shape2), 391 np.zeros_like(arr.index,shape=shape2)) 392 393def register(*args,**kwargs): 394 return Sparse.register(*args,**kwargs,ident=identity) 395 396# ---------- Sparse hessian extraction --------- 397 398def _hessian_operator_noAD(f,x,simplify_ad=None,fargs=tuple()): 399 """Same as Hessian operator, but does not support f with AD values""" 400 x_ad = identity(constant=x) 401 try: f_ad = f(x_ad,*fargs) 402 except Base.ADCastError: return hessian_operator_denseAD(f,x,simplify_ad=simplify_ad,fargs=fargs) 403 if simplify_ad or simplify_ad is None and f_ad.ndim > 0: f_ad.simplify_ad(atol=True,rtol=True) 404 return f_ad.hessian_operator( shape=(x_ad.size,x_ad.size) ) 405 406def hessian_operator(f,x,simplify_ad=None,fargs=tuple(),rev_ad=False): 407 """ 408 Returns the sparse matrix associated to the hessian of f at x, 409 generated using automatic differentiation. 410 Typically used to obtain the sparse matrix of a quadratic form. 411 Output of f is summed, if non-scalar. 412 - simplify_ad (optional): wether to simplify the ad information 413 before generating the sparse matrix 414 415 416 *Autodiff support* 417 Consider the functional D * u**2, written with the following convention 418 >>> def f(u,D,ad_channel=lambda x:x): return ad_channel(D) * u**2 419 See Eikonal.cuda.AnisotropicWave, classes AcousticHamiltonian_Sparse and 420 ElasticHamiltonian_Sparse for non-trivial examples. 421 422 - Foward autodiff. Returns an denseAD_Lin class (operator plus perturbation), 423 if f(x) is a first order dense forward AD variable, with the following convention/example : 424 >>> ad.Sparse2.hessian_operator(f,np.zeros(10),fargs=(ad.Dense.identity(constant=2),)) 425 426 - Reverse autodiff support (rev_ad=True). The components of f(x,ad_channel=ones_like) 427 are regarded as independent contributions w.r.t which to compute the sensitivity of the result. 428 """ 429 f_dense = f(x,*fargs) 430 if simplify_ad is None: simplify_ad = f_dense.ndim>0 431 if isinstance(f_dense,Dense.denseAD): # ---- Forward autodiff support ---- 432 size_ad = f_dense.size_ad 433 x_ad = identity(constant=x) 434 shape = x_ad.size,x_ad.size 435 ops = [] 436 for i in range(-1,size_ad): 437 f_sparse = f(x_ad,*fargs,ad_channel=lambda x:x.value if i==-1 else x.coef[...,i]) 438 if simplify_ad: f_sparse.simplify_ad(atol=True,rtol=True) 439 ops.append(f_sparse.hessian_operator(shape=shape)) 440 return Dense.denseAD_Lin(ops[0],ops[1:]) 441 442 op = _hessian_operator_noAD(f,x,simplify_ad,fargs) 443 if not rev_ad : return op # ---- No autodiff ---- 444 445 # ---- Reverse autodiff support ---- 446 x_ad = identity(constant=x) 447 f_sparse = f(x_ad,*fargs,ad_channel=lambda x:np.ones_like(x)) 448 if simplify_ad: f_sparse.simplify_ad(atol=True,rtol=True) 449 coef2,row,col = f_sparse.coef2,f_sparse.index_row,f_sparse.index_col 450 f_sparse = None # deallocate first and zero-th order coefficients 451 452 def sensitivity(x): 453 x=x.reshape((x_ad.size,*x.shape[x_ad.ndim:])) 454 return np.sum(coef2.reshape(coef2.shape+(1,)*x.ndim) 455 *x.value[row][...,None]*x.coef[col],axis=coef2.ndim-1) 456 457 return op, sensitivity
19class spAD2(Base.baseAD): 20 """ 21 A class for sparse forward second order automatic differentiation 22 """ 23 24 def __init__(self,value,coef1=None,index=None,coef2=None,index_row=None,index_col=None,broadcast_ad=False): 25 if self.is_ad(value): 26 assert (coef1 is None and index is None 27 and coef2 is None and index_row is None and index_col is None) 28 self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col \ 29 = value.value,value.coef1,value.index,value.coef2,value.index_row,value.index_col 30 return 31 if ad_generic.is_ad(value): 32 raise Base.ADCastError(f"Attempting to cast {type(value)} to incompatible type spAD2") 33 34 # Create instance 35 self.value = ad_generic.asarray(value) 36 shape = self.shape 37 shape2 = shape+(0,) 38 int_t = cupy_generic.samesize_int_t(value.dtype) 39 assert ((coef1 is None) and (index is None)) or (coef1.shape==index.shape) 40 self.coef1 = (np.zeros_like(value,shape=shape2) if coef1 is None 41 else misc._test_or_broadcast_ad(coef1,shape,broadcast_ad) ) 42 self.index = (np.zeros_like(value,shape=shape2,dtype=int_t) if index is None 43 else misc._test_or_broadcast_ad(index,shape,broadcast_ad) ) 44 45 assert (((coef2 is None) and (index_row is None) and (index_col is None)) 46 or ((coef2.shape==index_row.shape) and (coef2.shape==index_col.shape))) 47 self.coef2 = (np.zeros_like(value,shape=shape2) if coef2 is None 48 else misc._test_or_broadcast_ad(coef2,shape,broadcast_ad) ) 49 self.index_row = (np.zeros_like(value,shape=shape2,dtype=int_t) if index_row is None 50 else misc._test_or_broadcast_ad(index_row,shape,broadcast_ad) ) 51 self.index_col = (np.zeros_like(value,shape=shape2,dtype=int_t) if index_col is None 52 else misc._test_or_broadcast_ad(index_col,shape,broadcast_ad) ) 53 54 @classmethod 55 def order(cls): return 2 56 def copy(self,order='C'): 57 return self.new(self.value.copy(order=order), 58 self.coef1.copy(order=order),self.index.copy(order=order), 59 self.coef2.copy(order=order),self.index_row.copy(order=order),self.index_col.copy(order=order)) 60 def as_tuple(self): return self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col 61 62 # Representation 63 def __iter__(self): 64 for value,coef1,index,coef2,index_row,index_col in zip(self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col): 65 yield self.new(value,coef1,index,coef2,index_row,index_col) 66 67 def __str__(self): 68 return "spAD2"+str((self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col)) 69 def __repr__(self): 70 return "spAD2"+repr((self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col)) 71 72 # Operators 73 def as_func(self,h): 74 """Replaces the symbolic perturbation with h""" 75 value,coef1,coef2 = (misc.add_ndim(e,h.ndim-1) for e in (self.value,self.coef1,self.coef2)) 76 return (value+(coef1*h[self.index]).sum(axis=self.ndim) 77 +0.5*(coef2*h[self.index_row]*h[self.index_col]).sum(axis=self.ndim)) 78 79 def __add__(self,other): 80 if self.is_ad(other): 81 value = self.value+other.value; shape = value.shape 82 return self.new(value, 83 _concatenate(self.coef1,other.coef1,shape), _concatenate(self.index,other.index,shape), 84 _concatenate(self.coef2,other.coef2,shape), _concatenate(self.index_row,other.index_row,shape), _concatenate(self.index_col,other.index_col,shape)) 85 else: 86 return self.new(self.value+other, self.coef1, self.index, 87 self.coef2, self.index_row, self.index_col, broadcast_ad=True) 88 89 def __sub__(self,other): 90 if self.is_ad(other): 91 value = self.value-other.value; shape = value.shape 92 return self.new(value, 93 _concatenate(self.coef1,-other.coef1,shape), _concatenate(self.index,other.index,shape), 94 _concatenate(self.coef2,-other.coef2,shape), _concatenate(self.index_row,other.index_row,shape), _concatenate(self.index_col,other.index_col,shape)) 95 else: 96 return self.new(self.value-other, self.coef1, self.index, 97 self.coef2, self.index_row, self.index_col, broadcast_ad=True) 98 99 def __mul__(self,other): 100 if self.is_ad(other): 101 value = self.value*other.value 102 coef1_a,coef1_b = _add_dim(other.value)*self.coef1,_add_dim(self.value)*other.coef1 103 index_a,index_b = np.broadcast_to(self.index,coef1_a.shape),np.broadcast_to(other.index,coef1_b.shape) 104 coef2_a,coef2_b = _add_dim(other.value)*self.coef2,_add_dim(self.value)*other.coef2 105 index_row_a,index_row_b = np.broadcast_to(self.index_row,coef2_a.shape),np.broadcast_to(other.index_row,coef2_b.shape) 106 index_col_a,index_col_b = np.broadcast_to(self.index_col,coef2_a.shape),np.broadcast_to(other.index_col,coef2_b.shape) 107 108 len_a,len_b = self.coef1.shape[-1],other.coef1.shape[-1] 109 coef2_ab = np.repeat(self.coef1,len_b,axis=-1) * np.tile(other.coef1,len_a) 110 index2_a = np.broadcast_to(np.repeat(self.index,len_b,axis=-1),coef2_ab.shape) 111 index2_b = np.broadcast_to(np.tile(other.index,len_a),coef2_ab.shape) 112 113 return self.new(value,_concatenate(coef1_a,coef1_b),_concatenate(index_a,index_b), 114 np.concatenate((coef2_a,coef2_b,coef2_ab,coef2_ab),axis=-1), 115 np.concatenate((index_row_a,index_row_b,index2_a,index2_b),axis=-1), 116 np.concatenate((index_col_a,index_col_b,index2_b,index2_a),axis=-1)) 117 elif self.isndarray(other): 118 value = self.value*other 119 coef1 = _add_dim(other)*self.coef1 120 index = np.broadcast_to(self.index,coef1.shape) 121 coef2 = _add_dim(other)*self.coef2 122 index_row = np.broadcast_to(self.index_row,coef2.shape) 123 index_col = np.broadcast_to(self.index_col,coef2.shape) 124 return self.new(value,coef1,index,coef2,index_row,index_col) 125 else: 126 return self.new(self.value*other,other*self.coef1,self.index, 127 other*self.coef2,self.index_row,self.index_col) 128 129 def __truediv__(self,other): 130 if self.is_ad(other): 131 return self.__mul__(other.__pow__(-1)) 132 elif self.isndarray(other): 133 inv = 1./other 134 return self.new(self.value*inv,self.coef1*_add_dim(inv),self.index, 135 self.coef2*_add_dim(inv),self.index_row,self.index_col) 136 else: 137 inv = 1./other 138 return self.new(self.value*inv,self.coef1*inv,self.index, 139 self.coef2*inv,self.index_row,self.index_col) 140 141 __rmul__ = __mul__ 142 __radd__ = __add__ 143 def __rsub__(self,other): return -(self-other) 144 def __rtruediv__(self,other): 145 return self.__pow__(-1).__mul__(other) 146 147 def __neg__(self): return self.new(-self.value,-self.coef1,self.index, 148 -self.coef2,self.index_row,self.index_col) 149 150 # Math functions 151 def _math_helper(self,deriv): # Inputs : a=f(x), b=f'(x), c=f''(x), where x=self.value 152 a,b,c=deriv 153 len_1 = self.coef1.shape[-1] 154 coef1_r,index_r = np.repeat(self.coef1,len_1,axis=-1),np.repeat(self.index,len_1,axis=-1) 155 coef1_t,index_t = np.tile(self.coef1,len_1),np.tile(self.index,len_1) 156 return self.new(a,_add_dim(b)*self.coef1,self.index, 157 _concatenate(_add_dim(b)*self.coef2,_add_dim(c)*(coef1_r*coef1_t)), 158 _concatenate(self.index_row, index_r),_concatenate(self.index_col, index_t)) 159 160 @classmethod 161 def compose(cls,a,t): 162 assert isinstance(a,Dense2.denseAD2) and all(cls.is_ad(b) for b in t) 163 b = np.moveaxis(cls.concatenate(t,axis=0),0,-1) # Possible performance hit if ad sizes are inhomogeneous 164 coef1 = _add_dim(a.coef1)*b.coef1 165 index1 = np.broadcast_to(b.index,coef1.shape) 166 coef2_pure = _add_dim(a.coef1)*b.coef2 167 index_row_pure = np.broadcast_to(b.index_row,coef2_pure.shape) 168 index_col_pure = np.broadcast_to(b.index_col,coef2_pure.shape) 169 170 s = b.shape[:-1]; na = a.size_ad; nb = b.size_ad1; 171 coef2_mixed = misc._add_dim2(a.coef2)*np.reshape(b.coef1,s+(na,1,nb,1))*np.reshape(b.coef1,s+(1,na,1,nb)) 172 s2 = coef2_mixed.shape 173 index_row_mixed = np.broadcast_to(b.index.reshape(s+(na,1,nb,1)),s2) 174 index_col_mixed = np.broadcast_to(b.index.reshape(s+(1,na,1,nb)),s2) 175 #s3 = s2[:-4]+(na*na*nb*nb,) a.reshape(s3) 176 177 coef1,index1,coef2_pure,index_row_pure,index_col_pure = ( 178 _flatten_nlast(a,2) for a in (coef1,index1,coef2_pure,index_row_pure,index_col_pure)) 179 coef2_mixed,index_row_mixed,index_col_mixed = ( 180 _flatten_nlast(a,4) for a in (coef2_mixed,index_row_mixed,index_col_mixed)) 181 182 return cls.new(a.value,coef1,index1, 183 _concatenate(coef2_pure,coef2_mixed),_concatenate(index_row_pure,index_row_mixed), 184 _concatenate(index_col_pure,index_col_mixed)) 185 186 #Indexing 187 @property 188 def size_ad1(self): return self.coef1.shape[-1] 189 @property 190 def size_ad2(self): return self.coef2.shape[-1] 191 192 def __getitem__(self,key): 193 ekey = misc.key_expand(key) 194 try: 195 return self.new(self.value[key], 196 self.coef1[ekey], self.index[ekey], 197 self.coef2[ekey], self.index_row[ekey], self.index_col[ekey]) 198 except ZeroDivisionError: # Some cupy versions fail indexing correctly if size==0 199 value = self.value[ekey] 200 shape = value.shape 201 def take(arr): 202 size_ad = arr.shape[-1] 203 return arr[ekey] if size_ad>0 else np.zeros_like(arr,shape=(*shape,size_ad)) 204 return self.new(self.value[key], 205 take(self.coef1), take(self.index), 206 take(self.coef2), take(self.index_row), take(self.index_col)) 207 208 def __setitem__(self,key,other): 209 ekey = misc.key_expand(key) 210 if self.is_ad(other): 211 self.value[key] = other.value 212 213 pad_size = max(self.coef1.shape[-1],other.coef1.shape[-1]) 214 if pad_size>self.coef1.shape[-1]: 215 self.coef1 = _pad_last(self.coef1,pad_size) 216 self.index = _pad_last(self.index,pad_size) 217 self.coef1[ekey] = _pad_last(other.coef1,pad_size) 218 self.index[ekey] = _pad_last(other.index,pad_size) 219 220 pad_size = max(self.coef2.shape[-1],other.coef2.shape[-1]) 221 if pad_size>self.coef2.shape[-1]: 222 self.coef2 = _pad_last(self.coef2,pad_size) 223 self.index_row = _pad_last(self.index_row,pad_size) 224 self.index_col = _pad_last(self.index_col,pad_size) 225 self.coef2[ekey] = _pad_last(other.coef2,pad_size) 226 self.index_row[ekey] = _pad_last(other.index_row,pad_size) 227 self.index_col[ekey] = _pad_last(other.index_col,pad_size) 228 else: 229 self.value[key] = other 230 self.coef1[ekey] = 0. 231 self.coef2[ekey] = 0. 232 233 def reshape(self,shape,order='C'): 234 value = self.value.reshape(shape,order=order) 235 shape1 = value.shape+(self.size_ad1,) 236 shape2 = value.shape+(self.size_ad2,) 237 return self.new(value, 238 self.coef1.reshape(shape1,order=order), self.index.reshape(shape1,order=order), 239 self.coef2.reshape(shape2,order=order),self.index_row.reshape(shape2,order=order), 240 self.index_col.reshape(shape2,order=order)) 241 242 def broadcast_to(self,shape): 243 shape1 = shape+(self.size_ad1,) 244 shape2 = shape+(self.size_ad2,) 245 return self.new(np.broadcast_to(self.value,shape), 246 np.broadcast_to(self.coef1,shape1), np.broadcast_to(self.index,shape1), 247 np.broadcast_to(self.coef2,shape2), np.broadcast_to(self.index_row,shape2), 248 np.broadcast_to(self.index_col,shape2)) 249 250 def pad(self,pad_width,*args,constant_values=0,**kwargs): 251 def _pad(arr):return np.pad(arr,pad_width+((0,0),),*args,constant_values=0,**kwargs) 252 return self.new( 253 np.pad(self.value,pad_width,*args,constant_values=constant_values,**kwargs), 254 _pad(self.coef1),_pad(self.index), 255 _pad(self.coef2),_pad(self.index_row),_pad(self.index_col)) 256 257 def transpose(self,axes=None): 258 if axes is None: axes = tuple(reversed(range(self.ndim))) 259 axes2 = tuple(axes) +(self.ndim,) 260 return self.new(self.value.transpose(axes), 261 self.coef1.transpose(axes2),self.index.transpose(axes2), 262 self.coef2.transpose(axes2),self.index_row.transpose(axes2), 263 self.index_col.transpose(axes2)) 264 265 def allclose(self,other,*args,**kwargs): 266 raise ValueError("Unsupported, sorry, please try allclose(a.to_dense(),b.to_dense())") 267 268 # Reductions 269 def sum(self,axis=None,out=None,**kwargs): 270 if axis is None: return self.flatten().sum(axis=0,out=out,**kwargs) 271 value = self.value.sum(axis,**kwargs) 272 273 shape1 = value.shape + (self.size_ad1 * self.shape[axis],) 274 coef1 = np.moveaxis(self.coef1, axis,-1).reshape(shape1) 275 index = np.moveaxis(self.index, axis,-1).reshape(shape1) 276 277 shape2 = value.shape + (self.size_ad2 * self.shape[axis],) 278 coef2 = np.moveaxis(self.coef2, axis,-1).reshape(shape2) 279 index_row = np.moveaxis(self.index_row, axis,-1).reshape(shape2) 280 index_col = np.moveaxis(self.index_col, axis,-1).reshape(shape2) 281 282 out = self.new(value,coef1,index,coef2,index_row,index_col) 283 return out 284 285 # Conversion 286 def bound_ad(self): 287 def maxi(a): return int(cps.max(a,initial=-1)) 288 return 1+np.max((maxi(self.index),maxi(self.index_row),maxi(self.index_col))) 289 def to_dense(self,dense_size_ad=None): 290 # Can be accelerated using np.bincount, similarly to spAD.to_dense 291 def mvax(arr): return np.moveaxis(arr,-1,0) 292 dsad = self.bound_ad() if dense_size_ad is None else dense_size_ad 293 coef1 = np.zeros_like(self.value,shape=self.shape+(dsad,)) 294 for c,i in zip(mvax(self.coef1),mvax(self.index)): 295 np.put_along_axis(coef1,_add_dim(i),np.take_along_axis(coef1,_add_dim(i),axis=-1)+_add_dim(c),axis=-1) 296 coef2 = np.zeros_like(self.value,shape=self.shape+(dsad*dsad,)) 297 for c,i in zip(mvax(self.coef2),mvax(self.index_row*dsad+self.index_col)): 298 np.put_along_axis(coef2,_add_dim(i),np.take_along_axis(coef2,_add_dim(i),axis=-1)+_add_dim(c),axis=-1) 299 return Dense2.denseAD2(self.value,coef1,np.reshape(coef2,self.shape+(dsad,dsad))) 300 301 def to_first(self): 302 return Sparse.spAD(self.value,self.coef1,self.index) 303 304 #Linear algebra 305 def triplets(self): 306 """The hessian operator, presented as triplets""" 307 return (self.coef2,(self.index_row,self.index_col)) 308 309 def hessian_operator(self,shape=None): 310 """ 311 The hessian operator, presented as an opaque matrix class, supporting mul. 312 Implicitly sums over all axes. Recommendation : apply simplify_ad before call. 313 """ 314 return misc.tocsr(self.sum().triplets(),shape=shape) 315 316 def tangent_operator(self): return self.to_first().tangent_operator() 317 def adjoint_operator(self): return self.to_first().adjoint_operator() 318 319 def solve_stationnary(self,raw=False): 320 """ 321 Finds a stationnary point to a quadratic function, provided as a spAD2 array scalar. 322 Use "raw = True" to obtain the raw linear system and use your own solver. 323 """ 324 mat = self.triplets() 325 rhs = - self.to_first().to_dense(self.bound_ad()).coef 326 return (mat,rhs) if raw else misc.spsolve(mat,rhs) 327 328 def solve_weakform(self,raw=False): 329 """ 330 Assume that a spAD2 array scalar represents the quadratic function 331 Q(u,v) = a0 + a1.(u,v) + (u,v).a2.(u,v) of the variable (u,v). 332 Finds u such that Q(u,v) is independent of v. 333 Use "raw = True" to obtain the raw linear system and use your own solver. 334 """ 335 (coef,(row,col)),rhs = self.solve_stationnary(raw=True) 336 n = rhs.size//2 337 rhs = rhs[n:] 338 pos = np.logical_and(row>=n,col<n) 339 mat = (coef[pos],(row[pos]-n,col[pos])) 340 return (mat,rhs) if raw else misc.spsolve(mat,rhs) 341 342 # Static methods 343 @classmethod 344 def concatenate(cls,elems,axis=0): 345 axis1 = axis if axis>=0 else axis-1 346 elems2 = tuple(cls(e) for e in elems) 347 size_ad1 = max(e.size_ad1 for e in elems2) 348 size_ad2 = max(e.size_ad2 for e in elems2) 349 return cls( 350 np.concatenate(tuple(e.value for e in elems2), axis=axis), 351 np.concatenate(tuple(_pad_last(e.coef1,size_ad1) for e in elems2),axis=axis1), 352 np.concatenate(tuple(_pad_last(e.index,size_ad1) for e in elems2),axis=axis1), 353 np.concatenate(tuple(_pad_last(e.coef2,size_ad2) for e in elems2),axis=axis1), 354 np.concatenate(tuple(_pad_last(e.index_row,size_ad2) for e in elems2),axis=axis1), 355 np.concatenate(tuple(_pad_last(e.index_col,size_ad2) for e in elems2),axis=axis1)) 356 357 def simplify_ad(self,*args,**kwargs): 358 spHelper1 = Sparse.spAD(self.value,self.coef1,self.index) 359 spHelper1.simplify_ad(*args,**kwargs) 360 self.coef1,self.index = spHelper1.coef,spHelper1.index 361 362 if self.size_ad2>0: # Otherwise empty max 363 n_col = 1+np.max(self.index_col) 364 index = self.index_row.astype(np.int64)*n_col + self.index_col.astype(np.int64) 365 spHelper2 = Sparse.spAD(self.value,self.coef2,index) 366 spHelper2.simplify_ad(*args,**kwargs) 367 self.coef2 = spHelper2.coef 368 int_t = self.index_row.dtype.type 369 self.index_row,self.index_col = spHelper2.index//n_col, spHelper2.index%n_col 370 if int_t!=np.int64: self.index_row,self.index_col = ( 371 e.astype(int_t) for e in (self.index_row,self.index_col)) 372 373 return self
A class for sparse forward second order automatic differentiation
24 def __init__(self,value,coef1=None,index=None,coef2=None,index_row=None,index_col=None,broadcast_ad=False): 25 if self.is_ad(value): 26 assert (coef1 is None and index is None 27 and coef2 is None and index_row is None and index_col is None) 28 self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col \ 29 = value.value,value.coef1,value.index,value.coef2,value.index_row,value.index_col 30 return 31 if ad_generic.is_ad(value): 32 raise Base.ADCastError(f"Attempting to cast {type(value)} to incompatible type spAD2") 33 34 # Create instance 35 self.value = ad_generic.asarray(value) 36 shape = self.shape 37 shape2 = shape+(0,) 38 int_t = cupy_generic.samesize_int_t(value.dtype) 39 assert ((coef1 is None) and (index is None)) or (coef1.shape==index.shape) 40 self.coef1 = (np.zeros_like(value,shape=shape2) if coef1 is None 41 else misc._test_or_broadcast_ad(coef1,shape,broadcast_ad) ) 42 self.index = (np.zeros_like(value,shape=shape2,dtype=int_t) if index is None 43 else misc._test_or_broadcast_ad(index,shape,broadcast_ad) ) 44 45 assert (((coef2 is None) and (index_row is None) and (index_col is None)) 46 or ((coef2.shape==index_row.shape) and (coef2.shape==index_col.shape))) 47 self.coef2 = (np.zeros_like(value,shape=shape2) if coef2 is None 48 else misc._test_or_broadcast_ad(coef2,shape,broadcast_ad) ) 49 self.index_row = (np.zeros_like(value,shape=shape2,dtype=int_t) if index_row is None 50 else misc._test_or_broadcast_ad(index_row,shape,broadcast_ad) ) 51 self.index_col = (np.zeros_like(value,shape=shape2,dtype=int_t) if index_col is None 52 else misc._test_or_broadcast_ad(index_col,shape,broadcast_ad) )
60 def as_tuple(self): return self.value,self.coef1,self.index,self.coef2,self.index_row,self.index_col
73 def as_func(self,h): 74 """Replaces the symbolic perturbation with h""" 75 value,coef1,coef2 = (misc.add_ndim(e,h.ndim-1) for e in (self.value,self.coef1,self.coef2)) 76 return (value+(coef1*h[self.index]).sum(axis=self.ndim) 77 +0.5*(coef2*h[self.index_row]*h[self.index_col]).sum(axis=self.ndim))
Replaces the symbolic perturbation with h
160 @classmethod 161 def compose(cls,a,t): 162 assert isinstance(a,Dense2.denseAD2) and all(cls.is_ad(b) for b in t) 163 b = np.moveaxis(cls.concatenate(t,axis=0),0,-1) # Possible performance hit if ad sizes are inhomogeneous 164 coef1 = _add_dim(a.coef1)*b.coef1 165 index1 = np.broadcast_to(b.index,coef1.shape) 166 coef2_pure = _add_dim(a.coef1)*b.coef2 167 index_row_pure = np.broadcast_to(b.index_row,coef2_pure.shape) 168 index_col_pure = np.broadcast_to(b.index_col,coef2_pure.shape) 169 170 s = b.shape[:-1]; na = a.size_ad; nb = b.size_ad1; 171 coef2_mixed = misc._add_dim2(a.coef2)*np.reshape(b.coef1,s+(na,1,nb,1))*np.reshape(b.coef1,s+(1,na,1,nb)) 172 s2 = coef2_mixed.shape 173 index_row_mixed = np.broadcast_to(b.index.reshape(s+(na,1,nb,1)),s2) 174 index_col_mixed = np.broadcast_to(b.index.reshape(s+(1,na,1,nb)),s2) 175 #s3 = s2[:-4]+(na*na*nb*nb,) a.reshape(s3) 176 177 coef1,index1,coef2_pure,index_row_pure,index_col_pure = ( 178 _flatten_nlast(a,2) for a in (coef1,index1,coef2_pure,index_row_pure,index_col_pure)) 179 coef2_mixed,index_row_mixed,index_col_mixed = ( 180 _flatten_nlast(a,4) for a in (coef2_mixed,index_row_mixed,index_col_mixed)) 181 182 return cls.new(a.value,coef1,index1, 183 _concatenate(coef2_pure,coef2_mixed),_concatenate(index_row_pure,index_row_mixed), 184 _concatenate(index_col_pure,index_col_mixed))
233 def reshape(self,shape,order='C'): 234 value = self.value.reshape(shape,order=order) 235 shape1 = value.shape+(self.size_ad1,) 236 shape2 = value.shape+(self.size_ad2,) 237 return self.new(value, 238 self.coef1.reshape(shape1,order=order), self.index.reshape(shape1,order=order), 239 self.coef2.reshape(shape2,order=order),self.index_row.reshape(shape2,order=order), 240 self.index_col.reshape(shape2,order=order))
242 def broadcast_to(self,shape): 243 shape1 = shape+(self.size_ad1,) 244 shape2 = shape+(self.size_ad2,) 245 return self.new(np.broadcast_to(self.value,shape), 246 np.broadcast_to(self.coef1,shape1), np.broadcast_to(self.index,shape1), 247 np.broadcast_to(self.coef2,shape2), np.broadcast_to(self.index_row,shape2), 248 np.broadcast_to(self.index_col,shape2))
250 def pad(self,pad_width,*args,constant_values=0,**kwargs): 251 def _pad(arr):return np.pad(arr,pad_width+((0,0),),*args,constant_values=0,**kwargs) 252 return self.new( 253 np.pad(self.value,pad_width,*args,constant_values=constant_values,**kwargs), 254 _pad(self.coef1),_pad(self.index), 255 _pad(self.coef2),_pad(self.index_row),_pad(self.index_col))
257 def transpose(self,axes=None): 258 if axes is None: axes = tuple(reversed(range(self.ndim))) 259 axes2 = tuple(axes) +(self.ndim,) 260 return self.new(self.value.transpose(axes), 261 self.coef1.transpose(axes2),self.index.transpose(axes2), 262 self.coef2.transpose(axes2),self.index_row.transpose(axes2), 263 self.index_col.transpose(axes2))
269 def sum(self,axis=None,out=None,**kwargs): 270 if axis is None: return self.flatten().sum(axis=0,out=out,**kwargs) 271 value = self.value.sum(axis,**kwargs) 272 273 shape1 = value.shape + (self.size_ad1 * self.shape[axis],) 274 coef1 = np.moveaxis(self.coef1, axis,-1).reshape(shape1) 275 index = np.moveaxis(self.index, axis,-1).reshape(shape1) 276 277 shape2 = value.shape + (self.size_ad2 * self.shape[axis],) 278 coef2 = np.moveaxis(self.coef2, axis,-1).reshape(shape2) 279 index_row = np.moveaxis(self.index_row, axis,-1).reshape(shape2) 280 index_col = np.moveaxis(self.index_col, axis,-1).reshape(shape2) 281 282 out = self.new(value,coef1,index,coef2,index_row,index_col) 283 return out
289 def to_dense(self,dense_size_ad=None): 290 # Can be accelerated using np.bincount, similarly to spAD.to_dense 291 def mvax(arr): return np.moveaxis(arr,-1,0) 292 dsad = self.bound_ad() if dense_size_ad is None else dense_size_ad 293 coef1 = np.zeros_like(self.value,shape=self.shape+(dsad,)) 294 for c,i in zip(mvax(self.coef1),mvax(self.index)): 295 np.put_along_axis(coef1,_add_dim(i),np.take_along_axis(coef1,_add_dim(i),axis=-1)+_add_dim(c),axis=-1) 296 coef2 = np.zeros_like(self.value,shape=self.shape+(dsad*dsad,)) 297 for c,i in zip(mvax(self.coef2),mvax(self.index_row*dsad+self.index_col)): 298 np.put_along_axis(coef2,_add_dim(i),np.take_along_axis(coef2,_add_dim(i),axis=-1)+_add_dim(c),axis=-1) 299 return Dense2.denseAD2(self.value,coef1,np.reshape(coef2,self.shape+(dsad,dsad)))
305 def triplets(self): 306 """The hessian operator, presented as triplets""" 307 return (self.coef2,(self.index_row,self.index_col))
The hessian operator, presented as triplets
309 def hessian_operator(self,shape=None): 310 """ 311 The hessian operator, presented as an opaque matrix class, supporting mul. 312 Implicitly sums over all axes. Recommendation : apply simplify_ad before call. 313 """ 314 return misc.tocsr(self.sum().triplets(),shape=shape)
The hessian operator, presented as an opaque matrix class, supporting mul. Implicitly sums over all axes. Recommendation : apply simplify_ad before call.
316 def tangent_operator(self): return self.to_first().tangent_operator()
317 def adjoint_operator(self): return self.to_first().adjoint_operator()
319 def solve_stationnary(self,raw=False): 320 """ 321 Finds a stationnary point to a quadratic function, provided as a spAD2 array scalar. 322 Use "raw = True" to obtain the raw linear system and use your own solver. 323 """ 324 mat = self.triplets() 325 rhs = - self.to_first().to_dense(self.bound_ad()).coef 326 return (mat,rhs) if raw else misc.spsolve(mat,rhs)
Finds a stationnary point to a quadratic function, provided as a spAD2 array scalar. Use "raw = True" to obtain the raw linear system and use your own solver.
328 def solve_weakform(self,raw=False): 329 """ 330 Assume that a spAD2 array scalar represents the quadratic function 331 Q(u,v) = a0 + a1.(u,v) + (u,v).a2.(u,v) of the variable (u,v). 332 Finds u such that Q(u,v) is independent of v. 333 Use "raw = True" to obtain the raw linear system and use your own solver. 334 """ 335 (coef,(row,col)),rhs = self.solve_stationnary(raw=True) 336 n = rhs.size//2 337 rhs = rhs[n:] 338 pos = np.logical_and(row>=n,col<n) 339 mat = (coef[pos],(row[pos]-n,col[pos])) 340 return (mat,rhs) if raw else misc.spsolve(mat,rhs)
Assume that a spAD2 array scalar represents the quadratic function Q(u,v) = a0 + a1.(u,v) + (u,v).a2.(u,v) of the variable (u,v). Finds u such that Q(u,v) is independent of v. Use "raw = True" to obtain the raw linear system and use your own solver.
343 @classmethod 344 def concatenate(cls,elems,axis=0): 345 axis1 = axis if axis>=0 else axis-1 346 elems2 = tuple(cls(e) for e in elems) 347 size_ad1 = max(e.size_ad1 for e in elems2) 348 size_ad2 = max(e.size_ad2 for e in elems2) 349 return cls( 350 np.concatenate(tuple(e.value for e in elems2), axis=axis), 351 np.concatenate(tuple(_pad_last(e.coef1,size_ad1) for e in elems2),axis=axis1), 352 np.concatenate(tuple(_pad_last(e.index,size_ad1) for e in elems2),axis=axis1), 353 np.concatenate(tuple(_pad_last(e.coef2,size_ad2) for e in elems2),axis=axis1), 354 np.concatenate(tuple(_pad_last(e.index_row,size_ad2) for e in elems2),axis=axis1), 355 np.concatenate(tuple(_pad_last(e.index_col,size_ad2) for e in elems2),axis=axis1))
357 def simplify_ad(self,*args,**kwargs): 358 spHelper1 = Sparse.spAD(self.value,self.coef1,self.index) 359 spHelper1.simplify_ad(*args,**kwargs) 360 self.coef1,self.index = spHelper1.coef,spHelper1.index 361 362 if self.size_ad2>0: # Otherwise empty max 363 n_col = 1+np.max(self.index_col) 364 index = self.index_row.astype(np.int64)*n_col + self.index_col.astype(np.int64) 365 spHelper2 = Sparse.spAD(self.value,self.coef2,index) 366 spHelper2.simplify_ad(*args,**kwargs) 367 self.coef2 = spHelper2.coef 368 int_t = self.index_row.dtype.type 369 self.index_row,self.index_col = spHelper2.index//n_col, spHelper2.index%n_col 370 if int_t!=np.int64: self.index_row,self.index_col = ( 371 e.astype(int_t) for e in (self.index_row,self.index_col)) 372 373 return self
407def hessian_operator(f,x,simplify_ad=None,fargs=tuple(),rev_ad=False): 408 """ 409 Returns the sparse matrix associated to the hessian of f at x, 410 generated using automatic differentiation. 411 Typically used to obtain the sparse matrix of a quadratic form. 412 Output of f is summed, if non-scalar. 413 - simplify_ad (optional): wether to simplify the ad information 414 before generating the sparse matrix 415 416 417 *Autodiff support* 418 Consider the functional D * u**2, written with the following convention 419 >>> def f(u,D,ad_channel=lambda x:x): return ad_channel(D) * u**2 420 See Eikonal.cuda.AnisotropicWave, classes AcousticHamiltonian_Sparse and 421 ElasticHamiltonian_Sparse for non-trivial examples. 422 423 - Foward autodiff. Returns an denseAD_Lin class (operator plus perturbation), 424 if f(x) is a first order dense forward AD variable, with the following convention/example : 425 >>> ad.Sparse2.hessian_operator(f,np.zeros(10),fargs=(ad.Dense.identity(constant=2),)) 426 427 - Reverse autodiff support (rev_ad=True). The components of f(x,ad_channel=ones_like) 428 are regarded as independent contributions w.r.t which to compute the sensitivity of the result. 429 """ 430 f_dense = f(x,*fargs) 431 if simplify_ad is None: simplify_ad = f_dense.ndim>0 432 if isinstance(f_dense,Dense.denseAD): # ---- Forward autodiff support ---- 433 size_ad = f_dense.size_ad 434 x_ad = identity(constant=x) 435 shape = x_ad.size,x_ad.size 436 ops = [] 437 for i in range(-1,size_ad): 438 f_sparse = f(x_ad,*fargs,ad_channel=lambda x:x.value if i==-1 else x.coef[...,i]) 439 if simplify_ad: f_sparse.simplify_ad(atol=True,rtol=True) 440 ops.append(f_sparse.hessian_operator(shape=shape)) 441 return Dense.denseAD_Lin(ops[0],ops[1:]) 442 443 op = _hessian_operator_noAD(f,x,simplify_ad,fargs) 444 if not rev_ad : return op # ---- No autodiff ---- 445 446 # ---- Reverse autodiff support ---- 447 x_ad = identity(constant=x) 448 f_sparse = f(x_ad,*fargs,ad_channel=lambda x:np.ones_like(x)) 449 if simplify_ad: f_sparse.simplify_ad(atol=True,rtol=True) 450 coef2,row,col = f_sparse.coef2,f_sparse.index_row,f_sparse.index_col 451 f_sparse = None # deallocate first and zero-th order coefficients 452 453 def sensitivity(x): 454 x=x.reshape((x_ad.size,*x.shape[x_ad.ndim:])) 455 return np.sum(coef2.reshape(coef2.shape+(1,)*x.ndim) 456 *x.value[row][...,None]*x.coef[col],axis=coef2.ndim-1) 457 458 return op, sensitivity
Returns the sparse matrix associated to the hessian of f at x, generated using automatic differentiation. Typically used to obtain the sparse matrix of a quadratic form. Output of f is summed, if non-scalar.
- simplify_ad (optional): wether to simplify the ad information before generating the sparse matrix
Autodiff support Consider the functional D * u**2, written with the following convention
>>> def f(u,D,ad_channel=lambda x:x): return ad_channel(D) * u**2
See Eikonal.cuda.AnisotropicWave, classes AcousticHamiltonian_Sparse and
ElasticHamiltonian_Sparse for non-trivial examples.
Foward autodiff. Returns an denseAD_Lin class (operator plus perturbation), if f(x) is a first order dense forward AD variable, with the following convention/example :
>>> ad.Sparse2.hessian_operator(f,np.zeros(10),fargs=(ad.Dense.identity(constant=2),))
Reverse autodiff support (rev_ad=True). The components of f(x,ad_channel=ones_like) are regarded as independent contributions w.r.t which to compute the sensitivity of the result.