agd.Eikonal.HFM_CUDA.AnisotropicWave
This file implements the linear acoustic and elastic wave equations, using custom GPU kernels for efficiency. A reference implementation using sparse matrices is provided for completeness.
1# Copyright 2020 Jean-Marie Mirebeau, University Paris-Sud, CNRS, University Paris-Saclay 2# Distributed WITHOUT ANY WARRANTY. Licensed under the Apache License, Version 2.0, see http://www.apache.org/licenses/LICENSE-2.0 3 4""" 5This file implements the linear acoustic and elastic wave equations, using custom 6GPU kernels for efficiency. A reference implementation using sparse matrices is provided 7for completeness. 8""" 9 10import numpy as np 11import os 12 13from ... import AutomaticDifferentiation as ad 14from ... import FiniteDifferences as fd 15from ... import LinearParallel as lp 16from ... import Metrics 17from ... import Selling 18from ...Metrics.Seismic import Hooke 19from ...ODE.hamiltonian import QuadraticHamiltonian,QuadraticHamiltonianBase 20rm_ad = ad.remove_ad 21 22try: # These features require a GPU, but are optional. 23 import cupy as cp 24 from . import cupy_module_helper 25 from .cupy_module_helper import SetModuleConstant 26 from . import SellingAnisotropicDiffusion 27 from .. import VoronoiDecomposition 28 29except ImportError: 30 cp = None 31 32# ------ Reference implementations using sparse matrices ------- 33# These implementations can be used to check the validity of the GPU kernels. 34# Additionally, they support automatic differentiation (ad.Dense.denseAD_Lin operators). 35 36bc_to_padding = {'Periodic':None,'Neumann':np.nan,'Dirichlet':0} 37 38def _mk_dt_max(dt_max_22, order_x): 39 """ 40 Helper for CFL condition. Includes CFL factors coming from the time and spatial 41 discretization orders in the considered wave equations. 42 - dt_max_22 : CFL in the case where order_x == order_t == 2 43 """ 44 dt_mult_x = {2:1/2, 4:1/np.sqrt(6)} 45 dt_mult_t = {1:2, 2:2, 4:1.28596} 46 return lambda order_t=2, order_x=order_x : dt_max_22 * dt_mult_x[order_x] * dt_mult_t[order_t] 47 48def AcousticHamiltonian_Sparse(ρ,D,dx=1.,order_x=2,shape_dom=None,bc='Periodic', 49 rev_ad=0,save_weights=False): 50 """ 51 Sparse matrix based implementation of the Hamiltonian of the acoustic wave equation. 52 - bc : boundary conditions, see bc_to_padding.keys() 53 - rev_ad (optional) : Implement reverse autodiff for the decomposition weights and inverse density 54 """ 55 padding = bc_to_padding[bc] 56 if shape_dom is None: shape_dom = fd.common_shape((ρ,D),depths=(0,2)) 57 assert len(shape_dom)==len(D) 58 iρ=1/ρ 59 from .. import VoronoiDecomposition # CPU or GPU 60 try: λ,e = VoronoiDecomposition(D) 61 except FileNotFoundError: λ,e = Selling.Decomposition(D) # Fallback to python implem 62 λ = fd.as_field(λ,shape_dom) 63 def Hq(q,ad_channel=lambda x:x): 64 dq = fd.DiffEll(q,e,dx,padding=padding,order=order_x) 65 if padding!=padding: dq[np.isnan(dq)]=0 66 return 0.5 * ad_channel(λ) * np.sum(dq**2,axis=0) 67 def Hp(p,ad_channel=lambda x:x): 68 return 0.5 * ad_channel(iρ) * p**2 69 H = QuadraticHamiltonian(Hq,Hp) # Create then replace quadratic functions with sparse matrices 70 if rev_ad>0: # Reverse autodiff support 71 H.weights = ad.Dense.denseAD(λ, coef=np.zeros_like(λ, shape=(*λ.shape, rev_ad))) 72 H.offsets = e 73 H.iρ = ad.Dense.denseAD(iρ,coef=np.zeros_like(iρ,shape=(*iρ.shape,rev_ad))) 74 rev_ad = (H.weights.coef,H.iρ.coef) 75 else: 76 if save_weights: H.weights,H.M,H.iρ = λ,iρ,iρ 77 rev_ad = (None,None) 78 H.set_spmat(np.zeros_like(rm_ad(D),shape=shape_dom),rev_ad=rev_ad) 79 H.dt_max = _mk_dt_max(dx * np.sqrt(np.min(rm_ad(ρ)/rm_ad(λ).sum(axis=0))), order_x) 80 return H 81 82def ElasticHamiltonian_Sparse(M,C,dx,order_x=2,S=None,shape_dom=None,bc='Periodic', 83 rev_ad=0,save_weights=False): 84 """ 85 Sparse matrix based implementation of the Hamiltonian of the elastic wave equation, namely 86 (1/2) int_X M(x,p(x),p(x)) + C(x,ε(x),ε(x)) dx 87 where ε = grad(q) + grad(q)^T - S q is the stress tensor, and X is the domain. 88 89 - M : (metric) array of positive definite matrices, shape (d,d,n1,...,nd), 90 Also accepts (1,1,n1,...,nd) for isotropic metric. Ex: M = (1/ρ)[None,None] 91 - C : (hooke tensor in voigt notation) array of positive definite matrices, 92 shape (s,s,n1,...,nd) where s = d (d+1)/2 93 - bc : boundary conditions, see bc_to_padding.keys() 94 - rev_ad (optional) : Implement reverse autodiff for the decomposition weights and M. 95 """ 96 padding = bc_to_padding[bc] 97 if shape_dom is None: shape_dom = fd.common_shape((M,C),depths=(2,2)) 98 vdim = len(shape_dom); assert len(C)==(vdim*(vdim+1))//2 99 λ,E = Hooke(C).Selling() 100 λ,E,S,M = fd.common_field( (λ,E,S,M), depths=(1,3,3,2), shape=shape_dom) # broadcasting 101 if S is None: ES = (0,)*vdim 102 else: ES = np.sum(E[:,:,None,:]*S[:,:,:,None],axis=(0,1)) 103 104 def Hq(q,ad_channel=lambda x:x): # Elastic energy 105 λ_ = 2**-vdim * ad_channel(λ) # Normalize, extract relevant ad part 106 dq0 = fd.DiffEll(q[0],E[0],dx,order=order_x,α=ES[0]*q[0],padding=padding) 107 if padding!=padding: dq0[np.isnan(dq0)]=0 108 if vdim==1: return λ_ * np.sum(dq0**2, axis=0) 109 dq1 = fd.DiffEll(q[1],E[1],dx,order=order_x,α=ES[1]*q[1],padding=padding) 110 if padding!=padding: dq1[np.isnan(dq1)]=0 111 if vdim==2: return λ_ * np.sum((dq0+dq1)**2+(dq0+dq1[::-1])**2, axis=0) 112 dq2 = fd.DiffEll(q[2],E[2],dx,order=order_x,α=ES[2]*q[2],padding=padding) 113 if padding!=padding: dq2[np.isnan(dq2)]=0 114 return λ_ * np.sum((dq0+dq1+dq2)**2+(dq0+dq1+dq2[::-1])**2 115 +(dq0+dq1[::-1]+dq2)**2+(dq0+dq1[::-1]+dq2[::-1])**2, axis=0) 116 117 def Hp(p,ad_channel=lambda x:x): # Kinetic energy 118 if M.shape[:2]==(1,1): return 0.5*ad_channel(M[0,0])*np.sum(p**2,axis=0)# Typically M = 1/ρ 119 return 0.5 * p[None,:]*ad_channel(M)*p[:,None] #lp.dot_VAV(p,ad_channel(M),p) changed for rev_ad 120 121 H = QuadraticHamiltonian(Hq,Hp) # Create then replace quadratic functions with sparse matrices 122 if rev_ad>0: # Reverse autodiff support 123 H.weights = ad.Dense.denseAD(λ,coef=np.zeros_like(λ,shape=(*λ.shape,rev_ad))) 124 H.M = H.iρ = ad.Dense.denseAD(M,coef=np.zeros_like(M,shape=(*M.shape,rev_ad))) 125 H.moffsets = E 126 rev_ad = (H.weights.coef,H.M.coef) 127 else: 128 if save_weights: H.weights,H.M,H.iρ = λ,M,M 129 rev_ad = (None,None) 130 H.set_spmat(np.zeros_like(rm_ad(C),shape=(vdim,*shape_dom)),rev_ad=rev_ad) 131 H.reshape = lambda x:x; H.unshape = lambda x:x # Dummy, for compatibility only with the GPU kernel 132 133 ev = np.linalg.eigvalsh(np.moveaxis(rm_ad(M),(0,1),(-2,-1)))[...,-1] # Take largest eigvenvalue 134 H.dt_max = _mk_dt_max( dx/(np.sqrt(np.max(ev*rm_ad(λ).sum(axis=0)))*vdim), order_x) 135 return H 136 137def AcousticChgVar(q,p,ρ,D,ϕ,X): 138 """ 139 Change of variables in the acoustic wave equation. 140 - q,p,ρ,D (callable) : problem data 141 - ϕ : change of variables 142 - X : points where to evaluate 143 returns 144 - tq,tp,tρ,tD,ϕ(X) (arrays) : coordinate changed problem data, obtained as 145 q(ϕ), p(ϕ) J, ρ(ϕ) J, Φ^-1 D(ϕ) Φ^-t J 146 """ 147 X_ad = ad.Dense.identity(constant=X,shape_free=(len(X),)) 148 ϕ_ad = ϕ_fun(X_ad) 149 ϕ = ϕ_ad.value 150 dϕ = np.moveaxis(ϕ_ad.gradient(),1,0) # Gradient is differential transposed 151 inv_dϕ = lp.inverse(dϕ) 152 Jϕ = lp.det(dϕ) 153 154 D_ = fd.as_field(D(ϕ),Jϕ.shape,depth=2) 155 tD = lp.dot_AA(inv_dϕ,lp.dot_AA(D_,lp.transpose(inv_dϕ))) * Jϕ 156 157 return q(ϕ), p(ϕ)*Jϕ, ρ(ϕ)*Jϕ, tD, ϕ 158 159def ElasticChgVar(q,p,M,C,S,ϕ,X): 160 """ 161 Change of variables in the elastic wave equation. 162 - q,p,M,C,S (callable) : problem data 163 - ϕ (callable) : change of variables 164 - X : points where to evaluate 165 returns 166 - tq,tp,tM,tC,tS,ϕ(X) (arrays) : coordinate changed problem data, obtained as 167 Φ^t q(ϕ), Φ^-1 p(ϕ) J, Φ^t M(ϕ) Φ / J, (Φ^t ε(ϕ) Φ,) 168 ∑_{i'j'k'l'} C_{i'j'k'l'}(ϕ) Ψ^{i'}_i Ψ^{j'}_j Ψ^{k'}_k Ψ^{l'}_l J, 169 ∑_{i'j'} Φ^i_{i'} Φ^j_{j'} S^{i'j'}_{k'}(ϕ) Ψ_k^{k'} + ∑_{k'} ∂^{ij} ϕ_{k'} Ψ_k^{k'} 170 """ 171 X_ad = ad.Dense2.identity(constant=X,shape_free=(len(X),)) 172 ϕ_ad = ϕ_fun(X_ad) 173 ϕ = ϕ_ad.value 174 dϕ = np.moveaxis(ϕ_ad.gradient(),1,0) # Gradient is differential transposed 175 inv_dϕ = lp.inverse(dϕ) 176 Jϕ = lp.det(dϕ) 177 d2ϕ = np.moveaxis(ϕ_ad.hessian(),2,0) 178 179 tq = lp.dot_AV(lp.transpose(dϕ),q(ϕ)) 180 tp = lp.dot_AV(inv_dϕ,p(ϕ))*Jϕ 181 182 M_ = fd.as_field(M(ϕ),Jϕ.shape,depth=2) 183 tM = lp.dot_AA(lp.transpose(dϕ),lp.dot_AA(M_,dϕ))/Jϕ 184 185 C_ = fd.as_field(C(ϕ),Jϕ.shape,depth=2) 186 tC = Hooke(C_).rotate(inv_dϕ).hooke*Jϕ 187 188 S_ = fd.as_field(S(ϕ),Jϕ.shape,depth=3) 189 vdim = len(dϕ) 190 S1 = sum(dϕ[ip,:,None,None]*dϕ[jp,None,:,None]*S_[ip,jp,kp]*inv_dϕ[:,kp] 191 for ip in range(vdim) for jp in range(vdim) for kp in range(vdim)) 192 S2 = sum(d2ϕ[kp,:,:,None]*inv_dϕ[:,kp] for kp in range(vdim)) 193 tS = S1 + S2 194 195 return tq,tp,tM,tC,tS,ϕ 196 197# ------- Implementations based on GPU kernels ------- 198 199class AcousticHamiltonian_Kernel(QuadraticHamiltonianBase): 200 """ 201 The Hamiltonian of an anisotropic acoustic wave equation, implemented with GPU kernels, 202 whose geometry is defined by a generic Riemannianian metric field. 203 The Hamiltonian is a sum of squares of finite differences, via Selling's decomposition. 204 205 The Mathematical expression of the Hamiltonian is 206 (1/2) int_X iρ(x) p(x)^2 + D(x,grad q(x), grad q(x)) dx 207 where X is the domain, and D the is the (dual-)metric. 208 """ 209 210 def __init__(self,ρ,D,dx=1., 211 order_x=2,shape_dom=None,periodic=False, 212 flattened=False,rev_ad=0,iρ=None, 213 block_size=256,traits=None,**kwargs): 214 if cp is None: raise ImportError("Cupy library needed for this class") 215 super(AcousticHamiltonian_Kernel,self).__init__(**kwargs) 216 217 if shape_dom is None: shape_dom = fd.common_shape((ρ,D), 218 depths=(0,1 if flattened else 2)) 219 self._shape_dom = shape_dom 220 self.shape_free = shape_dom 221 size_dom = np.prod(shape_dom) 222 self._sizes_oi = (int(np.ceil(size_dom/block_size)),),(block_size,) 223 self.dx = dx 224 225 fwd_ad = ρ.size_ad if ad.is_ad(ρ) else 0 226 self._size_ad = max(rev_ad,fwd_ad) 227 228 # Init the GPU kernel 229 traits_default = { 230 'fourth_order_macro':{2:False,4:True}[order_x], 231 'Scalar':np.float32, 232 'Int':self.int_t, 233 'OffsetT':np.int8, 234 'ndim_macro':self.ndim, 235 'periodic_macro':periodic, 236 'periodic_axes':(True,)*self.ndim, 237 'fwd_macro':fwd_ad>0, 238 } 239 self._traits = traits_default if traits is None else {**traits_default,**traits} 240 241 # Setup the problem data 242 dtype32 = (self.float_t==np.float32) 243 iρ = ad.cupy_generic.cupy_set(1/ρ if iρ is None else iρ, dtype32=dtype32) 244 ρ=None 245 iρ = fd.as_field(iρ,shape_dom) 246 self.iρ = ad.Base.ascontiguousarray(iρ) 247 iρ=None 248 249 D = ad.cupy_generic.cupy_set(D, dtype32=dtype32) 250 λ,e = VoronoiDecomposition(D,offset_t=np.int8,flattened=flattened) 251 D=None 252 self._traits['decompdim'] = len(λ) 253 self.dt_max = _mk_dt_max(dx/np.sqrt( 254 np.max(rm_ad(self.iρ)*rm_ad(λ).sum(axis=0))), order_x) 255 λ = fd.as_field(λ,shape_dom,depth=1) 256 self.weights = ad.Base.ascontiguousarray(np.moveaxis(λ,0,-1)) 257 λ=None 258 259 if self.way_ad<0: # Reverse autodiff 260 self.weights = ad.Dense.denseAD(self.weights) 261 self.iρ = ad.Dense.denseAD(self.iρ) 262 for arr in (self.weights,self.iρ): 263 self.check_ad(arr) if self.size_ad>0 else self.check(arr) 264 265 # Generate the cuda module 266 cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda") 267 date_modified = cupy_module_helper.getmtime_max(cuda_path) 268 source = cupy_module_helper.traits_header(self.traits,size_of_shape=True) 269 270 source += [ 271 '#include "Kernel_AcousticWave.h"', 272 f"// Date cuda code last modified : {date_modified}"] 273 cuoptions = ("-default-device", f"-I {cuda_path}") 274 275 source="\n".join(source) 276 module = cupy_module_helper.GetModule(source,cuoptions) 277 get_indices = module.get_function('get_indices') 278 self._DpH_Kernel = module.get_function("DpH") 279 self._DqH_Kernel = module.get_function("DqH") 280 281 if self.size_ad: 282 source_ad = f"#define size_ad_macro {self.size_ad}\n"+source 283 module_ad = cupy_module_helper.GetModule(source_ad,cuoptions) 284 self._DpH_Kernel_ad = module_ad.get_function("DpH") 285 self._DqH_Kernel_ad = module_ad.get_function("DqH") 286 self._modules = (module,module_ad) 287 else: self._modules = (module,) 288 289 for module in self._modules: 290 SetModuleConstant(module,'shape_tot',self.shape_dom,self.int_t) 291 SetModuleConstant(module,'size_tot',np.prod(self.shape_dom),self.int_t) 292 293 # Get the indices of the scheme neighbors 294 self._ineigh = cp.full((*self.weights.shape,order_x),-2**30,dtype=self.int_t) 295 e = cp.ascontiguousarray(fd.as_field(e,shape_dom,depth=2)) 296 get_indices(*self._sizes_oi,(e,self._ineigh)) 297 e=None 298 self.check(self._ineigh) 299 300 @property 301 def M(self): 302 """Alias for the inverse density""" 303 return self.iρ 304 @property 305 def size_ad(self): return self._size_ad 306 @property 307 def way_ad(self): 308 """0 : no AD. 1 : forward AD. -1 : reverse AD""" 309 return 0 if self.size_ad==0 else 1 if self.traits['fwd_macro'] else -1 310 def rev_reset(self): 311 """ 312 Reset the accumulators for reverse autodiff 313 Namely (self.metric.coef and self.weights.coef) 314 """ 315 self.weights.coef[:]=0 316 self.iρ.coef[:]=0 317 318 @property 319 def shape_dom(self): 320 """Shape of the PDE discretization domain.""" 321 return self._shape_dom 322 @property 323 def ndim(self): 324 """Number of dimensions of the domain.""" 325 return len(self.shape_dom) 326 @property 327 def decompdim(self): 328 """Length of quadratic form decomposition.""" 329 return self.weights.shape[-1] 330 331 @property 332 def traits(self): return self._traits 333 @property 334 def int_t(self): return np.int32 335 @property 336 def float_t(self): return self.traits['Scalar'] 337 @property 338 def order_x(self): return 4 if self.traits['fourth_order_macro'] else 2 339 340 def Expl_p(self,q,p,δ): 341 """ 342 Explicit time step for the impulsion p. 343 Expects : q and p reshaped in GPU friendly format, using self.reshape 344 """ 345 mult = -δ/self.dx**2/{2:2,4:24}[self.order_x] 346 if self.preserve_p: p = self._mk_copy(p) 347 if ad.is_ad(q): # Use automatic differentiation, forward or reverse 348 SetModuleConstant(self._modules[1],'DqH_mult',mult,self.float_t) 349 self.check_ad(q); self.check_ad(p) 350 self._DqH_Kernel_ad(*self._sizes_oi,(self.weights.value,self._ineigh, 351 q.value,self.weights.coef,q.coef,p.coef,p.value)) 352 else: # No automatic differentiation 353 SetModuleConstant(self._modules[0],'DqH_mult',mult,self.float_t) 354 self.check(q); self.check(p) 355 self._DqH_Kernel(*self._sizes_oi, 356 (ad.remove_ad(self.weights),self._ineigh,q,p)) 357 return p 358 359 def Expl_q(self,q,p,δ): 360 """ 361 Explicit time step for the position q. 362 Expects : q and p reshaped in GPU friendly format, using self.reshape 363 """ 364 if self.preserve_q: q = self._mk_copy(q) 365 if ad.is_ad(p): # Use automatic differentiation, forward or reverse 366 SetModuleConstant(self._modules[1],'DpH_mult',δ,self.float_t) 367 self.check_ad(q); self.check_ad(p) 368 self._DpH_Kernel_ad(*self._sizes_oi,(self.iρ.value,p.value, 369 self.iρ.coef,p.coef,q.coef,q.value)) 370 else: # No automatic differentiation 371 self.check(q); self.check(p) 372 q += δ*ad.remove_ad(self.iρ)*p 373 return q 374 375 def _DqH(self,q): return self.Expl_p(q,np.zeros_like(q),-1) 376 def _DpH(self,p): return self.Expl_q(np.zeros_like(p),p, 1) 377 378 def check_ad(self,x): 379 """ 380 Puts zero ad coefficients, with the correct c-contiguity, if those are empty. 381 Basic additional checks of shapes, contiguity. 382 """ 383 assert self.way_ad!=0 # Either forward and reverse AD must be supported 384 if x.size_ad==0: x.coef = np.zeros_like(x.value,shape=(*x.shape,self.size_ad)) 385 assert isinstance(x,ad.Dense.denseAD) and x.size_ad==self.size_ad 386 assert x.coef.flags.c_contiguous 387 assert x.coef.dtype==self.float_t 388 self.check(x.value) 389 390 def check(self,x): 391 """ Basic check of the types, shapes, contiguity of GPU inputs. """ 392 assert x.flags.c_contiguous 393 assert x.shape[:self.ndim]==self.shape_dom 394 assert x.dtype in (self.float_t,self.int_t) 395 396 397class WaveHamiltonianBase(QuadraticHamiltonianBase): 398 """ 399 A base class for GPU implementations of Hamiltonians of wave equations. 400 Warning : position and impulsion arrays are padded and reshaped in a GPU friendly format. 401 __init__ arguments 402 - constant values : used as padding reshape function 403 """ 404 405 def __init__(self,shape_dom,traits=None,periodic=False,constant_values=0,**kwargs): 406 super(WaveHamiltonianBase,self).__init__(**kwargs) 407 self._shape_dom = shape_dom 408 409 traits_default = { 410 'Scalar':np.float32, 411 'Int':np.int32, 412 'bypass_zeros_macro':True, 413 'fourth_order_macro':False, 414 'shape_i': {1:(64,),2:(8,8),3:(4,4,4),4:(4,4,2,2)}[self.ndim], 415 } 416 if traits is not None: traits_default.update(traits) 417 traits = traits_default 418 419 if np.ndim(periodic)==0: periodic=(periodic,)*self.ndim 420 assert periodic is None or len(periodic)==self.ndim 421 self._periodic = periodic 422 traits.update({ 423 'ndim_macro':self.ndim, 424 'periodic_macro':any(periodic), 425 'periodic_axes':periodic, 426 }) 427 self._traits = traits 428 self._check = True # Perform some safety input checks before calling GPU kernels 429 430 assert len(self.shape_i) == self.ndim 431 self._shape_o = tuple(fd.round_up_ratio(shape_dom,self.shape_i)) 432 self.constant_values = constant_values 433 434 def _dot(self,q,p): 435 """Duality bracket, ignoring NaNs used for padding.""" 436 return np.nansum(q*p) if np.isnan(self.constant_values) else np.sum(q*p) 437 438 # Domain shape 439 @property 440 def shape_dom(self): 441 """Shape of the PDE discretization domain.""" 442 return self._shape_dom 443 @property 444 def shape_o(self): 445 """Outer shape : number of blocks in each dimension (for GPU kernels).""" 446 return self._shape_o 447 @property 448 def shape_i(self): 449 """Inner shape : accessed by a block of threads (for GPU kernels).""" 450 return self.traits['shape_i'] 451 @property 452 def size_o(self): return np.prod(self.shape_o) 453 @property 454 def size_i(self): return np.prod(self.shape_i) 455 456 @property 457 def ndim(self): 458 """Number of dimensions of the domain.""" 459 return len(self.shape_dom) 460 @property 461 def symdim(self): 462 """DImension of the space of symmetric matrices.""" 463 return _triangular_number(self.ndim) 464 465 # Traits 466 @property 467 def traits(self): 468 """Collection of traits, passed to GPU kernel.""" 469 return self._traits 470 471 @property 472 def float_t(self): 473 """Scalar type used by the GPU kernel. Defaults to float32.""" 474 return self.traits['Scalar'] 475 @property 476 def int_t(self): 477 """Int type used by the GPU kernel. Defaults to int32.""" 478 return self.traits['Int'] 479 @property 480 def order_x(self): 481 """Consistency order of the finite differences scheme.""" 482 return 4 if self.traits['fourth_order_macro'] else 2 483 @property 484 def periodic(self): 485 """Wether to apply periodic boundary conditions, for each axis.""" 486 return self._periodic 487 488 def SetCst(self,name,value,dtype): 489 """Set a constant un the cuda module""" 490 for module in self._modules: 491 SetModuleConstant(module,name,value,dtype) 492 493 def reshape(self,x,constant_values=None,**kwargs): 494 """ 495 Reshapes and pads the array x in a kernel friendly format. 496 Factors shape_i. Also moves the geometry axis before 497 shape_i, following the convention of HookeWave.h 498 - **kwargs : passed to fd.block_expand 499 """ 500 if constant_values is None: constant_values = self.constant_values 501 x = fd.block_expand(x,self.shape_i,constant_values=constant_values,**kwargs) 502 if x.ndim == 2*self.ndim: pass 503 elif x.ndim == 2*self.ndim+1: x = np.moveaxis(x,0,self.ndim) 504 else: raise ValueError("Unsupported geometry depth") 505 #Cast to correct float and int types 506 if x.dtype==np.float64: dtype=self.float_t 507 elif x.dtype==np.int64: dtype=self.int_t 508 else: dtype=x.dtype 509 #dtype = {np.float64:self.float_t,np.int64:self.int_t}.get(value.dtype,value.dtype) fails 510 if ad.is_ad(x): 511 x.value = cp.ascontiguousarray(cp.asarray(x.value,dtype=dtype)) 512 # Setup a suitable contiguity of the AD variable coefficients for the GPU kernel 513 x_coef = cp.ascontiguousarray(cp.asarray(np.moveaxis(x.coef,-1,self.ndim),dtype=dtype)) 514 x.coef = np.moveaxis(x_coef,self.ndim,-1) 515 return x 516 else: return cp.ascontiguousarray(cp.asarray(x,dtype=dtype)) 517 518 return cp.ascontiguousarray(cp.asarray(value,dtype=dtype)) 519 520 def unshape(self,value): 521 """Inverse operation to reshape""" 522 if value.ndim==2*self.ndim: pass 523 elif value.ndim==2*self.ndim+1: value = np.moveaxis(value,self.ndim,0) 524 else: raise ValueError("Unsupported geometry depth") 525 return fd.block_squeeze(value,self.shape_dom) 526 527class ElasticHamiltonian_Kernel(WaveHamiltonianBase): 528 """ 529 The Hamiltonian of an anisotropic elastic wave equation, implemented with GPU kernels, 530 whose geometry is defined by a generic Hooke tensor field. 531 The Hamiltonian is a sum of squares of finite differences, via Voronoi's decomposition. 532 Dirichlet boundary conditions are applied, see also optional damping layers. 533 534 The Mathematical expression of the Hamiltonian is 535 (1/2) int_X M(x,p(x),p(x)) + C(x,ε(x),ε(x)) dx 536 where ε = grad(q) + grad(q)^T - S q is the stress tensor, and X is the domain. 537 538 - M : (metric) array of positive definite matrices, shape (d,d,n1,...,nd), 539 Also accepts (1,1,n1,...,nd) for isotropic metric. Ex: M = (1/ρ)[None,None] 540 - C : (hooke tensor in voigt notation) array of positive definite matrices, 541 shape (s,s,n1,...,nd) where s = d (d+1)/2 542 Reuse decomposition from previous run : C = H_prev.C_for_reuse 543 544 Warning : accessing some of this object's properties has a significant memory and 545 computational cost, because all data is converted to a GPU kernel friendly format. 546 """ 547 def __init__(self,M,C,dx=1.,order_x=2,shape_dom=None, 548 traits=None,rev_ad=0,**kwargs): 549 if cp is None: raise ImportError("Cupy library needed for this class") 550 fwd_ad = M.size_ad if ad.is_ad(M) else 0 551 if fwd_ad>0 and rev_ad>0: 552 raise ValueError("Please choose between forward and reverse differentiation") 553 self._size_ad = max(fwd_ad,rev_ad) 554 555 traits_default = { 556 'isotropic_metric_macro':M.shape[0]==1, 557 'fourth_order_macro':{2:False,4:True}[order_x], 558 'fwd_macro': fwd_ad>0 559 } 560 if traits is not None: traits_default.update(traits) 561 traits = traits_default 562 563 # Flatten the symmetric matrix arrays, if necessary 564 if (M.ndim==2 or M.shape[0] in (1,M.ndim-2)) and M.shape[0]==M.shape[1]: 565 M = Metrics.misc.flatten_symmetric_matrix(M) 566 if isinstance(C,tuple): self._weights,self._offsets,shape_dom = C # Reuse decomposition 567 elif (C.ndim==2 or C.shape[0]==_triangular_number(C.ndim-2)) and C.shape[0]==C.shape[1]: 568 C = Metrics.misc.flatten_symmetric_matrix(C) 569 570 # Get the domain shape 571 if shape_dom is None: shape_dom = M.shape[1:] if isinstance(C,tuple) else \ 572 fd.common_shape((M,C),depths=(1,1)) 573 super(ElasticHamiltonian_Kernel,self).__init__(shape_dom,traits,**kwargs) 574 575 if self.ndim not in (1,2,3): 576 raise ValueError("Only domains of dimension 1, 2 and 3 are supported") 577 578 self._offsetpack_t = np.int32 579 self.dx = dx 580 581 # Voronoi decomposition of the Hooke tensor 582 if not isinstance(C,tuple): # Decomposition was bypassed by feeding weights, offsets 583 assert len(C) == self.decompdim 584 from .. import VoronoiDecomposition 585 weights,offsets = VoronoiDecomposition(ad.cupy_generic.cupy_set(C), 586 offset_t=np.int8,flattened=True) 587 C = None 588 # Broadcast if necessary 589 weights = fd.as_field(weights,self.shape_dom,depth=1) 590 offsets = fd.as_field(offsets,self.shape_dom,depth=2) 591 self._weights = self.reshape(weights) 592 weights = None 593 # offsets = offsets.get() # Uncomment if desperate to save memory... 594 self._offsets = self.reshape(self._compress_offsets(offsets),constant_values=0) 595 offsets = None 596 597 # Setup the metric 598 assert len(M) == self.symdim or self.isotropic_metric 599 self._metric = self.reshape(fd.as_field(M,self.shape_dom,depth=1)) 600 M = None 601 602 if self.way_ad<0: # Reverse autodiff 603 self._weights = ad.Dense.denseAD(self._weights) 604 self._metric = ad.Dense.denseAD(self._metric) 605 if self.size_ad: # Forward or reverse autodiff 606 self.check_ad(self._weights) 607 self.check_ad(self._metric) 608 else: 609 self.check(self._weights) 610 self.check(self._metric) 611 self.check(self._offsets) 612 613 614 if self.isotropic_metric: # TODO : case where M is anisotropic, see Sparse variant 615 self.dt_max = _mk_dt_max(dx/(self.ndim*np.sqrt(np.nanmax( 616 ad.remove_ad(self._metric).squeeze(axis=self.ndim) 617 *ad.remove_ad(self._weights).sum(axis=self.ndim)))), 618 order_x=order_x) 619 620 self.shape_free = (*self.shape_o,self.ndim,*self.shape_i) 621 self._sizes_oi = (self.size_o,),(self.size_i,) 622 623 # Generate the cuda module 624 cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda") 625 date_modified = cupy_module_helper.getmtime_max(cuda_path) 626 source = cupy_module_helper.traits_header(self.traits,size_of_shape=True) 627 628 source += [ 629 '#include "Kernel_ElasticWave.h"', 630 f"// Date cuda code last modified : {date_modified}"] 631 cuoptions = ("-default-device", f"-I {cuda_path}") 632 633 source="\n".join(source) 634 module = cupy_module_helper.GetModule(source,cuoptions) 635 self._DpH_Kernel = module.get_function("DpH") 636 self._DqH_Kernel = module.get_function("DqH") 637 638 if self.size_ad: 639 source_ad = f"#define size_ad_macro {self.size_ad}\n"+source 640 module_ad = cupy_module_helper.GetModule(source_ad,cuoptions) 641 self._DpH_Kernel_ad = module_ad.get_function("DpH") 642 self._DqH_Kernel_ad = module_ad.get_function("DqH") 643 self._modules = (module,module_ad) 644 else: self._modules = (module,) 645 646 self.SetCst('shape_o',self.shape_o,self.int_t) 647 self.SetCst('size_o',np.prod(self.shape_o),self.int_t) 648 self.SetCst('shape_tot',self.shape_dom,self.int_t) 649 650 # Traits 651 @property 652 def size_ad(self): return self._size_ad #return self._traits['size_ad_macro'] 653 @property 654 def way_ad(self): 655 """0 : no AD. 1 : forward AD. -1 : reverse AD""" 656 return 0 if self.size_ad==0 else 1 if self._traits['fwd_macro'] else -1 657 def rev_reset(self): 658 """ 659 Reset the accumulators for reverse autodiff 660 Namely (self.metric.coef and self.weights.coef) 661 """ 662 assert self.way_ad<0 663 self._metric.coef[:]=0 664 self._weights.coef[:]=0 665 666 @property 667 def isotropic_metric(self): 668 """Wether M has shape (1,1,...) or (d,d,...)""" 669 return self._traits['isotropic_metric_macro'] 670 671 @property 672 def offsetpack_t(self): 673 """Type used to store a matrix offset. Defaults to int32.""" 674 return self._offsetpack_t 675 676 @property 677 def decompdim(self): 678 """Number of terms in the decomposition of a generic Hooke tensor.""" 679 return _triangular_number(self.symdim) 680 681 @property 682 def offsetnbits(self): 683 """Number of bits for storing each integer coefficient of the matrix offsets""" 684 return (None,10,10,5)[self.ndim] 685 686 @property 687 def voigt2lower(self): 688 """ 689 Correspondance between voigt notation and symmetric matrix indexing. 690 d=2 : [0 ] d=3 : [0 ] 691 [2 1] [5 1 ] 692 [4 3 2] 693 """ 694 return {1:(0,), 2:(0,2,1), 3:(0,5,1,4,3,2)}[self.ndim] 695 696 # PDE parameters 697 @property 698 def weights(self): 699 """Weights, obtained from Voronoi's decomposition of the Hooke tensors.""" 700 return self.unshape(self._weights) 701 @property 702 def offsets(self): 703 """Offsets, obtained from Voronoi's decomposition of the Hooke tensors.""" 704 offsets2 = self.unshape(self._offsets) 705 # Uncompress 706 offsets = cp.zeros((self.symdim,)+offsets2.shape,dtype=np.int8) 707 order = self.voigt2lower 708 nbits = self.offsetnbits 709 for i,o in enumerate(order): 710 offsets[o]=((offsets2//2**(i*nbits))% 2**nbits) - 2**(nbits-1) 711 return offsets 712 713 @property 714 def C_for_reuse(self): 715 """Avoid the initial tensor decomposition step.""" 716 return self._weights,self._offsets,self.shape_dom 717 718 def _compress_offsets(self,offsets): 719 """Compress each matrix offset (symmetric, integer valued), into a single int_t""" 720 offsets2 = np.zeros_like(offsets,shape=offsets.shape[1:],dtype=self.offsetpack_t) 721 order = self.voigt2lower 722 nbits = self.offsetnbits 723 for i,o in enumerate(order): 724 offsets2 += (offsets[o].astype(self.int_t)+2**(nbits-1)) * 2**(nbits*i) 725 return offsets2 726 727 @property 728 def hooke(self): 729 """The Hooke tensor, input 'C', defining the elasticity properties of the medium.""" 730 # The Hooke tensor is not stored, so as to save memory. We thus reconstruct it 731 # from Voronoi's decomposition. 732 weights,offsets = self.weights,self.offsets 733 full_hooke = (weights * lp.outer_self(offsets)).sum(axis=2) 734 return full_hooke 735 736 @property 737 def metric(self): 738 """ 739 The metric tensor, input 'M'. Defines the norm for measuring momentum. 740 Usually metric = Id/ρ . 741 """ 742 return Metrics.misc.expand_symmetric_matrix(self.unshape(self._metric)) 743 744 @property 745 def iρ(self): 746 """Inverse density. Alias for the metric M used to define the kinetic energy.""" 747 return self.metric 748 749 def Expl_p(self,q,p,δ): 750 """ 751 Explicit time step for the impulsion p. 752 Expects : q and p reshaped in GPU friendly format, using self.reshape 753 """ 754 mult = -δ/self.dx**2/{2:4,4:48}[self.order_x] 755 if self.preserve_p: p = self._mk_copy(p) 756 if ad.is_ad(q): # Use automatic differentiation, forward or reverse 757 SetModuleConstant(self._modules[1],'DqH_mult',mult,self.float_t) 758 self.check_ad(q); self.check_ad(p) 759 self._DqH_Kernel_ad(self.shape_o,self.shape_i,(self._weights.value,self._offsets, 760 q.value,self._weights.coef,q.coef,p.coef,p.value)) 761 else: # No automatic differentiation 762 SetModuleConstant(self._modules[0],'DqH_mult',mult,self.float_t) 763 self.check(q); self.check(p) 764 self._DqH_Kernel(self.shape_o,self.shape_i, 765 (ad.remove_ad(self._weights),self._offsets,q,p)) 766 return p 767 768 def Expl_q(self,q,p,δ): 769 """ 770 Explicit time step for the position q. 771 Expects : q and p reshaped in GPU friendly format, using self.reshape 772 """ 773 if self.preserve_q: q = self._mk_copy(q) 774 if ad.is_ad(p): # Use automatic differentiation, forward or reverse 775 SetModuleConstant(self._modules[1],'DpH_mult',δ,self.float_t) 776 self.check_ad(q); self.check_ad(p) 777 self._DpH_Kernel_ad(*self._sizes_oi,(self._metric.value,p.value, 778 self._metric.coef,p.coef,q.coef,q.value)) 779 else: # No automatic differentiation 780 SetModuleConstant(self._modules[0],'DpH_mult',δ,self.float_t) 781 self.check(q); self.check(p) 782 self._DpH_Kernel(*self._sizes_oi,(ad.remove_ad(self._metric),p,q)) 783 return q 784 785 def _DqH(self,q): return self.Expl_p(q,np.zeros_like(q),-1) 786 def _DpH(self,p): return self.Expl_q(np.zeros_like(p),p, 1) 787 788 def _mk_copy(self,x): 789 """Copies the variable x, with a specific contiguity for the AD coefficients""" 790 if ad.is_ad(x): 791 assert isinstance(x,ad.Dense.denseAD) and x.size_ad in (0,self.size_ad) 792 return ad.Dense.denseAD(x.value.copy(), 793 np.moveaxis(np.moveaxis(x.coef,-1,self.ndim).copy(),self.ndim,-1)) 794 return x.copy() 795 796 def check_ad(self,x): 797 """ 798 Puts zero coefficients with the correct contiguity if those are empty. 799 Checks that the AD variable x has the correct c-contiguity for the kernel. 800 """ 801 assert self.way_ad!=0 # Both forward and reverse 802 if x.size_ad==0: 803 x.coef = np.moveaxis(np.zeros_like(x.value, 804 shape=(*x.shape[:self.ndim],self.size_ad,*x.shape[self.ndim:])),self.ndim,-1) 805 assert isinstance(x,ad.Dense.denseAD) and x.size_ad==self.size_ad 806 assert np.moveaxis(x.coef,-1,self.ndim).flags.c_contiguous 807 assert x.coef.dtype==self.float_t 808 self.check(x.value) 809 810 def check(self,x): 811 """ 812 Basic check of the types, shapes, contiguity of GPU inputs. 813 """ 814 assert x.flags.c_contiguous 815 assert x.shape[:self.ndim]==self.shape_o and x.shape[-self.ndim:]==self.shape_i 816 assert x.dtype in (self.float_t,self.int_t,self.offsetpack_t) 817 assert x.ndim in (2*self.ndim,2*self.ndim+1) 818 819# Utility functions 820def _triangular_number(n): return (n*(n+1))//2
49def AcousticHamiltonian_Sparse(ρ,D,dx=1.,order_x=2,shape_dom=None,bc='Periodic', 50 rev_ad=0,save_weights=False): 51 """ 52 Sparse matrix based implementation of the Hamiltonian of the acoustic wave equation. 53 - bc : boundary conditions, see bc_to_padding.keys() 54 - rev_ad (optional) : Implement reverse autodiff for the decomposition weights and inverse density 55 """ 56 padding = bc_to_padding[bc] 57 if shape_dom is None: shape_dom = fd.common_shape((ρ,D),depths=(0,2)) 58 assert len(shape_dom)==len(D) 59 iρ=1/ρ 60 from .. import VoronoiDecomposition # CPU or GPU 61 try: λ,e = VoronoiDecomposition(D) 62 except FileNotFoundError: λ,e = Selling.Decomposition(D) # Fallback to python implem 63 λ = fd.as_field(λ,shape_dom) 64 def Hq(q,ad_channel=lambda x:x): 65 dq = fd.DiffEll(q,e,dx,padding=padding,order=order_x) 66 if padding!=padding: dq[np.isnan(dq)]=0 67 return 0.5 * ad_channel(λ) * np.sum(dq**2,axis=0) 68 def Hp(p,ad_channel=lambda x:x): 69 return 0.5 * ad_channel(iρ) * p**2 70 H = QuadraticHamiltonian(Hq,Hp) # Create then replace quadratic functions with sparse matrices 71 if rev_ad>0: # Reverse autodiff support 72 H.weights = ad.Dense.denseAD(λ, coef=np.zeros_like(λ, shape=(*λ.shape, rev_ad))) 73 H.offsets = e 74 H.iρ = ad.Dense.denseAD(iρ,coef=np.zeros_like(iρ,shape=(*iρ.shape,rev_ad))) 75 rev_ad = (H.weights.coef,H.iρ.coef) 76 else: 77 if save_weights: H.weights,H.M,H.iρ = λ,iρ,iρ 78 rev_ad = (None,None) 79 H.set_spmat(np.zeros_like(rm_ad(D),shape=shape_dom),rev_ad=rev_ad) 80 H.dt_max = _mk_dt_max(dx * np.sqrt(np.min(rm_ad(ρ)/rm_ad(λ).sum(axis=0))), order_x) 81 return H
Sparse matrix based implementation of the Hamiltonian of the acoustic wave equation.
- bc : boundary conditions, see bc_to_padding.keys()
- rev_ad (optional) : Implement reverse autodiff for the decomposition weights and inverse density
83def ElasticHamiltonian_Sparse(M,C,dx,order_x=2,S=None,shape_dom=None,bc='Periodic', 84 rev_ad=0,save_weights=False): 85 """ 86 Sparse matrix based implementation of the Hamiltonian of the elastic wave equation, namely 87 (1/2) int_X M(x,p(x),p(x)) + C(x,ε(x),ε(x)) dx 88 where ε = grad(q) + grad(q)^T - S q is the stress tensor, and X is the domain. 89 90 - M : (metric) array of positive definite matrices, shape (d,d,n1,...,nd), 91 Also accepts (1,1,n1,...,nd) for isotropic metric. Ex: M = (1/ρ)[None,None] 92 - C : (hooke tensor in voigt notation) array of positive definite matrices, 93 shape (s,s,n1,...,nd) where s = d (d+1)/2 94 - bc : boundary conditions, see bc_to_padding.keys() 95 - rev_ad (optional) : Implement reverse autodiff for the decomposition weights and M. 96 """ 97 padding = bc_to_padding[bc] 98 if shape_dom is None: shape_dom = fd.common_shape((M,C),depths=(2,2)) 99 vdim = len(shape_dom); assert len(C)==(vdim*(vdim+1))//2 100 λ,E = Hooke(C).Selling() 101 λ,E,S,M = fd.common_field( (λ,E,S,M), depths=(1,3,3,2), shape=shape_dom) # broadcasting 102 if S is None: ES = (0,)*vdim 103 else: ES = np.sum(E[:,:,None,:]*S[:,:,:,None],axis=(0,1)) 104 105 def Hq(q,ad_channel=lambda x:x): # Elastic energy 106 λ_ = 2**-vdim * ad_channel(λ) # Normalize, extract relevant ad part 107 dq0 = fd.DiffEll(q[0],E[0],dx,order=order_x,α=ES[0]*q[0],padding=padding) 108 if padding!=padding: dq0[np.isnan(dq0)]=0 109 if vdim==1: return λ_ * np.sum(dq0**2, axis=0) 110 dq1 = fd.DiffEll(q[1],E[1],dx,order=order_x,α=ES[1]*q[1],padding=padding) 111 if padding!=padding: dq1[np.isnan(dq1)]=0 112 if vdim==2: return λ_ * np.sum((dq0+dq1)**2+(dq0+dq1[::-1])**2, axis=0) 113 dq2 = fd.DiffEll(q[2],E[2],dx,order=order_x,α=ES[2]*q[2],padding=padding) 114 if padding!=padding: dq2[np.isnan(dq2)]=0 115 return λ_ * np.sum((dq0+dq1+dq2)**2+(dq0+dq1+dq2[::-1])**2 116 +(dq0+dq1[::-1]+dq2)**2+(dq0+dq1[::-1]+dq2[::-1])**2, axis=0) 117 118 def Hp(p,ad_channel=lambda x:x): # Kinetic energy 119 if M.shape[:2]==(1,1): return 0.5*ad_channel(M[0,0])*np.sum(p**2,axis=0)# Typically M = 1/ρ 120 return 0.5 * p[None,:]*ad_channel(M)*p[:,None] #lp.dot_VAV(p,ad_channel(M),p) changed for rev_ad 121 122 H = QuadraticHamiltonian(Hq,Hp) # Create then replace quadratic functions with sparse matrices 123 if rev_ad>0: # Reverse autodiff support 124 H.weights = ad.Dense.denseAD(λ,coef=np.zeros_like(λ,shape=(*λ.shape,rev_ad))) 125 H.M = H.iρ = ad.Dense.denseAD(M,coef=np.zeros_like(M,shape=(*M.shape,rev_ad))) 126 H.moffsets = E 127 rev_ad = (H.weights.coef,H.M.coef) 128 else: 129 if save_weights: H.weights,H.M,H.iρ = λ,M,M 130 rev_ad = (None,None) 131 H.set_spmat(np.zeros_like(rm_ad(C),shape=(vdim,*shape_dom)),rev_ad=rev_ad) 132 H.reshape = lambda x:x; H.unshape = lambda x:x # Dummy, for compatibility only with the GPU kernel 133 134 ev = np.linalg.eigvalsh(np.moveaxis(rm_ad(M),(0,1),(-2,-1)))[...,-1] # Take largest eigvenvalue 135 H.dt_max = _mk_dt_max( dx/(np.sqrt(np.max(ev*rm_ad(λ).sum(axis=0)))*vdim), order_x) 136 return H
Sparse matrix based implementation of the Hamiltonian of the elastic wave equation, namely (1/2) int_X M(x,p(x),p(x)) + C(x,ε(x),ε(x)) dx where ε = grad(q) + grad(q)^T - S q is the stress tensor, and X is the domain.
- M : (metric) array of positive definite matrices, shape (d,d,n1,...,nd), Also accepts (1,1,n1,...,nd) for isotropic metric. Ex: M = (1/ρ)[None,None]
- C : (hooke tensor in voigt notation) array of positive definite matrices, shape (s,s,n1,...,nd) where s = d (d+1)/2
- bc : boundary conditions, see bc_to_padding.keys()
- rev_ad (optional) : Implement reverse autodiff for the decomposition weights and M.
138def AcousticChgVar(q,p,ρ,D,ϕ,X): 139 """ 140 Change of variables in the acoustic wave equation. 141 - q,p,ρ,D (callable) : problem data 142 - ϕ : change of variables 143 - X : points where to evaluate 144 returns 145 - tq,tp,tρ,tD,ϕ(X) (arrays) : coordinate changed problem data, obtained as 146 q(ϕ), p(ϕ) J, ρ(ϕ) J, Φ^-1 D(ϕ) Φ^-t J 147 """ 148 X_ad = ad.Dense.identity(constant=X,shape_free=(len(X),)) 149 ϕ_ad = ϕ_fun(X_ad) 150 ϕ = ϕ_ad.value 151 dϕ = np.moveaxis(ϕ_ad.gradient(),1,0) # Gradient is differential transposed 152 inv_dϕ = lp.inverse(dϕ) 153 Jϕ = lp.det(dϕ) 154 155 D_ = fd.as_field(D(ϕ),Jϕ.shape,depth=2) 156 tD = lp.dot_AA(inv_dϕ,lp.dot_AA(D_,lp.transpose(inv_dϕ))) * Jϕ 157 158 return q(ϕ), p(ϕ)*Jϕ, ρ(ϕ)*Jϕ, tD, ϕ
Change of variables in the acoustic wave equation.
- q,p,ρ,D (callable) : problem data
- ϕ : change of variables
- X : points where to evaluate returns
- tq,tp,tρ,tD,ϕ(X) (arrays) : coordinate changed problem data, obtained as q(ϕ), p(ϕ) J, ρ(ϕ) J, Φ^-1 D(ϕ) Φ^-t J
160def ElasticChgVar(q,p,M,C,S,ϕ,X): 161 """ 162 Change of variables in the elastic wave equation. 163 - q,p,M,C,S (callable) : problem data 164 - ϕ (callable) : change of variables 165 - X : points where to evaluate 166 returns 167 - tq,tp,tM,tC,tS,ϕ(X) (arrays) : coordinate changed problem data, obtained as 168 Φ^t q(ϕ), Φ^-1 p(ϕ) J, Φ^t M(ϕ) Φ / J, (Φ^t ε(ϕ) Φ,) 169 ∑_{i'j'k'l'} C_{i'j'k'l'}(ϕ) Ψ^{i'}_i Ψ^{j'}_j Ψ^{k'}_k Ψ^{l'}_l J, 170 ∑_{i'j'} Φ^i_{i'} Φ^j_{j'} S^{i'j'}_{k'}(ϕ) Ψ_k^{k'} + ∑_{k'} ∂^{ij} ϕ_{k'} Ψ_k^{k'} 171 """ 172 X_ad = ad.Dense2.identity(constant=X,shape_free=(len(X),)) 173 ϕ_ad = ϕ_fun(X_ad) 174 ϕ = ϕ_ad.value 175 dϕ = np.moveaxis(ϕ_ad.gradient(),1,0) # Gradient is differential transposed 176 inv_dϕ = lp.inverse(dϕ) 177 Jϕ = lp.det(dϕ) 178 d2ϕ = np.moveaxis(ϕ_ad.hessian(),2,0) 179 180 tq = lp.dot_AV(lp.transpose(dϕ),q(ϕ)) 181 tp = lp.dot_AV(inv_dϕ,p(ϕ))*Jϕ 182 183 M_ = fd.as_field(M(ϕ),Jϕ.shape,depth=2) 184 tM = lp.dot_AA(lp.transpose(dϕ),lp.dot_AA(M_,dϕ))/Jϕ 185 186 C_ = fd.as_field(C(ϕ),Jϕ.shape,depth=2) 187 tC = Hooke(C_).rotate(inv_dϕ).hooke*Jϕ 188 189 S_ = fd.as_field(S(ϕ),Jϕ.shape,depth=3) 190 vdim = len(dϕ) 191 S1 = sum(dϕ[ip,:,None,None]*dϕ[jp,None,:,None]*S_[ip,jp,kp]*inv_dϕ[:,kp] 192 for ip in range(vdim) for jp in range(vdim) for kp in range(vdim)) 193 S2 = sum(d2ϕ[kp,:,:,None]*inv_dϕ[:,kp] for kp in range(vdim)) 194 tS = S1 + S2 195 196 return tq,tp,tM,tC,tS,ϕ
Change of variables in the elastic wave equation.
- q,p,M,C,S (callable) : problem data
- ϕ (callable) : change of variables
- X : points where to evaluate returns
- tq,tp,tM,tC,tS,ϕ(X) (arrays) : coordinate changed problem data, obtained as Φ^t q(ϕ), Φ^-1 p(ϕ) J, Φ^t M(ϕ) Φ / J, (Φ^t ε(ϕ) Φ,) ∑_{i'j'k'l'} C_{i'j'k'l'}(ϕ) Ψ^{i'}_i Ψ^{j'}_j Ψ^{k'}_k Ψ^{l'}_l J, ∑_{i'j'} Φ^i_{i'} Φ^j_{j'} S^{i'j'}_{k'}(ϕ) Ψ_k^{k'} + ∑_{k'} ∂^{ij} ϕ_{k'} Ψ_k^{k'}
200class AcousticHamiltonian_Kernel(QuadraticHamiltonianBase): 201 """ 202 The Hamiltonian of an anisotropic acoustic wave equation, implemented with GPU kernels, 203 whose geometry is defined by a generic Riemannianian metric field. 204 The Hamiltonian is a sum of squares of finite differences, via Selling's decomposition. 205 206 The Mathematical expression of the Hamiltonian is 207 (1/2) int_X iρ(x) p(x)^2 + D(x,grad q(x), grad q(x)) dx 208 where X is the domain, and D the is the (dual-)metric. 209 """ 210 211 def __init__(self,ρ,D,dx=1., 212 order_x=2,shape_dom=None,periodic=False, 213 flattened=False,rev_ad=0,iρ=None, 214 block_size=256,traits=None,**kwargs): 215 if cp is None: raise ImportError("Cupy library needed for this class") 216 super(AcousticHamiltonian_Kernel,self).__init__(**kwargs) 217 218 if shape_dom is None: shape_dom = fd.common_shape((ρ,D), 219 depths=(0,1 if flattened else 2)) 220 self._shape_dom = shape_dom 221 self.shape_free = shape_dom 222 size_dom = np.prod(shape_dom) 223 self._sizes_oi = (int(np.ceil(size_dom/block_size)),),(block_size,) 224 self.dx = dx 225 226 fwd_ad = ρ.size_ad if ad.is_ad(ρ) else 0 227 self._size_ad = max(rev_ad,fwd_ad) 228 229 # Init the GPU kernel 230 traits_default = { 231 'fourth_order_macro':{2:False,4:True}[order_x], 232 'Scalar':np.float32, 233 'Int':self.int_t, 234 'OffsetT':np.int8, 235 'ndim_macro':self.ndim, 236 'periodic_macro':periodic, 237 'periodic_axes':(True,)*self.ndim, 238 'fwd_macro':fwd_ad>0, 239 } 240 self._traits = traits_default if traits is None else {**traits_default,**traits} 241 242 # Setup the problem data 243 dtype32 = (self.float_t==np.float32) 244 iρ = ad.cupy_generic.cupy_set(1/ρ if iρ is None else iρ, dtype32=dtype32) 245 ρ=None 246 iρ = fd.as_field(iρ,shape_dom) 247 self.iρ = ad.Base.ascontiguousarray(iρ) 248 iρ=None 249 250 D = ad.cupy_generic.cupy_set(D, dtype32=dtype32) 251 λ,e = VoronoiDecomposition(D,offset_t=np.int8,flattened=flattened) 252 D=None 253 self._traits['decompdim'] = len(λ) 254 self.dt_max = _mk_dt_max(dx/np.sqrt( 255 np.max(rm_ad(self.iρ)*rm_ad(λ).sum(axis=0))), order_x) 256 λ = fd.as_field(λ,shape_dom,depth=1) 257 self.weights = ad.Base.ascontiguousarray(np.moveaxis(λ,0,-1)) 258 λ=None 259 260 if self.way_ad<0: # Reverse autodiff 261 self.weights = ad.Dense.denseAD(self.weights) 262 self.iρ = ad.Dense.denseAD(self.iρ) 263 for arr in (self.weights,self.iρ): 264 self.check_ad(arr) if self.size_ad>0 else self.check(arr) 265 266 # Generate the cuda module 267 cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda") 268 date_modified = cupy_module_helper.getmtime_max(cuda_path) 269 source = cupy_module_helper.traits_header(self.traits,size_of_shape=True) 270 271 source += [ 272 '#include "Kernel_AcousticWave.h"', 273 f"// Date cuda code last modified : {date_modified}"] 274 cuoptions = ("-default-device", f"-I {cuda_path}") 275 276 source="\n".join(source) 277 module = cupy_module_helper.GetModule(source,cuoptions) 278 get_indices = module.get_function('get_indices') 279 self._DpH_Kernel = module.get_function("DpH") 280 self._DqH_Kernel = module.get_function("DqH") 281 282 if self.size_ad: 283 source_ad = f"#define size_ad_macro {self.size_ad}\n"+source 284 module_ad = cupy_module_helper.GetModule(source_ad,cuoptions) 285 self._DpH_Kernel_ad = module_ad.get_function("DpH") 286 self._DqH_Kernel_ad = module_ad.get_function("DqH") 287 self._modules = (module,module_ad) 288 else: self._modules = (module,) 289 290 for module in self._modules: 291 SetModuleConstant(module,'shape_tot',self.shape_dom,self.int_t) 292 SetModuleConstant(module,'size_tot',np.prod(self.shape_dom),self.int_t) 293 294 # Get the indices of the scheme neighbors 295 self._ineigh = cp.full((*self.weights.shape,order_x),-2**30,dtype=self.int_t) 296 e = cp.ascontiguousarray(fd.as_field(e,shape_dom,depth=2)) 297 get_indices(*self._sizes_oi,(e,self._ineigh)) 298 e=None 299 self.check(self._ineigh) 300 301 @property 302 def M(self): 303 """Alias for the inverse density""" 304 return self.iρ 305 @property 306 def size_ad(self): return self._size_ad 307 @property 308 def way_ad(self): 309 """0 : no AD. 1 : forward AD. -1 : reverse AD""" 310 return 0 if self.size_ad==0 else 1 if self.traits['fwd_macro'] else -1 311 def rev_reset(self): 312 """ 313 Reset the accumulators for reverse autodiff 314 Namely (self.metric.coef and self.weights.coef) 315 """ 316 self.weights.coef[:]=0 317 self.iρ.coef[:]=0 318 319 @property 320 def shape_dom(self): 321 """Shape of the PDE discretization domain.""" 322 return self._shape_dom 323 @property 324 def ndim(self): 325 """Number of dimensions of the domain.""" 326 return len(self.shape_dom) 327 @property 328 def decompdim(self): 329 """Length of quadratic form decomposition.""" 330 return self.weights.shape[-1] 331 332 @property 333 def traits(self): return self._traits 334 @property 335 def int_t(self): return np.int32 336 @property 337 def float_t(self): return self.traits['Scalar'] 338 @property 339 def order_x(self): return 4 if self.traits['fourth_order_macro'] else 2 340 341 def Expl_p(self,q,p,δ): 342 """ 343 Explicit time step for the impulsion p. 344 Expects : q and p reshaped in GPU friendly format, using self.reshape 345 """ 346 mult = -δ/self.dx**2/{2:2,4:24}[self.order_x] 347 if self.preserve_p: p = self._mk_copy(p) 348 if ad.is_ad(q): # Use automatic differentiation, forward or reverse 349 SetModuleConstant(self._modules[1],'DqH_mult',mult,self.float_t) 350 self.check_ad(q); self.check_ad(p) 351 self._DqH_Kernel_ad(*self._sizes_oi,(self.weights.value,self._ineigh, 352 q.value,self.weights.coef,q.coef,p.coef,p.value)) 353 else: # No automatic differentiation 354 SetModuleConstant(self._modules[0],'DqH_mult',mult,self.float_t) 355 self.check(q); self.check(p) 356 self._DqH_Kernel(*self._sizes_oi, 357 (ad.remove_ad(self.weights),self._ineigh,q,p)) 358 return p 359 360 def Expl_q(self,q,p,δ): 361 """ 362 Explicit time step for the position q. 363 Expects : q and p reshaped in GPU friendly format, using self.reshape 364 """ 365 if self.preserve_q: q = self._mk_copy(q) 366 if ad.is_ad(p): # Use automatic differentiation, forward or reverse 367 SetModuleConstant(self._modules[1],'DpH_mult',δ,self.float_t) 368 self.check_ad(q); self.check_ad(p) 369 self._DpH_Kernel_ad(*self._sizes_oi,(self.iρ.value,p.value, 370 self.iρ.coef,p.coef,q.coef,q.value)) 371 else: # No automatic differentiation 372 self.check(q); self.check(p) 373 q += δ*ad.remove_ad(self.iρ)*p 374 return q 375 376 def _DqH(self,q): return self.Expl_p(q,np.zeros_like(q),-1) 377 def _DpH(self,p): return self.Expl_q(np.zeros_like(p),p, 1) 378 379 def check_ad(self,x): 380 """ 381 Puts zero ad coefficients, with the correct c-contiguity, if those are empty. 382 Basic additional checks of shapes, contiguity. 383 """ 384 assert self.way_ad!=0 # Either forward and reverse AD must be supported 385 if x.size_ad==0: x.coef = np.zeros_like(x.value,shape=(*x.shape,self.size_ad)) 386 assert isinstance(x,ad.Dense.denseAD) and x.size_ad==self.size_ad 387 assert x.coef.flags.c_contiguous 388 assert x.coef.dtype==self.float_t 389 self.check(x.value) 390 391 def check(self,x): 392 """ Basic check of the types, shapes, contiguity of GPU inputs. """ 393 assert x.flags.c_contiguous 394 assert x.shape[:self.ndim]==self.shape_dom 395 assert x.dtype in (self.float_t,self.int_t)
The Hamiltonian of an anisotropic acoustic wave equation, implemented with GPU kernels, whose geometry is defined by a generic Riemannianian metric field. The Hamiltonian is a sum of squares of finite differences, via Selling's decomposition.
The Mathematical expression of the Hamiltonian is (1/2) int_X iρ(x) p(x)^2 + D(x,grad q(x), grad q(x)) dx where X is the domain, and D the is the (dual-)metric.
211 def __init__(self,ρ,D,dx=1., 212 order_x=2,shape_dom=None,periodic=False, 213 flattened=False,rev_ad=0,iρ=None, 214 block_size=256,traits=None,**kwargs): 215 if cp is None: raise ImportError("Cupy library needed for this class") 216 super(AcousticHamiltonian_Kernel,self).__init__(**kwargs) 217 218 if shape_dom is None: shape_dom = fd.common_shape((ρ,D), 219 depths=(0,1 if flattened else 2)) 220 self._shape_dom = shape_dom 221 self.shape_free = shape_dom 222 size_dom = np.prod(shape_dom) 223 self._sizes_oi = (int(np.ceil(size_dom/block_size)),),(block_size,) 224 self.dx = dx 225 226 fwd_ad = ρ.size_ad if ad.is_ad(ρ) else 0 227 self._size_ad = max(rev_ad,fwd_ad) 228 229 # Init the GPU kernel 230 traits_default = { 231 'fourth_order_macro':{2:False,4:True}[order_x], 232 'Scalar':np.float32, 233 'Int':self.int_t, 234 'OffsetT':np.int8, 235 'ndim_macro':self.ndim, 236 'periodic_macro':periodic, 237 'periodic_axes':(True,)*self.ndim, 238 'fwd_macro':fwd_ad>0, 239 } 240 self._traits = traits_default if traits is None else {**traits_default,**traits} 241 242 # Setup the problem data 243 dtype32 = (self.float_t==np.float32) 244 iρ = ad.cupy_generic.cupy_set(1/ρ if iρ is None else iρ, dtype32=dtype32) 245 ρ=None 246 iρ = fd.as_field(iρ,shape_dom) 247 self.iρ = ad.Base.ascontiguousarray(iρ) 248 iρ=None 249 250 D = ad.cupy_generic.cupy_set(D, dtype32=dtype32) 251 λ,e = VoronoiDecomposition(D,offset_t=np.int8,flattened=flattened) 252 D=None 253 self._traits['decompdim'] = len(λ) 254 self.dt_max = _mk_dt_max(dx/np.sqrt( 255 np.max(rm_ad(self.iρ)*rm_ad(λ).sum(axis=0))), order_x) 256 λ = fd.as_field(λ,shape_dom,depth=1) 257 self.weights = ad.Base.ascontiguousarray(np.moveaxis(λ,0,-1)) 258 λ=None 259 260 if self.way_ad<0: # Reverse autodiff 261 self.weights = ad.Dense.denseAD(self.weights) 262 self.iρ = ad.Dense.denseAD(self.iρ) 263 for arr in (self.weights,self.iρ): 264 self.check_ad(arr) if self.size_ad>0 else self.check(arr) 265 266 # Generate the cuda module 267 cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda") 268 date_modified = cupy_module_helper.getmtime_max(cuda_path) 269 source = cupy_module_helper.traits_header(self.traits,size_of_shape=True) 270 271 source += [ 272 '#include "Kernel_AcousticWave.h"', 273 f"// Date cuda code last modified : {date_modified}"] 274 cuoptions = ("-default-device", f"-I {cuda_path}") 275 276 source="\n".join(source) 277 module = cupy_module_helper.GetModule(source,cuoptions) 278 get_indices = module.get_function('get_indices') 279 self._DpH_Kernel = module.get_function("DpH") 280 self._DqH_Kernel = module.get_function("DqH") 281 282 if self.size_ad: 283 source_ad = f"#define size_ad_macro {self.size_ad}\n"+source 284 module_ad = cupy_module_helper.GetModule(source_ad,cuoptions) 285 self._DpH_Kernel_ad = module_ad.get_function("DpH") 286 self._DqH_Kernel_ad = module_ad.get_function("DqH") 287 self._modules = (module,module_ad) 288 else: self._modules = (module,) 289 290 for module in self._modules: 291 SetModuleConstant(module,'shape_tot',self.shape_dom,self.int_t) 292 SetModuleConstant(module,'size_tot',np.prod(self.shape_dom),self.int_t) 293 294 # Get the indices of the scheme neighbors 295 self._ineigh = cp.full((*self.weights.shape,order_x),-2**30,dtype=self.int_t) 296 e = cp.ascontiguousarray(fd.as_field(e,shape_dom,depth=2)) 297 get_indices(*self._sizes_oi,(e,self._ineigh)) 298 e=None 299 self.check(self._ineigh)
307 @property 308 def way_ad(self): 309 """0 : no AD. 1 : forward AD. -1 : reverse AD""" 310 return 0 if self.size_ad==0 else 1 if self.traits['fwd_macro'] else -1
0 : no AD. 1 : forward AD. -1 : reverse AD
311 def rev_reset(self): 312 """ 313 Reset the accumulators for reverse autodiff 314 Namely (self.metric.coef and self.weights.coef) 315 """ 316 self.weights.coef[:]=0 317 self.iρ.coef[:]=0
Reset the accumulators for reverse autodiff Namely (self.metric.coef and self.weights.coef)
319 @property 320 def shape_dom(self): 321 """Shape of the PDE discretization domain.""" 322 return self._shape_dom
Shape of the PDE discretization domain.
323 @property 324 def ndim(self): 325 """Number of dimensions of the domain.""" 326 return len(self.shape_dom)
Number of dimensions of the domain.
327 @property 328 def decompdim(self): 329 """Length of quadratic form decomposition.""" 330 return self.weights.shape[-1]
Length of quadratic form decomposition.
341 def Expl_p(self,q,p,δ): 342 """ 343 Explicit time step for the impulsion p. 344 Expects : q and p reshaped in GPU friendly format, using self.reshape 345 """ 346 mult = -δ/self.dx**2/{2:2,4:24}[self.order_x] 347 if self.preserve_p: p = self._mk_copy(p) 348 if ad.is_ad(q): # Use automatic differentiation, forward or reverse 349 SetModuleConstant(self._modules[1],'DqH_mult',mult,self.float_t) 350 self.check_ad(q); self.check_ad(p) 351 self._DqH_Kernel_ad(*self._sizes_oi,(self.weights.value,self._ineigh, 352 q.value,self.weights.coef,q.coef,p.coef,p.value)) 353 else: # No automatic differentiation 354 SetModuleConstant(self._modules[0],'DqH_mult',mult,self.float_t) 355 self.check(q); self.check(p) 356 self._DqH_Kernel(*self._sizes_oi, 357 (ad.remove_ad(self.weights),self._ineigh,q,p)) 358 return p
Explicit time step for the impulsion p. Expects : q and p reshaped in GPU friendly format, using self.reshape
360 def Expl_q(self,q,p,δ): 361 """ 362 Explicit time step for the position q. 363 Expects : q and p reshaped in GPU friendly format, using self.reshape 364 """ 365 if self.preserve_q: q = self._mk_copy(q) 366 if ad.is_ad(p): # Use automatic differentiation, forward or reverse 367 SetModuleConstant(self._modules[1],'DpH_mult',δ,self.float_t) 368 self.check_ad(q); self.check_ad(p) 369 self._DpH_Kernel_ad(*self._sizes_oi,(self.iρ.value,p.value, 370 self.iρ.coef,p.coef,q.coef,q.value)) 371 else: # No automatic differentiation 372 self.check(q); self.check(p) 373 q += δ*ad.remove_ad(self.iρ)*p 374 return q
Explicit time step for the position q. Expects : q and p reshaped in GPU friendly format, using self.reshape
379 def check_ad(self,x): 380 """ 381 Puts zero ad coefficients, with the correct c-contiguity, if those are empty. 382 Basic additional checks of shapes, contiguity. 383 """ 384 assert self.way_ad!=0 # Either forward and reverse AD must be supported 385 if x.size_ad==0: x.coef = np.zeros_like(x.value,shape=(*x.shape,self.size_ad)) 386 assert isinstance(x,ad.Dense.denseAD) and x.size_ad==self.size_ad 387 assert x.coef.flags.c_contiguous 388 assert x.coef.dtype==self.float_t 389 self.check(x.value)
Puts zero ad coefficients, with the correct c-contiguity, if those are empty. Basic additional checks of shapes, contiguity.
391 def check(self,x): 392 """ Basic check of the types, shapes, contiguity of GPU inputs. """ 393 assert x.flags.c_contiguous 394 assert x.shape[:self.ndim]==self.shape_dom 395 assert x.dtype in (self.float_t,self.int_t)
Basic check of the types, shapes, contiguity of GPU inputs.
Inherited Members
398class WaveHamiltonianBase(QuadraticHamiltonianBase): 399 """ 400 A base class for GPU implementations of Hamiltonians of wave equations. 401 Warning : position and impulsion arrays are padded and reshaped in a GPU friendly format. 402 __init__ arguments 403 - constant values : used as padding reshape function 404 """ 405 406 def __init__(self,shape_dom,traits=None,periodic=False,constant_values=0,**kwargs): 407 super(WaveHamiltonianBase,self).__init__(**kwargs) 408 self._shape_dom = shape_dom 409 410 traits_default = { 411 'Scalar':np.float32, 412 'Int':np.int32, 413 'bypass_zeros_macro':True, 414 'fourth_order_macro':False, 415 'shape_i': {1:(64,),2:(8,8),3:(4,4,4),4:(4,4,2,2)}[self.ndim], 416 } 417 if traits is not None: traits_default.update(traits) 418 traits = traits_default 419 420 if np.ndim(periodic)==0: periodic=(periodic,)*self.ndim 421 assert periodic is None or len(periodic)==self.ndim 422 self._periodic = periodic 423 traits.update({ 424 'ndim_macro':self.ndim, 425 'periodic_macro':any(periodic), 426 'periodic_axes':periodic, 427 }) 428 self._traits = traits 429 self._check = True # Perform some safety input checks before calling GPU kernels 430 431 assert len(self.shape_i) == self.ndim 432 self._shape_o = tuple(fd.round_up_ratio(shape_dom,self.shape_i)) 433 self.constant_values = constant_values 434 435 def _dot(self,q,p): 436 """Duality bracket, ignoring NaNs used for padding.""" 437 return np.nansum(q*p) if np.isnan(self.constant_values) else np.sum(q*p) 438 439 # Domain shape 440 @property 441 def shape_dom(self): 442 """Shape of the PDE discretization domain.""" 443 return self._shape_dom 444 @property 445 def shape_o(self): 446 """Outer shape : number of blocks in each dimension (for GPU kernels).""" 447 return self._shape_o 448 @property 449 def shape_i(self): 450 """Inner shape : accessed by a block of threads (for GPU kernels).""" 451 return self.traits['shape_i'] 452 @property 453 def size_o(self): return np.prod(self.shape_o) 454 @property 455 def size_i(self): return np.prod(self.shape_i) 456 457 @property 458 def ndim(self): 459 """Number of dimensions of the domain.""" 460 return len(self.shape_dom) 461 @property 462 def symdim(self): 463 """DImension of the space of symmetric matrices.""" 464 return _triangular_number(self.ndim) 465 466 # Traits 467 @property 468 def traits(self): 469 """Collection of traits, passed to GPU kernel.""" 470 return self._traits 471 472 @property 473 def float_t(self): 474 """Scalar type used by the GPU kernel. Defaults to float32.""" 475 return self.traits['Scalar'] 476 @property 477 def int_t(self): 478 """Int type used by the GPU kernel. Defaults to int32.""" 479 return self.traits['Int'] 480 @property 481 def order_x(self): 482 """Consistency order of the finite differences scheme.""" 483 return 4 if self.traits['fourth_order_macro'] else 2 484 @property 485 def periodic(self): 486 """Wether to apply periodic boundary conditions, for each axis.""" 487 return self._periodic 488 489 def SetCst(self,name,value,dtype): 490 """Set a constant un the cuda module""" 491 for module in self._modules: 492 SetModuleConstant(module,name,value,dtype) 493 494 def reshape(self,x,constant_values=None,**kwargs): 495 """ 496 Reshapes and pads the array x in a kernel friendly format. 497 Factors shape_i. Also moves the geometry axis before 498 shape_i, following the convention of HookeWave.h 499 - **kwargs : passed to fd.block_expand 500 """ 501 if constant_values is None: constant_values = self.constant_values 502 x = fd.block_expand(x,self.shape_i,constant_values=constant_values,**kwargs) 503 if x.ndim == 2*self.ndim: pass 504 elif x.ndim == 2*self.ndim+1: x = np.moveaxis(x,0,self.ndim) 505 else: raise ValueError("Unsupported geometry depth") 506 #Cast to correct float and int types 507 if x.dtype==np.float64: dtype=self.float_t 508 elif x.dtype==np.int64: dtype=self.int_t 509 else: dtype=x.dtype 510 #dtype = {np.float64:self.float_t,np.int64:self.int_t}.get(value.dtype,value.dtype) fails 511 if ad.is_ad(x): 512 x.value = cp.ascontiguousarray(cp.asarray(x.value,dtype=dtype)) 513 # Setup a suitable contiguity of the AD variable coefficients for the GPU kernel 514 x_coef = cp.ascontiguousarray(cp.asarray(np.moveaxis(x.coef,-1,self.ndim),dtype=dtype)) 515 x.coef = np.moveaxis(x_coef,self.ndim,-1) 516 return x 517 else: return cp.ascontiguousarray(cp.asarray(x,dtype=dtype)) 518 519 return cp.ascontiguousarray(cp.asarray(value,dtype=dtype)) 520 521 def unshape(self,value): 522 """Inverse operation to reshape""" 523 if value.ndim==2*self.ndim: pass 524 elif value.ndim==2*self.ndim+1: value = np.moveaxis(value,self.ndim,0) 525 else: raise ValueError("Unsupported geometry depth") 526 return fd.block_squeeze(value,self.shape_dom)
A base class for GPU implementations of Hamiltonians of wave equations. Warning : position and impulsion arrays are padded and reshaped in a GPU friendly format. __init__ arguments
- constant values : used as padding reshape function
406 def __init__(self,shape_dom,traits=None,periodic=False,constant_values=0,**kwargs): 407 super(WaveHamiltonianBase,self).__init__(**kwargs) 408 self._shape_dom = shape_dom 409 410 traits_default = { 411 'Scalar':np.float32, 412 'Int':np.int32, 413 'bypass_zeros_macro':True, 414 'fourth_order_macro':False, 415 'shape_i': {1:(64,),2:(8,8),3:(4,4,4),4:(4,4,2,2)}[self.ndim], 416 } 417 if traits is not None: traits_default.update(traits) 418 traits = traits_default 419 420 if np.ndim(periodic)==0: periodic=(periodic,)*self.ndim 421 assert periodic is None or len(periodic)==self.ndim 422 self._periodic = periodic 423 traits.update({ 424 'ndim_macro':self.ndim, 425 'periodic_macro':any(periodic), 426 'periodic_axes':periodic, 427 }) 428 self._traits = traits 429 self._check = True # Perform some safety input checks before calling GPU kernels 430 431 assert len(self.shape_i) == self.ndim 432 self._shape_o = tuple(fd.round_up_ratio(shape_dom,self.shape_i)) 433 self.constant_values = constant_values
440 @property 441 def shape_dom(self): 442 """Shape of the PDE discretization domain.""" 443 return self._shape_dom
Shape of the PDE discretization domain.
444 @property 445 def shape_o(self): 446 """Outer shape : number of blocks in each dimension (for GPU kernels).""" 447 return self._shape_o
Outer shape : number of blocks in each dimension (for GPU kernels).
448 @property 449 def shape_i(self): 450 """Inner shape : accessed by a block of threads (for GPU kernels).""" 451 return self.traits['shape_i']
Inner shape : accessed by a block of threads (for GPU kernels).
457 @property 458 def ndim(self): 459 """Number of dimensions of the domain.""" 460 return len(self.shape_dom)
Number of dimensions of the domain.
461 @property 462 def symdim(self): 463 """DImension of the space of symmetric matrices.""" 464 return _triangular_number(self.ndim)
DImension of the space of symmetric matrices.
467 @property 468 def traits(self): 469 """Collection of traits, passed to GPU kernel.""" 470 return self._traits
Collection of traits, passed to GPU kernel.
472 @property 473 def float_t(self): 474 """Scalar type used by the GPU kernel. Defaults to float32.""" 475 return self.traits['Scalar']
Scalar type used by the GPU kernel. Defaults to float32.
476 @property 477 def int_t(self): 478 """Int type used by the GPU kernel. Defaults to int32.""" 479 return self.traits['Int']
Int type used by the GPU kernel. Defaults to int32.
480 @property 481 def order_x(self): 482 """Consistency order of the finite differences scheme.""" 483 return 4 if self.traits['fourth_order_macro'] else 2
Consistency order of the finite differences scheme.
484 @property 485 def periodic(self): 486 """Wether to apply periodic boundary conditions, for each axis.""" 487 return self._periodic
Wether to apply periodic boundary conditions, for each axis.
489 def SetCst(self,name,value,dtype): 490 """Set a constant un the cuda module""" 491 for module in self._modules: 492 SetModuleConstant(module,name,value,dtype)
Set a constant un the cuda module
494 def reshape(self,x,constant_values=None,**kwargs): 495 """ 496 Reshapes and pads the array x in a kernel friendly format. 497 Factors shape_i. Also moves the geometry axis before 498 shape_i, following the convention of HookeWave.h 499 - **kwargs : passed to fd.block_expand 500 """ 501 if constant_values is None: constant_values = self.constant_values 502 x = fd.block_expand(x,self.shape_i,constant_values=constant_values,**kwargs) 503 if x.ndim == 2*self.ndim: pass 504 elif x.ndim == 2*self.ndim+1: x = np.moveaxis(x,0,self.ndim) 505 else: raise ValueError("Unsupported geometry depth") 506 #Cast to correct float and int types 507 if x.dtype==np.float64: dtype=self.float_t 508 elif x.dtype==np.int64: dtype=self.int_t 509 else: dtype=x.dtype 510 #dtype = {np.float64:self.float_t,np.int64:self.int_t}.get(value.dtype,value.dtype) fails 511 if ad.is_ad(x): 512 x.value = cp.ascontiguousarray(cp.asarray(x.value,dtype=dtype)) 513 # Setup a suitable contiguity of the AD variable coefficients for the GPU kernel 514 x_coef = cp.ascontiguousarray(cp.asarray(np.moveaxis(x.coef,-1,self.ndim),dtype=dtype)) 515 x.coef = np.moveaxis(x_coef,self.ndim,-1) 516 return x 517 else: return cp.ascontiguousarray(cp.asarray(x,dtype=dtype)) 518 519 return cp.ascontiguousarray(cp.asarray(value,dtype=dtype))
Reshapes and pads the array x in a kernel friendly format. Factors shape_i. Also moves the geometry axis before shape_i, following the convention of HookeWave.h
- **kwargs : passed to fd.block_expand
521 def unshape(self,value): 522 """Inverse operation to reshape""" 523 if value.ndim==2*self.ndim: pass 524 elif value.ndim==2*self.ndim+1: value = np.moveaxis(value,self.ndim,0) 525 else: raise ValueError("Unsupported geometry depth") 526 return fd.block_squeeze(value,self.shape_dom)
Inverse operation to reshape
Inherited Members
528class ElasticHamiltonian_Kernel(WaveHamiltonianBase): 529 """ 530 The Hamiltonian of an anisotropic elastic wave equation, implemented with GPU kernels, 531 whose geometry is defined by a generic Hooke tensor field. 532 The Hamiltonian is a sum of squares of finite differences, via Voronoi's decomposition. 533 Dirichlet boundary conditions are applied, see also optional damping layers. 534 535 The Mathematical expression of the Hamiltonian is 536 (1/2) int_X M(x,p(x),p(x)) + C(x,ε(x),ε(x)) dx 537 where ε = grad(q) + grad(q)^T - S q is the stress tensor, and X is the domain. 538 539 - M : (metric) array of positive definite matrices, shape (d,d,n1,...,nd), 540 Also accepts (1,1,n1,...,nd) for isotropic metric. Ex: M = (1/ρ)[None,None] 541 - C : (hooke tensor in voigt notation) array of positive definite matrices, 542 shape (s,s,n1,...,nd) where s = d (d+1)/2 543 Reuse decomposition from previous run : C = H_prev.C_for_reuse 544 545 Warning : accessing some of this object's properties has a significant memory and 546 computational cost, because all data is converted to a GPU kernel friendly format. 547 """ 548 def __init__(self,M,C,dx=1.,order_x=2,shape_dom=None, 549 traits=None,rev_ad=0,**kwargs): 550 if cp is None: raise ImportError("Cupy library needed for this class") 551 fwd_ad = M.size_ad if ad.is_ad(M) else 0 552 if fwd_ad>0 and rev_ad>0: 553 raise ValueError("Please choose between forward and reverse differentiation") 554 self._size_ad = max(fwd_ad,rev_ad) 555 556 traits_default = { 557 'isotropic_metric_macro':M.shape[0]==1, 558 'fourth_order_macro':{2:False,4:True}[order_x], 559 'fwd_macro': fwd_ad>0 560 } 561 if traits is not None: traits_default.update(traits) 562 traits = traits_default 563 564 # Flatten the symmetric matrix arrays, if necessary 565 if (M.ndim==2 or M.shape[0] in (1,M.ndim-2)) and M.shape[0]==M.shape[1]: 566 M = Metrics.misc.flatten_symmetric_matrix(M) 567 if isinstance(C,tuple): self._weights,self._offsets,shape_dom = C # Reuse decomposition 568 elif (C.ndim==2 or C.shape[0]==_triangular_number(C.ndim-2)) and C.shape[0]==C.shape[1]: 569 C = Metrics.misc.flatten_symmetric_matrix(C) 570 571 # Get the domain shape 572 if shape_dom is None: shape_dom = M.shape[1:] if isinstance(C,tuple) else \ 573 fd.common_shape((M,C),depths=(1,1)) 574 super(ElasticHamiltonian_Kernel,self).__init__(shape_dom,traits,**kwargs) 575 576 if self.ndim not in (1,2,3): 577 raise ValueError("Only domains of dimension 1, 2 and 3 are supported") 578 579 self._offsetpack_t = np.int32 580 self.dx = dx 581 582 # Voronoi decomposition of the Hooke tensor 583 if not isinstance(C,tuple): # Decomposition was bypassed by feeding weights, offsets 584 assert len(C) == self.decompdim 585 from .. import VoronoiDecomposition 586 weights,offsets = VoronoiDecomposition(ad.cupy_generic.cupy_set(C), 587 offset_t=np.int8,flattened=True) 588 C = None 589 # Broadcast if necessary 590 weights = fd.as_field(weights,self.shape_dom,depth=1) 591 offsets = fd.as_field(offsets,self.shape_dom,depth=2) 592 self._weights = self.reshape(weights) 593 weights = None 594 # offsets = offsets.get() # Uncomment if desperate to save memory... 595 self._offsets = self.reshape(self._compress_offsets(offsets),constant_values=0) 596 offsets = None 597 598 # Setup the metric 599 assert len(M) == self.symdim or self.isotropic_metric 600 self._metric = self.reshape(fd.as_field(M,self.shape_dom,depth=1)) 601 M = None 602 603 if self.way_ad<0: # Reverse autodiff 604 self._weights = ad.Dense.denseAD(self._weights) 605 self._metric = ad.Dense.denseAD(self._metric) 606 if self.size_ad: # Forward or reverse autodiff 607 self.check_ad(self._weights) 608 self.check_ad(self._metric) 609 else: 610 self.check(self._weights) 611 self.check(self._metric) 612 self.check(self._offsets) 613 614 615 if self.isotropic_metric: # TODO : case where M is anisotropic, see Sparse variant 616 self.dt_max = _mk_dt_max(dx/(self.ndim*np.sqrt(np.nanmax( 617 ad.remove_ad(self._metric).squeeze(axis=self.ndim) 618 *ad.remove_ad(self._weights).sum(axis=self.ndim)))), 619 order_x=order_x) 620 621 self.shape_free = (*self.shape_o,self.ndim,*self.shape_i) 622 self._sizes_oi = (self.size_o,),(self.size_i,) 623 624 # Generate the cuda module 625 cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda") 626 date_modified = cupy_module_helper.getmtime_max(cuda_path) 627 source = cupy_module_helper.traits_header(self.traits,size_of_shape=True) 628 629 source += [ 630 '#include "Kernel_ElasticWave.h"', 631 f"// Date cuda code last modified : {date_modified}"] 632 cuoptions = ("-default-device", f"-I {cuda_path}") 633 634 source="\n".join(source) 635 module = cupy_module_helper.GetModule(source,cuoptions) 636 self._DpH_Kernel = module.get_function("DpH") 637 self._DqH_Kernel = module.get_function("DqH") 638 639 if self.size_ad: 640 source_ad = f"#define size_ad_macro {self.size_ad}\n"+source 641 module_ad = cupy_module_helper.GetModule(source_ad,cuoptions) 642 self._DpH_Kernel_ad = module_ad.get_function("DpH") 643 self._DqH_Kernel_ad = module_ad.get_function("DqH") 644 self._modules = (module,module_ad) 645 else: self._modules = (module,) 646 647 self.SetCst('shape_o',self.shape_o,self.int_t) 648 self.SetCst('size_o',np.prod(self.shape_o),self.int_t) 649 self.SetCst('shape_tot',self.shape_dom,self.int_t) 650 651 # Traits 652 @property 653 def size_ad(self): return self._size_ad #return self._traits['size_ad_macro'] 654 @property 655 def way_ad(self): 656 """0 : no AD. 1 : forward AD. -1 : reverse AD""" 657 return 0 if self.size_ad==0 else 1 if self._traits['fwd_macro'] else -1 658 def rev_reset(self): 659 """ 660 Reset the accumulators for reverse autodiff 661 Namely (self.metric.coef and self.weights.coef) 662 """ 663 assert self.way_ad<0 664 self._metric.coef[:]=0 665 self._weights.coef[:]=0 666 667 @property 668 def isotropic_metric(self): 669 """Wether M has shape (1,1,...) or (d,d,...)""" 670 return self._traits['isotropic_metric_macro'] 671 672 @property 673 def offsetpack_t(self): 674 """Type used to store a matrix offset. Defaults to int32.""" 675 return self._offsetpack_t 676 677 @property 678 def decompdim(self): 679 """Number of terms in the decomposition of a generic Hooke tensor.""" 680 return _triangular_number(self.symdim) 681 682 @property 683 def offsetnbits(self): 684 """Number of bits for storing each integer coefficient of the matrix offsets""" 685 return (None,10,10,5)[self.ndim] 686 687 @property 688 def voigt2lower(self): 689 """ 690 Correspondance between voigt notation and symmetric matrix indexing. 691 d=2 : [0 ] d=3 : [0 ] 692 [2 1] [5 1 ] 693 [4 3 2] 694 """ 695 return {1:(0,), 2:(0,2,1), 3:(0,5,1,4,3,2)}[self.ndim] 696 697 # PDE parameters 698 @property 699 def weights(self): 700 """Weights, obtained from Voronoi's decomposition of the Hooke tensors.""" 701 return self.unshape(self._weights) 702 @property 703 def offsets(self): 704 """Offsets, obtained from Voronoi's decomposition of the Hooke tensors.""" 705 offsets2 = self.unshape(self._offsets) 706 # Uncompress 707 offsets = cp.zeros((self.symdim,)+offsets2.shape,dtype=np.int8) 708 order = self.voigt2lower 709 nbits = self.offsetnbits 710 for i,o in enumerate(order): 711 offsets[o]=((offsets2//2**(i*nbits))% 2**nbits) - 2**(nbits-1) 712 return offsets 713 714 @property 715 def C_for_reuse(self): 716 """Avoid the initial tensor decomposition step.""" 717 return self._weights,self._offsets,self.shape_dom 718 719 def _compress_offsets(self,offsets): 720 """Compress each matrix offset (symmetric, integer valued), into a single int_t""" 721 offsets2 = np.zeros_like(offsets,shape=offsets.shape[1:],dtype=self.offsetpack_t) 722 order = self.voigt2lower 723 nbits = self.offsetnbits 724 for i,o in enumerate(order): 725 offsets2 += (offsets[o].astype(self.int_t)+2**(nbits-1)) * 2**(nbits*i) 726 return offsets2 727 728 @property 729 def hooke(self): 730 """The Hooke tensor, input 'C', defining the elasticity properties of the medium.""" 731 # The Hooke tensor is not stored, so as to save memory. We thus reconstruct it 732 # from Voronoi's decomposition. 733 weights,offsets = self.weights,self.offsets 734 full_hooke = (weights * lp.outer_self(offsets)).sum(axis=2) 735 return full_hooke 736 737 @property 738 def metric(self): 739 """ 740 The metric tensor, input 'M'. Defines the norm for measuring momentum. 741 Usually metric = Id/ρ . 742 """ 743 return Metrics.misc.expand_symmetric_matrix(self.unshape(self._metric)) 744 745 @property 746 def iρ(self): 747 """Inverse density. Alias for the metric M used to define the kinetic energy.""" 748 return self.metric 749 750 def Expl_p(self,q,p,δ): 751 """ 752 Explicit time step for the impulsion p. 753 Expects : q and p reshaped in GPU friendly format, using self.reshape 754 """ 755 mult = -δ/self.dx**2/{2:4,4:48}[self.order_x] 756 if self.preserve_p: p = self._mk_copy(p) 757 if ad.is_ad(q): # Use automatic differentiation, forward or reverse 758 SetModuleConstant(self._modules[1],'DqH_mult',mult,self.float_t) 759 self.check_ad(q); self.check_ad(p) 760 self._DqH_Kernel_ad(self.shape_o,self.shape_i,(self._weights.value,self._offsets, 761 q.value,self._weights.coef,q.coef,p.coef,p.value)) 762 else: # No automatic differentiation 763 SetModuleConstant(self._modules[0],'DqH_mult',mult,self.float_t) 764 self.check(q); self.check(p) 765 self._DqH_Kernel(self.shape_o,self.shape_i, 766 (ad.remove_ad(self._weights),self._offsets,q,p)) 767 return p 768 769 def Expl_q(self,q,p,δ): 770 """ 771 Explicit time step for the position q. 772 Expects : q and p reshaped in GPU friendly format, using self.reshape 773 """ 774 if self.preserve_q: q = self._mk_copy(q) 775 if ad.is_ad(p): # Use automatic differentiation, forward or reverse 776 SetModuleConstant(self._modules[1],'DpH_mult',δ,self.float_t) 777 self.check_ad(q); self.check_ad(p) 778 self._DpH_Kernel_ad(*self._sizes_oi,(self._metric.value,p.value, 779 self._metric.coef,p.coef,q.coef,q.value)) 780 else: # No automatic differentiation 781 SetModuleConstant(self._modules[0],'DpH_mult',δ,self.float_t) 782 self.check(q); self.check(p) 783 self._DpH_Kernel(*self._sizes_oi,(ad.remove_ad(self._metric),p,q)) 784 return q 785 786 def _DqH(self,q): return self.Expl_p(q,np.zeros_like(q),-1) 787 def _DpH(self,p): return self.Expl_q(np.zeros_like(p),p, 1) 788 789 def _mk_copy(self,x): 790 """Copies the variable x, with a specific contiguity for the AD coefficients""" 791 if ad.is_ad(x): 792 assert isinstance(x,ad.Dense.denseAD) and x.size_ad in (0,self.size_ad) 793 return ad.Dense.denseAD(x.value.copy(), 794 np.moveaxis(np.moveaxis(x.coef,-1,self.ndim).copy(),self.ndim,-1)) 795 return x.copy() 796 797 def check_ad(self,x): 798 """ 799 Puts zero coefficients with the correct contiguity if those are empty. 800 Checks that the AD variable x has the correct c-contiguity for the kernel. 801 """ 802 assert self.way_ad!=0 # Both forward and reverse 803 if x.size_ad==0: 804 x.coef = np.moveaxis(np.zeros_like(x.value, 805 shape=(*x.shape[:self.ndim],self.size_ad,*x.shape[self.ndim:])),self.ndim,-1) 806 assert isinstance(x,ad.Dense.denseAD) and x.size_ad==self.size_ad 807 assert np.moveaxis(x.coef,-1,self.ndim).flags.c_contiguous 808 assert x.coef.dtype==self.float_t 809 self.check(x.value) 810 811 def check(self,x): 812 """ 813 Basic check of the types, shapes, contiguity of GPU inputs. 814 """ 815 assert x.flags.c_contiguous 816 assert x.shape[:self.ndim]==self.shape_o and x.shape[-self.ndim:]==self.shape_i 817 assert x.dtype in (self.float_t,self.int_t,self.offsetpack_t) 818 assert x.ndim in (2*self.ndim,2*self.ndim+1)
The Hamiltonian of an anisotropic elastic wave equation, implemented with GPU kernels, whose geometry is defined by a generic Hooke tensor field. The Hamiltonian is a sum of squares of finite differences, via Voronoi's decomposition. Dirichlet boundary conditions are applied, see also optional damping layers.
The Mathematical expression of the Hamiltonian is (1/2) int_X M(x,p(x),p(x)) + C(x,ε(x),ε(x)) dx where ε = grad(q) + grad(q)^T - S q is the stress tensor, and X is the domain.
- M : (metric) array of positive definite matrices, shape (d,d,n1,...,nd), Also accepts (1,1,n1,...,nd) for isotropic metric. Ex: M = (1/ρ)[None,None]
- C : (hooke tensor in voigt notation) array of positive definite matrices, shape (s,s,n1,...,nd) where s = d (d+1)/2 Reuse decomposition from previous run : C = H_prev.C_for_reuse
Warning : accessing some of this object's properties has a significant memory and computational cost, because all data is converted to a GPU kernel friendly format.
548 def __init__(self,M,C,dx=1.,order_x=2,shape_dom=None, 549 traits=None,rev_ad=0,**kwargs): 550 if cp is None: raise ImportError("Cupy library needed for this class") 551 fwd_ad = M.size_ad if ad.is_ad(M) else 0 552 if fwd_ad>0 and rev_ad>0: 553 raise ValueError("Please choose between forward and reverse differentiation") 554 self._size_ad = max(fwd_ad,rev_ad) 555 556 traits_default = { 557 'isotropic_metric_macro':M.shape[0]==1, 558 'fourth_order_macro':{2:False,4:True}[order_x], 559 'fwd_macro': fwd_ad>0 560 } 561 if traits is not None: traits_default.update(traits) 562 traits = traits_default 563 564 # Flatten the symmetric matrix arrays, if necessary 565 if (M.ndim==2 or M.shape[0] in (1,M.ndim-2)) and M.shape[0]==M.shape[1]: 566 M = Metrics.misc.flatten_symmetric_matrix(M) 567 if isinstance(C,tuple): self._weights,self._offsets,shape_dom = C # Reuse decomposition 568 elif (C.ndim==2 or C.shape[0]==_triangular_number(C.ndim-2)) and C.shape[0]==C.shape[1]: 569 C = Metrics.misc.flatten_symmetric_matrix(C) 570 571 # Get the domain shape 572 if shape_dom is None: shape_dom = M.shape[1:] if isinstance(C,tuple) else \ 573 fd.common_shape((M,C),depths=(1,1)) 574 super(ElasticHamiltonian_Kernel,self).__init__(shape_dom,traits,**kwargs) 575 576 if self.ndim not in (1,2,3): 577 raise ValueError("Only domains of dimension 1, 2 and 3 are supported") 578 579 self._offsetpack_t = np.int32 580 self.dx = dx 581 582 # Voronoi decomposition of the Hooke tensor 583 if not isinstance(C,tuple): # Decomposition was bypassed by feeding weights, offsets 584 assert len(C) == self.decompdim 585 from .. import VoronoiDecomposition 586 weights,offsets = VoronoiDecomposition(ad.cupy_generic.cupy_set(C), 587 offset_t=np.int8,flattened=True) 588 C = None 589 # Broadcast if necessary 590 weights = fd.as_field(weights,self.shape_dom,depth=1) 591 offsets = fd.as_field(offsets,self.shape_dom,depth=2) 592 self._weights = self.reshape(weights) 593 weights = None 594 # offsets = offsets.get() # Uncomment if desperate to save memory... 595 self._offsets = self.reshape(self._compress_offsets(offsets),constant_values=0) 596 offsets = None 597 598 # Setup the metric 599 assert len(M) == self.symdim or self.isotropic_metric 600 self._metric = self.reshape(fd.as_field(M,self.shape_dom,depth=1)) 601 M = None 602 603 if self.way_ad<0: # Reverse autodiff 604 self._weights = ad.Dense.denseAD(self._weights) 605 self._metric = ad.Dense.denseAD(self._metric) 606 if self.size_ad: # Forward or reverse autodiff 607 self.check_ad(self._weights) 608 self.check_ad(self._metric) 609 else: 610 self.check(self._weights) 611 self.check(self._metric) 612 self.check(self._offsets) 613 614 615 if self.isotropic_metric: # TODO : case where M is anisotropic, see Sparse variant 616 self.dt_max = _mk_dt_max(dx/(self.ndim*np.sqrt(np.nanmax( 617 ad.remove_ad(self._metric).squeeze(axis=self.ndim) 618 *ad.remove_ad(self._weights).sum(axis=self.ndim)))), 619 order_x=order_x) 620 621 self.shape_free = (*self.shape_o,self.ndim,*self.shape_i) 622 self._sizes_oi = (self.size_o,),(self.size_i,) 623 624 # Generate the cuda module 625 cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda") 626 date_modified = cupy_module_helper.getmtime_max(cuda_path) 627 source = cupy_module_helper.traits_header(self.traits,size_of_shape=True) 628 629 source += [ 630 '#include "Kernel_ElasticWave.h"', 631 f"// Date cuda code last modified : {date_modified}"] 632 cuoptions = ("-default-device", f"-I {cuda_path}") 633 634 source="\n".join(source) 635 module = cupy_module_helper.GetModule(source,cuoptions) 636 self._DpH_Kernel = module.get_function("DpH") 637 self._DqH_Kernel = module.get_function("DqH") 638 639 if self.size_ad: 640 source_ad = f"#define size_ad_macro {self.size_ad}\n"+source 641 module_ad = cupy_module_helper.GetModule(source_ad,cuoptions) 642 self._DpH_Kernel_ad = module_ad.get_function("DpH") 643 self._DqH_Kernel_ad = module_ad.get_function("DqH") 644 self._modules = (module,module_ad) 645 else: self._modules = (module,) 646 647 self.SetCst('shape_o',self.shape_o,self.int_t) 648 self.SetCst('size_o',np.prod(self.shape_o),self.int_t) 649 self.SetCst('shape_tot',self.shape_dom,self.int_t)
654 @property 655 def way_ad(self): 656 """0 : no AD. 1 : forward AD. -1 : reverse AD""" 657 return 0 if self.size_ad==0 else 1 if self._traits['fwd_macro'] else -1
0 : no AD. 1 : forward AD. -1 : reverse AD
658 def rev_reset(self): 659 """ 660 Reset the accumulators for reverse autodiff 661 Namely (self.metric.coef and self.weights.coef) 662 """ 663 assert self.way_ad<0 664 self._metric.coef[:]=0 665 self._weights.coef[:]=0
Reset the accumulators for reverse autodiff Namely (self.metric.coef and self.weights.coef)
667 @property 668 def isotropic_metric(self): 669 """Wether M has shape (1,1,...) or (d,d,...)""" 670 return self._traits['isotropic_metric_macro']
Wether M has shape (1,1,...) or (d,d,...)
672 @property 673 def offsetpack_t(self): 674 """Type used to store a matrix offset. Defaults to int32.""" 675 return self._offsetpack_t
Type used to store a matrix offset. Defaults to int32.
677 @property 678 def decompdim(self): 679 """Number of terms in the decomposition of a generic Hooke tensor.""" 680 return _triangular_number(self.symdim)
Number of terms in the decomposition of a generic Hooke tensor.
682 @property 683 def offsetnbits(self): 684 """Number of bits for storing each integer coefficient of the matrix offsets""" 685 return (None,10,10,5)[self.ndim]
Number of bits for storing each integer coefficient of the matrix offsets
687 @property 688 def voigt2lower(self): 689 """ 690 Correspondance between voigt notation and symmetric matrix indexing. 691 d=2 : [0 ] d=3 : [0 ] 692 [2 1] [5 1 ] 693 [4 3 2] 694 """ 695 return {1:(0,), 2:(0,2,1), 3:(0,5,1,4,3,2)}[self.ndim]
Correspondance between voigt notation and symmetric matrix indexing. d=2 : [0 ] d=3 : [0 ] [2 1] [5 1 ] [4 3 2]
698 @property 699 def weights(self): 700 """Weights, obtained from Voronoi's decomposition of the Hooke tensors.""" 701 return self.unshape(self._weights)
Weights, obtained from Voronoi's decomposition of the Hooke tensors.
702 @property 703 def offsets(self): 704 """Offsets, obtained from Voronoi's decomposition of the Hooke tensors.""" 705 offsets2 = self.unshape(self._offsets) 706 # Uncompress 707 offsets = cp.zeros((self.symdim,)+offsets2.shape,dtype=np.int8) 708 order = self.voigt2lower 709 nbits = self.offsetnbits 710 for i,o in enumerate(order): 711 offsets[o]=((offsets2//2**(i*nbits))% 2**nbits) - 2**(nbits-1) 712 return offsets
Offsets, obtained from Voronoi's decomposition of the Hooke tensors.
714 @property 715 def C_for_reuse(self): 716 """Avoid the initial tensor decomposition step.""" 717 return self._weights,self._offsets,self.shape_dom
Avoid the initial tensor decomposition step.
728 @property 729 def hooke(self): 730 """The Hooke tensor, input 'C', defining the elasticity properties of the medium.""" 731 # The Hooke tensor is not stored, so as to save memory. We thus reconstruct it 732 # from Voronoi's decomposition. 733 weights,offsets = self.weights,self.offsets 734 full_hooke = (weights * lp.outer_self(offsets)).sum(axis=2) 735 return full_hooke
The Hooke tensor, input 'C', defining the elasticity properties of the medium.
737 @property 738 def metric(self): 739 """ 740 The metric tensor, input 'M'. Defines the norm for measuring momentum. 741 Usually metric = Id/ρ . 742 """ 743 return Metrics.misc.expand_symmetric_matrix(self.unshape(self._metric))
The metric tensor, input 'M'. Defines the norm for measuring momentum. Usually metric = Id/ρ .
745 @property 746 def iρ(self): 747 """Inverse density. Alias for the metric M used to define the kinetic energy.""" 748 return self.metric
Inverse density. Alias for the metric M used to define the kinetic energy.
750 def Expl_p(self,q,p,δ): 751 """ 752 Explicit time step for the impulsion p. 753 Expects : q and p reshaped in GPU friendly format, using self.reshape 754 """ 755 mult = -δ/self.dx**2/{2:4,4:48}[self.order_x] 756 if self.preserve_p: p = self._mk_copy(p) 757 if ad.is_ad(q): # Use automatic differentiation, forward or reverse 758 SetModuleConstant(self._modules[1],'DqH_mult',mult,self.float_t) 759 self.check_ad(q); self.check_ad(p) 760 self._DqH_Kernel_ad(self.shape_o,self.shape_i,(self._weights.value,self._offsets, 761 q.value,self._weights.coef,q.coef,p.coef,p.value)) 762 else: # No automatic differentiation 763 SetModuleConstant(self._modules[0],'DqH_mult',mult,self.float_t) 764 self.check(q); self.check(p) 765 self._DqH_Kernel(self.shape_o,self.shape_i, 766 (ad.remove_ad(self._weights),self._offsets,q,p)) 767 return p
Explicit time step for the impulsion p. Expects : q and p reshaped in GPU friendly format, using self.reshape
769 def Expl_q(self,q,p,δ): 770 """ 771 Explicit time step for the position q. 772 Expects : q and p reshaped in GPU friendly format, using self.reshape 773 """ 774 if self.preserve_q: q = self._mk_copy(q) 775 if ad.is_ad(p): # Use automatic differentiation, forward or reverse 776 SetModuleConstant(self._modules[1],'DpH_mult',δ,self.float_t) 777 self.check_ad(q); self.check_ad(p) 778 self._DpH_Kernel_ad(*self._sizes_oi,(self._metric.value,p.value, 779 self._metric.coef,p.coef,q.coef,q.value)) 780 else: # No automatic differentiation 781 SetModuleConstant(self._modules[0],'DpH_mult',δ,self.float_t) 782 self.check(q); self.check(p) 783 self._DpH_Kernel(*self._sizes_oi,(ad.remove_ad(self._metric),p,q)) 784 return q
Explicit time step for the position q. Expects : q and p reshaped in GPU friendly format, using self.reshape
797 def check_ad(self,x): 798 """ 799 Puts zero coefficients with the correct contiguity if those are empty. 800 Checks that the AD variable x has the correct c-contiguity for the kernel. 801 """ 802 assert self.way_ad!=0 # Both forward and reverse 803 if x.size_ad==0: 804 x.coef = np.moveaxis(np.zeros_like(x.value, 805 shape=(*x.shape[:self.ndim],self.size_ad,*x.shape[self.ndim:])),self.ndim,-1) 806 assert isinstance(x,ad.Dense.denseAD) and x.size_ad==self.size_ad 807 assert np.moveaxis(x.coef,-1,self.ndim).flags.c_contiguous 808 assert x.coef.dtype==self.float_t 809 self.check(x.value)
Puts zero coefficients with the correct contiguity if those are empty. Checks that the AD variable x has the correct c-contiguity for the kernel.
811 def check(self,x): 812 """ 813 Basic check of the types, shapes, contiguity of GPU inputs. 814 """ 815 assert x.flags.c_contiguous 816 assert x.shape[:self.ndim]==self.shape_o and x.shape[-self.ndim:]==self.shape_i 817 assert x.dtype in (self.float_t,self.int_t,self.offsetpack_t) 818 assert x.ndim in (2*self.ndim,2*self.ndim+1)
Basic check of the types, shapes, contiguity of GPU inputs.