agd.Eikonal.HFM_CUDA.AnisotropicWave

This file implements the linear acoustic and elastic wave equations, using custom GPU kernels for efficiency. A reference implementation using sparse matrices is provided for completeness.

  1# Copyright 2020 Jean-Marie Mirebeau, University Paris-Sud, CNRS, University Paris-Saclay
  2# Distributed WITHOUT ANY WARRANTY. Licensed under the Apache License, Version 2.0, see http://www.apache.org/licenses/LICENSE-2.0
  3
  4"""
  5This file implements the linear acoustic and elastic wave equations, using custom 
  6GPU kernels for efficiency. A reference implementation using sparse matrices is provided 
  7for completeness. 
  8"""
  9
 10import numpy as np
 11import os
 12
 13from ... import AutomaticDifferentiation as ad
 14from ... import FiniteDifferences as fd
 15from ... import LinearParallel as lp
 16from ... import Metrics
 17from ... import Selling
 18from ...Metrics.Seismic import Hooke
 19from ...ODE.hamiltonian import QuadraticHamiltonian,QuadraticHamiltonianBase
 20rm_ad = ad.remove_ad
 21
 22try: # These features require a GPU, but are optional.
 23	import cupy as cp
 24	from . import cupy_module_helper
 25	from .cupy_module_helper import SetModuleConstant
 26	from . import SellingAnisotropicDiffusion
 27	from .. import VoronoiDecomposition 
 28
 29except ImportError:
 30	cp = None
 31
 32# ------ Reference implementations using sparse matrices -------
 33# These implementations can be used to check the validity of the GPU kernels. 
 34# Additionally, they support automatic differentiation (ad.Dense.denseAD_Lin operators).
 35
 36bc_to_padding = {'Periodic':None,'Neumann':np.nan,'Dirichlet':0}
 37
 38def _mk_dt_max(dt_max_22, order_x):
 39	"""
 40	Helper for CFL condition. Includes CFL factors coming from the time and spatial 
 41	discretization orders in the considered wave equations.
 42	- dt_max_22 : CFL in the case where order_x == order_t == 2 
 43	"""
 44	dt_mult_x = {2:1/2, 4:1/np.sqrt(6)}
 45	dt_mult_t = {1:2, 2:2, 4:1.28596}
 46	return lambda order_t=2, order_x=order_x : dt_max_22 * dt_mult_x[order_x] * dt_mult_t[order_t]
 47
 48def AcousticHamiltonian_Sparse(ρ,D,dx=1.,order_x=2,shape_dom=None,bc='Periodic',
 49	rev_ad=0,save_weights=False):
 50	"""
 51	Sparse matrix based implementation of the Hamiltonian of the acoustic wave equation.
 52	- bc : boundary conditions, see bc_to_padding.keys()
 53	- rev_ad (optional) : Implement reverse autodiff for the decomposition weights and inverse density
 54	"""
 55	padding = bc_to_padding[bc]
 56	if shape_dom is None: shape_dom = fd.common_shape((ρ,D),depths=(0,2))
 57	assert len(shape_dom)==len(D)
 58	=1/ρ
 59	from .. import VoronoiDecomposition # CPU or GPU
 60	try: λ,e = VoronoiDecomposition(D)
 61	except FileNotFoundError: λ,e = Selling.Decomposition(D) # Fallback to python implem
 62	λ = fd.as_field(λ,shape_dom)
 63	def Hq(q,ad_channel=lambda x:x):
 64		dq = fd.DiffEll(q,e,dx,padding=padding,order=order_x) 
 65		if padding!=padding: dq[np.isnan(dq)]=0
 66		return 0.5 * ad_channel(λ)  * np.sum(dq**2,axis=0)
 67	def Hp(p,ad_channel=lambda x:x):
 68		return 0.5 * ad_channel() * p**2
 69	H = QuadraticHamiltonian(Hq,Hp) # Create then replace quadratic functions with sparse matrices
 70	if rev_ad>0: # Reverse autodiff support
 71		H.weights = ad.Dense.denseAD(λ, coef=np.zeros_like(λ, shape=(*λ.shape, rev_ad)))
 72		H.offsets = e
 73		H.      = ad.Dense.denseAD(,coef=np.zeros_like(,shape=(*.shape,rev_ad)))
 74		rev_ad = (H.weights.coef,H..coef)
 75	else:
 76		if save_weights: H.weights,H.M,H. = λ,, 
 77		rev_ad = (None,None)
 78	H.set_spmat(np.zeros_like(rm_ad(D),shape=shape_dom),rev_ad=rev_ad) 
 79	H.dt_max = _mk_dt_max(dx * np.sqrt(np.min(rm_ad(ρ)/rm_ad(λ).sum(axis=0))), order_x)
 80	return H
 81
 82def ElasticHamiltonian_Sparse(M,C,dx,order_x=2,S=None,shape_dom=None,bc='Periodic',
 83	rev_ad=0,save_weights=False):
 84	"""
 85	Sparse matrix based implementation of the Hamiltonian of the elastic wave equation, namely
 86	(1/2) int_X M(x,p(x),p(x)) + C(x,ε(x),ε(x)) dx
 87	where ε = grad(q) + grad(q)^T - S q is the stress tensor, and X is the domain.
 88
 89	- M : (metric) array of positive definite matrices, shape (d,d,n1,...,nd),
 90		Also accepts (1,1,n1,...,nd) for isotropic metric. Ex: M = (1/ρ)[None,None]
 91	- C : (hooke tensor in voigt notation) array of positive definite matrices,
 92		shape (s,s,n1,...,nd) where s = d (d+1)/2
 93	- bc : boundary conditions, see bc_to_padding.keys()
 94	- rev_ad (optional) : Implement reverse autodiff for the decomposition weights and M.
 95	"""
 96	padding = bc_to_padding[bc]
 97	if shape_dom is None: shape_dom = fd.common_shape((M,C),depths=(2,2))
 98	vdim = len(shape_dom); assert len(C)==(vdim*(vdim+1))//2
 99	λ,E = Hooke(C).Selling()
100	λ,E,S,M = fd.common_field( (λ,E,S,M), depths=(1,3,3,2), shape=shape_dom) # broadcasting
101	if S is None: ES = (0,)*vdim
102	else: ES = np.sum(E[:,:,None,:]*S[:,:,:,None],axis=(0,1))
103
104	def Hq(q,ad_channel=lambda x:x): # Elastic energy
105		λ_ = 2**-vdim * ad_channel(λ) # Normalize, extract relevant ad part
106		dq0 = fd.DiffEll(q[0],E[0],dx,order=order_x,α=ES[0]*q[0],padding=padding)
107		if padding!=padding: dq0[np.isnan(dq0)]=0
108		if vdim==1: return λ_ * np.sum(dq0**2, axis=0)
109		dq1 = fd.DiffEll(q[1],E[1],dx,order=order_x,α=ES[1]*q[1],padding=padding)
110		if padding!=padding: dq1[np.isnan(dq1)]=0
111		if vdim==2: return λ_ * np.sum((dq0+dq1)**2+(dq0+dq1[::-1])**2, axis=0)
112		dq2 = fd.DiffEll(q[2],E[2],dx,order=order_x,α=ES[2]*q[2],padding=padding)
113		if padding!=padding: dq2[np.isnan(dq2)]=0
114		return λ_ * np.sum((dq0+dq1+dq2)**2+(dq0+dq1+dq2[::-1])**2
115			+(dq0+dq1[::-1]+dq2)**2+(dq0+dq1[::-1]+dq2[::-1])**2, axis=0)
116
117	def Hp(p,ad_channel=lambda x:x): # Kinetic energy
118		if M.shape[:2]==(1,1): return 0.5*ad_channel(M[0,0])*np.sum(p**2,axis=0)# Typically M = 1/ρ
119		return 0.5 * p[None,:]*ad_channel(M)*p[:,None] #lp.dot_VAV(p,ad_channel(M),p) changed for rev_ad
120
121	H = QuadraticHamiltonian(Hq,Hp) # Create then replace quadratic functions with sparse matrices
122	if rev_ad>0: # Reverse autodiff support
123		H.weights  = ad.Dense.denseAD(λ,coef=np.zeros_like(λ,shape=(*λ.shape,rev_ad)))
124		H.M = H. = ad.Dense.denseAD(M,coef=np.zeros_like(M,shape=(*M.shape,rev_ad)))
125		H.moffsets = E
126		rev_ad = (H.weights.coef,H.M.coef)
127	else: 
128		if save_weights: H.weights,H.M,H. = λ,M,M 
129		rev_ad = (None,None)
130	H.set_spmat(np.zeros_like(rm_ad(C),shape=(vdim,*shape_dom)),rev_ad=rev_ad)
131	H.reshape = lambda x:x; H.unshape = lambda x:x # Dummy, for compatibility only with the GPU kernel
132
133	ev = np.linalg.eigvalsh(np.moveaxis(rm_ad(M),(0,1),(-2,-1)))[...,-1] # Take largest eigvenvalue 
134	H.dt_max = _mk_dt_max( dx/(np.sqrt(np.max(ev*rm_ad(λ).sum(axis=0)))*vdim), order_x)
135	return H
136
137def AcousticChgVar(q,p,ρ,D,ϕ,X):
138	"""
139	Change of variables in the acoustic wave equation.
140	- q,p,ρ,D (callable) : problem data
141	- ϕ : change of variables
142	- X : points where to evaluate 
143	returns
144	- tq,tp,tρ,tD,ϕ(X) (arrays) : coordinate changed problem data, obtained as 
145		q(ϕ), p(ϕ) J, ρ(ϕ) J, Φ^-1 D(ϕ) Φ^-t J
146	"""
147	X_ad = ad.Dense.identity(constant=X,shape_free=(len(X),))
148	ϕ_ad = ϕ_fun(X_ad)
149	ϕ = ϕ_ad.value
150	 = np.moveaxis(ϕ_ad.gradient(),1,0) # Gradient is differential transposed
151	inv_dϕ = lp.inverse()
152	 = lp.det()
153
154	D_ = fd.as_field(D(ϕ),.shape,depth=2)
155	tD = lp.dot_AA(inv_dϕ,lp.dot_AA(D_,lp.transpose(inv_dϕ))) * 
156
157	return q(ϕ), p(ϕ)*, ρ(ϕ)*, tD, ϕ
158
159def ElasticChgVar(q,p,M,C,S,ϕ,X):
160	"""
161	Change of variables in the elastic wave equation.
162	- q,p,M,C,S (callable) : problem data
163	- ϕ (callable) : change of variables
164	- X : points where to evaluate
165	returns
166	- tq,tp,tM,tC,tS,ϕ(X) (arrays) : coordinate changed problem data, obtained as 
167	Φ^t q(ϕ), Φ^-1 p(ϕ) J, Φ^t M(ϕ) Φ / J, (Φ^t ε(ϕ) Φ,)
168	∑_{i'j'k'l'} C_{i'j'k'l'}(ϕ) Ψ^{i'}_i Ψ^{j'}_j Ψ^{k'}_k Ψ^{l'}_l  J,
169	∑_{i'j'} Φ^i_{i'} Φ^j_{j'} S^{i'j'}_{k'}(ϕ) Ψ_k^{k'} + ∑_{k'} ∂^{ij} ϕ_{k'} Ψ_k^{k'}
170	"""
171	X_ad = ad.Dense2.identity(constant=X,shape_free=(len(X),))
172	ϕ_ad = ϕ_fun(X_ad)
173	ϕ = ϕ_ad.value
174	 = np.moveaxis(ϕ_ad.gradient(),1,0) # Gradient is differential transposed
175	inv_dϕ = lp.inverse()
176	 = lp.det()
177	d2ϕ = np.moveaxis(ϕ_ad.hessian(),2,0)
178
179	tq = lp.dot_AV(lp.transpose(),q(ϕ))
180	tp = lp.dot_AV(inv_dϕ,p(ϕ))*
181
182	M_ = fd.as_field(M(ϕ),.shape,depth=2)
183	tM = lp.dot_AA(lp.transpose(),lp.dot_AA(M_,))/
184
185	C_ = fd.as_field(C(ϕ),.shape,depth=2)
186	tC = Hooke(C_).rotate(inv_dϕ).hooke*
187
188	S_ = fd.as_field(S(ϕ),.shape,depth=3)
189	vdim = len()
190	S1 = sum([ip,:,None,None]*[jp,None,:,None]*S_[ip,jp,kp]*inv_dϕ[:,kp]
191		for ip in range(vdim) for jp in range(vdim) for kp in range(vdim))
192	S2 = sum(d2ϕ[kp,:,:,None]*inv_dϕ[:,kp] for kp in range(vdim))
193	tS = S1 + S2
194
195	return tq,tp,tM,tC,tS,ϕ
196
197# ------- Implementations based on GPU kernels -------
198
199class AcousticHamiltonian_Kernel(QuadraticHamiltonianBase):
200	"""
201	The Hamiltonian of an anisotropic acoustic wave equation, implemented with GPU kernels,
202	whose geometry is defined by a generic Riemannianian metric field.
203	The Hamiltonian is a sum of squares of finite differences, via Selling's decomposition.
204
205	The Mathematical expression of the Hamiltonian is 
206	(1/2) int_X iρ(x) p(x)^2 + D(x,grad q(x), grad q(x)) dx
207	where X is the domain, and D the is the (dual-)metric.
208	"""
209
210	def __init__(self,ρ,D,dx=1.,
211		order_x=2,shape_dom=None,periodic=False,
212		flattened=False,rev_ad=0,=None,
213		block_size=256,traits=None,**kwargs):
214		if cp is None: raise ImportError("Cupy library needed for this class")
215		super(AcousticHamiltonian_Kernel,self).__init__(**kwargs)
216
217		if shape_dom is None: shape_dom = fd.common_shape((ρ,D),
218			depths=(0,1 if flattened else 2))
219		self._shape_dom = shape_dom
220		self.shape_free = shape_dom
221		size_dom = np.prod(shape_dom)
222		self._sizes_oi = (int(np.ceil(size_dom/block_size)),),(block_size,)
223		self.dx = dx
224
225		fwd_ad = ρ.size_ad if ad.is_ad(ρ) else 0
226		self._size_ad = max(rev_ad,fwd_ad)
227
228		# Init the GPU kernel
229		traits_default = {
230			'fourth_order_macro':{2:False,4:True}[order_x],
231			'Scalar':np.float32,
232			'Int':self.int_t,
233			'OffsetT':np.int8,
234			'ndim_macro':self.ndim,
235			'periodic_macro':periodic,
236			'periodic_axes':(True,)*self.ndim,
237			'fwd_macro':fwd_ad>0,
238		}
239		self._traits = traits_default if traits is None else {**traits_default,**traits}
240
241		# Setup the problem data
242		dtype32 = (self.float_t==np.float32)
243		 = ad.cupy_generic.cupy_set(1/ρ if  is None else , dtype32=dtype32) 
244		ρ=None
245		 = fd.as_field(,shape_dom)
246		self. = ad.Base.ascontiguousarray()
247		=None
248
249		D = ad.cupy_generic.cupy_set(D, dtype32=dtype32)
250		λ,e = VoronoiDecomposition(D,offset_t=np.int8,flattened=flattened)
251		D=None
252		self._traits['decompdim'] = len(λ)
253		self.dt_max = _mk_dt_max(dx/np.sqrt(
254			np.max(rm_ad(self.)*rm_ad(λ).sum(axis=0))), order_x)
255		λ = fd.as_field(λ,shape_dom,depth=1)
256		self.weights = ad.Base.ascontiguousarray(np.moveaxis(λ,0,-1))
257		λ=None
258
259		if self.way_ad<0: # Reverse autodiff
260			self.weights  = ad.Dense.denseAD(self.weights)
261			self. = ad.Dense.denseAD(self.)
262		for arr in (self.weights,self.): 
263			self.check_ad(arr) if self.size_ad>0 else self.check(arr)
264
265		# Generate the cuda module
266		cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda")
267		date_modified = cupy_module_helper.getmtime_max(cuda_path)
268		source = cupy_module_helper.traits_header(self.traits,size_of_shape=True)
269
270		source += [
271		'#include "Kernel_AcousticWave.h"',
272		f"// Date cuda code last modified : {date_modified}"]
273		cuoptions = ("-default-device", f"-I {cuda_path}") 
274
275		source="\n".join(source)
276		module = cupy_module_helper.GetModule(source,cuoptions)
277		get_indices = module.get_function('get_indices')
278		self._DpH_Kernel = module.get_function("DpH")
279		self._DqH_Kernel = module.get_function("DqH")
280
281		if self.size_ad:
282			source_ad = f"#define size_ad_macro {self.size_ad}\n"+source
283			module_ad = cupy_module_helper.GetModule(source_ad,cuoptions)
284			self._DpH_Kernel_ad = module_ad.get_function("DpH")
285			self._DqH_Kernel_ad = module_ad.get_function("DqH")
286			self._modules = (module,module_ad)
287		else: self._modules = (module,)
288
289		for module in self._modules:
290			SetModuleConstant(module,'shape_tot',self.shape_dom,self.int_t)
291			SetModuleConstant(module,'size_tot',np.prod(self.shape_dom),self.int_t)
292
293		# Get the indices of the scheme neighbors
294		self._ineigh = cp.full((*self.weights.shape,order_x),-2**30,dtype=self.int_t)
295		e = cp.ascontiguousarray(fd.as_field(e,shape_dom,depth=2))
296		get_indices(*self._sizes_oi,(e,self._ineigh))
297		e=None
298		self.check(self._ineigh)
299
300	@property
301	def M(self): 
302		"""Alias for the inverse density"""
303		return self.
304	@property
305	def size_ad(self): return self._size_ad
306	@property
307	def way_ad(self):
308		"""0 : no AD. 1 : forward AD. -1 : reverse AD"""
309		return 0 if self.size_ad==0 else 1 if self.traits['fwd_macro'] else -1
310	def rev_reset(self):
311		"""
312		Reset the accumulators for reverse autodiff 
313		Namely (self.metric.coef and self.weights.coef)
314		"""
315		self.weights.coef[:]=0
316		self..coef[:]=0
317
318	@property	
319	def shape_dom(self):
320		"""Shape of the PDE discretization domain."""
321		return self._shape_dom
322	@property	
323	def ndim(self):
324		"""Number of dimensions of the domain."""
325		return len(self.shape_dom)
326	@property	
327	def decompdim(self):
328		"""Length of quadratic form decomposition."""
329		return self.weights.shape[-1]
330
331	@property
332	def traits(self): return self._traits
333	@property
334	def int_t(self): return np.int32
335	@property
336	def float_t(self): return self.traits['Scalar']
337	@property
338	def order_x(self): return 4 if self.traits['fourth_order_macro'] else 2
339
340	def Expl_p(self,q,p,δ):
341		"""
342		Explicit time step for the impulsion p.
343		Expects : q and p reshaped in GPU friendly format, using self.reshape
344		"""
345		mult = -δ/self.dx**2/{2:2,4:24}[self.order_x]
346		if self.preserve_p: p = self._mk_copy(p)
347		if ad.is_ad(q): # Use automatic differentiation, forward or reverse
348			SetModuleConstant(self._modules[1],'DqH_mult',mult,self.float_t)
349			self.check_ad(q); self.check_ad(p)
350			self._DqH_Kernel_ad(*self._sizes_oi,(self.weights.value,self._ineigh,
351				q.value,self.weights.coef,q.coef,p.coef,p.value))
352		else: # No automatic differentiation
353			SetModuleConstant(self._modules[0],'DqH_mult',mult,self.float_t)
354			self.check(q); self.check(p)
355			self._DqH_Kernel(*self._sizes_oi,
356				(ad.remove_ad(self.weights),self._ineigh,q,p))
357		return p
358
359	def Expl_q(self,q,p,δ): 
360		"""
361		Explicit time step for the position q.
362		Expects : q and p reshaped in GPU friendly format, using self.reshape
363		"""
364		if self.preserve_q: q = self._mk_copy(q)
365		if ad.is_ad(p): # Use automatic differentiation, forward or reverse
366			SetModuleConstant(self._modules[1],'DpH_mult',δ,self.float_t)
367			self.check_ad(q); self.check_ad(p)
368			self._DpH_Kernel_ad(*self._sizes_oi,(self..value,p.value,
369				self..coef,p.coef,q.coef,q.value))
370		else: # No automatic differentiation
371			self.check(q); self.check(p)
372			q += δ*ad.remove_ad(self.)*p 
373		return q
374	
375	def _DqH(self,q): return self.Expl_p(q,np.zeros_like(q),-1)
376	def _DpH(self,p): return self.Expl_q(np.zeros_like(p),p, 1)
377
378	def check_ad(self,x):
379		"""
380		Puts zero ad coefficients, with the correct c-contiguity, if those are empty.
381		Basic additional checks of shapes, contiguity.
382		"""
383		assert self.way_ad!=0 # Either forward and reverse AD must be supported
384		if x.size_ad==0: x.coef = np.zeros_like(x.value,shape=(*x.shape,self.size_ad))
385		assert isinstance(x,ad.Dense.denseAD) and x.size_ad==self.size_ad
386		assert x.coef.flags.c_contiguous 
387		assert x.coef.dtype==self.float_t
388		self.check(x.value)
389
390	def check(self,x):
391		""" Basic check of the types, shapes, contiguity of GPU inputs. """
392		assert x.flags.c_contiguous
393		assert x.shape[:self.ndim]==self.shape_dom
394		assert x.dtype in (self.float_t,self.int_t)
395
396
397class WaveHamiltonianBase(QuadraticHamiltonianBase):
398	"""
399	A base class for GPU implementations of Hamiltonians of wave equations.
400	Warning : position and impulsion arrays are padded and reshaped in a GPU friendly format.
401	__init__ arguments 
402	- constant values : used as padding reshape function
403	"""
404
405	def __init__(self,shape_dom,traits=None,periodic=False,constant_values=0,**kwargs):
406		super(WaveHamiltonianBase,self).__init__(**kwargs)
407		self._shape_dom = shape_dom
408
409		traits_default = {
410			'Scalar':np.float32,
411			'Int':np.int32,
412			'bypass_zeros_macro':True,
413			'fourth_order_macro':False,
414			'shape_i': {1:(64,),2:(8,8),3:(4,4,4),4:(4,4,2,2)}[self.ndim],
415			}
416		if traits is not None: traits_default.update(traits)
417		traits = traits_default
418
419		if np.ndim(periodic)==0: periodic=(periodic,)*self.ndim
420		assert periodic is None or len(periodic)==self.ndim
421		self._periodic = periodic
422		traits.update({
423			'ndim_macro':self.ndim,
424			'periodic_macro':any(periodic),
425			'periodic_axes':periodic,
426		})
427		self._traits = traits
428		self._check = True # Perform some safety input checks before calling GPU kernels
429
430		assert len(self.shape_i) == self.ndim
431		self._shape_o = tuple(fd.round_up_ratio(shape_dom,self.shape_i))
432		self.constant_values = constant_values
433
434	def _dot(self,q,p): 
435		"""Duality bracket, ignoring NaNs used for padding."""
436		return np.nansum(q*p) if np.isnan(self.constant_values) else np.sum(q*p)
437
438	# Domain shape
439	@property	
440	def shape_dom(self):
441		"""Shape of the PDE discretization domain."""
442		return self._shape_dom
443	@property	
444	def shape_o(self):  
445		"""Outer shape : number of blocks in each dimension (for GPU kernels)."""
446		return self._shape_o
447	@property	
448	def shape_i(self):
449		"""Inner shape : accessed by a block of threads (for GPU kernels)."""
450		return self.traits['shape_i']
451	@property	
452	def size_o(self):   return np.prod(self.shape_o)
453	@property	
454	def size_i(self):   return np.prod(self.shape_i)
455
456	@property	
457	def ndim(self):
458		"""Number of dimensions of the domain."""
459		return len(self.shape_dom)
460	@property
461	def symdim(self):   
462		"""DImension of the space of symmetric matrices."""
463		return _triangular_number(self.ndim)
464
465	# Traits
466	@property
467	def traits(self):
468		"""Collection of traits, passed to GPU kernel."""
469		return self._traits
470
471	@property	
472	def float_t(self):  
473		"""Scalar type used by the GPU kernel. Defaults to float32."""
474		return self.traits['Scalar']
475	@property
476	def int_t(self):
477		"""Int type used by the GPU kernel. Defaults to int32."""
478		return self.traits['Int']
479	@property	
480	def order_x(self):
481		"""Consistency order of the finite differences scheme."""
482		return 4 if self.traits['fourth_order_macro'] else 2
483	@property	
484	def periodic(self): 
485		"""Wether to apply periodic boundary conditions, for each axis."""
486		return self._periodic
487
488	def SetCst(self,name,value,dtype):
489		"""Set a constant un the cuda module"""
490		for module in self._modules:
491			SetModuleConstant(module,name,value,dtype)
492
493	def reshape(self,x,constant_values=None,**kwargs):
494		"""
495		Reshapes and pads the array x in a kernel friendly format.
496		Factors shape_i. Also moves the geometry axis before
497		shape_i, following the convention of HookeWave.h
498		- **kwargs : passed to fd.block_expand
499		"""
500		if constant_values is None: constant_values = self.constant_values
501		x = fd.block_expand(x,self.shape_i,constant_values=constant_values,**kwargs)
502		if x.ndim == 2*self.ndim: pass
503		elif x.ndim == 2*self.ndim+1: x = np.moveaxis(x,0,self.ndim)
504		else: raise ValueError("Unsupported geometry depth")
505		#Cast to correct float and int types
506		if x.dtype==np.float64: dtype=self.float_t
507		elif x.dtype==np.int64: dtype=self.int_t
508		else: dtype=x.dtype
509		#dtype = {np.float64:self.float_t,np.int64:self.int_t}.get(value.dtype,value.dtype) fails
510		if ad.is_ad(x):
511			x.value = cp.ascontiguousarray(cp.asarray(x.value,dtype=dtype))
512			# Setup a suitable contiguity of the AD variable coefficients for the GPU kernel
513			x_coef = cp.ascontiguousarray(cp.asarray(np.moveaxis(x.coef,-1,self.ndim),dtype=dtype))
514			x.coef = np.moveaxis(x_coef,self.ndim,-1)
515			return x
516		else: return cp.ascontiguousarray(cp.asarray(x,dtype=dtype))
517
518		return cp.ascontiguousarray(cp.asarray(value,dtype=dtype))
519
520	def unshape(self,value):
521		"""Inverse operation to reshape"""
522		if value.ndim==2*self.ndim: pass
523		elif value.ndim==2*self.ndim+1: value = np.moveaxis(value,self.ndim,0)
524		else: raise ValueError("Unsupported geometry depth")
525		return fd.block_squeeze(value,self.shape_dom)
526
527class ElasticHamiltonian_Kernel(WaveHamiltonianBase):
528	"""
529	The Hamiltonian of an anisotropic elastic wave equation, implemented with GPU kernels,
530	whose geometry is defined by a generic Hooke tensor field.
531	The Hamiltonian is a sum of squares of finite differences, via Voronoi's decomposition.
532	Dirichlet boundary conditions are applied, see also optional damping layers.
533
534	The Mathematical expression of the Hamiltonian is 
535	(1/2) int_X M(x,p(x),p(x)) + C(x,ε(x),ε(x)) dx
536	where ε = grad(q) + grad(q)^T - S q is the stress tensor, and X is the domain.
537
538	- M : (metric) array of positive definite matrices, shape (d,d,n1,...,nd),
539		Also accepts (1,1,n1,...,nd) for isotropic metric. Ex: M = (1/ρ)[None,None]
540	- C : (hooke tensor in voigt notation) array of positive definite matrices,
541		shape (s,s,n1,...,nd) where s = d (d+1)/2
542		Reuse decomposition from previous run : C = H_prev.C_for_reuse
543
544	Warning : accessing some of this object's properties has a significant memory and 
545	computational cost, because all data is converted to a GPU kernel friendly format.
546	"""
547	def __init__(self,M,C,dx=1.,order_x=2,shape_dom=None,
548		traits=None,rev_ad=0,**kwargs):
549		if cp is None: raise ImportError("Cupy library needed for this class")
550		fwd_ad = M.size_ad if ad.is_ad(M) else 0
551		if fwd_ad>0 and rev_ad>0:
552			raise ValueError("Please choose between forward and reverse differentiation")
553		self._size_ad = max(fwd_ad,rev_ad)
554
555		traits_default = {
556			'isotropic_metric_macro':M.shape[0]==1,
557			'fourth_order_macro':{2:False,4:True}[order_x],
558			'fwd_macro': fwd_ad>0
559			}
560		if traits is not None: traits_default.update(traits)
561		traits = traits_default
562
563		# Flatten the symmetric matrix arrays, if necessary
564		if (M.ndim==2 or M.shape[0] in (1,M.ndim-2)) and M.shape[0]==M.shape[1]: 
565			M = Metrics.misc.flatten_symmetric_matrix(M)
566		if isinstance(C,tuple): self._weights,self._offsets,shape_dom = C # Reuse decomposition
567		elif (C.ndim==2 or C.shape[0]==_triangular_number(C.ndim-2)) and C.shape[0]==C.shape[1]: 
568			C = Metrics.misc.flatten_symmetric_matrix(C)
569
570		# Get the domain shape
571		if shape_dom is None: shape_dom = M.shape[1:] if isinstance(C,tuple) else \
572			fd.common_shape((M,C),depths=(1,1))
573		super(ElasticHamiltonian_Kernel,self).__init__(shape_dom,traits,**kwargs)
574
575		if self.ndim not in (1,2,3):
576			raise ValueError("Only domains of dimension 1, 2 and 3 are supported")
577
578		self._offsetpack_t = np.int32
579		self.dx = dx
580
581		# Voronoi decomposition of the Hooke tensor
582		if not isinstance(C,tuple): # Decomposition was bypassed by feeding weights, offsets
583			assert len(C) == self.decompdim
584			from .. import VoronoiDecomposition 
585			weights,offsets = VoronoiDecomposition(ad.cupy_generic.cupy_set(C),
586				offset_t=np.int8,flattened=True)
587			C = None
588			# Broadcast if necessary
589			weights = fd.as_field(weights,self.shape_dom,depth=1)
590			offsets = fd.as_field(offsets,self.shape_dom,depth=2)
591			self._weights = self.reshape(weights)
592			weights = None
593			# offsets = offsets.get() # Uncomment if desperate to save memory...
594			self._offsets = self.reshape(self._compress_offsets(offsets),constant_values=0)
595			offsets = None
596
597		# Setup the metric
598		assert len(M) == self.symdim or self.isotropic_metric
599		self._metric = self.reshape(fd.as_field(M,self.shape_dom,depth=1))
600		M = None
601
602		if self.way_ad<0: # Reverse autodiff
603			self._weights = ad.Dense.denseAD(self._weights)
604			self._metric  = ad.Dense.denseAD(self._metric)
605		if self.size_ad:   # Forward or reverse autodiff 
606			self.check_ad(self._weights) 
607			self.check_ad(self._metric)
608		else:
609			self.check(self._weights)
610			self.check(self._metric)
611		self.check(self._offsets)
612
613
614		if self.isotropic_metric: # TODO : case where M is anisotropic, see Sparse variant
615			self.dt_max = _mk_dt_max(dx/(self.ndim*np.sqrt(np.nanmax(
616				ad.remove_ad(self._metric).squeeze(axis=self.ndim)
617				*ad.remove_ad(self._weights).sum(axis=self.ndim)))),
618			order_x=order_x)
619
620		self.shape_free = (*self.shape_o,self.ndim,*self.shape_i)
621		self._sizes_oi = (self.size_o,),(self.size_i,)
622
623		# Generate the cuda module
624		cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda")
625		date_modified = cupy_module_helper.getmtime_max(cuda_path)
626		source = cupy_module_helper.traits_header(self.traits,size_of_shape=True)
627
628		source += [
629		'#include "Kernel_ElasticWave.h"',
630		f"// Date cuda code last modified : {date_modified}"]
631		cuoptions = ("-default-device", f"-I {cuda_path}") 
632
633		source="\n".join(source)
634		module = cupy_module_helper.GetModule(source,cuoptions)
635		self._DpH_Kernel = module.get_function("DpH")
636		self._DqH_Kernel = module.get_function("DqH")
637
638		if self.size_ad:
639			source_ad = f"#define size_ad_macro {self.size_ad}\n"+source
640			module_ad = cupy_module_helper.GetModule(source_ad,cuoptions)
641			self._DpH_Kernel_ad = module_ad.get_function("DpH")
642			self._DqH_Kernel_ad = module_ad.get_function("DqH")
643			self._modules = (module,module_ad)
644		else: self._modules = (module,)
645
646		self.SetCst('shape_o',self.shape_o,self.int_t)
647		self.SetCst('size_o',np.prod(self.shape_o),self.int_t)
648		self.SetCst('shape_tot',self.shape_dom,self.int_t)
649
650	# Traits
651	@property
652	def size_ad(self): return self._size_ad #return self._traits['size_ad_macro']
653	@property
654	def way_ad(self):
655		"""0 : no AD. 1 : forward AD. -1 : reverse AD"""
656		return 0 if self.size_ad==0 else 1 if self._traits['fwd_macro'] else -1
657	def rev_reset(self):
658		"""
659		Reset the accumulators for reverse autodiff 
660		Namely (self.metric.coef and self.weights.coef)
661		"""
662		assert self.way_ad<0
663		self._metric.coef[:]=0
664		self._weights.coef[:]=0
665
666	@property
667	def isotropic_metric(self): 
668		"""Wether M has shape (1,1,...) or (d,d,...)"""
669		return self._traits['isotropic_metric_macro']
670
671	@property	
672	def offsetpack_t(self): 
673		"""Type used to store a matrix offset. Defaults to int32."""
674		return self._offsetpack_t
675
676	@property
677	def decompdim(self):
678		"""Number of terms in the decomposition of a generic Hooke tensor."""
679		return _triangular_number(self.symdim)
680	
681	@property
682	def offsetnbits(self):
683		"""Number of bits for storing each integer coefficient of the matrix offsets"""
684		return (None,10,10,5)[self.ndim]
685
686	@property
687	def voigt2lower(self):
688		"""
689		Correspondance between voigt notation and symmetric matrix indexing.
690		d=2 : [0  ]	   d=3 : [0    ]
691		      [2 1]          [5 1  ]
692		                     [4 3 2]
693		"""
694		return {1:(0,), 2:(0,2,1), 3:(0,5,1,4,3,2)}[self.ndim]
695	
696	# PDE parameters
697	@property
698	def weights(self):
699		"""Weights, obtained from Voronoi's decomposition of the Hooke tensors."""
700		return self.unshape(self._weights)
701	@property	
702	def offsets(self):
703		"""Offsets, obtained from Voronoi's decomposition of the Hooke tensors."""
704		offsets2 = self.unshape(self._offsets)
705		# Uncompress
706		offsets = cp.zeros((self.symdim,)+offsets2.shape,dtype=np.int8)
707		order = self.voigt2lower
708		nbits = self.offsetnbits
709		for i,o in enumerate(order):
710			offsets[o]=((offsets2//2**(i*nbits))% 2**nbits) - 2**(nbits-1)
711		return offsets
712
713	@property
714	def C_for_reuse(self):
715		"""Avoid the initial tensor decomposition step."""
716		return self._weights,self._offsets,self.shape_dom
717
718	def _compress_offsets(self,offsets):
719		"""Compress each matrix offset (symmetric, integer valued), into a single int_t"""
720		offsets2 = np.zeros_like(offsets,shape=offsets.shape[1:],dtype=self.offsetpack_t)
721		order = self.voigt2lower
722		nbits = self.offsetnbits
723		for i,o in enumerate(order):
724			offsets2 += (offsets[o].astype(self.int_t)+2**(nbits-1)) * 2**(nbits*i)
725		return offsets2
726
727	@property	
728	def hooke(self):
729		"""The Hooke tensor, input 'C', defining the elasticity properties of the medium."""
730		# The Hooke tensor is not stored, so as to save memory. We thus reconstruct it 
731		# from Voronoi's decomposition.
732		weights,offsets = self.weights,self.offsets
733		full_hooke = (weights * lp.outer_self(offsets)).sum(axis=2)
734		return full_hooke
735				
736	@property
737	def metric(self):
738		"""
739		The metric tensor, input 'M'. Defines the norm for measuring momentum. 
740		Usually metric = Id/ρ .
741		"""
742		return Metrics.misc.expand_symmetric_matrix(self.unshape(self._metric))
743
744	@property
745	def (self):
746		"""Inverse density. Alias for the metric M used to define the kinetic energy."""
747		return self.metric
748
749	def Expl_p(self,q,p,δ):
750		"""
751		Explicit time step for the impulsion p.
752		Expects : q and p reshaped in GPU friendly format, using self.reshape
753		"""
754		mult = -δ/self.dx**2/{2:4,4:48}[self.order_x]
755		if self.preserve_p: p = self._mk_copy(p)
756		if ad.is_ad(q): # Use automatic differentiation, forward or reverse
757			SetModuleConstant(self._modules[1],'DqH_mult',mult,self.float_t)
758			self.check_ad(q); self.check_ad(p)
759			self._DqH_Kernel_ad(self.shape_o,self.shape_i,(self._weights.value,self._offsets,
760				q.value,self._weights.coef,q.coef,p.coef,p.value))
761		else: # No automatic differentiation
762			SetModuleConstant(self._modules[0],'DqH_mult',mult,self.float_t)
763			self.check(q); self.check(p)
764			self._DqH_Kernel(self.shape_o,self.shape_i,
765				(ad.remove_ad(self._weights),self._offsets,q,p))
766		return p
767
768	def Expl_q(self,q,p,δ): 
769		"""
770		Explicit time step for the position q.
771		Expects : q and p reshaped in GPU friendly format, using self.reshape
772		"""
773		if self.preserve_q: q = self._mk_copy(q)
774		if ad.is_ad(p): # Use automatic differentiation, forward or reverse
775			SetModuleConstant(self._modules[1],'DpH_mult',δ,self.float_t)
776			self.check_ad(q); self.check_ad(p)
777			self._DpH_Kernel_ad(*self._sizes_oi,(self._metric.value,p.value,
778				self._metric.coef,p.coef,q.coef,q.value))
779		else: # No automatic differentiation
780			SetModuleConstant(self._modules[0],'DpH_mult',δ,self.float_t)
781			self.check(q); self.check(p)
782			self._DpH_Kernel(*self._sizes_oi,(ad.remove_ad(self._metric),p,q))
783		return q
784
785	def _DqH(self,q): return self.Expl_p(q,np.zeros_like(q),-1)
786	def _DpH(self,p): return self.Expl_q(np.zeros_like(p),p, 1)
787
788	def _mk_copy(self,x):
789		"""Copies the variable x, with a specific contiguity for the AD coefficients"""
790		if ad.is_ad(x):
791			assert isinstance(x,ad.Dense.denseAD) and x.size_ad in (0,self.size_ad)
792			return ad.Dense.denseAD(x.value.copy(),
793				np.moveaxis(np.moveaxis(x.coef,-1,self.ndim).copy(),self.ndim,-1))
794		return x.copy()
795
796	def check_ad(self,x):
797		"""
798		Puts zero coefficients with the correct contiguity if those are empty.
799		Checks that the AD variable x has the correct c-contiguity for the kernel.
800		"""
801		assert self.way_ad!=0 # Both forward and reverse
802		if x.size_ad==0:
803			x.coef = np.moveaxis(np.zeros_like(x.value,
804			shape=(*x.shape[:self.ndim],self.size_ad,*x.shape[self.ndim:])),self.ndim,-1)
805		assert isinstance(x,ad.Dense.denseAD) and x.size_ad==self.size_ad
806		assert np.moveaxis(x.coef,-1,self.ndim).flags.c_contiguous
807		assert x.coef.dtype==self.float_t
808		self.check(x.value)
809
810	def check(self,x):
811		""" 
812		Basic check of the types, shapes, contiguity of GPU inputs.
813		"""
814		assert x.flags.c_contiguous
815		assert x.shape[:self.ndim]==self.shape_o and x.shape[-self.ndim:]==self.shape_i
816		assert x.dtype in (self.float_t,self.int_t,self.offsetpack_t)
817		assert x.ndim in (2*self.ndim,2*self.ndim+1)
818
819# Utility functions
820def _triangular_number(n): return (n*(n+1))//2
def rm_ad(data, iterables=()):
36def remove_ad(data,iterables=tuple()):
37	def f(a): return a.value if is_ad(a) else a
38	return functional.map_iterables(f,data,iterables)
bc_to_padding = {'Periodic': None, 'Neumann': nan, 'Dirichlet': 0}
def AcousticHamiltonian_Sparse( ρ, D, dx=1.0, order_x=2, shape_dom=None, bc='Periodic', rev_ad=0, save_weights=False):
49def AcousticHamiltonian_Sparse(ρ,D,dx=1.,order_x=2,shape_dom=None,bc='Periodic',
50	rev_ad=0,save_weights=False):
51	"""
52	Sparse matrix based implementation of the Hamiltonian of the acoustic wave equation.
53	- bc : boundary conditions, see bc_to_padding.keys()
54	- rev_ad (optional) : Implement reverse autodiff for the decomposition weights and inverse density
55	"""
56	padding = bc_to_padding[bc]
57	if shape_dom is None: shape_dom = fd.common_shape((ρ,D),depths=(0,2))
58	assert len(shape_dom)==len(D)
59	=1/ρ
60	from .. import VoronoiDecomposition # CPU or GPU
61	try: λ,e = VoronoiDecomposition(D)
62	except FileNotFoundError: λ,e = Selling.Decomposition(D) # Fallback to python implem
63	λ = fd.as_field(λ,shape_dom)
64	def Hq(q,ad_channel=lambda x:x):
65		dq = fd.DiffEll(q,e,dx,padding=padding,order=order_x) 
66		if padding!=padding: dq[np.isnan(dq)]=0
67		return 0.5 * ad_channel(λ)  * np.sum(dq**2,axis=0)
68	def Hp(p,ad_channel=lambda x:x):
69		return 0.5 * ad_channel() * p**2
70	H = QuadraticHamiltonian(Hq,Hp) # Create then replace quadratic functions with sparse matrices
71	if rev_ad>0: # Reverse autodiff support
72		H.weights = ad.Dense.denseAD(λ, coef=np.zeros_like(λ, shape=(*λ.shape, rev_ad)))
73		H.offsets = e
74		H.      = ad.Dense.denseAD(,coef=np.zeros_like(,shape=(*.shape,rev_ad)))
75		rev_ad = (H.weights.coef,H..coef)
76	else:
77		if save_weights: H.weights,H.M,H. = λ,, 
78		rev_ad = (None,None)
79	H.set_spmat(np.zeros_like(rm_ad(D),shape=shape_dom),rev_ad=rev_ad) 
80	H.dt_max = _mk_dt_max(dx * np.sqrt(np.min(rm_ad(ρ)/rm_ad(λ).sum(axis=0))), order_x)
81	return H

Sparse matrix based implementation of the Hamiltonian of the acoustic wave equation.

  • bc : boundary conditions, see bc_to_padding.keys()
  • rev_ad (optional) : Implement reverse autodiff for the decomposition weights and inverse density
def ElasticHamiltonian_Sparse( M, C, dx, order_x=2, S=None, shape_dom=None, bc='Periodic', rev_ad=0, save_weights=False):
 83def ElasticHamiltonian_Sparse(M,C,dx,order_x=2,S=None,shape_dom=None,bc='Periodic',
 84	rev_ad=0,save_weights=False):
 85	"""
 86	Sparse matrix based implementation of the Hamiltonian of the elastic wave equation, namely
 87	(1/2) int_X M(x,p(x),p(x)) + C(x,ε(x),ε(x)) dx
 88	where ε = grad(q) + grad(q)^T - S q is the stress tensor, and X is the domain.
 89
 90	- M : (metric) array of positive definite matrices, shape (d,d,n1,...,nd),
 91		Also accepts (1,1,n1,...,nd) for isotropic metric. Ex: M = (1/ρ)[None,None]
 92	- C : (hooke tensor in voigt notation) array of positive definite matrices,
 93		shape (s,s,n1,...,nd) where s = d (d+1)/2
 94	- bc : boundary conditions, see bc_to_padding.keys()
 95	- rev_ad (optional) : Implement reverse autodiff for the decomposition weights and M.
 96	"""
 97	padding = bc_to_padding[bc]
 98	if shape_dom is None: shape_dom = fd.common_shape((M,C),depths=(2,2))
 99	vdim = len(shape_dom); assert len(C)==(vdim*(vdim+1))//2
100	λ,E = Hooke(C).Selling()
101	λ,E,S,M = fd.common_field( (λ,E,S,M), depths=(1,3,3,2), shape=shape_dom) # broadcasting
102	if S is None: ES = (0,)*vdim
103	else: ES = np.sum(E[:,:,None,:]*S[:,:,:,None],axis=(0,1))
104
105	def Hq(q,ad_channel=lambda x:x): # Elastic energy
106		λ_ = 2**-vdim * ad_channel(λ) # Normalize, extract relevant ad part
107		dq0 = fd.DiffEll(q[0],E[0],dx,order=order_x,α=ES[0]*q[0],padding=padding)
108		if padding!=padding: dq0[np.isnan(dq0)]=0
109		if vdim==1: return λ_ * np.sum(dq0**2, axis=0)
110		dq1 = fd.DiffEll(q[1],E[1],dx,order=order_x,α=ES[1]*q[1],padding=padding)
111		if padding!=padding: dq1[np.isnan(dq1)]=0
112		if vdim==2: return λ_ * np.sum((dq0+dq1)**2+(dq0+dq1[::-1])**2, axis=0)
113		dq2 = fd.DiffEll(q[2],E[2],dx,order=order_x,α=ES[2]*q[2],padding=padding)
114		if padding!=padding: dq2[np.isnan(dq2)]=0
115		return λ_ * np.sum((dq0+dq1+dq2)**2+(dq0+dq1+dq2[::-1])**2
116			+(dq0+dq1[::-1]+dq2)**2+(dq0+dq1[::-1]+dq2[::-1])**2, axis=0)
117
118	def Hp(p,ad_channel=lambda x:x): # Kinetic energy
119		if M.shape[:2]==(1,1): return 0.5*ad_channel(M[0,0])*np.sum(p**2,axis=0)# Typically M = 1/ρ
120		return 0.5 * p[None,:]*ad_channel(M)*p[:,None] #lp.dot_VAV(p,ad_channel(M),p) changed for rev_ad
121
122	H = QuadraticHamiltonian(Hq,Hp) # Create then replace quadratic functions with sparse matrices
123	if rev_ad>0: # Reverse autodiff support
124		H.weights  = ad.Dense.denseAD(λ,coef=np.zeros_like(λ,shape=(*λ.shape,rev_ad)))
125		H.M = H. = ad.Dense.denseAD(M,coef=np.zeros_like(M,shape=(*M.shape,rev_ad)))
126		H.moffsets = E
127		rev_ad = (H.weights.coef,H.M.coef)
128	else: 
129		if save_weights: H.weights,H.M,H. = λ,M,M 
130		rev_ad = (None,None)
131	H.set_spmat(np.zeros_like(rm_ad(C),shape=(vdim,*shape_dom)),rev_ad=rev_ad)
132	H.reshape = lambda x:x; H.unshape = lambda x:x # Dummy, for compatibility only with the GPU kernel
133
134	ev = np.linalg.eigvalsh(np.moveaxis(rm_ad(M),(0,1),(-2,-1)))[...,-1] # Take largest eigvenvalue 
135	H.dt_max = _mk_dt_max( dx/(np.sqrt(np.max(ev*rm_ad(λ).sum(axis=0)))*vdim), order_x)
136	return H

Sparse matrix based implementation of the Hamiltonian of the elastic wave equation, namely (1/2) int_X M(x,p(x),p(x)) + C(x,ε(x),ε(x)) dx where ε = grad(q) + grad(q)^T - S q is the stress tensor, and X is the domain.

  • M : (metric) array of positive definite matrices, shape (d,d,n1,...,nd), Also accepts (1,1,n1,...,nd) for isotropic metric. Ex: M = (1/ρ)[None,None]
  • C : (hooke tensor in voigt notation) array of positive definite matrices, shape (s,s,n1,...,nd) where s = d (d+1)/2
  • bc : boundary conditions, see bc_to_padding.keys()
  • rev_ad (optional) : Implement reverse autodiff for the decomposition weights and M.
def AcousticChgVar(q, p, ρ, D, φ, X):
138def AcousticChgVar(q,p,ρ,D,ϕ,X):
139	"""
140	Change of variables in the acoustic wave equation.
141	- q,p,ρ,D (callable) : problem data
142	- ϕ : change of variables
143	- X : points where to evaluate 
144	returns
145	- tq,tp,tρ,tD,ϕ(X) (arrays) : coordinate changed problem data, obtained as 
146		q(ϕ), p(ϕ) J, ρ(ϕ) J, Φ^-1 D(ϕ) Φ^-t J
147	"""
148	X_ad = ad.Dense.identity(constant=X,shape_free=(len(X),))
149	ϕ_ad = ϕ_fun(X_ad)
150	ϕ = ϕ_ad.value
151	 = np.moveaxis(ϕ_ad.gradient(),1,0) # Gradient is differential transposed
152	inv_dϕ = lp.inverse()
153	 = lp.det()
154
155	D_ = fd.as_field(D(ϕ),.shape,depth=2)
156	tD = lp.dot_AA(inv_dϕ,lp.dot_AA(D_,lp.transpose(inv_dϕ))) * 
157
158	return q(ϕ), p(ϕ)*, ρ(ϕ)*, tD, ϕ

Change of variables in the acoustic wave equation.

  • q,p,ρ,D (callable) : problem data
  • ϕ : change of variables
  • X : points where to evaluate returns
  • tq,tp,tρ,tD,ϕ(X) (arrays) : coordinate changed problem data, obtained as q(ϕ), p(ϕ) J, ρ(ϕ) J, Φ^-1 D(ϕ) Φ^-t J
def ElasticChgVar(q, p, M, C, S, φ, X):
160def ElasticChgVar(q,p,M,C,S,ϕ,X):
161	"""
162	Change of variables in the elastic wave equation.
163	- q,p,M,C,S (callable) : problem data
164	- ϕ (callable) : change of variables
165	- X : points where to evaluate
166	returns
167	- tq,tp,tM,tC,tS,ϕ(X) (arrays) : coordinate changed problem data, obtained as 
168	Φ^t q(ϕ), Φ^-1 p(ϕ) J, Φ^t M(ϕ) Φ / J, (Φ^t ε(ϕ) Φ,)
169	∑_{i'j'k'l'} C_{i'j'k'l'}(ϕ) Ψ^{i'}_i Ψ^{j'}_j Ψ^{k'}_k Ψ^{l'}_l  J,
170	∑_{i'j'} Φ^i_{i'} Φ^j_{j'} S^{i'j'}_{k'}(ϕ) Ψ_k^{k'} + ∑_{k'} ∂^{ij} ϕ_{k'} Ψ_k^{k'}
171	"""
172	X_ad = ad.Dense2.identity(constant=X,shape_free=(len(X),))
173	ϕ_ad = ϕ_fun(X_ad)
174	ϕ = ϕ_ad.value
175	 = np.moveaxis(ϕ_ad.gradient(),1,0) # Gradient is differential transposed
176	inv_dϕ = lp.inverse()
177	 = lp.det()
178	d2ϕ = np.moveaxis(ϕ_ad.hessian(),2,0)
179
180	tq = lp.dot_AV(lp.transpose(),q(ϕ))
181	tp = lp.dot_AV(inv_dϕ,p(ϕ))*
182
183	M_ = fd.as_field(M(ϕ),.shape,depth=2)
184	tM = lp.dot_AA(lp.transpose(),lp.dot_AA(M_,))/
185
186	C_ = fd.as_field(C(ϕ),.shape,depth=2)
187	tC = Hooke(C_).rotate(inv_dϕ).hooke*
188
189	S_ = fd.as_field(S(ϕ),.shape,depth=3)
190	vdim = len()
191	S1 = sum([ip,:,None,None]*[jp,None,:,None]*S_[ip,jp,kp]*inv_dϕ[:,kp]
192		for ip in range(vdim) for jp in range(vdim) for kp in range(vdim))
193	S2 = sum(d2ϕ[kp,:,:,None]*inv_dϕ[:,kp] for kp in range(vdim))
194	tS = S1 + S2
195
196	return tq,tp,tM,tC,tS,ϕ

Change of variables in the elastic wave equation.

  • q,p,M,C,S (callable) : problem data
  • ϕ (callable) : change of variables
  • X : points where to evaluate returns
  • tq,tp,tM,tC,tS,ϕ(X) (arrays) : coordinate changed problem data, obtained as Φ^t q(ϕ), Φ^-1 p(ϕ) J, Φ^t M(ϕ) Φ / J, (Φ^t ε(ϕ) Φ,) ∑_{i'j'k'l'} C_{i'j'k'l'}(ϕ) Ψ^{i'}_i Ψ^{j'}_j Ψ^{k'}_k Ψ^{l'}_l J, ∑_{i'j'} Φ^i_{i'} Φ^j_{j'} S^{i'j'}_{k'}(ϕ) Ψ_k^{k'} + ∑_{k'} ∂^{ij} ϕ_{k'} Ψ_k^{k'}
class AcousticHamiltonian_Kernel(agd.ODE.hamiltonian.QuadraticHamiltonianBase):
200class AcousticHamiltonian_Kernel(QuadraticHamiltonianBase):
201	"""
202	The Hamiltonian of an anisotropic acoustic wave equation, implemented with GPU kernels,
203	whose geometry is defined by a generic Riemannianian metric field.
204	The Hamiltonian is a sum of squares of finite differences, via Selling's decomposition.
205
206	The Mathematical expression of the Hamiltonian is 
207	(1/2) int_X iρ(x) p(x)^2 + D(x,grad q(x), grad q(x)) dx
208	where X is the domain, and D the is the (dual-)metric.
209	"""
210
211	def __init__(self,ρ,D,dx=1.,
212		order_x=2,shape_dom=None,periodic=False,
213		flattened=False,rev_ad=0,=None,
214		block_size=256,traits=None,**kwargs):
215		if cp is None: raise ImportError("Cupy library needed for this class")
216		super(AcousticHamiltonian_Kernel,self).__init__(**kwargs)
217
218		if shape_dom is None: shape_dom = fd.common_shape((ρ,D),
219			depths=(0,1 if flattened else 2))
220		self._shape_dom = shape_dom
221		self.shape_free = shape_dom
222		size_dom = np.prod(shape_dom)
223		self._sizes_oi = (int(np.ceil(size_dom/block_size)),),(block_size,)
224		self.dx = dx
225
226		fwd_ad = ρ.size_ad if ad.is_ad(ρ) else 0
227		self._size_ad = max(rev_ad,fwd_ad)
228
229		# Init the GPU kernel
230		traits_default = {
231			'fourth_order_macro':{2:False,4:True}[order_x],
232			'Scalar':np.float32,
233			'Int':self.int_t,
234			'OffsetT':np.int8,
235			'ndim_macro':self.ndim,
236			'periodic_macro':periodic,
237			'periodic_axes':(True,)*self.ndim,
238			'fwd_macro':fwd_ad>0,
239		}
240		self._traits = traits_default if traits is None else {**traits_default,**traits}
241
242		# Setup the problem data
243		dtype32 = (self.float_t==np.float32)
244		 = ad.cupy_generic.cupy_set(1/ρ if  is None else , dtype32=dtype32) 
245		ρ=None
246		 = fd.as_field(,shape_dom)
247		self. = ad.Base.ascontiguousarray()
248		=None
249
250		D = ad.cupy_generic.cupy_set(D, dtype32=dtype32)
251		λ,e = VoronoiDecomposition(D,offset_t=np.int8,flattened=flattened)
252		D=None
253		self._traits['decompdim'] = len(λ)
254		self.dt_max = _mk_dt_max(dx/np.sqrt(
255			np.max(rm_ad(self.)*rm_ad(λ).sum(axis=0))), order_x)
256		λ = fd.as_field(λ,shape_dom,depth=1)
257		self.weights = ad.Base.ascontiguousarray(np.moveaxis(λ,0,-1))
258		λ=None
259
260		if self.way_ad<0: # Reverse autodiff
261			self.weights  = ad.Dense.denseAD(self.weights)
262			self. = ad.Dense.denseAD(self.)
263		for arr in (self.weights,self.): 
264			self.check_ad(arr) if self.size_ad>0 else self.check(arr)
265
266		# Generate the cuda module
267		cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda")
268		date_modified = cupy_module_helper.getmtime_max(cuda_path)
269		source = cupy_module_helper.traits_header(self.traits,size_of_shape=True)
270
271		source += [
272		'#include "Kernel_AcousticWave.h"',
273		f"// Date cuda code last modified : {date_modified}"]
274		cuoptions = ("-default-device", f"-I {cuda_path}") 
275
276		source="\n".join(source)
277		module = cupy_module_helper.GetModule(source,cuoptions)
278		get_indices = module.get_function('get_indices')
279		self._DpH_Kernel = module.get_function("DpH")
280		self._DqH_Kernel = module.get_function("DqH")
281
282		if self.size_ad:
283			source_ad = f"#define size_ad_macro {self.size_ad}\n"+source
284			module_ad = cupy_module_helper.GetModule(source_ad,cuoptions)
285			self._DpH_Kernel_ad = module_ad.get_function("DpH")
286			self._DqH_Kernel_ad = module_ad.get_function("DqH")
287			self._modules = (module,module_ad)
288		else: self._modules = (module,)
289
290		for module in self._modules:
291			SetModuleConstant(module,'shape_tot',self.shape_dom,self.int_t)
292			SetModuleConstant(module,'size_tot',np.prod(self.shape_dom),self.int_t)
293
294		# Get the indices of the scheme neighbors
295		self._ineigh = cp.full((*self.weights.shape,order_x),-2**30,dtype=self.int_t)
296		e = cp.ascontiguousarray(fd.as_field(e,shape_dom,depth=2))
297		get_indices(*self._sizes_oi,(e,self._ineigh))
298		e=None
299		self.check(self._ineigh)
300
301	@property
302	def M(self): 
303		"""Alias for the inverse density"""
304		return self.
305	@property
306	def size_ad(self): return self._size_ad
307	@property
308	def way_ad(self):
309		"""0 : no AD. 1 : forward AD. -1 : reverse AD"""
310		return 0 if self.size_ad==0 else 1 if self.traits['fwd_macro'] else -1
311	def rev_reset(self):
312		"""
313		Reset the accumulators for reverse autodiff 
314		Namely (self.metric.coef and self.weights.coef)
315		"""
316		self.weights.coef[:]=0
317		self..coef[:]=0
318
319	@property	
320	def shape_dom(self):
321		"""Shape of the PDE discretization domain."""
322		return self._shape_dom
323	@property	
324	def ndim(self):
325		"""Number of dimensions of the domain."""
326		return len(self.shape_dom)
327	@property	
328	def decompdim(self):
329		"""Length of quadratic form decomposition."""
330		return self.weights.shape[-1]
331
332	@property
333	def traits(self): return self._traits
334	@property
335	def int_t(self): return np.int32
336	@property
337	def float_t(self): return self.traits['Scalar']
338	@property
339	def order_x(self): return 4 if self.traits['fourth_order_macro'] else 2
340
341	def Expl_p(self,q,p,δ):
342		"""
343		Explicit time step for the impulsion p.
344		Expects : q and p reshaped in GPU friendly format, using self.reshape
345		"""
346		mult = -δ/self.dx**2/{2:2,4:24}[self.order_x]
347		if self.preserve_p: p = self._mk_copy(p)
348		if ad.is_ad(q): # Use automatic differentiation, forward or reverse
349			SetModuleConstant(self._modules[1],'DqH_mult',mult,self.float_t)
350			self.check_ad(q); self.check_ad(p)
351			self._DqH_Kernel_ad(*self._sizes_oi,(self.weights.value,self._ineigh,
352				q.value,self.weights.coef,q.coef,p.coef,p.value))
353		else: # No automatic differentiation
354			SetModuleConstant(self._modules[0],'DqH_mult',mult,self.float_t)
355			self.check(q); self.check(p)
356			self._DqH_Kernel(*self._sizes_oi,
357				(ad.remove_ad(self.weights),self._ineigh,q,p))
358		return p
359
360	def Expl_q(self,q,p,δ): 
361		"""
362		Explicit time step for the position q.
363		Expects : q and p reshaped in GPU friendly format, using self.reshape
364		"""
365		if self.preserve_q: q = self._mk_copy(q)
366		if ad.is_ad(p): # Use automatic differentiation, forward or reverse
367			SetModuleConstant(self._modules[1],'DpH_mult',δ,self.float_t)
368			self.check_ad(q); self.check_ad(p)
369			self._DpH_Kernel_ad(*self._sizes_oi,(self..value,p.value,
370				self..coef,p.coef,q.coef,q.value))
371		else: # No automatic differentiation
372			self.check(q); self.check(p)
373			q += δ*ad.remove_ad(self.)*p 
374		return q
375	
376	def _DqH(self,q): return self.Expl_p(q,np.zeros_like(q),-1)
377	def _DpH(self,p): return self.Expl_q(np.zeros_like(p),p, 1)
378
379	def check_ad(self,x):
380		"""
381		Puts zero ad coefficients, with the correct c-contiguity, if those are empty.
382		Basic additional checks of shapes, contiguity.
383		"""
384		assert self.way_ad!=0 # Either forward and reverse AD must be supported
385		if x.size_ad==0: x.coef = np.zeros_like(x.value,shape=(*x.shape,self.size_ad))
386		assert isinstance(x,ad.Dense.denseAD) and x.size_ad==self.size_ad
387		assert x.coef.flags.c_contiguous 
388		assert x.coef.dtype==self.float_t
389		self.check(x.value)
390
391	def check(self,x):
392		""" Basic check of the types, shapes, contiguity of GPU inputs. """
393		assert x.flags.c_contiguous
394		assert x.shape[:self.ndim]==self.shape_dom
395		assert x.dtype in (self.float_t,self.int_t)

The Hamiltonian of an anisotropic acoustic wave equation, implemented with GPU kernels, whose geometry is defined by a generic Riemannianian metric field. The Hamiltonian is a sum of squares of finite differences, via Selling's decomposition.

The Mathematical expression of the Hamiltonian is (1/2) int_X iρ(x) p(x)^2 + D(x,grad q(x), grad q(x)) dx where X is the domain, and D the is the (dual-)metric.

AcousticHamiltonian_Kernel( ρ, D, dx=1.0, order_x=2, shape_dom=None, periodic=False, flattened=False, rev_ad=0, =None, block_size=256, traits=None, **kwargs)
211	def __init__(self,ρ,D,dx=1.,
212		order_x=2,shape_dom=None,periodic=False,
213		flattened=False,rev_ad=0,=None,
214		block_size=256,traits=None,**kwargs):
215		if cp is None: raise ImportError("Cupy library needed for this class")
216		super(AcousticHamiltonian_Kernel,self).__init__(**kwargs)
217
218		if shape_dom is None: shape_dom = fd.common_shape((ρ,D),
219			depths=(0,1 if flattened else 2))
220		self._shape_dom = shape_dom
221		self.shape_free = shape_dom
222		size_dom = np.prod(shape_dom)
223		self._sizes_oi = (int(np.ceil(size_dom/block_size)),),(block_size,)
224		self.dx = dx
225
226		fwd_ad = ρ.size_ad if ad.is_ad(ρ) else 0
227		self._size_ad = max(rev_ad,fwd_ad)
228
229		# Init the GPU kernel
230		traits_default = {
231			'fourth_order_macro':{2:False,4:True}[order_x],
232			'Scalar':np.float32,
233			'Int':self.int_t,
234			'OffsetT':np.int8,
235			'ndim_macro':self.ndim,
236			'periodic_macro':periodic,
237			'periodic_axes':(True,)*self.ndim,
238			'fwd_macro':fwd_ad>0,
239		}
240		self._traits = traits_default if traits is None else {**traits_default,**traits}
241
242		# Setup the problem data
243		dtype32 = (self.float_t==np.float32)
244		 = ad.cupy_generic.cupy_set(1/ρ if  is None else , dtype32=dtype32) 
245		ρ=None
246		 = fd.as_field(,shape_dom)
247		self. = ad.Base.ascontiguousarray()
248		=None
249
250		D = ad.cupy_generic.cupy_set(D, dtype32=dtype32)
251		λ,e = VoronoiDecomposition(D,offset_t=np.int8,flattened=flattened)
252		D=None
253		self._traits['decompdim'] = len(λ)
254		self.dt_max = _mk_dt_max(dx/np.sqrt(
255			np.max(rm_ad(self.)*rm_ad(λ).sum(axis=0))), order_x)
256		λ = fd.as_field(λ,shape_dom,depth=1)
257		self.weights = ad.Base.ascontiguousarray(np.moveaxis(λ,0,-1))
258		λ=None
259
260		if self.way_ad<0: # Reverse autodiff
261			self.weights  = ad.Dense.denseAD(self.weights)
262			self. = ad.Dense.denseAD(self.)
263		for arr in (self.weights,self.): 
264			self.check_ad(arr) if self.size_ad>0 else self.check(arr)
265
266		# Generate the cuda module
267		cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda")
268		date_modified = cupy_module_helper.getmtime_max(cuda_path)
269		source = cupy_module_helper.traits_header(self.traits,size_of_shape=True)
270
271		source += [
272		'#include "Kernel_AcousticWave.h"',
273		f"// Date cuda code last modified : {date_modified}"]
274		cuoptions = ("-default-device", f"-I {cuda_path}") 
275
276		source="\n".join(source)
277		module = cupy_module_helper.GetModule(source,cuoptions)
278		get_indices = module.get_function('get_indices')
279		self._DpH_Kernel = module.get_function("DpH")
280		self._DqH_Kernel = module.get_function("DqH")
281
282		if self.size_ad:
283			source_ad = f"#define size_ad_macro {self.size_ad}\n"+source
284			module_ad = cupy_module_helper.GetModule(source_ad,cuoptions)
285			self._DpH_Kernel_ad = module_ad.get_function("DpH")
286			self._DqH_Kernel_ad = module_ad.get_function("DqH")
287			self._modules = (module,module_ad)
288		else: self._modules = (module,)
289
290		for module in self._modules:
291			SetModuleConstant(module,'shape_tot',self.shape_dom,self.int_t)
292			SetModuleConstant(module,'size_tot',np.prod(self.shape_dom),self.int_t)
293
294		# Get the indices of the scheme neighbors
295		self._ineigh = cp.full((*self.weights.shape,order_x),-2**30,dtype=self.int_t)
296		e = cp.ascontiguousarray(fd.as_field(e,shape_dom,depth=2))
297		get_indices(*self._sizes_oi,(e,self._ineigh))
298		e=None
299		self.check(self._ineigh)
shape_free
dx
dt_max
weights
M
301	@property
302	def M(self): 
303		"""Alias for the inverse density"""
304		return self.

Alias for the inverse density

size_ad
305	@property
306	def size_ad(self): return self._size_ad
way_ad
307	@property
308	def way_ad(self):
309		"""0 : no AD. 1 : forward AD. -1 : reverse AD"""
310		return 0 if self.size_ad==0 else 1 if self.traits['fwd_macro'] else -1

0 : no AD. 1 : forward AD. -1 : reverse AD

def rev_reset(self):
311	def rev_reset(self):
312		"""
313		Reset the accumulators for reverse autodiff 
314		Namely (self.metric.coef and self.weights.coef)
315		"""
316		self.weights.coef[:]=0
317		self..coef[:]=0

Reset the accumulators for reverse autodiff Namely (self.metric.coef and self.weights.coef)

shape_dom
319	@property	
320	def shape_dom(self):
321		"""Shape of the PDE discretization domain."""
322		return self._shape_dom

Shape of the PDE discretization domain.

ndim
323	@property	
324	def ndim(self):
325		"""Number of dimensions of the domain."""
326		return len(self.shape_dom)

Number of dimensions of the domain.

decompdim
327	@property	
328	def decompdim(self):
329		"""Length of quadratic form decomposition."""
330		return self.weights.shape[-1]

Length of quadratic form decomposition.

traits
332	@property
333	def traits(self): return self._traits
int_t
334	@property
335	def int_t(self): return np.int32
float_t
336	@property
337	def float_t(self): return self.traits['Scalar']
order_x
338	@property
339	def order_x(self): return 4 if self.traits['fourth_order_macro'] else 2
def Expl_p(self, q, p, δ):
341	def Expl_p(self,q,p,δ):
342		"""
343		Explicit time step for the impulsion p.
344		Expects : q and p reshaped in GPU friendly format, using self.reshape
345		"""
346		mult = -δ/self.dx**2/{2:2,4:24}[self.order_x]
347		if self.preserve_p: p = self._mk_copy(p)
348		if ad.is_ad(q): # Use automatic differentiation, forward or reverse
349			SetModuleConstant(self._modules[1],'DqH_mult',mult,self.float_t)
350			self.check_ad(q); self.check_ad(p)
351			self._DqH_Kernel_ad(*self._sizes_oi,(self.weights.value,self._ineigh,
352				q.value,self.weights.coef,q.coef,p.coef,p.value))
353		else: # No automatic differentiation
354			SetModuleConstant(self._modules[0],'DqH_mult',mult,self.float_t)
355			self.check(q); self.check(p)
356			self._DqH_Kernel(*self._sizes_oi,
357				(ad.remove_ad(self.weights),self._ineigh,q,p))
358		return p

Explicit time step for the impulsion p. Expects : q and p reshaped in GPU friendly format, using self.reshape

def Expl_q(self, q, p, δ):
360	def Expl_q(self,q,p,δ): 
361		"""
362		Explicit time step for the position q.
363		Expects : q and p reshaped in GPU friendly format, using self.reshape
364		"""
365		if self.preserve_q: q = self._mk_copy(q)
366		if ad.is_ad(p): # Use automatic differentiation, forward or reverse
367			SetModuleConstant(self._modules[1],'DpH_mult',δ,self.float_t)
368			self.check_ad(q); self.check_ad(p)
369			self._DpH_Kernel_ad(*self._sizes_oi,(self..value,p.value,
370				self..coef,p.coef,q.coef,q.value))
371		else: # No automatic differentiation
372			self.check(q); self.check(p)
373			q += δ*ad.remove_ad(self.)*p 
374		return q

Explicit time step for the position q. Expects : q and p reshaped in GPU friendly format, using self.reshape

def check_ad(self, x):
379	def check_ad(self,x):
380		"""
381		Puts zero ad coefficients, with the correct c-contiguity, if those are empty.
382		Basic additional checks of shapes, contiguity.
383		"""
384		assert self.way_ad!=0 # Either forward and reverse AD must be supported
385		if x.size_ad==0: x.coef = np.zeros_like(x.value,shape=(*x.shape,self.size_ad))
386		assert isinstance(x,ad.Dense.denseAD) and x.size_ad==self.size_ad
387		assert x.coef.flags.c_contiguous 
388		assert x.coef.dtype==self.float_t
389		self.check(x.value)

Puts zero ad coefficients, with the correct c-contiguity, if those are empty. Basic additional checks of shapes, contiguity.

def check(self, x):
391	def check(self,x):
392		""" Basic check of the types, shapes, contiguity of GPU inputs. """
393		assert x.flags.c_contiguous
394		assert x.shape[:self.ndim]==self.shape_dom
395		assert x.dtype in (self.float_t,self.int_t)

Basic check of the types, shapes, contiguity of GPU inputs.

class WaveHamiltonianBase(agd.ODE.hamiltonian.QuadraticHamiltonianBase):
398class WaveHamiltonianBase(QuadraticHamiltonianBase):
399	"""
400	A base class for GPU implementations of Hamiltonians of wave equations.
401	Warning : position and impulsion arrays are padded and reshaped in a GPU friendly format.
402	__init__ arguments 
403	- constant values : used as padding reshape function
404	"""
405
406	def __init__(self,shape_dom,traits=None,periodic=False,constant_values=0,**kwargs):
407		super(WaveHamiltonianBase,self).__init__(**kwargs)
408		self._shape_dom = shape_dom
409
410		traits_default = {
411			'Scalar':np.float32,
412			'Int':np.int32,
413			'bypass_zeros_macro':True,
414			'fourth_order_macro':False,
415			'shape_i': {1:(64,),2:(8,8),3:(4,4,4),4:(4,4,2,2)}[self.ndim],
416			}
417		if traits is not None: traits_default.update(traits)
418		traits = traits_default
419
420		if np.ndim(periodic)==0: periodic=(periodic,)*self.ndim
421		assert periodic is None or len(periodic)==self.ndim
422		self._periodic = periodic
423		traits.update({
424			'ndim_macro':self.ndim,
425			'periodic_macro':any(periodic),
426			'periodic_axes':periodic,
427		})
428		self._traits = traits
429		self._check = True # Perform some safety input checks before calling GPU kernels
430
431		assert len(self.shape_i) == self.ndim
432		self._shape_o = tuple(fd.round_up_ratio(shape_dom,self.shape_i))
433		self.constant_values = constant_values
434
435	def _dot(self,q,p): 
436		"""Duality bracket, ignoring NaNs used for padding."""
437		return np.nansum(q*p) if np.isnan(self.constant_values) else np.sum(q*p)
438
439	# Domain shape
440	@property	
441	def shape_dom(self):
442		"""Shape of the PDE discretization domain."""
443		return self._shape_dom
444	@property	
445	def shape_o(self):  
446		"""Outer shape : number of blocks in each dimension (for GPU kernels)."""
447		return self._shape_o
448	@property	
449	def shape_i(self):
450		"""Inner shape : accessed by a block of threads (for GPU kernels)."""
451		return self.traits['shape_i']
452	@property	
453	def size_o(self):   return np.prod(self.shape_o)
454	@property	
455	def size_i(self):   return np.prod(self.shape_i)
456
457	@property	
458	def ndim(self):
459		"""Number of dimensions of the domain."""
460		return len(self.shape_dom)
461	@property
462	def symdim(self):   
463		"""DImension of the space of symmetric matrices."""
464		return _triangular_number(self.ndim)
465
466	# Traits
467	@property
468	def traits(self):
469		"""Collection of traits, passed to GPU kernel."""
470		return self._traits
471
472	@property	
473	def float_t(self):  
474		"""Scalar type used by the GPU kernel. Defaults to float32."""
475		return self.traits['Scalar']
476	@property
477	def int_t(self):
478		"""Int type used by the GPU kernel. Defaults to int32."""
479		return self.traits['Int']
480	@property	
481	def order_x(self):
482		"""Consistency order of the finite differences scheme."""
483		return 4 if self.traits['fourth_order_macro'] else 2
484	@property	
485	def periodic(self): 
486		"""Wether to apply periodic boundary conditions, for each axis."""
487		return self._periodic
488
489	def SetCst(self,name,value,dtype):
490		"""Set a constant un the cuda module"""
491		for module in self._modules:
492			SetModuleConstant(module,name,value,dtype)
493
494	def reshape(self,x,constant_values=None,**kwargs):
495		"""
496		Reshapes and pads the array x in a kernel friendly format.
497		Factors shape_i. Also moves the geometry axis before
498		shape_i, following the convention of HookeWave.h
499		- **kwargs : passed to fd.block_expand
500		"""
501		if constant_values is None: constant_values = self.constant_values
502		x = fd.block_expand(x,self.shape_i,constant_values=constant_values,**kwargs)
503		if x.ndim == 2*self.ndim: pass
504		elif x.ndim == 2*self.ndim+1: x = np.moveaxis(x,0,self.ndim)
505		else: raise ValueError("Unsupported geometry depth")
506		#Cast to correct float and int types
507		if x.dtype==np.float64: dtype=self.float_t
508		elif x.dtype==np.int64: dtype=self.int_t
509		else: dtype=x.dtype
510		#dtype = {np.float64:self.float_t,np.int64:self.int_t}.get(value.dtype,value.dtype) fails
511		if ad.is_ad(x):
512			x.value = cp.ascontiguousarray(cp.asarray(x.value,dtype=dtype))
513			# Setup a suitable contiguity of the AD variable coefficients for the GPU kernel
514			x_coef = cp.ascontiguousarray(cp.asarray(np.moveaxis(x.coef,-1,self.ndim),dtype=dtype))
515			x.coef = np.moveaxis(x_coef,self.ndim,-1)
516			return x
517		else: return cp.ascontiguousarray(cp.asarray(x,dtype=dtype))
518
519		return cp.ascontiguousarray(cp.asarray(value,dtype=dtype))
520
521	def unshape(self,value):
522		"""Inverse operation to reshape"""
523		if value.ndim==2*self.ndim: pass
524		elif value.ndim==2*self.ndim+1: value = np.moveaxis(value,self.ndim,0)
525		else: raise ValueError("Unsupported geometry depth")
526		return fd.block_squeeze(value,self.shape_dom)

A base class for GPU implementations of Hamiltonians of wave equations. Warning : position and impulsion arrays are padded and reshaped in a GPU friendly format. __init__ arguments

  • constant values : used as padding reshape function
WaveHamiltonianBase(shape_dom, traits=None, periodic=False, constant_values=0, **kwargs)
406	def __init__(self,shape_dom,traits=None,periodic=False,constant_values=0,**kwargs):
407		super(WaveHamiltonianBase,self).__init__(**kwargs)
408		self._shape_dom = shape_dom
409
410		traits_default = {
411			'Scalar':np.float32,
412			'Int':np.int32,
413			'bypass_zeros_macro':True,
414			'fourth_order_macro':False,
415			'shape_i': {1:(64,),2:(8,8),3:(4,4,4),4:(4,4,2,2)}[self.ndim],
416			}
417		if traits is not None: traits_default.update(traits)
418		traits = traits_default
419
420		if np.ndim(periodic)==0: periodic=(periodic,)*self.ndim
421		assert periodic is None or len(periodic)==self.ndim
422		self._periodic = periodic
423		traits.update({
424			'ndim_macro':self.ndim,
425			'periodic_macro':any(periodic),
426			'periodic_axes':periodic,
427		})
428		self._traits = traits
429		self._check = True # Perform some safety input checks before calling GPU kernels
430
431		assert len(self.shape_i) == self.ndim
432		self._shape_o = tuple(fd.round_up_ratio(shape_dom,self.shape_i))
433		self.constant_values = constant_values
constant_values
shape_dom
440	@property	
441	def shape_dom(self):
442		"""Shape of the PDE discretization domain."""
443		return self._shape_dom

Shape of the PDE discretization domain.

shape_o
444	@property	
445	def shape_o(self):  
446		"""Outer shape : number of blocks in each dimension (for GPU kernels)."""
447		return self._shape_o

Outer shape : number of blocks in each dimension (for GPU kernels).

shape_i
448	@property	
449	def shape_i(self):
450		"""Inner shape : accessed by a block of threads (for GPU kernels)."""
451		return self.traits['shape_i']

Inner shape : accessed by a block of threads (for GPU kernels).

size_o
452	@property	
453	def size_o(self):   return np.prod(self.shape_o)
size_i
454	@property	
455	def size_i(self):   return np.prod(self.shape_i)
ndim
457	@property	
458	def ndim(self):
459		"""Number of dimensions of the domain."""
460		return len(self.shape_dom)

Number of dimensions of the domain.

symdim
461	@property
462	def symdim(self):   
463		"""DImension of the space of symmetric matrices."""
464		return _triangular_number(self.ndim)

DImension of the space of symmetric matrices.

traits
467	@property
468	def traits(self):
469		"""Collection of traits, passed to GPU kernel."""
470		return self._traits

Collection of traits, passed to GPU kernel.

float_t
472	@property	
473	def float_t(self):  
474		"""Scalar type used by the GPU kernel. Defaults to float32."""
475		return self.traits['Scalar']

Scalar type used by the GPU kernel. Defaults to float32.

int_t
476	@property
477	def int_t(self):
478		"""Int type used by the GPU kernel. Defaults to int32."""
479		return self.traits['Int']

Int type used by the GPU kernel. Defaults to int32.

order_x
480	@property	
481	def order_x(self):
482		"""Consistency order of the finite differences scheme."""
483		return 4 if self.traits['fourth_order_macro'] else 2

Consistency order of the finite differences scheme.

periodic
484	@property	
485	def periodic(self): 
486		"""Wether to apply periodic boundary conditions, for each axis."""
487		return self._periodic

Wether to apply periodic boundary conditions, for each axis.

def SetCst(self, name, value, dtype):
489	def SetCst(self,name,value,dtype):
490		"""Set a constant un the cuda module"""
491		for module in self._modules:
492			SetModuleConstant(module,name,value,dtype)

Set a constant un the cuda module

def reshape(self, x, constant_values=None, **kwargs):
494	def reshape(self,x,constant_values=None,**kwargs):
495		"""
496		Reshapes and pads the array x in a kernel friendly format.
497		Factors shape_i. Also moves the geometry axis before
498		shape_i, following the convention of HookeWave.h
499		- **kwargs : passed to fd.block_expand
500		"""
501		if constant_values is None: constant_values = self.constant_values
502		x = fd.block_expand(x,self.shape_i,constant_values=constant_values,**kwargs)
503		if x.ndim == 2*self.ndim: pass
504		elif x.ndim == 2*self.ndim+1: x = np.moveaxis(x,0,self.ndim)
505		else: raise ValueError("Unsupported geometry depth")
506		#Cast to correct float and int types
507		if x.dtype==np.float64: dtype=self.float_t
508		elif x.dtype==np.int64: dtype=self.int_t
509		else: dtype=x.dtype
510		#dtype = {np.float64:self.float_t,np.int64:self.int_t}.get(value.dtype,value.dtype) fails
511		if ad.is_ad(x):
512			x.value = cp.ascontiguousarray(cp.asarray(x.value,dtype=dtype))
513			# Setup a suitable contiguity of the AD variable coefficients for the GPU kernel
514			x_coef = cp.ascontiguousarray(cp.asarray(np.moveaxis(x.coef,-1,self.ndim),dtype=dtype))
515			x.coef = np.moveaxis(x_coef,self.ndim,-1)
516			return x
517		else: return cp.ascontiguousarray(cp.asarray(x,dtype=dtype))
518
519		return cp.ascontiguousarray(cp.asarray(value,dtype=dtype))

Reshapes and pads the array x in a kernel friendly format. Factors shape_i. Also moves the geometry axis before shape_i, following the convention of HookeWave.h

  • **kwargs : passed to fd.block_expand
def unshape(self, value):
521	def unshape(self,value):
522		"""Inverse operation to reshape"""
523		if value.ndim==2*self.ndim: pass
524		elif value.ndim==2*self.ndim+1: value = np.moveaxis(value,self.ndim,0)
525		else: raise ValueError("Unsupported geometry depth")
526		return fd.block_squeeze(value,self.shape_dom)

Inverse operation to reshape

class ElasticHamiltonian_Kernel(WaveHamiltonianBase):
528class ElasticHamiltonian_Kernel(WaveHamiltonianBase):
529	"""
530	The Hamiltonian of an anisotropic elastic wave equation, implemented with GPU kernels,
531	whose geometry is defined by a generic Hooke tensor field.
532	The Hamiltonian is a sum of squares of finite differences, via Voronoi's decomposition.
533	Dirichlet boundary conditions are applied, see also optional damping layers.
534
535	The Mathematical expression of the Hamiltonian is 
536	(1/2) int_X M(x,p(x),p(x)) + C(x,ε(x),ε(x)) dx
537	where ε = grad(q) + grad(q)^T - S q is the stress tensor, and X is the domain.
538
539	- M : (metric) array of positive definite matrices, shape (d,d,n1,...,nd),
540		Also accepts (1,1,n1,...,nd) for isotropic metric. Ex: M = (1/ρ)[None,None]
541	- C : (hooke tensor in voigt notation) array of positive definite matrices,
542		shape (s,s,n1,...,nd) where s = d (d+1)/2
543		Reuse decomposition from previous run : C = H_prev.C_for_reuse
544
545	Warning : accessing some of this object's properties has a significant memory and 
546	computational cost, because all data is converted to a GPU kernel friendly format.
547	"""
548	def __init__(self,M,C,dx=1.,order_x=2,shape_dom=None,
549		traits=None,rev_ad=0,**kwargs):
550		if cp is None: raise ImportError("Cupy library needed for this class")
551		fwd_ad = M.size_ad if ad.is_ad(M) else 0
552		if fwd_ad>0 and rev_ad>0:
553			raise ValueError("Please choose between forward and reverse differentiation")
554		self._size_ad = max(fwd_ad,rev_ad)
555
556		traits_default = {
557			'isotropic_metric_macro':M.shape[0]==1,
558			'fourth_order_macro':{2:False,4:True}[order_x],
559			'fwd_macro': fwd_ad>0
560			}
561		if traits is not None: traits_default.update(traits)
562		traits = traits_default
563
564		# Flatten the symmetric matrix arrays, if necessary
565		if (M.ndim==2 or M.shape[0] in (1,M.ndim-2)) and M.shape[0]==M.shape[1]: 
566			M = Metrics.misc.flatten_symmetric_matrix(M)
567		if isinstance(C,tuple): self._weights,self._offsets,shape_dom = C # Reuse decomposition
568		elif (C.ndim==2 or C.shape[0]==_triangular_number(C.ndim-2)) and C.shape[0]==C.shape[1]: 
569			C = Metrics.misc.flatten_symmetric_matrix(C)
570
571		# Get the domain shape
572		if shape_dom is None: shape_dom = M.shape[1:] if isinstance(C,tuple) else \
573			fd.common_shape((M,C),depths=(1,1))
574		super(ElasticHamiltonian_Kernel,self).__init__(shape_dom,traits,**kwargs)
575
576		if self.ndim not in (1,2,3):
577			raise ValueError("Only domains of dimension 1, 2 and 3 are supported")
578
579		self._offsetpack_t = np.int32
580		self.dx = dx
581
582		# Voronoi decomposition of the Hooke tensor
583		if not isinstance(C,tuple): # Decomposition was bypassed by feeding weights, offsets
584			assert len(C) == self.decompdim
585			from .. import VoronoiDecomposition 
586			weights,offsets = VoronoiDecomposition(ad.cupy_generic.cupy_set(C),
587				offset_t=np.int8,flattened=True)
588			C = None
589			# Broadcast if necessary
590			weights = fd.as_field(weights,self.shape_dom,depth=1)
591			offsets = fd.as_field(offsets,self.shape_dom,depth=2)
592			self._weights = self.reshape(weights)
593			weights = None
594			# offsets = offsets.get() # Uncomment if desperate to save memory...
595			self._offsets = self.reshape(self._compress_offsets(offsets),constant_values=0)
596			offsets = None
597
598		# Setup the metric
599		assert len(M) == self.symdim or self.isotropic_metric
600		self._metric = self.reshape(fd.as_field(M,self.shape_dom,depth=1))
601		M = None
602
603		if self.way_ad<0: # Reverse autodiff
604			self._weights = ad.Dense.denseAD(self._weights)
605			self._metric  = ad.Dense.denseAD(self._metric)
606		if self.size_ad:   # Forward or reverse autodiff 
607			self.check_ad(self._weights) 
608			self.check_ad(self._metric)
609		else:
610			self.check(self._weights)
611			self.check(self._metric)
612		self.check(self._offsets)
613
614
615		if self.isotropic_metric: # TODO : case where M is anisotropic, see Sparse variant
616			self.dt_max = _mk_dt_max(dx/(self.ndim*np.sqrt(np.nanmax(
617				ad.remove_ad(self._metric).squeeze(axis=self.ndim)
618				*ad.remove_ad(self._weights).sum(axis=self.ndim)))),
619			order_x=order_x)
620
621		self.shape_free = (*self.shape_o,self.ndim,*self.shape_i)
622		self._sizes_oi = (self.size_o,),(self.size_i,)
623
624		# Generate the cuda module
625		cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda")
626		date_modified = cupy_module_helper.getmtime_max(cuda_path)
627		source = cupy_module_helper.traits_header(self.traits,size_of_shape=True)
628
629		source += [
630		'#include "Kernel_ElasticWave.h"',
631		f"// Date cuda code last modified : {date_modified}"]
632		cuoptions = ("-default-device", f"-I {cuda_path}") 
633
634		source="\n".join(source)
635		module = cupy_module_helper.GetModule(source,cuoptions)
636		self._DpH_Kernel = module.get_function("DpH")
637		self._DqH_Kernel = module.get_function("DqH")
638
639		if self.size_ad:
640			source_ad = f"#define size_ad_macro {self.size_ad}\n"+source
641			module_ad = cupy_module_helper.GetModule(source_ad,cuoptions)
642			self._DpH_Kernel_ad = module_ad.get_function("DpH")
643			self._DqH_Kernel_ad = module_ad.get_function("DqH")
644			self._modules = (module,module_ad)
645		else: self._modules = (module,)
646
647		self.SetCst('shape_o',self.shape_o,self.int_t)
648		self.SetCst('size_o',np.prod(self.shape_o),self.int_t)
649		self.SetCst('shape_tot',self.shape_dom,self.int_t)
650
651	# Traits
652	@property
653	def size_ad(self): return self._size_ad #return self._traits['size_ad_macro']
654	@property
655	def way_ad(self):
656		"""0 : no AD. 1 : forward AD. -1 : reverse AD"""
657		return 0 if self.size_ad==0 else 1 if self._traits['fwd_macro'] else -1
658	def rev_reset(self):
659		"""
660		Reset the accumulators for reverse autodiff 
661		Namely (self.metric.coef and self.weights.coef)
662		"""
663		assert self.way_ad<0
664		self._metric.coef[:]=0
665		self._weights.coef[:]=0
666
667	@property
668	def isotropic_metric(self): 
669		"""Wether M has shape (1,1,...) or (d,d,...)"""
670		return self._traits['isotropic_metric_macro']
671
672	@property	
673	def offsetpack_t(self): 
674		"""Type used to store a matrix offset. Defaults to int32."""
675		return self._offsetpack_t
676
677	@property
678	def decompdim(self):
679		"""Number of terms in the decomposition of a generic Hooke tensor."""
680		return _triangular_number(self.symdim)
681	
682	@property
683	def offsetnbits(self):
684		"""Number of bits for storing each integer coefficient of the matrix offsets"""
685		return (None,10,10,5)[self.ndim]
686
687	@property
688	def voigt2lower(self):
689		"""
690		Correspondance between voigt notation and symmetric matrix indexing.
691		d=2 : [0  ]	   d=3 : [0    ]
692		      [2 1]          [5 1  ]
693		                     [4 3 2]
694		"""
695		return {1:(0,), 2:(0,2,1), 3:(0,5,1,4,3,2)}[self.ndim]
696	
697	# PDE parameters
698	@property
699	def weights(self):
700		"""Weights, obtained from Voronoi's decomposition of the Hooke tensors."""
701		return self.unshape(self._weights)
702	@property	
703	def offsets(self):
704		"""Offsets, obtained from Voronoi's decomposition of the Hooke tensors."""
705		offsets2 = self.unshape(self._offsets)
706		# Uncompress
707		offsets = cp.zeros((self.symdim,)+offsets2.shape,dtype=np.int8)
708		order = self.voigt2lower
709		nbits = self.offsetnbits
710		for i,o in enumerate(order):
711			offsets[o]=((offsets2//2**(i*nbits))% 2**nbits) - 2**(nbits-1)
712		return offsets
713
714	@property
715	def C_for_reuse(self):
716		"""Avoid the initial tensor decomposition step."""
717		return self._weights,self._offsets,self.shape_dom
718
719	def _compress_offsets(self,offsets):
720		"""Compress each matrix offset (symmetric, integer valued), into a single int_t"""
721		offsets2 = np.zeros_like(offsets,shape=offsets.shape[1:],dtype=self.offsetpack_t)
722		order = self.voigt2lower
723		nbits = self.offsetnbits
724		for i,o in enumerate(order):
725			offsets2 += (offsets[o].astype(self.int_t)+2**(nbits-1)) * 2**(nbits*i)
726		return offsets2
727
728	@property	
729	def hooke(self):
730		"""The Hooke tensor, input 'C', defining the elasticity properties of the medium."""
731		# The Hooke tensor is not stored, so as to save memory. We thus reconstruct it 
732		# from Voronoi's decomposition.
733		weights,offsets = self.weights,self.offsets
734		full_hooke = (weights * lp.outer_self(offsets)).sum(axis=2)
735		return full_hooke
736				
737	@property
738	def metric(self):
739		"""
740		The metric tensor, input 'M'. Defines the norm for measuring momentum. 
741		Usually metric = Id/ρ .
742		"""
743		return Metrics.misc.expand_symmetric_matrix(self.unshape(self._metric))
744
745	@property
746	def (self):
747		"""Inverse density. Alias for the metric M used to define the kinetic energy."""
748		return self.metric
749
750	def Expl_p(self,q,p,δ):
751		"""
752		Explicit time step for the impulsion p.
753		Expects : q and p reshaped in GPU friendly format, using self.reshape
754		"""
755		mult = -δ/self.dx**2/{2:4,4:48}[self.order_x]
756		if self.preserve_p: p = self._mk_copy(p)
757		if ad.is_ad(q): # Use automatic differentiation, forward or reverse
758			SetModuleConstant(self._modules[1],'DqH_mult',mult,self.float_t)
759			self.check_ad(q); self.check_ad(p)
760			self._DqH_Kernel_ad(self.shape_o,self.shape_i,(self._weights.value,self._offsets,
761				q.value,self._weights.coef,q.coef,p.coef,p.value))
762		else: # No automatic differentiation
763			SetModuleConstant(self._modules[0],'DqH_mult',mult,self.float_t)
764			self.check(q); self.check(p)
765			self._DqH_Kernel(self.shape_o,self.shape_i,
766				(ad.remove_ad(self._weights),self._offsets,q,p))
767		return p
768
769	def Expl_q(self,q,p,δ): 
770		"""
771		Explicit time step for the position q.
772		Expects : q and p reshaped in GPU friendly format, using self.reshape
773		"""
774		if self.preserve_q: q = self._mk_copy(q)
775		if ad.is_ad(p): # Use automatic differentiation, forward or reverse
776			SetModuleConstant(self._modules[1],'DpH_mult',δ,self.float_t)
777			self.check_ad(q); self.check_ad(p)
778			self._DpH_Kernel_ad(*self._sizes_oi,(self._metric.value,p.value,
779				self._metric.coef,p.coef,q.coef,q.value))
780		else: # No automatic differentiation
781			SetModuleConstant(self._modules[0],'DpH_mult',δ,self.float_t)
782			self.check(q); self.check(p)
783			self._DpH_Kernel(*self._sizes_oi,(ad.remove_ad(self._metric),p,q))
784		return q
785
786	def _DqH(self,q): return self.Expl_p(q,np.zeros_like(q),-1)
787	def _DpH(self,p): return self.Expl_q(np.zeros_like(p),p, 1)
788
789	def _mk_copy(self,x):
790		"""Copies the variable x, with a specific contiguity for the AD coefficients"""
791		if ad.is_ad(x):
792			assert isinstance(x,ad.Dense.denseAD) and x.size_ad in (0,self.size_ad)
793			return ad.Dense.denseAD(x.value.copy(),
794				np.moveaxis(np.moveaxis(x.coef,-1,self.ndim).copy(),self.ndim,-1))
795		return x.copy()
796
797	def check_ad(self,x):
798		"""
799		Puts zero coefficients with the correct contiguity if those are empty.
800		Checks that the AD variable x has the correct c-contiguity for the kernel.
801		"""
802		assert self.way_ad!=0 # Both forward and reverse
803		if x.size_ad==0:
804			x.coef = np.moveaxis(np.zeros_like(x.value,
805			shape=(*x.shape[:self.ndim],self.size_ad,*x.shape[self.ndim:])),self.ndim,-1)
806		assert isinstance(x,ad.Dense.denseAD) and x.size_ad==self.size_ad
807		assert np.moveaxis(x.coef,-1,self.ndim).flags.c_contiguous
808		assert x.coef.dtype==self.float_t
809		self.check(x.value)
810
811	def check(self,x):
812		""" 
813		Basic check of the types, shapes, contiguity of GPU inputs.
814		"""
815		assert x.flags.c_contiguous
816		assert x.shape[:self.ndim]==self.shape_o and x.shape[-self.ndim:]==self.shape_i
817		assert x.dtype in (self.float_t,self.int_t,self.offsetpack_t)
818		assert x.ndim in (2*self.ndim,2*self.ndim+1)

The Hamiltonian of an anisotropic elastic wave equation, implemented with GPU kernels, whose geometry is defined by a generic Hooke tensor field. The Hamiltonian is a sum of squares of finite differences, via Voronoi's decomposition. Dirichlet boundary conditions are applied, see also optional damping layers.

The Mathematical expression of the Hamiltonian is (1/2) int_X M(x,p(x),p(x)) + C(x,ε(x),ε(x)) dx where ε = grad(q) + grad(q)^T - S q is the stress tensor, and X is the domain.

  • M : (metric) array of positive definite matrices, shape (d,d,n1,...,nd), Also accepts (1,1,n1,...,nd) for isotropic metric. Ex: M = (1/ρ)[None,None]
  • C : (hooke tensor in voigt notation) array of positive definite matrices, shape (s,s,n1,...,nd) where s = d (d+1)/2 Reuse decomposition from previous run : C = H_prev.C_for_reuse

Warning : accessing some of this object's properties has a significant memory and computational cost, because all data is converted to a GPU kernel friendly format.

ElasticHamiltonian_Kernel( M, C, dx=1.0, order_x=2, shape_dom=None, traits=None, rev_ad=0, **kwargs)
548	def __init__(self,M,C,dx=1.,order_x=2,shape_dom=None,
549		traits=None,rev_ad=0,**kwargs):
550		if cp is None: raise ImportError("Cupy library needed for this class")
551		fwd_ad = M.size_ad if ad.is_ad(M) else 0
552		if fwd_ad>0 and rev_ad>0:
553			raise ValueError("Please choose between forward and reverse differentiation")
554		self._size_ad = max(fwd_ad,rev_ad)
555
556		traits_default = {
557			'isotropic_metric_macro':M.shape[0]==1,
558			'fourth_order_macro':{2:False,4:True}[order_x],
559			'fwd_macro': fwd_ad>0
560			}
561		if traits is not None: traits_default.update(traits)
562		traits = traits_default
563
564		# Flatten the symmetric matrix arrays, if necessary
565		if (M.ndim==2 or M.shape[0] in (1,M.ndim-2)) and M.shape[0]==M.shape[1]: 
566			M = Metrics.misc.flatten_symmetric_matrix(M)
567		if isinstance(C,tuple): self._weights,self._offsets,shape_dom = C # Reuse decomposition
568		elif (C.ndim==2 or C.shape[0]==_triangular_number(C.ndim-2)) and C.shape[0]==C.shape[1]: 
569			C = Metrics.misc.flatten_symmetric_matrix(C)
570
571		# Get the domain shape
572		if shape_dom is None: shape_dom = M.shape[1:] if isinstance(C,tuple) else \
573			fd.common_shape((M,C),depths=(1,1))
574		super(ElasticHamiltonian_Kernel,self).__init__(shape_dom,traits,**kwargs)
575
576		if self.ndim not in (1,2,3):
577			raise ValueError("Only domains of dimension 1, 2 and 3 are supported")
578
579		self._offsetpack_t = np.int32
580		self.dx = dx
581
582		# Voronoi decomposition of the Hooke tensor
583		if not isinstance(C,tuple): # Decomposition was bypassed by feeding weights, offsets
584			assert len(C) == self.decompdim
585			from .. import VoronoiDecomposition 
586			weights,offsets = VoronoiDecomposition(ad.cupy_generic.cupy_set(C),
587				offset_t=np.int8,flattened=True)
588			C = None
589			# Broadcast if necessary
590			weights = fd.as_field(weights,self.shape_dom,depth=1)
591			offsets = fd.as_field(offsets,self.shape_dom,depth=2)
592			self._weights = self.reshape(weights)
593			weights = None
594			# offsets = offsets.get() # Uncomment if desperate to save memory...
595			self._offsets = self.reshape(self._compress_offsets(offsets),constant_values=0)
596			offsets = None
597
598		# Setup the metric
599		assert len(M) == self.symdim or self.isotropic_metric
600		self._metric = self.reshape(fd.as_field(M,self.shape_dom,depth=1))
601		M = None
602
603		if self.way_ad<0: # Reverse autodiff
604			self._weights = ad.Dense.denseAD(self._weights)
605			self._metric  = ad.Dense.denseAD(self._metric)
606		if self.size_ad:   # Forward or reverse autodiff 
607			self.check_ad(self._weights) 
608			self.check_ad(self._metric)
609		else:
610			self.check(self._weights)
611			self.check(self._metric)
612		self.check(self._offsets)
613
614
615		if self.isotropic_metric: # TODO : case where M is anisotropic, see Sparse variant
616			self.dt_max = _mk_dt_max(dx/(self.ndim*np.sqrt(np.nanmax(
617				ad.remove_ad(self._metric).squeeze(axis=self.ndim)
618				*ad.remove_ad(self._weights).sum(axis=self.ndim)))),
619			order_x=order_x)
620
621		self.shape_free = (*self.shape_o,self.ndim,*self.shape_i)
622		self._sizes_oi = (self.size_o,),(self.size_i,)
623
624		# Generate the cuda module
625		cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda")
626		date_modified = cupy_module_helper.getmtime_max(cuda_path)
627		source = cupy_module_helper.traits_header(self.traits,size_of_shape=True)
628
629		source += [
630		'#include "Kernel_ElasticWave.h"',
631		f"// Date cuda code last modified : {date_modified}"]
632		cuoptions = ("-default-device", f"-I {cuda_path}") 
633
634		source="\n".join(source)
635		module = cupy_module_helper.GetModule(source,cuoptions)
636		self._DpH_Kernel = module.get_function("DpH")
637		self._DqH_Kernel = module.get_function("DqH")
638
639		if self.size_ad:
640			source_ad = f"#define size_ad_macro {self.size_ad}\n"+source
641			module_ad = cupy_module_helper.GetModule(source_ad,cuoptions)
642			self._DpH_Kernel_ad = module_ad.get_function("DpH")
643			self._DqH_Kernel_ad = module_ad.get_function("DqH")
644			self._modules = (module,module_ad)
645		else: self._modules = (module,)
646
647		self.SetCst('shape_o',self.shape_o,self.int_t)
648		self.SetCst('size_o',np.prod(self.shape_o),self.int_t)
649		self.SetCst('shape_tot',self.shape_dom,self.int_t)
dx
shape_free
size_ad
652	@property
653	def size_ad(self): return self._size_ad #return self._traits['size_ad_macro']
way_ad
654	@property
655	def way_ad(self):
656		"""0 : no AD. 1 : forward AD. -1 : reverse AD"""
657		return 0 if self.size_ad==0 else 1 if self._traits['fwd_macro'] else -1

0 : no AD. 1 : forward AD. -1 : reverse AD

def rev_reset(self):
658	def rev_reset(self):
659		"""
660		Reset the accumulators for reverse autodiff 
661		Namely (self.metric.coef and self.weights.coef)
662		"""
663		assert self.way_ad<0
664		self._metric.coef[:]=0
665		self._weights.coef[:]=0

Reset the accumulators for reverse autodiff Namely (self.metric.coef and self.weights.coef)

isotropic_metric
667	@property
668	def isotropic_metric(self): 
669		"""Wether M has shape (1,1,...) or (d,d,...)"""
670		return self._traits['isotropic_metric_macro']

Wether M has shape (1,1,...) or (d,d,...)

offsetpack_t
672	@property	
673	def offsetpack_t(self): 
674		"""Type used to store a matrix offset. Defaults to int32."""
675		return self._offsetpack_t

Type used to store a matrix offset. Defaults to int32.

decompdim
677	@property
678	def decompdim(self):
679		"""Number of terms in the decomposition of a generic Hooke tensor."""
680		return _triangular_number(self.symdim)

Number of terms in the decomposition of a generic Hooke tensor.

offsetnbits
682	@property
683	def offsetnbits(self):
684		"""Number of bits for storing each integer coefficient of the matrix offsets"""
685		return (None,10,10,5)[self.ndim]

Number of bits for storing each integer coefficient of the matrix offsets

voigt2lower
687	@property
688	def voigt2lower(self):
689		"""
690		Correspondance between voigt notation and symmetric matrix indexing.
691		d=2 : [0  ]	   d=3 : [0    ]
692		      [2 1]          [5 1  ]
693		                     [4 3 2]
694		"""
695		return {1:(0,), 2:(0,2,1), 3:(0,5,1,4,3,2)}[self.ndim]

Correspondance between voigt notation and symmetric matrix indexing. d=2 : [0 ] d=3 : [0 ] [2 1] [5 1 ] [4 3 2]

weights
698	@property
699	def weights(self):
700		"""Weights, obtained from Voronoi's decomposition of the Hooke tensors."""
701		return self.unshape(self._weights)

Weights, obtained from Voronoi's decomposition of the Hooke tensors.

offsets
702	@property	
703	def offsets(self):
704		"""Offsets, obtained from Voronoi's decomposition of the Hooke tensors."""
705		offsets2 = self.unshape(self._offsets)
706		# Uncompress
707		offsets = cp.zeros((self.symdim,)+offsets2.shape,dtype=np.int8)
708		order = self.voigt2lower
709		nbits = self.offsetnbits
710		for i,o in enumerate(order):
711			offsets[o]=((offsets2//2**(i*nbits))% 2**nbits) - 2**(nbits-1)
712		return offsets

Offsets, obtained from Voronoi's decomposition of the Hooke tensors.

C_for_reuse
714	@property
715	def C_for_reuse(self):
716		"""Avoid the initial tensor decomposition step."""
717		return self._weights,self._offsets,self.shape_dom

Avoid the initial tensor decomposition step.

hooke
728	@property	
729	def hooke(self):
730		"""The Hooke tensor, input 'C', defining the elasticity properties of the medium."""
731		# The Hooke tensor is not stored, so as to save memory. We thus reconstruct it 
732		# from Voronoi's decomposition.
733		weights,offsets = self.weights,self.offsets
734		full_hooke = (weights * lp.outer_self(offsets)).sum(axis=2)
735		return full_hooke

The Hooke tensor, input 'C', defining the elasticity properties of the medium.

metric
737	@property
738	def metric(self):
739		"""
740		The metric tensor, input 'M'. Defines the norm for measuring momentum. 
741		Usually metric = Id/ρ .
742		"""
743		return Metrics.misc.expand_symmetric_matrix(self.unshape(self._metric))

The metric tensor, input 'M'. Defines the norm for measuring momentum. Usually metric = Id/ρ .

745	@property
746	def (self):
747		"""Inverse density. Alias for the metric M used to define the kinetic energy."""
748		return self.metric

Inverse density. Alias for the metric M used to define the kinetic energy.

def Expl_p(self, q, p, δ):
750	def Expl_p(self,q,p,δ):
751		"""
752		Explicit time step for the impulsion p.
753		Expects : q and p reshaped in GPU friendly format, using self.reshape
754		"""
755		mult = -δ/self.dx**2/{2:4,4:48}[self.order_x]
756		if self.preserve_p: p = self._mk_copy(p)
757		if ad.is_ad(q): # Use automatic differentiation, forward or reverse
758			SetModuleConstant(self._modules[1],'DqH_mult',mult,self.float_t)
759			self.check_ad(q); self.check_ad(p)
760			self._DqH_Kernel_ad(self.shape_o,self.shape_i,(self._weights.value,self._offsets,
761				q.value,self._weights.coef,q.coef,p.coef,p.value))
762		else: # No automatic differentiation
763			SetModuleConstant(self._modules[0],'DqH_mult',mult,self.float_t)
764			self.check(q); self.check(p)
765			self._DqH_Kernel(self.shape_o,self.shape_i,
766				(ad.remove_ad(self._weights),self._offsets,q,p))
767		return p

Explicit time step for the impulsion p. Expects : q and p reshaped in GPU friendly format, using self.reshape

def Expl_q(self, q, p, δ):
769	def Expl_q(self,q,p,δ): 
770		"""
771		Explicit time step for the position q.
772		Expects : q and p reshaped in GPU friendly format, using self.reshape
773		"""
774		if self.preserve_q: q = self._mk_copy(q)
775		if ad.is_ad(p): # Use automatic differentiation, forward or reverse
776			SetModuleConstant(self._modules[1],'DpH_mult',δ,self.float_t)
777			self.check_ad(q); self.check_ad(p)
778			self._DpH_Kernel_ad(*self._sizes_oi,(self._metric.value,p.value,
779				self._metric.coef,p.coef,q.coef,q.value))
780		else: # No automatic differentiation
781			SetModuleConstant(self._modules[0],'DpH_mult',δ,self.float_t)
782			self.check(q); self.check(p)
783			self._DpH_Kernel(*self._sizes_oi,(ad.remove_ad(self._metric),p,q))
784		return q

Explicit time step for the position q. Expects : q and p reshaped in GPU friendly format, using self.reshape

def check_ad(self, x):
797	def check_ad(self,x):
798		"""
799		Puts zero coefficients with the correct contiguity if those are empty.
800		Checks that the AD variable x has the correct c-contiguity for the kernel.
801		"""
802		assert self.way_ad!=0 # Both forward and reverse
803		if x.size_ad==0:
804			x.coef = np.moveaxis(np.zeros_like(x.value,
805			shape=(*x.shape[:self.ndim],self.size_ad,*x.shape[self.ndim:])),self.ndim,-1)
806		assert isinstance(x,ad.Dense.denseAD) and x.size_ad==self.size_ad
807		assert np.moveaxis(x.coef,-1,self.ndim).flags.c_contiguous
808		assert x.coef.dtype==self.float_t
809		self.check(x.value)

Puts zero coefficients with the correct contiguity if those are empty. Checks that the AD variable x has the correct c-contiguity for the kernel.

def check(self, x):
811	def check(self,x):
812		""" 
813		Basic check of the types, shapes, contiguity of GPU inputs.
814		"""
815		assert x.flags.c_contiguous
816		assert x.shape[:self.ndim]==self.shape_o and x.shape[-self.ndim:]==self.shape_i
817		assert x.dtype in (self.float_t,self.int_t,self.offsetpack_t)
818		assert x.ndim in (2*self.ndim,2*self.ndim+1)

Basic check of the types, shapes, contiguity of GPU inputs.