agd.AutomaticDifferentiation.AD_CUDA.simplify_ad

 1# Copyright 2020 Jean-Marie Mirebeau, University Paris-Sud, CNRS, University Paris-Saclay
 2# Distributed WITHOUT ANY WARRANTY. Licensed under the Apache License, Version 2.0, see http://www.apache.org/licenses/LICENSE-2.0
 3
 4import numpy as np
 5import cupy as cp
 6import os
 7from ...Eikonal.HFM_CUDA import cupy_module_helper as cmh
 8
 9
10#Compile times for the cupy kernel can be a bit long, presumably due to the merge sort.
11#Fortunately, this happens only once.
12
def simplify_ad(x,atol,rtol,blockSize=256):
	"""
	Run the CUDA kernel "simplify_ad" on the coefficients and indices of x.

	- x : object exposing the attributes coef, index, size_ad, size and shape.
	  On exit, x.coef and x.index are replaced with (contiguous) arrays
	  truncated along their last axis.
	- atol, rtol : tolerances uploaded as module constants of the kernel.
	  They are only uploaded when atol is not None.
	- blockSize : block size used for the kernel launch.

	Returns None. If x.size_ad is zero, x is left untouched.
	"""

	# Fetch the data as contiguous arrays
	coef = cp.ascontiguousarray(x.coef)
	index = cp.ascontiguousarray(x.index)
	size_ad = x.size_ad
	if size_ad==0:
		return
	# Smallest power of two which is not less than size_ad
	bound_ad = int(2**np.ceil(np.log2(size_ad)))

	# Types and compile time constants handed over to the cuda code
	int_t = np.int32
	size_t = int_t
	scalar_t = coef.dtype.type
	with_tol = atol is not None
	traits = dict(
		Int=int_t,
		IndexT=index.dtype.type,
		SizeT=size_t,
		Scalar=scalar_t,
		bound_ad=bound_ad,
		tol_macro=with_tol,
	)

	# Assemble the kernel source
	source = cmh.traits_header(traits) #integral_max=True # needed for fixed length sort
	here = os.path.dirname(os.path.realpath(__file__))
	include_dirs = [os.path.join(here,rpath) for rpath in ("cuda","../../Eikonal/HFM_CUDA/cuda")]
	# The modification date is embedded in the source, as a trailing comment
	date_modified = max(cmh.getmtime_max(path) for path in include_dirs)
	source += [
		'#include "simplify_ad.h"',
		f"// Date cuda code last date_modified : {date_modified}",
	]
	cuoptions = ("-default-device", *(f"-I {path}" for path in include_dirs))

	# Build the module and upload its constants
	module = cmh.GetModule("\n".join(source),cuoptions)
	cmh.SetModuleConstant(module,'size_ad',x.size_ad,int_t)
	cmh.SetModuleConstant(module,'size_tot',x.size,size_t)
	if with_tol:
		cmh.SetModuleConstant(module,'atol',atol,scalar_t)
		cmh.SetModuleConstant(module,'rtol',rtol,scalar_t)
	kernel = module.get_function("simplify_ad")

	# Launch with enough blocks to cover the x.size entries
	n_blocks = int(np.ceil(x.size/blockSize))
	sizes_out = cp.zeros(x.shape,dtype=np.int32) # Filled by the kernel
	kernel((n_blocks,),(blockSize,),(index,coef,sizes_out))

	# Keep only the leading entries, up to the largest size reported by the kernel
	size_max = np.max(sizes_out)
	x.coef  = coef[...,:size_max]
	x.index = index[...,:size_max]
def simplify_ad(x, atol, rtol, blockSize=256):
def simplify_ad(x,atol,rtol,blockSize=256):
	"""
	GPU counterpart of the simplify_ad method, implemented by a cupy kernel.

	Reads x.coef, x.index, x.size_ad, x.size and x.shape, launches the
	"simplify_ad" kernel, and overwrites x.coef and x.index with arrays
	truncated along the last axis. The tolerances atol and rtol are passed
	to the kernel only if atol is not None. Returns None, immediately and
	without modifying x when x.size_ad is zero.
	"""

	# Input arrays, made contiguous for the kernel
	coef,index = (cp.ascontiguousarray(arr) for arr in (x.coef,x.index))
	size_ad = x.size_ad
	if size_ad==0: return

	# Traits : types and macros defined in the header of the cuda source
	int_t = np.int32
	size_t = int_t
	scalar_t = coef.dtype.type
	tol_macro = atol is not None
	traits = {
		'Int':int_t,
		'IndexT':index.dtype.type,
		'SizeT':size_t,
		'Scalar':scalar_t,
		'bound_ad':int(2**np.ceil(np.log2(size_ad))), # size_ad rounded up to a power of two
		'tol_macro':tol_macro,
	}

	# Source code and compilation options
	source = cmh.traits_header(traits) #integral_max=True # needed for fixed length sort
	file_dir = os.path.dirname(os.path.realpath(__file__))
	cuda_paths = []
	for rpath in ("cuda","../../Eikonal/HFM_CUDA/cuda"):
		cuda_paths.append(os.path.join(file_dir,rpath))
	date_modified = max(cmh.getmtime_max(path) for path in cuda_paths)
	source += ['#include "simplify_ad.h"']
	source += [f"// Date cuda code last date_modified : {date_modified}"]
	path0,path1 = cuda_paths
	cuoptions = ("-default-device", f"-I {path0}", f"-I {path1}")

	# Module creation, and setting of the constants
	source = "\n".join(source)
	module = cmh.GetModule(source,cuoptions)
	constants = [('size_ad',x.size_ad,int_t), ('size_tot',x.size,size_t)]
	if tol_macro:
		constants += [('atol',atol,scalar_t), ('rtol',rtol,scalar_t)]
	for key,value,dtype in constants:
		cmh.SetModuleConstant(module,key,value,dtype)
	cupy_kernel = module.get_function("simplify_ad")

	# Kernel call; the third argument is an output array, initialized with zeros
	grid = (int(np.ceil(x.size/blockSize)),)
	block = (blockSize,)
	new_size_ad = cp.zeros(x.shape,dtype=np.int32)
	cupy_kernel(grid,block,(index,coef,new_size_ad))

	# Truncate the last axis to the largest size found
	stop = np.max(new_size_ad)
	x.coef  = coef[...,:stop]
	x.index = index[...,:stop]

Calls the GPU implementation of the simplify_ad method