agd.AutomaticDifferentiation.AD_CUDA.simplify_ad
# Copyright 2020 Jean-Marie Mirebeau, University Paris-Sud, CNRS, University Paris-Saclay
# Distributed WITHOUT ANY WARRANTY. Licensed under the Apache License, Version 2.0, see http://www.apache.org/licenses/LICENSE-2.0

import numpy as np
import cupy as cp
import os
from ...Eikonal.HFM_CUDA import cupy_module_helper as cmh


# Compile times for the cupy kernel can be a bit long, presumably due to the merge sort.
# Fortunately, this happens only once.

def simplify_ad(x,atol,rtol,blockSize=256):
    """Calls the GPU implementation of the simplify_ad method.

    The CUDA kernel named "simplify_ad" (from "simplify_ad.h") is compiled
    with traits derived from `x`, launched on contiguous copies of `x.coef`
    and `x.index`, and the results are written back to `x`, truncated along
    the last axis to the largest per-element size reported by the kernel.

    Parameters
    ----------
    x : object exposing `coef`, `index`, `size_ad`, `size` and `shape`.
        Modified in place: `x.coef` and `x.index` are replaced.
        NOTE(review): presumably a sparse AD variable whose last axis of
        `coef`/`index` has length `size_ad` -- confirm against the caller.
    atol : scalar or None
        Forwarded to the kernel as the module constant 'atol'. When None,
        the 'tol_macro' trait is False and neither tolerance is set.
    rtol : scalar
        Forwarded to the kernel as the module constant 'rtol'; only used
        when `atol` is not None.
    blockSize : int, optional
        Number of threads per block used for the kernel launch.

    Returns
    -------
    None
    """
    size_ad = x.size_ad
    # Nothing to simplify: exit before any GPU copy or kernel compilation.
    # An empty x would otherwise request a zero-block grid and a max over
    # an empty array.
    if size_ad == 0 or x.size == 0:
        return

    # Get the data
    coef, index = map(cp.ascontiguousarray, (x.coef, x.index))
    # Smallest power of two >= size_ad, computed in exact integer arithmetic
    bound_ad = 1 << (int(size_ad) - 1).bit_length()

    # Set the traits
    int_t = np.int32
    size_t = int_t
    index_t = index.dtype.type
    scalar_t = coef.dtype.type
    tol_macro = atol is not None
    traits = {
        'Int':int_t,
        'IndexT':index_t,
        'SizeT':size_t,
        'Scalar':scalar_t,
        'bound_ad':bound_ad,
        'tol_macro':tol_macro,
        }

    # Setup the cupy kernel
    source = cmh.traits_header(traits) #integral_max=True # needed for fixed length sort
    cuda_rpaths = "cuda","../../Eikonal/HFM_CUDA/cuda"
    here = os.path.dirname(os.path.realpath(__file__))
    cuda_paths = [os.path.join(here,rpath) for rpath in cuda_rpaths]
    date_modified = max(cmh.getmtime_max(path) for path in cuda_paths)

    # The modification date is embedded in the generated source text
    source += ['#include "simplify_ad.h"',
        f"// Date cuda code last date_modified : {date_modified}"]
    cuoptions = ("-default-device", f"-I {cuda_paths[0]}", f"-I {cuda_paths[1]}")

    source = "\n".join(source)
    module = cmh.GetModule(source,cuoptions)
    cmh.SetModuleConstant(module,'size_ad',x.size_ad,int_t)
    cmh.SetModuleConstant(module,'size_tot',x.size,size_t)
    if tol_macro:
        cmh.SetModuleConstant(module,'atol',atol,scalar_t)
        cmh.SetModuleConstant(module,'rtol',rtol,scalar_t)
    cupy_kernel = module.get_function("simplify_ad")

    # Call the kernel, with enough blocks to cover x.size threads
    gridSize = (x.size + blockSize - 1) // blockSize
    new_size_ad = cp.zeros(x.shape,dtype=np.int32)
    cupy_kernel((gridSize,),(blockSize,),(index,coef,new_size_ad))
    # Bring the largest reported size back to the host as a plain int
    max_size_ad = int(new_size_ad.max())

    x.coef = coef[...,:max_size_ad]
    x.index = index[...,:max_size_ad]
def simplify_ad(x, atol, rtol, blockSize=256):
def simplify_ad(x,atol,rtol,blockSize=256):
    """Calls the GPU implementation of the simplify_ad method.

    The CUDA kernel named "simplify_ad" (from "simplify_ad.h") is compiled
    with traits derived from `x`, launched on contiguous copies of `x.coef`
    and `x.index`, and the results are written back to `x`, truncated along
    the last axis to the largest per-element size reported by the kernel.

    Parameters
    ----------
    x : object exposing `coef`, `index`, `size_ad`, `size` and `shape`.
        Modified in place: `x.coef` and `x.index` are replaced.
        NOTE(review): presumably a sparse AD variable whose last axis of
        `coef`/`index` has length `size_ad` -- confirm against the caller.
    atol : scalar or None
        Forwarded to the kernel as the module constant 'atol'. When None,
        the 'tol_macro' trait is False and neither tolerance is set.
    rtol : scalar
        Forwarded to the kernel as the module constant 'rtol'; only used
        when `atol` is not None.
    blockSize : int, optional
        Number of threads per block used for the kernel launch.

    Returns
    -------
    None
    """
    size_ad = x.size_ad
    # Nothing to simplify: exit before any GPU copy or kernel compilation.
    # An empty x would otherwise request a zero-block grid and a max over
    # an empty array.
    if size_ad == 0 or x.size == 0:
        return

    # Get the data
    coef, index = map(cp.ascontiguousarray, (x.coef, x.index))
    # Smallest power of two >= size_ad, computed in exact integer arithmetic
    bound_ad = 1 << (int(size_ad) - 1).bit_length()

    # Set the traits
    int_t = np.int32
    size_t = int_t
    index_t = index.dtype.type
    scalar_t = coef.dtype.type
    tol_macro = atol is not None
    traits = {
        'Int':int_t,
        'IndexT':index_t,
        'SizeT':size_t,
        'Scalar':scalar_t,
        'bound_ad':bound_ad,
        'tol_macro':tol_macro,
        }

    # Setup the cupy kernel
    source = cmh.traits_header(traits) #integral_max=True # needed for fixed length sort
    cuda_rpaths = "cuda","../../Eikonal/HFM_CUDA/cuda"
    here = os.path.dirname(os.path.realpath(__file__))
    cuda_paths = [os.path.join(here,rpath) for rpath in cuda_rpaths]
    date_modified = max(cmh.getmtime_max(path) for path in cuda_paths)

    # The modification date is embedded in the generated source text
    source += ['#include "simplify_ad.h"',
        f"// Date cuda code last date_modified : {date_modified}"]
    cuoptions = ("-default-device", f"-I {cuda_paths[0]}", f"-I {cuda_paths[1]}")

    source = "\n".join(source)
    module = cmh.GetModule(source,cuoptions)
    cmh.SetModuleConstant(module,'size_ad',x.size_ad,int_t)
    cmh.SetModuleConstant(module,'size_tot',x.size,size_t)
    if tol_macro:
        cmh.SetModuleConstant(module,'atol',atol,scalar_t)
        cmh.SetModuleConstant(module,'rtol',rtol,scalar_t)
    cupy_kernel = module.get_function("simplify_ad")

    # Call the kernel, with enough blocks to cover x.size threads
    gridSize = (x.size + blockSize - 1) // blockSize
    new_size_ad = cp.zeros(x.shape,dtype=np.int32)
    cupy_kernel((gridSize,),(blockSize,),(index,coef,new_size_ad))
    # Bring the largest reported size back to the host as a plain int
    max_size_ad = int(new_size_ad.max())

    x.coef = coef[...,:max_size_ad]
    x.index = index[...,:max_size_ad]
Calls the GPU implementation of the simplify_ad method