"""
agd.Eikonal.HFM_CUDA.cupy_module_helper

Helpers for compiling cupy CUDA modules and for generating the C preamble
(traits) of the gpu kernel sources.
"""
# Copyright 2020 Jean-Marie Mirebeau, University Paris-Sud, CNRS, University Paris-Saclay
# Distributed WITHOUT ANY WARRANTY. Licensed under the Apache License, Version 2.0,
# see http://www.apache.org/licenses/LICENSE-2.0

import os
import numbers

import numpy as np
import cupy as cp

# Debug switch propagated into the kernel sources via traits_header().
# Use possibly in combination -lineinfo or -G and
# <<< cuda-memcheck -- python MyCode.py
hfm_debug_macro = False

def _cupy_has_RawModule():
    """
    Whether this cupy version exposes cp.RawModule.

    NOTE(review): the original comment states RawModule appears in cupy 8,
    yet the threshold tested is "9" — confirm the intended cutoff.
    """
    from packaging.version import Version
    return Version(cp.__version__) >= Version("9")

def getmtime_max(directory):
    """
    Lists all the files in the given directory, and returns the last time one of them
    was modified. Information needed when compiling cupy modules, because they are cached.
    """
    return max(os.path.getmtime(os.path.join(directory, file))
               for file in os.listdir(directory))

def GetModule(source, cuoptions):
    """Returns a cupy raw module compiled from the given CUDA source and options."""
    if _cupy_has_RawModule():
        return cp.RawModule(code=source, options=cuoptions)
    else:
        # Legacy path for cupy versions without RawModule.
        return cp.core.core.compile_with_cache(source,
            options=cuoptions, prepend_cupy_headers=False)

def SetModuleConstant(module, key, value, dtype):
    """
    Sets a global constant in a cupy cuda module.

    - module : compiled cupy module (RawModule or legacy).
    - key : name of the __constant__ symbol in the CUDA source.
    - value : value(s) copied into the symbol, cast to dtype.
    - dtype : numpy/cupy dtype of the constant.
    """
    if _cupy_has_RawModule():
        memptr = module.get_global(key)
    else:
        # https://github.com/cupy/cupy/issues/1703
        b = cp.core.core.memory_module.BaseMemory()
        b.ptr = module.get_global_var(key)
        memptr = cp.cuda.MemoryPointer(b, 0)

    value = cp.ascontiguousarray(cp.asarray(value, dtype=dtype))
    module_constant = cp.ndarray(value.shape, value.dtype, memptr)
    module_constant[...] = value

# cuda does not have int8_t, int32_t, etc : map numpy scalar types to
# the C type names used in the generated kernel source.
np2cuda_dtype = {
    np.int8: 'char',
    np.uint8: 'unsigned char',
    np.int16: 'short',
    np.int32: 'int',
    np.int64: 'long long',
    np.float32: 'float',
    np.float64: 'double',
}

def traits_header(traits,
                  join=False, size_of_shape=False, log2_size=False, integral_max=False):
    """
    Returns the source (mostly a preamble) for the gpu kernel code
    for the given traits.
    - join (optional): return a multiline string, rather than a list of strings
    - size_of_shape: insert traits for the size of each shape.
    - log2_size: insert a trait for the ceil of the base 2 logarithm of previous size.
    - integral_max: declare max of integral typedefs
    """
    # Note: mutates the caller's dict by inserting the debug macro default.
    traits.setdefault('hfm_debug_macro', hfm_debug_macro)

    def to_c(value):
        # CUDA spells booleans in lowercase.
        if isinstance(value, (bool, np.bool_)):
            return str(value).lower()
        return value

    source = []
    for key, value in traits.items():
        if key.endswith('macro'):
            source.append(f"#define {key} {to_c(value)}")
            continue

        if isinstance(value, numbers.Integral):
            source.append(f"const int {key}={to_c(value)};")
        elif isinstance(value, tuple) and len(value) == 2 and isinstance(value[1], type):
            # (value, numpy type) pair -> typed constant; infinities spelled as 1./0.
            val, dtype = value
            line = f"const {np2cuda_dtype[dtype]} {key} = "
            if val == np.inf:
                line += "1./0."
            elif val == -np.inf:
                line += "-1./0."
            else:
                line += str(val)
            source.append(line + ";")
        elif isinstance(value, type):
            # numpy scalar type -> typedef, plus a marker macro.
            ctype = np2cuda_dtype[value]
            source.append(f"typedef {ctype} {key};")
            source.append(f"#define {key}_macro")
            if integral_max and issubclass(value, numbers.Integral):
                source.append(f"const {ctype} {key}_Max = {np.iinfo(value).max};")
        elif all(isinstance(v, numbers.Integral) for v in value):
            # Sequence of integers -> one-dimensional C array.
            source.append(f"const int {key}[{len(value)}] = "
                          + "{" + ",".join(str(to_c(s)) for s in value) + "};")
        elif isinstance(value, np.ndarray) and isinstance(value.reshape(-1)[0], numbers.Integral):
            # Integral ndarray -> nested C array literal.
            s = f"const int {key}"
            for i in value.shape:
                s += f"[{i}]"
            s += '=' + np.array2string(value, separator=',').replace('[', '{').replace(']', '}') + ';'
            source.append(s)
        else:
            raise ValueError(f"Unsupported trait {key}:{value}")

    # Special treatment for some traits
    for key, value in traits.items():
        if size_of_shape and key.startswith('shape_'):
            suffix = key[len('shape_'):]
            size = np.prod(value)
            source.append(f"const int size_{suffix} = {size};")
            if log2_size:
                log2 = int(np.ceil(np.log2(size)))
                source.append(f"const int log2_size_{suffix} = {log2};")

    return "\n".join(source) if join else source
# Debug switch propagated into the generated kernel sources (as a #define)
# by traits_header().
hfm_debug_macro = False
def getmtime_max(directory):
    """
    Lists all the files in the given directory, and returns the last time one of them
    was modified. Information needed when compiling cupy modules, because they are cached.

    Raises ValueError (from max) if the directory is empty.
    """
    return max(os.path.getmtime(os.path.join(directory, file))
               for file in os.listdir(directory))
def GetModule(source, cuoptions):
    """
    Returns a cupy raw module compiled from the given CUDA source string,
    with the given compiler options.
    """
    if _cupy_has_RawModule():
        return cp.RawModule(code=source, options=cuoptions)
    else:
        # Legacy path for cupy versions without RawModule.
        return cp.core.core.compile_with_cache(source,
            options=cuoptions, prepend_cupy_headers=False)
def SetModuleConstant(module, key, value, dtype):
    """
    Sets a global constant in a cupy cuda module.

    - module : compiled cupy module (RawModule or legacy).
    - key : name of the __constant__ symbol in the CUDA source.
    - value : value(s) copied into the symbol, cast to dtype.
    - dtype : numpy/cupy dtype of the constant.
    """
    if _cupy_has_RawModule():
        memptr = module.get_global(key)
    else:
        # https://github.com/cupy/cupy/issues/1703
        b = cp.core.core.memory_module.BaseMemory()
        b.ptr = module.get_global_var(key)
        memptr = cp.cuda.MemoryPointer(b, 0)

    value = cp.ascontiguousarray(cp.asarray(value, dtype=dtype))
    module_constant = cp.ndarray(value.shape, value.dtype, memptr)
    module_constant[...] = value
# cuda does not have int8_t, int32_t, etc : map numpy scalar types to the
# C type names used in the generated kernel source.
np2cuda_dtype = {
    np.int8: 'char',
    np.uint8: 'unsigned char',
    np.int16: 'short',
    np.int32: 'int',
    np.int64: 'long long',
    np.float32: 'float',
    np.float64: 'double',
}
def traits_header(traits,
                  join=False, size_of_shape=False, log2_size=False, integral_max=False):
    """
    Returns the source (mostly a preamble) for the gpu kernel code
    for the given traits.
    - join (optional): return a multiline string, rather than a list of strings
    - size_of_shape: insert traits for the size of each shape.
    - log2_size: insert a trait for the ceil of the base 2 logarithm of previous size.
    - integral_max: declare max of integral typedefs
    """
    # Note: mutates the caller's dict by inserting the debug macro default.
    traits.setdefault('hfm_debug_macro', hfm_debug_macro)

    def to_c(value):
        # CUDA spells booleans in lowercase.
        if isinstance(value, (bool, np.bool_)):
            return str(value).lower()
        return value

    source = []
    for key, value in traits.items():
        if key.endswith('macro'):
            source.append(f"#define {key} {to_c(value)}")
            continue

        if isinstance(value, numbers.Integral):
            source.append(f"const int {key}={to_c(value)};")
        elif isinstance(value, tuple) and len(value) == 2 and isinstance(value[1], type):
            # (value, numpy type) pair -> typed constant; infinities spelled as 1./0.
            val, dtype = value
            line = f"const {np2cuda_dtype[dtype]} {key} = "
            if val == np.inf:
                line += "1./0."
            elif val == -np.inf:
                line += "-1./0."
            else:
                line += str(val)
            source.append(line + ";")
        elif isinstance(value, type):
            # numpy scalar type -> typedef, plus a marker macro.
            ctype = np2cuda_dtype[value]
            source.append(f"typedef {ctype} {key};")
            source.append(f"#define {key}_macro")
            if integral_max and issubclass(value, numbers.Integral):
                source.append(f"const {ctype} {key}_Max = {np.iinfo(value).max};")
        elif all(isinstance(v, numbers.Integral) for v in value):
            # Sequence of integers -> one-dimensional C array.
            source.append(f"const int {key}[{len(value)}] = "
                          + "{" + ",".join(str(to_c(s)) for s in value) + "};")
        elif isinstance(value, np.ndarray) and isinstance(value.reshape(-1)[0], numbers.Integral):
            # Integral ndarray -> nested C array literal.
            s = f"const int {key}"
            for i in value.shape:
                s += f"[{i}]"
            s += '=' + np.array2string(value, separator=',').replace('[', '{').replace(']', '}') + ';'
            source.append(s)
        else:
            raise ValueError(f"Unsupported trait {key}:{value}")

    # Special treatment for some traits
    for key, value in traits.items():
        if size_of_shape and key.startswith('shape_'):
            suffix = key[len('shape_'):]
            size = np.prod(value)
            source.append(f"const int size_{suffix} = {size};")
            if log2_size:
                log2 = int(np.ceil(np.log2(size)))
                source.append(f"const int log2_size_{suffix} = {log2};")

    return "\n".join(source) if join else source