"""
agd.Eikonal.HFM_CUDA.cupy_module_helper

Helpers for compiling cupy CUDA modules and for generating the C preamble
(traits) of the gpu kernel sources.
"""
# Copyright 2020 Jean-Marie Mirebeau, University Paris-Sud, CNRS, University Paris-Saclay
# Distributed WITHOUT ANY WARRANTY. Licensed under the Apache License, Version 2.0,
# see http://www.apache.org/licenses/LICENSE-2.0

import os
import numbers

import numpy as np
import cupy as cp

# Debug switch propagated into the kernel sources via traits_header().
# Use possibly in combination -lineinfo or -G and
# <<< cuda-memcheck -- python MyCode.py
hfm_debug_macro = False

def _cupy_has_RawModule():
    """
    Whether this cupy version exposes cp.RawModule.

    NOTE(review): the original comment states RawModule appears in cupy 8,
    yet the threshold tested is "9" — confirm the intended cutoff.
    """
    from packaging.version import Version
    return Version(cp.__version__) >= Version("9")

def getmtime_max(directory):
    """
    Lists all the files in the given directory, and returns the last time one of them
    was modified. Information needed when compiling cupy modules, because they are cached.
    """
    return max(os.path.getmtime(os.path.join(directory, file))
               for file in os.listdir(directory))

def GetModule(source, cuoptions):
    """Returns a cupy raw module compiled from the given CUDA source and options."""
    if _cupy_has_RawModule():
        return cp.RawModule(code=source, options=cuoptions)
    else:
        # Legacy path for cupy versions without RawModule.
        return cp.core.core.compile_with_cache(source,
            options=cuoptions, prepend_cupy_headers=False)

def SetModuleConstant(module, key, value, dtype):
    """
    Sets a global constant in a cupy cuda module.

    - module : compiled cupy module (RawModule or legacy).
    - key : name of the __constant__ symbol in the CUDA source.
    - value : value(s) copied into the symbol, cast to dtype.
    - dtype : numpy/cupy dtype of the constant.
    """
    if _cupy_has_RawModule():
        memptr = module.get_global(key)
    else:
        # https://github.com/cupy/cupy/issues/1703
        b = cp.core.core.memory_module.BaseMemory()
        b.ptr = module.get_global_var(key)
        memptr = cp.cuda.MemoryPointer(b, 0)

    value = cp.ascontiguousarray(cp.asarray(value, dtype=dtype))
    module_constant = cp.ndarray(value.shape, value.dtype, memptr)
    module_constant[...] = value

# cuda does not have int8_t, int32_t, etc : map numpy scalar types to
# the C type names used in the generated kernel source.
np2cuda_dtype = {
    np.int8: 'char',
    np.uint8: 'unsigned char',
    np.int16: 'short',
    np.int32: 'int',
    np.int64: 'long long',
    np.float32: 'float',
    np.float64: 'double',
}

def traits_header(traits,
                  join=False, size_of_shape=False, log2_size=False, integral_max=False):
    """
    Returns the source (mostly a preamble) for the gpu kernel code
    for the given traits.
    - join (optional): return a multiline string, rather than a list of strings
    - size_of_shape: insert traits for the size of each shape.
    - log2_size: insert a trait for the ceil of the base 2 logarithm of previous size.
    - integral_max: declare max of integral typedefs
    """
    # Note: mutates the caller's dict by inserting the debug macro default.
    traits.setdefault('hfm_debug_macro', hfm_debug_macro)

    def to_c(value):
        # CUDA spells booleans in lowercase.
        if isinstance(value, (bool, np.bool_)):
            return str(value).lower()
        return value

    source = []
    for key, value in traits.items():
        if key.endswith('macro'):
            source.append(f"#define {key} {to_c(value)}")
            continue

        if isinstance(value, numbers.Integral):
            source.append(f"const int {key}={to_c(value)};")
        elif isinstance(value, tuple) and len(value) == 2 and isinstance(value[1], type):
            # (value, numpy type) pair -> typed constant; infinities spelled as 1./0.
            val, dtype = value
            line = f"const {np2cuda_dtype[dtype]} {key} = "
            if val == np.inf:
                line += "1./0."
            elif val == -np.inf:
                line += "-1./0."
            else:
                line += str(val)
            source.append(line + ";")
        elif isinstance(value, type):
            # numpy scalar type -> typedef, plus a marker macro.
            ctype = np2cuda_dtype[value]
            source.append(f"typedef {ctype} {key};")
            source.append(f"#define {key}_macro")
            if integral_max and issubclass(value, numbers.Integral):
                source.append(f"const {ctype} {key}_Max = {np.iinfo(value).max};")
        elif all(isinstance(v, numbers.Integral) for v in value):
            # Sequence of integers -> one-dimensional C array.
            source.append(f"const int {key}[{len(value)}] = "
                          + "{" + ",".join(str(to_c(s)) for s in value) + "};")
        elif isinstance(value, np.ndarray) and isinstance(value.reshape(-1)[0], numbers.Integral):
            # Integral ndarray -> nested C array literal.
            s = f"const int {key}"
            for i in value.shape:
                s += f"[{i}]"
            s += '=' + np.array2string(value, separator=',').replace('[', '{').replace(']', '}') + ';'
            source.append(s)
        else:
            raise ValueError(f"Unsupported trait {key}:{value}")

    # Special treatment for some traits
    for key, value in traits.items():
        if size_of_shape and key.startswith('shape_'):
            suffix = key[len('shape_'):]
            size = np.prod(value)
            source.append(f"const int size_{suffix} = {size};")
            if log2_size:
                log2 = int(np.ceil(np.log2(size)))
                source.append(f"const int log2_size_{suffix} = {log2};")

    return "\n".join(source) if join else source
# Debug switch propagated into the generated kernel sources (as a #define)
# by traits_header().
hfm_debug_macro = False
def getmtime_max(directory):
    """
    Lists all the files in the given directory, and returns the last time one of them
    was modified. Information needed when compiling cupy modules, because they are cached.

    Raises ValueError (from max) if the directory is empty.
    """
    return max(os.path.getmtime(os.path.join(directory, file))
               for file in os.listdir(directory))
def GetModule(source, cuoptions):
    """
    Returns a cupy raw module compiled from the given CUDA source string,
    with the given compiler options.
    """
    if _cupy_has_RawModule():
        return cp.RawModule(code=source, options=cuoptions)
    else:
        # Legacy path for cupy versions without RawModule.
        return cp.core.core.compile_with_cache(source,
            options=cuoptions, prepend_cupy_headers=False)
def SetModuleConstant(module, key, value, dtype):
    """
    Sets a global constant in a cupy cuda module.

    - module : compiled cupy module (RawModule or legacy).
    - key : name of the __constant__ symbol in the CUDA source.
    - value : value(s) copied into the symbol, cast to dtype.
    - dtype : numpy/cupy dtype of the constant.
    """
    if _cupy_has_RawModule():
        memptr = module.get_global(key)
    else:
        # https://github.com/cupy/cupy/issues/1703
        b = cp.core.core.memory_module.BaseMemory()
        b.ptr = module.get_global_var(key)
        memptr = cp.cuda.MemoryPointer(b, 0)

    value = cp.ascontiguousarray(cp.asarray(value, dtype=dtype))
    module_constant = cp.ndarray(value.shape, value.dtype, memptr)
    module_constant[...] = value
# cuda does not have int8_t, int32_t, etc : map numpy scalar types to the
# C type names used in the generated kernel source.
np2cuda_dtype = {
    np.int8: 'char',
    np.uint8: 'unsigned char',
    np.int16: 'short',
    np.int32: 'int',
    np.int64: 'long long',
    np.float32: 'float',
    np.float64: 'double',
}
def traits_header(traits,
                  join=False, size_of_shape=False, log2_size=False, integral_max=False):
    """
    Returns the source (mostly a preamble) for the gpu kernel code
    for the given traits.
    - join (optional): return a multiline string, rather than a list of strings
    - size_of_shape: insert traits for the size of each shape.
    - log2_size: insert a trait for the ceil of the base 2 logarithm of previous size.
    - integral_max: declare max of integral typedefs
    """
    # Note: mutates the caller's dict by inserting the debug macro default.
    traits.setdefault('hfm_debug_macro', hfm_debug_macro)

    def to_c(value):
        # CUDA spells booleans in lowercase.
        if isinstance(value, (bool, np.bool_)):
            return str(value).lower()
        return value

    source = []
    for key, value in traits.items():
        if key.endswith('macro'):
            source.append(f"#define {key} {to_c(value)}")
            continue

        if isinstance(value, numbers.Integral):
            source.append(f"const int {key}={to_c(value)};")
        elif isinstance(value, tuple) and len(value) == 2 and isinstance(value[1], type):
            # (value, numpy type) pair -> typed constant; infinities spelled as 1./0.
            val, dtype = value
            line = f"const {np2cuda_dtype[dtype]} {key} = "
            if val == np.inf:
                line += "1./0."
            elif val == -np.inf:
                line += "-1./0."
            else:
                line += str(val)
            source.append(line + ";")
        elif isinstance(value, type):
            # numpy scalar type -> typedef, plus a marker macro.
            ctype = np2cuda_dtype[value]
            source.append(f"typedef {ctype} {key};")
            source.append(f"#define {key}_macro")
            if integral_max and issubclass(value, numbers.Integral):
                source.append(f"const {ctype} {key}_Max = {np.iinfo(value).max};")
        elif all(isinstance(v, numbers.Integral) for v in value):
            # Sequence of integers -> one-dimensional C array.
            source.append(f"const int {key}[{len(value)}] = "
                          + "{" + ",".join(str(to_c(s)) for s in value) + "};")
        elif isinstance(value, np.ndarray) and isinstance(value.reshape(-1)[0], numbers.Integral):
            # Integral ndarray -> nested C array literal.
            s = f"const int {key}"
            for i in value.shape:
                s += f"[{i}]"
            s += '=' + np.array2string(value, separator=',').replace('[', '{').replace(']', '}') + ';'
            source.append(s)
        else:
            raise ValueError(f"Unsupported trait {key}:{value}")

    # Special treatment for some traits
    for key, value in traits.items():
        if size_of_shape and key.startswith('shape_'):
            suffix = key[len('shape_'):]
            size = np.prod(value)
            source.append(f"const int size_{suffix} = {size};")
            if log2_size:
                log2 = int(np.ceil(np.log2(size)))
                source.append(f"const int log2_size_{suffix} = {log2};")

    return "\n".join(source) if join else source