agd.Eikonal.HFM_CUDA.MinCut

  1import os
  2import numpy as np
  3from ... import FiniteDifferences as fd
  4import time
  5
  6try:
  7	import cupy as cp
  8	from . import cupy_module_helper
  9	from .cupy_module_helper import SetModuleConstant
 10	from . import Eigh
 11except ImportError:
 12	pass # Allow using the CPU implementation on a machine without cuda
 13
 14"""
 15This file provides a GPU solver of the mincut optimization problem, which reads
 16minimize int_Omega F(grad ϕ) + g ϕ subject to -1 <= ϕ <= 1,
 17and arises in image segmentation methods.
 18This problem is dual to 
 19minimize int_Omega |div(eta) - g| subject to F^*(eta) <=1,
 20which is solved numerically. 
 21"""
 22
 23def stag_grids(shape,corners,sparse=False,xp=np):
 24	"""
 25	Generates the grid, staggered grid, and boundary conditions, as suitable for the mincut
 26	problem, for a rectangular domain. Grids use 'ij' indexing.
 27	Input : 
 28	 - shape (tuple) : the domain dimensions (n1,...,nd)
 29	 - corners (array of shape (2,d)) : the extreme points of the domain
 30	 - sparse (bool) : wether to generate a dense or a sparse grid 
 31	Output : Xm,Xϕ,dx,weights (last element only if retweights=True)
 32	 - Xs : standard grid coordinates (for the scalar potential)
 33	 - Xv : staggered grid coordinates, at the center of each cell (for the gradient)
 34	 - dx : the discretization scale
 35	"""
 36	if isinstance(shape,int): shape = (shape,); corners = tuple((c,) for c in corners)
 37	float_t = np.float64 if xp is np else np.float32
 38	corners = xp.asarray(corners,dtype=float_t)
 39	assert corners.shape == (2,len(shape))
 40	bot,top = corners
 41	dx = (top-bot)/(xp.asarray(shape,dtype=float_t)-1)
 42	Xs = tuple(boti+dxi*xp.arange(si,dtype=float_t) for (boti,dxi,si) in zip(bot,dx,shape))
 43	Xv = tuple(dxi/2.+axi[:-1] for (dxi,axi) in zip(dx,Xs))
 44
 45	def make_grid(X):
 46		if sparse: return tuple(axi.reshape((1,)*i+(-1,)+(1,)*(bc.ndim-i-1)) for i,axi in enumerate(X))
 47		else: return xp.asarray(xp.meshgrid(*X,indexing='ij'),dtype=float_t)
 48
 49	return make_grid(Xs),make_grid(Xv),dx
 50
 51def _preproc_metric(metric,use_numpy_eigh):
 52	from ... import Metrics
 53	if isinstance(metric,Metrics.Isotropic): return metric.cost[None]
 54	elif isinstance(metric,Metrics.Diagonal): return metric.costs
 55	elif metric.model_HFM().startswith("AsymIso"): return [metric.a[None],metric.w]
 56
 57	elif isinstance(metric,Metrics.Rander): 
 58		return _preproc_metric(Metrics.Riemann(metric.m),use_numpy_eigh),metric.w
 59	elif metric.model_HFM().startswith("AsymRander"): #Not imported by default in Metrics
 60		assert cp.allclose(metric.v,0)
 61		return _preproc_metric(Metrics.AsymQuad(metric.m,metric.u),use_numpy_eigh),metric.w
 62
 63	def eigh(m):
 64		λ,v = Eigh.eigh(m,quaternion=True,flatsym=True,use_numpy=use_numpy_eigh)
 65		return [np.moveaxis(λ,-1,0),np.moveaxis(v,-1,0)]
 66
 67	m = np.concatenate([np.moveaxis(metric.m[i,:(i+1)],0,-1)
 68		for i in range(metric.vdim)],axis=-1)
 69	eigm = eigh(m)
 70	if isinstance(metric,Metrics.Riemann): return eigm
 71	assert isinstance(metric,Metrics.AsymQuad)
 72	k=0 # compute m + w w^T
 73	for i in range(metric.vdim): 
 74		for j in range(i+1): 
 75			m[...,k] += metric.w[i]*metric.w[j]
 76			k+=1
 77	return eigm+eigh(m)+[metric.w]
 78	
 79
 80def mincut(g,metric,dx=None,
 81	grad="gradb",τ_primal=0.2,ρ_overrelax=1.8,
 82	maxiter=5000,E_rtol=1e-2, #,rtol=1e-6,
 83	shape_i=None,use_numpy_eigh=False,verbosity=2):
 84	"""
 85	Numerically solves the mincut problem.
 86	- g (array) : the ground cost functions
 87	- metric : the geometric metric 
 88	- grad (optional, string): gradient discretization. Possible values : 
 89	   - 'gradb' -> upwind
 90	   - 'gradc' -> centered (accurate but unstable)
 91	   - 'grad2' -> use both upwind and downwind
 92	- τ_primal : time step for the primal proximal operator
 93	"""
 94
 95	# traits and sizes
 96	int_t = np.int32
 97	float_t = np.float32
 98	shape_s = g.shape # shape for vector fields
 99	shape_v = tuple(s-1 for s in shape_s)
100	vdim = len(shape_s) # Number of space dimensions
101	qdim = {1:np.nan, 2:2, 3:4}[vdim] # Data size for a rotation matrix (compact format)
102	assert τ_primal>0
103
104	# Prepare the metric
105	w_randers = None
106	if not isinstance(metric,cp.ndarray):
107		assert metric.shape==() or metric.shape==shape_v
108		from ... import AutomaticDifferentiation as ad
109		metric = ad.cupy_generic.cupy_set(metric,dtype32=True,iterables=type(metric)) 
110		if dx is not None:
111			dx = cp.asarray(dx,dtype=float_t)
112			if   np.ndim(dx)==0: metric = metric.with_speed(dx) 
113			elif np.ndim(dx)==1: metric = metric.with_speeds(dx)
114		metric = _preproc_metric(metric,use_numpy_eigh)
115		if isinstance(metric,tuple): metric,w_randers = metric
116		if isinstance(metric,list):  metric = np.concatenate(metric,axis=0)
117	
118	def asfield(a): return np.moveaxis(np.broadcast_to(a,shape_v+a.shape),-1,0)
119	if np.ndim(metric)==1:metric = asfield(metric)
120	if np.ndim(w_randers)==1:w_randers = asfield(w_randers)
121
122	assert metric.shape[1:]==shape_v # Metric is provided at cell centers
123	geomsize = len(metric)
124	metric_type = {1:'iso', (1+vdim):'iso_asym', (vdim+qdim):'riemann', 
125	(vdim+qdim+vdim+qdim+vdim):'riemann_asym'}[geomsize]
126
127	if shape_i is None: shape_i = {1:(64,), 2:(8,8), 3:(4,4,4)}[vdim]
128	assert len(shape_i)==vdim
129	size_i = np.prod(shape_i)
130
131	if vdim==1: grad='gradc' # Only one meaningful discretization in dimension 1
132	else: assert grad in ('gradb','gradc','grad2')
133	if grad=='grad2':
134		graddim = 2*vdim
135		gradnorm2 = 2*vdim
136		if w_randers is not None: w_randers = np.concatenate([w_randers,w_randers],axis=0)
137	else:
138		graddim = vdim
139		gradnorm2 = 4*vdim # Squared norm of gradient operator
140
141	# Format suitably for the cupy kernel
142	g = cp.ascontiguousarray(fd.block_expand(cp.asarray(g,dtype=float_t),
143		shape_i,constant_values=np.nan))
144	shape_o = g.shape[:vdim]
145	metric = cp.ascontiguousarray(fd.block_expand(cp.asarray(metric,dtype=float_t)
146		,shape_i,shape_o,constant_values=np.nan))
147
148	# Create the optimization variables
149	ϕ = cp.zeros(shape_o+shape_i,dtype=float_t) # primal point
150	ϕ_ext = cp.zeros(shape_o+shape_i,dtype=float_t) # extrapolated primal point
151	η = cp.zeros((graddim,*shape_o,*shape_i),dtype=float_t) # dual point
152	primal_value = cp.zeros(shape_o,dtype=float_t) # primal objective value, by block
153	dual_value = cp.zeros(shape_o,dtype=float_t) # dual objective value, by block
154
155	# --------------- cuda header construction and compilation ----------------
156	# Generate the load order for the boundary of shared data
157	x_top_e = fd.block_neighbors(shape_i,True)
158	x_bot_e = fd.block_neighbors(shape_i,False)
159
160	# Generate the kernels
161	traits = {
162		'ndim_macro':vdim,
163		'Int':int_t,
164		'Scalar':float_t,
165		'shape_i':shape_i,
166		'shape_e':tuple(s+1 for s in shape_i),
167		'newton_maxiter':7,
168		'metric_type_macro':{'iso':1,'iso_asym':2,'riemann':3,'riemann_asym':4}[metric_type],
169		'size_bd_e':len(x_top_e),
170		'x_top_e':x_top_e,
171		'x_bot_e':1+x_bot_e,
172		'grad_macro':{'gradb':1,'gradc':2,'grad2':3}[grad],
173		'preproc_randers_macro':w_randers is not None,
174	}
175
176	cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda")
177	date_modified = cupy_module_helper.getmtime_max(cuda_path)
178	source = cupy_module_helper.traits_header(traits,size_of_shape=True,log2_size=True)
179
180	source += [
181	'#include "Kernel_MinCut.h"',
182	f"// Date cuda code last modified : {date_modified}"]
183	cuoptions = ("-default-device", f"-I {cuda_path}") 
184
185	source = "\n".join(source)
186#	print(source)
187	module = cupy_module_helper.GetModule(source,cuoptions)
188	# ------------- cuda module generated (may be reused...) ----------------
189
190	def setcst(*args): cupy_module_helper.SetModuleConstant(module,*args)
191	setcst('shape_tot_s',shape_s,int_t)
192	setcst('shape_tot_v',shape_v,int_t)
193	setcst('shape_o',shape_o,int_t)
194	setcst('size_io',g.size,int_t)
195	setcst('tau_primal',τ_primal,float_t)
196	setcst('tau_dual',1./(gradnorm2*τ_primal),float_t)
197	setcst('rho_overrelax',ρ_overrelax,float_t)
198	
199	primal_step = module.get_function("primal_step")
200	dual_step = module.get_function("dual_step")
201	size_o = np.prod(shape_o)
202	primal_values,dual_values = [],[]
203
204	if w_randers is not None:
205		w_randers = cp.ascontiguousarray(
206		fd.block_expand(w_randers,shape_i,shape_o,constant_values=np.nan),dtype=float_t)
207		assert w_randers.shape == η.shape
208		dummy = cp.array([0.],dtype=float_t)
209#		print(f"g_gpu={fd.block_squeeze(g,shape_s)[-5:,-5:]}")
210#		g.fill(0)
211		setcst('preproc_randers',1,int_t)
212		primal_step((size_o,),(size_i,),(g,dummy,w_randers,dummy,dummy,dummy))
213		setcst('preproc_randers',0,int_t)
214#		print(f"g_gpu={fd.block_squeeze(g,shape_s)[-5:,-5:]}")
215#		print(np.any(np.isnan(g)))
216
217	# Main loop
218	top = time.time()
219	for niter in range(maxiter):
220#		setcst('niter',niter,int_t)
221#		print(f"{niter=}")
222#		print(f"{ϕ=}")
223#		print(f"η*dx={η.get()*dx}")
224		primal_step((size_o,),(size_i,),(ϕ,ϕ_ext,η,g,primal_value,dual_value))
225#		print(f"{ϕ=}")
226#		print(primal_value)
227		dual_step((size_o,),(size_i,),(η,ϕ_ext,metric,primal_value))
228#		print(primal_value)
229
230#		print(f"(After dual step η={η.get()}")
231#		print(f"(After dual step η*dx={η.get()*dx}")
232
233		# Check objectives and termination
234
235		e_primal =  float(primal_value.sum())
236		e_dual   = -float(dual_value.sum())
237		primal_values.append(e_primal)
238		dual_values.append(e_dual)
239		if E_rtol>0 and e_primal-e_dual<E_rtol*np.abs(e_primal): break
240	else: 
241		if E_rtol>0 and verbosity>=1: 
242			print("Exhausted iteration budget without satisfying convergence criterion") 
243	if verbosity>=2:
244		print(f"GPU primal-dual solver completed {niter+1} steps in {time.time()-top} seconds")
245
246	# Reshape, rescale the results
247	ϕ = fd.block_squeeze(ϕ,shape_s)
248	η = fd.block_squeeze(η,shape_v)
249	if grad=='grad2': η=np.moveaxis(η.reshape((2,vdim)+shape_v),0,1)
250	if dx is not None: 
251		if np.ndim(dx)==0: η*=dx
252		else: 
253			for ηi,dxi in zip(η,dx): ηi*=dxi
254
255	return {'ϕ':ϕ,'η':η,'niter':niter+1,
256	'primal_values':np.array(primal_values),'dual_values':np.array(dual_values)}
def stag_grids( shape, corners, sparse=False, xp=<module 'numpy' from 'C:\\Users\\jmmir\\miniconda3\\envs\\agd-hfm_cuda_312\\Lib\\site-packages\\numpy\\__init__.py'>):
24def stag_grids(shape,corners,sparse=False,xp=np):
25	"""
26	Generates the grid, staggered grid, and boundary conditions, as suitable for the mincut
27	problem, for a rectangular domain. Grids use 'ij' indexing.
28	Input : 
29	 - shape (tuple) : the domain dimensions (n1,...,nd)
30	 - corners (array of shape (2,d)) : the extreme points of the domain
31	 - sparse (bool) : wether to generate a dense or a sparse grid 
32	Output : Xm,Xϕ,dx,weights (last element only if retweights=True)
33	 - Xs : standard grid coordinates (for the scalar potential)
34	 - Xv : staggered grid coordinates, at the center of each cell (for the gradient)
35	 - dx : the discretization scale
36	"""
37	if isinstance(shape,int): shape = (shape,); corners = tuple((c,) for c in corners)
38	float_t = np.float64 if xp is np else np.float32
39	corners = xp.asarray(corners,dtype=float_t)
40	assert corners.shape == (2,len(shape))
41	bot,top = corners
42	dx = (top-bot)/(xp.asarray(shape,dtype=float_t)-1)
43	Xs = tuple(boti+dxi*xp.arange(si,dtype=float_t) for (boti,dxi,si) in zip(bot,dx,shape))
44	Xv = tuple(dxi/2.+axi[:-1] for (dxi,axi) in zip(dx,Xs))
45
46	def make_grid(X):
47		if sparse: return tuple(axi.reshape((1,)*i+(-1,)+(1,)*(bc.ndim-i-1)) for i,axi in enumerate(X))
48		else: return xp.asarray(xp.meshgrid(*X,indexing='ij'),dtype=float_t)
49
50	return make_grid(Xs),make_grid(Xv),dx

Generates the grid, staggered grid, and boundary conditions, as suitable for the mincut problem, for a rectangular domain. Grids use 'ij' indexing. Input :

  • shape (tuple) : the domain dimensions (n1,...,nd)
  • corners (array of shape (2,d)) : the extreme points of the domain
  • sparse (bool) : wether to generate a dense or a sparse grid Output : Xm,Xϕ,dx,weights (last element only if retweights=True)
  • Xs : standard grid coordinates (for the scalar potential)
  • Xv : staggered grid coordinates, at the center of each cell (for the gradient)
  • dx : the discretization scale
def mincut( g, metric, dx=None, grad='gradb', τ_primal=0.2, ρ_overrelax=1.8, maxiter=5000, E_rtol=0.01, shape_i=None, use_numpy_eigh=False, verbosity=2):
 81def mincut(g,metric,dx=None,
 82	grad="gradb",τ_primal=0.2,ρ_overrelax=1.8,
 83	maxiter=5000,E_rtol=1e-2, #,rtol=1e-6,
 84	shape_i=None,use_numpy_eigh=False,verbosity=2):
 85	"""
 86	Numerically solves the mincut problem.
 87	- g (array) : the ground cost functions
 88	- metric : the geometric metric 
 89	- grad (optional, string): gradient discretization. Possible values : 
 90	   - 'gradb' -> upwind
 91	   - 'gradc' -> centered (accurate but unstable)
 92	   - 'grad2' -> use both upwind and downwind
 93	- τ_primal : time step for the primal proximal operator
 94	"""
 95
 96	# traits and sizes
 97	int_t = np.int32
 98	float_t = np.float32
 99	shape_s = g.shape # shape for vector fields
100	shape_v = tuple(s-1 for s in shape_s)
101	vdim = len(shape_s) # Number of space dimensions
102	qdim = {1:np.nan, 2:2, 3:4}[vdim] # Data size for a rotation matrix (compact format)
103	assert τ_primal>0
104
105	# Prepare the metric
106	w_randers = None
107	if not isinstance(metric,cp.ndarray):
108		assert metric.shape==() or metric.shape==shape_v
109		from ... import AutomaticDifferentiation as ad
110		metric = ad.cupy_generic.cupy_set(metric,dtype32=True,iterables=type(metric)) 
111		if dx is not None:
112			dx = cp.asarray(dx,dtype=float_t)
113			if   np.ndim(dx)==0: metric = metric.with_speed(dx) 
114			elif np.ndim(dx)==1: metric = metric.with_speeds(dx)
115		metric = _preproc_metric(metric,use_numpy_eigh)
116		if isinstance(metric,tuple): metric,w_randers = metric
117		if isinstance(metric,list):  metric = np.concatenate(metric,axis=0)
118	
119	def asfield(a): return np.moveaxis(np.broadcast_to(a,shape_v+a.shape),-1,0)
120	if np.ndim(metric)==1:metric = asfield(metric)
121	if np.ndim(w_randers)==1:w_randers = asfield(w_randers)
122
123	assert metric.shape[1:]==shape_v # Metric is provided at cell centers
124	geomsize = len(metric)
125	metric_type = {1:'iso', (1+vdim):'iso_asym', (vdim+qdim):'riemann', 
126	(vdim+qdim+vdim+qdim+vdim):'riemann_asym'}[geomsize]
127
128	if shape_i is None: shape_i = {1:(64,), 2:(8,8), 3:(4,4,4)}[vdim]
129	assert len(shape_i)==vdim
130	size_i = np.prod(shape_i)
131
132	if vdim==1: grad='gradc' # Only one meaningful discretization in dimension 1
133	else: assert grad in ('gradb','gradc','grad2')
134	if grad=='grad2':
135		graddim = 2*vdim
136		gradnorm2 = 2*vdim
137		if w_randers is not None: w_randers = np.concatenate([w_randers,w_randers],axis=0)
138	else:
139		graddim = vdim
140		gradnorm2 = 4*vdim # Squared norm of gradient operator
141
142	# Format suitably for the cupy kernel
143	g = cp.ascontiguousarray(fd.block_expand(cp.asarray(g,dtype=float_t),
144		shape_i,constant_values=np.nan))
145	shape_o = g.shape[:vdim]
146	metric = cp.ascontiguousarray(fd.block_expand(cp.asarray(metric,dtype=float_t)
147		,shape_i,shape_o,constant_values=np.nan))
148
149	# Create the optimization variables
150	ϕ = cp.zeros(shape_o+shape_i,dtype=float_t) # primal point
151	ϕ_ext = cp.zeros(shape_o+shape_i,dtype=float_t) # extrapolated primal point
152	η = cp.zeros((graddim,*shape_o,*shape_i),dtype=float_t) # dual point
153	primal_value = cp.zeros(shape_o,dtype=float_t) # primal objective value, by block
154	dual_value = cp.zeros(shape_o,dtype=float_t) # dual objective value, by block
155
156	# --------------- cuda header construction and compilation ----------------
157	# Generate the load order for the boundary of shared data
158	x_top_e = fd.block_neighbors(shape_i,True)
159	x_bot_e = fd.block_neighbors(shape_i,False)
160
161	# Generate the kernels
162	traits = {
163		'ndim_macro':vdim,
164		'Int':int_t,
165		'Scalar':float_t,
166		'shape_i':shape_i,
167		'shape_e':tuple(s+1 for s in shape_i),
168		'newton_maxiter':7,
169		'metric_type_macro':{'iso':1,'iso_asym':2,'riemann':3,'riemann_asym':4}[metric_type],
170		'size_bd_e':len(x_top_e),
171		'x_top_e':x_top_e,
172		'x_bot_e':1+x_bot_e,
173		'grad_macro':{'gradb':1,'gradc':2,'grad2':3}[grad],
174		'preproc_randers_macro':w_randers is not None,
175	}
176
177	cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda")
178	date_modified = cupy_module_helper.getmtime_max(cuda_path)
179	source = cupy_module_helper.traits_header(traits,size_of_shape=True,log2_size=True)
180
181	source += [
182	'#include "Kernel_MinCut.h"',
183	f"// Date cuda code last modified : {date_modified}"]
184	cuoptions = ("-default-device", f"-I {cuda_path}") 
185
186	source = "\n".join(source)
187#	print(source)
188	module = cupy_module_helper.GetModule(source,cuoptions)
189	# ------------- cuda module generated (may be reused...) ----------------
190
191	def setcst(*args): cupy_module_helper.SetModuleConstant(module,*args)
192	setcst('shape_tot_s',shape_s,int_t)
193	setcst('shape_tot_v',shape_v,int_t)
194	setcst('shape_o',shape_o,int_t)
195	setcst('size_io',g.size,int_t)
196	setcst('tau_primal',τ_primal,float_t)
197	setcst('tau_dual',1./(gradnorm2*τ_primal),float_t)
198	setcst('rho_overrelax',ρ_overrelax,float_t)
199	
200	primal_step = module.get_function("primal_step")
201	dual_step = module.get_function("dual_step")
202	size_o = np.prod(shape_o)
203	primal_values,dual_values = [],[]
204
205	if w_randers is not None:
206		w_randers = cp.ascontiguousarray(
207		fd.block_expand(w_randers,shape_i,shape_o,constant_values=np.nan),dtype=float_t)
208		assert w_randers.shape == η.shape
209		dummy = cp.array([0.],dtype=float_t)
210#		print(f"g_gpu={fd.block_squeeze(g,shape_s)[-5:,-5:]}")
211#		g.fill(0)
212		setcst('preproc_randers',1,int_t)
213		primal_step((size_o,),(size_i,),(g,dummy,w_randers,dummy,dummy,dummy))
214		setcst('preproc_randers',0,int_t)
215#		print(f"g_gpu={fd.block_squeeze(g,shape_s)[-5:,-5:]}")
216#		print(np.any(np.isnan(g)))
217
218	# Main loop
219	top = time.time()
220	for niter in range(maxiter):
221#		setcst('niter',niter,int_t)
222#		print(f"{niter=}")
223#		print(f"{ϕ=}")
224#		print(f"η*dx={η.get()*dx}")
225		primal_step((size_o,),(size_i,),(ϕ,ϕ_ext,η,g,primal_value,dual_value))
226#		print(f"{ϕ=}")
227#		print(primal_value)
228		dual_step((size_o,),(size_i,),(η,ϕ_ext,metric,primal_value))
229#		print(primal_value)
230
231#		print(f"(After dual step η={η.get()}")
232#		print(f"(After dual step η*dx={η.get()*dx}")
233
234		# Check objectives and termination
235
236		e_primal =  float(primal_value.sum())
237		e_dual   = -float(dual_value.sum())
238		primal_values.append(e_primal)
239		dual_values.append(e_dual)
240		if E_rtol>0 and e_primal-e_dual<E_rtol*np.abs(e_primal): break
241	else: 
242		if E_rtol>0 and verbosity>=1: 
243			print("Exhausted iteration budget without satisfying convergence criterion") 
244	if verbosity>=2:
245		print(f"GPU primal-dual solver completed {niter+1} steps in {time.time()-top} seconds")
246
247	# Reshape, rescale the results
248	ϕ = fd.block_squeeze(ϕ,shape_s)
249	η = fd.block_squeeze(η,shape_v)
250	if grad=='grad2': η=np.moveaxis(η.reshape((2,vdim)+shape_v),0,1)
251	if dx is not None: 
252		if np.ndim(dx)==0: η*=dx
253		else: 
254			for ηi,dxi in zip(η,dx): ηi*=dxi
255
256	return {'ϕ':ϕ,'η':η,'niter':niter+1,
257	'primal_values':np.array(primal_values),'dual_values':np.array(dual_values)}

Numerically solves the mincut problem.

  • g (array) : the ground cost functions
  • metric : the geometric metric
  • grad (optional, string): gradient discretization. Possible values :
    • 'gradb' -> upwind
    • 'gradc' -> centered (accurate but unstable)
    • 'grad2' -> use both upwind and downwind
  • τ_primal : time step for the primal proximal operator