agd.Eikonal.HFM_CUDA.MinCut
1import os 2import numpy as np 3from ... import FiniteDifferences as fd 4import time 5 6try: 7 import cupy as cp 8 from . import cupy_module_helper 9 from .cupy_module_helper import SetModuleConstant 10 from . import Eigh 11except ImportError: 12 pass # Allow using the CPU implementation on a machine without cuda 13 14""" 15This file provides a GPU solver of the mincut optimization problem, which reads 16minimize int_Omega F(grad ϕ) + g ϕ subject to -1 <= ϕ <= 1, 17and arises in image segmentation methods. 18This problem is dual to 19minimize int_Omega |div(eta) - g| subject to F^*(eta) <=1, 20which is solved numerically. 21""" 22 23def stag_grids(shape,corners,sparse=False,xp=np): 24 """ 25 Generates the grid, staggered grid, and boundary conditions, as suitable for the mincut 26 problem, for a rectangular domain. Grids use 'ij' indexing. 27 Input : 28 - shape (tuple) : the domain dimensions (n1,...,nd) 29 - corners (array of shape (2,d)) : the extreme points of the domain 30 - sparse (bool) : wether to generate a dense or a sparse grid 31 Output : Xm,Xϕ,dx,weights (last element only if retweights=True) 32 - Xs : standard grid coordinates (for the scalar potential) 33 - Xv : staggered grid coordinates, at the center of each cell (for the gradient) 34 - dx : the discretization scale 35 """ 36 if isinstance(shape,int): shape = (shape,); corners = tuple((c,) for c in corners) 37 float_t = np.float64 if xp is np else np.float32 38 corners = xp.asarray(corners,dtype=float_t) 39 assert corners.shape == (2,len(shape)) 40 bot,top = corners 41 dx = (top-bot)/(xp.asarray(shape,dtype=float_t)-1) 42 Xs = tuple(boti+dxi*xp.arange(si,dtype=float_t) for (boti,dxi,si) in zip(bot,dx,shape)) 43 Xv = tuple(dxi/2.+axi[:-1] for (dxi,axi) in zip(dx,Xs)) 44 45 def make_grid(X): 46 if sparse: return tuple(axi.reshape((1,)*i+(-1,)+(1,)*(bc.ndim-i-1)) for i,axi in enumerate(X)) 47 else: return xp.asarray(xp.meshgrid(*X,indexing='ij'),dtype=float_t) 48 49 return make_grid(Xs),make_grid(Xv),dx 50 51def _preproc_metric(metric,use_numpy_eigh): 52 from ... import Metrics 53 if isinstance(metric,Metrics.Isotropic): return metric.cost[None] 54 elif isinstance(metric,Metrics.Diagonal): return metric.costs 55 elif metric.model_HFM().startswith("AsymIso"): return [metric.a[None],metric.w] 56 57 elif isinstance(metric,Metrics.Rander): 58 return _preproc_metric(Metrics.Riemann(metric.m),use_numpy_eigh),metric.w 59 elif metric.model_HFM().startswith("AsymRander"): #Not imported by default in Metrics 60 assert cp.allclose(metric.v,0) 61 return _preproc_metric(Metrics.AsymQuad(metric.m,metric.u),use_numpy_eigh),metric.w 62 63 def eigh(m): 64 λ,v = Eigh.eigh(m,quaternion=True,flatsym=True,use_numpy=use_numpy_eigh) 65 return [np.moveaxis(λ,-1,0),np.moveaxis(v,-1,0)] 66 67 m = np.concatenate([np.moveaxis(metric.m[i,:(i+1)],0,-1) 68 for i in range(metric.vdim)],axis=-1) 69 eigm = eigh(m) 70 if isinstance(metric,Metrics.Riemann): return eigm 71 assert isinstance(metric,Metrics.AsymQuad) 72 k=0 # compute m + w w^T 73 for i in range(metric.vdim): 74 for j in range(i+1): 75 m[...,k] += metric.w[i]*metric.w[j] 76 k+=1 77 return eigm+eigh(m)+[metric.w] 78 79 80def mincut(g,metric,dx=None, 81 grad="gradb",τ_primal=0.2,ρ_overrelax=1.8, 82 maxiter=5000,E_rtol=1e-2, #,rtol=1e-6, 83 shape_i=None,use_numpy_eigh=False,verbosity=2): 84 """ 85 Numerically solves the mincut problem. 86 - g (array) : the ground cost functions 87 - metric : the geometric metric 88 - grad (optional, string): gradient discretization. Possible values : 89 - 'gradb' -> upwind 90 - 'gradc' -> centered (accurate but unstable) 91 - 'grad2' -> use both upwind and downwind 92 - τ_primal : time step for the primal proximal operator 93 """ 94 95 # traits and sizes 96 int_t = np.int32 97 float_t = np.float32 98 shape_s = g.shape # shape for vector fields 99 shape_v = tuple(s-1 for s in shape_s) 100 vdim = len(shape_s) # Number of space dimensions 101 qdim = {1:np.nan, 2:2, 3:4}[vdim] # Data size for a rotation matrix (compact format) 102 assert τ_primal>0 103 104 # Prepare the metric 105 w_randers = None 106 if not isinstance(metric,cp.ndarray): 107 assert metric.shape==() or metric.shape==shape_v 108 from ... import AutomaticDifferentiation as ad 109 metric = ad.cupy_generic.cupy_set(metric,dtype32=True,iterables=type(metric)) 110 if dx is not None: 111 dx = cp.asarray(dx,dtype=float_t) 112 if np.ndim(dx)==0: metric = metric.with_speed(dx) 113 elif np.ndim(dx)==1: metric = metric.with_speeds(dx) 114 metric = _preproc_metric(metric,use_numpy_eigh) 115 if isinstance(metric,tuple): metric,w_randers = metric 116 if isinstance(metric,list): metric = np.concatenate(metric,axis=0) 117 118 def asfield(a): return np.moveaxis(np.broadcast_to(a,shape_v+a.shape),-1,0) 119 if np.ndim(metric)==1:metric = asfield(metric) 120 if np.ndim(w_randers)==1:w_randers = asfield(w_randers) 121 122 assert metric.shape[1:]==shape_v # Metric is provided at cell centers 123 geomsize = len(metric) 124 metric_type = {1:'iso', (1+vdim):'iso_asym', (vdim+qdim):'riemann', 125 (vdim+qdim+vdim+qdim+vdim):'riemann_asym'}[geomsize] 126 127 if shape_i is None: shape_i = {1:(64,), 2:(8,8), 3:(4,4,4)}[vdim] 128 assert len(shape_i)==vdim 129 size_i = np.prod(shape_i) 130 131 if vdim==1: grad='gradc' # Only one meaningful discretization in dimension 1 132 else: assert grad in ('gradb','gradc','grad2') 133 if grad=='grad2': 134 graddim = 2*vdim 135 gradnorm2 = 2*vdim 136 if w_randers is not None: w_randers = np.concatenate([w_randers,w_randers],axis=0) 137 else: 138 graddim = vdim 139 gradnorm2 = 4*vdim # Squared norm of gradient operator 140 141 # Format suitably for the cupy kernel 142 g = cp.ascontiguousarray(fd.block_expand(cp.asarray(g,dtype=float_t), 143 shape_i,constant_values=np.nan)) 144 shape_o = g.shape[:vdim] 145 metric = cp.ascontiguousarray(fd.block_expand(cp.asarray(metric,dtype=float_t) 146 ,shape_i,shape_o,constant_values=np.nan)) 147 148 # Create the optimization variables 149 ϕ = cp.zeros(shape_o+shape_i,dtype=float_t) # primal point 150 ϕ_ext = cp.zeros(shape_o+shape_i,dtype=float_t) # extrapolated primal point 151 η = cp.zeros((graddim,*shape_o,*shape_i),dtype=float_t) # dual point 152 primal_value = cp.zeros(shape_o,dtype=float_t) # primal objective value, by block 153 dual_value = cp.zeros(shape_o,dtype=float_t) # dual objective value, by block 154 155 # --------------- cuda header construction and compilation ---------------- 156 # Generate the load order for the boundary of shared data 157 x_top_e = fd.block_neighbors(shape_i,True) 158 x_bot_e = fd.block_neighbors(shape_i,False) 159 160 # Generate the kernels 161 traits = { 162 'ndim_macro':vdim, 163 'Int':int_t, 164 'Scalar':float_t, 165 'shape_i':shape_i, 166 'shape_e':tuple(s+1 for s in shape_i), 167 'newton_maxiter':7, 168 'metric_type_macro':{'iso':1,'iso_asym':2,'riemann':3,'riemann_asym':4}[metric_type], 169 'size_bd_e':len(x_top_e), 170 'x_top_e':x_top_e, 171 'x_bot_e':1+x_bot_e, 172 'grad_macro':{'gradb':1,'gradc':2,'grad2':3}[grad], 173 'preproc_randers_macro':w_randers is not None, 174 } 175 176 cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda") 177 date_modified = cupy_module_helper.getmtime_max(cuda_path) 178 source = cupy_module_helper.traits_header(traits,size_of_shape=True,log2_size=True) 179 180 source += [ 181 '#include "Kernel_MinCut.h"', 182 f"// Date cuda code last modified : {date_modified}"] 183 cuoptions = ("-default-device", f"-I {cuda_path}") 184 185 source = "\n".join(source) 186# print(source) 187 module = cupy_module_helper.GetModule(source,cuoptions) 188 # ------------- cuda module generated (may be reused...) ---------------- 189 190 def setcst(*args): cupy_module_helper.SetModuleConstant(module,*args) 191 setcst('shape_tot_s',shape_s,int_t) 192 setcst('shape_tot_v',shape_v,int_t) 193 setcst('shape_o',shape_o,int_t) 194 setcst('size_io',g.size,int_t) 195 setcst('tau_primal',τ_primal,float_t) 196 setcst('tau_dual',1./(gradnorm2*τ_primal),float_t) 197 setcst('rho_overrelax',ρ_overrelax,float_t) 198 199 primal_step = module.get_function("primal_step") 200 dual_step = module.get_function("dual_step") 201 size_o = np.prod(shape_o) 202 primal_values,dual_values = [],[] 203 204 if w_randers is not None: 205 w_randers = cp.ascontiguousarray( 206 fd.block_expand(w_randers,shape_i,shape_o,constant_values=np.nan),dtype=float_t) 207 assert w_randers.shape == η.shape 208 dummy = cp.array([0.],dtype=float_t) 209# print(f"g_gpu={fd.block_squeeze(g,shape_s)[-5:,-5:]}") 210# g.fill(0) 211 setcst('preproc_randers',1,int_t) 212 primal_step((size_o,),(size_i,),(g,dummy,w_randers,dummy,dummy,dummy)) 213 setcst('preproc_randers',0,int_t) 214# print(f"g_gpu={fd.block_squeeze(g,shape_s)[-5:,-5:]}") 215# print(np.any(np.isnan(g))) 216 217 # Main loop 218 top = time.time() 219 for niter in range(maxiter): 220# setcst('niter',niter,int_t) 221# print(f"{niter=}") 222# print(f"{ϕ=}") 223# print(f"η*dx={η.get()*dx}") 224 primal_step((size_o,),(size_i,),(ϕ,ϕ_ext,η,g,primal_value,dual_value)) 225# print(f"{ϕ=}") 226# print(primal_value) 227 dual_step((size_o,),(size_i,),(η,ϕ_ext,metric,primal_value)) 228# print(primal_value) 229 230# print(f"(After dual step η={η.get()}") 231# print(f"(After dual step η*dx={η.get()*dx}") 232 233 # Check objectives and termination 234 235 e_primal = float(primal_value.sum()) 236 e_dual = -float(dual_value.sum()) 237 primal_values.append(e_primal) 238 dual_values.append(e_dual) 239 if E_rtol>0 and e_primal-e_dual<E_rtol*np.abs(e_primal): break 240 else: 241 if E_rtol>0 and verbosity>=1: 242 print("Exhausted iteration budget without satisfying convergence criterion") 243 if verbosity>=2: 244 print(f"GPU primal-dual solver completed {niter+1} steps in {time.time()-top} seconds") 245 246 # Reshape, rescale the results 247 ϕ = fd.block_squeeze(ϕ,shape_s) 248 η = fd.block_squeeze(η,shape_v) 249 if grad=='grad2': η=np.moveaxis(η.reshape((2,vdim)+shape_v),0,1) 250 if dx is not None: 251 if np.ndim(dx)==0: η*=dx 252 else: 253 for ηi,dxi in zip(η,dx): ηi*=dxi 254 255 return {'ϕ':ϕ,'η':η,'niter':niter+1, 256 'primal_values':np.array(primal_values),'dual_values':np.array(dual_values)}
def
stag_grids( shape, corners, sparse=False, xp=<module 'numpy' from 'C:\\Users\\jmmir\\miniconda3\\envs\\agd-hfm_cuda_312\\Lib\\site-packages\\numpy\\__init__.py'>):
24def stag_grids(shape,corners,sparse=False,xp=np): 25 """ 26 Generates the grid, staggered grid, and boundary conditions, as suitable for the mincut 27 problem, for a rectangular domain. Grids use 'ij' indexing. 28 Input : 29 - shape (tuple) : the domain dimensions (n1,...,nd) 30 - corners (array of shape (2,d)) : the extreme points of the domain 31 - sparse (bool) : wether to generate a dense or a sparse grid 32 Output : Xm,Xϕ,dx,weights (last element only if retweights=True) 33 - Xs : standard grid coordinates (for the scalar potential) 34 - Xv : staggered grid coordinates, at the center of each cell (for the gradient) 35 - dx : the discretization scale 36 """ 37 if isinstance(shape,int): shape = (shape,); corners = tuple((c,) for c in corners) 38 float_t = np.float64 if xp is np else np.float32 39 corners = xp.asarray(corners,dtype=float_t) 40 assert corners.shape == (2,len(shape)) 41 bot,top = corners 42 dx = (top-bot)/(xp.asarray(shape,dtype=float_t)-1) 43 Xs = tuple(boti+dxi*xp.arange(si,dtype=float_t) for (boti,dxi,si) in zip(bot,dx,shape)) 44 Xv = tuple(dxi/2.+axi[:-1] for (dxi,axi) in zip(dx,Xs)) 45 46 def make_grid(X): 47 if sparse: return tuple(axi.reshape((1,)*i+(-1,)+(1,)*(bc.ndim-i-1)) for i,axi in enumerate(X)) 48 else: return xp.asarray(xp.meshgrid(*X,indexing='ij'),dtype=float_t) 49 50 return make_grid(Xs),make_grid(Xv),dx
Generates the grid, staggered grid, and boundary conditions, as suitable for the mincut problem, for a rectangular domain. Grids use 'ij' indexing. Input :
- shape (tuple) : the domain dimensions (n1,...,nd)
- corners (array of shape (2,d)) : the extreme points of the domain
- sparse (bool) : wether to generate a dense or a sparse grid Output : Xm,Xϕ,dx,weights (last element only if retweights=True)
- Xs : standard grid coordinates (for the scalar potential)
- Xv : staggered grid coordinates, at the center of each cell (for the gradient)
- dx : the discretization scale
def
mincut( g, metric, dx=None, grad='gradb', τ_primal=0.2, ρ_overrelax=1.8, maxiter=5000, E_rtol=0.01, shape_i=None, use_numpy_eigh=False, verbosity=2):
81def mincut(g,metric,dx=None, 82 grad="gradb",τ_primal=0.2,ρ_overrelax=1.8, 83 maxiter=5000,E_rtol=1e-2, #,rtol=1e-6, 84 shape_i=None,use_numpy_eigh=False,verbosity=2): 85 """ 86 Numerically solves the mincut problem. 87 - g (array) : the ground cost functions 88 - metric : the geometric metric 89 - grad (optional, string): gradient discretization. Possible values : 90 - 'gradb' -> upwind 91 - 'gradc' -> centered (accurate but unstable) 92 - 'grad2' -> use both upwind and downwind 93 - τ_primal : time step for the primal proximal operator 94 """ 95 96 # traits and sizes 97 int_t = np.int32 98 float_t = np.float32 99 shape_s = g.shape # shape for vector fields 100 shape_v = tuple(s-1 for s in shape_s) 101 vdim = len(shape_s) # Number of space dimensions 102 qdim = {1:np.nan, 2:2, 3:4}[vdim] # Data size for a rotation matrix (compact format) 103 assert τ_primal>0 104 105 # Prepare the metric 106 w_randers = None 107 if not isinstance(metric,cp.ndarray): 108 assert metric.shape==() or metric.shape==shape_v 109 from ... import AutomaticDifferentiation as ad 110 metric = ad.cupy_generic.cupy_set(metric,dtype32=True,iterables=type(metric)) 111 if dx is not None: 112 dx = cp.asarray(dx,dtype=float_t) 113 if np.ndim(dx)==0: metric = metric.with_speed(dx) 114 elif np.ndim(dx)==1: metric = metric.with_speeds(dx) 115 metric = _preproc_metric(metric,use_numpy_eigh) 116 if isinstance(metric,tuple): metric,w_randers = metric 117 if isinstance(metric,list): metric = np.concatenate(metric,axis=0) 118 119 def asfield(a): return np.moveaxis(np.broadcast_to(a,shape_v+a.shape),-1,0) 120 if np.ndim(metric)==1:metric = asfield(metric) 121 if np.ndim(w_randers)==1:w_randers = asfield(w_randers) 122 123 assert metric.shape[1:]==shape_v # Metric is provided at cell centers 124 geomsize = len(metric) 125 metric_type = {1:'iso', (1+vdim):'iso_asym', (vdim+qdim):'riemann', 126 (vdim+qdim+vdim+qdim+vdim):'riemann_asym'}[geomsize] 127 128 if shape_i is None: shape_i = {1:(64,), 2:(8,8), 3:(4,4,4)}[vdim] 129 assert len(shape_i)==vdim 130 size_i = np.prod(shape_i) 131 132 if vdim==1: grad='gradc' # Only one meaningful discretization in dimension 1 133 else: assert grad in ('gradb','gradc','grad2') 134 if grad=='grad2': 135 graddim = 2*vdim 136 gradnorm2 = 2*vdim 137 if w_randers is not None: w_randers = np.concatenate([w_randers,w_randers],axis=0) 138 else: 139 graddim = vdim 140 gradnorm2 = 4*vdim # Squared norm of gradient operator 141 142 # Format suitably for the cupy kernel 143 g = cp.ascontiguousarray(fd.block_expand(cp.asarray(g,dtype=float_t), 144 shape_i,constant_values=np.nan)) 145 shape_o = g.shape[:vdim] 146 metric = cp.ascontiguousarray(fd.block_expand(cp.asarray(metric,dtype=float_t) 147 ,shape_i,shape_o,constant_values=np.nan)) 148 149 # Create the optimization variables 150 ϕ = cp.zeros(shape_o+shape_i,dtype=float_t) # primal point 151 ϕ_ext = cp.zeros(shape_o+shape_i,dtype=float_t) # extrapolated primal point 152 η = cp.zeros((graddim,*shape_o,*shape_i),dtype=float_t) # dual point 153 primal_value = cp.zeros(shape_o,dtype=float_t) # primal objective value, by block 154 dual_value = cp.zeros(shape_o,dtype=float_t) # dual objective value, by block 155 156 # --------------- cuda header construction and compilation ---------------- 157 # Generate the load order for the boundary of shared data 158 x_top_e = fd.block_neighbors(shape_i,True) 159 x_bot_e = fd.block_neighbors(shape_i,False) 160 161 # Generate the kernels 162 traits = { 163 'ndim_macro':vdim, 164 'Int':int_t, 165 'Scalar':float_t, 166 'shape_i':shape_i, 167 'shape_e':tuple(s+1 for s in shape_i), 168 'newton_maxiter':7, 169 'metric_type_macro':{'iso':1,'iso_asym':2,'riemann':3,'riemann_asym':4}[metric_type], 170 'size_bd_e':len(x_top_e), 171 'x_top_e':x_top_e, 172 'x_bot_e':1+x_bot_e, 173 'grad_macro':{'gradb':1,'gradc':2,'grad2':3}[grad], 174 'preproc_randers_macro':w_randers is not None, 175 } 176 177 cuda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"cuda") 178 date_modified = cupy_module_helper.getmtime_max(cuda_path) 179 source = cupy_module_helper.traits_header(traits,size_of_shape=True,log2_size=True) 180 181 source += [ 182 '#include "Kernel_MinCut.h"', 183 f"// Date cuda code last modified : {date_modified}"] 184 cuoptions = ("-default-device", f"-I {cuda_path}") 185 186 source = "\n".join(source) 187# print(source) 188 module = cupy_module_helper.GetModule(source,cuoptions) 189 # ------------- cuda module generated (may be reused...) ---------------- 190 191 def setcst(*args): cupy_module_helper.SetModuleConstant(module,*args) 192 setcst('shape_tot_s',shape_s,int_t) 193 setcst('shape_tot_v',shape_v,int_t) 194 setcst('shape_o',shape_o,int_t) 195 setcst('size_io',g.size,int_t) 196 setcst('tau_primal',τ_primal,float_t) 197 setcst('tau_dual',1./(gradnorm2*τ_primal),float_t) 198 setcst('rho_overrelax',ρ_overrelax,float_t) 199 200 primal_step = module.get_function("primal_step") 201 dual_step = module.get_function("dual_step") 202 size_o = np.prod(shape_o) 203 primal_values,dual_values = [],[] 204 205 if w_randers is not None: 206 w_randers = cp.ascontiguousarray( 207 fd.block_expand(w_randers,shape_i,shape_o,constant_values=np.nan),dtype=float_t) 208 assert w_randers.shape == η.shape 209 dummy = cp.array([0.],dtype=float_t) 210# print(f"g_gpu={fd.block_squeeze(g,shape_s)[-5:,-5:]}") 211# g.fill(0) 212 setcst('preproc_randers',1,int_t) 213 primal_step((size_o,),(size_i,),(g,dummy,w_randers,dummy,dummy,dummy)) 214 setcst('preproc_randers',0,int_t) 215# print(f"g_gpu={fd.block_squeeze(g,shape_s)[-5:,-5:]}") 216# print(np.any(np.isnan(g))) 217 218 # Main loop 219 top = time.time() 220 for niter in range(maxiter): 221# setcst('niter',niter,int_t) 222# print(f"{niter=}") 223# print(f"{ϕ=}") 224# print(f"η*dx={η.get()*dx}") 225 primal_step((size_o,),(size_i,),(ϕ,ϕ_ext,η,g,primal_value,dual_value)) 226# print(f"{ϕ=}") 227# print(primal_value) 228 dual_step((size_o,),(size_i,),(η,ϕ_ext,metric,primal_value)) 229# print(primal_value) 230 231# print(f"(After dual step η={η.get()}") 232# print(f"(After dual step η*dx={η.get()*dx}") 233 234 # Check objectives and termination 235 236 e_primal = float(primal_value.sum()) 237 e_dual = -float(dual_value.sum()) 238 primal_values.append(e_primal) 239 dual_values.append(e_dual) 240 if E_rtol>0 and e_primal-e_dual<E_rtol*np.abs(e_primal): break 241 else: 242 if E_rtol>0 and verbosity>=1: 243 print("Exhausted iteration budget without satisfying convergence criterion") 244 if verbosity>=2: 245 print(f"GPU primal-dual solver completed {niter+1} steps in {time.time()-top} seconds") 246 247 # Reshape, rescale the results 248 ϕ = fd.block_squeeze(ϕ,shape_s) 249 η = fd.block_squeeze(η,shape_v) 250 if grad=='grad2': η=np.moveaxis(η.reshape((2,vdim)+shape_v),0,1) 251 if dx is not None: 252 if np.ndim(dx)==0: η*=dx 253 else: 254 for ηi,dxi in zip(η,dx): ηi*=dxi 255 256 return {'ϕ':ϕ,'η':η,'niter':niter+1, 257 'primal_values':np.array(primal_values),'dual_values':np.array(dual_values)}
Numerically solves the mincut problem.
- g (array) : the ground cost functions
- metric : the geometric metric
- grad (optional, string): gradient discretization. Possible values :
- 'gradb' -> upwind
- 'gradc' -> centered (accurate but unstable)
- 'grad2' -> use both upwind and downwind
- τ_primal : time step for the primal proximal operator