Cut problems in graphs

The code below initializes the graph used in all the examples of this page. It should be run prior to any of the codes presented in this page. The packages networkx and matplotlib are recquired. We use a graph generated by the LCF generator of the networkx package. The graph and the edge capacities are deterministic, so that you can compare your results.

import picos as pic
import networkx as nx
import pylab
import random

# Use a fixed RNG seed so the result is reproducable.
random.seed(1)

# Number of nodes.
N=20

# Generate a graph using LCF notation.
G=nx.LCF_graph(N,[1,3,14],5)
G=nx.DiGraph(G) #edges are bidirected

# Generate edge capacities.
c={}
for e in sorted(G.edges(data=True)):
  capacity = random.randint(1, 20)
  e[2]['capacity'] = capacity
  c[(e[0], e[1])]  = capacity

# Convert the capacities to a PICOS expression.
cc=pic.new_param('c',c)

# Manually set a layout for which the graph is planar.
pos={
  0:  (0.07, 0.70), 1:  (0.18, 0.78), 2:  (0.26, 0.45), 3:  (0.27, 0.66),
  4:  (0.42, 0.79), 5:  (0.56, 0.95), 6:  (0.60, 0.80), 7:  (0.64, 0.65),
  8:  (0.55, 0.37), 9:  (0.65, 0.30), 10: (0.77, 0.46), 11: (0.83, 0.66),
  12: (0.90, 0.41), 13: (0.70, 0.10), 14: (0.56, 0.16), 15: (0.40, 0.17),
  16: (0.28, 0.05), 17: (0.03, 0.38), 18: (0.01, 0.66), 19: (0.00, 0.95)
}

# Set source and sink nodes for flow computation.
s=16
t=10

# Set node colors.
node_colors=['lightgrey']*N
node_colors[s]='lightgreen' # Source is green.
node_colors[t]='lightblue'  # Sink is blue.

# Define a plotting helper that closes the old and opens a new figure.
def new_figure():
  try:
    global fig
    pylab.close(fig)
  except NameError:
    pass
  fig=pylab.figure(figsize=(11,8))
  fig.gca().axes.get_xaxis().set_ticks([])
  fig.gca().axes.get_yaxis().set_ticks([])

# Plot the graph with the edge capacities.
new_figure()
nx.draw_networkx(G, pos, node_color=node_colors)
labels={
  e: '{} | {}'.format(c[(e[0], e[1])], c[(e[1], e[0])])
  for e in G.edges if e[0] < e[1]}
nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)
pylab.show()
_images/graphs-1.png

The first number on an edge label denotes the capacity from the node with the smaller number to the node with the larger number; the second number denotes the capacity for the other direction. Source and sink that we will use for flow computations are drawn in green and blue, respectively.

Max-flow, Min-cut (LP)

Max-flow

Given a directed graph G(V,E), with a capacity c(e) on each edge e \in E, a source node s and a sink node t, the max-flow problem is to find a flow from s to t of maximum value. Recall that a flow s to t is a mapping from E to \mathbb{R}^+ such that

  • the capacity of each edge is respected, \forall e \in E,\ f(e) \leq c(e), and
  • the flow is conserved at each non-terminal node, \forall n \in V \setminus \{s,t\},\ \sum_{(i,n)\in E} f((i,n)) = \sum_{(n,j)\in E} f((n,j)).

Its value is defined as the volume passing from s to t:

\mathrm{value} (f) = \sum_{(s,j)\in E} f((s,j)) - \sum_{(i,s)\in E} f((i,s)) = \sum_{(i,t)\in E} f((i,t)) - \sum_{(t,j)\in E} f((t,j)).

This problem has a linear programming formulation, which we solve below for s=16 and t=10:

maxflow=pic.Problem()

# Add the flow variables.
f={}
for e in G.edges():
  f[e]=maxflow.add_variable('f[{0}]'.format(e),1)

# Add another variable for the total flow.
F=maxflow.add_variable('F',1)

# Enforce edge capacities.
maxflow.add_list_of_constraints(
  [f[e]<cc[e] for e in G.edges()], # list of constraints
  [('e',2)],                       # e is a double index
  'edges')                         # set the index belongs to

# Enforce flow conservation.
maxflow.add_list_of_constraints(
  [pic.sum([f[p,i] for p in G.predecessors(i)],'p','pred(i)')
    == pic.sum([f[i,j] for j in G.successors(i)],'j','succ(i)')
    for i in G.nodes() if i not in (s,t)],
  'i','nodes-(s,t)')

# Set source flow at s.
maxflow.add_constraint(
  pic.sum([f[p,s] for p in G.predecessors(s)],'p','pred(s)') + F
  == pic.sum([f[s,j] for j in G.successors(s)],'j','succ(s)'))

# Set sink flow at t.
maxflow.add_constraint(
  pic.sum([f[p,t] for p in G.predecessors(t)],'p','pred(t)')
  == pic.sum([f[t,j] for j in G.successors(t)],'j','succ(t)') + F)

# Enforce flow nonnegativity.
maxflow.add_list_of_constraints(
  [f[e]>0 for e in G.edges()], # list of constraints
  [('e',2)],                   # e is a double index
  'edges')                     # set the index belongs to

# Set the objective.
maxflow.set_objective('max',F)

# Solve the problem.
maxflow.solve(verbose=0,solver='glpk')

An equivalent and faster way to define this problem is to use the function flow_Constraint:

maxflow2=pic.Problem()

# Add the flow variables.
f={}
for e in G.edges():
  f[e]=maxflow2.add_variable('f[{0}]'.format(e),1)

# Add another variable for the total flow.
F=maxflow2.add_variable('F',1)

# Enforce all flow constraints at once.
maxflow2.addConstraint(pic.flow_Constraint(
  G, f, source=16, sink=10, capacity='capacity', flow_value=F, graphName='G'))

# Set the objective.
maxflow2.set_objective('max',F)

# Solve the problem.
maxflow2.solve(verbose=0,solver='glpk')

Let us now draw the maximum flow computed with the second approach:

# Close the old figure and open a new one.
new_figure()

# Determine which edges carry flow.
flow_edges=[e for e in G.edges() if f[e].value[0] > 1e-4]

# Draw the nodes and the edges that don't carry flow.
nx.draw_networkx(G, pos, edge_color='lightgrey', node_color=node_colors,
  edgelist=[e for e in G.edges
    if e not in flow_edges and (e[1], e[0]) not in flow_edges])

# Draw the edges that carry flow.
nx.draw_networkx_edges(G, pos, edgelist=flow_edges)

# Show flow values and capacities on these edges.
labels={e: '{0}/{1}'.format(f[e], c[e]) for e in flow_edges}
nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)

# Show the maximum flow value.
fig.suptitle("Maximum flow value: {}".format(F), fontsize=16, y=0.95)

# Show the figure.
pylab.show()
_images/graphs-4.png

The graph shows the source in blue, the sink in green, and the value of the flow together with the capacity on each edge that carries flow.

Min-cut

Given a directed graph G(V,E), with a capacity c(e) on each edge e \in E, a source node s and a sink node t, the min-cut problem is to find a partition of the nodes in two sets (S,T), such that s\in S, t \in T, and the total capacity of the cut, \mathrm{capacity}(S,T)=\sum_{(i,j)\in E \cap S \times T} c((i,j)), is minimized.

It can be seen that binary solutions d\in\{0,1\}^E,\ p\in\{0,1\}^V of the following linear program yield a minimum cut:

\begin{center}
\begin{eqnarray*}
&\underset{\substack{d \in \mathbb{R}^E\\
                          p \in \mathbb{R}^V}}
             {\mbox{minimize}}
                   & \sum_{e \in E} c(e) d(e)\\
&\mbox{subject to} & \forall (i,j) \in E,\ d((i,j)) \geq p(i)-p(j)\\
&                  & p(s) = 1\\
&                  & p(t) = 0\\
&                  & \forall n \in V,\ p(n) \geq 0\\
&                  & \forall e \in E,\ d(e) \geq 0
\end{eqnarray*}
\end{center}

Remarkably, this LP is the dual of the max-flow LP, and the max-flow-min-cut theorem (also known as Ford-Fulkerson theorem [1]) states that the capacity of the minimum cut is equal to the value of the maximum flow. This means that the above LP always has an optimal solution in which d is binary. In fact, the matrix defining this LP is totally unimodular, from which we know that every extreme point of the polyhedron defining the feasible region is integral, and hence the simplex algorithm will return a minimum cut.

We solve the min-cut problem below, again for s=16 and t=10:

mincut=pic.Problem()

# Add cut indicator variables.
d={}
for e in G.edges():
  d[e]=mincut.add_variable('d[{0}]'.format(e),1)

# Add variables for the potentials.
p=mincut.add_variable('p',N)

# State the potential inequalities.
mincut.add_list_of_constraints(
        [d[i,j] > p[i]-p[j]
        for (i,j) in G.edges()], # list of constraints
        ['i','j'],'edges')       # indices and set they belong to

# Set the source potential to one.
mincut.add_constraint(p[s]==1)

# Set the sink potential to zero.
mincut.add_constraint(p[t]==0)

# Enforce nonnegativity.
mincut.add_constraint(p>0)
mincut.add_list_of_constraints(
        [d[e]>0 for e in G.edges()], # list of constraints
        [('e',2)],                   # e is a double index
        'edges')                     # set the index belongs to

# Set the objective.
mincut.set_objective('min',
  pic.sum([cc[e]*d[e] for e in G.edges()], [('e',2)], 'edges'))

mincut.solve(verbose=0,solver='glpk')

# Determine the cut edges and node sets.
# Rounding is done because solvers might return near-optimal solutions due to
# numerical precision issues.
cut=[e for e in G.edges() if abs(d[e].value[0]-1) < 1e-6]
S  =[n for n in G.nodes() if abs(p[n].value[0]-1) < 1e-6]
T  =[n for n in G.nodes() if abs(p[n].value[0]  ) < 1e-6]

Note that the minimum-cut can also be derived from the dual variables of the max-flow LP:

Note

Due to a technical issue, the interactive Python listing below is not automatically verified. If you find an error, please refer to the section on reporting a bug.

>>> # capacited flow constraint
>>> capaflow=maxflow.get_constraint((0,))
>>> dualcut=[e for i,e in enumerate(G.edges()) if abs(capaflow[i].dual[0]-1)<1e-6]
>>> # flow conservation constraint
>>> consflow=maxflow.get_constraint((1,))
>>> Sdual = [s]+ [n for i,n in
...           enumerate([n for n in G.nodes() if n not in (s,t)])
...           if abs(consflow[i].dual[0]-1)<1e-6]
>>> Tdual = [t]+ [n for i,n in
...           enumerate([n for n in G.nodes() if n not in (s,t)])
...           if abs(consflow[i].dual[0])<1e-6]
>>> cut == dualcut
True
>>> set(S) == set(Sdual)
True
>>> set(T) == set(Tdual)
True

Let us now draw the minim cut:

# Close the old figure and open a new one.
new_figure()

# Draw the nodes and the edges that are not in the cut.
nx.draw_networkx(G, pos, node_color=node_colors,
  edgelist=[e for e in G.edges() if e not in cut and (e[1], e[0]) not in cut])

# Draw edges that are in the cut.
nx.draw_networkx_edges(G, pos, edgelist=cut, edge_color='r')

# Show capacities for cut edges.
labels={e: '{}'.format(c[e]) for e in cut}
nx.draw_networkx_edge_labels(G, pos, edge_labels=labels, font_color='r')

# Show the minimum cut value and the partition.
fig.suptitle("Minimum cut value: {}\nS: {}, T: {}".format(
  mincut.obj_value(), S, T), fontsize=16, y=0.97)

# Show the figure.
pylab.show()
_images/graphs-6.png

The graph shows the source in blue, the sink in green, and the edges defining the cut in red, with their capacities.

Multicut (MIP)

Multicut is a generalization of the min-cut problem, in which several pairs of nodes must be disconnected. The goal is to find a cut of minimal capacity, such that for all pairs (s,t) \in\mathcal{P}=\{(s_1,t_1),\ldots,(s_k,t_k))\}, there is no path from s to t in the graph obtained by removing the cut edges.

We can obtain a MIP formulation of the multicut problem via a small modification of the min-cut LP. The idea is to introduce a different potential for every node that is the source of a pair in \mathcal{P}, that is

\forall s \in \mathcal{S}=\{s\in V: \exists t \in V\ (s,t)\in\mathcal{P}\},
p_s \in \mathbb{R}^V,

and to constrain the cut indicator variables to be binary.

\begin{center}
\begin{eqnarray*}
&\underset{\substack{y \in \{0,1\}^E\\
                     \forall s \in \mathcal{S},\ p_s \in \mathbb{R}^V}}
             {\mbox{minimize}}
                   & \sum_{e \in E} c(e) y(e)\\
&\mbox{subject to} & \forall (i,j),s \in E\times\mathcal{S},\ y((i,j)) \geq p_s(i)-p_s(j)\\
&                  & \forall s \in \mathcal{S},\ p_s(s) = 1\\
&                  & \forall (s,t) \in \mathcal{P},\ p_s(t) = 0\\
&                  & \forall (s,n) \in \mathcal{S} \times V,\ p_s(n) \geq 0
\end{eqnarray*}
\end{center}

Unlike the min-cut problem, the LP obtained by relaxing the integer constraint y \in \{0,1\}^E is not guaranteed to have an integral solution (see e.g. [2]).

We solve the multicut problem below, for the terminal pairs \mathcal{P}=\{(0,12),(1,5),(1,19),(2,11),(3,4),(3,9),(3,18),(6,15),(10,14)\}.

multicut=pic.Problem()

# Define the pairs to be separated.
pairs=[(0,12),(1,5),(1,19),(2,11),(3,4),(3,9),(3,18),(6,15),(10,14)]

# Extract the sources and sinks.
sources=set([p[0] for p in pairs])
sinks=set([p[1] for p in pairs])

# Define the cut indicator variables.
y={}
for e in G.edges():
  y[e]=multicut.add_variable('y[{0}]'.format(e), 1, vtype='binary')

# Define one potential for each source.
p={}
for s in sources:
  p[s]=multicut.add_variable('p[{0}]'.format(s),N)

# State the potential inequalities.
multicut.add_list_of_constraints(
  [y[i,j]>p[s][i]-p[s][j] for s in sources for (i,j) in G.edges()],
  ['i','j','s'], 'edges x sources')

# Set the source potentials to one.
multicut.add_list_of_constraints(
  [p[s][s]==1 for s in sources], 's', 'sources')

# Set the sink potentials to zero.
multicut.add_list_of_constraints(
  [p[s][t]==0 for (s,t) in pairs], ['s','t'], 'pairs')

# Enforce nonnegativity.
multicut.add_list_of_constraints(
  [p[s]>0 for s in sources], 's', 'sources')

# Set the objective.
multicut.set_objective(
  'min', pic.sum([cc[e]*y[e] for e in G.edges()], [('e',2)], 'edges'))

# Solve the problem.
multicut.solve(verbose=0,solver='glpk')

# Extract the cut.
cut=[e for e in G.edges() if y[e].value[0]==1]

Let us now draw the multicut:

# Close the old figure and open a new one.
new_figure()

# Define matching colors for the pairs.
colors=[
  ('#4CF3CE','#0FDDAF'), # turquoise
  ('#FF4D4D','#FF0000'), # red
  ('#FFA64D','#FF8000'), # orange
  ('#3ABEFE','#0198E1'), # topaz
  ('#FFDB58','#FFCC11'), # mustard
  ('#BCBC8F','#9F9F5F')  # khaki
]

# Assign the colors.
node_colors=['lightgrey']*N
for i,s in enumerate(sources):
  node_colors[s]=colors[i][0]
  for t in [t for (s0,t) in pairs if s0==s]:
    node_colors[t]=colors[i][1]

# Draw the nodes and the edges that are not in the cut.
nx.draw_networkx(G, pos, node_color=node_colors,
  edgelist=[e for e in G.edges() if e not in cut and (e[1], e[0]) not in cut])

# Draw the edges that are in the cut.
nx.draw_networkx_edges(G, pos, edgelist=cut, edge_color='r')

# Show capacities for cut edges.
labels={e: '{}'.format(c[e]) for e in cut}
nx.draw_networkx_edge_labels(G, pos, edge_labels=labels, font_color='r')

# Show the cut capacity.
fig.suptitle("Multicut value: {}"
  .format(multicut.obj_value()), fontsize=16, y=0.95)

# Show the figure.
pylab.show()
_images/graphs-8.png

The graph shows terminal nodes with matching hue. Sources are a tad lighter than sinks to make them distinguishable. The edges defining the cut are drawn in red and show their capacities. The colors for the source nodes are, in order: Turquoise, red, orange, topaz, mustard and khaki.

Maxcut relaxation (SDP)

The goal of the maxcut problem is to find a partition (S,T) of the nodes of an undirected graph G(V,E), such that the capacity of the cut, \mathrm{capacity}(S,T)=\sum_{\{i,j\} \in E \cap (S \Delta T)} c((i,j)), is maximized.

Goemans and Williamson have designed a famous 0.878-approximation algorithm [3] for this NP-hard problem based on semidefinite programming. The idea is to introduce a variable x \in \{-1,1\}^V where x(n) takes the value +1 or -1 depending on whether n \in S or n \in T. Then, it can be seen that the value of the cut is equal to \frac{1}{4} x^T L x, where L is the Laplacian of the graph. If we define the matrix X=xx^T, which is positive semidefinite and of rank 1, we obtain an SDP by relaxing the rank-one constraint on X:

\begin{center}
\begin{eqnarray*}
&\underset{X \in \mathbb{S}_{|V|}}
             {\mbox{maximize}}
                   & \frac{1}{4} \langle L, X \rangle \\
&\mbox{subject to} & \mbox{diag}(X) = \mathbf{1}\\
&                  & X \succeq 0
\end{eqnarray*}
\end{center}

Then, Goemans and Williamson have shown that if we project the solution X onto a random hyperplan, we obtain a cut whose expected capacity is at least 0.878 times the optimum. We give a simple implementation of their algorithm. First, let us define and solve the SDP relaxation:

import cvxopt as cvx
import cvxopt.lapack
import numpy as np

# Make G undirected.
G=nx.Graph(G)

# Allocate weights to the edges.
for (i,j) in G.edges():
  G[i][j]['weight']=c[i,j]+c[j,i]

maxcut = pic.Problem()

# Add the symmetric matrix variable.
X=maxcut.add_variable('X',(N,N),'symmetric')

# Retrieve the Laplacian of the graph.
LL = 1/4.*nx.laplacian_matrix(G).todense()
L=pic.new_param('L',LL)

# Constrain X to have ones on the diagonal.
maxcut.add_constraint(pic.tools.diag_vect(X)==1)

# Constrain X to be positive semidefinite.
maxcut.add_constraint(X>>0)

# Set the objective.
maxcut.set_objective('max',L|X)

#print(maxcut)

# Solve the problem.
maxcut.solve(verbose = 0,solver='cvxopt')

#print('bound from the SDP relaxation: {0}'.format(maxcut.obj_value()))

Then, we perform the random projection:

# Use a fixed RNG seed so the result is reproducable.
cvx.setseed(1)

# Perform a Cholesky factorization.
V=X.value
cvxopt.lapack.potrf(V)
for i in range(N):
  for j in range(i+1,N):
    V[i,j]=0

# Do up to 100 projections. Stop if we are within a factor 0.878 of the SDP
# optimal value.
count=0
obj_sdp=maxcut.obj_value()
obj=0
while (count < 100 or obj < 0.878*obj_sdp):
  r=cvx.normal(20,1)
  x=cvx.matrix(np.sign(V*r))
  o=(x.T*L*x).value[0]
  if o > obj:
    x_cut=x
    obj=o
  count+=1
x=x_cut

# Extract the cut and the seperated node sets.
S1=[n for n in range(N) if x[n]<0]
S2=[n for n in range(N) if x[n]>0]
cut = [(i,j) for (i,j) in G.edges() if x[i]*x[j]<0]
leave = [e for e in G.edges if e not in cut]

Let us now draw this cut:

# Close the old figure and open a new one.
new_figure()

# Assign colors based on set membership.
node_colors=[('lightgreen' if n in S1 else 'lightblue') for n in range(N)]

# Draw the nodes and the edges that are not in the cut.
nx.draw_networkx(G, pos, node_color=node_colors, edgelist=leave)
labels={e: '{}'.format(G[e[0]][e[1]]['weight']) for e in leave}
nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)

# Draw the edges that are in the cut.
nx.draw_networkx_edges(G, pos, edgelist=cut, edge_color='r')
labels={e: '{}'.format(G[e[0]][e[1]]['weight']) for e in cut}
nx.draw_networkx_edge_labels(G, pos, edge_labels=labels, font_color='r')

# Show the relaxation optimum value and the cut capacity.
rval = maxcut.obj_value()
sval = sum(G[e[0]][e[1]]['weight'] for e in cut)
fig.suptitle(
  'SDP relaxation value: {0:.1f}\nCut value: {1:.1f} = {2:.3f}×{0:.1f}'
  .format(rval, sval, sval/rval), fontsize=16, y=0.97)

# Show the figure.
pylab.show()
_images/graphs-11.png

The graph shows the edges defining the cut in red. The nodes are colored blue or green depending on the partition that they belong to.

References

  1. “Maximal Flow through a Network”, LR Ford Jr and DR Fulkerson, Canadian journal of mathematics, 1956.
  2. “Analysis of LP relaxations for multiway and multicut problems”, D.Bertsimas, C.P. Teo and R. Vohra, Networks, 34(2), p. 102-114, 1999.
  3. “Improved approximation algorithms for maximum cut and satisfiability problems using semidefinite programming”, M.X. Goemans and D.P. Williamson, Journal of the ACM, 42(6), p. 1115-1145, 1995.