\[ \newcommand{\argmin}{\operatorname{argmin}} \newcommand{\sign}{\operatorname{sign}} \newcommand{\diag}[1]{\operatorname{diag}(#1)} \newcommand{\prox}[2]{\operatorname{prox}_{#1}(#2)} \]

1 Introduction

The Rosenbrock problem involves minimizing the following function: \[ \begin{gather*} f(x_1, x_2) = c (x_1^2 - x_2)^2 + (x_1 - 1)^2 \end{gather*} \] where \(c \ge 0\). The classic choice is \(c = 100\), and the global minimum is \(f(1, 1) = 0\).
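For reference, a direct R implementation (the name rosen and the default const = 100 are our illustrative choices), which we can sanity-check at the known minimum:

rosen <- function(x, const = 100) {
  # c*(x1^2 - x2)^2 + (x1 - 1)^2
  const*(x[1]^2 - x[2])^2 + (x[1] - 1)^2
}
rosen(c(1, 1))
## [1] 0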

2 Bayesian Optimization

Let the prior be uniform on the box: \((x_1, x_2) \sim \pi = \operatorname{Unif}([0,3] \times [0,3])\).

2.1 Standard MCMC

2.2 Conditional Sampling

We draw \(x = (x_1, x_2) \sim \pi\) and record \(L(x) = f(x_1, x_2)\), the observed function value.

Then we need to sample uniformly, conditional on the sublevel set \(\{(x_1, x_2) : c (x_1^2 - x_2)^2 + (x_1 - 1)^2 < L\}\).

We do this in Gibbs fashion, alternating draws from \((x_2 | x_1)\) and \((x_1 | x_2)\).

For \((x_2 | x_1)\), rearranging \(c (x_1^2 - x_2)^2 < L - (1 - x_1)^2\) gives the interval \[ \begin{gather*} V_2 = \{ x_2 : x_1^2 - \sqrt{c^{-1} \left(L - (1 - x_1)^2\right)} < x_2 < x_1^2 + \sqrt{c^{-1} \left( L - (1 - x_1)^2\right)} \} \end{gather*} \] which is non-empty whenever \(L > (1 - x_1)^2\); we intersect it with the prior support \([0, 3]\).

For \((x_1 | x_2)\), expanding \(f(x_1, x_2) < L\) gives a quartic inequality in \(x_1\): \[ \begin{gather*} V_1 = \{ x_1 : c x_1^4 + (1 - 2 c x_2) x_1^2 - 2 x_1 + (1 + c x_2^2 - L) < 0 \} \end{gather*} \] Because the leading coefficient \(c\) is positive, \(V_1\) is a union of at most two intervals whose endpoints are real roots of the quartic; there is no tidy closed form, but the roots are cheap to find numerically (e.g. with R's polyroot), and again we intersect with \([0, 3]\).

Thus, starting from \(x_t = (x_1, x_2)\), we sample \((x_2 | x_1) \sim \operatorname{Unif}(V_2)\) and then \((x_1 | x_2) \sim \operatorname{Unif}(V_1)\), which yields a new point \(x_{t+1}\).

For \(N\) points \(x^{(i)}_t\) in the set \(X_t\), we find the worst point \(x^{*}_t = \operatorname{argmax}_{x^{(i)}_t} L(x^{(i)}_t)\) and set \(\tilde{X}_t = X_t \setminus \{x^{*}_t\}\). For some \(x_t \in \tilde{X}_t\) we perform the aforementioned Gibbs step with threshold \(L = L(x^{*}_t)\), obtaining \(x_{t+1}\), and replace the worst point to form the updated set \(X_{t+1} = \tilde{X}_t \cup \{x_{t+1}\}\). The process is then repeated (a sketch appears in Section 3.2).

3 Simulations

library(msm)
# or 
#library(truncnorm)
library(ggplot2)

3.1 Slice Sampler

rosen.mcmc <- function(niter, x0, kappa, const) {
  stopifnot(length(x0) == 2)
  # initial point
  x <- x0

  # scale on the (x2 - x1^2)^2 term used by the auxiliary (slice) variable
  lambda <- const*kappa/2.0

  output <- list(u = matrix(nrow=niter, ncol=1),
                 x = matrix(nrow=niter, ncol=2)
                 )

  for(i in 1:niter) {
    if(i %% 100 == 0) cat("iteration ", i, "\n")

    # alternate: auxiliary variable, then the coordinates
    u <- draw.u(lambda, x)
    x <- draw.x(x, u, const, kappa)

    output$u[i,] <- u
    output$x[i,] <- x
  }
  colnames(output$x) <- c("x1", "x2")

  return(output)
}

draw.u <- function(lambda, x) {
  # slice-sampler auxiliary draw: u = lambda*g(x) + Exp(1) with g = (x2 - x1^2)^2,
  # so the current point always lies inside the slice {x : lambda*g(x) < u}
  u <- lambda*(x[2] - x[1]^2)^2 + rexp(1, 1)
  stopifnot(is.finite(u))
  return(u)
}

draw.x <- function(x, u, const, kappa) {
  lambda <- const*kappa/2.0

  # the slice lambda*(x2 - x1^2)^2 < u gives x2 - sqrt(u/lambda) < x1^2 < x2 + sqrt(u/lambda)
  auy <- x[2] - sqrt(u/lambda)
  auy <- sqrt(max(auy, 0))

  buy <- x[2] + sqrt(u/lambda)
  buy <- sqrt(buy)

  x1.sd <- sqrt(kappa * 2/const)
  x2.sd <- sqrt(kappa * 2)
  # note: the exact slice for x1 is auy <= |x1| <= buy; truncating to
  # (-auy, buy) is a simplification that keeps a single interval
  x[1] <- rtnorm(1, 1, x1.sd, -auy, buy)
  #x[1] <- rtruncnorm(1, a=-auy, b=buy, mean=1, sd=x1.sd)
  x[2] <- rnorm(1, x[1]^2, x2.sd)

  stopifnot(all(is.finite(x)))
  return(x)
}

x0 = c(0,0)
C = 100
kappa = 1
mcmc.results = rosen.mcmc(100, x0, kappa, C)
## iteration  100
exp.seq = function(from, to, rate=0.2, length.out=50) {
  # sequence from `from` to `to` whose spacing grows exponentially,
  # so points cluster near `from`
  steps = seq(0, length.out)
  return(from + 
         exp(rate*(steps - length.out))*(to-from))
}

x.seq = c(exp.seq(1, 0, len=50),
          exp.seq(1, 2, len=50))
y.seq = c(exp.seq(1, -2, len=50),
          exp.seq(1, 4, len=50))

rosen.data = transform(expand.grid(x=x.seq,
                                   y=y.seq),
                       z=C*(x^2 - y)^2 + (x - 1)^2)
mcmc.data = as.data.frame(mcmc.results$x)
colnames(mcmc.data) = c("x", "y")

library(scales)
rosen.breaks = c(exp.seq(0, 1, len=50), 50, 100, 200, 500)

rosen.plot = ggplot(rosen.data, aes(x=x, y=y)) + 
  # 2-d density of the MCMC draws
  stat_density2d(data=mcmc.data,
                 aes(fill=..level..),
                 alpha=0.05,
                 geom="polygon") + 
  # Rosenbrock contours, coloured on a log10 scale
  stat_contour(aes(z=z, colour=..level..), 
               breaks=rosen.breaks,
               size=0.1
               ) +
  scale_fill_gradient("MCMC") +
  scale_colour_gradient2("Rosenbrock", 
                         trans=log_trans(base=10),
                         high="blue",
                         midpoint=5,
                         mid="black",
                         low="red")
print(rosen.plot)

[Figure: density of the MCMC draws overlaid on the Rosenbrock contours (chunk slice_run)]

3.2 Conditional MCMC
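
Below is a minimal sketch of the conditional sampler from Section 2.2. It assumes the sets \(V_2\) (an interval) and \(V_1\) (at most two intervals, obtained from the quartic's real roots via R's polyroot); the helper names rosen.f, draw.V2, draw.V1, gibbs.step and nested.step, and the settings N = 20 and 500 rounds, are illustrative choices, not part of the original analysis.

rosen.f <- function(x, const) const*(x[1]^2 - x[2])^2 + (x[1] - 1)^2

# (x2 | x1): uniform on the interval V_2, clipped to the prior support [0, 3]
draw.V2 <- function(x1, L, const) {
  r <- sqrt((L - (1 - x1)^2)/const)  # real because L > (1 - x1)^2 inside the level set
  runif(1, max(x1^2 - r, 0), min(x1^2 + r, 3))
}

# (x1 | x2): the quartic from Section 2.2 is negative on at most two intervals;
# find its real roots and sample uniformly over the admissible pieces
# (degenerate double roots are ignored in this sketch)
draw.V1 <- function(x2, L, const) {
  roots <- polyroot(c(1 + const*x2^2 - L, -2, 1 - 2*const*x2, 0, const))
  re <- sort(Re(roots[abs(Im(roots)) < 1e-8]))
  ints <- matrix(re, ncol=2, byrow=TRUE)   # rows are (left, right) endpoints
  ints[,1] <- pmax(ints[,1], 0)            # clip to the prior support [0, 3]
  ints[,2] <- pmin(ints[,2], 3)
  ints <- ints[ints[,2] > ints[,1], , drop=FALSE]
  k <- sample.int(nrow(ints), 1, prob=ints[,2] - ints[,1])
  runif(1, ints[k,1], ints[k,2])
}

# one Gibbs step inside the level set {f < L}
gibbs.step <- function(x, L, const) {
  x[2] <- draw.V2(x[1], L, const)
  x[1] <- draw.V1(x[2], L, const)
  x
}

# one population update: drop the worst of the N points and replace it by a
# Gibbs move seeded at one of the survivors, with threshold L = L(x*)
nested.step <- function(X, const) {
  Lvals <- apply(X, 1, rosen.f, const=const)
  worst <- which.max(Lvals)
  others <- setdiff(seq_len(nrow(X)), worst)
  seed <- X[others[sample.int(length(others), 1)], ]
  X[worst,] <- gibbs.step(seed, Lvals[worst], const)
  X
}

X <- matrix(runif(2*20, 0, 3), ncol=2)  # N = 20 points from the prior
for(t in 1:500) X <- nested.step(X, const=100)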

3.3 Particle Swarm Optimization

Now, we try Particle Swarm Optimization:

library(pso)
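
For example, a minimal call on the same objective over the \([0, 3]^2\) box from Section 2 (the seed and settings here are our illustrative choices):

set.seed(1)
pso.fit <- psoptim(rep(NA, 2),  # NA entries: random start within the bounds
                   function(x) 100*(x[1]^2 - x[2])^2 + (x[1] - 1)^2,
                   lower=c(0, 0), upper=c(3, 3))
pso.fit$par  # should approach the minimum at (1, 1)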