\[ \newcommand{\argmin}{\operatorname{argmin}} \newcommand{\sign}{\operatorname{sign}} \newcommand{\diag}[1]{\operatorname{diag}(#1)} \newcommand{\prox}[2]{\operatorname{prox}_{#1}(#2)} \]

1 Horseshoe Plus

The horseshoe+ prior, as a scale mixture of normals, is given by \[ \begin{align*} (\theta_j | \lambda_j, \tau) &\sim \operatorname{N}(0, \lambda_j^2) \\ \lambda_j &\sim \operatorname{C}^+(0,\eta_j \tau) \\ \eta_j &\sim \operatorname{C}^+(0,1) \end{align*} \] The original horseshoe is \[ \begin{align*} (\theta_j | \lambda_j, \tau) &\sim \operatorname{N}(0, \lambda_j^2) \\ \lambda_j &\sim \operatorname{C}^+(0,\tau) \end{align*} \]

2 Simulations

In the following simulations we generate data according to \[ \begin{align*} y_k &\sim \operatorname{N}\left(\theta, \bf{I}\right) \end{align*} \] and we consider the following two \(\tau\) priors: \[ \begin{align*} \tau &\sim \operatorname{Unif}[0,1] \\ \tau &\sim \operatorname{C}^+(0,1/J) \end{align*} \]

Qn=10 
A=7
J=200 
theta.true = rep(0, J)
theta.true[sample.int(J, Qn)] = A

We construct a signal with 10 non-zero components of magnitude 7 and dimension \(J=200\).

We generate the data with the following:

test.data = list('J'=J, 'y'=rnorm(J,theta.true,1))

2.1 Estimation

First, we load the necessary R packages:

library(rstan)
library(ggplot2)
theme_set(theme_bw())
library(plyr)
library(dplyr)
library(reshape2)
set_cppo("fast")

The horseshoe+ is implemented in Stan with the following

stan.hsplus.unif.code = "
  data {
    int<lower=1> J; 
    vector[J] y; 
  }
  parameters {
    vector[J] theta_step; 
    vector[J] lambda_step; 
    vector[J] eta;
    real tau;
  }
  transformed parameters {
    vector[J] theta; 
    vector[J] lambda; 
    lambda <- (lambda_step .* eta) * tau;
    theta <- ((theta_step .* lambda_step) .* eta) * tau;
  }
  model {
    tau ~ uniform(0, 1);
    eta ~ cauchy(0, 1);
    lambda_step ~ cauchy(0, 1);
    theta_step ~ normal(0, 1);
    y ~ normal(theta, 1);
  }  
"
stan.hsplus.cauchy.code = "
  data {
    int<lower=1> J; 
    vector[J] y; 
  }
  parameters {
    vector[J] theta_step; 
    vector[J] lambda_step; 
    vector[J] eta;
    real tau;
  }
  transformed parameters {
    vector[J] theta; 
    vector[J] lambda; 
    lambda <- (lambda_step .* eta) * tau;
    theta <- ((theta_step .* lambda_step) .* eta) * tau;
  }
  model {
    tau ~ cauchy(0, 1.0/J);
    eta ~ cauchy(0, 1);
    lambda_step ~ cauchy(0, 1);
    theta_step ~ normal(0, 1);
    y ~ normal(theta, 1);
  }  
"

and the regular horseshoe:

stan.hs.unif.code = "
  data {
    int<lower=1> J; 
    vector[J] y; 
  }
  parameters {
    vector[J] theta_step; 
    vector[J] lambda_step; 
    real tau;
  }
  transformed parameters {
    vector[J] theta; 
    vector[J] lambda; 
    lambda <- lambda_step * tau;
    theta <- (theta_step .* lambda_step) * tau;
  }
  model {
    tau ~ uniform(0, 1);
    lambda_step ~ cauchy(0, 1);
    theta_step ~ normal(0, 1);
    y ~ normal(theta, 1);
  }  
"
stan.hs.cauchy.code = "
  data {
    int<lower=1> J; 
    vector[J] y; 
  }
  parameters {
    vector[J] theta_step; 
    vector[J] lambda_step; 
    real tau;
  }
  transformed parameters {
    vector[J] theta; 
    vector[J] lambda; 
    lambda <- lambda_step * tau;
    theta <- (theta_step .* lambda_step) * tau;
  }
  model {
    tau ~ cauchy(0, 1.0/J);
    lambda_step ~ cauchy(0, 1);
    theta_step ~ normal(0, 1);
    y ~ normal(theta, 1);
  }  
"

For comparison, we include the DL prior from Section 2.4 of 1:

stan.dl.gamma.code = '
  data {
    int<lower=1> J; 
    real<lower=0> A; 
    vector[J] y; 
  }
  transformed data {
    vector<lower=0>[J] Avec;
    real<lower=0> JA;
    Avec <- rep_vector(A, J);
    JA <- J * A; 
  }
  parameters {
    vector[J] theta_step; 
    simplex[J] phi; 
    vector<lower=0>[J] psi;
    real<lower=0> tau;
  }
  transformed parameters {
    vector[J] theta; 
    for (j in 1:J) {
      theta[j] <- ((theta_step[j] .* sqrt(psi[j])) .* phi[j]) * tau; 
    }
  }
  model {
    tau ~ gamma(JA, 0.5);
    psi ~ exponential(0.5);
    phi ~ dirichlet(Avec);
    theta_step ~ normal(0, 1); 
    y ~ normal(theta, 1);
  }  
'

It’s necessary to compile the code in Stan (we use Clang):

stan.hsplus.cauchy.fit = stan_model(model_code=stan.hsplus.cauchy.code, 
                                    model_name="hs+ cauchy")
stan.hsplus.unif.fit = stan_model(model_code=stan.hsplus.unif.code, 
                                  model_name="hs+ unif")
stan.hs.cauchy.fit = stan_model(model_code=stan.hs.cauchy.code, 
                                model_name="hs cauchy")
stan.hs.unif.fit = stan_model(model_code=stan.hs.unif.code, 
                              model_name="hs unif")
stan.dl.gamma.fit = stan_model(model_code=stan.dl.gamma.code, model_name="dl gamma")
n.iters = 1000
n.chains = 3

Stan is run with 3 chains of 1000 iterations each.

2.2 Horseshoe+ Results

2.2.1 Cauchy \(\tau\)

smpls.hsplus.res.cauchy = sampling(stan.hsplus.cauchy.fit, 
                            data = test.data, 
                            iter = n.iters,
                            pars = c("theta", "lambda", "eta", "tau"),
                            #init = 0,
                            #seed = rng.seed, 
                            chains = n.chains)
theta.smpls.hsplus.cauchy = extract(smpls.hsplus.res.cauchy, pars=c("theta"), permuted=TRUE)[[1]]
med.theta.hsplus.cauchy = apply(theta.smpls.hsplus.cauchy, 2, median)

hsplus.sample.data.cauchy = melt(extract(smpls.hsplus.res.cauchy, permuted=TRUE))
colnames(hsplus.sample.data.cauchy) = c("iteration", "component", "value", "variable")

The following produces plots for each estimated variable:

hsplus.theta.sample.data.cauchy = hsplus.sample.data.cauchy %>%
  filter(variable %in% c("theta", "lambda")) %>%
  mutate(variable = as.factor(ifelse(variable == "lambda", "kappa", variable)),
         value = ifelse(variable == "kappa", 1/(1+abs(value)), value))

hsplus.theta.sample.data.cauchy.2 = hsplus.theta.sample.data.cauchy %>% 
group_by(component, variable) %>%
summarise(upper = quantile(value, prob=0.95), 
       lower = quantile(value, prob=0.05),
       middle = mean(value))

hsplus.theta.kappa.cauchy.plot = ggplot(hsplus.theta.sample.data.cauchy.2, 
       aes(x=component, y=middle, group=component)) + theme_bw() +
geom_pointrange(aes(ymin=lower, ymax=upper), size=0.3) + 
facet_grid(variable ~ ., scales="free_y") +
scale_colour_manual(values=c("#D55E00","#0072B2")) +
xlab("") + ylab("")
print(hsplus.theta.kappa.cauchy.plot)

HS+ Cauchy prior

ggplot(hsplus.sample.data.cauchy %>% filter(variable == "eta") %>% 
       mutate(value = abs(value)), 
       aes(x=component, y=value, group=component)) + 
geom_boxplot(outlier.size=0.5, outlier.colour="grey") + 
scale_y_log10() +
geom_hline(aes(yintercept=0), size=0.5, linetype="dashed") + 
geom_hline(aes(yintercept=1), size=0.5, linetype="dashed") + 
xlab(expression(eta[i])) + ylab("")