\[ \newcommand{\argmin}{\operatorname{argmin}} \newcommand{\sign}{\operatorname{sign}} \newcommand{\diag}[1]{\operatorname{diag}(#1)} \newcommand{\prox}[2]{\operatorname{prox}_{#1}(#2)} \]
The horseshoe+ prior, as a scale mixture of normals, is given by \[ \begin{align*} (\theta_j | \lambda_j, \tau) &\sim \operatorname{N}(0, \lambda_j^2) \\ \lambda_j &\sim \operatorname{C}^+(0,\eta_j \tau) \\ \eta_j &\sim \operatorname{C}^+(0,1) \end{align*} \] The original horseshoe is \[ \begin{align*} (\theta_j | \lambda_j, \tau) &\sim \operatorname{N}(0, \lambda_j^2) \\ \lambda_j &\sim \operatorname{C}^+(0,\tau) \end{align*} \]
In the following simulations we generate data according to \[ \begin{align*} y_k &\sim \operatorname{N}\left(\theta, \bf{I}\right) \end{align*} \] and we consider the following two \(\tau\) priors: \[ \begin{align*} \tau &\sim \operatorname{Unif}[0,1] \\ \tau &\sim \operatorname{C}^+(0,1/J) \end{align*} \]
Qn=10
A=7
J=200
theta.true = rep(0, J)
theta.true[sample.int(J, Qn)] = A
We construct a signal with 10 non-zero components of magnitude 7 and dimension \(J=200\).
We generate the data with the following:
test.data = list('J'=J, 'y'=rnorm(J,theta.true,1))
First, we load the necessary R packages:
library(rstan)
library(ggplot2)
theme_set(theme_bw())
library(plyr)
library(dplyr)
library(reshape2)
set_cppo("fast")
The horseshoe+ is implemented in Stan with the following
stan.hsplus.unif.code = "
data {
int<lower=1> J;
vector[J] y;
}
parameters {
vector[J] theta_step;
vector[J] lambda_step;
vector[J] eta;
real tau;
}
transformed parameters {
vector[J] theta;
vector[J] lambda;
lambda <- (lambda_step .* eta) * tau;
theta <- ((theta_step .* lambda_step) .* eta) * tau;
}
model {
tau ~ uniform(0, 1);
eta ~ cauchy(0, 1);
lambda_step ~ cauchy(0, 1);
theta_step ~ normal(0, 1);
y ~ normal(theta, 1);
}
"
stan.hsplus.cauchy.code = "
data {
int<lower=1> J;
vector[J] y;
}
parameters {
vector[J] theta_step;
vector[J] lambda_step;
vector[J] eta;
real tau;
}
transformed parameters {
vector[J] theta;
vector[J] lambda;
lambda <- (lambda_step .* eta) * tau;
theta <- ((theta_step .* lambda_step) .* eta) * tau;
}
model {
tau ~ cauchy(0, 1.0/J);
eta ~ cauchy(0, 1);
lambda_step ~ cauchy(0, 1);
theta_step ~ normal(0, 1);
y ~ normal(theta, 1);
}
"
and the regular horseshoe:
stan.hs.unif.code = "
data {
int<lower=1> J;
vector[J] y;
}
parameters {
vector[J] theta_step;
vector[J] lambda_step;
real tau;
}
transformed parameters {
vector[J] theta;
vector[J] lambda;
lambda <- lambda_step * tau;
theta <- (theta_step .* lambda_step) * tau;
}
model {
tau ~ uniform(0, 1);
lambda_step ~ cauchy(0, 1);
theta_step ~ normal(0, 1);
y ~ normal(theta, 1);
}
"
stan.hs.cauchy.code = "
data {
int<lower=1> J;
vector[J] y;
}
parameters {
vector[J] theta_step;
vector[J] lambda_step;
real tau;
}
transformed parameters {
vector[J] theta;
vector[J] lambda;
lambda <- lambda_step * tau;
theta <- (theta_step .* lambda_step) * tau;
}
model {
tau ~ cauchy(0, 1.0/J);
lambda_step ~ cauchy(0, 1);
theta_step ~ normal(0, 1);
y ~ normal(theta, 1);
}
"
For comparison, we include the DL prior from Section 2.4 of 1:
stan.dl.gamma.code = '
data {
int<lower=1> J;
real<lower=0> A;
vector[J] y;
}
transformed data {
vector<lower=0>[J] Avec;
real<lower=0> JA;
Avec <- rep_vector(A, J);
JA <- J * A;
}
parameters {
vector[J] theta_step;
simplex[J] phi;
vector<lower=0>[J] psi;
real<lower=0> tau;
}
transformed parameters {
vector[J] theta;
for (j in 1:J) {
theta[j] <- ((theta_step[j] .* sqrt(psi[j])) .* phi[j]) * tau;
}
}
model {
tau ~ gamma(JA, 0.5);
psi ~ exponential(0.5);
phi ~ dirichlet(Avec);
theta_step ~ normal(0, 1);
y ~ normal(theta, 1);
}
'
It’s necessary to compile the code in Stan (we use Clang):
stan.hsplus.cauchy.fit = stan_model(model_code=stan.hsplus.cauchy.code,
model_name="hs+ cauchy")
stan.hsplus.unif.fit = stan_model(model_code=stan.hsplus.unif.code,
model_name="hs+ unif")
stan.hs.cauchy.fit = stan_model(model_code=stan.hs.cauchy.code,
model_name="hs cauchy")
stan.hs.unif.fit = stan_model(model_code=stan.hs.unif.code,
model_name="hs unif")
stan.dl.gamma.fit = stan_model(model_code=stan.dl.gamma.code, model_name="dl gamma")
n.iters = 1000
n.chains = 3
Stan is run with 3 chains of 1000 iterations each.
smpls.hsplus.res.cauchy = sampling(stan.hsplus.cauchy.fit,
data = test.data,
iter = n.iters,
pars = c("theta", "lambda", "eta", "tau"),
#init = 0,
#seed = rng.seed,
chains = n.chains)
theta.smpls.hsplus.cauchy = extract(smpls.hsplus.res.cauchy, pars=c("theta"), permuted=TRUE)[[1]]
med.theta.hsplus.cauchy = apply(theta.smpls.hsplus.cauchy, 2, median)
hsplus.sample.data.cauchy = melt(extract(smpls.hsplus.res.cauchy, permuted=TRUE))
colnames(hsplus.sample.data.cauchy) = c("iteration", "component", "value", "variable")
The following produces plots for each estimated variable:
hsplus.theta.sample.data.cauchy = hsplus.sample.data.cauchy %>%
filter(variable %in% c("theta", "lambda")) %>%
mutate(variable = as.factor(ifelse(variable == "lambda", "kappa", variable)),
value = ifelse(variable == "kappa", 1/(1+abs(value)), value))
hsplus.theta.sample.data.cauchy.2 = hsplus.theta.sample.data.cauchy %>%
group_by(component, variable) %>%
summarise(upper = quantile(value, prob=0.95),
lower = quantile(value, prob=0.05),
middle = mean(value))
hsplus.theta.kappa.cauchy.plot = ggplot(hsplus.theta.sample.data.cauchy.2,
aes(x=component, y=middle, group=component)) + theme_bw() +
geom_pointrange(aes(ymin=lower, ymax=upper), size=0.3) +
facet_grid(variable ~ ., scales="free_y") +
scale_colour_manual(values=c("#D55E00","#0072B2")) +
xlab("") + ylab("")
print(hsplus.theta.kappa.cauchy.plot)
ggplot(hsplus.sample.data.cauchy %>% filter(variable == "eta") %>%
mutate(value = abs(value)),
aes(x=component, y=value, group=component)) +
geom_boxplot(outlier.size=0.5, outlier.colour="grey") +
scale_y_log10() +
geom_hline(aes(yintercept=0), size=0.5, linetype="dashed") +
geom_hline(aes(yintercept=1), size=0.5, linetype="dashed") +
xlab(expression(eta[i])) + ylab("")