我正在从头开始复制第4.2.1节的结果
悉达多(Siddhartha Chib)
美国统计协会杂志,第一卷。90,第432号。(1995年12月),第1313-1321页。
它是具有已知组件数的法线模型的混合。
该模型的Gibbs采样器是使用Tanner和Wong的数据增强技术实现的。引入了一组分配变量并假设值为,我们指定和f(x_i \ mid z ,\ mu,\ sigma ^ 2)= \ mathrm {N}(x_i \ mid \ mu_ {z_i},\ sigma ^ 2_ {z_i})。因此,在z_i上的积分给出了原始可能性(*)。
该数据集是由来自日冕的星系的速度形成的。
set.seed(1701)
x <- c( 9.172, 9.350, 9.483, 9.558, 9.775, 10.227, 10.406, 16.084, 16.170, 18.419, 18.552, 18.600, 18.927,
19.052, 19.070, 19.330, 19.343, 19.349, 19.440, 19.473, 19.529, 19.541, 19.547, 19.663, 19.846, 19.856,
19.863, 19.914, 19.918, 19.973, 19.989, 20.166, 20.175, 20.179, 20.196, 20.215, 20.221, 20.415, 20.629,
20.795, 20.821, 20.846, 20.875, 20.986, 21.137, 21.492, 21.701, 21.814, 21.921, 21.960, 22.185, 22.209,
22.242, 22.249, 22.314, 22.374, 22.495, 22.746, 22.747, 22.888, 22.914, 23.206, 23.241, 23.263, 23.484,
23.538, 23.542, 23.666, 23.706, 23.711, 24.129, 24.285, 24.289, 24.366, 24.717, 24.990, 25.633, 26.960,
26.995, 32.065, 32.789, 34.279 )
nn <- length(x)
我们假设,该的,和的是独立的先验与
k <- 3
mu0 <- 20
va0 <- 100
nu0 <- 6
de0 <- 40
a <- rep(1, k)
使用贝叶斯定理,全部条件为 其中 与
目标是计算模型的边际可能性的估计。Chib的方法从使用完整条件的吉布斯采样器的首次运行开始。
burn_in <- 1000
run <- 15000
cat("First Gibbs run (full):\n")
N <- burn_in + run
w <- matrix(1, nrow = N, ncol = k)
mu <- matrix(0, nrow = N, ncol = k)
va <- matrix(1, nrow = N, ncol = k)
z <- matrix(1, nrow = N, ncol = nn)
n <- integer(k)
m <- numeric(k)
de <- numeric(k)
rdirichlet <- function(a) { y <- rgamma(length(a), a, 1); y / sum(y) }
pb <- txtProgressBar(min = 2, max = N, style = 3)
z[1,] <- sample.int(k, size = nn, replace = TRUE)
for (t in 2:N) {
n <- tabulate(z[t-1,], nbins = k)
w[t,] <- rdirichlet(a + n)
m <- sapply(1:k, function(j) sum(x[z[t-1,]==j]))
m[n > 0] <- m[n > 0] / n[n > 0]
mu[t,] <- rnorm(k, mean = (n*m*va0+mu0*va[t-1,])/(n*va0+va[t-1,]), sd = sqrt(va0*va[t-1,]/(n*va0+va[t-1,])))
de <- sapply(1:k, function(j) sum((x[z[t-1,]==j] - mu[t,j])^2))
va[t,] <- 1 / rgamma(k, shape = (nu0+n)/2, rate = (de0+de)/2)
z[t,] <- sapply(1:nn, function(i) sample.int(k, size = 1, prob = exp(log(w[t,]) + dnorm(x[i], mean = mu[t,], sd = sqrt(va[t,]), log = TRUE))))
setTxtProgressBar(pb, t)
}
close(pb)
从第一次运行中,我们得到最大似然的近似点。由于可能性实际上是无限的,因此此过程可能给出的是近似本地MAP。
w <- w[(burn_in+1):N,]
mu <- mu[(burn_in+1):N,]
va <- va[(burn_in+1):N,]
z <- z[(burn_in+1):N,]
N <- N - burn_in
log_L <- function(x, w, mu, va) sum(log(sapply(1:nn, function(i) sum(exp(log(w) + dnorm(x[i], mean = mu, sd = sqrt(va), log = TRUE))))))
ts <- which.max(sapply(1:N, function(t) log_L(x, w[t,], mu[t,], va[t,])))
ws <- w[ts,]
mus <- mu[ts,]
vas <- va[ts,]
Chib对边际似然的对数估计为
我们已经有了前两个学期。
log_prior <- function(w, mu, va) {
lgamma(sum(a)) - sum(lgamma(a)) + sum((a-1)*log(w))
+ sum(dnorm(mu, mean = mu0, sd = sqrt(va0), log = TRUE))
+ sum((nu0/2)*log(de0/2) - lgamma(nu0/2) - (nu0/2+1)*log(va) - de0/(2*va))
}
chib <- log_L(x, ws, mus, vas) + log_prior(ws, mus, vas)
的Rao-Blackwellized估计为 并很容易获得从第一次吉布斯跑。
pi.mu_va.z.x <- function(mu, va, z) {
n <- tabulate(z, nbins = k)
m <- sapply(1:k, function(j) sum(x[z==j]))
m[n > 0] <- m[n > 0] / n[n > 0]
exp(sum(dnorm(mu, mean = (n*m*va0+mu0*va)/(n*va0+va), sd = sqrt(va0*va/(n*va0+va)), log = TRUE)))
}
chib <- chib - log(mean(sapply(1:N, function(t) pi.mu_va.z.x(mus, va[t,], z[t,]))))
的Rao-Blackwellized估计为 并且是根据第二个简化的Gibbs运行计算得出的,其中不会更新,而是在每个迭代步骤等于。
cat("Second Gibbs run (reduced):\n")
N <- burn_in + run
w <- matrix(1, nrow = N, ncol = k)
va <- matrix(1, nrow = N, ncol = k)
z <- matrix(1, nrow = N, ncol = nn)
pb <- txtProgressBar(min = 2, max = N, style = 3)
z[1,] <- sample.int(k, size = nn, replace = TRUE)
for (t in 2:N) {
n <- tabulate(z[t-1,], nbins = k)
w[t,] <- rdirichlet(a + n)
de <- sapply(1:k, function(j) sum((x[z[t-1,]==j] - mus[j])^2))
va[t,] <- 1 / rgamma(k, shape = (nu0+n)/2, rate = (de0+de)/2)
z[t,] <- sapply(1:nn, function(i) sample.int(k, size = 1, prob = exp(log(w[t,]) + dnorm(x[i], mean = mus, sd = sqrt(va[t,]), log = TRUE))))
setTxtProgressBar(pb, t)
}
close(pb)
w <- w[(burn_in+1):N,]
va <- va[(burn_in+1):N,]
z <- z[(burn_in+1):N,]
N <- N - burn_in
pi.va_mu.z.x <- function(va, mu, z) {
n <- tabulate(z, nbins = k)
de <- sapply(1:k, function(j) sum((x[z==j] - mu[j])^2))
exp(sum(((nu0+n)/2)*log((de0+de)/2) - lgamma((nu0+n)/2) - ((nu0+n)/2+1)*log(va) - (de0+de)/(2*va)))
}
chib <- chib - log(mean(sapply(1:N, function(t) pi.va_mu.z.x(vas, mus, z[t,]))))
同样,的Rao-Blackwellized估计为 和,是根据第三次简化的Gibbs计算得出的,其中和不会更新,但等于和分别在每个迭代步骤。
cat("Third Gibbs run (reduced):\n")
N <- burn_in + run
w <- matrix(1, nrow = N, ncol = k)
z <- matrix(1, nrow = N, ncol = nn)
pb <- txtProgressBar(min = 2, max = N, style = 3)
z[1,] <- sample.int(k, size = nn, replace = TRUE)
for (t in 2:N) {
n <- tabulate(z[t-1,], nbins = k)
w[t,] <- rdirichlet(a + n)
z[t,] <- sapply(1:nn, function(i) sample.int(k, size = 1, prob = exp(log(w[t,]) + dnorm(x[i], mean = mus, sd = sqrt(vas), log = TRUE))))
setTxtProgressBar(pb, t)
}
close(pb)
w <- w[(burn_in+1):N,]
z <- z[(burn_in+1):N,]
N <- N - burn_in
pi.w_z.x <- function(w, z) {
n <- tabulate(z, nbins = k)
exp(lgamma(sum(a+n)) - sum(lgamma(a+n)) + sum((a+n-1)*log(w)))
}
chib <- chib - log(mean(sapply(1:N, function(t) pi.w_z.x(ws, z[t,]))))
这一切之后,我们得到一个数估计这是一个比报告奇夫更大:蒙特卡罗错误。
为了检查是否以某种方式弄乱了Gibbs采样器,我使用RJAGS重新实现了整个过程。以下代码给出了相同的结果。
x <- c( 9.172, 9.350, 9.483, 9.558, 9.775, 10.227, 10.406, 16.084, 16.170, 18.419, 18.552, 18.600, 18.927, 19.052, 19.070, 19.330,
19.343, 19.349, 19.440, 19.473, 19.529, 19.541, 19.547, 19.663, 19.846, 19.856, 19.863, 19.914, 19.918, 19.973, 19.989, 20.166,
20.175, 20.179, 20.196, 20.215, 20.221, 20.415, 20.629, 20.795, 20.821, 20.846, 20.875, 20.986, 21.137, 21.492, 21.701, 21.814,
21.921, 21.960, 22.185, 22.209, 22.242, 22.249, 22.314, 22.374, 22.495, 22.746, 22.747, 22.888, 22.914, 23.206, 23.241, 23.263,
23.484, 23.538, 23.542, 23.666, 23.706, 23.711, 24.129, 24.285, 24.289, 24.366, 24.717, 24.990, 25.633, 26.960, 26.995, 32.065,
32.789, 34.279 )
library(rjags)
nn <- length(x)
k <- 3
mu0 <- 20
va0 <- 100
nu0 <- 6
de0 <- 40
a <- rep(1, k)
burn_in <- 10^3
N <- 10^4
full <- "
model {
for (i in 1:n) {
x[i] ~ dnorm(mu[z[i]], tau[z[i]])
z[i] ~ dcat(w[])
}
for (i in 1:k) {
mu[i] ~ dnorm(mu0, 1/va0)
tau[i] ~ dgamma(nu0/2, de0/2)
va[i] <- 1/tau[i]
}
w ~ ddirich(a)
}
"
data <- list(x = x, n = nn, k = k, mu0 = mu0, va0 = va0, nu0 = nu0, de0 = de0, a = a)
model <- jags.model(textConnection(full), data = data, n.chains = 1, n.adapt = 100)
update(model, n.iter = burn_in)
samples <- jags.samples(model, c("mu", "va", "w", "z"), n.iter = N)
mu <- matrix(samples$mu, nrow = N, byrow = TRUE)
va <- matrix(samples$va, nrow = N, byrow = TRUE)
w <- matrix(samples$w, nrow = N, byrow = TRUE)
z <- matrix(samples$z, nrow = N, byrow = TRUE)
log_L <- function(x, w, mu, va) sum(log(sapply(1:nn, function(i) sum(exp(log(w) + dnorm(x[i], mean = mu, sd = sqrt(va), log = TRUE))))))
ts <- which.max(sapply(1:N, function(t) log_L(x, w[t,], mu[t,], va[t,])))
ws <- w[ts,]
mus <- mu[ts,]
vas <- va[ts,]
log_prior <- function(w, mu, va) {
lgamma(sum(a)) - sum(lgamma(a)) + sum((a-1)*log(w))
+ sum(dnorm(mu, mean = mu0, sd = sqrt(va0), log = TRUE))
+ sum((nu0/2)*log(de0/2) - lgamma(nu0/2) - (nu0/2+1)*log(va) - de0/(2*va))
}
chib <- log_L(x, ws, mus, vas) + log_prior(ws, mus, vas)
cat("log-likelihood + log-prior =", chib, "\n")
pi.mu_va.z.x <- function(mu, va, z, x) {
n <- sapply(1:k, function(j) sum(z==j))
m <- sapply(1:k, function(j) sum(x[z==j]))
m[n > 0] <- m[n > 0] / n[n > 0]
exp(sum(dnorm(mu, mean = (n*m*va0+mu0*va)/(n*va0+va), sd = sqrt(va0*va/(n*va0+va)), log = TRUE)))
}
chib <- chib - log(mean(sapply(1:N, function(t) pi.mu_va.z.x(mus, va[t,], z[t,], x))))
cat("log-likelihood + log-prior - log-pi.mu_ =", chib, "\n")
fixed.mu <- "
model {
for (i in 1:n) {
x[i] ~ dnorm(mus[z[i]], tau[z[i]])
z[i] ~ dcat(w[])
}
for (i in 1:k) {
tau[i] ~ dgamma(nu0/2, de0/2)
va[i] <- 1/tau[i]
}
w ~ ddirich(a)
}
"
data <- list(x = x, n = nn, k = k, nu0 = nu0, de0 = de0, a = a, mus = mus)
model <- jags.model(textConnection(fixed.mu), data = data, n.chains = 1, n.adapt = 100)
update(model, n.iter = burn_in)
samples <- jags.samples(model, c("va", "w", "z"), n.iter = N)
va <- matrix(samples$va, nrow = N, byrow = TRUE)
w <- matrix(samples$w, nrow = N, byrow = TRUE)
z <- matrix(samples$z, nrow = N, byrow = TRUE)
pi.va_mu.z.x <- function(va, mu, z, x) {
n <- sapply(1:k, function(j) sum(z==j))
de <- sapply(1:k, function(j) sum((x[z==j] - mu[j])^2))
exp(sum(((nu0+n)/2)*log((de0+de)/2) - lgamma((nu0+n)/2) - ((nu0+n)/2+1)*log(va) - (de0+de)/(2*va)))
}
chib <- chib - log(mean(sapply(1:N, function(t) pi.va_mu.z.x(vas, mus, z[t,], x))))
cat("log-likelihood + log-prior - log-pi.mu_ - log-pi.va_ =", chib, "\n")
fixed.mu.and.va <- "
model {
for (i in 1:n) {
x[i] ~ dnorm(mus[z[i]], 1/vas[z[i]])
z[i] ~ dcat(w[])
}
w ~ ddirich(a)
}
"
data <- list(x = x, n = nn, a = a, mus = mus, vas = vas)
model <- jags.model(textConnection(fixed.mu.and.va), data = data, n.chains = 1, n.adapt = 100)
update(model, n.iter = burn_in)
samples <- jags.samples(model, c("w", "z"), n.iter = N)
w <- matrix(samples$w, nrow = N, byrow = TRUE)
z <- matrix(samples$z, nrow = N, byrow = TRUE)
pi.w_z.x <- function(w, z, x) {
n <- sapply(1:k, function(j) sum(z==j))
exp(lgamma(sum(a)+nn) - sum(lgamma(a+n)) + sum((a+n-1)*log(w)))
}
chib <- chib - log(mean(sapply(1:N, function(t) pi.w_z.x(ws, z[t,], x))))
cat("log-likelihood + log-prior - log-pi.mu_ - log-pi.va_ - log-pi.w_ =", chib, "\n")
我的问题是,在上面的描述中是否存在对Chib方法的误解或在实现过程中的任何错误。