Sometimes people fail….

For the last few weeks I have been working on a way to do something interesting for this entry and as of yet all my ideas have had limited success. So basically this post is just going to rehash what Wikipedia says about data augmentation. But I have appended working code so win?

The basic idea of data augmentation is that MCMC sampling would accept 100 percent of proposed models if the proposal distribution were the target distribution (this is Gibbs sampling). Data augmentation creates auxiliary parameters such that the conditional distribution of each parameter (auxiliary and original) is known analytically given the remaining parameters.

This is not possible for logistic regression but it is possible for probit regression. Thus there is an example of that in the code below and not, for example, a logistic regression. Also, the idea I was working on required finding the integral of a Gaussian (with an arbitrary correlation matrix) from 0 to infinity in all dimensions. If you have any leads on this please leave them in the comments.

Cheers

Code:

P.S. This needs the MASS package to be loaded to run.

#' Draw approximate samples from a unit-variance normal truncated at zero.
#'
#' Approximates the truncated normal by evaluating the unnormalized
#' N(mu, 1) density on an evenly spaced grid and sampling grid points
#' with probability proportional to those weights.
#'
#' @param n    Number of samples to draw.
#' @param mu   Mean of the underlying (untruncated) normal.
#' @param type 1 => truncate to [0, Inf); 0 => truncate to (-Inf, 0].
#' @param N    Number of grid points (larger => finer approximation).
#' @return Numeric vector of n approximate truncated-normal draws.
rtnorm <- function(n, mu, type = 1, N = 10001) {
  if (type == 1) {
    # Positive support: grid from 0 up to at least 5 sd above mu.
    x <- seq(0, max(mu + 5, 5), length.out = N)
  } else if (type == 0) {
    # Negative support: grid from at least 5 sd below mu up to 0.
    x <- seq(min(mu - 5, -5), 0, length.out = N)
  } else {
    # The original code fell through with `samp` undefined here; fail loudly.
    stop("type must be 0 or 1", call. = FALSE)
  }
  # Unnormalized N(mu, 1) density at the grid points; sample() renormalizes.
  W <- exp(-0.5 * (x - mu)^2)
  x[sample.int(N, n, prob = W, replace = TRUE)]
}

#' Gibbs sampler for Bayesian probit regression via data augmentation
#' (Albert & Chib style): simulates binary data from a quadratic probit
#' model, alternates sampling the latent truncated-normal variables and
#' the coefficient vector, then plots a fan chart of pointwise posterior
#' predictive quantiles over the true curve.
#'
#' Requires the MASS package (for mvrnorm) and the sibling rtnorm().
#'
#' @param nd   Number of simulated data points.
#' @param N    Number of posterior samples kept after burn-in.
#' @param burn Number of burn-in iterations discarded.
#' @return Invisibly NULL; called for its plots.
g.probit <- function(nd = 100, N = 1000, burn = 100) {
  # Design matrix: intercept, linear, and quadratic terms in x.
  x <- seq_len(nd)
  A <- cbind(1, x, x^2)

  # Simulate binary responses from the true probit model.
  beta.true <- matrix(c(-10 / 30, 3 / 30, -0.035 / 30), nrow = 3, ncol = 1)
  eta.true <- A %*% beta.true
  P.true <- pnorm(eta.true)
  r <- runif(nd)
  d <- numeric(nd)
  d[r <= P.true] <- 1

  plot(x, P.true, xlab = "x", ylab = "y", type = "l",
       col = "blue", lwd = 2, ylim = c(0, 1))
  points(x, d)

  # Prior: beta ~ N(beta.0, tau) with standard-normal components.
  beta.0 <- matrix(0, nrow = 3, ncol = 1)
  tau <- diag(3)
  tau.inv <- solve(tau)

  # The conditional posterior covariance of beta is constant, so
  # precompute it (and other loop invariants) once.
  Sigma.inv <- tau.inv + t(A) %*% A
  Sigma <- solve(Sigma.inv)
  tau.inv.beta.0 <- tau.inv %*% beta.0

  # Chain state and storage for kept draws.
  beta <- beta.true
  Rec <- data.frame(M1 = seq_len(N), M2 = seq_len(N), M3 = seq_len(N))
  d.star <- numeric(nd)
  pred.rec <- matrix(0, nrow = N, ncol = nd)

  for (i in seq_len(N + burn)) {
    # Sample latent d.star | beta: truncated normal at each point,
    # truncation side chosen by the observed binary outcome.
    A.beta <- A %*% beta
    for (j in seq_len(nd)) {
      d.star[j] <- rtnorm(1, mu = A.beta[j], type = d[j], N = 10001)
    }

    # Sample beta | d.star: conjugate multivariate-normal update.
    mu.temp <- Sigma %*% (tau.inv.beta.0 + t(A) %*% d.star)
    beta <- MASS::mvrnorm(1, mu = mu.temp, Sigma = Sigma)

    # Record post-burn-in draws and their predictive curves.
    if (i > burn) {
      pred.rec[i - burn, ] <- pnorm(t(A %*% beta))
      Rec[i - burn, ] <- beta
    }
  }

  # Pointwise posterior predictive quantiles (2.5%..97.5%) at each x.
  ncolor <- 21
  layers <- matrix(0, nrow = ncolor, ncol = nd)
  for (i in seq_len(nd)) {
    layers[, i] <- quantile(pred.rec[, i],
                            probs = seq(0.025, 0.975, length.out = ncolor))
  }

  # Fan chart: nested polygons between symmetric quantile pairs.
  c.ind <- (ncolor - 1) / 2
  gcol <- rev(heat.colors(c.ind + 5))[6:(c.ind + 5)]

  plot(x, P.true, xlab = "x", ylab = "y", type = "l",
       col = "blue", lwd = 2, ylim = c(0, 1))
  for (i in seq_len(c.ind)) {
    PY <- c(layers[i, ], rev(layers[ncolor + 1 - i, ]))
    Px <- c(x, rev(x))
    polygon(Px, PY, col = gcol[i], border = NA)
  }

  # Posterior median predictive curve, then data and truth on top.
  lines(x, layers[c.ind + 1, ], col = gcol[c.ind + 1], lwd = 2)
  points(x, d)
  lines(x, P.true, col = "blue", lwd = 2)
  points(x, d)

  invisible(NULL)
}

# Run the probit Gibbs-sampler demo with its defaults
# (nd = 100 data points, N = 1000 kept draws, burn = 100 burn-in).
g.probit()