The scripts used here was taken from the electronic supplementary material.
pkgs <- c(
"mgcv", "INLA", "gamair", "ggplot2", "gridExtra", "tictoc", "tscount")
pacman::p_load(pkgs, character.only = TRUE)
source("mgcv_spde_smooth.R")
Check the file mgcv_spde_smooth.R.
data from
Ferland, R., Latour, A. and Oraichi, D. (2006) Integer-valued GARCH process. Journal of Time Series Analysis 27(6), 923-942, http://dx.doi.org/10.1111/j.1467-9892.2006.00496.x
data(campy) ; str(campy)
Time-Series [1:140] from 1990 to 2001: 2 3 4 1 6 9 12 8 5 7 ...
campy <- data.frame(x = as.numeric(campy), time = 1:length(campy))
## set seed
set.seed(35832)
tic()
mod <- gam(
x ~ s(time, bs = "spde",
k = 50), # basis dimension for the mesh
data = campy,
control = gam.control(scalePenalty = FALSE),
family = "poisson",
method = "REML") # as we want to use Laplace approx. and marginal lkl
toc()
0.081 sec elapsed
## get hyperparameter estimates
tau <- mod$sp[1] ; kappa <- mod$sp[2]
# compute correlation range (rho)
(rho <- sqrt(8 * 1.5) / kappa)
s(time)2
7.286571
cat("mgcv:\n")
cat("kappa =", kappa, "\n")
cat("tau =", tau, "\n\n")
mgcv:
kappa = 0.475409
tau = 3.251945
## predict over a grid
predday <- seq(min(campy$time), max(campy$time), by = .25)
## get design matrix
Xp <- PredictMat(mod$smooth[[1]], data = data.frame(time = predday))
## add in intercept
Xp <- cbind(1, Xp)
## compute posterior mean
predmu <- exp(Xp %*% coef(mod))
## sample from posterior
nsamp <- 1000
bsamp <- rmvn(nsamp, coef(mod), vcov(mod, unconditional = TRUE))
ysamp <- exp(Xp %*% t(bsamp))
## compute credible interval
predlcl <- apply(ysamp, 1, quantile, .975)
preducl <- apply(ysamp, 1, quantile, .025)
mesh <- mod$smooth[[1]]$mesh
spde <- inla.spde2.pcmatern(mesh, alpha = 2,
prior.range = c(2, .01),
prior.sigma = c(10, .01))
## setup indices and intercept for INLA
day_index <- inla.spde.make.index("time", n.spde = spde$n.spde)
intercept <- matrix(1, nr = nrow(campy), nc = 1)
## create projector matrix (projects from mesh to observation points)
A <- inla.spde.make.A(mesh = mesh, loc = campy$time)
## stack all information INLA needs to do estimation
stk.e <- inla.stack(
tag = 'est', # tag
data = list(x = campy$x), # response
A = list(A, 1), # projector matrix
effects = list(time = day_index, intercept = intercept)) ## RF index
## stack all the information INLA needs to do predcition
Apred <- inla.spde.make.A(mesh = mesh, loc = predday)
stk.pred <- inla.stack(
tag = 'pred',
A = list(Apred, 1),
data = list(x = NA), # response as NA
effects = list(time = day_index,
intercept = matrix(1, nr = length(predday), nc = 1)))
## stack all the stacks together
stk.full <- inla.stack(stk.e, stk.pred)
## fit model
tic()
inlamod <- inla(x ~ 0 + intercept + f(time, model = spde),
data=inla.stack.data(stk.full, spde = spde),
family = "poisson",
control.predictor = list(compute = TRUE,
A = inla.stack.A(stk.full)),
control.compute = list(config = TRUE))
toc()
1.26 sec elapsed
## get estimates
samp <- inla.posterior.sample(nsamp, inlamod)
## find where predicted values are
predind <- inla.stack.index(stk.full, tag = "pred")$data
## extract posterior predictions
inlapred <- sapply(samp, FUN = function(x) {x$latent[predind]})
## posterior mean
inlapredmu <- exp(rowMeans(inlapred))
## posterior credible intervals
inlapredlcl <- exp(apply(inlapred, 1, quantile, .975))
inlapreducl <- exp(apply(inlapred, 1, quantile, .025))
## extract range and switch to kappa parameterisation
inla_range <- inlamod$summary.hyperpar[1, 1]
alpha <- 2 ; nu <- alpha - .5
inla_kappa <- sqrt(8 * nu)/inla_range
## extract sd, switch to tau param
inla_sd <- inlamod$summary.hyperpar[2, 1]
## see equation 4 of Lindgren and Rue 2015 (JSS)
inla_tau <- exp(
.5 * log(gamma(nu) / (gamma(alpha) * (4 * pi)^.5)) -
log(inla_sd) - nu * log(inla_kappa) )
cat("INLA:\n")
cat("kappa =", inla_kappa, "\n")
cat("tau =", inla_tau, "\n\n")
INLA:
kappa = 0.4289814
tau = 3.602839
bsmod <- gam(x ~ s(time, bs = "bs", k = 50),
family = "poisson", data = campy, method = "REML")
## compute predictions
## get design matrix
bsXp <- PredictMat(bsmod$smooth[[1]], data = data.frame(time = predday))
bsXp <- cbind(1, bsXp)
## compute posterior mean
bspredmu <- exp(bsXp %*% coef(bsmod))
## sample from posterior
bsbsamp <- rmvn(nsamp, coef(bsmod), vcov(bsmod, unconditional = TRUE))
bsysamp <- exp(bsXp %*% t(bsbsamp))
## compute credible interval
bspredlcl <- apply(bsysamp, 1, quantile, .975)
bspreducl <- apply(bsysamp, 1, quantile, .025)
## function to make the base plot
baseplot <- function(title){
rugt <- seq(min(campy$time), max(campy$time), by = 1)
plot(campy$time, campy$x,
pch = 19, cex = .6,
xlab = "Date", ylab = "Campylobacterosis cases",
main = title, axes = FALSE, cex.main = 1.75, cex.lab = 1.5)
axis(1, labels = 1990:2000, at = seq(7, 140, by = 13),
tick = FALSE, cex.axis = 1.5)
axis(1, labels = rep("", 12), at = seq(1, 144, by = 13),
tick = TRUE, cex.axis = 1.5)
axis(2, cex.axis = 1.5) ; box()
rug(rugt)
}
par(mfrow = c(3, 1))
baseplot("mgcv SPDE")
## plot predictions
polygon(c(predday, rev(predday)), c(preducl, rev(predlcl)),
col = grey(80/255, .6), border = NA)
lines(predday, predmu, type = "l", lwd = 2)
## plot predictions
baseplot("INLA SPDE")
polygon(c(predday, rev(predday)), c(inlapreducl, rev(inlapredlcl)),
col = grey(80/255, .6), border = NA)
lines(predday, inlapredmu, type = "l", lwd = 2)
## plot predictions
baseplot("mgcv B-splines")
polygon(c(predday, rev(predday)), c(bspreducl, rev(bspredlcl)),
col = grey(80/255, .6), border = NA)
lines(predday, bspredmu, type = "l", lwd = 2)
data from the \(\texttt{gamair}\) package, original source.
## set.seed
set.seed(25853)
data(aral) ; str(aral)
'data.frame': 488 obs. of 3 variables:
$ lon: num 59.6 59.7 59.1 59.1 59.2 ...
$ lat: num 44.1 44.1 44.1 44.1 44.1 ...
$ chl: num 9.33 9.33 3.2 11.89 3.43 ...
## boundary of observation window
data(aral.bnd) ; str(aral.bnd)
List of 2
$ lon: num [1:107] 59.4 59.6 59.9 59.9 60 ...
$ lat: num [1:107] 46.4 46.4 46.3 46.1 46.2 ...
## split boundary into segments for INLA
bnd <- inla.mesh.segment(cbind(aral.bnd$lon, aral.bnd$lat))
loc <- cbind(aral$lon, aral$lat)
## build a mesh within the boundary
mesh <- inla.mesh.2d(
boundary = bnd,
cutoff = .05, # filter away adjacent points
max.edge = c(.6, .3),
offset = c(.1, .2)) # offset for extra boundaries, if needed
plot(mesh)
tic()
mod <- gam(
chl ~ s(lon, lat, bs = "spde",
k = mesh$n, # basis dimension for the mesh
xt = list(mesh = mesh)),
data = aral,
control = gam.control(scalePenalty = FALSE),
method = "REML") # as we want to use Laplace approx. and marginal lkl
toc()
7.715 sec elapsed
## get estimates
kappa <- mod$sp[2] ; tau <- mod$sp[1]
## compute correlation range (rho) and marginal variance (sigma)
(rho <- sqrt(8) / kappa)
s(lon,lat)2
0.7983935
## see Lindgren et al. (2011) for this formula
(sigma <- 1 / sqrt(tau^2 * 4 * pi * kappa^2))
s(lon,lat)1
1.660024
cat("mgcv:\n")
cat("kappa =", kappa, "\n")
cat("tau =", tau, "\n\n")
mgcv:
kappa = 3.542648
tau = 0.04796811
## create SPDE object
spde <- inla.spde2.pcmatern(mesh = mesh,
prior.range = c(.1, .5),
prior.sigma = c(10, .5))
## setup estimation stack
A <- inla.spde.make.A(mesh, loc)
intercept <- rep(1, nrow(aral))
stk <- inla.stack(
tag = 'est', # tag
data = list(chl = aral$chl), # response
A = list(A, 1), # two projector matrix
effects = list(s = 1:spde$n.spde, intercept = intercept))
## model formula
formula <- chl ~ 0 + intercept + f(s, model = spde)
## fit with INLA
tic()
inlamod <- inla(formula,
data = inla.stack.data(stk),
control.predictor=list(A = inla.stack.A(stk),
compute = TRUE),
control.compute=list(config = TRUE))
toc()
2.147 sec elapsed
## extract range and switch to kappa parameterisation
alpha <- 2 ; nu <- alpha - 2/2
inla_range <- inlamod$summary.hyperpar[2, 1]
inla_kappa <- sqrt(8 * nu) / inla_range
## extract sd, switch to tau param
inla_sd <- inlamod$summary.hyperpar[3, 1]
## see equation 4 of Lindgren and Rue 2015 (JSS)
inla_tau <- exp(
.5 * log(gamma(nu) / (gamma(alpha) * (4 * pi)^.5)) -
log(inla_sd) - nu * log(inla_kappa) )
cat("INLA:\n")
cat("kappa =", inla_kappa, "\n")
cat("tau =", inla_tau, "\n\n")
INLA:
kappa = 3.434967
tau = 0.058271
nsamp <- 1000 # number of posterior samples
## samples from mgcv model
modsamp <- rmvn(nsamp, coef(mod), vcov(mod, unconditional = TRUE))
X <- PredictMat(mod$smooth[[1]],
data = data.frame(lon = aral$lon, lat = aral$lat))
X <- cbind(1, X)
modpred <- X %*% t(modsamp)
## sample from INLA model
ind <- inla.stack.index(stk, tag = "est")$data
inlasamp <- inla.posterior.sample(nsamp, inlamod)
inlapred <- sapply(inlasamp, FUN = function(x){x$latent[ind]})
# posterior mean difference
preddiff <- modpred - inlapred
diffmu <- rowMeans(preddiff)
diffsd <- apply(preddiff, 1, sd)
## make plot data
pmu <- data.frame(lon = aral$lon, lat = aral$lat, y = diffmu)
psd <- data.frame(lon = aral$lon, lat = aral$lat, y = diffsd)
## posterior mean difference
pltmu <- ggplot(pmu) +
geom_tile(aes(x = lon, y = lat, fill = y)) +
coord_equal() +
theme_minimal(base_size = 14) +
xlab("Longitude") + ylab("Latitude") +
scale_fill_viridis_c("Mean diff.")
## posterior std. dev of difference
pltsd <- ggplot(psd) +
geom_tile(aes(x = lon, y = lat, fill = y)) +
coord_equal() +
theme_minimal(base_size = 14) +
xlab("Longitude") + ylab("Latitude") +
scale_fill_viridis_c("SD of diff.")
## plot together
grid.arrange(pltmu, pltsd, ncol = 2)