\(\newcommand{\bm}[1]{\boldsymbol{\mathbf{#1}}}\) logo

The scripts used here was taken from the electronic supplementary material.


pkgs

pkgs <- c(
    "mgcv", "INLA", "gamair", "ggplot2", "gridExtra", "tictoc", "tscount")
pacman::p_load(pkgs, character.only = TRUE)

source("mgcv_spde_smooth.R")

Check the file mgcv_spde_smooth.R.

Campylobacterosis infection example (1d smoothing)

data from

Ferland, R., Latour, A. and Oraichi, D. (2006) Integer-valued GARCH process. Journal of Time Series Analysis 27(6), 923-942, http://dx.doi.org/10.1111/j.1467-9892.2006.00496.x

data(campy) ; str(campy)
 Time-Series [1:140] from 1990 to 2001: 2 3 4 1 6 9 12 8 5 7 ...
campy <- data.frame(x = as.numeric(campy), time = 1:length(campy))
## set seed
set.seed(35832)

fit Matern SPDE using mgcv

tic()
mod <- gam(
    x ~ s(time, bs = "spde",
          k = 50), # basis dimension for the mesh
    data = campy,
    control = gam.control(scalePenalty = FALSE),
    family = "poisson",
    method = "REML") # as we want to use Laplace approx. and marginal lkl
toc()
0.081 sec elapsed
## get hyperparameter estimates
tau <- mod$sp[1] ; kappa <- mod$sp[2]
# compute correlation range (rho)
(rho <- sqrt(8 * 1.5) / kappa)
s(time)2 
7.286571 
cat("mgcv:\n")
cat("kappa =", kappa, "\n")
cat("tau =", tau, "\n\n")
mgcv:
kappa = 0.475409 
tau = 3.251945 
## predict over a grid
predday <- seq(min(campy$time), max(campy$time), by = .25)
## get design matrix
Xp <- PredictMat(mod$smooth[[1]], data = data.frame(time = predday))
## add in intercept
Xp <- cbind(1, Xp)
## compute posterior mean
predmu <- exp(Xp %*% coef(mod))
## sample from posterior
nsamp <- 1000
bsamp <- rmvn(nsamp, coef(mod), vcov(mod, unconditional = TRUE))
ysamp <- exp(Xp %*% t(bsamp))
## compute credible interval
predlcl <- apply(ysamp, 1, quantile, .975)
preducl <- apply(ysamp, 1, quantile, .025)

fit Matern SPDE using INLA

mesh <- mod$smooth[[1]]$mesh
spde <- inla.spde2.pcmatern(mesh, alpha = 2,
                            prior.range = c(2, .01),
                            prior.sigma = c(10, .01))
## setup indices and intercept for INLA
day_index <- inla.spde.make.index("time", n.spde = spde$n.spde)
intercept <- matrix(1, nr = nrow(campy), nc = 1)
## create projector matrix (projects from mesh to observation points)
A <- inla.spde.make.A(mesh = mesh, loc = campy$time)
## stack all information INLA needs to do estimation
stk.e <- inla.stack(
    tag = 'est', # tag
    data = list(x = campy$x), # response
    A = list(A, 1), # projector matrix
    effects = list(time = day_index, intercept = intercept)) ## RF index
## stack all the information INLA needs to do predcition
Apred <- inla.spde.make.A(mesh = mesh, loc = predday)
stk.pred <- inla.stack(
    tag = 'pred',
    A = list(Apred, 1),
    data = list(x = NA), # response as NA
    effects = list(time = day_index,
                   intercept = matrix(1, nr = length(predday), nc = 1)))
## stack all the stacks together
stk.full <- inla.stack(stk.e, stk.pred)

## fit model
tic()
inlamod <- inla(x ~ 0 + intercept + f(time, model = spde),
               data=inla.stack.data(stk.full, spde = spde),
               family = "poisson",
               control.predictor = list(compute = TRUE,
                                        A = inla.stack.A(stk.full)),
               control.compute = list(config = TRUE))
toc()
1.26 sec elapsed
## get estimates
samp <- inla.posterior.sample(nsamp, inlamod)
## find where predicted values are
predind <- inla.stack.index(stk.full, tag = "pred")$data
## extract posterior predictions
inlapred <- sapply(samp, FUN = function(x) {x$latent[predind]})
## posterior mean
inlapredmu <- exp(rowMeans(inlapred))
## posterior credible intervals
inlapredlcl <- exp(apply(inlapred, 1, quantile, .975))
inlapreducl <- exp(apply(inlapred, 1, quantile, .025))

## extract range and switch to kappa parameterisation
inla_range <- inlamod$summary.hyperpar[1, 1]
alpha <- 2 ; nu <- alpha - .5
inla_kappa <- sqrt(8 * nu)/inla_range

## extract sd, switch to tau param
inla_sd <- inlamod$summary.hyperpar[2, 1]
## see equation 4 of Lindgren and Rue 2015 (JSS)
inla_tau <- exp(
    .5 * log(gamma(nu) / (gamma(alpha) * (4 * pi)^.5)) -
    log(inla_sd) - nu * log(inla_kappa) )
cat("INLA:\n")
cat("kappa =", inla_kappa, "\n")
cat("tau =", inla_tau, "\n\n")
INLA:
kappa = 0.4289814 
tau = 3.602839 

fit B-spline basis-penalty using mgcv

bsmod <- gam(x ~ s(time, bs = "bs", k = 50),
             family = "poisson", data = campy, method = "REML")

## compute predictions
## get design matrix
bsXp <- PredictMat(bsmod$smooth[[1]], data = data.frame(time = predday))
bsXp <- cbind(1, bsXp)
## compute posterior mean
bspredmu <- exp(bsXp %*% coef(bsmod))
## sample from posterior
bsbsamp <- rmvn(nsamp, coef(bsmod), vcov(bsmod, unconditional = TRUE))
bsysamp <- exp(bsXp %*% t(bsbsamp))
## compute credible interval
bspredlcl <- apply(bsysamp, 1, quantile, .975)
bspreducl <- apply(bsysamp, 1, quantile, .025)

compare fits

## function to make the base plot
baseplot <- function(title){
    rugt <- seq(min(campy$time), max(campy$time), by = 1)
    plot(campy$time, campy$x,
         pch = 19, cex = .6,
         xlab =  "Date", ylab = "Campylobacterosis cases",
         main = title, axes = FALSE, cex.main = 1.75, cex.lab = 1.5)
    axis(1, labels = 1990:2000, at = seq(7, 140, by = 13),
         tick = FALSE, cex.axis = 1.5)
    axis(1, labels = rep("", 12), at = seq(1, 144, by = 13),
         tick = TRUE, cex.axis = 1.5)
    axis(2, cex.axis = 1.5) ; box()
    rug(rugt)
}
par(mfrow = c(3, 1))

baseplot("mgcv SPDE")
## plot predictions
polygon(c(predday, rev(predday)), c(preducl, rev(predlcl)),
        col = grey(80/255, .6), border = NA)
lines(predday, predmu, type = "l", lwd = 2)

## plot predictions
baseplot("INLA SPDE")
polygon(c(predday, rev(predday)), c(inlapreducl, rev(inlapredlcl)),
        col = grey(80/255, .6), border = NA)
lines(predday, inlapredmu, type = "l", lwd = 2)

## plot predictions
baseplot("mgcv B-splines")
polygon(c(predday, rev(predday)), c(bspreducl, rev(bspredlcl)),
        col = grey(80/255, .6), border = NA)
lines(predday, bspredmu, type = "l", lwd = 2)

Aral sea example (2d smoothing)

data from the \(\texttt{gamair}\) package, original source.

## set.seed
set.seed(25853)

data

data(aral) ; str(aral)
'data.frame':   488 obs. of  3 variables:
 $ lon: num  59.6 59.7 59.1 59.1 59.2 ...
 $ lat: num  44.1 44.1 44.1 44.1 44.1 ...
 $ chl: num  9.33 9.33 3.2 11.89 3.43 ...
## boundary of observation window
data(aral.bnd) ; str(aral.bnd)
List of 2
 $ lon: num [1:107] 59.4 59.6 59.9 59.9 60 ...
 $ lat: num [1:107] 46.4 46.4 46.3 46.1 46.2 ...
## split boundary into segments for INLA
bnd <- inla.mesh.segment(cbind(aral.bnd$lon, aral.bnd$lat))
loc <- cbind(aral$lon, aral$lat)

## build a mesh within the boundary
mesh <- inla.mesh.2d(
    boundary = bnd,
    cutoff = .05, # filter away adjacent points
    max.edge = c(.6, .3),
    offset = c(.1, .2)) # offset for extra boundaries, if needed
plot(mesh)

SPDE model with mgcv

tic()
mod <- gam(
    chl ~ s(lon, lat, bs = "spde",
            k = mesh$n, # basis dimension for the mesh
            xt = list(mesh = mesh)),
    data = aral,
    control = gam.control(scalePenalty = FALSE),
    method = "REML") # as we want to use Laplace approx. and marginal lkl
toc()
7.715 sec elapsed
## get estimates
kappa <- mod$sp[2] ; tau <- mod$sp[1]

## compute correlation range (rho) and marginal variance (sigma)
(rho <- sqrt(8) / kappa)
s(lon,lat)2 
  0.7983935 
## see Lindgren et al. (2011) for this formula
(sigma <- 1 / sqrt(tau^2 * 4 * pi * kappa^2))
s(lon,lat)1 
   1.660024 
cat("mgcv:\n")
cat("kappa =", kappa, "\n")
cat("tau =", tau, "\n\n")
mgcv:
kappa = 3.542648 
tau = 0.04796811 

SPDE model with INLA

## create SPDE object
spde <- inla.spde2.pcmatern(mesh = mesh,
                            prior.range = c(.1, .5),
                            prior.sigma = c(10, .5))
## setup estimation stack
A <- inla.spde.make.A(mesh, loc)
intercept <- rep(1, nrow(aral))
stk <- inla.stack(
    tag = 'est', # tag
    data = list(chl = aral$chl), # response
    A = list(A, 1), # two projector matrix
    effects = list(s = 1:spde$n.spde, intercept = intercept))
## model formula
formula <- chl ~ 0 + intercept + f(s, model = spde)
## fit with INLA
tic()
inlamod <- inla(formula,
            data = inla.stack.data(stk),
            control.predictor=list(A = inla.stack.A(stk),
                                   compute = TRUE),
            control.compute=list(config = TRUE))
toc()
2.147 sec elapsed
## extract range and switch to kappa parameterisation
alpha <- 2 ; nu <- alpha - 2/2

inla_range <- inlamod$summary.hyperpar[2, 1]
inla_kappa <- sqrt(8 * nu) / inla_range

## extract sd, switch to tau param
inla_sd <- inlamod$summary.hyperpar[3, 1]
## see equation 4 of Lindgren and Rue 2015 (JSS)
inla_tau <- exp(
    .5 * log(gamma(nu) / (gamma(alpha) * (4 * pi)^.5)) -
    log(inla_sd) - nu * log(inla_kappa) )
cat("INLA:\n")
cat("kappa =", inla_kappa, "\n")
cat("tau =", inla_tau, "\n\n")
INLA:
kappa = 3.434967 
tau = 0.058271 

compare fits

nsamp <- 1000 # number of posterior samples

## samples from mgcv model
modsamp <- rmvn(nsamp, coef(mod), vcov(mod, unconditional = TRUE))
X <- PredictMat(mod$smooth[[1]],
                data = data.frame(lon =  aral$lon, lat = aral$lat))
X <- cbind(1, X)
modpred <- X %*% t(modsamp)

## sample from INLA model
ind <- inla.stack.index(stk, tag = "est")$data
inlasamp <- inla.posterior.sample(nsamp, inlamod)
inlapred <- sapply(inlasamp, FUN = function(x){x$latent[ind]})

# posterior mean difference
preddiff <- modpred - inlapred
diffmu <- rowMeans(preddiff)
diffsd <- apply(preddiff, 1, sd)

## make plot data
pmu <- data.frame(lon = aral$lon, lat = aral$lat, y = diffmu)
psd <- data.frame(lon = aral$lon, lat = aral$lat, y = diffsd)

## posterior mean difference
pltmu <- ggplot(pmu) +
    geom_tile(aes(x = lon, y = lat, fill = y)) +
    coord_equal() +
    theme_minimal(base_size = 14) +
    xlab("Longitude") + ylab("Latitude") +
    scale_fill_viridis_c("Mean diff.")

## posterior std. dev of difference
pltsd <- ggplot(psd) +
    geom_tile(aes(x = lon, y = lat, fill = y)) +
    coord_equal() +
    theme_minimal(base_size = 14) +
    xlab("Longitude") + ylab("Latitude") +
    scale_fill_viridis_c("SD of diff.")

## plot together
grid.arrange(pltmu, pltsd, ncol = 2)