rm(list=ls())
library(spBayes)
library(fields)
library(MBA)
library(geoR)
library(raster)
library(leaflet)
library(viridis)
Load data and remove missing data. These data are part of the fields
package. The outcome is CO.tmin.MAM.climate
is minimum temperature in March, April, and May. The predictor is elevation, i.e., CO.elev
.
data(COmonthlyMet)
y <- CO.tmin.MAM.climate
coords <- CO.loc
elev <- CO.elev
missing <- is.na(y)
y.ho <- y[missing]
coords.ho <- coords[missing,]
elev.ho <- elev[missing]
y <- y[!missing]
coords <- coords[!missing,]
elev <- elev[!missing]
Do a little Exploratory data analysis (EDA).
##make a design matrix for regression analysis
dat <- data.frame(y, coords, elev)
pal <- colorNumeric(palette = "YlOrRd", domain = y)
##https://rstudio.github.io/leaflet/
leaflet(dat) %>% addCircleMarkers(lng = ~lon, lat = ~lat, col = ~pal(y),
stroke = FALSE, fillOpacity = 0.75,
popup=~paste("Temp C:", round(y,2))) %>%
addProviderTiles("Esri.WorldImagery", group="Satellite") %>%
addProviderTiles("Stamen.Terrain", group="Terrain") %>%
addProviderTiles("Esri.WorldTopoMap", group="Topographical") %>%
addLegend("bottomright", pal = pal, values = ~y, title = "Temp C" , opacity = 0.75) %>%
addLayersControl(
baseGroup = c("Topographical", "Satellite","Terrain")
)
##LM residual EDA
lm.m <- lm(y ~ elev, data=dat)
summary(lm.m)
##
## Call:
## lm(formula = y ~ elev, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6185 -0.9858 -0.0704 0.9781 4.7948
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.3283214 0.3633448 22.92 <2e-16 ***
## elev -0.0053176 0.0001908 -27.87 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.632 on 211 degrees of freedom
## Multiple R-squared: 0.7864, Adjusted R-squared: 0.7854
## F-statistic: 776.7 on 1 and 211 DF, p-value: < 2.2e-16
lm.resids <- resid(lm.m)
d.max <- max(iDist(coords))
d.max
## [1] 9.15006
##variogram
par(mfrow=c(1,2))
v <- variog(coords=coords, data=y, uvec=(seq(0, 0.75*d.max, length=10)))
## variog: computing omnidirectional variogram
plot(v)
v <- variog(coords=coords, data=lm.resids, uvec=(seq(0, 0.5*d.max, length=10)))
## variog: computing omnidirectional variogram
plot(v)
##surfaces
par(mfrow=c(1,2))
quilt.plot(coords[,1], coords[,2], lm.resids, col=viridis(100), main="LM residuals")
resid.surf <- mba.surf(cbind(coords, lm.resids), no.X=200, no.Y=200, extend=TRUE)$xyz.est
image.plot(resid.surf, col=viridis(100), main="LM residuals\n(interpolated surface)")
Fit spatial regression via spBates
spSVC
function.
n.samples <- 2500
cov.model <- "exponential"
starting <- list("phi"=3/(0.5*d.max), "sigma.sq"=1, "tau.sq"=1.5)
tuning <- list("phi"=0.1, "sigma.sq"=0.05, "tau.sq"=0.05)
priors <- list("beta.Flat", "phi.Unif"=list(3/d.max, 3/(0.01*d.max)), "sigma.sq.IG"=list(2, 1), "tau.sq.IG"=c(2, 1.5))
n.report <- 500
sp.m <- spSVC(y ~ elev, coords=c("lon","lat"), data=dat, starting=starting,
tuning=tuning, priors=priors, cov.model=cov.model,
n.samples=n.samples, n.report=n.report, n.omp.threads=3)
## ----------------------------------------
## General model description
## ----------------------------------------
## Model fit with 213 observations.
##
## Number of covariates 2.
##
## Number of space varying covariates 1.
##
## Using the exponential spatial correlation model.
##
## Number of MCMC samples 2500.
##
## Priors and hyperpriors:
## beta flat.
## Diag(K) IG hyperpriors
## parameter shape scale
## K[1,1] 2.000000 1.000000
##
## phi Unif lower bound hyperpriors: 0.328
## phi Unif upper bound hyperpriors: 32.787
##
## tau.sq IG hyperpriors shape=2.00000 and scale=1.50000
##
## Source compiled with OpenMP, posterior sampling is using 3 thread(s).
## -------------------------------------------------
## Sampling
## -------------------------------------------------
## Sampled: 500 of 2500, 20.00%
## Report interval Metrop. Acceptance rate: 47.00%
## Overall Metrop. Acceptance rate: 47.00%
## -------------------------------------------------
## Sampled: 1000 of 2500, 40.00%
## Report interval Metrop. Acceptance rate: 43.20%
## Overall Metrop. Acceptance rate: 45.10%
## -------------------------------------------------
## Sampled: 1500 of 2500, 60.00%
## Report interval Metrop. Acceptance rate: 45.20%
## Overall Metrop. Acceptance rate: 45.13%
## -------------------------------------------------
## Sampled: 2000 of 2500, 80.00%
## Report interval Metrop. Acceptance rate: 46.40%
## Overall Metrop. Acceptance rate: 45.45%
## -------------------------------------------------
## Sampled: 2500 of 2500, 100.00%
## Report interval Metrop. Acceptance rate: 41.00%
## Overall Metrop. Acceptance rate: 44.56%
## -------------------------------------------------
plot(sp.m$p.theta.samples)
Recover beta and spatial random effects, and take a look at the posterior summary of \(\beta\) and \(\theta\).
burn.in <- 0.5*n.samples
sp.m <- spRecover(sp.m, start=burn.in, thin=2, n.omp.threads = 3)
## Source compiled with OpenMP, posterior sampling is using 3 thread(s).
## -------------------------------------------------
## Recovering beta and w
## -------------------------------------------------
## Sampled: 99 of 626, 15.81%
## Sampled: 199 of 626, 31.79%
## Sampled: 299 of 626, 47.76%
## Sampled: 399 of 626, 63.74%
## Sampled: 499 of 626, 79.71%
## Sampled: 599 of 626, 95.69%
round(summary(sp.m$p.theta.recover.samples)$quantiles[,c(3,1,5)],2)
## 50% 2.5% 97.5%
## sigma.sq.(Intercept) 1.60 0.92 2.90
## tau.sq 1.03 0.71 1.35
## phi.(Intercept) 0.63 0.37 1.22
round(summary(sp.m$p.beta.recover.samples)$quantiles[,c(3,1,5)],2)
## 50% 2.5% 97.5%
## (Intercept) 8.57 7.09 10.05
## elev -0.01 -0.01 0.00
Summarize the random effects’ posterior distributions and examine using a surface plot of posterior medians. Compare these medians to non-spatial lm
residuals.
quants <- function(x){quantile(x, prob=c(0.5,0.025,0.975))}
w.summary <- apply(sp.m$p.w.recover.samples, 1, quants)
##Plot posterior median surface
par(mfrow=c(1,2))
resid.surf <- mba.surf(cbind(coords, lm.resids), no.X=200, no.Y=200, extend=TRUE)$xyz.est
image.plot(resid.surf, col=viridis(100), main="lm() resids")
w.mu.surf <- mba.surf(cbind(coords, w.summary[1,]), no.X=200, no.Y=200, extend=TRUE)$xyz.est
image.plot(w.mu.surf, col=viridis(100), main="spSVC()\nspatial random effects, w")
Predict for all available elevation information, then plot posterior predictive distribution means.
##Load elevation (DEM grid)
data(RMelevation)
##Extract pixel centroids and predictor where prediction is desired
pred.coords <- as.matrix(expand.grid(RMelevation$x, RMelevation$y))
pred.elev <- as.vector(RMelevation$z)
pred.X <- cbind(1, pred.elev)
pred.obj <- spPredict(sp.m, pred.covars=pred.X, pred.coords=pred.coords, thin=25, n.report=5, n.omp.threads=4)
## Using 26 posterior samples from previous spRecover call.
## Source compiled with OpenMP, posterior sampling is using 4 thread(s).
## -------------------------------------------------
## Point-wise sampling of predicted w
## -------------------------------------------------
## Sampled: 4 of 26, 15.38%
## Sampled: 9 of 26, 34.62%
## Sampled: 14 of 26, 53.85%
## Sampled: 19 of 26, 73.08%
## Sampled: 24 of 26, 92.31%
y.post.pred.mean <- apply(pred.obj$p.y.predictive.samples, 1, mean)
y.post.pred.mean.r <- rasterFromXYZ(cbind(pred.coords,y.post.pred.mean))
##Compare spSVC() and lm() predictions
y.mean <- pred.X%*%coefficients(lm.m)
y.lm.pred.mean.r <- rasterFromXYZ(cbind(pred.coords,y.mean))
diff.r <- rasterFromXYZ(cbind(pred.coords, y.mean-y.post.pred.mean))
par(mfrow=c(1,3))
plot(y.post.pred.mean.r, main="Post pred mean")
plot(y.lm.pred.mean.r, main="lm pred mean")
plot(diff.r, main="diff")