Colorado temperature data analysis

rm(list=ls())
library(spBayes)
library(fields)
library(MBA)
library(geoR)
library(raster)
library(leaflet)
library(viridis)

Load data and remove missing data. These data are part of the fields package. The outcome is CO.tmin.MAM.climate is minimum temperature in March, April, and May. The predictor is elevation, i.e., CO.elev.

data(COmonthlyMet)

y <- CO.tmin.MAM.climate
coords <- CO.loc
elev <- CO.elev

missing <- is.na(y)

y.ho <- y[missing]
coords.ho <- coords[missing,]
elev.ho <- elev[missing]

y <- y[!missing]
coords <- coords[!missing,]
elev <- elev[!missing]

Do a little Exploratory data analysis (EDA).

##make a design matrix for regression analysis
dat <- data.frame(y, coords, elev)

pal <- colorNumeric(palette = "YlOrRd", domain = y)

##https://rstudio.github.io/leaflet/
leaflet(dat) %>% addCircleMarkers(lng = ~lon, lat = ~lat, col = ~pal(y),
                                  stroke = FALSE, fillOpacity = 0.75,
                                  popup=~paste("Temp C:", round(y,2))) %>%
    addProviderTiles("Esri.WorldImagery", group="Satellite") %>%
    addProviderTiles("Stamen.Terrain", group="Terrain") %>%
    addProviderTiles("Esri.WorldTopoMap", group="Topographical") %>%
    addLegend("bottomright", pal = pal, values = ~y, title = "Temp C" , opacity = 0.75) %>% 
    addLayersControl(
        baseGroup = c("Topographical", "Satellite","Terrain")
    )

##LM residual EDA
lm.m <- lm(y ~ elev, data=dat)
summary(lm.m)

## 
## Call:
## lm(formula = y ~ elev, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.6185 -0.9858 -0.0704  0.9781  4.7948 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.3283214  0.3633448   22.92   <2e-16 ***
## elev        -0.0053176  0.0001908  -27.87   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.632 on 211 degrees of freedom
## Multiple R-squared:  0.7864, Adjusted R-squared:  0.7854 
## F-statistic: 776.7 on 1 and 211 DF,  p-value: < 2.2e-16

lm.resids <- resid(lm.m)

d.max <- max(iDist(coords))
d.max

## [1] 9.15006

##variogram 
par(mfrow=c(1,2))
v <- variog(coords=coords, data=y, uvec=(seq(0, 0.75*d.max, length=10)))

## variog: computing omnidirectional variogram

plot(v)

v <- variog(coords=coords, data=lm.resids, uvec=(seq(0, 0.5*d.max, length=10)))

## variog: computing omnidirectional variogram

plot(v)

##surfaces
par(mfrow=c(1,2))
quilt.plot(coords[,1], coords[,2], lm.resids, col=viridis(100), main="LM residuals")

resid.surf <- mba.surf(cbind(coords, lm.resids), no.X=200, no.Y=200, extend=TRUE)$xyz.est
image.plot(resid.surf, col=viridis(100), main="LM residuals\n(interpolated surface)")

Fit spatial regression via spBates spSVC function.

n.samples <- 2500

cov.model <- "exponential"
starting <- list("phi"=3/(0.5*d.max), "sigma.sq"=1, "tau.sq"=1.5)
tuning <- list("phi"=0.1, "sigma.sq"=0.05, "tau.sq"=0.05)
priors <- list("beta.Flat", "phi.Unif"=list(3/d.max, 3/(0.01*d.max)), "sigma.sq.IG"=list(2, 1), "tau.sq.IG"=c(2, 1.5))
 
n.report <- 500

sp.m <- spSVC(y ~ elev, coords=c("lon","lat"), data=dat, starting=starting,
              tuning=tuning, priors=priors, cov.model=cov.model,
              n.samples=n.samples, n.report=n.report, n.omp.threads=3)

## ----------------------------------------
##  General model description
## ----------------------------------------
## Model fit with 213 observations.
## 
## Number of covariates 2.
## 
## Number of space varying covariates 1.
## 
## Using the exponential spatial correlation model.
## 
## Number of MCMC samples 2500.
## 
## Priors and hyperpriors:
##  beta flat.
##  Diag(K) IG hyperpriors
##      parameter   shape       scale
##      K[1,1]      2.000000    1.000000
## 
##  phi Unif lower bound hyperpriors:   0.328   
##  phi Unif upper bound hyperpriors:   32.787  
## 
##  tau.sq IG hyperpriors shape=2.00000 and scale=1.50000
## 
## Source compiled with OpenMP, posterior sampling is using 3 thread(s).
## -------------------------------------------------
##      Sampling
## -------------------------------------------------
## Sampled: 500 of 2500, 20.00%
## Report interval Metrop. Acceptance rate: 47.00%
## Overall Metrop. Acceptance rate: 47.00%
## -------------------------------------------------
## Sampled: 1000 of 2500, 40.00%
## Report interval Metrop. Acceptance rate: 43.20%
## Overall Metrop. Acceptance rate: 45.10%
## -------------------------------------------------
## Sampled: 1500 of 2500, 60.00%
## Report interval Metrop. Acceptance rate: 45.20%
## Overall Metrop. Acceptance rate: 45.13%
## -------------------------------------------------
## Sampled: 2000 of 2500, 80.00%
## Report interval Metrop. Acceptance rate: 46.40%
## Overall Metrop. Acceptance rate: 45.45%
## -------------------------------------------------
## Sampled: 2500 of 2500, 100.00%
## Report interval Metrop. Acceptance rate: 41.00%
## Overall Metrop. Acceptance rate: 44.56%
## -------------------------------------------------

plot(sp.m$p.theta.samples)

Recover beta and spatial random effects, and take a look at the posterior summary of \(\beta\) and \(\theta\).

burn.in <- 0.5*n.samples

sp.m <- spRecover(sp.m, start=burn.in, thin=2, n.omp.threads = 3)

## Source compiled with OpenMP, posterior sampling is using 3 thread(s).
## -------------------------------------------------
##  Recovering beta and w
## -------------------------------------------------
## Sampled: 99 of 626, 15.81%
## Sampled: 199 of 626, 31.79%
## Sampled: 299 of 626, 47.76%
## Sampled: 399 of 626, 63.74%
## Sampled: 499 of 626, 79.71%
## Sampled: 599 of 626, 95.69%

round(summary(sp.m$p.theta.recover.samples)$quantiles[,c(3,1,5)],2)

##                       50% 2.5% 97.5%
## sigma.sq.(Intercept) 1.60 0.92  2.90
## tau.sq               1.03 0.71  1.35
## phi.(Intercept)      0.63 0.37  1.22

round(summary(sp.m$p.beta.recover.samples)$quantiles[,c(3,1,5)],2)

##               50%  2.5% 97.5%
## (Intercept)  8.57  7.09 10.05
## elev        -0.01 -0.01  0.00

Summarize the random effects’ posterior distributions and examine using a surface plot of posterior medians. Compare these medians to non-spatial lm residuals.

quants <- function(x){quantile(x, prob=c(0.5,0.025,0.975))}

w.summary <- apply(sp.m$p.w.recover.samples, 1, quants)

##Plot posterior median surface
par(mfrow=c(1,2))
resid.surf <- mba.surf(cbind(coords, lm.resids), no.X=200, no.Y=200, extend=TRUE)$xyz.est
image.plot(resid.surf, col=viridis(100), main="lm() resids")

w.mu.surf <- mba.surf(cbind(coords, w.summary[1,]), no.X=200, no.Y=200, extend=TRUE)$xyz.est
image.plot(w.mu.surf, col=viridis(100), main="spSVC()\nspatial random effects, w")

Predict for all available elevation information, then plot posterior predictive distribution means.

##Load elevation (DEM grid)
data(RMelevation)

##Extract pixel centroids and predictor where prediction is desired 
pred.coords <- as.matrix(expand.grid(RMelevation$x, RMelevation$y))
pred.elev <- as.vector(RMelevation$z)
pred.X <- cbind(1, pred.elev)

pred.obj <- spPredict(sp.m, pred.covars=pred.X, pred.coords=pred.coords, thin=25, n.report=5, n.omp.threads=4)

## Using 26 posterior samples from previous spRecover call.

## Source compiled with OpenMP, posterior sampling is using 4 thread(s).
## -------------------------------------------------
##  Point-wise sampling of predicted w
## -------------------------------------------------
## Sampled: 4 of 26, 15.38%
## Sampled: 9 of 26, 34.62%
## Sampled: 14 of 26, 53.85%
## Sampled: 19 of 26, 73.08%
## Sampled: 24 of 26, 92.31%

y.post.pred.mean <- apply(pred.obj$p.y.predictive.samples, 1, mean)
y.post.pred.mean.r <- rasterFromXYZ(cbind(pred.coords,y.post.pred.mean))

##Compare spSVC() and lm() predictions
y.mean <- pred.X%*%coefficients(lm.m)
y.lm.pred.mean.r <- rasterFromXYZ(cbind(pred.coords,y.mean))

diff.r <- rasterFromXYZ(cbind(pred.coords, y.mean-y.post.pred.mean))

par(mfrow=c(1,3))
plot(y.post.pred.mean.r, main="Post pred mean")
plot(y.lm.pred.mean.r, main="lm pred mean")
plot(diff.r, main="diff")