R wrapper and C++ code for our multivariate normal log density function dMvN.

dMvN <- function(y, mu, C, n.omp.threads=1){
    
    n <- nrow(C)
    
    storage.mode(n) <- "integer"
    storage.mode(y) <- "double"
    storage.mode(mu) <- "double"
    storage.mode(C) <- "double"
    storage.mode(n.omp.threads) <- "integer"
    
    start.time <- proc.time()
    
    out <- .Call("dMvN", n, y, mu, C, n.omp.threads)
    
    list("log.density"=out, sys.time=proc.time()-start.time)
}
#include <R.h>
#include <Rinternals.h>
#include <R_ext/Lapack.h>
#include <R_ext/BLAS.h>

#ifdef _OPENMP
#include <omp.h>
#endif

extern "C" {

  SEXP dMvN(SEXP n_r, SEXP y_r, SEXP mu_r, SEXP C_r, SEXP nThreads_r){
    
    int n = INTEGER(n_r)[0];
    double *y = REAL(y_r);
    double *mu = REAL(mu_r);
    double *C = REAL(C_r);
    int nThreads = INTEGER(nThreads_r)[0];
    
#ifdef _OPENMP
    omp_set_num_threads(nThreads);
#else
    if(nThreads > 1){
      warning("n.omp.threads = %i requested however source code was not compiled with OpenMP support.", nThreads);
      nThreads = 1;
    }
#endif
    
    //common variables
    int i, info, nProtect = 0;
    char const *lower = "L";
    char const *nUnit = "N";
    char const *ntran = "N";

    const int incOne = 1;
    const double pi = 3.14159265359;

    double det = 0;
    double Q;
    double *z = (double *) R_alloc(n, sizeof(double)); 
    SEXP result = PROTECT(allocVector(REALSXP, 1)); nProtect++;
 
   //z = mu-y
   for(i = 0; i < n; i++){
     z[i] = mu[i]-y[i];
   }
 
   //Cholesky factorization (note C is overwritten, so you might want to make a copy if you'll need it later)
   F77_NAME(dpotrf)(lower, &n, C, &n, &info); if(info != 0){error("c++ error: dpotrf failed\n");}

   //log determinant
   for(i = 0; i < n; i++){
     det += 2.0*log(C[i*n+i]);
   }
   
   //solve Lu = z for u (note u is now stored in z, just to be confusing)
   F77_NAME(dtrsv)(lower, ntran, nUnit, &n, C, &n, z, &incOne);
   
   Q = F77_NAME(ddot)(&n, z, &incOne, z, &incOne);
   
   REAL(result)[0] = -0.5*n*log(2.0*pi)-0.5*det-0.5*Q;

   UNPROTECT(nProtect);

   return result;
 }
}

Compile the C++ shared object.

system("R CMD SHLIB dMvN.cpp")

Some code to try it out and compare with the implementation in the mvtnorm package.

##Note, depending on your OS and threaded BLAS library dMvN's
##n.omp.thread might work at controlling the number of threads.
##If not, then running "export OMP_NUM_THREADS=n", where n is
##the number of desired threads, on the terminal prior to
##starting R or R CMD BATCH try-it-out.R should work.

rm(list=ls())

dyn.load("dMvN.so")
source("dMvN.R")

rmvn <- function(n, mu=0, V = matrix(1)){
  p <- length(mu)
  if(any(is.na(match(dim(V),p))))
    stop("Dimension problem!")
  D <- chol(V)
  t(matrix(rnorm(n*p), ncol=p)%*%D + rep(mu,rep(n,p)))
}

set.seed(1)

n <- 5000
coords <- cbind(runif(n,0,1), runif(n,0,1))
X <- as.matrix(cbind(1, rnorm(n)))

B <- as.matrix(c(1,5))
p <- length(B)

##Exponential spatial covariance with a nugget
sigma.sq <- 2
tau.sq <- 0.1
phi <- 3/0.1

D <- as.matrix(dist(coords))
C <- sigma.sq*exp(-phi*D)+diag(tau.sq,n)
mu <- X%*%B
y <- rmvn(1, mu, C)

##Evaluate log likelihood
out <- dMvN(y, mu, C, n.omp.thread=2)

out$log.density
## [1] -6471.511
out$sys.time
##    user  system elapsed 
##   1.301   0.085   0.691
##Compare timing to:
library(mvtnorm)

start.time <- proc.time()
dmvnorm(y[,1], mu[,1], C, log=TRUE)
## [1] -6471.511
proc.time()-start.time
##    user  system elapsed 
##   1.649   0.312   1.373