R wrapper and C++ code.

c.rmvn <- function(mu, Sigma, n.omp.threads=1){

    ##Note, I'm not doing any check of dim or if Sigma is PD.
    n <- length(mu)
    
    storage.mode(mu) <- "double"
    storage.mode(Sigma) <- "double"
    storage.mode(n) <- "integer"
    storage.mode(n.omp.threads) <- "integer"

    start.time <- proc.time()
    
    v <- .Call("rMvN", mu, Sigma, n, n.omp.threads)
    
    return(list("v"=v, sys.time=proc.time()-start.time))
}
#include <R.h>
#include <Rmath.h>
#include <Rinternals.h>
#include <R_ext/Lapack.h>
#include <R_ext/BLAS.h>

#ifdef _OPENMP
#include <omp.h>
#endif

//Description: prints the nxp matrix to the R console (helps with debugging)
void show(double *a, int n, int p){

  for(int i = 0; i < n; i++){
    for(int j = 0; j < p; j++){
      Rprintf("%.5f\t", a[j*n+i]);
    }
    Rprintf("\n");
  }
}

//Description: generates random numbers from a n-dimensional multivariate normal N(mu, Sigma)
//mu - mean vector
//Sigma - covariance matrix
//n - length of mu and rows and columns of Sigma
//nThreads - number of threads requested for BLAS 
extern "C" {
  
  SEXP rMvN(SEXP mu_r, SEXP Sigma_r, SEXP n_r, SEXP nThreads_r){
    
    ////////////////////////////////////////////
    //get input arguments
    ////////////////////////////////////////////
    //just a pointers to *_r's space to simplify code below
    double *mu = REAL(mu_r);
    double *Sigma = REAL(Sigma_r);
    int n = INTEGER(n_r)[0];
    int nThreads = INTEGER(nThreads_r)[0];

    ////////////////////////////////////////////
    //set number of threads for BLAS if able
    ////////////////////////////////////////////
#ifdef _OPENMP
       Rprintf("n.omp.threads set to %i.\n", nThreads);
    omp_set_num_threads(nThreads);
#else
    if(nThreads > 1){
      warning("n.omp.threads = %i requested however source code was not compiled with OpenMP support.\n", nThreads);
      nThreads = 1;
    }
#endif

    ////////////////////////////////////////////
    //define some variables we'll use
    ///////////////////////////////////////////
    int i;
    int nn = n*n;
    int info;
    int nProtect = 0;

    //BLAS and LAPACK variables
    int inc = 1;
    double one = 1.0;
    char const *lower = "L";
    char const *nDiag = "N";
    char const *nTran = "N";
    
    ////////////////////////////////////////////
    //allocate the return vector
    ///////////////////////////////////////////
    SEXP v_r ;
    PROTECT(v_r = allocVector(REALSXP, n)); nProtect++;
    double *v = REAL(v_r); 
    
    ////////////////////////////////////////////
    //draw random numbers from N(0,1)
    ///////////////////////////////////////////

    //Get the current state of the random number generator.
    GetRNGstate();
  
    for(i = 0; i < n; i++){
      v[i] = rnorm(0.0, 1.0);
    }

    //Set the current state of the random number generator.
    PutRNGstate();

    //Make a copy of the covariance matrix Sigma, so that it is not altered on exit.
    double *L = (double *) R_alloc(nn, sizeof(double));//allocate space to receive the copy of Sigma

    //Copy the nn elements of Sigma (you could just copy the lower triangle elements if you really wanted).
    F77_NAME(dcopy)(&nn, Sigma, &inc, L, &inc);

    //Compute the lower Cholesky square root, i.e., Sigma = LL', and catch any errors.
    F77_NAME(dpotrf)(lower, &n, L, &n, &info); if(info != 0){error("c++ error: dpotrf failed\n");}
    
    //Lower triangular matrix L times vector of random normal values.
    //We have to be careful here, because dpotrf leaves garbage in the upper triangle of L (it's the workspace),
    //so we either need to set L's upper triangle to zero and use dgemv or a more efficient solution is dtrmv, see, BLAS docs.
    F77_NAME(dtrmv)(lower, nTran, nDiag, &n, L, &n, v, &inc);
    
    //Add the mean to the result.
    F77_NAME(daxpy)(&n, &one, mu, &inc, v, &inc);

    ////////////////////////////////////////////
    //return v_r and clean up
    ///////////////////////////////////////////

    //unprotect
    UNPROTECT(nProtect);
    
    return(v_r);
  }

}

Compile the C++ shared objects.

system("R CMD SHLIB rMvN.cpp")

Run some time tests.

##Load shared libraries
dyn.load("rMvN.so")
source("rMvN.R")

##export OMP_NUM_THREADS=3

rmvn <- function(mu, Sigma){
  n <- length(mu)

  L <- t(chol(Sigma))##just to keep it consistent with our c function
  as.vector(L%*%rnorm(n) + mu)
}

##try out our r and c functions for generating multivariate normal random numbers

##make a covariance matrix
n <- 100
A <- matrix(rnorm(n*n), n, n)
Sigma <- A%*%t(A)

mu <- rep(0, n)

set.seed(1)
v.r <- rmvn(mu, Sigma)

set.seed(1)
v.c <- c.rmvn(mu, Sigma)
## n.omp.threads set to 1.
max(abs(v.r-v.c$v))
## [1] 7.114309e-13