Rcpp (with inline)
Rcpp had a number of problems. First of all, it did not work on my Windows box. It ended with an error message:
Error in compileCode(f, code, language = language, verbose = verbose) :
Compilation ERROR, function(s)/method(s) not created!
In addition: Warning message:
running command 'C:/Users/Kees/Documents/R/R-3.0.1/bin/x64/R CMD SHLIB file12102de67044.cpp 2> file12102de67044.cpp.err.txt' had status 1
As far as I understand the .cpp is not found. Which is not strange, it resides in a temp directory:
C:\Users\Kees\AppData\Local\Temp\Rtmp25t6tT allocated for the current session. I gave up after playing around for half an evening and went to plan B, a Fedora box, which worked without a problem. I have no C++ experience, my C time is like 20 years ago. So using some examples I was happy when I converted a few lines of code. The part was were chosen for simplicity rather than speed expectations, still, I used 97% of the original time over the whole function. Subsequently I tried a larger part, where a data.frame was converted. Unhappily, the whole function used 200% of the original time. It seemed there suddenly appeared a try/tryCatch which consumed more time than the original code. At this point I started to abandon Rcpp and tried more ordinary approaches. Learning Rcpp is now a bit on the back burner.
Code optimization
This is actually fairly simple. Vectors beat for loops, especially the inner vectors. Remove stuff from inner loops to outer loops. Take functions called and extract parts needed. For example, these two functions do the same, yet the first is much slower. Adding the compiler gives a little extra
nspacejump0 <- function(x,i,sd) {
xold <- arm::logit(x[i])
xnew <- arm::invlogit(xold + rnorm(1,0,sd))
x <- x/sum(x[-i])*(1-xnew)
x[i] <- xnew
x
}
nspacejump1 <- function(x,i,sd) {
xold <- log(x[i]/(1-x[i])) + rnorm(1,0,sd)
xnew <- 1/(1+exp(-xold))
x <- x/sum(x[-i])*(1-xnew)
x[i] <- xnew
x
}
nspacejump0c <- cmpfun(nspacejump0)
nspacejump1c <- cmpfun(nspacejump1)
############### speed run ####################
microbenchmark(
nspacejump0(c(.2,.4,.1,.3),1,.1),
nspacejump1(c(.2,.4,.1,.3),1,.1),
nspacejump0c(c(.2,.4,.1,.3),1,.1),
nspacejump1c(c(.2,.4,.1,.3),1,.1))
Unit: microseconds
expr min lq median uq
nspacejump0(c(0.2, 0.4, 0.1, 0.3), 1, 0.1) 111.428 114.360 115.8265 117.659
nspacejump1(c(0.2, 0.4, 0.1, 0.3), 1, 0.1) 29.323 30.789 31.5230 32.988
nspacejump0c(c(0.2, 0.4, 0.1, 0.3), 1, 0.1) 101.165 104.097 106.2960 107.763
nspacejump1c(c(0.2, 0.4, 0.1, 0.3), 1, 0.1) 19.793 21.259 21.9930 22.726
max neval
287.367 100
79.173 100
178.138 100
108.495 100
Second part code optimization
In the deviance calculation there are more possibilities for short cuts. The original calculates expected proportions from limits. In the end, this can be moved to a more outer loop, since only one distribution is modified at a time. On top of that (not shown because it was moved out) pnorm(upper,mean,sd)-pnorm(lower,mean,sd) has almost the same limits and became something like: pn<-pnorm(limits,mean,sd);pn[-1]-pn[-23]. Dmultinom has been eliminated, and since lgamma(x + 1) is only a function of x, the counts, this is constant for the data used, so can be moved completely outside. Obviously it again helps to use the compiler.
curldev0 <- function(counts,curmodel) {
dsd <- sum(dnorm(curmodel$sd,300,300,log=TRUE))
prop.exp <- matrix(0,nrow=nrow(counts),ncol=nrow(curmodel))
for (i in 1:nrow(curmodel)) {
prop.exp[,i] <- curmodel$w[i]*(pnorm(counts$upper,curmodel$mean[i],curmodel$sd[i])-
pnorm(counts$lower,curmodel$mean[i],curmodel$sd[i]))
}
finprop <- rowSums(prop.exp)
dsd + dmultinom(counts$Waarde,prob=finprop,log=TRUE)
}
curldev5 <- function(counts,curmodel,pemodel) {
dsd <- sum(dnorm(curmodel$sd,300,300,log=TRUE))
prop.exp <- pemodel %*% curmodel$w
prop.exp <- prop.exp/sum(prop.exp)
dsd + lgamma(sum(counts$Waarde) + 1) + sum(counts$Waarde * log(prop.exp) - counts$lgam)
#dmultinom(counts$Waarde,prob=prop.exp,log=TRUE)
}
##########speed run############
curldev0c <- cmpfun(curldev0)
curldev5c <- cmpfun(curldev5)
curmodel <- data.frame(
mean=c( 892.0141263, 1.417520e+03, 1306.8248062, 1.939645e+03),
sd =c( 99.5660288, 2.129247e+02, 194.2452168, 1.684932e+02),
w =c( 0.2252041, 4.384217e-02, 0.7125014, 1.845233e-02))
pemodel <- matrix(0,nrow=22,ncol=nrow(curmodel))
for (i in 1:nrow(curmodel)) {
pn <- pnorm(weightlims,curmodel$mean[i],curmodel$sd[i])
pemodel[,i] <- (pn[-1]-pn[-23])
}
microbenchmark(
# c(
curldev0(weight3,curmodel),
curldev5(weight3,curmodel,pemodel),
curldev0c(weight3,curmodel),
curldev5c(weight3,curmodel,pemodel)
)
Unit: microseconds
expr min lq median uq
curldev0(weight3, curmodel) 435.448 442.4130 447.1780 465.5050
curldev5(weight3, curmodel, pemodel) 58.646 61.5785 68.1760 71.8420
curldev0c(weight3, curmodel) 394.396 402.4600 407.9585 426.6515
curldev5c(weight3, curmodel, pemodel) 54.248 57.1800 63.0450 67.4430
max neval
1625.234 100
361.407 100
823.980 100
164.943 100
third part code optimization
This is the calling environment, so downstream adaptations have adaptations here. In the end the time used is about 30% of the original.
version0 <- function(weight3,curmodel,niter) {
cnow <- curldev0(weight3,curmodel)
ndist <- nrow(curmodel)
result <- matrix(nrow=ndist*niter,ncol=3*ndist+1)
index <- 1
for (iter in 1:niter) {
for (dist in 1:ndist) {
nowmodel <- curmodel
result[index,] <- c(unlist(curmodel),cnow)
curmodel$mean[dist] <- curmodel$mean[dist] + rnorm(1,0,1)
curmodel$sd[dist] <- curmodel$sd[dist] * runif(1,1/1.01,1.01)
curmodel$w <- nspacejump0(curmodel$w,dist,.1)
cnew <- curldev0(weight3,curmodel)
# cat(c(cnow,cnew,'\n'))
if (exp(cnew-cnow) > runif(1) ) cnow <- cnew
else curmodel <- nowmodel
index <- index + 1
}
}
return(list(result,curmodel))
}
version6 <- function(weight3,curmodel,niter) {
pemodel <- matrix(0,nrow=22,ncol=nrow(curmodel))
pemodel2 <- pemodel
for (i in 1:nrow(curmodel)) {
pn <- pnorm(weightlims,curmodel$mean[i],curmodel$sd[i])
pemodel[,i] <- (pn[-1]-pn[-23])
}
cnow <- curldev5c(weight3,curmodel,pemodel)
ndist <- nrow(curmodel)
result <- matrix(nrow=ndist*niter,ncol=3*ndist+1)
index <- 1
for (iter in 1:niter) {
for (dist in 1:ndist) {
nowmodel <- curmodel
nowpe <- pemodel
result[index,] <- c(curmodel$mean,curmodel$sd,curmodel$w,cnow)
curmodel$mean[dist] <- curmodel$mean[dist] + rnorm(1,0,1)
curmodel$sd[dist] <- curmodel$sd[dist] * runif(1,1/1.01,1.01)
curmodel$w <- nspacejump1c(curmodel$w,dist,.1)
pn <- pnorm(weightlims,curmodel$mean[dist],curmodel$sd[dist])
pemodel[,dist] <- (pn[-1]-pn[-23])
cnew <- curldev5c(weight3,curmodel,pemodel)
# cat(c(cnow,cnew,'\n'))
if (exp(cnew-cnow) > runif(1) ) cnow <- cnew
else {curmodel <- nowmodel
pemodel <- nowpe }
index <- index + 1
}
}
return(list(result=result,curmodel=curmodel))
}
############### speed run ###############
version0c <- cmpfun(version0)
version6c <- cmpfun(version6)
microbenchmark(
version0(weight3,curmodel,200),
version6(weight3,curmodel,200),
version0c(weight3,curmodel,200),
version6c(weight3,curmodel,200),
times=5)
Unit: milliseconds
expr min lq median uq max
version0(weight3, curmodel, 200) 648.9325 655.1388 660.7138 665.8644 669.2029
version6(weight3, curmodel, 200) 264.1112 266.0451 267.8924 270.6649 274.3435
version0c(weight3, curmodel, 200) 617.1030 627.7700 630.9208 638.2391 645.8507
version6c(weight3, curmodel, 200) 230.3676 230.6623 231.3389 236.2792 237.2043
neval
5
5
5
5