For my first experience with STAN I wanted to convert my last JAGS program into STAN. This was a bit more difficult than I expected. The JAGS program was
Data has been explained before. The only difference is that by now I am reading the data using read.xls from the gdata package. The new code is at the bottom of this post.
Initially I thought the conversion would be simple. I made some simplification and the code worked. However, it appeared my initial code was not very fast. Then I added the out of range data and that resulted in a problem seen before, out of range initialized data and hence no MCMC samples. After muddying along for a bit my second step was to make a simple regression model and get to know STAN a bit better. This knowledge set in a simple model. Extended it till the model had the features which I wanted except the out of range data. To add the out of range data I decided to initialize the more complex model using estimates from a more simple model. This worked well, the first code also serves as a kind of warm-up too. There are some warnings the way I set it up, but that is only in the first model, which is only run for a few cycles. Final running time was a disappointing close to two hours. This could probably be reduced a bit using less but longer chains.
Reading data
# http://www.lml.rivm.nl/data/gevalideerd/data/lmre_1992-2005.xls
# http://www.r-bloggers.com/read-excel-files-from-r/
library(gdata)
readsheet <- function(cursheet,fname) {
df = read.xls(fname,
sheet = cursheet ,
blank.lines.skip=FALSE,
colClasses = "character")
topline <- 8
test <- which(df[6,]=='C')+1
numin <- df[topline:nrow(df),test]
names(numin) <- make.names( gsub(' ','',df[6,test]))
for(i in 1:ncol(numin)) numin[,i] <- as.numeric(gsub(',','.',numin[,i]))
status = df[topline:nrow(df),test-1]
names(status) <- paste('C',make.names( gsub(' ','',df[6,test])),sep='')
numin$StartDate <- as.Date(substr(df[topline:nrow(df),1],1,10))
numin$EndDate <- as.Date(substr(df[topline:nrow(df),2],1,10))
numin <- cbind(numin,status)
numin <- numin[!is.na(numin$StartDate),]
tall <- reshape(numin,
varying=list(
make.names(gsub(' ','',df[6,test])),
paste('C',
make.names(gsub(' ','',df[6,test])),sep='')),
v.names=c('Amount','Status'),
timevar='Compound',
idvar=c('StartDate','EndDate'),
times=paste(df[6,test],'(',df[5,test],')',sep=''),
direction='long')
tall$Compound <- factor(gsub(' )',')',gsub(' +',' ',tall$Compound)))
row.names(tall)<- NULL
tall$Location <- cursheet
tall
}
readfile <- function(fname) {
sheets <- sheetNames(fname)[-1]
tt <- lapply(sheets,readsheet,fname=fname)
tt2 <- do.call(rbind,tt)
tt2$Location <- factor(tt2$Location)
tt2$fname <- fname
tt2
}
fnames <- dir(pattern='*.xls') #.\\. ?
rf <- lapply(fnames,readfile)
rf2 <- do.call(rbind,rf)
rf2$machine <- factor(ifelse(rf2$fname=="lmre_1992-2005.xls",
'Van Essen/ECN vanger',
' Eigenbrodt NSA 181 KHT'))
Select Fe data and prepare
Fe <- rf2[rf2$Compound==levels(rf2$Compound)[9],]
Fe$LAmount <- log10(Fe$Amount)
Fe$LAmount[Fe$Amount<0.4] <- NA
Fe2 <- Fe[!is.na(Fe$Status),]
Fe3 <- Fe2[!is.na(Fe2$LAmount),]
Fe4 <- Fe2[is.na(Fe2$LAmount),]
library(rstan)
datain <- list(
LAmount = Fe3$LAmount,
Machine=(Fe3$machine=='Van Essen/ECN vanger')+1,
Location=(1:nlevels(Fe3$Location))[Fe3$Location],
time=as.numeric(Fe3$StartDate),
nobs =nrow(Fe3),
nloc=nlevels(Fe3$Location),
MachineC=(Fe4$machine=='Van Essen/ECN vanger')+1,
LocationC=(1:nlevels(Fe4$Location))[Fe4$Location],
timeC=as.numeric(Fe4$StartDate),
nobsC =nrow(Fe4),
L=log10(0.399)
)
First model
parameters1 <- c('intercept','rate','FLoc_r',
'sigmaF','sfmach','sfloc','FMach_r' )
Fe_code1 <- '
data {
int<lower=0> nloc;
int<lower=0> nobs;
vector[nobs] LAmount;
int<lower=1,upper=2> Machine[nobs];
int<lower=1,upper=nloc> Location[nobs];
vector[nobs] time;
int<lower=0> nobsC;
real <upper=min(LAmount)> L;
int<lower=1,upper=2> MachineC[nobsC];
int<lower=1,upper=nloc> LocationC[nobsC];
vector[nobsC] timeC;
}
parameters {
real intercept;
real rate;
vector [nloc] FLoc_r;
vector<lower=0> [2] sigmaF;
real<lower=0> sfmach;
real<lower=0> sfloc;
vector[2] FMach_r;
}
transformed parameters {
vector[nobs] ExpLAmount;
vector<lower=0>[nobs] sigma;
real mean_FLoc;
real mean_FMach;
vector[nloc] FLoc;
vector[2] FMach;
vector[nobsC] ExpLAmountC;
vector<lower=0> [nobsC] sigmaC;
vector[nobsC] LAmountC;
mean_FLoc <- mean(FLoc_r);
FLoc <- (FLoc_r-mean_FLoc)*sfloc;
mean_FMach <- mean(FMach_r);
FMach <- (FMach_r-mean_FMach)*sfmach;
for (i in 1:nobs) {
ExpLAmount[i] <- intercept
+ .0001*rate*time[i]
+ FMach[Machine[i]] +
+ FLoc[Location[i]];
sigma[i] <- sigmaF[Machine[i]];
}
for (i in 1:nobsC) {
ExpLAmountC[i] <- intercept
+ .0001*rate*timeC[i]
+ FMach[MachineC[i]] +
+ FLoc[LocationC[i]];
sigmaC[i] <- sigmaF[MachineC[i]];
LAmountC[i] <- log10(.2);
}
}
model {
sfmach ~ cauchy(0,4);
sfloc ~ cauchy(0,4);
FMach_r ~ normal(0,1);
FLoc_r ~ normal(0,1);
sigmaF ~ cauchy(0,4);
LAmount ~ normal(ExpLAmount,sigma);
LAmountC ~ normal(ExpLAmountC,sigmaC);
rate ~ normal(0,1);
}
generated quantities {
real dailydec;
real yearlydec;
dailydec <- pow(10,rate*.0001);
yearlydec <- pow(dailydec,365);
}
'
fit1 <- stan(model_code = Fe_code1,
data = datain,
pars=parameters1,
warmup=50,
iter = 75,
chains = 4)
fit1
Second model
samples <- as.array(fit1)
lastsample <- dim(samples)[1]
nchain <- dim(samples)[2]
init2 <- lapply(1:nchain,function(i) {
fin <- samples[lastsample,i,]
list(
intercept=fin[1],
rate=fin[2],
FLoc_r=fin[3:(3+16)],
sigmaF=fin[20:21],
sfmach=fin[22],
sfloc=fin[23],
FMach_r=fin[24:25],
LAmountC=rep(log10(.2),datain$nobsC)
)
})
parameters2 <- c('intercept','yearlydec',
'sigmaF','FMach','sfmach','sfloc',
'FLoc' ,'rate' )
Fe_code2 <- '
data {
int<lower=0> nloc;
int<lower=0> nobs;
vector[nobs] LAmount;
int<lower=1,upper=2> Machine[nobs];
int<lower=1,upper=nloc> Location[nobs];
vector[nobs] time;
int<lower=0> nobsC;
real <upper=min(LAmount)> L;
int<lower=1,upper=2> MachineC[nobsC];
int<lower=1,upper=nloc> LocationC[nobsC];
vector[nobsC] timeC;
}
parameters {
real intercept;
real rate;
vector [nloc] FLoc_r;
vector<lower=0> [2] sigmaF;
real<lower=0> sfmach;
real<lower=0> sfloc;
vector[2] FMach_r;
vector<upper=L>[nobsC] LAmountC;
}
transformed parameters {
vector[nobs] ExpLAmount;
vector<lower=0>[nobs] sigma;
real mean_FLoc;
real mean_FMach;
vector[nloc] FLoc;
vector[2] FMach;
vector[nobsC] ExpLAmountC;
vector<lower=0> [nobsC] sigmaC;
mean_FLoc <- mean(FLoc_r);
FLoc <- (FLoc_r-mean_FLoc)*sfloc;
mean_FMach <- mean(FMach_r);
FMach <- (FMach_r-mean_FMach)*sfmach;
for (i in 1:nobs) {
ExpLAmount[i] <- intercept
+ .0001*rate*time[i]
+ FMach[Machine[i]] +
+ FLoc[Location[i]];
sigma[i] <- sigmaF[Machine[i]];
}
for (i in 1:nobsC) {
ExpLAmountC[i] <- intercept
+ .0001*rate*timeC[i]
+ FMach[MachineC[i]] +
+ FLoc[LocationC[i]];
sigmaC[i] <- sigmaF[MachineC[i]];
}
}
model {
sfmach ~ cauchy(0,4);
sfloc ~ cauchy(0,4);
FMach_r ~ normal(0,1);
FLoc_r ~ normal(0,1);
sigmaF ~ cauchy(0,4);
LAmount ~ normal(ExpLAmount,sigma);
LAmountC ~ normal(ExpLAmountC,sigmaC);
rate ~ normal(0,1);
}
generated quantities {
real dailydec;
real yearlydec;
dailydec <- pow(10,rate*.0001);
yearlydec <- pow(dailydec,365);
}
'
fit2 <- stan(model_code = Fe_code2,
data = datain,
pars=parameters2,
init=init2,
warmup=100,
iter = 500,
chains = 4)
fit2