Wiekvoet: Bayesian analysis of sensory profiling data, part 2

Last week I made the core of a Bayesian model for sensory profiling data. This week the extras need to be added. That is, there are a bunch of extra interactions and the error is dependent on panelists and descriptors.
Note that where last week I pointed to influence of Procrustes and STATIS in these models, I probably should have mentioned Per Brockhoff's work too.

Data

See last week

Model

A few features were added compared to last week: Round effect was averaged over all descriptors. It is now dependent on descriptors. As normalization, the sum of the round effects within a descriptor is fixed to be 0. Similar, a shift effect was defined per panelist. It is now per panelist*descriptor combination, again normalized to sum to 0 by descriptor. Residual error was defined as one variable for all data. It is now descriptor and panelist dependent. I decided to add variances there. It turned out to be quite a complex model. Running time was about 100 samples per minute on my hardware: too long to sit there waiting for it, but could fit in during a meeting or lunch.
model1 <-
'
data {
    int<lower=0> npanelist;
       int<lower=0> nobs;
    int<lower=0> nsession;
    int<lower=0> nround;
    int<lower=0> ndescriptor;
    int<lower=0> nproduct;
       vector[nobs] y;
       int<lower=1,upper=npanelist> panelist[nobs];
       int<lower=1,upper=nproduct> product[nobs];
    int<lower=1,upper=ndescriptor> descriptor[nobs];
    int<lower=1,upper=nround> rounds[nobs];
    real maxy;
    }
parameters {
    matrix<lower=0,upper=maxy> [nproduct,ndescriptor] profile;
    vector<lower=-maxy/3,upper=maxy/3>[npanelist] shift[ndescriptor];
    vector<lower=-1,upper=1> [npanelist] logsensitivity;
    vector<lower=-maxy/3,upper=maxy/3> [nround] roundeffect[ndescriptor];
    real<lower=0,upper=maxy/5> varr;
    vector [npanelist] lpanelistvar;
    vector [ndescriptor] ldescriptorvar;
    }
transformed parameters {
    vector [nobs] expect;
    vector[npanelist] sensitivity;
    real mlogsens;
    real mlpanelistvar;
    real mldescriptorvar;
    real mroundeff[ndescriptor];
    real meanshift[ndescriptor];
    vector [nobs] sigma;

    mlogsens <- mean(logsensitivity);
    for (i in 1:ndescriptor) {
       mroundeff[i] <- mean(roundeffect[i]);
       meanshift[i] <- mean(shift[i]);
    }
    mlpanelistvar <- mean(lpanelistvar);
    mldescriptorvar <- mean(ldescriptorvar);

    for (i in 1:npanelist) {
        sensitivity[i] <- pow(10,logsensitivity[i]-mlogsens);
        }
    for (i in 1:nobs) {
        expect[i] <- profile[product[i],descriptor[i]]
            *sensitivity[panelist[i]]
            + shift[descriptor[i],panelist[i]]-meanshift[descriptor[i]]
            + roundeffect[descriptor[i],rounds[i]]-mroundeff[descriptor[i]];
        sigma[i] <- sqrt(varr
                         *exp(lpanelistvar[panelist[i]]-mlpanelistvar)
                         *exp(ldescriptorvar[descriptor[i]]-mldescriptorvar));
         }
    }
model {
    logsensitivity ~ normal(0,0.1);
    for (i in 1: ndescriptor) {
       roundeffect[i] ~ normal(0,maxy/10);
       shift[i] ~ normal(0,maxy/10);
       ldescriptorvar[i] ~ normal(0,1);
    }
    for (i in 1:npanelist)
       lpanelistvar[i] ~ normal(0,1);
    y ~ normal(expect,sigma);
    }
generated quantities    {
    vector [npanelist] panelistsd;
    vector [ndescriptor] descriptorsd;
    for (i in 1:npanelist) {
        panelistsd[i] <- sqrt(exp(lpanelistvar[i]));
        }
    for (i in 1:ndescriptor) {
         descriptorsd[i] <- sqrt(exp(ldescriptorvar[i]));
        }
    }
'

model call

pars <- c('profile','shift','roundeffect','sensitivity',
        'panelistsd','descriptorsd')
datainchoc <- with(choc,list(
                panelist=(1:nlevels(Panelist))[Panelist],
                npanelist=nlevels(Panelist),
                session=(1:nlevels(Session))[Session],
                nsession=nlevels(Session),
                rounds=(1:nlevels(Rank))[Rank],
                nround=nlevels(Rank),
                product=(1:nlevels(Product))[Product],
                nproduct=nlevels(Product),
                descriptor=(1:nlevels(Descriptor))[Descriptor],
                ndescriptor=nlevels(Descriptor),
                y=Score,
                nobs=length(Score),
                maxy=10))

fitchoc <- stan(model_code = model1,
        data = datainchoc,
        pars=pars,
        iter = 500,
        warmup=100,
        chains = 4)

Results

I do not think there is much point in showing all printed output. However the summary plot is interesting. There is something with the eight level of some of the factors, a few extra samples might be not unwelcome.
The error is more dependent on panelist than on descriptor, panelists 7, 20 and 29 might benefit from some training.

profile

The code for profile has been slightly modified, last week I only used a few of the samples. For me the intervals look nice and sharp. Other than that choc3 is very different from the others, more sweet, milk, less cocoa. Choc1 very bitter and cocoa.

rounds

It is premature based on one data set, but rounds do seem to have minimal effect. If this was structural on more data sets, this term might be removed.

Panelists' shift

The plot shows that shift is important and this is a factor which should be in the model. Getting this under control might cost more than it is worth.

Code for plots

library(ggplot2)
samples <- extract(fitchoc,'profile')$profile
nsamp <- dim(samples)[1]
profile <- expand.grid(Product=levels(choc$Product),
        Descriptor=levels(choc$Descriptor))
profile$des <- as.numeric(profile$Descriptor)
profile$prod <- as.numeric(profile$Product)
profs <- as.data.frame(t(sapply(1:nrow(profile),function(i){
            subsamples <- c(samples[1:nsamp,profile$prod[i],profile$des[i]])
            c(mean=mean(subsamples),quantile(subsamples,c(.1,.9)))
        })))
names(profs) <- c('score','lmin','lmax')
profile <- cbind(profile,profs)
p1 <- ggplot(profile, aes(y=score, x=des,color=Product))
p1 + geom_path() +
        scale_x_continuous(
                breaks=1:nlevels(profile$Descriptor),
                labels=levels(profile$Descriptor)) +
    xlab('') +
    geom_ribbon(aes(ymin=lmin, ymax=lmax,fill=Product),
                    alpha=.15,linetype='blank') +
    coord_flip()
##########
samples <- extract(fitchoc,'roundeffect')$roundeffect
roundeffect <- expand.grid(Round=levels(choc$Rank),
        Descriptor=levels(choc$Descriptor))
roundeffect$des <- as.numeric(roundeffect$Descriptor)
roundeffect$round <- as.numeric(roundeffect$Round)
rounds <- as.data.frame(t(sapply(1:nrow(roundeffect),function(i){
            subsamples <- c(samples[1:nsamp,roundeffect$des[i],roundeffect$round[i]])
                c(mean=mean(subsamples),quantile(subsamples,c(.1,.9)))
                })))
names(rounds) <- c('score','lmin','lmax')
roundeffect <- cbind(roundeffect,rounds)
library(ggplot2)
p1 <- ggplot(roundeffect, aes(y=score, x=des,color=Round))
p1 + geom_path() +
        scale_x_continuous(
                breaks=1:nlevels(roundeffect$Descriptor),
                labels=levels(roundeffect$Descriptor)) +
        xlab('') +
        geom_ribbon(aes(ymin=lmin, ymax=lmax,fill=Round),
                alpha=.15,linetype='blank') +
        coord_flip()
#########
samples <- extract(fitchoc,'shift')$shift
shift <- expand.grid(Panelist=levels(choc$Panelist),
        Descriptor=levels(choc$Descriptor))
shift$des <- as.numeric(shift$Descriptor)
shift$pnlst <- as.numeric(shift$Panelist)
shifts <- as.data.frame(t(sapply(1:nrow(shift),function(i){
                            subsamples <- c(samples[1:nsamp,shift$des[i],shift$pnlst[i]])
                            c(mean=mean(subsamples),quantile(subsamples,c(.1,.9)))
                        })))
names(shifts) <- c('score','lmin','lmax')
shift <- cbind(shift,shifts)
p1 <- ggplot(shift, aes(y=score, x=des))
p1 + geom_path() +
        scale_x_continuous(
                breaks=1:nlevels(shift$Descriptor),
                labels=levels(shift$Descriptor)) +
        xlab('') +
        geom_ribbon(aes(ymin=lmin, ymax=lmax),
                alpha=.15,linetype='blank') +
        coord_flip() +
        facet_wrap(~ Panelist)

Wiekvoet

Sunday, February 9, 2014

Bayesian analysis of sensory profiling data, part 2