Having said the negative, the pictures can be interpreted. The car market has changed from a double peaked distribution to a three peak distribution. Sales in recent years are mostly in the lowest weight category. The weight of cars in this category is slowly increasing though.
Data
Data were obtained as described here. I made an additional plot of the weights. These are the raw data weighted by the width of the categories.lweight4 <- weight2[weight2$RefYear == weight2$BuildYear+1,]
weight4$lower <- lweightcats[weight4$Onderwerpen_2]
weight4$upper <- uweightcats[weight4$Onderwerpen_2]
datashow <- expand.grid(year=2000:2012,
weight=seq(500,2000,by=20))
datashow$y <-0
for (ii in 1:nrow(weight4)) {
datashow$y[datashow$weight>=weight4$lower[ii]
& datashow$weight<=weight4$upper[ii]
& datashow$year==weight4$BuildYear[ii]] <-
weight4$Waarde[ii]*100/(weight4$upper[ii]-weight4$lower[ii])
}
library(lattice)
levelplot(y ~ weight+ year , data=datashow, col.regions=
colorRampPalette(c('white','yellow','green','blue','purple','red'))
)
xyplot(y ~ weight | factor(year) , data=datashow,type='l')
Modelling.
I worked on the previous code to obtain something which was very short and wrapped in a lapply. This means I had to trust the outcome, which I thought was reasonable.
la <- lapply(2000:2012,function(x) {
weight <- makdata(x)
weightlims <- c(0,weight$upper)
update <- updater(weight)
dist <- update2line(weight,update)
dist$year <- x
dist
}
)
rbla <- do.call(rbind,la)
levelplot(y ~ x+ year , data=rbla[rbla$x %in% seq(700,1600,10),],
col.regions=
colorRampPalette(c('white','yellow','green','blue','purple','red'))
model flaws
So I did the calculations again, resulting in rbla2, because I found 2009 odd compared to the rest. For comparison the next plot:
rbla$sim <- 1
rbla2$sim <- 2
rblaall <- rbind(rbla,rbla2)
xyplot(y ~ x | factor(year) ,group=sim, type='l', data=rblaall[rblaall$x %in% seq(700,1600,10),])
R-code
weight1 <- read.csv2('Motorvoertuigen__per_010613140907.csv',na.strings='-')
weight2 <- weight1[!is.na(weight1$Waarde),]
weight2$BuildYear <- as.numeric(sub('Bouwjaar ','',as.character(weight2$Bouwjaren)))
weight2$RefYear <- as.numeric(sub(', 1 januari','',as.character(weight2$Peildatum)))
weightcats <- levels(weight2$Onderwerpen_2)
weightcats <- gsub('en meer','and more',weightcats)
levels(weight2$Onderwerpen_2) <- weightcats
lweightcats <- as.numeric(gsub('( |-).*$','',weightcats))
uweightcats <- as.numeric(gsub('(^[0-9]+-)|([ [:alpha:]]+$)','',weightcats))
uweightcats[uweightcats==2451] <- 3500
nspacejump1 <- function(x,i,sd) {
xold <- log(x[i]/(1-x[i])) + rnorm(1,0,sd)
xnew <- 1/(1+exp(-xold))
x <- x/sum(x[-i])*(1-xnew)
x[i] <- xnew
x
}
curldev5 <- function(counts,curmodel,pemodel) {
dsd <- sum(dnorm(curmodel$sd,300,300,log=TRUE))
prop.exp <- pemodel %*% curmodel$w
prop.exp <- prop.exp/sum(prop.exp)
dsd + lgamma(sum(counts$Waarde) + 1) + sum(counts$Waarde * log(prop.exp) - counts$lgam)
#dmultinom(counts$Waarde,prob=prop.exp,log=TRUE)
}
version6 <- function(weight3,curmodel,niter) {
pemodel <- matrix(0,nrow=22,ncol=nrow(curmodel))
pemodel2 <- pemodel
for (i in 1:nrow(curmodel)) {
pn <- pnorm(weightlims,curmodel$mean[i],curmodel$sd[i])
pemodel[,i] <- (pn[-1]-pn[-23])
}
cnow <- curldev5(weight3,curmodel,pemodel)
ndist <- nrow(curmodel)
result <- matrix(nrow=ndist*niter,ncol=3*ndist+1)
index <- 1
for (iter in 1:niter) {
for (dist in 1:ndist) {
nowmodel <- curmodel
nowpe <- pemodel
result[index,] <- c(curmodel$mean,curmodel$sd,curmodel$w,cnow)
curmodel$mean[dist] <- curmodel$mean[dist] + rnorm(1,0,1)
curmodel$sd[dist] <- curmodel$sd[dist] * runif(1,1/1.01,1.01)
curmodel$w <- nspacejump1(curmodel$w,dist,.1)
pn <- pnorm(weightlims,curmodel$mean[dist],curmodel$sd[dist])
pemodel[,dist] <- (pn[-1]-pn[-23])
cnew <- curldev5(weight3,curmodel,pemodel)
# cat(c(cnow,cnew,'\n'))
if (exp(cnew-cnow) > runif(1) ) cnow <- cnew
else {curmodel <- nowmodel
pemodel <- nowpe }
index <- index + 1
}
}
return(list(result=result,curmodel=curmodel))
}
curmodel5 <- data.frame(
mean=c( 892.0141263, 1.417520e+03, 1306.8248062, 1.939645e+03,1000),
sd =c( 99.5660288, 2.129247e+02, 194.2452168, 1.684932e+02,100),
w =c( 0.2252041, 4.384217e-02, 0.6125014, 1.845233e-02,.1))
makdata <- function(xxxx) {
weightxxxx <- weight2[weight2$RefYear==xxxx+1 & weight2$BuildYear==xxxx,c(2,6)]
weightxxxx$lower <- lweightcats[weightxxxx$Onderwerpen_2]
weightxxxx$upper <- uweightcats[weightxxxx$Onderwerpen_2]
weightxxxx$lgam <- lgamma(weightxxxx$Waarde + 1)
weightxxxx
}
updater <- function(weight,nburnin=200000,nsample=10000) {
update <- version6(weight,curmodel5,nburnin)
update <- version6(weight,update$curmodel,nsample)
plot(update$result[,ncol(update$result)],type='l')
return(update)
}
update2line <- function(weight,update) {
integral <- sum((weight$upper-weight$lower)*weight$Waarde)
x <- seq(0,3500,10)
ysum <- rep(0,length(x))
for(i in seq(1,10000,100)) {
ndist <- nrow(update$curmodel)
y <- rep(0,length(x))
newmodel <- data.frame(mean = update$result[i,1:ndist],
sd=update$result[i,ndist+(1:ndist)],
w=update$result[i,2*ndist+(1:ndist)])
for (ir in 1:nrow(newmodel))
y <- y + newmodel$w[ir]*dnorm(x,newmodel$mean[ir],newmodel$sd[ir])
ysum <- ysum+y
}
data.frame(x=x,y=integral*ysum/sum(y))
}
My friend mentioned to me your blog, so I thought I’d read it for myself. Very interesting insights, will be back for more! check our our website here
ReplyDeleteYou have raised an important issue..Thanks for sharing..I would like to read more current affairs from this blog..keep posting.. sell my car
ReplyDeleteMy friend mentioned to me your blog, so I thought I’d read it for myself. Very interesting insights, will be back for more! used Holden Commodores
ReplyDeleteThis comment has been removed by a blog administrator.
ReplyDeleteThis comment has been removed by a blog administrator.
ReplyDelete