Data
Data from Eurostat. Or, if you are lazy, Google une_rt_m, which is the name of the table. There is a bit of pre-processing of the data, mostly to make the country names short enough for plotting. The plots shown are unemployment and its first derivative, both smoothed.
Plots
Smoothed data
First derivative
Code
library(ggplot2)
library(KernSmooth)
library(plyr)
library(scales) # to access breaks/formatting functions
# Load the une_rt_m export; ':' is read as a missing value.
# stringsAsFactors = TRUE is required: every levels() call below relies on GEO
# and AGE being factors, and since R 4.0.0 read.csv() no longer converts
# strings to factors by default.
r1 <- read.csv("une_rt_m_1_Data.csv", na.strings = ':',
               stringsAsFactors = TRUE)

# Shorten the region labels so they fit in the facet strips.
# fixed = TRUE because the pattern contains a literal ')'.
levels(r1$GEO) <- sub(' countries)', ')', levels(r1$GEO), fixed = TRUE)
levels(r1$GEO) <- sub('European Union', 'EU', levels(r1$GEO))
# Rename the long Euro-area label before filtering: it contains EA12 ... EA17
# and would otherwise be caught by the digit pattern used below.
levels(r1$GEO)[levels(r1$GEO)=='Euro area (EA11-2000, EA12-2006, EA13-2007, EA15-2008, EA16-2010, EA17-2013, EA18)'] <- "EAll"
levels(r1$GEO)[levels(r1$GEO)=='United Kingdom'] <- 'UK'
levels(r1$GEO)[levels(r1$GEO)=='United States'] <- 'US'
levels(r1$GEO)[levels(r1$GEO)=='Germany (until 1990 former territory of the FRG)'] <- 'Germany'
# Show the resulting labels (auto-printed at top level).
levels(r1$GEO)

# Regions whose label contains one of these digit pairs are removed.
# NOTE(review): presumably the superseded EU/Euro-area aggregates; confirm
# against the printed list.
dropped_geo <- grep('12|13|15|16|17|25|27', x = levels(r1$GEO), value = TRUE)
dropped_geo
r1 <- r1[!(r1$GEO %in% dropped_geo), ]
# Re-create the factor so the removed regions no longer appear as levels.
r1$GEO <- factor(r1$GEO)

# Reorder the age groups for the legend.
# NOTE(review): assumes AGE has exactly three levels and that positions 2, 1, 3
# give the wanted order; verify against levels(r1$AGE).
r1$Age <- factor(r1$AGE, levels = levels(r1$AGE)[c(2, 1, 3)])
# Turn the TIME label into a Date on the first of the month: every 'M' becomes
# '-' and '-01' is appended.
r1$Date <- as.Date(paste0(gsub('M', '-', as.character(r1$TIME)), '-01'))
#
# Peak unemployment rate per region (missing values ignored); used to spread
# the regions over three plots.
maxi <- aggregate(r1$Value, by = list(GEO = r1$GEO), FUN = max, na.rm = TRUE)

# Split the regions into thirds by peak rate: one column per third.
# - Upper bounds are inclusive, so a region whose peak equals a tertile
#   boundary is kept instead of falling between two groups.
# - The groups can differ in size, which data.frame() rejects, so shorter
#   groups are padded with NA. NA never matches a region in `%in%`, so
#   `parts[, i]` keeps working as a lookup set.
parts <- local({
  cuts <- quantile(maxi$x, c(1/3, 2/3))
  groups <- list(
    low    = maxi$GEO[maxi$x <= cuts[[1]]],
    middle = maxi$GEO[maxi$x > cuts[[1]] & maxi$x <= cuts[[2]]],
    high   = maxi$GEO[maxi$x > cuts[[2]]]
  )
  longest <- max(vapply(groups, length, integer(1)))
  # Indexing a factor past its end yields NA, which does the padding.
  data.frame(lapply(groups, function(g) g[seq_len(longest)]))
})
#ggplot(r1[r1$GEO %in% low,],aes(x=Date,y=Value,colour=Age)) +
# facet_wrap( ~ GEO, drop=TRUE) +
# geom_line() +
# theme(legend.position = "bottom")
# ylab('% Unemployment') + xlab('Year')
# Smoothed unemployment level for every region / age-group combination.

# One grouping key per GEO x Age pair.
r1[["class"]] <- interaction(r1[["GEO"]], r1[["Age"]])
# Keep only rows without a missing field, then drop the keys that no longer
# occur in the data.
r3 <- r1[complete.cases(r1), ]
r3[["class"]] <- factor(r3[["class"]])

Perc <- ddply(
  .data = r3,
  .variables = .(class),
  .fun = function(series, ...) {
    # Local polynomial estimate of the series itself (drv = 0). Dates enter as
    # day counts, so the bandwidth of 90 is expressed in days.
    fit <- locpoly(
      x = as.numeric(series$Date),
      y = series$Value,
      drv = 0,
      bandwidth = 90
    )
    # One row per grid point, tagged with the group's age class and region.
    data.frame(
      Date = as.Date(fit$x, origin = '1970-01-01'),
      sPerc = fit$y,
      Age = series$Age[1],
      GEO = series$GEO[1]
    )
  },
  .inform = FALSE
)
# One PNG per group of regions (low.png, middle.png, high.png) showing the
# smoothed unemployment level, faceted by region and coloured by age group.
for (tier in c('low', 'middle', 'high')) {
  # Build the plot before opening the device, so a failure here does not
  # leave an empty file or an open device behind.
  tier_plot <- ggplot(Perc[Perc$GEO %in% parts[[tier]], ],
                      aes(x = Date, y = sPerc, colour = Age)) +
    facet_wrap( ~ GEO, drop = TRUE) +
    geom_line() +
    theme(legend.position = "bottom") +
    ylab('% Unemployment') + xlab('Year') +
    # Tick every five years, labelled with the two-digit year.
    scale_x_date(breaks = date_breaks("5 years"),
                 labels = date_format("%y"))
  png(paste0(tier, '.png'))
  # finally = dev.off() closes the device even when rendering fails;
  # otherwise the png device would stay open and capture later output.
  tryCatch(print(tier_plot), finally = dev.off())
}
# Smoothed first derivative of unemployment for every region / age-group
# combination in r3.
dPerc <- ddply(
  .data = r3,
  .variables = .(class),
  .fun = function(series, ...) {
    # Local polynomial estimate of the first derivative (drv = 1). Dates enter
    # as day counts, so the bandwidth is half a year in days and the slope is
    # per day.
    fit <- locpoly(
      x = as.numeric(series$Date),
      y = series$Value,
      drv = 1,
      bandwidth = 365/2
    )
    # One row per grid point, tagged with the group's age class and region.
    data.frame(
      Date = as.Date(fit$x, origin = '1970-01-01'),
      dPerc = fit$y,
      Age = series$Age[1],
      GEO = series$GEO[1]
    )
  },
  .inform = FALSE
)
# One PNG per group of regions (dlow.png, dmiddle.png, dhigh.png) showing the
# smoothed first derivative, faceted by region and coloured by age group.
for (tier in c('low', 'middle', 'high')) {
  # Build the plot before opening the device, so a failure here does not
  # leave an empty file or an open device behind.
  tier_plot <- ggplot(dPerc[dPerc$GEO %in% parts[[tier]], ],
                      aes(x = Date, y = dPerc, colour = Age)) +
    facet_wrap( ~ GEO, drop = TRUE) +
    geom_line() +
    theme(legend.position = "bottom") +
    ylab('Change in % Unemployment') + xlab('Year') +
    # Tick every five years, labelled with the two-digit year.
    scale_x_date(breaks = date_breaks("5 years"),
                 labels = date_format("%y"))
  png(paste0('d', tier, '.png'))
  # finally = dev.off() closes the device even when rendering fails;
  # otherwise the png device would stay open and capture later output.
  tryCatch(print(tier_plot), finally = dev.off())
}
library(KernSmooth)
library(plyr)
library(scales) # to access breaks/formatting functions
# NOTE(review): from here on the file repeats the load / smooth / plot steps
# that already appear earlier in it.
# Load the une_rt_m export; ':' is read as a missing value.
# stringsAsFactors = TRUE is required: every levels() call below relies on GEO
# and AGE being factors, and since R 4.0.0 read.csv() no longer converts
# strings to factors by default.
r1 <- read.csv("une_rt_m_1_Data.csv", na.strings = ':',
               stringsAsFactors = TRUE)

# Shorten the region labels so they fit in the facet strips.
# fixed = TRUE because the pattern contains a literal ')'.
levels(r1$GEO) <- sub(' countries)', ')', levels(r1$GEO), fixed = TRUE)
levels(r1$GEO) <- sub('European Union', 'EU', levels(r1$GEO))
# Rename the long Euro-area label before filtering: it contains EA12 ... EA17
# and would otherwise be caught by the digit pattern used below.
levels(r1$GEO)[levels(r1$GEO)=='Euro area (EA11-2000, EA12-2006, EA13-2007, EA15-2008, EA16-2010, EA17-2013, EA18)'] <- "EAll"
levels(r1$GEO)[levels(r1$GEO)=='United Kingdom'] <- 'UK'
levels(r1$GEO)[levels(r1$GEO)=='United States'] <- 'US'
levels(r1$GEO)[levels(r1$GEO)=='Germany (until 1990 former territory of the FRG)'] <- 'Germany'
# Show the resulting labels (auto-printed at top level).
levels(r1$GEO)

# Regions whose label contains one of these digit pairs are removed.
# NOTE(review): presumably the superseded EU/Euro-area aggregates; confirm
# against the printed list.
dropped_geo <- grep('12|13|15|16|17|25|27', x = levels(r1$GEO), value = TRUE)
dropped_geo
r1 <- r1[!(r1$GEO %in% dropped_geo), ]
# Re-create the factor so the removed regions no longer appear as levels.
r1$GEO <- factor(r1$GEO)

# Reorder the age groups for the legend.
# NOTE(review): assumes AGE has exactly three levels and that positions 2, 1, 3
# give the wanted order; verify against levels(r1$AGE).
r1$Age <- factor(r1$AGE, levels = levels(r1$AGE)[c(2, 1, 3)])
# Turn the TIME label into a Date on the first of the month: every 'M' becomes
# '-' and '-01' is appended.
r1$Date <- as.Date(paste0(gsub('M', '-', as.character(r1$TIME)), '-01'))
#
# Peak unemployment rate per region (missing values ignored); used to spread
# the regions over three plots.
maxi <- aggregate(r1$Value, by = list(GEO = r1$GEO), FUN = max, na.rm = TRUE)

# Split the regions into thirds by peak rate: one column per third.
# - Upper bounds are inclusive, so a region whose peak equals a tertile
#   boundary is kept instead of falling between two groups.
# - The groups can differ in size, which data.frame() rejects, so shorter
#   groups are padded with NA. NA never matches a region in `%in%`, so
#   `parts[, i]` keeps working as a lookup set.
parts <- local({
  cuts <- quantile(maxi$x, c(1/3, 2/3))
  groups <- list(
    low    = maxi$GEO[maxi$x <= cuts[[1]]],
    middle = maxi$GEO[maxi$x > cuts[[1]] & maxi$x <= cuts[[2]]],
    high   = maxi$GEO[maxi$x > cuts[[2]]]
  )
  longest <- max(vapply(groups, length, integer(1)))
  # Indexing a factor past its end yields NA, which does the padding.
  data.frame(lapply(groups, function(g) g[seq_len(longest)]))
})
#ggplot(r1[r1$GEO %in% low,],aes(x=Date,y=Value,colour=Age)) +
# facet_wrap( ~ GEO, drop=TRUE) +
# geom_line() +
# theme(legend.position = "bottom")
# ylab('% Unemployment') + xlab('Year')
# Smoothed unemployment level for every region / age-group combination.

# One grouping key per GEO x Age pair.
r1[["class"]] <- interaction(r1[["GEO"]], r1[["Age"]])
# Keep only rows without a missing field, then drop the keys that no longer
# occur in the data.
r3 <- r1[complete.cases(r1), ]
r3[["class"]] <- factor(r3[["class"]])

Perc <- ddply(
  .data = r3,
  .variables = .(class),
  .fun = function(series, ...) {
    # Local polynomial estimate of the series itself (drv = 0). Dates enter as
    # day counts, so the bandwidth of 90 is expressed in days.
    fit <- locpoly(
      x = as.numeric(series$Date),
      y = series$Value,
      drv = 0,
      bandwidth = 90
    )
    # One row per grid point, tagged with the group's age class and region.
    data.frame(
      Date = as.Date(fit$x, origin = '1970-01-01'),
      sPerc = fit$y,
      Age = series$Age[1],
      GEO = series$GEO[1]
    )
  },
  .inform = FALSE
)
# One PNG per group of regions (low.png, middle.png, high.png) showing the
# smoothed unemployment level, faceted by region and coloured by age group.
for (tier in c('low', 'middle', 'high')) {
  # Build the plot before opening the device, so a failure here does not
  # leave an empty file or an open device behind.
  tier_plot <- ggplot(Perc[Perc$GEO %in% parts[[tier]], ],
                      aes(x = Date, y = sPerc, colour = Age)) +
    facet_wrap( ~ GEO, drop = TRUE) +
    geom_line() +
    theme(legend.position = "bottom") +
    ylab('% Unemployment') + xlab('Year') +
    # Tick every five years, labelled with the two-digit year.
    scale_x_date(breaks = date_breaks("5 years"),
                 labels = date_format("%y"))
  png(paste0(tier, '.png'))
  # finally = dev.off() closes the device even when rendering fails;
  # otherwise the png device would stay open and capture later output.
  tryCatch(print(tier_plot), finally = dev.off())
}
# Smoothed first derivative of unemployment for every region / age-group
# combination in r3.
dPerc <- ddply(
  .data = r3,
  .variables = .(class),
  .fun = function(series, ...) {
    # Local polynomial estimate of the first derivative (drv = 1). Dates enter
    # as day counts, so the bandwidth is half a year in days and the slope is
    # per day.
    fit <- locpoly(
      x = as.numeric(series$Date),
      y = series$Value,
      drv = 1,
      bandwidth = 365/2
    )
    # One row per grid point, tagged with the group's age class and region.
    data.frame(
      Date = as.Date(fit$x, origin = '1970-01-01'),
      dPerc = fit$y,
      Age = series$Age[1],
      GEO = series$GEO[1]
    )
  },
  .inform = FALSE
)
# One PNG per group of regions (dlow.png, dmiddle.png, dhigh.png) showing the
# smoothed first derivative, faceted by region and coloured by age group.
for (tier in c('low', 'middle', 'high')) {
  # Build the plot before opening the device, so a failure here does not
  # leave an empty file or an open device behind.
  tier_plot <- ggplot(dPerc[dPerc$GEO %in% parts[[tier]], ],
                      aes(x = Date, y = dPerc, colour = Age)) +
    facet_wrap( ~ GEO, drop = TRUE) +
    geom_line() +
    theme(legend.position = "bottom") +
    ylab('Change in % Unemployment') + xlab('Year') +
    # Tick every five years, labelled with the two-digit year.
    scale_x_date(breaks = date_breaks("5 years"),
                 labels = date_format("%y"))
  png(paste0('d', tier, '.png'))
  # finally = dev.off() closes the device even when rendering fails;
  # otherwise the png device would stay open and capture later output.
  tryCatch(print(tier_plot), finally = dev.off())
}