Data
Data is in a number of spreadsheets. The script is a variation on last week, with an extra wrapper for spreadsheets. The final parts are merging locations (Station Leiduin and Witteveen have been replaced by De Zilk and Valthermond respectively) and adding the equipment variable. The code is at the bottom of the post.
pH
It seems pH is still slightly increasing. It also seems there are like two levels of pH. For instance, in Gilze-Rijen the pH is either around 5 or around 6. But 5.5 is less probable.
Iron
For Iron there are just four locations with complete data. Hence these are displayed. It would seem the trend downward continues.
Iron model
Since I wanted to give the two machines each a separate variance, I opted to use nlme in this particular calculation. In addition, rather than following the plot, all locations are used, and the replacement stations are not merged together. The machines seem to give similar variation, which probably means variation in iron concentration is much larger than machine accuracy. The decrease estimate is still around 5% per year.
Linear mixed-effects model fit by REML
Data: Fe2
AIC BIC logLik
3135.969 3184.729 -1559.984
Random effects:
Formula: ~StartDate | Location
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 1.018836e-05 (Intr)
StartDate 6.680830e-06 -0.05
Formula: ~1 | machine %in% Location
(Intercept) Residual
StdDev: 0.09556604 0.3791318
Variance function:
Structure: Different standard deviations per stratum
Formula: ~1 | machine
Parameter estimates:
Van Essen/ECN vanger Eigenbrodt NSA 181 KHT
1.000000 1.049395
Fixed effects: LAmount ~ StartDate
Value Std.Error DF t-value p-value
(Intercept) 0.4373752 0.06188798 3258 7.067208 0
StartDate -0.0000640 0.00000549 3258 -11.645303 0
Correlation:
(Intr)
StartDate -0.887
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.752718189 -0.655477969 -0.003628072 0.645709629 4.226135302
Number of Observations: 3280
Number of Groups:
Location machine %in% Location
17 21
#decrease estimate
> (10^fixef(lme1)['StartDate'])^365
StartDate
0.9476403
Code
Import data
library(XLConnect)
readsheet <- function(wb,cursheet) {
df = readWorksheet(wb, sheet = cursheet , header = TRUE)
topline <- 8
test <- which(df[6,]=='C')+1
numin <- df[topline:nrow(df),test]
names(numin) <- make.names( gsub(' ','',df[6,test]))
for(i in 1:ncol(numin)) numin[,i] <- as.numeric(gsub(',','.',numin[,i]))
status = df[topline:nrow(df),test-1]
names(status) <- paste('C',make.names( gsub(' ','',df[6,test])),sep='')
numin$StartDate <- as.Date(substr(df[topline:nrow(df),1],1,10))
numin$EndDate <- as.Date(substr(df[topline:nrow(df),2],1,10))
numin <- cbind(numin,status)
tall <- reshape(numin,
varying=list(make.names( gsub(' ','',df[6,test])),paste('C',make.names( gsub(' ','',df[6,test])),sep='')),
v.names=c('Amount','Status'),
timevar='Compound',
idvar=c('StartDate','EndDate'),
times=paste(df[6,test],'(',df[5,test],')',sep=''),
direction='long')
tall$Compound <- factor(gsub(' )',')',gsub(' +',' ',tall$Compound)))
row.names(tall)<- NULL
tall$Location <- cursheet
tall
}
readfile <- function(fname) {
wb <- loadWorkbook(fname)
print(wb)
sheets <- getSheets(wb)[-1]
tt <- lapply(sheets,readsheet,wb=wb)
tt2 <- do.call(rbind,tt)
tt2$Location <- factor(tt2$Location)
tt2$fname <- fname
tt2
}
fnames <- dir(pattern='*.xls')
rf <- lapply(fnames,readfile)
rf2 <- do.call(rbind,rf)
(ll <- levels(rf2$Location))
ll[c(13,17)] <- 'V&W'
ll[c(9,4)] <- 'L&DZ'
rf2$Location2 <- factor(ll[rf2$Location])
head(rf2)
rf2$machine <- factor(ifelse(rf2$fname=="lmre_1992-2005.xls",
'Van Essen/ECN vanger',
' Eigenbrodt NSA 181 KHT'))
pH plot
library(ggplot2)
library(scales)
levels(rf2$Compound)
(cc <- levels(rf2$Compound)[22])
rf2$Amount2 <- rf2$Amount
rf2$Amount2[rf2$Amount<1 & rf2$Compound==cc] <- NA
rf2[rf2$Amount < 1 & rf2$Compound==cc & !is.na(rf2$Amount),]
pp <- ggplot(rf2[rf2$Compound==cc & !(rf2$Location2 %in% ll[c(2,5,7,15)]),],
aes( StartDate,Amount2,col=machine))
pp + geom_point() +
facet_wrap(~Location2) +
ylab(cc) + xlab('Year') + ylim(c(4,7)) +
scale_x_date(breaks=as.Date(paste(c('1992','2000','2010'),'-01-01',sep=''),
format=('%Y-%m-%d')),
labels = date_format("%Y")) +
theme(legend.position = "bottom")
Fe Plot
(cc <- levels(rf2$Compound)[9])
#levels(rf2$Location2)
pp <- ggplot(rf2[rf2$Compound==cc & (rf2$Location2 %in%
levels(rf2$Location2)[c(7,8,10,13)]),],
aes( StartDate,Amount,color=machine))
pp + geom_point() +
scale_y_log10() +
facet_wrap(~Location2) +
ylab(cc) + xlab('Year') +
scale_x_date(breaks=as.Date(paste(c('1992','2000','2010'),'-01-01',sep=''),
format=('%Y-%m-%d')),
labels = date_format("%Y")) +
theme(legend.position = "bottom")
Fe model
Fe <- rf2[rf2$Compound==levels(rf2$Compound)[9],]
# plot(density(Fe$Amount[!is.na(Fe$Status) & Fe$Status!=0],adjust=.5))
Fe$LAmount <- log10(Fe$Amount)
Fe$LAmount[Fe$Amount<=0] <- NA
Fe$Location <- factor
#nlme
library(nlme)
Fe2 <- Fe[complete.cases(Fe),]
lme1 <- lme(LAmount ~ StartDate,
# random= ~ 1|machine, # machine = groups
# random=~StartDate | Location, # location=groups
random=list(Location=~StartDate, machine=~1),
weights=varIdent(form=~1 | machine),
data=Fe2 )
summary(lme1)
Nice post, nice plots! As a fellow Dutchman (!) I invite you to have a look at my newly developped blog! http://gettingthingsr.site90.com/
ReplyDelete