############################################################### #### #### 5. Forecasting ARMA models. #### ############################################################### #### # We have discussed the estimation and diagnostics of # ARMA models. The same can be done for possible non-stationary # ARIMA models. There are several procedures implented in R # ar, arima, arima0 (older but faster), and arma (in TSA library) # for first four prediction is straightforward by procedure # predict # Note: arima/arima0 use "exact mle", arma "conditional l.s" #### # predict (be careful/read manual/don't use it as a black box # for data etc. #http://www.stat.pitt.edu/stoffer/tsa2/ #http://www.stat.uiowa.edu/~kchan/TSA.htm # rec=scan("recruit.dat") # number of new fish in central Pacific # supposedly related to El Nino soi=scan("soi.dat") # changes in airpressure over Pacific # known to be related with sea surface temperature plot.ts(cbind(rec,soi)) acf(rec,50) pacf(rec) acf(soi,50) ccf(soi,rec,50) rec.yw = ar.yw(rec, order=2) rec.yw$x.mean rec.yw$ar # sqrt(diag(rec.yw$asy.var.coef)) # variance of estimators for phi's # rec.yw$var.pred # error variance of estimator for \sigma rec.pr$se ### mean sq. prediction error estimate/note convergence rec.pr = predict(rec.yw, n.ahead=24) ######################################### # assuming gaussianity we get prediction intervals # multiply by 1.95 to get 95% pred.interval U = rec.pr$pred + 1.96*rec.pr$se L = rec.pr$pred - 1.96*rec.pr$se minx = min(rec,L) maxx = max(rec,U) month=360:453 plot(month,rec[month],type="o",ylim=c(-20,140),xlim=c(360,480),ylab="recruits") lines(rec.pr$pred, col="red", type="o") lines(U, col="blue", lty="dashed") lines(L, col="blue", lty="dashed") ### note: m.s.e converges and predictions converge to mean ######################################### # assuming gaussianity we get 95 % prediction intervals ### Annual number of hare data. library(TSA) data(hare) help(hare) m1.hare=arima(sqrt(hare),order=c(2,0,0)) # straight from function arima plot(m1.hare, n.ahead=25,type='b',xlab='Year',ylab='Sqrt(hare)') abline(h=coef(m1.hare)[names(coef(m1.hare))=='intercept']) # or using predict as before hare.pr = predict(m1.hare, n.ahead=25) U = hare.pr$pred + 2*hare.pr$se L = hare.pr$pred - 2*hare.pr$se lines(hare.pr$pred, col="red", type="o") lines(U, col="blue", lty="dashed") lines(L, col="blue", lty="dashed") ### note: m.s.e converges and predictions converge to mean hare.pr = predict(m1.hare, n.ahead=25) ######################################### # or altogether as before # #U = hare.pr$pred + 2*hare.pr$se #L = hare.pr$pred - 2*hare.pr$se #year=1905:1935 #plot(year,sqrt(hare),type="o",xlim=c(1905,1960),ylim=c(-1,14),ylab="sqrt hare") #lines(hare.pr$pred, col="red", type="o") #lines(U, col="blue", lty="dashed") #lines(L, col="blue", lty="dashed") ### risovi, Annual numbers of lynx trappings for 1821–1934 in Canada. Taken from Brockwell & Davis (1991) ts.plot(lynx) ts.plot(log(lynx)) pacf(log(lynx)) acf(log(lynx)) fit <- arima(log(lynx),order=c(5,0,0)) pp <- predict(fit, n.ahead = 20) ######################################### # assuming gaussianity we get prediction intervals plot(log(lynx),xlim=c(1820,1954),ylim=c(3.5,10)) lines(1935:1954,pp$pred,col=2) lines(1935:1954,pp$pred + 1.96*pp$se,col=3) lines(1935:1954,pp$pred - 1.96*pp$se,col=3) ######################################### # assuming gaussianity but using -> using Monte Carlo # simulations phi <- fit$coef[1:5] mu <- fit$coef[6] sigma2 <- fit$sigma xpred <- matrix(0,ncol=25,nrow=1000) #### 1000 puta simuliramo iz modela for(k in 1:5) { xpred[,k] <- log(lynx)[109+k] #### prvih 5 vrijednosti su starih prvih 5 } for(i in 1:1000) #### od 6 do 25 su simulacije { for(j in 1:20) { xpred[i,5+j] <- mu+ sum(phi*(xpred[i,(j+4):j] - mu)) + rnorm(1,0,sqrt(sigma2)) } } plot(log(lynx),xlim=c(1820,1954),ylim=c(3.5,10)) lines(1935:1954,xpred[1,6:25],col=5) lines(1935:1954,xpred[2,6:25],col=6) lines(1935:1954,xpred[3,6:25],col=7) plot(log(lynx),xlim=c(1820,1954),ylim=c(3.5,10)) lines(1935:1954,pp$pred,col=2) lines(1935:1954,pp$pred + 1.96*pp$se,col=2) lines(1935:1954,pp$pred - 1.96*pp$se,col=2) lines(1935:1954,apply(xpred[,6:25],2,mean),col=4) my.quant <- function(x) { quantile(x,c(0.025,0.975)) } lines(1935:1954,apply(xpred[,6:25],2,my.quant)[1,],col=4) lines(1935:1954,apply(xpred[,6:25],2,my.quant)[2,],col=4) ### we could do the same even with nongaussian innovations ### if other distribution of the noise is more appropriate