###############################################################
####
####  5. Forecasting ARMA models. 
####
###############################################################

####
# We have discussed the estimation and diagnostics of
# ARMA models. The same can be done for possible non-stationary
# ARIMA models. There are several procedures implented in R
# ar, arima, arima0 (older but faster), and arma (in TSA library)
# for first four prediction is straightforward by procedure
# predict 
# Note: arima/arima0 use "exact mle", arma "conditional l.s"
####
# predict (be careful/read manual/don't use it as a black box

# for data etc.
#http://www.stat.pitt.edu/stoffer/tsa2/
#http://www.stat.uiowa.edu/~kchan/TSA.htm
#

rec=scan("recruit.dat") # number of new fish in central Pacific
                                # supposedly related to El Nino

soi=scan("soi.dat")     # changes in airpressure over Pacific
                                # known to be related with sea surface temperature

plot.ts(cbind(rec,soi))
acf(rec,50)
pacf(rec)
acf(soi,50)
ccf(soi,rec,50)

rec.yw = ar.yw(rec, order=2)
rec.yw$x.mean
rec.yw$ar
# sqrt(diag(rec.yw$asy.var.coef)) # variance of estimators for phi's  
# rec.yw$var.pred                 # error variance of estimator for \sigma
 
 
rec.pr$se ### mean sq. prediction error estimate/note convergence

rec.pr = predict(rec.yw, n.ahead=24)  

#########################################
# assuming gaussianity we get prediction intervals
# multiply by 1.95 to get 95% pred.interval

U = rec.pr$pred + 1.96*rec.pr$se 
L = rec.pr$pred - 1.96*rec.pr$se
minx = min(rec,L)
maxx = max(rec,U)
month=360:453
plot(month,rec[month],type="o",ylim=c(-20,140),xlim=c(360,480),ylab="recruits")
lines(rec.pr$pred, col="red", type="o")
lines(U, col="blue", lty="dashed")
lines(L, col="blue", lty="dashed") 

### note: m.s.e converges and predictions converge to mean


#########################################
# assuming gaussianity we get 95 % prediction intervals

### Annual number of hare data.

library(TSA)

data(hare)
help(hare)  

m1.hare=arima(sqrt(hare),order=c(2,0,0))

# straight from function arima

plot(m1.hare, n.ahead=25,type='b',xlab='Year',ylab='Sqrt(hare)')


abline(h=coef(m1.hare)[names(coef(m1.hare))=='intercept'])

# or using predict as before

hare.pr = predict(m1.hare, n.ahead=25)  
U = hare.pr$pred + 2*hare.pr$se 
L = hare.pr$pred - 2*hare.pr$se
lines(hare.pr$pred, col="red", type="o")
lines(U, col="blue", lty="dashed")
lines(L, col="blue", lty="dashed")
### note: m.s.e converges and predictions converge to mean


hare.pr = predict(m1.hare, n.ahead=25)  

#########################################
# or altogether as before
#
#U = hare.pr$pred + 2*hare.pr$se 
#L = hare.pr$pred - 2*hare.pr$se
#year=1905:1935
#plot(year,sqrt(hare),type="o",xlim=c(1905,1960),ylim=c(-1,14),ylab="sqrt hare")
#lines(hare.pr$pred, col="red", type="o")
#lines(U, col="blue", lty="dashed")
#lines(L, col="blue", lty="dashed")


### risovi, Annual numbers of lynx trappings for 1821–1934 in Canada. Taken from Brockwell & Davis (1991)

ts.plot(lynx)
ts.plot(log(lynx))

pacf(log(lynx))
acf(log(lynx))  

fit <- arima(log(lynx),order=c(5,0,0))
pp <- predict(fit, n.ahead = 20)


#########################################
# assuming gaussianity we get prediction intervals

plot(log(lynx),xlim=c(1820,1954),ylim=c(3.5,10))
lines(1935:1954,pp$pred,col=2)
lines(1935:1954,pp$pred + 1.96*pp$se,col=3)
lines(1935:1954,pp$pred - 1.96*pp$se,col=3)


#########################################
# assuming gaussianity but using -> using Monte Carlo  
# simulations


phi <- fit$coef[1:5]
mu <- fit$coef[6]

sigma2 <- fit$sigma
xpred <- matrix(0,ncol=25,nrow=1000)    #### 1000 puta simuliramo iz modela
for(k in 1:5)
  {
    xpred[,k] <- log(lynx)[109+k]       #### prvih 5 vrijednosti su starih prvih 5
  } 
for(i in 1:1000)                        #### od 6 do 25 su simulacije
  {
    for(j in 1:20)
      {
        xpred[i,5+j] <- mu+ sum(phi*(xpred[i,(j+4):j] - mu)) + rnorm(1,0,sqrt(sigma2))
      }
  }


plot(log(lynx),xlim=c(1820,1954),ylim=c(3.5,10))
lines(1935:1954,xpred[1,6:25],col=5)
lines(1935:1954,xpred[2,6:25],col=6)
lines(1935:1954,xpred[3,6:25],col=7)

plot(log(lynx),xlim=c(1820,1954),ylim=c(3.5,10))
lines(1935:1954,pp$pred,col=2)
lines(1935:1954,pp$pred + 1.96*pp$se,col=2)
lines(1935:1954,pp$pred - 1.96*pp$se,col=2)
lines(1935:1954,apply(xpred[,6:25],2,mean),col=4)

my.quant <- function(x)
  {
    quantile(x,c(0.025,0.975))
  }

lines(1935:1954,apply(xpred[,6:25],2,my.quant)[1,],col=4)
lines(1935:1954,apply(xpred[,6:25],2,my.quant)[2,],col=4)

### we could do the same even with nongaussian innovations
### if other distribution of the noise is more appropriate