Statistika

Linkovi: 

    web-stranica kolegija  (vjezbe s primjerima iz R-a)

    time line of Statistics


Upis ocjena: 1.7. 2014.  u 11 sati

Predavanja: cetvrtkom 10-13, PMF-MO, 003





Testovi prilagodbe ilustracija


###

x <- c(315,101,108,32)  ### podaci o boji i obliku graska
p <- c(9,3,3,1)         ### teoretski odnos frekvencija prema Medelovim principima
chisq.test(x, p = p, rescale.p = TRUE)


### direktno izracunata Hi^2 statistika

p1=p/sum(p)
sum((x-sum(x)*p1)^2/ (sum(x)*p1))
1-pchisq(0.47,3)

### test nezavisnosti

library(MASS)
tbl = table(survey$Smoke, survey$Exer)
tbl
chisq.test(tbl)  ### uocite upozorenje

fisher.test(tbl) ### bolja alternativa - nije napravljeno na predavanju


Lin. regresija ilustracija

x <- rnorm(15)
y <- x + rnorm(15)
predict(lm(y ~ x))
new <- data.frame(x = seq(-3, 3, 0.5))
predict(lm(y ~ x), new, se.fit = TRUE)
pred.w.plim <- predict(lm(y ~ x), new, interval="prediction")
pred.w.clim <- predict(lm(y ~ x), new, interval="confidence")
matplot(new$x,cbind(pred.w.clim, pred.w.plim[,-1]),
        lty=c(1,2,2,3,3), type="l", ylab="predicted y")



de Moivre Laplace ilustracija

par(mfrow=c(2,2))
main("binomna razdioba za p=0.4 i n=10,20,40,100")

#Sl1
    Nn<-10;
    Pprob<-0.4
    x<-0:Nn
    y <-dbinom(x,Nn,Pprob)
    plot(x,y,type="s",ylab="",xlab="")
    points(x,y,type="h")
    lines(x,dnorm(x,Nn*Pprob,sqrt(Nn *Pprob*(1-Pprob))),col=2)

#Sl2
    Nn<-20;
    Pprob<-0.4
    x<-0:Nn
    y <-dbinom(x,Nn,Pprob)
    plot(x,y,type="s",ylab="",xlab="")
    points(x,y,type="h")
    lines(x,dnorm(x,Nn*Pprob,sqrt(Nn *Pprob*(1-Pprob))),col=2)

#Sl3
    Nn<-40;
    Pprob<-0.4
    x<-0:Nn
    y <-dbinom(x,Nn,Pprob)
    plot(x,y,type="s",ylab="",xlab="")
    points(x,y,type="h")
    lines(x,dnorm(x,Nn*Pprob,sqrt(Nn *Pprob*(1-Pprob))),col=2)

#Sl4
    Nn<-100;
    Pprob<-0.4
    x<-0:Nn
    y <-dbinom(x,Nn,Pprob)
    plot(x,y,type="s",ylab="",xlab="")
    points(x,y,type="h")
    lines(x,dnorm(x,Nn*Pprob,sqrt(Nn *Pprob*(1-Pprob))),col=2)


#### korekcija po neprekidnosti - df

par(mfrow=c(1,1))

Nn<-10;
Pprob<-0.4

x<-0:Nn
y <-pbinom(x,Nn,Pprob)


plot(x,y,type="s",main="", axes=FALSE,xlab="",ylab="")
Axis(side=1, labels=TRUE)
Axis(side=2, labels=TRUE)

x<-seq(0,Nn,0.1)
lines(x,pnorm(x,Nn*Pprob,sqrt(Nn *Pprob*(1-Pprob))),
      lwd=2,col=colors()[54])

Nn<-10;
Pprob<-0.4

x<-0:Nn
y <-pbinom(x,Nn,Pprob)


plot(x,y,type="s",main="", axes=FALSE,xlab="",ylab="")
Axis(side=1, labels=TRUE)
Axis(side=2, labels=TRUE)

x<-seq(0,Nn,0.1)
lines(x,pnorm(x+.5,Nn*Pprob,sqrt(Nn *Pprob*(1-Pprob))),
      lwd=2,col=colors()[54])



Neke opisne statistike
a)

caff.marital=matrix(c(652,1537,598,242,36,46,38,21,218,327,106,67),nrow=3,byrow=T)
colnames(caff.marital)=c("0","1-150","151-300",">300")
rownames(caff.marital)=c("Married","Prev.married","Single")
caff.marital
o= chisq.test(caff.marital)$statistic/(2*sum(caff.marital)) ### oko nula no znacajno razlicit pokazuje se
barplot(caff.marital)
barplot(t(caff.marital))
b)
 boxplot(count ~ spray, data = InsectSprays, col = "lightgray")
c)

library(Flury)
data(challenger)
attach(challenger)

 ch <- glm(cbind(Damage, 6-Damage) ~ Temp, family = binomial, data = challenger)
 with(challenger, plot(Temp, Damage,xlim=c(30,85)))
 lines(challenger$Temp, 6*predict(ch, type = "response")  )
 abline(v=32, col = "red", lwd = 2)## temp when challenger launched\


d)

library(MASS)
plot(trees)
attach(trees)
plot(Girth,Volume)
trees.lm <- lm(Volume~Girth)
plot(Girth,Volume)
abline(trees.lm)

trees.lm1 <- lm(log(Volume)~log(Girth) )
plot(log(Girth),log(Volume))
abline(trees.lm1)

e)

ff <- y ~ x
mods <- setNames(as.list(1:4), paste0("lm", 1:4))
for(i in 1:4) {
  ff[2:3] <- lapply(paste0(c("y","x"), i), as.name)
  ## or   ff[[2]] <- as.name(paste0("y", i))
  ##      ff[[3]] <- as.name(paste0("x", i))
  mods[[i]] <- lmi <- lm(ff, data= anscombe)
  print(anova(lmi))
}
op <- par(mfrow = c(2, 2), mar = 0.1+c(4,4,1,1), oma =  c(0, 0, 2, 0))
for(i in 1:4) {
  ff[2:3] <- lapply(paste0(c("y","x"), i), as.name)
  plot(ff, data = anscombe, col = "red", pch = 21, bg = "orange", cex = 1.2,
       xlim = c(3, 19), ylim = c(3, 13))
  abline(mods[[i]], col = "blue")
}
mtext("Anscombe's 4 Regression data sets", outer = TRUE, cex = 1.5)
par(op)


f)
plant.df = PlantGrowth
plant.df$group = factor(plant.df$group,
                        labels = c("Control", "Treatment 1", "Treatment 2"))
boxplot(weight ~ group, data = plant.df)
plant.mod1 = lm(weight ~ group, data = plant.df)
anova(plant.mod1)

plant.mod = data.frame(Fitted = fitted(plant.mod1),
                       Residuals = resid(plant.mod1), Treatment = plant.df$group)
boxplot(Residuals~Treatment,data=plant.mod)