R Studio coding

profilenicely6
chapter8.R

## Commands for chapter 8 lecture ## read in the data reg.data <- read.table("E:\\Uvic folder\\359\\R code\\c8\\tannin.txt",header=T) attach(reg.data) names(reg.data) ## scatter plot plot(tannin,growth,pch=16) ################################################ ## rough guess at parameter values a<-12 b<-(growth[9]-growth[1])/(tannin[9]-tannin[1]) ## plot the guessed linear relationship lines(tannin,a+b*tannin,lty=2) ################################################# ## plot the errors simple.fit<-a+b*tannin for (i in 1:length(growth)) { lines(c(tannin[i],tannin[i]),c(simple.fit[i],growth[i])) } ################################################# ## compute least squares estimators ## change notation to match the notes y<-growth x<-tannin b.ls<-(sum(x*y) - sum(y)*sum(x)/length(y))/(sum(x^2) - sum(x)*sum(x)/length(x)) a.ls<-mean(y) - b.ls*mean(x) ## plot the least squares line plot(tannin,growth,pch=16) ## plot the guessed linear relationship lines(tannin,a+b*tannin,lty=2) ## plot the LS line lines(tannin,a.ls+b.ls*tannin,lty=2,col="red") ## can compute these using the lm() function lm(growth~tannin) ############################################3 ## error sum of squares SSE<-sum((y-a.ls-b.ls*x)^2) ## total sum of squares SSY<-sum(y^2) - (sum(y)^2)/length(y) ## regression SS SSR<-SSY-SSE ##########################################3 ## compute the p-value for F-test F.ratio<-(SSR/1)/(SSE/7) 1-pf(F.ratio,1,7) ## can produce the anova table in R model<-lm(growth~tannin) summary.aov(model) ################################ ## examine the standard errors and other output summary(model) ## want a 95% confidence interval for b lower.b<-b.ls-0.2186*qt(p=0.975,df=length(y)-2) upper.b<-b.ls+0.2186*qt(p=0.975,df=length(y)-2) CI.b<-c(lower.b,upper.b) ## want a 95% confidence interval for a lower.a<-a.ls-1.0408*qt(p=0.975,df=length(y)-2) upper.a<-a.ls+1.0408*qt(p=0.975,df=length(y)-2) CI.a<-c(lower.a,upper.a) ################################################# ## regression diagnostics par(mfrow=c(1,3)) plot(model,which=c(1,2,4)) ################################################ ## sensitivity analysis investigating impact of most influential point c(y[7],x[7]) # remove this point and re-fit the model model2<-update(model,subset=(tannin !=6)) summary(model) summary(model2) ################################################# ## examine data on the relationship between radioactive emissions (y) and time (x) ## this data is collected for a radioactive decay process detach(reg.data) rm(x,y) ## remove variables x and y from gloabal env par(mfrow=c(1,1)) curve <- read.table("E:\\Uvic folder\\359\\R code\\c8\\decay.txt",header=T) attach(curve) names(curve) plot(x,y) abline(lm(y~x)) ##examine r^2 summary(lm(y~x)) ##########################################3 ## fit a quadratic x2<-x^2 quadratic<-lm(y~x+x2) plot(x,y) xv<-seq(0,30,.1) ## plot the line linear<-lm(y~x) yv.linear<-predict(linear,list(x=xv)) lines(xv,yv.linear) ## plot the quadratic model yv.quadratic<-106.3888-7.3448*xv+0.1506*xv^2 yv.quadratic<-predict(quadratic,list(x=xv,x2=xv^2)) lines(xv,yv.quadratic,col="red") ## testing summary(quadratic) ## or can use anova table anova(quadratic,linear) ########################################## ## compare a seqence of four nested models constant<-lm(y~1) linear<-lm(y~x) x3<-x^3 cubic<-lm(y~x+x2+x3) ## compare 4 nested models ## the F-statistic provides pairwise comparison of models ## numerator = difference in RSS/difference in DF ## denominator = RSS of largest model/DF anova(constant,linear,quadratic,cubic) ############################################# exponential<-lm(log(y)~x) summary(exponential) ## make predicitions on the log scale and exponentiate yv.exp<-exp(predict(exponential,list(x=xv))) lines(xv,yv.exp,col="blue") ## nice to add a legend legend(x=20,y=100,legend=c("linear","quadratic","exp"),fill=c("black","red","blue")) ######################################################## ## examine data relating the jaw bone length of deer to the age of deer ## theory indicates that the length of the jaw bone changes over time ## according to an asympototic exponential growth curve deer <- read.table("E:\\Uvic folder\\359\\R code\\c8\\jaws.txt",header=T) attach(deer) names(deer) plot(age,bone) #################################################### ## you may need to download the nls2 package... library(nls2) model<-nls(bone~a-b*exp(-c*age),start = list(a=120,b=120,c=0.061)) summary(model) ## try with bad starting values and see what happens model<-nls(bone~a-b*exp(-c*age),start = list(a=1,b=1,c=1)) ######################################################## ## confidence intervals for a, b,c a.CI<-c(115.2528-1.96*2.9139,115.2528+1.96*2.9139) b.CI<-c(118.6875-1.96*7.8925,118.6875+1.96*7.8925) c.CI<-c(0.1235 - 1.96*0.0171,0.1235 + 1.96*0.0171) rbind(a.CI,b.CI,c.CI) ################################################### ## fit the simpler model where a=b model2<-nls(bone~a-a*exp(-c*age),start = list(a=120,c=0.061)) ## compare the models anova(model,model2) ## can also use AIC to comapre models ## more general and useful for comparing non-nested models AIC(model) AIC(model2) ## we can not reject H0: a=b so we work with model 2 ad examine the fitted curve and estimates summary(model2) age.fit<-seq(0,50,.1) b.length.fit<-predict(model2,list(age=age.fit)) lines(age.fit,b.length.fit) title("Estimated Jaw Growth Over Time") ######################################################## library(mgcv) ## you may need to download this package np<-deer <- read.table("E:\\Uvic folder\\359\\R code\\c8\\hump.txt",header=T) attach(np) names(np) ## the dangers of fitting linear models without looking at the data first model.linear<-lm(y~x) summary(model.linear) ## plot the data plot(x,y) ## a non-linear signal is clear ## fit a non-parametric smoother model.gam<-gam(y~s(x)) ## examine signifcance of the smooth term summary(model.gam) ## plot the non-parametric fit plot(model.gam) ## compare with the data: need to remove the estimated constant term first points(x,y-1.95737)