# This code is to accompany Maximum Likelihood Methods Strategies for Social Science, # Michael D. Ward and John S. Ahlquist, 2018, Cambridge University Press, # ISBN 9781107185821. The code is presented as is, but is designed to replicate all of # the material in the book, chapter by chapter. Any comments, suggestions, or problems can # be reported to the authors: michael.don.ward@gmail.com and John Ahlquist jahlquist@ucsd.edu. # These programs were designed to run on a variety of platforms, but currently uses # platform x86_64-apple-darwin15.6.0 # arch x86_64 # os darwin15.6.0 # system x86_64, darwin15.6.0 # version.string R version 3.4.4 (2018-03-15) # nickname Someone to Lean On # We used Rstudio, Version 1.1.447 # This code requires installation of the loaded libraries. rm(list=ls()); gc() library(foreign); library(nlme); library(MASS); library(WDI);library(ProfileLikelihood) set.seed(123) ### Table 1.1 Not calculated ### Figure 1.1 ####################################### pi.hat <- seq(0,1,.001) f.y <- pi.hat^2*(1-pi.hat)^1 #pdf(file = "/Users/mdw/Git/HalfMoonBay/graphics/chapter1/ch1fig4.pdf",width = 6, height = 6, onefile = TRUE, family = "Times") plot(pi.hat,f.y,type="l",bty="l",ylab="",lwd=3,xlab=substitute(hat(theta)),las=1) segments(.67, 0.0, x1 = .67, y1 = .148, col = "grey30", lwd = 2) segments(.62, .149, x1 = .72, y1 = .149, col = "grey30", lty = par("lty"), lwd = 2) #dev.off() ###################################################### ### Figure 1.2 ####################################### n<-10000 # population size mu<-10 # population mean sigma<-1 # population standard error samplesize<-10 # sample size nsamples<-1000 # number of samples to collect pop<-rnorm(n,mu,sigma) smpls<-NULL for (i in 1:nsamples) { smpls[i]<-mean(sample(pop,samplesize,replace=T)) } #pdf("/Users/mdw/Git/HalfMoonBay/graphics/chapter1/ch1fig2.pdf", width = 6, height = 6, onefile = TRUE, family = "Times") truehist(smpls,nbins=19,col="slategray3",las=1,yaxt="n",ylab="",xlab="",ann=FALSE,xlim=(c(9,11))) abline(a=NULL,b=NULL,v=10,col="gray10",lwd=4,xaxt="n") #dev.off() ###################################################### ### Figure 1.3 ####################################### # Get Data from WDI api via R wdi<-WDI(country = "all", indicator = c("EN.POP.DNST", #popdensity "EN.ATM.CO2E.KT", #c02 emissions "NY.GDP.PCAP.PP.CD"), #GDPpcPPP start = 2012, end = 2012, extra = TRUE, cache = NULL) wdi<-na.omit(subset(wdi, region !="Aggregates")) names(wdi)[4:6]<-c("pop.den", "co2.kt","gdp.pc.ppp" ) out<-lm(log(co2.kt) ~ log(gdp.pc.ppp), data=wdi) out2<-glm(log(co2.kt) ~ log(gdp.pc.ppp) + pop.den, data=wdi) anova(out,out2, test="LRT") attach(wdi) #pdf("/Users/mdw/Git/HalfMoonBay/graphics/chapter1/ch1fig1.pdf") plot(log(gdp.pc.ppp ), log(co2.kt ), pch=19, xlab="2012 GDP per capita at PPP, log $US", ylab="2012 CO2 emissions, log Ktons",bty="l", las=1) abline(reg=out,lwd=3) arrows(x1=log(wdi[wdi$iso3c=="USA","gdp.pc.ppp"]), y1=fitted(out)[rownames(out$model)[which(wdi$iso3c=="USA")]], x0=log(wdi[wdi$iso3c=="USA","gdp.pc.ppp"]), y0=log(wdi[wdi$iso3c=="USA","co2.kt"]), col="black", length=0.1, angle=20, lwd=2.5) points(log(wdi[wdi$iso3c=="USA","gdp.pc.ppp"]), log(wdi[wdi$iso3c=="USA","co2.kt"]), #col= "blue",#grey(0.5), pch = 19, cex= 3) #dev.off() ###################################################### ### Figure 1.4 ####################################### #### Profile likelihood wdi$lgdppc<-log(wdi$gdp.pc.ppp) xx <- profilelike.lm(formula = log(co2.kt)~1, data=wdi, profile.theta="lgdppc", lo.theta=0.94, hi.theta=1.14, length=500) #pdf(file = "/Users/mdw/Git/HalfMoonBay/graphics/chapter1/ch1fig3.pdf",width = 6, height = 6, onefile = TRUE, family = "Times") with(xx, plot(theta,profile.lik,las=1,lty=1,lwd=3, type="l",pch=19,xlab=substitute(beta[1]), ylab="likelihood",yaxt="n",bty="l",main="Least Squares as MLE", xlim=c(0.95,1.15)) ) abline(v=coef(fit.lm)[2],col="gray50",lwd=3) abline(h=max(xx$profile.lik),col="gray50",lwd=4) #dev.off() # profilelike.plot(theta=xx$theta, profile.lik.norm=xx$profile.lik.norm, round=2) # Using R to Maximize the Likelihood # Set up matrices. x <- cbind(1,as.matrix(log(wdi$gdp.pc.ppp))) # note the column of 1s y <- as.matrix(log(wdi$co2.kt)) K <- ncol(x); n <- nrow(x) # number of cases (n) and variables (k) # Define the likelihood function. loglik.my <- function(par,X,Y) { Y <- as.vector(y) X <- as.matrix(x) xbeta <- X%*%par[1:K] sigma <- sqrt(sum(((X[,2]-mean(X[,2]))^2)/(n-K)))* sum(-(1/2)*log(2*pi)-(1/2)*log(sigma^2)-(1/(2*sigma^2))*(y-xbeta)^2) } # Pass the likelihood fn to an optimizer. mle.fit <- optim(c(5,5),loglik.my, method = "BFGS", control = list(trace=TRUE,maxit=10000,fnscale = -1),hessian = TRUE) if(mle.fit$convergence!=0) print("MDW WARNING: Convergence Problems; Try again!") # Calculate standard diagnostics. stderrors<- -sqrt(diag(-solve(mle.fit$hessian))) z<-mle.fit$par/stderrors p.z <- 2* (1 - pnorm(abs(z))) out.table <- cbind(mle.fit$par,stderrors,z,p.z) out.table out.table <– data.frame ( Est=mle.fit$par , SE=stderrors , Z=z , pval=p.z ) round (out.table , 2 ) ###################################################### ### Table 1.2 ######################################## xtable(summary(out)) stargazer(out,star.cutoffs = NA)