1 Package AES

Applied Econometrics with R

Functions, data sets, examples, demos, and vignettes for the book Christian Kleiber and Achim Zeileis (2008), Applied Econometrics with R, Springer-Verlag, New York.

install.packages( "AER" )
library( AER )

vignette( "AER" )

1.1 California Schools Data

The data used here are from all 420 K-6 and K-8 districts in California with data available for 1998 and 1999.

The dataset is used to address the question of whether

Test scores are on the Stanford 9 standardized test administered to 5th grade students. School characteristics (averaged across the district) include enrollment, number of teachers (measured as “full-time equivalents”, number of computers per classroom, and expenditures per student. Demographic variables for the students are averaged across the district. The demographic variables include the percentage of students in the public assistance program CalWorks (formerly AFDC), the percentage of students that qualify for a reduced price lunch, and the percentage of students that are English learners (that is, students for whom English is a second language).

Data Dictionary

district: District code (character)
stratio: Student-teacher ratio (calculated below)
score: Average of two primary scores, math and reading
school: School name (character)
county: indicating county (factor)
grades: indicating grade span of district (factor)
students: Total enrollment
teachers: Number of teachers
calworks: Percent qualifying for CalWorks (income assistance)
lunch: Percent qualifying for reduced-price lunch
computer: Number of computers
expenditure: Expenditure per student
income: District average income (in USD 1,000)
english: Percent of English learners
read: Average reading score
math: Average math score

1.1.1 Specification

What happens to the significance of the student-teacher ratio variable once appropriate controls are added to the model?

data( "CASchools" )

## variable transformations
CASchools$stratio <- with(CASchools, students/teachers)
CASchools$score <- with(CASchools, (math + read)/2)


dat <- dplyr::select( CASchools, score, stratio, expenditure, english, lunch, calworks )

pairs( dat, lower.panel=panel.smooth, upper.panel=panel.cor)

## Stock and Watson (2007)
## p. 152
fm1 <- lm( score ~ stratio, data = CASchools )
# coeftest(fm1, vcov = sandwich)

## p. 159
fm2 <- lm( score ~ I(stratio < 20), data = CASchools )

## p. 199
fm3 <- lm( score ~ stratio + english, data = CASchools )

## p. 224
fm4 <- lm( score ~ stratio + expenditure + english, data = CASchools )


stargazer( fm1, fm2, fm3, fm4, type="html",
           omit.stat = c("rsq", "f", "ser"),
           digits=2)


	Dependent variable:

	score
	(1)	(2)	(3)	(4)

stratio	-2.28^***		-1.10^***	-0.29
	(0.48)		(0.38)	(0.48)

I(stratio < 20)		7.17^***
		(1.85)

expenditure				0.004^***
				(0.001)

english			-0.65^***	-0.66^***
			(0.04)	(0.04)

Constant	698.93^***	650.08^***	686.03^***	649.58^***
	(9.47)	(1.39)	(7.41)	(15.21)


Observations	420	420	420	420
Adjusted R²	0.05	0.03	0.42	0.43

Note:	p<0.1; p<0.05; p<0.01

1.1.2 Additional Controls

Control for non-native speakers and income level of students.

## Table 7.1, p. 242 (numbers refer to columns)
fmc3 <- lm( score ~ stratio + english + lunch, data = CASchools )
fmc4 <- lm( score ~ stratio + english + calworks, data = CASchools )
fmc5 <- lm( score ~ stratio + english + lunch + calworks, data = CASchools )

stargazer( fmc3, fmc4, fmc5, type="html",
           omit.stat = c("rsq", "f", "ser"),
           digits=2 )


	Dependent variable:

	score
	(1)	(2)	(3)

stratio	-1.00^***	-1.31^***	-1.01^***
	(0.24)	(0.31)	(0.24)

english	-0.12^***	-0.49^***	-0.13^***
	(0.03)	(0.03)	(0.03)

lunch	-0.55^***		-0.53^***
	(0.02)		(0.03)

calworks		-0.79^***	-0.05
		(0.05)	(0.06)

Constant	700.15^***	698.00^***	700.39^***
	(4.69)	(6.02)	(4.70)


Observations	420	420	420
Adjusted R²	0.77	0.63	0.77

Note:	p<0.1; p<0.05; p<0.01

1.2 Teacher Ratings

data("TeachingRatings")

TeachingRatings$response.rate <- TeachingRatings$students / TeachingRatings$allstudents

TeachingRatings %>% head() %>% pander()

Table continues below
minority	age	gender	credits	beauty	eval	division	native
yes	36	female	more	0.2899	4.3	upper	yes
no	59	male	more	-0.7377	4.5	upper	yes
no	51	male	more	-0.572	3.7	upper	yes
no	40	female	more	-0.678	4.3	upper	yes
no	31	female	more	1.51	4.4	upper	yes
no	62	male	more	0.5886	4.2	upper	yes

tenure	students	allstudents	prof	response.rate
yes	24	43	1	0.5581
yes	17	20	2	0.85
yes	55	55	3	1
yes	40	46	4	0.8696
yes	42	48	5	0.875
yes	182	282	6	0.6454

dat <- dplyr::select( TeachingRatings, eval, age, beauty, response.rate )

pairs( dat, lower.panel=panel.smooth, upper.panel=panel.cor)

Are beautiful people better teachers?
What is the relationship between age and response rate?
Are people content with a course more or less likely to submit evaluations?

par( mfrow=c(2,2) )
plot( TeachingRatings$gender, TeachingRatings$eval, frame.plot=F, outline=F, main="Gender" )
plot( TeachingRatings$minority, TeachingRatings$eval, frame.plot=F, outline=F, main="Minority" )
plot( TeachingRatings$tenure, TeachingRatings$eval, frame.plot=F, outline=F, main="Tenure Status" )
plot( TeachingRatings$credits, TeachingRatings$eval, frame.plot=F, outline=F, main="Credits" )

group.structure <- formula( TeachingRatings$eval ~ TeachingRatings$minority * TeachingRatings$gender )

boxplot( group.structure, ylim=c(3,5),
         las=0, frame.plot=F, outline=F, 
         main="Performance by Gender & Minority Status",
         col=c("steelblue4","steelblue3","firebrick4","firebrick3"), 
         staplewex=0, whisklty=0, border="gray90", lwd=2 )

abline( h=seq(3,5,0.25), col="gray", lty=3, lwd=0.25 )

group.structure <- formula( TeachingRatings$eval ~ TeachingRatings$gender * TeachingRatings$tenure )

boxplot( group.structure, ylim=c(3,5),
         las=0, frame.plot=F, outline=F, 
         main="Performance by Gender and Tenure Status of the Professor",
         col=c("steelblue","firebrick","steelblue","firebrick" ), 
         staplewex=0, whisklty=0, border="gray90", lwd=2 )

abline( h=seq(3,5,0.25), col="gray", lty=3, lwd=0.25 )

## evaluation score vs. beauty
jplot( TeachingRatings$beauty, TeachingRatings$eval, xlab="Beauty", ylab="Student Evaluations" )
fm <- lm(eval ~ beauty, data = TeachingRatings)
abline(fm)

summary(fm)

## 
## Call:
## lm(formula = eval ~ beauty, data = TeachingRatings)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.80015 -0.36304  0.07254  0.40207  1.10373 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.99827    0.02535 157.727  < 2e-16 ***
## beauty       0.13300    0.03218   4.133 4.25e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5455 on 461 degrees of freedom
## Multiple R-squared:  0.03574,    Adjusted R-squared:  0.03364 
## F-statistic: 17.08 on 1 and 461 DF,  p-value: 4.247e-05

## prediction of Stock & Watson's evaluation score
sw <- with(TeachingRatings, mean(beauty) + c(0, 1) * sd(beauty))
names(sw) <- c("Watson", "Stock")
predict(fm, newdata = data.frame(beauty = sw))

##   Watson    Stock 
## 3.998272 4.103163

## Hamermesh and Parker, 2005, Table 3

fmw <- lm(eval ~ beauty + gender + minority + native + tenure + division + credits,
weights = students, data = TeachingRatings)
coeftest(fmw, vcov = sandwich)

## 
## t test of coefficients:
## 
##                Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)    4.223142   0.063947 66.0417 < 2.2e-16 ***
## beauty         0.274805   0.034761  7.9056 2.033e-14 ***
## genderfemale  -0.238993   0.056402 -4.2373 2.740e-05 ***
## minorityyes   -0.248937   0.089177 -2.7915  0.005467 ** 
## nativeno      -0.252713   0.098061 -2.5771  0.010277 *  
## tenureyes     -0.135923   0.060122 -2.2608  0.024245 *  
## divisionlower -0.045895   0.059307 -0.7739  0.439421    
## creditssingle  0.686507   0.114675  5.9866 4.351e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## (same coefficients but with different covariances)

stargazer( fmc3, fmc4, fmc5, type="html",
           omit.stat = c("rsq", "f", "ser"),
           digits=2 )


	Dependent variable:

	score
	(1)	(2)	(3)

stratio	-1.00^***	-1.31^***	-1.01^***
	(0.24)	(0.31)	(0.24)

english	-0.12^***	-0.49^***	-0.13^***
	(0.03)	(0.03)	(0.03)

lunch	-0.55^***		-0.53^***
	(0.02)		(0.03)

calworks		-0.79^***	-0.05
		(0.05)	(0.06)

Constant	700.15^***	698.00^***	700.39^***
	(4.69)	(6.02)	(4.70)


Observations	420	420	420
Adjusted R²	0.77	0.63	0.77

Note:	p<0.1; p<0.05; p<0.01

2 Package ‘woolridge’

Read directly from the website:

library( foreign )

dat <- read.dta( "http://fmwww.bc.edu/ec-p/data/wooldridge/meap01.dta" )

dat <- read.dta( "http://fmwww.bc.edu/ec-p/data/wooldridge/affairs.dta" )

R package:

install.packages( "wooldridge" )
library( wooldridge )

2.1 Affairs Dataset

data( "affairs" )
head( affairs ) %>% pander()

Table continues below
id	male	age	yrsmarr	kids	relig	educ	occup	ratemarr	naffairs
4	1	37	10	0	3	18	7	4	0
5	0	27	4	0	4	14	6	4	0
6	1	27	1.5	0	3	18	4	4	3
11	0	32	15	1	1	12	1	4	0
12	0	27	4	1	3	17	1	5	3
16	1	57	15	1	5	18	6	5	0

affair	vryhap	hapavg	vryrel	smerel	slghtrel
0	0	1	0	0	1
0	0	1	0	1	0
1	0	1	0	0	1
0	0	1	0	0	0
1	1	0	0	0	1
0	1	0	1	0	0

id: identifier
male: =1 if male
age: in years
yrsmarr: years married
kids: =1 if have kids
relig: 5 = very relig., 4 = somewhat, 3 = slightly, 2 = not at all, 1 = anti
educ: years schooling
occup: occupation, reverse Hollingshead scale
ratemarr: 5 = vry hap marr, 4 = hap than avg, 3 = avg, 2 = smewht unhap, 1 = vry unhap
naffairs: number of affairs within last year
affair: =1 if had at least one affair
vryhap: ratemarr == 5
hapavg: ratemarr == 4
avgmarr: ratemarr == 3
unhap: ratemarr == 2
vryrel: relig == 5
smerel: relig == 4
slghtrel: relig == 3
notrel: relig == 2

2.1.1 Models

Insert model exammples here…

2.2 School Voucher Dataset

data( 'voucher' )
head( voucher ) %>% pander()

Table continues below
studyid	black	female	appyear	mnce	select	choice
21	1	1	90	44	1	0
26	1	0	90	46	1	1
30	1	0	90	20	1	1
31	1	1	90	36	1	1
33	1	0	90	32	1	1
39	1	1	90	76	1	0

Table continues below
selectyrs	choiceyrs	mnce90
4	1	NA
4	4	NA
4	4	NA
4	4	NA
4	4	NA
4	1	19

selectyrs4	choiceyrs1	choiceyrs4
1	1	0
1	0	1
1	0	1
1	0	1
1	0	1
1	1	0

3 Package ‘Ecdat’

install.packages( "Ecdat" )
library( Ecdat )

3.1 Star Dataset

tmathssk total math scaled score
treadssk total reading scaled score
classk type of class, a factor with levels (regular,small.class,regular.with.aide)
totexpk years of total teaching experience
sex a factor with levels (boy,girl)
freelunk qualified for free lunch ?
race a factor with levels (white,black,other)
schidkn school indicator variable

data( Star )
head( Star ) %>% pander()

Table continues below
	tmathssk	treadssk	classk	totexpk	sex	freelunk
2	473	447	small.class	7	girl	no
3	536	450	small.class	21	girl	no
5	463	439	regular.with.aide	0	boy	yes
11	559	448	regular	16	boy	no
12	489	447	small.class	5	boy	yes
13	454	431	regular	8	boy	yes

	race	schidkn
2	white	63
3	black	20
5	black	19
11	white	69
12	white	79
13	white	5

dat <- dplyr::select( Star, tmathssk,treadssk, totexpk ) %>% 
  dplyr::rename( Math_Score=tmathssk, Reading_Score=treadssk, Teacher_Experience=totexpk )

pairs( dat, lower.panel=panel.smooth, upper.panel=panel.cor)

par( mfrow=c(2,2) )
plot( Star$class, Star$tmathssk, frame.plot=F, outline=F, main="Class Type" )
plot( Star$sex, Star$tmathssk, frame.plot=F, outline=F, main="Gender" )
plot( Star$freelunk, Star$tmathssk, frame.plot=F, outline=F, main="Free Lunch" )
plot( Star$race, Star$tmathssk, frame.plot=F, outline=F, main="Race" )

group.structure <- formula( Star$tmathssk ~ Star$freelunk * Star$totexpk )

boxplot( group.structure, 
         las=2, frame.plot=F, outline=F, main="Age Group by Gender",
         col=c("firebrick","steelblue"), xaxt="n",
         staplewex=0, whisklty=0, ylim=c(400,600) )

m1 <- lm( tmathssk ~ classk, data=Star )
m2 <- lm( tmathssk ~ classk + totexpk + sex + race + freelunk, data=Star )
m3 <- lm( treadssk ~ classk, data=Star )
m4 <- lm( treadssk ~ classk + totexpk + sex + race + freelunk, data=Star )

stargazer( m1, m2, m3, m4, type="html" )


	Dependent variable:

	tmathssk		treadssk
	(1)	(2)	(3)	(4)

classksmall.class	8.209^***	8.200^***	5.899^***	5.900^***
	(1.563)	(1.502)	(1.040)	(0.992)

classkregular.with.aide	-0.251	-0.107	0.806	0.878
	(1.503)	(1.447)	(1.000)	(0.955)

totexpk		0.618^***		0.490^***
		(0.106)		(0.070)

sexboy		-7.872^***		-6.135^***
		(1.209)		(0.798)

raceblack		-7.746^***		-3.799^***
		(1.454)		(0.960)

raceother		-14.486		-1.870
		(8.841)		(5.837)

freelunkyes		-19.553^***		-14.852^***
		(1.355)		(0.894)

Constant	483.261^***	493.512^***	434.681^***	441.649^***
	(1.065)	(1.699)	(0.708)	(1.122)


Observations	5,748	5,748	5,748	5,748
R²	0.006	0.083	0.006	0.097
Adjusted R²	0.006	0.082	0.006	0.096
Residual Std. Error	47.626 (df = 5745)	45.765 (df = 5740)	31.676 (df = 5745)	30.214 (df = 5740)
F Statistic	18.553^*** (df = 2; 5745)	74.563^*** (df = 7; 5740)	18.538^*** (df = 2; 5745)	87.895^*** (df = 7; 5740)

Note:	p<0.1; p<0.05; p<0.01

3.2 Dataset ‘Treatment’

data( 'Treatment' )
head( Treatment ) %>% pander()

treat	age	educ	ethn	married	re78	u74	u75
TRUE	37	11	black	TRUE	9930	TRUE	TRUE
TRUE	30	12	black	FALSE	24910	TRUE	TRUE
TRUE	27	11	black	FALSE	7506	TRUE	TRUE
TRUE	33	8	black	FALSE	289.8	TRUE	TRUE
TRUE	22	9	black	FALSE	4056	TRUE	TRUE
TRUE	23	12	black	FALSE	0	TRUE	TRUE

id	male	age	yrsmarr	kids	relig	educ	occup	ratemarr	naffairs
4	1	37	10	0	3	18	7	4	0
5	0	27	4	0	4	14	6	4	0
6	1	27	1.5	0	3	18	4	4	3
11	0	32	15	1	1	12	1	4	0
12	0	27	4	1	3	17	1	5	3
16	1	57	15	1	5	18	6	5	0

studyid	black	female	appyear	mnce	select	choice
21	1	1	90	44	1	0
26	1	0	90	46	1	1
30	1	0	90	20	1	1
31	1	1	90	36	1	1
33	1	0	90	32	1	1
39	1	1	90	76	1	0

id	male	age	yrsmarr	kids	relig	educ	occup	ratemarr	naffairs
4	1	37	10	0	3	18	7	4	0
5	0	27	4	0	4	14	6	4	0
6	1	27	1.5	0	3	18	4	4	3
11	0	32	15	1	1	12	1	4	0
12	0	27	4	1	3	17	1	5	3
16	1	57	15	1	5	18	6	5	0

studyid	black	female	appyear	mnce	select	choice
21	1	1	90	44	1	0
26	1	0	90	46	1	1
30	1	0	90	20	1	1
31	1	1	90	36	1	1
33	1	0	90	32	1	1
39	1	1	90	76	1	0

Dataset Examples