Download the internationally-comparative ESS 2014 data. Reconstruct our two outcome variables and all predictors that we have constructed so far. Download the ESS 2014 country data. Use the online variable viewer to identify the following variables: the most recent Gini-coefficient measure of inequality, and the most recent GDP per capita. Finally, conjoin these two variables from the ESS country data to your micro-level survey data.
1. Are people in unequal societies more racist? Regress our measures of racism on the most recent Gini-coefficient measure of inequality. Make sure to also control for the micro-level demographic variables age, gender, and education, and the macro-level variable GDP.
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(essurvey) # Add ESS API package.
library(lme4) # For multilevel (mixed-effects) regression models
library(texreg) # For nicely-formatted regression tables
# Import the ESS round 7 data via the API, construct the analysis variables,
# and merge in the country-level indicators.
ESS <- import_rounds(rounds = 7, ess_email = YOUR_EMAIL) %>%
  recode_missings() %>%
  # Sample definition: keep rows where brncntr, mocntr and facntr all equal 1
  filter(brncntr == 1, mocntr == 1, facntr == 1) %>%
  # Racism items: strip the value labels first, then recode 2 (No) to zero
  mutate(
    smegbli = zap_labels(smegbli),
    smegbhw = zap_labels(smegbhw),
    smctmbe = zap_labels(smctmbe),
    smegbli = case_when(smegbli == 2 ~ 0, TRUE ~ smegbli),
    smegbhw = case_when(smegbhw == 2 ~ 0, TRUE ~ smegbhw),
    smctmbe = case_when(smctmbe == 2 ~ 0, TRUE ~ smctmbe)
  ) %>%
  # Additive racist prejudice scale 0 to 3 (sum of the three recoded items)
  mutate(racism_ij = smegbli + smegbhw + smctmbe) %>%
  # Categorical variables: factors with unused levels dropped
  mutate(
    cntry = fct_drop(as_factor(cntry)),
    gndr_ij = fct_drop(factor(gndr))
  ) %>%
  # Control variables as plain numeric vectors
  mutate(
    eduyrs_ij = zap_labels(eduyrs),
    age_ij = zap_labels(agea)
  ) %>%
  # Country-level data: an inner join keeps only respondents whose country
  # appears in the country file
  inner_join(
    read_dta("./../../assets/ESS7/ESSMD-2014-cntry_F1.dta") %>%
      mutate(cntry = fct_drop(as_factor(cntry))),
    by = "cntry"
  ) %>%
  # Keep only the variables used in the models below
  select(
    racism_ij, cntry, gndr_ij, age_ij, eduyrs_ij,
    c_gini_2012, c_gdppc_2012
  )
# Random intercept model: racism regressed on the country-level Gini
# coefficient and GDP per capita plus individual education, age and gender,
# with an intercept that varies by country.
RE_racism1 <- lmer(
  formula = racism_ij ~ c_gini_2012 + c_gdppc_2012 + eduyrs_ij + age_ij +
    gndr_ij + (1 | cntry),
  data = ESS
)
summary(RE_racism1)
# Linear mixed model fit by REML ['lmerMod']
# Formula: racism_ij ~ c_gini_2012 + c_gdppc_2012 + eduyrs_ij + age_ij + gndr_ij + (1 | cntry)
# Data: ESS
#
# REML criterion at convergence: 68101
#
# Scaled residuals:
# Min 1Q Median 3Q Max
# -2.0401 -0.8766 -0.0334 0.6840 3.0404
#
# Random effects:
# Groups Name Variance Std.Dev.
# cntry (Intercept) 0.0627 0.250
# Residual 0.8780 0.937
# Number of obs: 25096, groups: cntry, 19
#
# Fixed effects:
# Estimate Std. Error t value
# (Intercept) 7.53e-01 5.79e-01 1.30
# c_gini_2012 1.80e-02 1.81e-02 1.00
# c_gdppc_2012 -1.75e-06 2.86e-06 -0.61
# eduyrs_ij -2.40e-02 1.60e-03 -15.01
# age_ij 3.17e-03 3.28e-04 9.66
# gndr_ij2 -8.58e-02 1.19e-02 -7.22
#
# Correlation of Fixed Effects:
# (Intr) c_gn_2012 c_gd_2012 edyrs_ age_ij
# c_gini_2012 -0.979
# c_gdpp_2012 -0.594 0.451
# eduyrs_ij -0.049 0.009 -0.001
# age_ij -0.033 -0.002 -0.004 0.221
# gndr_ij2 -0.011 0.001 0.005 -0.014 -0.018
# fit warnings:
# Some predictor variables are on very different scales: consider rescaling
#
# . * Prepare the micro data
#
# . use "./../../assets/ESS7/ESS7e02_2.dta", clear
#
# . quietly do "./../../assets/ESS7/ESS7e02_2_formats_unicode.do"
# . ** Sample definition
#
# . keep if brncntr == 1 & mocntr == 1 & facntr == 1
# (8,673 observations deleted)
#
# . ** Racism
#
# . recode smegbli smegbhw smctmbe (2 = 0)
# (smegbli: 24589 changes made)
# (smegbhw: 18062 changes made)
# (smctmbe: 16255 changes made)
#
# . gen racism_ij = smegbli + smegbhw + smctmbe
# (3,841 missing values generated)
#
# . ** Control variables
#
# . gen age_ij = agea
# (59 missing values generated)
#
# . gen eduyrs_ij = eduyrs
# (260 missing values generated)
#
# . gen gndr_ij = gndr
# (18 missing values generated)
#
# . * Store data for later use
#
# . ** Keep only the vars that are necessary
#
# . keep racism_ij age_ij eduyrs_ij gndr_ij cntry
#
# . ** Reduce memory usage
#
# . compress
# variable racism_ij was float now byte
# variable age_ij was float now int
# variable eduyrs_ij was float now byte
# variable gndr_ij was float now byte
# (346,632 bytes saved)
#
# . ** Create temporary dta file
#
# . tempfile micro_data
#
# . ** Save memory into the temporary file
#
# . save `micro_data'
# file /var/folders/2_/0vc2cx65781cm0qm9tw72fg80000gn/T//St59047.000001 saved
#
# .
# . * Prepare the country data
#
# . ** Open country data but only three variables
#
# . use c_gini_2012 c_gdppc_2012 cntry using "./../../assets/ESS7/ESSMD-2014-cntr
# > y_F1.dta", clear
#
# .
# . ** Conjoin the small country data, keep only matched cases and generate no _m
# > erge variable
#
# . merge 1:m cntry using `micro_data', keep(match) nogen
#
# Result # of obs.
# -----------------------------------------
# not matched 0
# matched 31,512
# -----------------------------------------
#
# .
# . * Estimate the random intercept model
#
# . mixed racism_ij c_gini_2012 c_gdppc_2012 eduyrs_ij age_ij gndr_ij || cntry: ,
# > reml
#
# Performing EM optimization:
#
# Performing gradient-based optimization:
#
# Iteration 0: log restricted-likelihood = -34050.343
# Iteration 1: log restricted-likelihood = -34050.343
#
# Computing standard errors:
#
# Mixed-effects REML regression Number of obs = 25,096
# Group variable: cntry Number of groups = 19
#
# Obs per group:
# min = 800
# avg = 1,320.8
# max = 2,268
#
# Wald chi2(5) = 457.43
# Log restricted-likelihood = -34050.343 Prob > chi2 = 0.0000
#
# ------------------------------------------------------------------------------
# racism_ij | Coef. Std. Err. z P>|z| [95% Conf. Interval]
# -------------+----------------------------------------------------------------
# c_gini_2012 | .0180446 .01813 1.00 0.320 -.0174895 .0535787
# c_gdppc_2012 | -1.75e-06 2.86e-06 -0.61 0.541 -7.36e-06 3.86e-06
# eduyrs_ij | -.0239504 .0015959 -15.01 0.000 -.0270783 -.0208225
# age_ij | .0031708 .0003282 9.66 0.000 .0025275 .003814
# gndr_ij | -.0858167 .0118801 -7.22 0.000 -.1091014 -.0625321
# _cons | .8385431 .5797304 1.45 0.148 -.2977077 1.974794
# ------------------------------------------------------------------------------
#
# ------------------------------------------------------------------------------
# Random-effects Parameters | Estimate Std. Err. [95% Conf. Interval]
# -----------------------------+------------------------------------------------
# cntry: Identity |
# var(_cons) | .0627408 .022422 .0311424 .1264005
# -----------------------------+------------------------------------------------
# var(Residual) | .8780084 .0078415 .862773 .8935129
# ------------------------------------------------------------------------------
# LR test vs. linear model: chibar2(01) = 1405.60 Prob >= chibar2 = 0.0000
#
# .
2. Visualize the \(U_{0j}\). Visualize the country-level residuals (i.e., left-over country-level differences in racism) of the empty random intercept ANOVA and the random intercept model with all variables considered. What do you see and how is that reflected in the change of \(\hat{\tau}^2_0\) when you add predictors?
# Empty random intercept model (no predictors, only country intercepts).
# NOTE: per the printed summaries, this fit uses 27,671 observations in 21
# countries, whereas the model with predictors uses 25,096 observations in 19
# countries, so the two country-level variances come from different samples.
ri_mod0 <- lmer(
  formula = racism_ij ~ (1 | cntry),
  data = ESS
)
# Random intercept model with all micro- and macro-level predictors
ri_mod1 <- lmer(
  formula = racism_ij ~ c_gini_2012 + c_gdppc_2012 + eduyrs_ij + age_ij +
    gndr_ij + (1 | cntry),
  data = ESS
)
summary(ri_mod0)
# Linear mixed model fit by REML ['lmerMod']
# Formula: racism_ij ~ (1 | cntry)
# Data: ESS
#
# REML criterion at convergence: 75921
#
# Scaled residuals:
# Min 1Q Median 3Q Max
# -1.7101 -0.9003 -0.0444 0.7861 2.5399
#
# Random effects:
# Groups Name Variance Std.Dev.
# cntry (Intercept) 0.0658 0.257
# Residual 0.9069 0.952
# Number of obs: 27671, groups: cntry, 21
#
# Fixed effects:
# Estimate Std. Error t value
# (Intercept) 1.0195 0.0563 18.1
# Print the fit of the random intercept model with predictors
summary(ri_mod1)
# Linear mixed model fit by REML ['lmerMod']
# Formula: racism_ij ~ c_gini_2012 + c_gdppc_2012 + eduyrs_ij + age_ij + gndr_ij + (1 | cntry)
# Data: ESS
#
# REML criterion at convergence: 68101
#
# Scaled residuals:
# Min 1Q Median 3Q Max
# -2.0401 -0.8766 -0.0334 0.6840 3.0404
#
# Random effects:
# Groups Name Variance Std.Dev.
# cntry (Intercept) 0.0627 0.250
# Residual 0.8780 0.937
# Number of obs: 25096, groups: cntry, 19
#
# Fixed effects:
# Estimate Std. Error t value
# (Intercept) 7.53e-01 5.79e-01 1.30
# c_gini_2012 1.80e-02 1.81e-02 1.00
# c_gdppc_2012 -1.75e-06 2.86e-06 -0.61
# eduyrs_ij -2.40e-02 1.60e-03 -15.01
# age_ij 3.17e-03 3.28e-04 9.66
# gndr_ij2 -8.58e-02 1.19e-02 -7.22
#
# Correlation of Fixed Effects:
# (Intr) c_gn_2012 c_gd_2012 edyrs_ age_ij
# c_gini_2012 -0.979
# c_gdpp_2012 -0.594 0.451
# eduyrs_ij -0.049 0.009 -0.001
# age_ij -0.033 -0.002 -0.004 0.221
# gndr_ij2 -0.011 0.001 0.005 -0.014 -0.018
# fit warnings:
# Some predictor variables are on very different scales: consider rescaling
# Predict U_0j as empirical Bayes predictions from both models and match them
# by country (grp). The left join keeps every country of the empty model;
# countries that are not in the model with predictors get NA in the `.y`
# columns.
beta_0jEB <- left_join(
  ranef(ri_mod0) %>% as_tibble(),
  ranef(ri_mod1) %>% as_tibble(),
  by = "grp"
)
# Plot both sets of country-level residuals, ordered by the empty-model
# residual. The shape is mapped to a model label so that a legend tells the
# two series apart (hollow = empty model, solid = model with predictors).
ggplot(data = beta_0jEB, aes(x = reorder(grp, condval.x))) +
  geom_point(aes(y = condval.x, shape = "Empty model")) +
  # na.rm = TRUE: countries without a prediction from the second model are
  # skipped silently instead of raising a missing-values warning
  geom_point(
    aes(y = condval.y, shape = "Model with predictors"),
    na.rm = TRUE
  ) +
  geom_hline(yintercept = 0) +
  scale_shape_manual(
    name = NULL,
    values = c("Empty model" = 1, "Model with predictors" = 19)
  ) +
  scale_y_continuous(
    breaks = c(-0.25, 0, 0.25, 0.5),
    labels = c("-0.25", expression(gamma["00"]), "0.25", "0.5")
  ) +
  labs(y = expression(Racism["ij"]), x = "") +
  theme_minimal()
#
# . * Re-do initial Stata code (see above)
#
# . quietly do "./../../assets/ESS7/6-exercise.do"
# .
# . * Estimate models and predict U_01 afterwards
#
# . ** Empty RI model
#
# . mixed racism_ij || cntry: , reml
#
# Performing EM optimization:
#
# Performing gradient-based optimization:
#
# Iteration 0: log restricted-likelihood = -37960.558
# Iteration 1: log restricted-likelihood = -37960.558 (backed up)
#
# Computing standard errors:
#
# Mixed-effects REML regression Number of obs = 27,671
# Group variable: cntry Number of groups = 21
#
# Obs per group:
# min = 700
# avg = 1,317.7
# max = 2,277
#
# Wald chi2(0) = .
# Log restricted-likelihood = -37960.558 Prob > chi2 = .
#
# ------------------------------------------------------------------------------
# racism_ij | Coef. Std. Err. z P>|z| [95% Conf. Interval]
# -------------+----------------------------------------------------------------
# _cons | 1.01955 .0563059 18.11 0.000 .9091922 1.129907
# ------------------------------------------------------------------------------
#
# ------------------------------------------------------------------------------
# Random-effects Parameters | Estimate Std. Err. [95% Conf. Interval]
# -----------------------------+------------------------------------------------
# cntry: Identity |
# var(_cons) | .0658327 .0210518 .0351761 .1232073
# -----------------------------+------------------------------------------------
# var(Residual) | .9069068 .0077131 .8919146 .9221509
# ------------------------------------------------------------------------------
# LR test vs. linear model: chibar2(01) = 1717.44 Prob >= chibar2 = 0.0000
#
# . predict beta_0jEB, reffects
#
# .
# . ** RI model with predictors
#
# . mixed racism_ij c_gini_2012 c_gdppc_2012 eduyrs_ij age_ij gndr_ij || cntry: ,
# > reml
#
# Performing EM optimization:
#
# Performing gradient-based optimization:
#
# Iteration 0: log restricted-likelihood = -34050.343
# Iteration 1: log restricted-likelihood = -34050.343
#
# Computing standard errors:
#
# Mixed-effects REML regression Number of obs = 25,096
# Group variable: cntry Number of groups = 19
#
# Obs per group:
# min = 800
# avg = 1,320.8
# max = 2,268
#
# Wald chi2(5) = 457.43
# Log restricted-likelihood = -34050.343 Prob > chi2 = 0.0000
#
# ------------------------------------------------------------------------------
# racism_ij | Coef. Std. Err. z P>|z| [95% Conf. Interval]
# -------------+----------------------------------------------------------------
# c_gini_2012 | .0180446 .01813 1.00 0.320 -.0174895 .0535787
# c_gdppc_2012 | -1.75e-06 2.86e-06 -0.61 0.541 -7.36e-06 3.86e-06
# eduyrs_ij | -.0239504 .0015959 -15.01 0.000 -.0270783 -.0208225
# age_ij | .0031708 .0003282 9.66 0.000 .0025275 .003814
# gndr_ij | -.0858167 .0118801 -7.22 0.000 -.1091014 -.0625321
# _cons | .8385431 .5797304 1.45 0.148 -.2977077 1.974794
# ------------------------------------------------------------------------------
#
# ------------------------------------------------------------------------------
# Random-effects Parameters | Estimate Std. Err. [95% Conf. Interval]
# -----------------------------+------------------------------------------------
# cntry: Identity |
# var(_cons) | .0627408 .022422 .0311424 .1264005
# -----------------------------+------------------------------------------------
# var(Residual) | .8780084 .0078415 .862773 .8935129
# ------------------------------------------------------------------------------
# LR test vs. linear model: chibar2(01) = 1405.60 Prob >= chibar2 = 0.0000
#
# . predict beta_0jEB_2, reffects
# (2,802 missing values generated)
#
# .
# . * Plot them
#
# . graph dot (mean) beta_0jEB beta_0jEB_2, over(cntry, sort(beta_0jEB)) vertical
# > yline(0)
#
# .
3. Decompose age into its context and composition effect. What do you learn from that exercise?
# Generate average age by country and attach it to every respondent.
# A grouped mutate() is used rather than summarize() + left_join() so that
# re-running this chunk overwrites age_.j instead of creating age_.j.x and
# age_.j.y (which would make the model below fail to find age_.j).
ESS <- ESS %>%
  group_by(cntry) %>%
  mutate(age_.j = mean(age_ij, na.rm = TRUE)) %>%
  ungroup()
# Random intercept model with both the country mean of age and individual age
RE_racism2 <- lmer(
  formula = racism_ij ~ c_gini_2012 + c_gdppc_2012 + eduyrs_ij + age_.j +
    age_ij + gndr_ij + (1 | cntry),
  data = ESS
)
summary(RE_racism2)
# Linear mixed model fit by REML ['lmerMod']
# Formula: racism_ij ~ c_gini_2012 + c_gdppc_2012 + eduyrs_ij + age_.j + age_ij + gndr_ij + (1 | cntry)
# Data: ESS
#
# REML criterion at convergence: 68106
#
# Scaled residuals:
# Min 1Q Median 3Q Max
# -2.0404 -0.8765 -0.0335 0.6840 3.0402
#
# Random effects:
# Groups Name Variance Std.Dev.
# cntry (Intercept) 0.067 0.259
# Residual 0.878 0.937
# Number of obs: 25096, groups: cntry, 19
#
# Fixed effects:
# Estimate Std. Error t value
# (Intercept) 7.44e-01 1.46e+00 0.51
# c_gini_2012 1.80e-02 1.98e-02 0.91
# c_gdppc_2012 -1.75e-06 3.04e-06 -0.58
# eduyrs_ij -2.39e-02 1.60e-03 -15.00
# age_.j 1.96e-04 3.09e-02 0.01
# age_ij 3.17e-03 3.28e-04 9.66
# gndr_ij2 -8.58e-02 1.19e-02 -7.22
#
# Correlation of Fixed Effects:
# (Intr) c_gn_2012 c_gd_2012 edyrs_ age_.j age_ij
# c_gini_2012 -0.081
# c_gdpp_2012 -0.025 0.490
# eduyrs_ij -0.025 0.007 -0.002
# age_.j -0.913 -0.326 -0.232 0.005
# age_ij -0.005 0.002 -0.002 0.221 -0.009
# gndr_ij2 -0.003 0.001 0.005 -0.014 -0.001 -0.018
# fit warnings:
# Some predictor variables are on very different scales: consider rescaling
#
# . * Re-do initial Stata code (see above)
#
# . quietly do "./../../assets/ESS7/6-exercise.do"
# .
# . * Generate average age by country.
#
# . bys cntry: egen age_pj = mean(age_ij)
#
# .
# . mixed racism_ij c_gini_2012 c_gdppc_2012 eduyrs_ij age_pj age_ij gndr_ij || c
# > ntry: , reml
#
# Performing EM optimization:
#
# Performing gradient-based optimization:
#
# Iteration 0: log restricted-likelihood = -34052.916
# Iteration 1: log restricted-likelihood = -34052.916
#
# Computing standard errors:
#
# Mixed-effects REML regression Number of obs = 25,096
# Group variable: cntry Number of groups = 19
#
# Obs per group:
# min = 800
# avg = 1,320.8
# max = 2,268
#
# Wald chi2(6) = 457.15
# Log restricted-likelihood = -34052.916 Prob > chi2 = 0.0000
#
# ------------------------------------------------------------------------------
# racism_ij | Coef. Std. Err. z P>|z| [95% Conf. Interval]
# -------------+----------------------------------------------------------------
# c_gini_2012 | .0180086 .0198037 0.91 0.363 -.0208059 .0568231
# c_gdppc_2012 | -1.75e-06 3.04e-06 -0.58 0.564 -7.71e-06 4.20e-06
# eduyrs_ij | -.0239462 .001596 -15.00 0.000 -.0270743 -.0208182
# age_pj | .0001962 .0309356 0.01 0.995 -.0604365 .060829
# age_ij | .0031709 .0003282 9.66 0.000 .0025276 .0038142
# gndr_ij | -.0858179 .0118802 -7.22 0.000 -.1091026 -.0625332
# _cons | .8298649 1.463196 0.57 0.571 -2.037946 3.697675
# ------------------------------------------------------------------------------
#
# ------------------------------------------------------------------------------
# Random-effects Parameters | Estimate Std. Err. [95% Conf. Interval]
# -----------------------------+------------------------------------------------
# cntry: Identity |
# var(_cons) | .0669671 .0247005 .0325012 .1379822
# -----------------------------+------------------------------------------------
# var(Residual) | .8780085 .0078415 .862773 .8935129
# ------------------------------------------------------------------------------
# LR test vs. linear model: chibar2(01) = 1409.22 Prob >= chibar2 = 0.0000
#
# .
4. Decompose age into its between- and within-country effect. What do you learn from that exercise?
# Group-mean center age: each respondent's deviation from the country mean
ESS <- ESS %>%
  mutate(age_ij_minus_age_.j = age_ij - age_.j)
# Random intercept model with the country mean of age and the centered age
RE_racism3 <- lmer(
  formula = racism_ij ~ c_gini_2012 + c_gdppc_2012 + eduyrs_ij + age_.j +
    age_ij_minus_age_.j + gndr_ij + (1 | cntry),
  data = ESS
)
summary(RE_racism3)
# Linear mixed model fit by REML ['lmerMod']
# Formula: racism_ij ~ c_gini_2012 + c_gdppc_2012 + eduyrs_ij + age_.j +
# age_ij_minus_age_.j + gndr_ij + (1 | cntry)
# Data: ESS
#
# REML criterion at convergence: 68106
#
# Scaled residuals:
# Min 1Q Median 3Q Max
# -2.0404 -0.8765 -0.0335 0.6840 3.0402
#
# Random effects:
# Groups Name Variance Std.Dev.
# cntry (Intercept) 0.067 0.259
# Residual 0.878 0.937
# Number of obs: 25096, groups: cntry, 19
#
# Fixed effects:
# Estimate Std. Error t value
# (Intercept) 7.44e-01 1.46e+00 0.51
# c_gini_2012 1.80e-02 1.98e-02 0.91
# c_gdppc_2012 -1.75e-06 3.04e-06 -0.58
# eduyrs_ij -2.39e-02 1.60e-03 -15.00
# age_.j 3.37e-03 3.09e-02 0.11
# age_ij_minus_age_.j 3.17e-03 3.28e-04 9.66
# gndr_ij2 -8.58e-02 1.19e-02 -7.22
#
# Correlation of Fixed Effects:
# (Intr) c_gn_2012 c_gd_2012 edyrs_ age_.j a____.
# c_gini_2012 -0.081
# c_gdpp_2012 -0.025 0.490
# eduyrs_ij -0.025 0.007 -0.002
# age_.j -0.913 -0.326 -0.232 0.008
# ag_j_mns__. -0.005 0.002 -0.002 0.221 0.001
# gndr_ij2 -0.003 0.001 0.005 -0.014 -0.001 -0.018
# fit warnings:
# Some predictor variables are on very different scales: consider rescaling
#
# . * Re-do initial Stata code (see above)
#
# . quietly do "./../../assets/ESS7/6-exercise.do"
# .
# . * Generate average age by country.
#
# . bys cntry: egen age_pj = mean(age_ij)
#
# .
# . * Group-mean center age
#
# . gen age_ij_minus_age_pj = age_ij - age_pj
# (59 missing values generated)
#
# .
# . mixed racism_ij c_gini_2012 c_gdppc_2012 eduyrs_ij age_pj age_ij_minus_age_pj
# > gndr_ij || cntry: , reml
#
# Performing EM optimization:
#
# Performing gradient-based optimization:
#
# Iteration 0: log restricted-likelihood = -34052.916
# Iteration 1: log restricted-likelihood = -34052.916
#
# Computing standard errors:
#
# Mixed-effects REML regression Number of obs = 25,096
# Group variable: cntry Number of groups = 19
#
# Obs per group:
# min = 800
# avg = 1,320.8
# max = 2,268
#
# Wald chi2(6) = 457.15
# Log restricted-likelihood = -34052.916 Prob > chi2 = 0.0000
#
# ------------------------------------------------------------------------------
# racism_ij | Coef. Std. Err. z P>|z| [95% Conf. Interval]
# -------------+----------------------------------------------------------------
# c_gini_2012 | .0180086 .0198037 0.91 0.363 -.0208059 .0568231
# c_gdppc_2012 | -1.75e-06 3.04e-06 -0.58 0.564 -7.71e-06 4.20e-06
# eduyrs_ij | -.0239462 .001596 -15.00 0.000 -.0270743 -.0208182
# age_pj | .0033671 .0309343 0.11 0.913 -.057263 .0639973
# age_ij_min~j | .0031709 .0003282 9.66 0.000 .0025276 .0038142
# gndr_ij | -.0858179 .0118802 -7.22 0.000 -.1091026 -.0625332
# _cons | .8298649 1.463196 0.57 0.571 -2.037946 3.697675
# ------------------------------------------------------------------------------
#
# ------------------------------------------------------------------------------
# Random-effects Parameters | Estimate Std. Err. [95% Conf. Interval]
# -----------------------------+------------------------------------------------
# cntry: Identity |
# var(_cons) | .0669671 .0247005 .0325012 .1379822
# -----------------------------+------------------------------------------------
# var(Residual) | .8780085 .0078415 .862773 .8935129
# ------------------------------------------------------------------------------
# LR test vs. linear model: chibar2(01) = 1409.22 Prob >= chibar2 = 0.0000
#
# .