suppressMessages(
  suppressWarnings({
    library(ergm)
    library(RSiena)
    library(sna)
    library(spdep)
    library(igraph)
    library(rio)
  })
)

data <- import("http://www.joselkink.net/wp-content/uploads/2013/01/demdev.dta")

Generate W matrices for geographic contiguity and trade relations

The following code is not evaluated, as it is a slow process to generate all W matrices.

generate.W <- function(i, j, x, names = NULL, subset = NULL, standardize = FALSE) {
  
  if (is.null(subset)) subset <- rep(TRUE, length(i))
  
  W <- matrix(0, nrow = length(names), ncol = length(names))
  rownames(W) <- colnames(W) <- names
  
  i <- as.character(i[subset])
  j <- as.character(j[subset])
  
  for (r in 1:sum(subset))
    W[i[r],j[r]] <- x[subset][r]
  
  if (standardize) {
    rs <- rowSums(W)
    W[rs > 0, ] <- W[rs > 0, ] / rs[rs > 0]
  }
  
  W
}

# Downloaded from http://correlatesofwar.org/data-sets/direct-contiguity
# conttype is contiguity, less than 48 miles of water in between
contDyads <- subset(import("~/aqm/data/contdird.csv"), 
                subset = year %in% data$year & state1no %in% data$ccode & state2no %in% data$ccode)
Wgeogr <- list()
for (y in sort(unique(data$year))) {
  Wgeogr[[as.character(y)]] <- generate.W(contDyads$state1no,
                    contDyads$state2no,
                    as.integer(contDyads$conttype <= 3),
                    sort(unique(contDyads$state1no)),
                    contDyads$year == y,
                    standardize = TRUE)
}

# Downloaded from http://www.correlatesofwar.org/data-sets/bilateral-trade
# flow1 is imports from B to A, in US million dollars
# flow2 is imports from A to B, in US million dollars
dyads <- subset(import("~/aqm/data/Dyadic_COW_4.0.csv"), 
                subset = year %in% data$year & ccode1 %in% data$ccode & ccode2 %in% data$ccode & 
                  flow1 >= 0 & flow2 >= 0)
Wimports <- list()
for (y in sort(unique(data$year))) {
  Wimports[[as.character(y)]] <- generate.W(dyads$ccode1,
                           dyads$ccode2,
                           log(dyads$flow1 + 1e-7),
                           sort(unique(c(dyads$ccode1, dyads$ccode2))),
                           dyads$year == y,
                           standardize = FALSE)
}

Wexports <- list()
for (y in sort(unique(data$year))) {
  Wexports[[as.character(y)]] <- generate.W(dyads$ccode1,
                           dyads$ccode2,
                           log(dyads$flow2 + 1e-7),
                           sort(unique(c(dyads$ccode1, dyads$ccode2))),
                           dyads$year == y,
                           standardize = FALSE)
}

Wtrade <- list()
for (y in sort(unique(data$year))) {
  Wtrade[[as.character(y)]] <- generate.W(dyads$ccode1,
                           dyads$ccode2,
                           log(dyads$flow1 + dyads$flow2 + 1e-7),
                           sort(unique(c(dyads$ccode1, dyads$ccode2))),
                           dyads$year == y,
                           standardize = FALSE)
}

save(Wgeogr, file = "~/aqm/data/Wgeographic.Rdata")
save(Wimports, file = "~/aqm/data/Wimports.Rdata")
save(Wexports, file = "~/aqm/data/Wexports.Rdata")
save(Wtrade, file = "~/aqm/data/Wtrade.Rdata")

Here we just open the above files to make it much faster.

load(url("http://www.joselkink.net/files/data/Wgeographic.Rdata"))
load(url("http://www.joselkink.net/files/data/Wtrade.Rdata"))
load(url("http://www.joselkink.net/files/data/Wexports.Rdata"))

It will be helpful to have a function that can normalise a W matrix, i.e. make the rows add up to one:

normalise.W <- function(W) {
  
  rs <- rowSums(W)
  
  W[rs > 0, ] <- W[rs > 0, ] / rs[rs > 0]
  
  W
}

Spatial Error Model

Ignoring spatial autocorrelation

summary(lm(polity2 ~ energy2 + cwar + ipyears, data, subset = year == 1988))

Call:
lm(formula = polity2 ~ energy2 + cwar + ipyears, data = data, 
    subset = year == 1988)

Residuals:
    Min      1Q  Median      3Q     Max 
-15.169  -4.641  -2.883   7.145  14.201 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -5.75229    1.26754  -4.538 1.25e-05 ***
energy2      3.14852    0.81224   3.876 0.000165 ***
cwar         1.53336    1.58345   0.968 0.334604    
ipyears      0.05453    0.02098   2.599 0.010385 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 7.227 on 134 degrees of freedom
Multiple R-squared:  0.1413,    Adjusted R-squared:  0.1221 
F-statistic: 7.353 on 3 and 134 DF,  p-value: 0.000134

Testing for spatial autocorrelation in the residuals

Unfortunately, working with matrices in spatial econometrics is always painful, as none of these functions properly deal with missing data, the W matrix has to match exactly on the data also in terms of order of observations, and it is not possible to include islands - observations without neighbours - in W.

So here we extract the country codes from the data set, select only those elements in W where we have matching data, and then remove from both the observations that are islands.

W <- Wgeogr[["1988"]]

ccodesData <- as.character(data$ccode[data$year == 1988])
ccodesW <- colnames(W)

matchingID <- ccodesData %in% ccodesW

W <- W[ccodesData[matchingID], ccodesData[matchingID]]

islands <- rowSums(W) == 0
W <- W[!islands, !islands]
matchingID[which(matchingID)[islands]] <- FALSE

W <- normalise.W(W)

Only now are we able to perform the tests for spatial autocorrelation. We re-estimate the model, because it can only include those observations where we also have W.

d <- subset(data, year == 1988)[matchingID, ]
mOLS <- lm(polity2 ~ energy2 + cwar + ipyears, d)

moran.test(residuals(mOLS), listw = mat2listw(W))

    Moran I test under randomisation

data:  residuals(mOLS)  
weights: mat2listw(W)  

Moran I statistic standard deviate = 5.4846, p-value = 2.072e-08
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
      0.378792544      -0.007751938       0.004967183 
moran.test(residuals(mOLS), listw = mat2listw(W), alternative = "two.sided")

    Moran I test under randomisation

data:  residuals(mOLS)  
weights: mat2listw(W)  

Moran I statistic standard deviate = 5.4846, p-value = 4.144e-08
alternative hypothesis: two.sided
sample estimates:
Moran I statistic       Expectation          Variance 
      0.378792544      -0.007751938       0.004967183 
lm.LMtests(mOLS, listw = mat2listw(W), test = "LMerr")
Warning in lm.LMErr(model = model, listw = listw, zero.policy =
zero.policy, : Spatial weights matrix not row standardized

    Lagrange multiplier diagnostics for spatial dependence

data:  
model: lm(formula = polity2 ~ energy2 + cwar + ipyears, data = d)
weights: mat2listw(W)

LMErr = 28.287, df = 1, p-value = 1.046e-07
lm.LMtests(mOLS, listw = mat2listw(W), test = "LMlag")
Warning in lm.LMtests(mOLS, listw = mat2listw(W), test = "LMlag"): Spatial
weights matrix not row standardized

    Lagrange multiplier diagnostics for spatial dependence

data:  
model: lm(formula = polity2 ~ energy2 + cwar + ipyears, data = d)
weights: mat2listw(W)

LMlag = 35.348, df = 1, p-value = 2.757e-09

Interpret all of the above tests.

Estimating a SEM

summary(errorsarlm(polity2 ~ energy2 + cwar + ipyears, d, listw = mat2listw(W)))

Call:errorsarlm(formula = polity2 ~ energy2 + cwar + ipyears, data = d, 
    listw = mat2listw(W))

Residuals:
     Min       1Q   Median       3Q      Max 
-11.4245  -4.5649  -1.6307   4.6019  13.3367 

Type: error 
Coefficients: (asymptotic standard errors) 
              Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.9390459  1.6002496 -2.4615  0.01383
energy2      2.4351289  1.0118980  2.4065  0.01611
cwar         1.2148429  1.3180464  0.9217  0.35669
ipyears      0.0091921  0.0182314  0.5042  0.61413

Lambda: 0.51845, LR test value: 30.625, p-value: 3.1305e-08
Asymptotic standard error: 0.078121
    z-value: 6.6365, p-value: 3.2132e-11
Wald statistic: 44.043, p-value: 3.2132e-11

Log likelihood: -423.1861 for error model
ML residual variance (sigma squared): 36.019, (sigma: 6.0016)
Number of observations: 130 
Number of parameters estimated: 6 
AIC: 858.37, (AIC for lm: 887)

Spatial Autoregressive Model

Naively including spatial autocorrelation

avgPolity2 <- W %*% d$polity2
summary(lm(polity2 ~ avgPolity2 + energy2 + cwar + ipyears, d))

Call:
lm(formula = polity2 ~ avgPolity2 + energy2 + cwar + ipyears, 
    data = d)

Residuals:
     Min       1Q   Median       3Q      Max 
-13.7078  -3.6866  -0.4385   3.8526  13.6278 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -2.25081    1.20428  -1.869   0.0640 .  
avgPolity2   0.70876    0.09394   7.545 8.15e-12 ***
energy2      1.36228    0.72501   1.879   0.0626 .  
cwar         1.36901    1.34264   1.020   0.3099    
ipyears      0.01070    0.01849   0.579   0.5638    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.966 on 125 degrees of freedom
Multiple R-squared:  0.4116,    Adjusted R-squared:  0.3928 
F-statistic: 21.86 on 4 and 125 DF,  p-value: 1.076e-13

Estimating a SAR

summary(lagsarlm(polity2 ~ energy2 + cwar + ipyears, d, listw = mat2listw(W)))

Call:lagsarlm(formula = polity2 ~ energy2 + cwar + ipyears, data = d, 
    listw = mat2listw(W))

Residuals:
     Min       1Q   Median       3Q      Max 
-12.0525  -4.2098  -1.1918   4.5327  12.3432 

Type: lag 
Coefficients: (asymptotic standard errors) 
             Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.407219   1.134640 -3.0029 0.002674
energy2      1.880821   0.712952  2.6381 0.008338
cwar         1.591627   1.339229  1.1885 0.234650
ipyears      0.024120   0.017545  1.3748 0.169205

Rho: 0.49824, LR test value: 33.139, p-value: 8.5786e-09
Asymptotic standard error: 0.078241
    z-value: 6.3679, p-value: 1.9157e-10
Wald statistic: 40.551, p-value: 1.9157e-10

Log likelihood: -421.9289 for lag model
ML residual variance (sigma squared): 35.601, (sigma: 5.9666)
Number of observations: 130 
Number of parameters estimated: 6 
AIC: 855.86, (AIC for lm: 887)
LM test for residual autocorrelation
test value: 3.7687, p-value: 0.05222

Calculate and evaluate the AIC for each model (if not already provided). What do you conclude?

How does the spatial component change depending on model specification? How does the coefficient on energy2 (our proxy for economic development) change?

Repeat the above analysis using the trade network (Wtrade) instead of the geographic network. What do you conclude?

Exponential Random Graph Model

We create a matrix for 1980 with for each dyad a 1 if one country is in the approximately Top 2 trading partners of the other country, 0 otherwise.

W <- Wtrade[["1980"]]

ccodesData <- as.character(data$ccode[data$year == 1980])
ccodesW <- colnames(W)

matchingID <- ccodesData %in% ccodesW

W <- W[ccodesData[matchingID], ccodesData[matchingID]]
d <- data[data$year == 1980, ][matchingID, ]

Wtop <- W > matrix(apply(W, 1, quantile, .99), nrow = dim(W)[1], ncol = dim(W)[1])
table(rowSums(Wtop))

  0   1   2 
  5   5 128 
gplot(Wtop)

We now explain this top trading partner selection using an ERGM model. See the helpfile on ergm-terms for an overview of all network structure variables that can be entered.

d$democracy[is.nan(d$democracy)] <- 0
Wnetwork <- network(Wtop, vertex.attr = list("democracy" = d$democracy))

summary(ergm(Wnetwork ~ edges))
Evaluating log-likelihood at the estimate. 

==========================
Summary of model fit
==========================

Formula:   Wnetwork ~ edges

Iterations:  7 out of 20 

Monte Carlo MLE Results:
      Estimate Std. Error MCMC % p-value    
edges -4.26881    0.06233      0  <1e-04 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

     Null Deviance: 26209  on 18906  degrees of freedom
 Residual Deviance:  2754  on 18905  degrees of freedom
 
AIC: 2756    BIC: 2764    (Smaller is better.) 
summary(ergm(Wnetwork ~ edges + nodecov("democracy")))
Evaluating log-likelihood at the estimate. 

==========================
Summary of model fit
==========================

Formula:   Wnetwork ~ edges + nodecov("democracy")

Iterations:  7 out of 20 

Monte Carlo MLE Results:
                  Estimate Std. Error MCMC % p-value    
edges             -4.71616    0.09677      0  <1e-04 ***
nodecov.democracy  0.66489    0.09036      0  <1e-04 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

     Null Deviance: 26209  on 18906  degrees of freedom
 Residual Deviance:  2702  on 18904  degrees of freedom
 
AIC: 2706    BIC: 2722    (Smaller is better.) 
summary(ergm(Wnetwork ~ edges + gwesp + nodecov("democracy")))
Starting maximum likelihood estimation via MCMLE:
Iteration 1 of at most 20: 
The log-likelihood improved by 7.068 
Iteration 2 of at most 20: 
The log-likelihood improved by 3.56 
Step length converged once. Increasing MCMC sample size.
Iteration 3 of at most 20: 
The log-likelihood improved by 3.882 
Iteration 4 of at most 20: 
The log-likelihood improved by 1.97 
Step length converged once. Increasing MCMC sample size.
Iteration 5 of at most 20: 
The log-likelihood improved by 0.529 
Step length converged twice. Stopping.
Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .

This model was fit using MCMC.  To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.

==========================
Summary of model fit
==========================

Formula:   Wnetwork ~ edges + gwesp + nodecov("democracy")

Iterations:  5 out of 20 

Monte Carlo MLE Results:
                  Estimate Std. Error MCMC % p-value    
edges             -4.98622    0.07897      0 < 1e-04 ***
gwesp              1.94638    0.26653     18 < 1e-04 ***
gwesp.decay       -0.78808    0.24791     18 0.00148 ** 
nodecov.democracy  0.27911    0.06788      1 < 1e-04 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

     Null Deviance: 26209  on 18906  degrees of freedom
 Residual Deviance:  2547  on 18902  degrees of freedom
 
AIC: 2555    BIC: 2586    (Smaller is better.) 

Repeat this last regression adding precipitation as an independent variable.

Instead of a node attribute, we can also add a statistic for cases where nodes match - i.e. do democracies trade more with each other?

# summary(ergm(Wnetwork ~ edges + gwesp + nodematch("democracy")))

Investigate whether Catholic countries are more likely to trade with each other.

We can also include network independent variables, such as geographical proximity, for which we have to prepare that matrix as well.

Wg <- Wgeogr[["1980"]]

ccodesW <- colnames(Wg)

matchingID <- ccodesData %in% ccodesW

Wg <- Wg[ccodesData[matchingID], ccodesData[matchingID]]

d <- d[matchingID, ]
W <- W[matchingID, matchingID]

Wnetwork <- network(W, vertex.attr = list(democracy = d$democracy))
Wgeogr <- network(Wg)

#summary(ergm(Wnetwork ~ edges + gwesp + nodecov("democracy") + edgecov(Wgeogr) + isolates))

Interpret the above regression output.