General information
Below you can find sample R-code for multiple and joint correspondence analysis. A detailed explaination of the steps is given in the appendix of Multiple Correspondence Analysis and Related Methods.
The example data set used here is 'wg93', which you can download below:
wg93.zip (zipped .txt, ~3KB)
Make sure that the data are available in R as 'dat', i.e. by extracting the zip-file above to 'c:' and entering
dat <- read.table("C:/wg93.txt", header = TRUE, colClasses = "factor")
You can copy the code below and select 'paste commands only' in R.
Sample code for the computations in the Appendix of "Multiple Correspondence Analysis and Related Methods"
# Table 1 (response pattern matrix)
> sup.ind <- 5:7 > dat.act <- dat[,-sup.ind] > dat.sup <- dat[,sup.ind] > I <- dim(dat.act)[1] > Q <- dim(dat.act)[2] > dat[c(1:5,I),]
A B C D sex age edu 1 2 3 4 3 2 2 3 2 3 4 2 3 1 3 4 3 2 3 2 4 2 3 2 4 2 2 2 2 1 2 3 5 3 3 3 3 1 5 2 871 1 2 2 2 2 3 6
# Table 2 (indicator matrix)
> lev.n <- unlist(lapply(dat, nlevels)) > n <- cumsum(lev.n) > J.t <- sum(lev.n) > Q.t <- dim(dat)[2] > Z <- matrix(0, nrow = I, ncol = J.t) > newdat <- lapply(dat, as.numeric) > offset <- (c(0, n[-length(n)])) > for (i in 1:Q.t) + Z[1:I + (I * (offset[i] + newdat[[i]] - 1))] <- 1 > fn <- rep(names(dat), unlist(lapply(dat, nlevels))) > ln <- unlist(lapply(dat, levels)) > > dimnames(Z)[[2]] <- paste(fn, ln, sep = "") > dimnames(Z)[[1]] <- as.character(1:I) > > ind.temp <- range(n[sup.ind]) > Z.sup.ind <- (ind.temp[1]-1):ind.temp[2] > Z.act <- Z[,-Z.sup.ind] > J <- dim(Z.act)[2] > Z.act[c(1:5,I),]
A1 A2 A3 A4 A5 B1 B2 B3 B4 B5 C1 C2 C3 C4 C5 D1 D2 D3 D4 D5 1 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 2 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 3 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 4 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 5 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 871 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0
# Table 3 (inertias of Z)
> P <- Z.act / sum(Z.act) > cm <- apply(P, 2, sum) > rm <- apply(P, 1, sum) > eP <- rm %*% t(cm) > S <- (P - eP) / sqrt(eP) > dec <- svd(S) > lam <- dec$d[1:(J-Q)]^2 > expl <- 100*(lam / sum(lam)) > rbind(round(lam[c(1:4,(J-Q))], 3),round(expl[c(1:4,(J-Q))], 1))
[,1] [,2] [,3] [,4] [,5] [1,] 0.457 0.431 0.322 0.306 0.125 [2,] 11.400 10.800 8.000 7.700 3.100
# Table 4 (column standard / principal coordinates)
> b.s1 <- dec$v[,1] / sqrt(cm) > b.s2 <- dec$v[,2] / sqrt(cm) > g.s1 <- b.s1 * sqrt(lam[1]) > g.s2 <- b.s2 * sqrt(lam[2]) > round(rbind(b.s1,b.s2,g.s1,g.s2)[,c(1:5,16:20)], 3)
A1 A2 A3 A4 A5 D1 D2 D3 D4 D5 b.s1 1.837 0.546 -0.447 -1.166 -1.995 1.204 -0.221 -0.385 -0.222 0.708 b.s2 -0.727 0.284 1.199 -0.737 -2.470 -1.822 0.007 1.159 0.211 -1.152 g.s1 1.242 0.369 -0.302 -0.788 -1.349 0.814 -0.150 -0.260 -0.150 0.479 g.s2 -0.478 0.187 0.787 -0.484 -1.622 -1.196 0.005 0.761 0.138 -0.756
# Table 5 (row principal coordinates)
> f.s1 <- dec$u[,1] * sqrt(lam[1]) / sqrt(rm) > f.s2 <- dec$u[,2] * sqrt(lam[2]) / sqrt(rm) > a.s1 <- f.s1 / sqrt(lam[1]) > a.s2 <- f.s2 / sqrt(lam[2]) > round(rbind(f.s1,f.s2)[,c(1:5,I)], 3)
1 2 3 4 5 871 f.s1 -0.210 -0.325 0.229 0.303 -0.276 0.626 f.s2 0.443 0.807 0.513 0.387 1.092 0.135
# Table 6 (Burt matrix)
> B <- t(Z.act) %*% Z.act > B[c(1:5,16:20), c(1:5,16:20)]
A1 A2 A3 A4 A5 D1 D2 D3 D4 D5 A1 119 0 0 0 0 15 25 17 34 28 A2 0 322 0 0 0 22 102 76 68 54 A3 0 0 204 0 0 10 44 68 58 24 A4 0 0 0 178 0 9 52 28 54 35 A5 0 0 0 0 48 4 9 13 12 10 D1 15 22 10 9 4 60 0 0 0 0 D2 25 102 44 52 9 0 232 0 0 0 D3 17 76 68 28 13 0 0 202 0 0 D4 34 68 58 54 12 0 0 0 226 0 D5 28 54 24 35 10 0 0 0 0 151
# Table 7 (principal inertias of Burt matrix)
> P.2 <- B / sum(B) > cm.2 <- apply(P.2, 2, sum) > eP.2 <- cm.2 %*% t(cm.2) > S.2 <- (P.2 - eP.2) / sqrt(eP.2) > dec.2 <- eigen(S.2) > delt.2 <- dec.2$values[1:(J-Q)] > expl.2 <- 100*(delt.2 / sum(delt.2)) > lam.2 <- delt.2^2 > expl.2b <- 100*(lam.2 / sum(lam.2)) > rbind(round(lam.2, 3),round(expl.2b, 1))[,c(1:4,16)]
[,1] [,2] [,3] [,4] [,5] [1,] 0.209 0.186 0.104 0.094 0.016 [2,] 18.600 16.500 9.200 8.300 1.400
# Addendum: "check" that ds is equivalent to ls:
> rbind(round(delt.2, 3),round(expl.2, 1), round(lam, 3),round(expl, 1))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [1,] 0.457 0.431 0.322 0.306 0.276 0.252 0.243 0.235 0.225 0.221 [2,] 11.400 10.800 8.000 7.700 6.900 6.300 6.100 5.900 5.600 5.500 [3,] 0.457 0.431 0.322 0.306 0.276 0.252 0.243 0.235 0.225 0.221 [4,] 11.400 10.800 8.000 7.700 6.900 6.300 6.100 5.900 5.600 5.500 [,11] [,12] [,13] [,14] [,15] [,16] [1,] 0.21 0.197 0.178 0.169 0.153 0.125 [2,] 5.20 4.900 4.400 4.200 3.800 3.100 [3,] 0.21 0.197 0.178 0.169 0.153 0.125 [4,] 5.20 4.900 4.400 4.200 3.800 3.100
# Table 8 (eigenvectors, column masses, column sc/pc's)
> u.s1 <- dec.2$vectors[,1] > u.s2 <- dec.2$vectors[,2] > a2.s1 <- u.s1 / sqrt(cm.2) > a2.s2 <- u.s2 / sqrt(cm.2) > f2.s1 <- a2.s1 * sqrt(lam.2[1]) > f2.s2 <- a2.s2 * sqrt(lam.2[2]) > round(rbind(u.s1,u.s2,cm,a2.s1,a2.s2,f2.s1,f2.s2), 3)[,c(1:5,16:20)]
A1 A2 A3 A4 A5 D1 D2 D3 D4 D5 u.s1 0.339 0.166 -0.108 -0.264 -0.234 0.158 -0.057 -0.093 -0.056 0.147 u.s2 -0.134 0.086 0.290 -0.167 -0.290 -0.239 0.002 0.279 0.054 -0.240 cm 0.034 0.092 0.059 0.051 0.014 0.017 0.067 0.058 0.065 0.043 a2.s1 1.837 0.546 -0.447 -1.166 -1.995 1.204 -0.221 -0.385 -0.222 0.708 a2.s2 -0.727 0.284 1.199 -0.737 -2.470 -1.822 0.007 1.159 0.211 -1.152 f2.s1 0.840 0.250 -0.204 -0.533 -0.913 0.551 -0.101 -0.176 -0.101 0.324 f2.s2 -0.314 0.123 0.517 -0.318 -1.064 -0.785 0.003 0.499 0.091 -0.496
# Table 9 (adjusted inertias)
> lam.adj <- (Q/(Q-1))^2 * (delt.2[delt.2 >= 1/Q] - 1/Q) ^ 2 > total.adj <- (Q/(Q-1)) * (sum(delt.2^2) - ((J-Q)/Q^2)) > rbind(round(lam.adj, 5),100 * round(lam.adj / total.adj, 3))
[,1] [,2] [,3] [,4] [,5] [,6] [1,] 0.07646 0.05822 0.0092 0.00567 0.00117 1e-05 [2,] 44.90000 34.20000 5.4000 3.30000 0.70000 0e+00
# Table 10
> nd <- 2 > maxit <- 1000 > epsilon <- 0.0001 > lev <- lev.n[-sup.ind] > n <- sum(B) > li <- as.vector(c(0,cumsum(lev))) > dummy <- matrix(0, J, J) > for (i in 1:(length(li)-1)) { + ind.lo <- li[i]+1 + ind.up <- li[i+1] + ind.to <- diff(li)[i] + dummy[rep(ind.lo:ind.up, ind.to) + + (rep(ind.lo:ind.up, each = ind.to)-1) * J] <- 1 + } > iterate <- function(obj, dummy, nd, adj = FALSE) { + Bp <- obj/n + cm <- apply(Bp, 2, sum) + eP <- cm %*% t(cm) + cm.mat <- diag(cm^(-0.5)) + S <- cm.mat %*% (Bp - eP) %*% cm.mat + dec <- eigen(S) + lam <- dec$values + u <- dec$vectors + phi <- u[, 1:nd] / matrix(rep(sqrt(cm), nd), ncol = nd) + if (adj) + lam <- (Q / (Q - 1))^2 * (lam[lam >= 1 / Q] - 1 / Q) ^ 2 + for (s in 1:nd) { + if (exists("coord")) { + coord <- coord + lam[s] * (phi[,s] %*% t(phi[,s])) + } else { + coord <- lam[s] * (phi[,s] %*% t(phi[,s])) + } + } + obj * (1 - dummy) + n * eP * dummy * (1 + coord) + } > # first iteration (adjusted lambda) > B.star <- iterate(B, dummy, 2, adj = TRUE) > # subsequent iterations > k <- 1 > it <- TRUE > while (it) { + temp <- iterate(B.star, dummy, 2) + delta.B <- max(abs(B.star - temp)) + B.star <- temp + if (delta.B <= epsilon | k >= maxit) it <- FALSE + k <- k + 1 + } > round(B.star [c(1:5,16:20), c(1:5, 16:20)], 2)
A1 A2 A3 A4 A5 D1 D2 D3 D4 D5 A1 30.72 53.14 18.59 13.97 2.58 15.00 25.00 17.00 34.00 28.00 A2 53.14 130.55 76.80 51.80 9.71 22.00 102.00 76.00 68.00 54.00 A3 18.59 76.80 62.95 38.86 6.80 10.00 44.00 68.00 58.00 24.00 A4 13.97 51.80 38.86 53.51 19.85 9.00 52.00 28.00 54.00 35.00 A5 2.58 9.71 6.80 19.85 9.06 4.00 9.00 13.00 12.00 10.00 D1 15.00 22.00 10.00 9.00 4.00 9.02 14.67 5.03 13.27 18.01 D2 25.00 102.00 44.00 52.00 9.00 14.67 62.46 55.78 60.90 38.20 D3 17.00 76.00 68.00 28.00 13.00 5.03 55.78 63.56 56.49 21.14 D4 34.00 68.00 58.00 54.00 12.00 13.27 60.90 56.49 59.74 35.60 D5 28.00 54.00 24.00 35.00 10.00 18.01 38.20 21.14 35.60 38.04
# Table 11 (JCA inertias)
> P.3 <- B.star / sum(B.star) > cm.3 <- apply(P.3, 2, sum) > eP.3 <- cm.3 %*% t(cm.3) > S.3 <- (P.3 - eP.3) / sqrt(eP.3) > delt.3 <- eigen(S.3)$values > lam.3 <- delt.3^2 > expl.3 <- 100*(lam.3 / sum(lam.3)) > rbind(round(lam.3, 3),round(expl.3, 1))[,1:2]
[,1] [,2] [1,] 0.099 0.065 [2,] 54.300 35.600
# Table 12 (Inertia conributions of submatrices)
> subinr <- function(B, ind) { + nn <- length(ind) + subi <- matrix(NA, nrow = nn, ncol = nn) + ind2 <- c(0,cumsum(ind)) + for (i in 1:nn) { + for (j in 1:nn) { + tempmat <- B[(ind2[i]+1):(ind2[i+1]), + (ind2[j]+1):(ind2[j+1])] + tempmat <- tempmat / sum(tempmat) + ec <- apply(tempmat, 2, sum) + ex <- ec%*%t(ec) + subi[i,j] <- sum((tempmat - ex)^2 / ex) + } + } + subi / nn^2 + } > si <- subinr(B.star, lev) > round(si, 5)
[,1] [,2] [,3] [,4] [1,] 0.00745 0.03160 0.01237 0.01670 [2,] 0.04997 0.02244 0.05218 0.00789 [3,] 0.01409 0.04598 0.02103 0.03404 [4,] 0.02474 0.00740 0.03427 0.00381
# Table 13 (supplementary principal coordinates as columns of Z)
> Z.star <- Z[,Z.sup.ind] > I.star <- dim(Z.star)[1] > cs.star <- apply(Z.star, 2, sum) > base <- Z.star / matrix(rep(cs.star, I.star), nrow = I.star, byrow = TRUE) > b.star1 <- t(base) %*% cbind(a.s1, a.s2) > round(t(b.star1), 3)
sex1 sex2 age1 age2 age3 age4 age5 age6 a.s1 -0.143 0.137 -0.166 -0.087 -0.025 -0.031 0.016 0.281 a.s2 0.029 -0.028 -0.014 -0.081 -0.004 0.057 0.047 0.033 edu1 edu2 edu3 edu4 edu5 edu6 a.s1 0.18 0.161 -0.068 -0.227 -0.172 -0.308 a.s2 0.06 0.093 0.090 -0.279 -0.263 -0.291
# Table 14 (supplementary principal coordinates via cross-tabulation)
> ct.star <- t(Z.star)%*%Z.act > I.star2 <- dim(ct.star)[2] > cs.star2 <- apply(ct.star, 1,sum) > base2 <- ct.star / matrix(rep(cs.star2, I.star2), ncol = I.star2) > b.star2 <- base2 %*% cbind(a2.s1, a2.s2) > round(t(b.star2), 3)
sex1 sex2 age1 age2 age3 age4 age5 age6 a2.s1 -0.097 0.093 -0.112 -0.059 -0.017 -0.021 0.011 0.190 a2.s2 0.019 -0.018 -0.009 -0.053 -0.003 0.038 0.031 0.022 edu1 edu2 edu3 edu4 edu5 edu6 a2.s1 0.122 0.109 -0.046 -0.154 -0.116 -0.209 a2.s2 0.039 0.061 0.059 -0.183 -0.172 -0.191
# Table 15 (principal inertias from subset analysis)
> sub.ind <- c(3,8,13,18) > P.4 <- B / sum(B) > cm.4 <- apply(P.4, 2, sum) > eP.4 <- cm.4 %*% t(cm.4) > S.sub <- ((P.4 - eP.4) / sqrt(eP.4)) [-sub.ind,-sub.ind] > dec.sub <- eigen(S.sub) > lam.sub <- dec.sub$values[1:(J-Q)]^2 > expl.sub <- 100*(lam.sub / sum(lam.sub)) > rbind(round(lam.sub, 4),round(expl.sub, 1))[,c(1:4,(J-Q))]
[,1] [,2] [,3] [,4] [,5] [1,] 0.2016 0.1489 0.098 0.0721 0.0017 [2,] 23.4000 17.3000 11.400 8.4000 0.2000
# Table 16 (column standard and principal coordinates from subset analysis)
> cm.sub <- cm.4[-sub.ind] > u.sub.s1 <- dec.sub$vectors[,1] > u.sub.s2 <- dec.sub$vectors[,2] > a.sub.s1 <- u.sub.s1 / sqrt(cm.sub) > a.sub.s2 <- u.sub.s2 / sqrt(cm.sub) > f.sub.s1 <- a.sub.s1 * sqrt(lam.sub[1]) > f.sub.s2 <- a.sub.s2 * sqrt(lam.sub[2]) > round(rbind(a.sub.s1,a.sub.s2,f.sub.s1,f.sub.s2), 3)[, c(1:4,13:16)]
A1 A2 A4 A5 D1 D2 D4 D5 a.sub.s1 1.696 0.538 -1.316 -2.449 0.888 -0.273 -0.222 0.482 a.sub.s2 1.153 -0.517 -0.015 2.746 2.482 -0.462 -0.683 1.210 f.sub.s1 0.761 0.242 -0.591 -1.100 0.399 -0.123 -0.100 0.216 f.sub.s2 0.445 -0.200 -0.006 1.060 0.957 -0.178 -0.264 0.467