General information
Below you can find all the R commands given in Appendix B of Correspondence Analysis in Practice, Second Edition, starting on page 215.
You can find the data sets as Excel files in the section 'data sets'.
R code
# Chapter 2: Profiles and the Profile Space
# The data file 'travels.xls' contains the original count data
# so first calculate the row profiles in Excel by dividing each
# row by its row total. Then copy the profiles with their row
# and column names, remembering to leave the top left hand cell
# empty.
# Alternatively, copy the five lines below
               Holidays HalfDays FullDays
Norway            0.333    0.056    0.611
Canada            0.067    0.200    0.733
Greece            0.138    0.862    0.000
France/Germany    0.083    0.083    0.833
# To read the data into R the following statement must be typed
# in the R command window and not copied and pasted (since the
# data is in the "clipboard")
profiles <- read.table("clipboard")
profiles
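# Note (an addition to the book's listing): reading from "clipboard"
# works on Windows; on macOS the clipboard can be read with
# read.table(pipe("pbpaste")). On any system you can instead save the
# five lines above in a text file (the name 'travels.txt' is just an
# example) and read it with:
profiles <- read.table("travels.txt")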
# You need to have installed and loaded the rgl package now
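# If rgl is not yet installed, install it from CRAN and load it:
install.packages("rgl")
library(rgl)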
rgl.lines(c(0,1.2), c(0,0), c(0,0))
rgl.lines(c(0,0), c(0,1.2), c(0,0))
rgl.lines(c(0,0), c(0,0), c(0,1.2))
rgl.lines(c(0,0), c(0,1), c(1,0), size = 2)
rgl.lines(c(0,1), c(1,0), c(0,0), size = 2)
rgl.lines(c(0,1), c(0,0), c(1,0), size = 2)
rgl.points(profiles[,3], profiles[,1], profiles[,2], size = 4)
rgl.texts(profiles[,3], profiles[,1], profiles[,2], text = row.names(profiles))
# Now expand the 3-d graphics window and spin the points around using the
# left mouse button; the right button or mouse wheel allows zooming
# Chapter 3: Masses and Centroids
# Copy data in file 'readers.xls' or copy following lines
   C1 C2 C3
E1  5  7  2
E2 18 46 20
E3 19 29 39
E4 12 40 49
E5  3  7 16
# Again, TYPE the following statement
table <- read.table("clipboard")
table
table.pro <- table/apply(table, 1, sum)
table.x <- 1-table.pro[,1]-table.pro[,3]/2
table.y <- table.pro[,3]*sqrt(3)/2
plot.new()
lines(c(0,1,0.5,0), c(0,0,sqrt(3)/2,0), col = "gray")
text(c(0,1,0.5), c(0,0,sqrt(3)/2), labels = colnames(table))
text(table.x, table.y, labels = rownames(table))
# Chapter 4: Chi-square Distance and Inertia
# calculate chi-square statistic
table.rowsum <- apply(table, 1, sum)
table.colsum <- apply(table, 2, sum)
table.sum <- sum(table)
table.exp <- table.rowsum%o%table.colsum/table.sum
chi2 <- sum((table-table.exp)^2/table.exp)
chi2
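# As a quick check (not in the book's listing), R's built-in test
# returns the same chi-square statistic:
chisq.test(table)$statistic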
# chi-square distances to centroid
table.colmass <- table.colsum/table.sum
chidist <- 0
for(j in 1:dim(table)[2]){
  chidist <- chidist+(table.pro[5,j]-table.colmass[j])^2/table.colmass[j]
}
chidist
apply((t(table.pro)-table.colmass)^2/table.colmass, 2, sum)
tablec.pro <- rbind(table.pro, table.colmass)
rownames(tablec.pro)[6] <- "ave"
dist(sweep(tablec.pro, 2, sqrt(table.colmass), FUN = "/"))
# Chapter 5: Plotting Chi-square Distance
table.wt <- sqrt(table.colmass)
rgl.lines(c(0,1.2/table.wt[2]), c(0,0), c(0,0))
rgl.lines(c(0,0), c(0,1.2/table.wt[3]), c(0,0))
rgl.lines(c(0,0), c(0,0), c(0,1.2/table.wt[1]))
rgl.lines(c(0,0), c(0,1/table.wt[3]), c(1/table.wt[1],0), size = 2)
rgl.lines(c(0,1/table.wt[2]), c(1/table.wt[3],0), c(0,0), size = 2)
rgl.lines(c(0,1/table.wt[2]), c(0,0), c(1/table.wt[1],0), size = 2)
table.chi <- t(t(table.pro)/table.wt)
rgl.points(table.chi[,2], table.chi[,3], table.chi[,1], size = 4)
rgl.texts(table.chi[,2], table.chi[,3], table.chi[,1], text = row.names(table))
# Chapter 6: Reduction of Dimensionality
# First read data from file 'health.xls' as described before into
# data frame health
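# For example, once the table (with row and column names and an empty
# top left cell) has been copied to the clipboard, type:
health <- read.table("clipboard")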
health.P <- health/sum(health)
health.r <- apply(health.P, 1, sum)
health.c <- apply(health.P, 2, sum)
health.Dr <- diag(health.r)
health.Dc <- diag(health.c)
health.Drmh <- diag(1/sqrt(health.r))
health.Dcmh <- diag(1/sqrt(health.c))
health.P <- as.matrix(health.P)
health.S <- health.Drmh%*%(health.P-health.r%o%health.c)%*%health.Dcmh
health.svd <- svd(health.S)
health.rsc <- health.Drmh%*%health.svd$u
health.csc <- health.Dcmh%*%health.svd$v
health.rpc <- health.rsc%*%diag(health.svd$d)
health.cpc <- health.csc%*%diag(health.svd$d)
plot(health.rpc[,1], health.rpc[,2], type = "n", pty = "s")
text(health.rpc[,1], health.rpc[,2], labels = rownames(health))
# Notice that the R graphics window is square and does not give the
# correct aspect ratio, so pull out the window so that a vertical
# unit is approximately equal to a horizontal unit: in this example
# the window would be as wide as the computer screen but take up less
# than half the height of the screen.
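# Alternatively (not in the book's listing), the asp argument of
# plot() fixes the aspect ratio so that one vertical unit equals one
# horizontal unit, whatever the shape of the window:
plot(health.rpc[,1], health.rpc[,2], type = "n", asp = 1)
text(health.rpc[,1], health.rpc[,2], labels = rownames(health))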
# Chapter 7: Optimal Scaling
health.range <- max(health.csc[,1])-min(health.csc[,1])
health.scale <- (health.csc[,1]-min(health.csc[,1]))*100/health.range
health.scale
# Chapter 8: Symmetry of Row and Column Analyses
health.cor <- t(health.rsc[,1])%*%health.P%*%health.csc[,1]
health.cor^2
t(health.rsc[,1])%*%health.Dr%*%health.rsc[,1]
# Chapter 9: Two-dimensional Maps
# For this example you can read in the data from 'smoke.xls' as
# before, or after installing and loading the ca package, the
# data are available by simply entering:
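# To install and load the ca package from CRAN:
install.packages("ca")
library(ca)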
data(smoke)
smoke
ca(smoke)
plot(ca(smoke))
# (After the previous statement you might need to adjust the
# aspect ratio of the graphics window)
plot(ca(smoke), map = "rowprincipal")
# Chapter 10: Three More Examples
data(author)
plot3d.ca(ca(author), labels = c(2,1), sf = 0.000001)
# Chapter 11: Contributions to Inertia
# Read in data from 'funding.xls' into data frame fund, then:
fund.P <- as.matrix(fund/sum(fund))
fund.r <- apply(fund.P, 1, sum)
fund.c <- apply(fund.P, 2, sum)
fund.Drmh <- diag(1/sqrt(fund.r))
fund.Dcmh <- diag(1/sqrt(fund.c))
fund.res <- fund.Drmh%*%(fund.P-fund.r%o%fund.c)%*%fund.Dcmh
round(apply(fund.res^2, 1, sum), 5)
round(apply(fund.res^2, 2, sum), 5)
round(1000*fund.res^2/sum(fund.res^2), 0)
fund.svd <- svd(fund.res)
fund.svd$d^2
fund.F <- fund.Drmh%*%fund.svd$u%*%diag(fund.svd$d)
fund.rowi <- diag(fund.r)%*%fund.F^2
fund.rowi[,1:4]
round(1000*(fund.rowi/apply(fund.rowi, 1, sum))[,1:4], 0)
round(1000*t(t(fund.rowi)/fund.svd$d^2)[,1:4], 0)
summary(ca(fund))
# Chapter 12: Supplementary Points
fund.m <- c(4,12,11,19,7)/53
fund.Gamma <- fund.Dcmh%*%fund.svd$v
t(fund.m)%*%fund.Gamma[,1:2]
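# Alternatively (a sketch, not in the book's listing), ca() can handle
# the supplementary row directly via its suprow option; the row label
# 'extra' here is just illustrative:
fund.sup <- rbind(as.matrix(fund), extra = c(4,12,11,19,7))
plot(ca(fund.sup, suprow = nrow(fund.sup)))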
# Chapter 13: Correspondence Analysis Biplots
diag(sqrt(fund.c))%*%fund.Gamma[,1:2]
fund.est <- fund.F[,1:2]%*%t(diag(sqrt(fund.c))%*%fund.Gamma[,1:2])
oner <- rep(1, dim(fund)[1])
round(fund.est%*%diag(sqrt(fund.c))+oner%o%fund.c, 3)
round(fund.P/fund.r, 3)
sum(diag(fund.r)%*%(fund.est%*%diag(sqrt(fund.c))+oner%o%fund.c -
    fund.P/fund.r)^2%*%diag(1/fund.c))
sum(fund.svd$d[3:4]^2)
# Chapter 14: Transition and Regression Relationships
fund.vec <- as.vector(as.matrix(fund))
fund.Phi <- fund.Drmh%*%fund.svd$u
fund.vecr <- rep(fund.Phi[,1], 5)
fund.vecc <- as.vector(oner%*%t(fund.Gamma[,1]))
lm(fund.vecr~fund.vecc, weights = fund.vec)
fund.y <- (fund.P[1,]/fund.r[1])/fund.c
summary(lm(fund.y~fund.Gamma[,1]+fund.Gamma[,2], weights = fund.c))
cov.wt(cbind(fund.y, fund.Gamma[,1:2]), wt = fund.c, cor = TRUE)$cor
# Chapter 15: Clustering Rows and Columns
# The food stores data is in file 'stores.xls', to be read
# into R data frame food; then:
food.exp <- apply(food, 1, sum)%o%apply(food, 2, sum)/sum(food)
food.stres <- (food-food.exp)/sqrt(food.exp)
sum(food.stres^2)
food.rpro <- food/apply(food, 1, sum)
food.r <- apply(food, 1, sum)/sum(food)
# apply Murtagh's hierclust() function (see web resource, p. 261)
# to the row profiles in food.rpro, using the weights in food.r
food.rclust <- hierclust(food.rpro, food.r)
plot(as.dendrogram(food.rclust))
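# If hierclust() is not available, a rough base-R approximation (a
# sketch only: hclust() weights the initial clusters by the counts in
# 'members', which only approximates the mass-weighted Ward clustering
# used in the book):
food.c <- apply(food, 2, sum)/sum(food)
food.chid <- dist(sweep(food.rpro, 2, sqrt(food.c), FUN = "/"))
food.hclust <- hclust(food.chid^2, method = "ward.D",
                      members = apply(food, 1, sum))
plot(as.dendrogram(food.hclust))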
# Chapter 16: Multiway Tables
# The raw data file of 6371 cases for the Spanish health survey
# has not been given but the derived files are:
# 'health.xls': cross-tabulation of age by perceived health
# 'health2.xls': cross-tabulation of age-sex by perceived health
# (Exhibit 16.2)
# The working women raw data are in 'womenraw.xls'
# Since the top lefthand cell is not empty (there are no row names)
# the following statement needs to be typed to read the copied data
women <- read.table("clipboard", header = TRUE)
# Check size of this data frame as follows:
dim(women)
colnames(women)
attach(women)
table(C, Q3)
# The above table is also given in file 'women.xls'
CG <- 2*(C-1)+G
table(CG, Q3)
# these commands declare gender 9 to be NA
women[,6][G==9] <- NA
attach(women)
CG <- 2*(C-1)+G
table(CG, Q3)
# The above table is also given in file 'women2.xls'
CGA <- 6*(CG-1)+A
# Chapter 17: Stacked Tables
women.stack <- table(C, Q3)
for(j in 6:9){
  women.stack <- rbind(women.stack, table(women[,j], Q3))
}
women.stack <- women.stack[-c(38,39,47,48),]
# The above table is also given in file 'women3.xls' (Exhibit 17.1)
chisq.test(women.stack[27:32,])$statistic/sum(women.stack[27:32,])
sum(ca(women.stack[27:32,])$sv^2)
# The file which concatenates all cross-tabulations (Exhibit 17.3)
# is given in 'women4.xls'
# Chapter 18: Multiple Correspondence Analysis
# Assuming raw women data is in data frame women (see above) and that
# attach(women) has been executed...
germany <- C==2|C==3
womenG <- women[germany,]
dim(womenG)
# Elimination of missing data
attach(womenG)
missing <- G==9|M==9|E==98|E==99
womenG <- womenG[!missing,]
dim(womenG)
womenG <- as.matrix(womenG)
mjca(womenG[,1:4], lambda = "indicator")
mjca(womenG[,1:4], lambda = "Burt")
sum(mjca(womenG[,1:4], lambda = "indicator")$sv^2)
sum(mjca(womenG[,1:4], lambda = "Burt")$sv^2)
sum(mjca(womenG[,1:4], lambda = "Burt")$subinertia)
16*mjca(womenG[,1:4], lambda = "Burt")$subinertia
summary(mjca(womenG, lambda = "Burt", supcol = 5:9))
# The following computes the Burt matrix, then applies ca()
# to give the same results as the option lambda="Burt" above
womenG.B <- mjca(womenG[,1:9], lambda = "Burt")$Burt
summary(ca(womenG.B[,1:16], suprow = 17:38))
# Chapter 19: Joint Correspondence Analysis
summary(mjca(womenG[,1:4], lambda = "JCA")) summary(mjca(womenG[,1:4]))
# The following statements perform the weighted least-squares
# regression of the off-diagonal elements of the Burt matrix
# (in the form of contingency ratios) on the predictors
# constructed from the standard coordinates.
# calculate contingency ratios
womenG.mca <- mjca(womenG[,1:4], lambda = "Burt")
womenG.p <- as.matrix(womenG.mca$Burt)/sum(womenG.mca$Burt)
womenG.mass <- womenG.mca$colmass
womenG.cr <- diag(1/womenG.mass)%*%womenG.p%*%diag(1/womenG.mass)-1
# calculate "predictors"
womenG.x1 <- womenG.mca$colcoord[,1]%o%womenG.mca$colcoord[,1]
womenG.x2 <- womenG.mca$colcoord[,2]%o%womenG.mca$colcoord[,2]
womenG.wt <- womenG.mass%o%womenG.mass
for(q in 1:4){
  womenG.cr[(4*(q-1)+1):(4*q),(4*(q-1)+1):(4*q)] <- NA
  womenG.x1[(4*(q-1)+1):(4*q),(4*(q-1)+1):(4*q)] <- NA
  womenG.x2[(4*(q-1)+1):(4*q),(4*(q-1)+1):(4*q)] <- NA
  womenG.wt[(4*(q-1)+1):(4*q),(4*(q-1)+1):(4*q)] <- NA
}
womenG.cr <- as.vector(womenG.cr[!is.na(womenG.cr)])
womenG.x1 <- as.vector(womenG.x1[!is.na(womenG.x1)])
womenG.x2 <- as.vector(womenG.x2[!is.na(womenG.x2)])
womenG.wt <- as.vector(womenG.wt[!is.na(womenG.wt)])
summary(lm(womenG.cr~womenG.x1+womenG.x2, weights = womenG.wt))
# In the above we see that the regression explains 89.95% of the
# inertia. As expected this is slightly better than the simplified
# "adjusted" solution and slightly worse than the optimal "JCA" one
# Chapter 20: Scaling Properties of MCA
# Here we use the data set wg93 provided in the ca package
data(wg93)
wg93.mca <- mjca(wg93[,1:4], lambda = "indicator")
plot(wg93.mca, what = c("none","all"))
wg93.F <- wg93.mca$colcoord%*%diag(sqrt(wg93.mca$sv))
wg93.coli <- diag(wg93.mca$colmass)%*%wg93.F^2
matrix(round(1000*wg93.coli[,1]/wg93.mca$sv[1]^2, 0), nrow = 5)
Ascal <- wg93.mca$colcoord[1:5,1]
Bscal <- wg93.mca$colcoord[6:10,1]
Cscal <- wg93.mca$colcoord[11:15,1]
Dscal <- wg93.mca$colcoord[16:20,1]
As <- Ascal[wg93[,1]]
Bs <- Bscal[wg93[,2]]
Cs <- Cscal[wg93[,3]]
Ds <- Dscal[wg93[,4]]
AVEs <- (As+Bs+Cs+Ds)/4
cor(cbind(As,Bs,Cs,Ds,AVEs))^2
sum(cor(cbind(As,Bs,Cs,Ds,AVEs))[1:4,5]^2)/4
wg93.mca$sv[1]^2
cov(cbind(As,Bs,Cs,Ds))*870/871
mean(cov(cbind(As,Bs,Cs,Ds))*870/871)
sum(diag(cov(cbind(As,Bs,Cs,Ds))*870/871))
VARs <- ((As-AVEs)^2+(Bs-AVEs)^2+(Cs-AVEs)^2+(Ds-AVEs)^2)/4
mean(VARs)
plot(wg93.mca, map = "rowprincipal", labels = c(0,2))
# Note: the above plot may come out inverted compared to Exhibit 20.3
# Chapter 21: Subset Correspondence Analysis
data(author)
vowels <- c(1,5,9,15,21)
consonants <- c(1:26)[-vowels]
summary(ca(author, subsetcol = consonants))
summary(ca(author, subsetcol = vowels))
plot(ca(author, subsetcol = consonants), map = "rowgreen")
plot(ca(author, subsetcol = vowels), map = "rowgreen")
womenG.B <- mjca(womenG[,1:4])$Burt
subset <- c(1:16)[-c(4,8,12,16)]
summary(ca(womenG.B[1:16,1:16], subsetrow = subset, subsetcol = subset))
# calculate contingency ratios
womenG.sca <- ca(womenG.B[1:16,1:16], subsetrow = subset, subsetcol = subset)
womenG.p <- as.matrix(womenG.mca$Burt)/sum(womenG.mca$Burt)
womenG.mass <- womenG.mca$colmass
womenG.cr <- diag(1/womenG.mass)%*%womenG.p%*%diag(1/womenG.mass)-1
# calculate "predictors"
womenG.x1 <- womenG.sca$colcoord[,1]%o%womenG.sca$colcoord[,1]
womenG.x2 <- womenG.sca$colcoord[,2]%o%womenG.sca$colcoord[,2]
womenG.wt <- womenG.mass%o%womenG.mass
for(q in 1:4){
  womenG.cr[(4*(q-1)+1):(4*q-1),(4*(q-1)+1):(4*q-1)] <- NA
  womenG.cr[4*q,] <- NA; womenG.cr[,4*q] <- NA
  womenG.x1[(3*(q-1)+1):(3*q),(3*(q-1)+1):(3*q)] <- NA
  womenG.x2[(3*(q-1)+1):(3*q),(3*(q-1)+1):(3*q)] <- NA
  womenG.wt[(4*(q-1)+1):(4*q-1),(4*(q-1)+1):(4*q-1)] <- NA
  womenG.wt[4*q,] <- NA; womenG.wt[,4*q] <- NA
}
womenG.cr <- as.vector(womenG.cr[!is.na(womenG.cr)])
womenG.x1 <- as.vector(womenG.x1[!is.na(womenG.x1)])
womenG.x2 <- as.vector(womenG.x2[!is.na(womenG.x2)])
womenG.wt <- as.vector(womenG.wt[!is.na(womenG.wt)])
summary(lm(womenG.cr~womenG.x1+womenG.x2, weights = womenG.wt))
# The above regression shows that the solution explains 86.7%
# of the off-diagonal inertia in the subset, compared to the
# 62.4% reported in the subset analysis of the Burt matrix
# Chapter 22: Analysis of Square Tables
# Read in data from file 'mobility.xls' into data frame mob
mob <- as.matrix(mob)
mob2 <- rbind(cbind(mob,t(mob)), cbind(t(mob),mob))
summary(ca(mob2))
# Chapter 23: Data Recoding
# Read in original data from file 'EU.txt' into data frame EU
EUr <- apply(EU, 2, rank)-1
EUd <- cbind(EUr, 11-EUr)
colnames(EUd) <- c(paste(colnames(EU), "-", sep = ""),
                   paste(colnames(EU), "+", sep = ""))
EUd
summary(ca(EUd))
plot(ca(EUd))
# Chapter 24: Canonical Correspondence Analysis
# Install and load the vegan package, with function cca()
# Read from file 'benthos.xls' the biological data into data
# frame bio, and the environmental data into data frame env
summary(cca(bio, env))
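# The result can also be displayed as vegan's usual triplot of sites,
# species and environmental variables (not in the book's listing):
plot(cca(bio, env))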
# Chapter 25: Aspects of Stability and Inference
# Assuming the ca package is installed, load the data set author
data(author)
author.ca <- ca(author)
nsim <- 1000    # number of simulations
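# optional (not in the book's listing): fix the random seed so that
# the simulations below are reproducible
set.seed(123)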
# compute row sums
author.rowsum <- apply(author, 1, sum)
# compute nsim simulations of first book
author.sim <- rmultinom(nsim, author.rowsum[1], prob = author[1,])
# compute nsim simulations of other books and column-bind
for (i in 2:12) {
  author.sim <- cbind(author.sim, rmultinom(nsim, author.rowsum[i], prob = author[i,]))
}
# transpose to have same format as original matrix
author.sim <- t(author.sim)
author.sim2 <- matrix(rep(0, 12*nsim*26), nrow = 12*nsim)
# reorganize rows so that matrices are together
for (k in 1:nsim) {
  for (i in 1:12) {
    author.sim2[(k-1)*12+i,] <- author.sim[k+(i-1)*nsim,]
  }
}
# get standard coordinates for rows
author.rowsc <- author.ca$rowcoord[,1:2]
# calculate principal coordinates of all replicates using transition formula
author.colsim <- t(t(author.rowsc)%*%author.sim2[1:12,])/
                 apply(author.sim2[1:12,], 2, sum)
for (k in 2:nsim)
  author.colsim <- rbind(author.colsim,
    t(t(author.rowsc)%*%author.sim2[((k-1)*12+1):(k*12),])/
    apply(author.sim2[((k-1)*12+1):(k*12),], 2, sum))
# reorganize rows of coordinates so that letters are together
author.colsim2 <- matrix(rep(0, 26*nsim*2), nrow = 26*nsim)
for (j in 1:26) {
  for (k in 1:nsim) {
    author.colsim2[(j-1)*nsim+k,] <- author.colsim[j+(k-1)*26,]
  }
}
# plot all points (use first format of coords for labelling...)
plot(author.colsim[,1], -author.colsim[,2], xlab = "dim1", ylab = "dim2",
     type = "n")
letters <- c("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o",
             "p","q","r","s","t","u","v","w","x","y","z")
text(author.colsim[,1], -author.colsim[,2], letters, cex = 0.8, col = "gray")
# plot convex hulls for each letter
# first calculate principal coordinates of letters for original matrix
author.col <- t(t(author.rowsc)%*%author)/apply(author, 2, sum)
for (j in 1:26) {
  points <- author.colsim2[(nsim*(j-1)+1):(nsim*j),]
  # note we are reversing second coordinate in all these plots
  points[,2] <- -points[,2]
  hpts <- chull(points)
  hpts <- c(hpts, hpts[1])
  lines(points[hpts,], lty = 3, lwd = 2)
  text(author.col[j,1], -author.col[j,2], letters[j], font = 2, cex = 1.5)
}
# peeling a new convex hull (here we peel until just under 5% are removed)
plot(author.colsim2[,1], -author.colsim2[,2], xlab = "dim1", ylab = "dim2",
     type = "n")
for (j in 1:26) {
  points <- author.colsim2[(nsim*(j-1)+1):(nsim*j),]
  # note we are reversing second coordinate in all these plots
  points[,2] <- -points[,2]
  repeat {
    hpts <- chull(points)
    npts <- nrow(points[-hpts,])    # next number of points in peeled set
    if(npts/nsim < 0.95) break
    points <- points[-hpts,]
  }
  hpts <- c(hpts, hpts[1])
  lines(points[hpts,], lty = 3)
  text(author.col[j,1], -author.col[j,2], letters[j], font = 2)
}
# confidence ellipses - needs package ellipse (download from CRAN)
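# If the ellipse package is not yet installed:
install.packages("ellipse")
library(ellipse)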
plot(author.colsim2[,1], -author.colsim2[,2], xlab = "dim1", ylab = "dim2",
     type = "n")
for (j in 1:26) {
  points <- author.colsim2[(nsim*(j-1)+1):(nsim*j),]
  # note we are reversing second coordinate in all these plots
  points[,2] <- -points[,2]
  covpoints <- cov(points)
  meanpoints <- apply(points, 2, mean)
  lines(ellipse(covpoints, centre = meanpoints))
  text(author.col[j,1], -author.col[j,2], letters[j], font = 2)
}
# Delta method, using SPSS output contained in the file
# 'AUTHORDELTA.txt'. This is the file contents:
.007 .008  .000
.018 .025 -.043
.019 .024 -.275
.015 .016  .393
.009 .014 -.008
.021 .024 -.041
.019 .027 -.016
.012 .021  .066
.013 .013  .087
.077 .087  .023
.027 .033 -.173
.013 .016 -.004
.021 .028  .097
.011 .014  .043
.011 .020 -.004
.029 .035 -.080
.078 .094 -.001
.015 .017 -.065
.013 .024  .046
.006 .012 -.010
.016 .021  .029
.032 .040  .027
.015 .022  .073
.082 .087  .116
.039 .027 -.033
.091 .099 -.075
# plot all points (use first format of coords for labelling...)
plot(author.colsim[,1], -author.colsim[,2], xlab = "dim1", ylab = "dim2",
     type = "n")
letters <- c("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o",
             "p","q","r","s","t","u","v","w","x","y","z")
# calculate principal coordinates of column points
author.princol <- t(t(author.rowsc)%*%author)/apply(author, 2, sum)
text(author.princol[,1], -author.princol[,2], letters, font = 2)
# input covariances from SPSS output
covdelta <- read.table("AUTHORDELTA.txt")
covpoints <- matrix(0, 2, 2)    # 2x2 covariance matrix to be filled in
for (j in 1:26) {
  covpoints[1,1] <- covdelta[j,1]^2
  covpoints[2,2] <- covdelta[j,2]^2
  covpoints[1,2] <- covdelta[j,3]*covdelta[j,1]*covdelta[j,2]
  covpoints[2,1] <- covpoints[1,2]
  lines(ellipse(covpoints, centre = c(author.princol[j,1],
                                      -author.princol[j,2])))
}
# Permutation test of the positions of the books
# calculate the principal coordinates of the books (note: in the
# present version of ca() only the standard coordinates are returned,
# so you have to calculate the principal coordinates by multiplying
# the standard coordinates by the respective singular values, i.e.
# the square roots of the eigenvalues, or principal inertias)
pcs <- author.ca$rowcoord%*%diag(author.ca$sv)
# check the order of the books
author.ca$rownames
# check re-ordering in order to bring pairs together
author.ca$rownames[c(1,4,2,11,3,8,5,9,6,7,10,12)]
# reorder the rows of pcs[,] so that pairs of books come together
pcs <- pcs[c(1,4,2,11,3,8,5,9,6,7,10,12),]
# calculate the test statistic (sum of distances between pairs,
# calculated in K dimensions)
K <- 2
stat <- 0
for(i in seq(1, 11, 2)){
  dist2 <- 0
  for(k in 1:K) {
    dist2 <- dist2+(pcs[i,k]-pcs[i+1,k])^2
  }
  stat <- stat+sqrt(dist2)
}
# generate all permutations (exact null distribution)
perm <- rep(0, 12)
nperm <- 0
perm[1] <- 1
# 10395 = 11 x 9 x 7 x 5 x 3 x 1, the number of distinct ways of
# pairing up the 12 books
statnull <- rep(0, 10395)
for(j11 in 2:12){
  perm[2] <- j11
  rem11 <- c(1:12)[-perm[1:2]]
  perm[3] <- rem11[1]
  for(j9 in 2:10){
    perm[4] <- rem11[j9]
    rem9 <- c(1:12)[-perm[1:4]]
    perm[5] <- rem9[1]
    for(j7 in 2:8){
      perm[6] <- rem9[j7]
      rem7 <- c(1:12)[-perm[1:6]]
      perm[7] <- rem7[1]
      for(j5 in 2:6){
        perm[8] <- rem7[j5]
        rem5 <- c(1:12)[-perm[1:8]]
        perm[9] <- rem5[1]
        for(j3 in 2:4){
          perm[10] <- rem5[j3]
          rem3 <- c(1:12)[-perm[1:10]]
          perm[11] <- rem3[1]
          perm[12] <- rem3[2]
          nperm <- nperm+1
          pcsp <- pcs[perm,]
          statp <- 0
          for(i in seq(1,11,2)){
            dist2 <- 0
            for(k in 1:K){
              dist2 <- dist2+(pcsp[i,k]-pcsp[i+1,k])^2
            }
            statp <- statp+sqrt(dist2)
          }
          statnull[nperm] <- statp
        }
      }
    }
  }
}
stat
statsort <- sort(statnull)
statsort[1:100]
hist(statnull, breaks = 50, xlim = c(0.4,1.2))
# Same for consonants
author.ca <- ca(author, subsetcol = consonants)
# calculate the principal coordinates of the books (note: in the
# present version of ca() only the standard coordinates are returned,
# so you have to calculate the principal coordinates by multiplying
# the standard coordinates by the respective singular values, i.e.
# the square roots of the eigenvalues, or principal inertias)
pcs <- author.ca$rowcoord%*%diag(author.ca$sv)
# check the order of the books
author.ca$rownames
# check re-ordering in order to bring pairs together
author.ca$rownames[c(1,4,2,11,3,8,5,9,6,7,10,12)]
# reorder the rows of pcs[,] so that pairs of books come together
pcs <- pcs[c(1,4,2,11,3,8,5,9,6,7,10,12),]
# calculate the test statistic (sum of distances between pairs,
# calculated in K dimensions)
K <- 2
stat <- 0
for(i in seq(1,11,2)){
  dist2 <- 0
  for(k in 1:K){
    dist2 <- dist2+(pcs[i,k]-pcs[i+1,k])^2
  }
  stat <- stat+sqrt(dist2)
}
# generate all permutations (exact null distribution)
# (here repeat above nested loops)
statsort <- sort(statnull)
statsort[1:100]
hist(statnull, breaks = 50, xlim = c(0.4,1.2))
# Same for vowels
author.ca <- ca(author, subsetcol = vowels)
# calculate the principal coordinates of the books (note: in the
# present version of ca() only the standard coordinates are returned,
# so you have to calculate the principal coordinates by multiplying
# the standard coordinates by the respective singular values, i.e.
# the square roots of the eigenvalues, or principal inertias)
pcs <- author.ca$rowcoord%*%diag(author.ca$sv)
# check the order of the books
author.ca$rownames
# check re-ordering in order to bring pairs together
author.ca$rownames[c(1,4,2,11,3,8,5,9,6,7,10,12)]
# reorder the rows of pcs[,] so that pairs of books come together
pcs <- pcs[c(1,4,2,11,3,8,5,9,6,7,10,12),]
# calculate the test statistic (sum of distances between pairs,
# calculated in K dimensions)
K <- 2
stat <- 0
for(i in seq(1,11,2)){
  dist2 <- 0
  for(k in 1:K){
    dist2 <- dist2+(pcs[i,k]-pcs[i+1,k])^2
  }
  stat <- stat+sqrt(dist2)
}
# generate all permutations (exact null distribution)
# (here repeat above nested loops)
stat
statsort <- sort(statnull)
statsort[1:100]
hist(statnull, breaks = 50, xlim = c(0.13,0.45))