General information
Below you can find all the R commands given in Appendix B of Correspondence Analysis in Practice, Second Edition, starting on page 215.
You can find the data sets as Excel files in the section 'data sets'.
R code
# Chapter 2: Profiles and the Profile Space
# The data file 'travels.xls' contains the original count data
# so first calculate the row profiles in Excel by dividing each
# row by its row total. Then copy the profiles with their row
# and column names, remembering to leave the top left hand cell
# empty.
# Alternatively, copy the five lines below
               Holidays HalfDays FullDays
Norway            0.333    0.056    0.611
Canada            0.067    0.200    0.733
Greece            0.138    0.862    0.000
France/Germany    0.083    0.083    0.833
# To read the data into R the following statement must be typed
# in the R command window and not copied and pasted (since the
# data is in the "clipboard")
profiles <- read.table("clipboard")
profiles
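# Note (an addition to the book's listing): reading from "clipboard"
# works on Windows; on macOS the clipboard can be read with
# read.table(pipe("pbpaste")). On any system you can instead save the
# five lines above in a text file (the name 'travels.txt' is just an
# example) and read it with:
profiles <- read.table("travels.txt")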
# You need to have installed and loaded the rgl package now
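# If rgl is not yet installed, install it from CRAN and load it:
install.packages("rgl")
library(rgl)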
rgl.lines(c(0,1.2), c(0,0), c(0,0))
rgl.lines(c(0,0), c(0,1.2), c(0,0))
rgl.lines(c(0,0), c(0,0), c(0,1.2))
rgl.lines(c(0,0), c(0,1), c(1,0), size = 2)
rgl.lines(c(0,1), c(1,0), c(0,0), size = 2)
rgl.lines(c(0,1), c(0,0), c(1,0), size = 2)
rgl.points(profiles[,3], profiles[,1], profiles[,2], size = 4)
rgl.texts(profiles[,3], profiles[,1], profiles[,2], text = row.names(profiles))
# Now expand the 3-d graphics window and spin the points around using the
# left mouse button; the right button or mouse wheel allows zooming
# Chapter 3: Masses and Centroids
# Copy data in file 'readers.xls' or copy following lines
   C1 C2 C3
E1  5  7  2
E2 18 46 20
E3 19 29 39
E4 12 40 49
E5  3  7 16
# Again, TYPE the following statement
table <- read.table("clipboard")
table
table.pro <- table/apply(table, 1, sum)
table.x <- 1-table.pro[,1]-table.pro[,3]/2
table.y <- table.pro[,3]*sqrt(3)/2
plot.new()
lines(c(0,1,0.5,0), c(0,0,sqrt(3)/2,0), col = "gray")
text(c(0,1,0.5), c(0,0,sqrt(3)/2), labels = colnames(table))
text(table.x, table.y, labels = rownames(table))
# Chapter 4: Chi-square Distance and Inertia
# calculate chi-square statistic
table.rowsum <- apply(table, 1, sum)
table.colsum <- apply(table, 2, sum)
table.sum <- sum(table)
table.exp <- table.rowsum%o%table.colsum/table.sum
chi2 <- sum((table-table.exp)^2/table.exp)
chi2
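# As a quick check (not in the book's listing), R's built-in test
# returns the same chi-square statistic:
chisq.test(table)$statistic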
# chi-square distances to centroid
table.colmass <- table.colsum/table.sum
chidist <- 0
for(j in 1:dim(table)[2]){
  chidist <- chidist+(table.pro[5,j]-table.colmass[j])^2/table.colmass[j]
}
chidist
apply((t(table.pro)-table.colmass)^2/table.colmass, 2, sum)
tablec.pro <- rbind(table.pro, table.colmass)
rownames(tablec.pro)[6] <- "ave"
dist(sweep(tablec.pro, 2, sqrt(table.colmass), FUN = "/"))
# Chapter 5: Plotting Chi-square Distance
table.wt <- sqrt(table.colmass)
rgl.lines(c(0,1.2/table.wt[2]), c(0,0), c(0,0))
rgl.lines(c(0,0), c(0,1.2/table.wt[3]), c(0,0))
rgl.lines(c(0,0), c(0,0), c(0,1.2/table.wt[1]))
rgl.lines(c(0,0), c(0,1/table.wt[3]), c(1/table.wt[1],0), size = 2)
rgl.lines(c(0,1/table.wt[2]), c(1/table.wt[3],0), c(0,0), size = 2)
rgl.lines(c(0,1/table.wt[2]), c(0,0), c(1/table.wt[1],0), size = 2)
table.chi <- t(t(table.pro)/table.wt)
rgl.points(table.chi[,2], table.chi[,3], table.chi[,1], size = 4)
rgl.texts(table.chi[,2], table.chi[,3], table.chi[,1], text = row.names(table))
# Chapter 6: Reduction of Dimensionality
# First read data from file 'health.xls' as described before into
# data frame health
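# For example, once the table (with row and column names and an empty
# top left cell) has been copied to the clipboard, type:
health <- read.table("clipboard")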
health.P <- health/sum(health)
health.r <- apply(health.P, 1, sum)
health.c <- apply(health.P, 2, sum)
health.Dr <- diag(health.r)
health.Dc <- diag(health.c)
health.Drmh <- diag(1/sqrt(health.r))
health.Dcmh <- diag(1/sqrt(health.c))
health.P <- as.matrix(health.P)
health.S <- health.Drmh%*%(health.P-health.r%o%health.c)%*%health.Dcmh
health.svd <- svd(health.S)
health.rsc <- health.Drmh%*%health.svd$u
health.csc <- health.Dcmh%*%health.svd$v
health.rpc <- health.rsc%*%diag(health.svd$d)
health.cpc <- health.csc%*%diag(health.svd$d)
plot(health.rpc[,1], health.rpc[,2], type = "n", pty = "s")
text(health.rpc[,1], health.rpc[,2], labels = rownames(health))
# Notice that the R graphics window is square and does not give the
# correct aspect ratio, so pull out the window so that a vertical
# unit is approximately equal to a horizontal unit: in this example
# the window would be as wide as the computer screen but take up less
# than half the height of the screen.
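# Alternatively (not in the book's listing), the asp argument of
# plot() fixes the aspect ratio so that one vertical unit equals one
# horizontal unit, whatever the shape of the window:
plot(health.rpc[,1], health.rpc[,2], type = "n", asp = 1)
text(health.rpc[,1], health.rpc[,2], labels = rownames(health))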
# Chapter 7: Optimal Scaling
health.range <- max(health.csc[,1])-min(health.csc[,1])
health.scale <- (health.csc[,1]-min(health.csc[,1]))*100/health.range
health.scale
# Chapter 8: Symmetry of Row and Column Analyses
health.cor <- t(health.rsc[,1])%*%health.P%*%health.csc[,1]
health.cor^2
t(health.rsc[,1])%*%health.Dr%*%health.rsc[,1]
# Chapter 9: Two-dimensional Maps
# For this example you can read in the data from 'smoke.xls' as
# before, or after installing and loading the ca package, the
# data are available by simply entering:
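# To install and load the ca package from CRAN:
install.packages("ca")
library(ca)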
data(smoke)
smoke
ca(smoke)
plot(ca(smoke))
# (After the previous statement you might need to adjust the
# aspect ratio of the graphics window)
plot(ca(smoke), map = "rowprincipal")
# Chapter 10: Three More Examples
data(author)
plot3d.ca(ca(author), labels = c(2,1), sf = 0.000001)
# Chapter 11: Contributions to Inertia
# Read in data from 'funding.xls' into data frame fund, then:
fund.P <- as.matrix(fund/sum(fund))
fund.r <- apply(fund.P, 1, sum)
fund.c <- apply(fund.P, 2, sum)
fund.Drmh <- diag(1/sqrt(fund.r))
fund.Dcmh <- diag(1/sqrt(fund.c))
fund.res <- fund.Drmh%*%(fund.P-fund.r%o%fund.c)%*%fund.Dcmh
round(apply(fund.res^2, 1, sum), 5)
round(apply(fund.res^2, 2, sum), 5)
round(1000*fund.res^2/sum(fund.res^2), 0)
fund.svd <- svd(fund.res)
fund.svd$d^2
fund.F <- fund.Drmh%*%fund.svd$u%*%diag(fund.svd$d)
fund.rowi <- diag(fund.r)%*%fund.F^2
fund.rowi[,1:4]
round(1000*(fund.rowi/apply(fund.rowi, 1, sum))[,1:4], 0)
round(1000*t(t(fund.rowi)/fund.svd$d^2)[,1:4], 0)
summary(ca(fund))
# Chapter 12: Supplementary Points
fund.m <- c(4,12,11,19,7)/53
fund.Gamma <- fund.Dcmh%*%fund.svd$v
t(fund.m)%*%fund.Gamma[,1:2]
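# Alternatively (a sketch, not in the book's listing), ca() can handle
# the supplementary row directly via its suprow option; the row label
# 'extra' here is just illustrative:
fund.sup <- rbind(as.matrix(fund), extra = c(4,12,11,19,7))
plot(ca(fund.sup, suprow = nrow(fund.sup)))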
# Chapter 13: Correspondence Analysis Biplots
diag(sqrt(fund.c))%*%fund.Gamma[,1:2]
fund.est <- fund.F[,1:2]%*%t(diag(sqrt(fund.c))%*%fund.Gamma[,1:2])
oner <- rep(1, dim(fund)[1])
round(fund.est%*%diag(sqrt(fund.c))+oner%o%fund.c, 3)
round(fund.P/fund.r, 3)
sum(diag(fund.r)%*%(fund.est%*%diag(sqrt(fund.c))+oner%o%fund.c -
    fund.P/fund.r)^2%*%diag(1/fund.c))
sum(fund.svd$d[3:4]^2)
# Chapter 14: Transition and Regression Relationships
fund.vec <- as.vector(as.matrix(fund))
fund.Phi <- fund.Drmh%*%fund.svd$u
fund.vecr <- rep(fund.Phi[,1], 5)
fund.vecc <- as.vector(oner%*%t(fund.Gamma[,1]))
lm(fund.vecr~fund.vecc, weights = fund.vec)
fund.y <- (fund.P[1,]/fund.r[1])/fund.c
summary(lm(fund.y~fund.Gamma[,1]+fund.Gamma[,2], weights = fund.c))
cov.wt(cbind(fund.y, fund.Gamma[,1:2]), wt = fund.c, cor = TRUE)$cor
# Chapter 15: Clustering Rows and Columns
# The food stores data is in file 'stores.xls', to be read
# into R data frame food; then:
food.exp <- apply(food, 1, sum)%o%apply(food, 2, sum)/sum(food)
food.stres <- (food-food.exp)/sqrt(food.exp)
sum(food.stres^2)
food.rpro <- food/apply(food, 1, sum)
food.r <- apply(food, 1, sum)/sum(food)
# apply Murtagh's hierclust() function (see web resource, p. 261)
# to the row profiles in food.rpro, using the weights in food.r
food.rclust <- hierclust(food.rpro, food.r)
plot(as.dendrogram(food.rclust))
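# If hierclust() is not available, a rough base-R approximation (a
# sketch only: hclust() weights the initial clusters by the counts in
# 'members', which only approximates the mass-weighted Ward clustering
# used in the book):
food.c <- apply(food, 2, sum)/sum(food)
food.chid <- dist(sweep(food.rpro, 2, sqrt(food.c), FUN = "/"))
food.hclust <- hclust(food.chid^2, method = "ward.D",
                      members = apply(food, 1, sum))
plot(as.dendrogram(food.hclust))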
# Chapter 16: Multiway Tables
# The raw data file of 6371 cases for the Spanish health survey
# has not been given but the derived files are:
# 'health.xls': cross-tabulation of age by perceived health
# 'health2.xls': cross-tabulation of age-sex by perceived health
# (Exhibit 16.2)
# The working women raw data are in 'womenraw.xls'
# Since the top lefthand cell is not empty (there are no row names)
# the following statement needs to be typed to read the copied data
women <- read.table("clipboard", header = TRUE)
# Check size of this data frame as follows:
dim(women)
colnames(women)
attach(women)
table(C, Q3)
# The above table is also given in file 'women.xls'
CG <- 2*(C-1)+G
table(CG, Q3)
# these commands declare gender 9 to be NA
women[,6][G==9] <- NA
attach(women)
CG <- 2*(C-1)+G
table(CG, Q3)
# The above table is also given in file 'women2.xls'
CGA <- 6*(CG-1)+A
# Chapter 17: Stacked Tables
women.stack <- table(C, Q3)
for(j in 6:9){
  women.stack <- rbind(women.stack, table(women[,j], Q3))
}
women.stack <- women.stack[-c(38,39,47,48),]
# The above table is also given in file 'women3.xls' (Exhibit 17.1)
chisq.test(women.stack[27:32,])$statistic/sum(women.stack[27:32,])
sum(ca(women.stack[27:32,])$sv^2)
# The file which concatenates all cross-tabulations (Exhibit 17.3)
# is given in 'women4.xls'
# Chapter 18: Multiple Correspondence Analysis
# Assuming raw women data is in data frame women (see above) and that
# attach(women) has been executed...
germany <- C==2|C==3
womenG <- women[germany,]
dim(womenG)
# Elimination of missing data
attach(womenG)
missing <- G==9|M==9|E==98|E==99
womenG <- womenG[!missing,]
dim(womenG)
womenG <- as.matrix(womenG)
mjca(womenG[,1:4], lambda = "indicator")
mjca(womenG[,1:4], lambda = "Burt")
sum(mjca(womenG[,1:4], lambda = "indicator")$sv^2)
sum(mjca(womenG[,1:4], lambda = "Burt")$sv^2)
sum(mjca(womenG[,1:4], lambda = "Burt")$subinertia)
16*mjca(womenG[,1:4], lambda = "Burt")$subinertia
summary(mjca(womenG, lambda = "Burt", supcol = 5:9))
# The following computes the Burt matrix, then applies ca()
# to give the same results as the option lambda="Burt" above
womenG.B <- mjca(womenG[,1:9], lambda = "Burt")$Burt
summary(ca(womenG.B[,1:16], suprow = 17:38))
# Chapter 19: Joint Correspondence Analysis
summary(mjca(womenG[,1:4], lambda = "JCA")) summary(mjca(womenG[,1:4]))
# The following statements perform the weighted least-squares
# regression of the off-diagonal elements of the Burt matrix
# (in the form of contingency ratios) on the predictors
# constructed from the standard coordinates.
# calculate contingency ratios
womenG.mca <- mjca(womenG[,1:4], lambda = "Burt")
womenG.p <- as.matrix(womenG.mca$Burt)/sum(womenG.mca$Burt)
womenG.mass <- womenG.mca$colmass
womenG.cr <- diag(1/womenG.mass)%*%womenG.p%*%diag(1/womenG.mass)-1
# calculate "predictors"
womenG.x1 <- womenG.mca$colcoord[,1]%o%womenG.mca$colcoord[,1]
womenG.x2 <- womenG.mca$colcoord[,2]%o%womenG.mca$colcoord[,2]
womenG.wt <- womenG.mass%o%womenG.mass
for(q in 1:4){
  womenG.cr[(4*(q-1)+1):(4*q),(4*(q-1)+1):(4*q)] <- NA
  womenG.x1[(4*(q-1)+1):(4*q),(4*(q-1)+1):(4*q)] <- NA
  womenG.x2[(4*(q-1)+1):(4*q),(4*(q-1)+1):(4*q)] <- NA
  womenG.wt[(4*(q-1)+1):(4*q),(4*(q-1)+1):(4*q)] <- NA
}
womenG.cr <- as.vector(womenG.cr[!is.na(womenG.cr)])
womenG.x1 <- as.vector(womenG.x1[!is.na(womenG.x1)])
womenG.x2 <- as.vector(womenG.x2[!is.na(womenG.x2)])
womenG.wt <- as.vector(womenG.wt[!is.na(womenG.wt)])
summary(lm(womenG.cr~womenG.x1+womenG.x2, weights = womenG.wt))
# In the above we see that the regression explains 89.95% of the
# inertia. As expected this is slightly better than the simplified
# "adjusted" solution and slightly worse than the optimal "JCA" one
# Chapter 20: Scaling Properties of MCA
# Here we use the data set wg93 provided in the ca package
data(wg93)
wg93.mca <- mjca(wg93[,1:4], lambda = "indicator")
plot(wg93.mca, what = c("none","all"))
wg93.F <- wg93.mca$colcoord%*%diag(sqrt(wg93.mca$sv))
wg93.coli <- diag(wg93.mca$colmass)%*%wg93.F^2
matrix(round(1000*wg93.coli[,1]/wg93.mca$sv[1]^2, 0), nrow = 5)
Ascal <- wg93.mca$colcoord[1:5,1]
Bscal <- wg93.mca$colcoord[6:10,1]
Cscal <- wg93.mca$colcoord[11:15,1]
Dscal <- wg93.mca$colcoord[16:20,1]
As <- Ascal[wg93[,1]]
Bs <- Bscal[wg93[,2]]
Cs <- Cscal[wg93[,3]]
Ds <- Dscal[wg93[,4]]
AVEs <- (As+Bs+Cs+Ds)/4
cor(cbind(As,Bs,Cs,Ds,AVEs))^2
sum(cor(cbind(As,Bs,Cs,Ds,AVEs))[1:4,5]^2)/4
wg93.mca$sv[1]^2
cov(cbind(As,Bs,Cs,Ds))*870/871
mean(cov(cbind(As,Bs,Cs,Ds))*870/871)
sum(diag(cov(cbind(As,Bs,Cs,Ds))*870/871))
VARs <- ((As-AVEs)^2+(Bs-AVEs)^2+(Cs-AVEs)^2+(Ds-AVEs)^2)/4
mean(VARs)
plot(wg93.mca, map = "rowprincipal", labels = c(0,2))
# Note: the above plot may come out inverted compared to Exhibit 20.3
# Chapter 21: Subset Correspondence Analysis
data(author)
vowels <- c(1,5,9,15,21)
consonants <- c(1:26)[-vowels]
summary(ca(author, subsetcol = consonants))
summary(ca(author, subsetcol = vowels))
plot(ca(author, subsetcol = consonants), map = "rowgreen")
plot(ca(author, subsetcol = vowels), map = "rowgreen")
womenG.B <- mjca(womenG[,1:4])$Burt
subset <- c(1:16)[-c(4,8,12,16)]
summary(ca(womenG.B[1:16,1:16], subsetrow = subset, subsetcol = subset))
# calculate contingency ratios
womenG.sca <- ca(womenG.B[1:16,1:16], subsetrow = subset, subsetcol = subset)
womenG.p <- as.matrix(womenG.mca$Burt)/sum(womenG.mca$Burt)
womenG.mass <- womenG.mca$colmass
womenG.cr <- diag(1/womenG.mass)%*%womenG.p%*%diag(1/womenG.mass)-1
# calculate "predictors"
womenG.x1 <- womenG.sca$colcoord[,1]%o%womenG.sca$colcoord[,1]
womenG.x2 <- womenG.sca$colcoord[,2]%o%womenG.sca$colcoord[,2]
womenG.wt <- womenG.mass%o%womenG.mass
for(q in 1:4){
  womenG.cr[(4*(q-1)+1):(4*q-1),(4*(q-1)+1):(4*q-1)] <- NA
  womenG.cr[4*q,] <- NA; womenG.cr[,4*q] <- NA
  womenG.x1[(3*(q-1)+1):(3*q),(3*(q-1)+1):(3*q)] <- NA
  womenG.x2[(3*(q-1)+1):(3*q),(3*(q-1)+1):(3*q)] <- NA
  womenG.wt[(4*(q-1)+1):(4*q-1),(4*(q-1)+1):(4*q-1)] <- NA
  womenG.wt[4*q,] <- NA; womenG.wt[,4*q] <- NA
}
womenG.cr <- as.vector(womenG.cr[!is.na(womenG.cr)])
womenG.x1 <- as.vector(womenG.x1[!is.na(womenG.x1)])
womenG.x2 <- as.vector(womenG.x2[!is.na(womenG.x2)])
womenG.wt <- as.vector(womenG.wt[!is.na(womenG.wt)])
summary(lm(womenG.cr~womenG.x1+womenG.x2, weights = womenG.wt))
# The above regression shows that the solution explains 86.7%
# of the off-diagonal inertia in the subset, compared to the
# 62.4% reported in the subset analysis of the Burt matrix
# Chapter 22: Analysis of Square Tables
# Read in data from file 'mobility.xls' into data frame mob
mob <- as.matrix(mob)
mob2 <- rbind(cbind(mob,t(mob)), cbind(t(mob),mob))
summary(ca(mob2))
# Chapter 23: Data Recoding
# Read in original data from file 'EU.txt' into data frame EU
EUr <- apply(EU, 2, rank)-1
EUd <- cbind(EUr, 11-EUr)
colnames(EUd) <- c(paste(colnames(EU), "-", sep = ""),
                   paste(colnames(EU), "+", sep = ""))
EUd
summary(ca(EUd))
plot(ca(EUd))
# Chapter 24: Canonical Correspondence Analysis
# Install and load the vegan package, with function cca()
# Read from file 'benthos.xls' the biological data into data
# frame bio, and the environmental data into data frame env
summary(cca(bio, env))
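# The result can also be displayed as vegan's usual triplot of sites,
# species and environmental variables (not in the book's listing):
plot(cca(bio, env))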
# Chapter 25: Aspects of Stability and Inference
# Assuming the ca package is installed, load the data set author
data(author)
author.ca <- ca(author)
nsim <- 1000    # number of simulations
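# optional (not in the book's listing): fix the random seed so that
# the simulations below are reproducible
set.seed(123)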
# compute row sums
author.rowsum <- apply(author, 1, sum)
# compute nsim simulations of first book
author.sim <- rmultinom(nsim, author.rowsum[1], prob = author[1,])
# compute nsim simulations of other books and column-bind
for (i in 2:12) {
  author.sim <- cbind(author.sim, rmultinom(nsim, author.rowsum[i], prob = author[i,]))
}
# transpose to have same format as original matrix
author.sim <- t(author.sim)
author.sim2 <- matrix(rep(0, 12*nsim*26), nrow = 12*nsim)
# reorganize rows so that matrices are together
for (k in 1:nsim) {
  for (i in 1:12) {
    author.sim2[(k-1)*12+i,] <- author.sim[k+(i-1)*nsim,]
  }
}
# get standard coordinates for rows
author.rowsc <- author.ca$rowcoord[,1:2]
# calculate principal coordinates of all replicates using transition formula
author.colsim <- t(t(author.rowsc)%*%author.sim2[1:12,])/
                 apply(author.sim2[1:12,], 2, sum)
for (k in 2:nsim)
  author.colsim <- rbind(author.colsim,
    t(t(author.rowsc)%*%author.sim2[((k-1)*12+1):(k*12),])/
    apply(author.sim2[((k-1)*12+1):(k*12),], 2, sum))
# reorganize rows of coordinates so that letters are together
author.colsim2 <- matrix(rep(0, 26*nsim*2), nrow = 26*nsim)
for (j in 1:26) {
  for (k in 1:nsim) {
    author.colsim2[(j-1)*nsim+k,] <- author.colsim[j+(k-1)*26,]
  }
}
# plot all points (use first format of coords for labelling...)
plot(author.colsim[,1], -author.colsim[,2], xlab = "dim1", ylab = "dim2",
     type = "n")
letters <- c("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o",
             "p","q","r","s","t","u","v","w","x","y","z")
text(author.colsim[,1], -author.colsim[,2], letters, cex = 0.8, col = "gray")
# plot convex hulls for each letter
# first calculate principal coordinates of letters for original matrix
author.col <- t(t(author.rowsc)%*%author)/apply(author, 2, sum)
for (j in 1:26) {
  points <- author.colsim2[(nsim*(j-1)+1):(nsim*j),]
  # note we are reversing second coordinate in all these plots
  points[,2] <- -points[,2]
  hpts <- chull(points)
  hpts <- c(hpts, hpts[1])
  lines(points[hpts,], lty = 3, lwd = 2)
  text(author.col[j,1], -author.col[j,2], letters[j], font = 2, cex = 1.5)
}
# peeling a new convex hull (here we peel until just under 5% are removed)
plot(author.colsim2[,1], -author.colsim2[,2], xlab = "dim1", ylab = "dim2",
     type = "n")
for (j in 1:26) {
  points <- author.colsim2[(nsim*(j-1)+1):(nsim*j),]
  # note we are reversing second coordinate in all these plots
  points[,2] <- -points[,2]
  repeat {
    hpts <- chull(points)
    npts <- nrow(points[-hpts,])    # next number of points in peeled set
    if(npts/nsim < 0.95) break
    points <- points[-hpts,]
  }
  hpts <- c(hpts, hpts[1])
  lines(points[hpts,], lty = 3)
  text(author.col[j,1], -author.col[j,2], letters[j], font = 2)
}
# confidence ellipses - needs package ellipse (download from CRAN)
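# If the ellipse package is not yet installed:
install.packages("ellipse")
library(ellipse)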
plot(author.colsim2[,1], -author.colsim2[,2], xlab = "dim1", ylab = "dim2",
     type = "n")
for (j in 1:26) {
  points <- author.colsim2[(nsim*(j-1)+1):(nsim*j),]
  # note we are reversing second coordinate in all these plots
  points[,2] <- -points[,2]
  covpoints <- cov(points)
  meanpoints <- apply(points, 2, mean)
  lines(ellipse(covpoints, centre = meanpoints))
  text(author.col[j,1], -author.col[j,2], letters[j], font = 2)
}
# Delta method, using SPSS output contained in the file
# 'AUTHORDELTA.txt'. This is the file contents:
.007 .008  .000
.018 .025 -.043
.019 .024 -.275
.015 .016  .393
.009 .014 -.008
.021 .024 -.041
.019 .027 -.016
.012 .021  .066
.013 .013  .087
.077 .087  .023
.027 .033 -.173
.013 .016 -.004
.021 .028  .097
.011 .014  .043
.011 .020 -.004
.029 .035 -.080
.078 .094 -.001
.015 .017 -.065
.013 .024  .046
.006 .012 -.010
.016 .021  .029
.032 .040  .027
.015 .022  .073
.082 .087  .116
.039 .027 -.033
.091 .099 -.075
# plot all points (use first format of coords for labelling...)
plot(author.colsim[,1], -author.colsim[,2], xlab = "dim1", ylab = "dim2",
     type = "n")
letters <- c("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o",
             "p","q","r","s","t","u","v","w","x","y","z")
# calculate principal coordinates of column points
author.princol <- t(t(author.rowsc)%*%author)/apply(author, 2, sum)
text(author.princol[,1], -author.princol[,2], letters, font = 2)
# input covariances from SPSS output
covdelta <- read.table("AUTHORDELTA.txt")
covpoints <- matrix(0, 2, 2)    # 2x2 covariance matrix to be filled in
for (j in 1:26) {
  covpoints[1,1] <- covdelta[j,1]^2
  covpoints[2,2] <- covdelta[j,2]^2
  covpoints[1,2] <- covdelta[j,3]*covdelta[j,1]*covdelta[j,2]
  covpoints[2,1] <- covpoints[1,2]
  lines(ellipse(covpoints, centre = c(author.princol[j,1],
                                      -author.princol[j,2])))
}
# Permutation test of the positions of the books
# calculate the principal coordinates of the books (note: in the
# present version of ca() only the standard coordinates are returned,
# so you have to calculate the principal coordinates by multiplying
# the standard coordinates by the respective singular values, i.e.
# the square roots of the eigenvalues, or principal inertias)
pcs <- author.ca$rowcoord%*%diag(author.ca$sv)
# check the order of the books
author.ca$rownames
# check re-ordering in order to bring pairs together
author.ca$rownames[c(1,4,2,11,3,8,5,9,6,7,10,12)]
# reorder the rows of pcs[,] so that pairs of books come together
pcs <- pcs[c(1,4,2,11,3,8,5,9,6,7,10,12),]
# calculate the test statistic (sum of distances between pairs,
# calculated in K dimensions)
K <- 2
stat <- 0
for(i in seq(1, 11, 2)){
  dist2 <- 0
  for(k in 1:K) {
    dist2 <- dist2+(pcs[i,k]-pcs[i+1,k])^2
  }
  stat <- stat+sqrt(dist2)
}
# generate all permutations (exact null distribution)
perm <- rep(0, 12)
nperm <- 0
perm[1] <- 1
# 10395 = 11 x 9 x 7 x 5 x 3 x 1, the number of distinct ways of
# pairing up the 12 books
statnull <- rep(0, 10395)
for(j11 in 2:12){
  perm[2] <- j11
  rem11 <- c(1:12)[-perm[1:2]]
  perm[3] <- rem11[1]
  for(j9 in 2:10){
    perm[4] <- rem11[j9]
    rem9 <- c(1:12)[-perm[1:4]]
    perm[5] <- rem9[1]
    for(j7 in 2:8){
      perm[6] <- rem9[j7]
      rem7 <- c(1:12)[-perm[1:6]]
      perm[7] <- rem7[1]
      for(j5 in 2:6){
        perm[8] <- rem7[j5]
        rem5 <- c(1:12)[-perm[1:8]]
        perm[9] <- rem5[1]
        for(j3 in 2:4){
          perm[10] <- rem5[j3]
          rem3 <- c(1:12)[-perm[1:10]]
          perm[11] <- rem3[1]
          perm[12] <- rem3[2]
          nperm <- nperm+1
          pcsp <- pcs[perm,]
          statp <- 0
          for(i in seq(1,11,2)){
            dist2 <- 0
            for(k in 1:K){
              dist2 <- dist2+(pcsp[i,k]-pcsp[i+1,k])^2
            }
            statp <- statp+sqrt(dist2)
          }
          statnull[nperm] <- statp
        }
      }
    }
  }
}
stat
statsort <- sort(statnull)
statsort[1:100]
hist(statnull, breaks = 50, xlim = c(0.4,1.2))
# Same for consonants
author.ca <- ca(author, subsetcol = consonants)
# calculate the principal coordinates of the books (note: in the
# present version of ca() only the standard coordinates are returned,
# so you have to calculate the principal coordinates by multiplying
# the standard coordinates by the respective singular values, i.e.
# the square roots of the eigenvalues, or principal inertias)
pcs <- author.ca$rowcoord%*%diag(author.ca$sv)
# check the order of the books
author.ca$rownames
# check re-ordering in order to bring pairs together
author.ca$rownames[c(1,4,2,11,3,8,5,9,6,7,10,12)]
# reorder the rows of pcs[,] so that pairs of books come together
pcs <- pcs[c(1,4,2,11,3,8,5,9,6,7,10,12),]
# calculate the test statistic (sum of distances between pairs,
# calculated in K dimensions)
K <- 2
stat <- 0
for(i in seq(1,11,2)){
  dist2 <- 0
  for(k in 1:K){
    dist2 <- dist2+(pcs[i,k]-pcs[i+1,k])^2
  }
  stat <- stat+sqrt(dist2)
}
# generate all permutations (exact null distribution)
# (here repeat above nested loops)
statsort <- sort(statnull)
statsort[1:100]
hist(statnull, breaks = 50, xlim = c(0.4,1.2))
# Same for vowels
author.ca <- ca(author, subsetcol = vowels)
# calculate the principal coordinates of the books (note: in the
# present version of ca() only the standard coordinates are returned,
# so you have to calculate the principal coordinates by multiplying
# the standard coordinates by the respective singular values, i.e.
# the square roots of the eigenvalues, or principal inertias)
pcs <- author.ca$rowcoord%*%diag(author.ca$sv)
# check the order of the books
author.ca$rownames
# check re-ordering in order to bring pairs together
author.ca$rownames[c(1,4,2,11,3,8,5,9,6,7,10,12)]
# reorder the rows of pcs[,] so that pairs of books come together
pcs <- pcs[c(1,4,2,11,3,8,5,9,6,7,10,12),]
# calculate the test statistic (sum of distances between pairs,
# calculated in K dimensions)
K <- 2
stat <- 0
for(i in seq(1,11,2)){
  dist2 <- 0
  for(k in 1:K){
    dist2 <- dist2+(pcs[i,k]-pcs[i+1,k])^2
  }
  stat <- stat+sqrt(dist2)
}
# generate all permutations (exact null distribution)
# (here repeat above nested loops)
stat
statsort <- sort(statnull)
statsort[1:100]
hist(statnull, breaks = 50, xlim = c(0.13,0.45))