====== Discriminant analysis ====== {{notes:clu:pics:ninenationsall.jpg?450}} ===== Analysis ===== > setwd("D:/Data/counties/pajek") > library(MASS) > z <- function(x) (x-mean(x,na.rm=TRUE))/sd(x,na.rm=TRUE) > options(digits = 3) > col <- c("orange",palette()[2:8]) > c9 <- c("New England","Foundry","Dixie","Islands","Mexamerica","Ecotopia", + "Empty Quarter","Bread basket") > load("rankDat.RData") > C9 <- read.csv("../9nations/9nations.clu",header=FALSE,skip=1)$V1 > names(da) [1] "STCOU" "Areaname" [3] "AGE050200D" "BZA115203D" [5] "CLF040200D" "EDU635200D" [7] "EDU685200D" "HSG045200D" [9] "HSG495200D" "INC610199D" [11] "INC910199D" "IPE010200D" [13] "IPE120200D" "IPE220200D" [15] "LFE305200D" "PIN020200D" [17] "POP050200D" "POP060200D" [19] "POP165200D" "POP225200D" [21] "POP255200D" "POP285200D" [23] "POP325200D" "POP405200D" [25] "POP645200D" "VST020200D" [27] "VST220200D" "VST420200D" [29] "WAT130200D" "P.pop.under5" [31] "P.pop.under18" "P.pop.over85" [33] "P.ind.fam.farms" "P.land.farms" [35] "P.irrigatedLand" "P.violent.crime" [37] "P.murders" "P.rapes" [39] "P.16to19.notHighSc" "P.Hisp.Latin" [41] "R.Hisp.Lat.maleFemale" "P.emply.ind.AGR.FOR.FISH.HUNT.MINING" [43] "P.emply.ind.CONSTRUCTION" "P.emply.ind.MANUFACTORING" [45] "P.emply.ind.WHOLESALEtrade" "P.emply.ind.RETAILtrade" [47] "P.emply.ind.TRANSPORT.WAREHOUSING" "P.emply.ind.INFORMATION" [49] "P.emply.ind.FINANC.INSUR" "P.emply.ind.PROFscientTECH" [51] "P.emply.ind.EDUC.HEALTH" "P.emply.ind.ARTSaccomFOOD" [53] "P.emply.ind.PUBLICaddmin" "P.25overLESS9thGRADE" [55] "P.employ.FARMING" "P.employ.AGRIC.FOREST.FISH.HUNT" [57] "F.GOV.EXP.perCapita" "P.employ.GOV" [59] "P.employ.GOV.stateLoc" "P.vacantHousingUnits" [61] "P.occupiedHousingUnits" "P.occupiedHousingUnitsBLACK" [63] "P.occupiedHousingUnitsHisLat" "P.OWNERoccupiedHousingUnits" [65] "P.RENTERoccupiedHousingUnits" "P.occupiedHousingUnitsLackingPlumb" [67] "P.URBANpopul" "P.RURALpopul" [69] "P.CHANGErural90to00" "P.CHANGEurban90to00" [71] "P.LAND" "P.WATER" [73] "P.BELOWpovertyLevel" "P.ABOVEpovertyLevel" [75] "P.WHITE.BELOWpovertyLevel" "P.aINDIAN.BELOWpovertyLevel" [77] "P.BLACK.BELOWpovertyLevel" "P.ASIAN.BELOWpovertyLevel" [79] "P.HisLat.BELOWpovertyLevel" "CHANGEperCapitaIncome89to99" [81] "P.CHANGEemployIndustry90to00" "P.IrrigationGROUNDwaterUse" [83] "GroundWaterUsePerCapita" "P.NET.DOMESTIC.MIGRATIONS" [85] "P.NativePopulationBornInStateOfRes" "R.LABOR.FORCEmaleFemale" [87] "R.VOTING.DEMOCRATESoverREPUBLICANS" "P.PUBLIC.SCHOOL.ENROLNEMT" [89] "TOTAL.DEPOSITSperCapita" "CROPvaluePerFARM" [91] "LIFESTOCKvaluePerFARM" "P.CHANGEpverty95to00" ==== Select variables and apply LDA ==== > del <- c(1,2,4,6,9,10,11,14,15,20,25,30,33,35,36,37,38,39,40,41,42,45,46,48, + 52,53,56,57,58,60,61,62,63,65,67,69,71,72,73,74,75,76,77,78,79,81,82,89,90,91) > V <- da[,-del] > C <- C9[complete.cases(V)]; V <- na.omit(V); X <- apply(V,2,z) > L <- lda(C ~ ., data.frame(X)) > Y <- X %*% L$scaling > Z <- matrix(0,8,7); cp <- numeric(8); colnames(Z) <- colnames(Y); rownames(Z) <- c9 > for(j in 1:7){for(i in 1:8){Ci <- C==i; cp[i] <- sum(Y[,j][Ci])/sum(Ci)}; Z[,j] <- cp} > colnames(X) [1] "AGE050200D" "CLF040200D" [3] "EDU685200D" "HSG045200D" [5] "IPE010200D" "IPE120200D" [7] "PIN020200D" "POP050200D" [9] "POP060200D" "POP165200D" [11] "POP255200D" "POP285200D" [13] "POP325200D" "POP405200D" [15] "VST020200D" "VST220200D" [17] "VST420200D" "WAT130200D" [19] "P.pop.under18" "P.pop.over85" [21] "P.land.farms" "P.emply.ind.CONSTRUCTION" [23] "P.emply.ind.MANUFACTORING" "P.emply.ind.TRANSPORT.WAREHOUSING" [25] "P.emply.ind.FINANC.INSUR" "P.emply.ind.PROFscientTECH" [27] "P.emply.ind.EDUC.HEALTH" "P.25overLESS9thGRADE" [29] "P.employ.FARMING" "P.employ.GOV.stateLoc" [31] "P.OWNERoccupiedHousingUnits" "P.occupiedHousingUnitsLackingPlumb" [33] "P.RURALpopul" "P.CHANGEurban90to00" [35] "CHANGEperCapitaIncome89to99" "GroundWaterUsePerCapita" [37] "P.NET.DOMESTIC.MIGRATIONS" "P.NativePopulationBornInStateOfRes" [39] "R.LABOR.FORCEmaleFemale" "R.VOTING.DEMOCRATESoverREPUBLICANS" [41] "P.PUBLIC.SCHOOL.ENROLNEMT" "P.CHANGEpverty95to00" ==== Eigen values ==== > L$svd [1] 35.33 24.44 19.04 17.47 8.66 6.58 4.48 ==== Means ==== > L$means AGE050200D CLF040200D EDU685200D HSG045200D IPE010200D IPE120200D PIN020200D POP050200D 1 0.2461 -0.7521 1.27580 -0.294 0.80568 -0.800 1.0512 -0.232 2 -0.0154 -0.0811 0.22483 -0.191 0.64007 -0.627 0.5761 -0.215 3 -0.1407 0.1795 -0.29581 0.396 -0.28066 0.389 -0.3076 0.194 4 0.5392 -0.0334 0.75171 0.731 0.42122 -0.234 1.4466 1.231 5 -0.3591 0.7852 0.18123 0.293 -0.25593 1.004 -0.2695 0.513 6 0.1707 0.9116 0.76294 0.320 0.61818 -0.288 0.7710 0.367 7 -0.1655 0.4119 0.47245 0.442 0.15583 -0.098 0.0135 0.687 8 0.2585 -0.4403 -0.00641 -0.601 -0.00521 -0.299 0.0623 -0.439 POP060200D POP165200D POP255200D POP285200D POP325200D POP405200D VST020200D VST220200D 1 0.1636 0.3773 -0.477 -0.1801 0.2739 -0.3142 -0.6422 -0.399 2 0.3842 0.1592 -0.248 -0.1966 0.2491 -0.2476 -0.1130 -0.213 3 -0.0286 0.1788 0.655 -0.1443 -0.1217 -0.3031 0.1968 0.121 4 0.1831 -0.4495 0.229 -0.1986 0.2183 1.5111 0.0625 -0.301 5 0.0081 -0.3509 -0.370 -0.0134 0.6589 3.0799 0.5213 -0.601 6 -0.0197 -0.1206 -0.492 0.0533 1.5352 0.3670 -0.2307 -0.383 7 -0.1166 -0.6472 -0.564 0.4538 -0.0587 0.1470 0.0760 -0.735 8 -0.1079 -0.0873 -0.473 0.1637 -0.1582 -0.0522 -0.2531 0.277 VST420200D WAT130200D P.pop.under18 P.pop.over85 P.land.farms P.emply.ind.CONSTRUCTION 1 -0.2188 -0.2520 -0.547 -0.159 -1.244 -0.149 2 -0.0693 -0.2098 -0.142 -0.287 -0.361 -0.369 3 0.1577 -0.1360 -0.166 -0.328 -0.438 0.245 4 -0.1624 -0.0748 -0.995 0.131 -0.873 0.689 5 -0.1451 -0.0985 0.655 -0.413 0.301 0.357 6 -0.2390 -0.0903 -0.210 -0.256 -0.893 -0.166 7 -0.1190 0.9074 0.531 -0.514 -0.431 0.389 8 -0.0888 0.0668 0.088 0.774 0.932 -0.305 P.emply.ind.MANUFACTORING P.emply.ind.TRANSPORT.WAREHOUSING P.emply.ind.FINANC.INSUR 1 -0.103 -0.71667 0.7246 2 0.501 -0.25502 0.2594 3 0.373 0.00551 -0.1479 4 -1.106 -0.21620 1.6480 5 -0.911 -0.13994 0.0567 6 -0.529 -0.37886 0.2713 7 -0.916 -0.11202 -0.1553 8 -0.260 0.22751 0.0342 P.emply.ind.PROFscientTECH P.emply.ind.EDUC.HEALTH P.25overLESS9thGRADE P.employ.FARMING 1 0.8633 0.7379 -0.709 -0.738 2 0.4109 0.0882 -0.645 -0.601 3 -0.0298 -0.2744 0.424 -0.313 4 1.7905 -1.0553 0.119 -0.603 5 0.3004 0.0835 1.115 0.237 6 0.9210 -0.0924 -0.646 -0.462 7 0.1508 -0.2195 -0.671 0.109 8 -0.3548 0.3257 -0.191 0.677 P.employ.GOV.stateLoc P.OWNERoccupiedHousingUnits P.occupiedHousingUnitsLackingPlumb 1 -0.38587 -0.7861 -0.139 2 -0.35897 0.1218 -0.266 3 -0.16898 0.2077 0.155 4 -0.49912 -0.7835 -0.243 5 0.49790 -0.7036 0.518 6 -0.00578 -0.5189 -0.121 7 0.31431 -0.6237 0.301 8 0.23853 0.0487 -0.231 P.RURALpopul P.CHANGEurban90to00 CHANGEperCapitaIncome89to99 GroundWaterUsePerCapita 1 -0.2962 -0.1259 0.5128 -0.2402 2 -0.4328 0.1408 0.3481 -0.2329 3 0.1161 0.0427 -0.1036 -0.1396 4 -1.6247 1.6085 0.4227 -0.0559 5 -0.4925 0.4100 -0.2739 -0.0921 6 -0.7936 0.6477 0.3800 -0.1015 7 -0.0191 0.3893 -0.0318 0.2172 8 0.2012 -0.3284 -0.0234 0.2608 P.NET.DOMESTIC.MIGRATIONS P.NativePopulationBornInStateOfRes R.LABOR.FORCEmaleFemale 1 0.367 -0.6505 -0.7467 2 0.110 0.3750 -0.2044 3 0.250 0.0861 -0.1101 4 0.413 -2.8180 0.6596 5 -0.265 -0.3160 0.5978 6 0.391 -1.2741 -0.0197 7 -0.180 -1.2116 0.5653 8 -0.334 0.2445 0.0315 R.VOTING.DEMOCRATESoverREPUBLICANS P.PUBLIC.SCHOOL.ENROLNEMT P.CHANGEpverty95to00 1 1.0033 0.0193 0.02006 2 0.2861 -0.2197 0.00414 3 0.0884 -0.0807 -0.06495 4 0.6238 -0.1367 0.01008 5 0.3084 0.1586 0.01669 6 0.4788 -0.1623 0.28849 7 -0.5218 0.0682 0.70853 8 -0.2523 0.1644 -0.12594 > ==== LD ==== > L$scaling LD1 LD2 LD3 LD4 LD5 LD6 LD7 AGE050200D -0.19321 -0.27029 -0.07838 -0.18939 0.0324 0.17692 -0.2236 CLF040200D -0.23279 -0.16970 -0.42048 0.24260 -0.5185 -0.50753 -0.0554 EDU685200D -0.34606 -0.29554 -0.10665 0.55260 0.6720 0.49740 -0.8581 HSG045200D 0.31049 -0.38326 0.08723 0.15970 -0.1444 -0.26211 -0.2197 IPE010200D 0.40863 0.45772 -0.08454 -1.04235 -0.1245 0.04427 -0.1879 IPE120200D 0.53126 -0.14493 -0.03626 -0.58331 0.0667 0.09255 -0.3726 PIN020200D 0.01218 0.26630 -0.39492 -0.11836 0.3200 -0.22928 0.4740 POP050200D -0.20949 0.05744 -0.00586 0.35028 -0.0995 0.17091 0.4661 POP060200D 0.12483 0.11737 0.00969 0.02559 -0.4378 0.37707 0.0985 POP165200D 0.36002 -0.14928 0.15386 -0.22683 -0.1089 -0.08121 0.0470 POP255200D 0.67531 -0.29057 0.55964 0.23297 -0.1025 -0.05190 0.4687 POP285200D 0.05811 -0.04507 0.32360 0.12240 -0.0184 -0.17351 0.4047 POP325200D -0.14312 -0.35573 0.17062 -0.00764 -0.2254 -0.75603 -0.6825 POP405200D -0.84971 -1.22271 0.34310 -0.71129 -0.0925 0.19792 0.2508 VST020200D 0.19047 0.20146 0.06047 0.14449 -0.1151 0.12283 0.0183 VST220200D 0.28040 0.01461 -0.06267 0.02276 -0.2133 -0.00544 -0.2803 VST420200D -0.00046 0.03042 0.01638 0.01518 -0.0276 0.05512 -0.0156 WAT130200D -0.05291 -0.06392 -0.26477 0.27276 -0.1759 0.21683 0.0797 P.pop.under18 -0.77459 0.02812 -0.47160 0.27963 0.2129 0.24931 -0.6446 P.pop.over85 -0.63665 0.20864 0.27415 0.29802 0.3297 -0.48102 0.4767 P.land.farms -0.36913 0.05739 0.72286 0.11401 -0.4329 -0.02445 0.0411 P.emply.ind.CONSTRUCTION -0.02916 -0.18100 0.06812 0.12769 0.2036 0.04944 -0.0745 P.emply.ind.MANUFACTORING 0.17776 -0.02170 -0.07359 -0.55273 0.1043 0.11520 0.0750 P.emply.ind.TRANSPORT.WAREHOUSING 0.05091 0.04368 0.16428 0.05347 0.0880 -0.00308 -0.0572 P.emply.ind.FINANC.INSUR -0.06735 0.09229 0.23081 -0.15488 0.2067 0.05418 0.2841 P.emply.ind.PROFscientTECH 0.17727 -0.19581 -0.12903 -0.26031 -0.2808 -0.00819 0.5369 P.emply.ind.EDUC.HEALTH 0.01863 0.20412 -0.10165 -0.35368 0.0519 0.20718 0.0399 P.25overLESS9thGRADE 0.55273 -0.03545 0.44980 0.32296 0.7085 0.03011 -0.0513 P.employ.FARMING -0.08291 0.12381 -0.10990 0.22164 0.0382 -0.30697 -0.0663 P.employ.GOV.stateLoc -0.03595 -0.09457 0.16234 0.07923 -0.1452 -0.07831 -0.1049 P.OWNERoccupiedHousingUnits 0.23631 -0.18043 0.20105 0.36389 -0.2160 -0.16008 -0.0354 P.occupiedHousingUnitsLackingPlumb -0.09287 -0.04893 -0.18635 0.07350 -0.1452 0.16486 0.0247 P.RURALpopul 0.12518 0.00888 0.06600 -0.33108 0.2040 0.30879 -0.3026 P.CHANGEurban90to00 0.11015 0.00659 -0.06433 -0.25165 -0.0605 -0.00596 0.1754 CHANGEperCapitaIncome89to99 -0.06011 -0.15386 0.29047 0.21044 -0.4346 -0.07034 -0.1179 GroundWaterUsePerCapita 0.07275 0.07020 0.11817 -0.14831 0.0959 -0.17232 0.0320 P.NET.DOMESTIC.MIGRATIONS 0.02588 -0.02753 0.07263 -0.15229 0.2493 -0.26705 -0.3171 P.NativePopulationBornInStateOfRes -0.09554 -0.01021 0.25747 -0.61473 -0.5146 0.29488 -0.2382 R.LABOR.FORCEmaleFemale 0.24305 -0.06548 0.17806 -0.10527 -0.2714 -0.01506 0.4563 R.VOTING.DEMOCRATESoverREPUBLICANS -0.23058 0.22968 -0.09197 -0.28159 0.5331 -0.14815 -0.0838 P.PUBLIC.SCHOOL.ENROLNEMT 0.00368 -0.01734 0.04209 0.07687 0.0897 0.07223 -0.0201 P.CHANGEpverty95to00 0.01812 0.01476 -0.06988 0.13598 -0.1132 0.04762 -0.2117 ==== Centers ==== > Z LD1 LD2 LD3 LD4 LD5 LD6 LD7 New England -0.487371 1.040 -2.109 -1.559 2.4103 0.45916 -0.22428 Foundry 0.000119 0.750 -0.937 -1.516 -0.5228 0.09901 0.08922 Dixie 1.930004 -0.327 0.239 0.177 0.0373 -0.00244 -0.01740 Islands -0.521521 -2.252 -0.645 0.626 1.9537 -1.38047 4.23150 Mexamerica -2.945949 -4.268 0.771 -1.004 0.0208 0.20593 -0.04802 Ecotopia -1.207992 -1.032 -2.048 -0.134 0.1092 -1.98706 -0.36186 Empty Quarter -1.377872 -0.459 -1.949 1.817 -0.2202 0.30408 0.01749 Bread basket -1.547266 0.927 0.753 0.210 0.0435 -0.04461 -0.00498 > ==== Descriptions ==== > opisLD <- function(i) {l <- L$scaling[,i]; l[which(abs(l)>0.5)]} > for(i in 1:7) {cat('*** LD',i,' :\n',sep=''); print(opisLD(i)); cat('\n')} *** LD1 : IPE120200D POP255200D POP405200D P.pop.under18 0.531 0.675 -0.850 -0.775 P.pop.over85 P.25overLESS9thGRADE -0.637 0.553 *** LD2 : POP405200D -1.22 *** LD3 : POP255200D P.land.farms 0.560 0.723 *** LD4 : EDU685200D IPE010200D 0.553 -1.042 IPE120200D POP405200D -0.583 -0.711 P.emply.ind.MANUFACTORING P.NativePopulationBornInStateOfRes -0.553 -0.615 *** LD5 : CLF040200D EDU685200D -0.519 0.672 P.25overLESS9thGRADE P.NativePopulationBornInStateOfRes 0.709 -0.515 R.VOTING.DEMOCRATESoverREPUBLICANS 0.533 *** LD6 : CLF040200D POP325200D -0.508 -0.756 *** LD7 : EDU685200D POP325200D P.pop.under18 -0.858 -0.682 -0.645 P.emply.ind.PROFscientTECH 0.537 ===== Pictures ===== ==== Data points and cluster centers in (LD1,LD2) ==== > plot(Y[,1],Y[,2],col=col[C],pch=16,cex=0.5,xlab="LD1",ylab="LD2") > points(Z[,1],Z[,2],pch=16,cex=1.5) > points(Z[,1],Z[,2],col=col,pch=16,cex=1) > legend("bottomright",legend=c9,fill=col,cex=0.75) > title("9 nations / LDA") {{notes:clu:pics:9natld12.png}} {{notes:clu:pdf:9natld12.pdf}} ==== Data points / pairs ==== > pairs(Y,col=col[C],pch=16,cex=0.3) > title("9 nations / LDA / colors") > pdf("9natLDAcol.pdf",width=20,height=20) > pairs(Y,col=col[C],pch=16,cex=0.3) > title("9 nations / LDA / colors") > dev.off() {{notes:clu:pics:9natLDAcol.png}} {{notes:clu:pdf:9natLDAcol.pdf}} ==== Cluster centers / pairs ==== > pairs(Z,col=col,pch=16,cex=1.5) > pdf("9natPairsCen.pdf",width=10,height=10) > pairs(Z,col=col,pch=16,cex=1.5) > title("9 nations / LDA / centers") > dev.off() {{notes:clu:pics:9natPairsCen.png}} {{notes:clu:pdf:9natPairsCen.pdf}}