Clustering 42 variables

4. March 2012

Preparing the data

> setwd("E:/Data/counties/pajek2")
> load("../pajek/rankDat.RData")
> objects()
[1] "da"       "rankPart" "s"       
> dim(da)
[1] 3110   92
> del <- c(1,2,4,6,9,10,11,14,15,20,25,30,33,35,36,37,38,39,40,41,42,45,46,48,
+    52,53,56,57,58,60,61,62,63,65,67,69,71,72,73,74,75,76,77,78,79,81,82,89,90,91)
> V <- da[,-del]
> for(i in 1:nrow(V)) if(length(which(is.na(V[i,])))>0) cat(i,"\n")
1620 
2888 
> V[1620,] <- V[1597,]  # Yellowstone National Park, MT  =  Park, MT
> V[2888,] <- V[2789,]  # Clifton Forge, VA  =  Alleghany, VA
> for(i in 1:nrow(V)) if(length(which(is.na(V[i,])))>0) cat(i,"\n")
> names(V)
 [1] "AGE050200D"                         "CLF040200D"                        
 [3] "EDU685200D"                         "HSG045200D"                        
 [5] "IPE010200D"                         "IPE120200D"                        
 [7] "PIN020200D"                         "POP050200D"                        
 [9] "POP060200D"                         "POP165200D"                        
[11] "POP255200D"                         "POP285200D"                        
[13] "POP325200D"                         "POP405200D"                        
[15] "VST020200D"                         "VST220200D"                        
[17] "VST420200D"                         "WAT130200D"                        
[19] "P.pop.under18"                      "P.pop.over85"                      
[21] "P.land.farms"                       "P.emply.ind.CONSTRUCTION"          
[23] "P.emply.ind.MANUFACTORING"          "P.emply.ind.TRANSPORT.WAREHOUSING" 
[25] "P.emply.ind.FINANC.INSUR"           "P.emply.ind.PROFscientTECH"        
[27] "P.emply.ind.EDUC.HEALTH"            "P.25overLESS9thGRADE"              
[29] "P.employ.FARMING"                   "P.employ.GOV.stateLoc"             
[31] "P.OWNERoccupiedHousingUnits"        "P.occupiedHousingUnitsLackingPlumb"
[33] "P.RURALpopul"                       "P.CHANGEurban90to00"               
[35] "CHANGEperCapitaIncome89to99"        "GroundWaterUsePerCapita"           
[37] "P.NET.DOMESTIC.MIGRATIONS"          "P.NativePopulationBornInStateOfRes"
[39] "R.LABOR.FORCEmaleFemale"            "R.VOTING.DEMOCRATESoverREPUBLICANS"
[41] "P.PUBLIC.SCHOOL.ENROLNEMT"          "P.CHANGEpverty95to00"              
> names(da)
 [1] "STCOU"                               
 [2] "Areaname"                            
 [3] "AGE050200D"                          
...                    
[91] "LIFESTOCKvaluePerFARM"               
[92] "P.CHANGEpverty95to00" 
> unitNams <- da[,"Areaname"]               
> unitIDs <- da[,"STCOU"]
> save(V,unitNams,unitIDs,file="vars42org.RData")  

Clustering

> setwd("E:/Data/counties/pajek2")
> load("vars42org.RData")
> objects()
[1] "unitIDs"  "unitNams" "V"       
> z <- function(x) (x-mean(x,na.rm=TRUE))/sd(x,na.rm=TRUE)
> U <- apply(V,2,z)
> rownames(U) <- unitNams
> save(U,file="vars42std.Rdata")
> r <- hclust(d<-dist(U),method="ward")
> pdf("DendroWard.pdf",width=58.5,height=41.5)
> plot(r,hang=-1,cex=0.08,main="Ward / Free",lwd=0.01)
> dev.off()
 
> ant <- read.csv(file="../ANT/ANTcorr.csv",stringsAsFactors=FALSE,sep=",",header=TRUE)$cluster
> library(RColorBrewer)
> library(graphics)
> display.brewer.pal(8,"Dark2") 
> dark <- c(brewer.pal(8,"Dark2"),"red","blue","purple","black") 
> pie(rep(1,12), col=dark)
notes/clu/clus42.txt · Last modified: 2017/04/10 23:52 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki