====== Visualization ====== ===== Maps ===== To visualize US counties with the map we need the files with the map data. They can be obtained for example from http://gadm.org/country [country: USA, file format: shapefile] [[http://gadm.org/data/shp/USA_adm.zip]]. For visualization we shall use R library ''**maptools**'' (maybe you need to install it). Assume that the map description files are stored in subdirectory USA. Some references about colors in R: [[http://research.stowers-institute.org/efg/R/Color/Chart/|Overview]]/ [[http://research.stowers-institute.org/efg/Report/UsingColorInR.pdf|Slides]], [[http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf|Chart 2]], [[http://statmath.wu.ac.at/~zeileis/papers/Zeileis+Hornik+Murrell-2009.pdf|Anti RGB]]/ [[http://statmath.wu.ac.at/~zeileis/papers/Augsburg-2010.pdf|Slides]], [[http://cran.r-project.org/web/packages/RColorBrewer/|RColorBrewer]], [[http://cran.r-project.org/web/packages/colorspace/index.html|colorspace]] > setwd("D:/Data/counties/9nations") > library(maptools) > gpclibPermit() > USout <- readShapeSpatial("USA/USA_adm0.shp") # USA outline > USsta <- readShapeSpatial("USA/USA_adm1.shp") # state borders > UScou <- readShapeSpatial("USA/USA_adm2.shp") # county borders Let us first draw the county borders and write the names of the counties. > pdf("UScounties.pdf",width=11.7,height=8.3,paper="a4r") > plot(UScou,xlim=c(-124,-67),ylim=c(23,48),col="wheat",bg="skyblue",border="red",lwd=0.05) > text(coordinates(UScou),labels=as.character(UScou$NAME_2),cex=0.1) > title("Central US"); dev.off() When working interactively we can omit the commands pdf (sets the graphics output to PDF and size of the paper to A4) and dev.off (releases the device). Let's look at what data are available about counties. 
> names(UScou) [1] "ID_0" "ISO" "NAME_0" "ID_1" "NAME_1" "ID_2" "NAME_2" [8] "VARNAME_2" "NL_NAME_2" "HASC_2" "CC_2" "TYPE_2" "ENGTYPE_2" "VALIDFR_2" [15] "VALIDTO_2" "REMARKS_2" "Shape_Leng" "Shape_Area" > UScou$NAME_0[1:10] [1] United States United States United States United States United States United States [7] United States United States United States United States Levels: United States > UScou$NAME_1[1:10] [1] Alabama Alabama Alabama Alabama Alabama Alabama Alabama Arkansas Arkansas Arkansas 51 Levels: Alabama Alaska Arizona Arkansas California Colorado Connecticut ... Wyoming > As we see, the variable ''UScou$NAME_1'' contains the name of the state to which a county belongs. We can use this information to color counties by their states. > library(colorspace) > col <- rainbow_hcl(length(levels(UScou$NAME_1))) > pdf("UScounties.pdf",width=11.7,height=8.3,paper="a4r") > plot(UScou,xlim=c(-124,-67),ylim=c(23,48),col=col[UScou$NAME_1],bg="skyblue",border="red",lwd=0.05) > text(coordinates(UScou),labels=as.character(UScou$NAME_2),cex=0.1) > title("Central US"); dev.off() ===== Matching between shape file and Counties data files ===== To be able to visually check the resulting partitions we would like to represent them on the maps. Again we have a problem with alignment of units / vertices with the corresponding names of the shapes in the description of the map. The correspondence can be established between the data units' ''Areaname'' variable and pairs (''Name_1'', ''Name_2'') of county shapes. The problem is that the ''Areaname'' is composed of the county's name followed by the state code, while the ''Name_1'' contains the full state name. To join the (''Name_1'', ''Name_2'') into the corresponding ''Areaname'' we have to replace ''Name_1'' with its code - they are given in the file ''states.csv''. 
> names(UScou) [1] "ID_0" "ISO" "NAME_0" "ID_1" "NAME_1" "ID_2" "NAME_2" [8] "VARNAME_2" "NL_NAME_2" "HASC_2" "CC_2" "TYPE_2" "ENGTYPE_2" "VALIDFR_2" [15] "VALIDTO_2" "REMARKS_2" "Shape_Leng" "Shape_Area" > UScou$TYPE_2[1:10] [1] County County County County County County County County County County 10 Levels: Borough Census Area City And Borough City And County County District ... Water body > length(UScou$TYPE_2) [1] 3145 > UScou$ID_2[1:10] [1] 34789 34790 34791 34792 34793 34794 34795 34943 34944 34945 > UScou$NAME_2[1:10] [1] Autauga Baldwin Barbour Bibb Blount Bullock Butler Mississippi [9] Monroe Montgomery 1837 Levels: Abbeville Acadia Accomack Ada Adair Adams Addison Aiken Aitkin Alachua ... Ziebach > UScou$NAME_1[1:10] [1] Alabama Alabama Alabama Alabama Alabama Alabama Alabama Arkansas Arkansas Arkansas 51 Levels: Alabama Alaska Arizona Arkansas California Colorado Connecticut ... Wyoming > states <- levels(UScou$NAME_1) > cou <- read.csv(file="../varsN.csv",stringsAsFactors=FALSE) > names(cou) [1] "STCOU" "Areaname" [3] "AGE050200D" "BZA115203D" [5] "CLF040200D" "EDU635200D" [7] "EDU685200D" "HSG045200D" [9] "HSG495200D" "INC610199D" [11] "INC910199D" "IPE010200D" [13] "IPE120200D" "IPE220200D" [15] "LFE305200D" "PIN020200D" [17] "POP050200D" "POP060200D" [19] "POP165200D" "POP225200D" [21] "POP255200D" "POP285200D" [23] "POP325200D" "POP405200D" [25] "POP645200D" "VST020200D" [27] "VST220200D" "VST420200D" [29] "WAT130200D" "P.pop.under5" [31] "P.pop.under18" "P.pop.over85" [33] "P.ind.fam.farms" "P.land.farms" [35] "P.irrigatedLand" "P.violent.crime" [37] "P.murders" "P.rapes" [39] "P.16to19.notHighSc" "P.Hisp.Latin" [41] "R.Hisp.Lat.maleFemale" "P.emply.ind.AGR.FOR.FISH.HUNT.MINING" [43] "P.emply.ind.CONSTRUCTION" "P.emply.ind.MANUFACTORING" [45] "P.emply.ind.WHOLESALEtrade" "P.emply.ind.RETAILtrade" [47] "P.emply.ind.TRANSPORT.WAREHOUSING" "P.emply.ind.INFORMATION" [49] "P.emply.ind.FINANC.INSUR" "P.emply.ind.PROFscientTECH" [51] 
"P.emply.ind.EDUC.HEALTH" "P.emply.ind.ARTSaccomFOOD" [53] "P.emply.ind.PUBLICaddmin" "P.25overLESS9thGRADE" [55] "P.employ.FARMING" "P.employ.AGRIC.FOREST.FISH.HUNT" [57] "F.GOV.EXP.perCapita" "P.employ.GOV" [59] "P.employ.GOV.stateLoc" "P.vacantHousingUnits" [61] "P.occupiedHousingUnits" "P.occupiedHousingUnitsBLACK" [63] "P.occupiedHousingUnitsHisLat" "P.OWNERoccupiedHousingUnits" [65] "P.RENTERoccupiedHousingUnits" "P.occupiedHousingUnitsLackingPlumb" [67] "P.URBANpopul" "P.RURALpopul" [69] "P.CHANGErural90to00" "P.CHANGEurban90to00" [71] "P.LAND" "P.WATER" [73] "P.BELOWpovertyLevel" "P.ABOVEpovertyLevel" [75] "P.WHITE.BELOWpovertyLevel" "P.aINDIAN.BELOWpovertyLevel" [77] "P.BLACK.BELOWpovertyLevel" "P.ASIAN.BELOWpovertyLevel" [79] "P.HisLat.BELOWpovertyLevel" "CHANGEperCapitaIncome89to99" [81] "P.CHANGEemployIndustry90to00" "P.IrrigationGROUNDwaterUse" [83] "GroundWaterUsePerCapita" "P.NET.DOMESTIC.MIGRATIONS" [85] "P.NativePopulationBornInStateOfRes" "R.LABOR.FORCEmaleFemale" [87] "R.VOTING.DEMOCRATESoverREPUBLICANS" "P.PUBLIC.SCHOOL.ENROLNEMT" [89] "TOTAL.DEPOSITSperCapita" "CROPvaluePerFARM" [91] "LIFESTOCKvaluePerFARM" "P.CHANGEpverty95to00" > cou$Areaname[1:10] [1] "Autauga, AL" "Baldwin, AL" "Barbour, AL" "Bibb, AL" "Blount, AL" "Bullock, AL" [7] "Butler, AL" "Calhoun, AL" "Chambers, AL" "Cherokee, AL" > length(cou$Areaname) [1] 3111 > states [1] "Alabama" "Alaska" "Arizona" "Arkansas" [5] "California" "Colorado" "Connecticut" "Delaware" ... 
[41] "South Carolina" "South Dakota" "Tennessee" "Texas" [45] "Utah" "Vermont" "Virginia" "Washington" [49] "West Virginia" "Wisconsin" "Wyoming" > st <- states[c(1:8,10:51,9)] > S <- read.csv(file="states.csv",stringsAsFactors=FALSE,sep=";") > S ind name code 1 1 Alabama AL 2 2 Alaska AK 3 3 Arizona AZ 4 4 Arkansas AR 5 5 California CA 6 6 Colorado CO 7 7 Connecticut CT 8 8 Delaware DE 9 9 Florida FL 10 10 Georgia GA 11 11 Hawaii HI 12 12 Idaho ID 13 13 Illinois IL 14 14 Indiana IN 15 15 Iowa IA 16 16 Kansas KS 17 17 Kentucky KY 18 18 Louisiana LA 19 19 Maine ME 20 20 Maryland MD 21 21 Massachusetts MA 22 22 Michigan MI 23 23 Minnesota MN 24 24 Mississippi MS 25 25 Missouri MO 26 26 Montana MT 27 27 Nebraska NE 28 28 Nevada NV 29 29 New Hampshire NH 30 30 New Jersey NJ 31 31 New Mexico NM 32 32 New York NY 33 33 North Carolina NC 34 34 North Dakota ND 35 35 Ohio OH 36 36 Oklahoma OK 37 37 Oregon OR 38 38 Pennsylvania PA 39 39 Rhode Island RI 40 40 South Carolina SC 41 41 South Dakota SD 42 42 Tennessee TN 43 43 Texas TX 44 44 Utah UT 45 45 Vermont VT 46 46 Virginia VA 47 47 Washington WA 48 48 West Virginia WV 49 49 Wisconsin WI 50 50 Wyoming WY 51 51 District of Columbia DC > match(states,S$name) [1] 1 2 3 4 5 6 7 8 51 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 [33] 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 > p <- match(states,S$name) > names <- paste(UScou$NAME_2,", ",S$code[p[as.integer(UScou$NAME_1)]],sep="") > Q <- match(names,cou$Areaname) > length(Q) [1] 3145 > Q [1] 1 2 3 4 5 6 7 129 130 131 132 133 134 135 136 137 138 501 502 [20] 503 504 505 506 507 508 509 510 999 1000 1001 1002 1003 1004 1005 1006 1007 86 87 [39] 88 89 90 91 92 93 94 95 96 97 98 309 310 311 312 313 314 315 316 [58] 317 318 319 320 321 513 514 515 516 NA NA NA NA NA 517 518 519 520 617 [77] 625 626 627 628 629 630 631 632 633 634 635 636 948 949 950 951 952 953 954 [96] 955 956 957 958 959 1471 1472 1473 1474 1475 1476 1477 1478 1479 NA 1481 1482 
1483 8 ... [3079] 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 NA NA 3050 3051 3052 3053 [3098] 3054 3055 3056 3057 3058 3059 3060 3061 NA 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 [3117] 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 [3136] 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 > q <- match(cou$Areaname,names) > length(q) [1] 3111 > q [1] 1 2 3 4 5 6 7 114 115 116 117 118 119 120 121 122 123 124 125 [20] 126 127 128 129 130 NA 132 133 134 135 136 137 138 139 140 141 142 143 144 [39] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 [58] NA 165 166 167 168 169 170 171 172 173 200 201 202 203 204 205 206 207 208 [77] 209 210 211 212 213 214 215 216 217 37 38 39 40 41 42 43 44 45 46 ... [3060] 3104 3105 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 NA 3107 3108 3109 3110 3111 3112 [3079] 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 [3098] 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 > which(is.na(q)) [1] 25 58 144 290 304 345 346 579 582 610 642 679 733 1122 1123 1124 1125 1126 1127 [20] 1128 1129 1130 1176 1182 1270 1271 1348 1383 1480 1540 1541 1542 1543 1544 1563 1620 1730 1768 [39] 1839 1979 2252 2551 2888 2916 3072 > ina <- which(is.na(q)) > cou$Areaname[ina] [1] "DeKalb, AL" "St. Clair, AL" "St. Francis, AR" [4] "District of Columbia" "DeSoto, FL" "St. Johns, FL" [7] "St. Lucie, FL" "DeKalb, IL" "DuPage, IL" [10] "LaSalle, IL" "St. Clair, IL" "DeKalb, IN" [13] "St. Joseph, IN" "St. Bernard, LA" "St. Charles, LA" [16] "St. Helena, LA" "St. James, LA" "St. John the Baptist, LA" [19] "St. Landry, LA" "St. Martin, LA" "St. Mary, LA" [22] "St. Tammany, LA" "St. Mary's, MD" "Baltimore city, MD" [25] "St. Clair, MI" "St. Joseph, MI" "St. Louis, MN" [28] "DeSoto, MS" "DeKalb, MO" "St. Charles, MO" [31] "St. Clair, MO" "Ste. Genevieve, MO" "St. Francois, MO" [34] "St. 
Louis, MO" "St. Louis city, MO" "Yellowstone National Park, MT" [37] "Carson City city, NV" "De Baca, NM" "St. Lawrence, NY" [40] "LaMoure, ND" "McKean, PA" "DeWitt, TX" [43] "Clifton Forge, VA" "South Boston, VA" "St. Croix, WI" > inb <- which(is.na(Q)) > names[inb] [1] "Hawaii, HI" "Honolulu, HI" [3] "Kalawao, HI" "Kauai, HI" [5] "Maui, HI" "De Kalb, MO" [7] "De Kalb, AL" "Saint Clair, AL" [9] "Aleutians East, AK" "Aleutians West, AK" [11] "Anchorage, AK" "Bethel, AK" [13] "Bristol Bay, AK" "Denali, AK" [15] "Dillingham, AK" "Fairbanks North Star, AK" [17] "Haines, AK" "Juneau, AK" [19] "Kenai Peninsula, AK" "Ketchikan Gateway, AK" [21] "Kodiak Island, AK" "Lake and Peninsula, AK" [23] "Matanuska-Susitna, AK" "Nome, AK" [25] "North Slope, AK" "Northwest Arctic, AK" [27] "Prince of Wales-Outer Ketchi, AK" "Sitka, AK" [29] "Skagway-Yakutat-Angoon, AK" "Southeast Fairbanks, AK" [31] "Valdez-Cordova, AK" "Wade Hampton, AK" [33] "Wrangell-Petersburg, AK" "Yukon-Koyukuk, AK" [35] "Saint Francis, AR" "Broomfield, CO" [37] "District of Columbia, DC" "Desoto, FL" [39] "Saint Johns, FL" "Saint Lucie, FL" [41] "De Kalb, IL" "Dupage, IL" [43] "La Salle, IL" "Lake Michigan, IL" [45] "Saint Clair, IL" "De Kalb, IN" [47] "Lake Michigan, IN" "Saint Joseph, IN" [49] "Desoto, MS" "Saint Bernard, LA" [51] "Saint Charles, LA" "Saint Helena, LA" [53] "Saint James, LA" "Saint John the Baptist, LA" [55] "Saint Landry, LA" "Saint Martin, LA" [57] "Saint Mary, LA" "Saint Tammany, LA" [59] "Saint Mary's, MD" "Lake Hurron, MI" [61] "Lake Michigan, MI" "Lake St. 
Clair, MI" [63] "Lake Superior, MI" "Saint Clair, MI" [65] "Saint Joseph, MI" "Lake Superior, MN" [67] "Saint Louis, MN" "Saint Charles, MO" [69] "Saint Clair, MO" "Saint Francois, MO" [71] "Saint Louis, MO" "Sainte Genevieve, MO" [73] "Carson City, NV" "Debaca, NM" [75] "Saint Lawrence, NY" "Lake Ontario, NY" [77] "Lamoure, ND" "Lake Erie, OH" [79] "Mc Kean, PA" "Dewitt, TX" [81] "Clifton Forge City, VA" "Lake Michigan, WI" [83] "Lake Superior, WI" "Saint Croix, WI" > We see that there are some mismatches between names. In many cases St. is used instead of Saint; also some French names are written differently (DeKalb / De Kalb, LaSalle / La Salle, ...). We corrected such mismatches manually in the file ''match.csv''. After some iterations we finally obtained: > Aname <- cou$Areaname > lst <- file("match.csv","w") > for(i in ina) cat(i,';"',Aname[i],'"\n',sep='',file=lst) > close(lst) # ... manual editing of match.csv > m <- read.csv(file="match.csv",stringsAsFactors=FALSE,sep=';',header=FALSE) > m V1 V2 1 25 De Kalb, AL 2 58 Saint Clair, AL 3 144 Saint Francis, AR 4 290 District of Columbia 5 304 DeSoto, FL 6 345 Saint Johns, FL 7 346 Saint Lucie, FL 8 579 De Kalb, IL 9 582 Du Page, IL 10 610 La Salle, IL 11 642 Saint Clair, IL 12 679 De Kalb, IN 13 733 Saint Joseph, IN 14 1122 Saint Bernard, LA 15 1123 Saint Charles, LA 16 1124 Saint Helena, LA 17 1125 Saint James, LA 18 1126 Saint John the Baptist, LA 19 1127 Saint Landry, LA 20 1128 Saint Martin, LA 21 1129 Saint Mary, LA 22 1130 Saint Tammany, LA 23 1176 Saint Mary's, MD 24 1182 Baltimore city, MD 25 1270 Saint Clair, MI 26 1271 Saint Joseph, MI 27 1348 Saint Louis, MN 28 1383 De Soto, MS 29 1480 De Kalb, MO 30 1540 Saint Charles, MO 31 1541 Saint Clair, MO 32 1542 Sainte Genevieve, MO 33 1543 Saint Francois, MO 34 1544 Saint Louis, MO 35 1563 Saint Louis city, MO 36 1620 Yellowstone National Park, MT 37 1730 Carson City city, NV 38 1768 De Baca, NM 39 1839 Saint Lawrence, NY 40 1979 LaMoure, ND 41 2252 
McKean, PA 42 2551 De Witt, TX 43 2888 Clifton Forge, VA 44 2916 South Boston, VA 45 3072 Saint Croix, WI > Aname[m$V1] <- m$V2 > Aname[1542] [1] "Sainte Genevieve, MO" > Q <- match(names,Aname) > q <- match(Aname,names) # ... additional corrections > Aname <- cou$Areaname > m <- read.csv(file="match.csv",stringsAsFactors=FALSE,sep=';',header=FALSE) > Aname[m$V1] <- m$V2 > Q <- match(names,Aname) > q <- match(Aname,names) > ina <- which(is.na(q)) > inb <- which(is.na(Q)) > Aname[ina] [1] "Baltimore city, MD" "Saint Louis city, MO" "Yellowstone National Park, MT" [4] "South Boston, VA" > ina [1] 1182 1563 1620 2916 > Aname[2916] [1] "South Boston, VA" > aname <- Aname[-2916] > length(aname) [1] 3110 > Q <- match(names,aname) > q <- match(aname,names) > ina <- which(is.na(q)) > inb <- which(is.na(Q)) > aname[ina] [1] "Baltimore city, MD" "Saint Louis city, MO" "Yellowstone National Park, MT" > names[inb] [1] "Hawaii, HI" "Honolulu, HI" [3] "Kalawao, HI" "Kauai, HI" [5] "Maui, HI" "Aleutians East, AK" [7] "Aleutians West, AK" "Anchorage, AK" [9] "Bethel, AK" "Bristol Bay, AK" [11] "Denali, AK" "Dillingham, AK" [13] "Fairbanks North Star, AK" "Haines, AK" [15] "Juneau, AK" "Kenai Peninsula, AK" [17] "Ketchikan Gateway, AK" "Kodiak Island, AK" [19] "Lake and Peninsula, AK" "Matanuska-Susitna, AK" [21] "Nome, AK" "North Slope, AK" [23] "Northwest Arctic, AK" "Prince of Wales-Outer Ketchi, AK" [25] "Sitka, AK" "Skagway-Yakutat-Angoon, AK" [27] "Southeast Fairbanks, AK" "Valdez-Cordova, AK" [29] "Wade Hampton, AK" "Wrangell-Petersburg, AK" [31] "Yukon-Koyukuk, AK" "Broomfield, CO" [33] "Lake Michigan, IL" "Lake Michigan, IN" [35] "Lake Hurron, MI" "Lake Michigan, MI" [37] "Lake St. 
Clair, MI" "Lake Superior, MI" [39] "Lake Superior, MN" "Lake Ontario, NY" [41] "Lake Erie, OH" "Lake Michigan, WI" [43] "Lake Superior, WI" > length(aname) [1] 3110 As we see there are no shapes for 3 vertices (Baltimore city, MD; Saint Louis city, MO; Yellowstone National Park, MT) and there are no vertices for the shapes from Alaska (AK) and Hawaii (HI) and lake areas. Now we can finally produce a mapping between vertices and shapes. > ids <- read.csv("../usc3110lab.csv",header=FALSE,stringsAsFactors=FALSE) > ids[1:10,] V1 V2 V3 V4 1 1 1001 0.6472 0.6803 2 2 1003 0.6305 0.7479 3 3 1005 0.6676 0.7045 4 4 1007 0.6394 0.6596 5 5 1009 0.6477 0.6217 6 6 1011 0.6626 0.6964 7 7 1013 0.6471 0.7099 8 8 1015 0.6605 0.6314 9 9 1017 0.6682 0.6643 10 10 1019 0.6640 0.6131 > standard <- function(x) {s <- paste("0000",x,sep=""); substr(s,nchar(s)-4,nchar(s))} > ids$V2 <- standard(ids$V2) > ids[1:10,] V1 V2 V3 V4 1 1 01001 0.6472 0.6803 2 2 01003 0.6305 0.7479 3 3 01005 0.6676 0.7045 4 4 01007 0.6394 0.6596 5 5 01009 0.6477 0.6217 6 6 01011 0.6626 0.6964 7 7 01013 0.6471 0.7099 8 8 01015 0.6605 0.6314 9 9 01017 0.6682 0.6643 10 10 01019 0.6640 0.6131 > id <- ids$V2 > cou$STCOU[1:10] [1] 1001 1003 1005 1007 1009 1011 1013 1015 1017 1019 > cou$STCOU <- standard(cou$STCOU) > cou$STCOU[1:10] [1] "01001" "01003" "01005" "01007" "01009" "01011" "01013" "01015" "01017" "01019" > lab <- cou$STCOU > r <- match(id,lab) > which(is.na(r)) integer(0) > ===== Mapping vertices to shapes ===== > setwd("D:/Data/counties/9nations") > standard <- function(x) {s <- paste("0000",x,sep=""); substr(s,nchar(s)-4,nchar(s))} > library(maptools) > gpclibPermit() > UScou <- readShapeSpatial("USA/USA_adm2.shp") # county borders > plot(UScou,xlim=c(-124,-67),ylim=c(23,48),col="wheat",bg="skyblue",border="red",lwd=0.05) > cou <- read.csv(file="../varsN.csv",stringsAsFactors=FALSE) > ids <- read.csv("../usc3110lab.csv",header=FALSE,stringsAsFactors=FALSE) > ids$V2 <- standard(ids$V2) > id <- ids$V2 > cou$STCOU 
<- standard(cou$STCOU) > lab <- cou$STCOU > p <- match(id,lab) > which(is.na(p)) integer(0) > S <- read.csv(file="states.csv",stringsAsFactors=FALSE,sep=";") > states <- levels(UScou$NAME_1); ps <- match(states,S$name) > names <- paste(UScou$NAME_2,", ",S$code[ps[as.integer(UScou$NAME_1)]],sep="") > m <- read.csv(file="match.csv",stringsAsFactors=FALSE,sep=';',header=FALSE) > Aname <- cou$Areaname > Aname[m$V1] <- m$V2 > q <- match(Aname,names) > ina <- which(is.na(q)) > Aname[ina] [1] "Baltimore city, MD" "Saint Louis city, MO" [3] "Yellowstone National Park, MT" "South Boston, VA" > > stat <- read.csv("../pajek/reg3110-9.clu",header=FALSE,skip=1)$V1 > length(stat) [1] 3110 > clu <- rep(NA,length(names)) > for(v in 1:3110) {i <- q[[p[[v]]]]; if(!is.na(i)) clu[[i]] <- stat[[v]]} > UScou$clu <- clu > library(colorspace) > col <- rainbow_hcl(10) > plot(UScou,xlim=c(-124,-67),ylim=c(23,48),col=col[UScou$clu],bg="skyblue",border="black",lwd=0.05) > > ina <- which(is.na(UScou$clu)) > UScou$clu[ina] <- 10 > col <- c("darkorange","darkolivegreen3","red","darkorchid1","royalblue1","brown2","gold", + "green2","khaki2","white") > plot(UScou,xlim=c(-124,-67),ylim=c(23,48),col=col[UScou$clu],bg="skyblue",border="black",lwd=0.05) Since we saved the mappings p and q we can now just load them and use for mapping the data. 
> setwd("D:/Data/counties/9nations") > standard <- function(x) {s <- paste("0000",x,sep=""); substr(s,nchar(s)-4,nchar(s))} > library(maptools); gpclibPermit() > UScou <- readShapeSpatial("USA/USA_adm2.shp") # county borders > load('pq.Rdata') > objects() [1] "p" "q" "standard" "UScou" > stat <- read.csv("../pajek/reg3110-9.clu",header=FALSE,skip=1)$V1 > clu <- rep(NA,length(UScou$NAME_1)) > for(v in 1:3110) {i <- q[[p[[v]]]]; if(!is.na(i)) clu[[i]] <- stat[[v]]} > UScou$clu <- clu > ina <- which(is.na(UScou$clu)) > UScou$clu[ina] <- 10 > col <- c("darkorange","darkolivegreen3","red","darkorchid1","royalblue1","brown2","gold", + "green2","khaki2","white") > png("UScounties.png",width=1170,height=830) > plot(UScou,xlim=c(-124,-67),ylim=c(23,48),col=col[UScou$clu],bg="skyblue",border="black",lwd=0.05) > title("Central US"); dev.off() {{notes:clu:pics:uscounties9.png?720}} ===== Transforming clusterings ===== Clusters can have very big numbers. To renumber clusters in a clustering with consecutive numbers we can > nc <- 0; r2 <- integer(3110) > C <- new.env(hash=TRUE,parent=emptyenv()) > for (v in 1:3110){ + key <- as.character(r1[[v]]) + if (exists(key,env=C,inherits=FALSE)){ + r2[v] <- get(key,env=C,inherits=FALSE) + } else { + nc <- nc + 1; assign(key,nc,env=C); r2[v] <- nc + } + } > r2 [1] 1 2 3 1 4 1 3 4 4 4 1 3 3 4 4 3 5 3 4 3 1 4 3 3 4 1 3 4 4 4 [31] 3 1 3 1 3 4 1 4 5 4 1 4 1 1 4 3 4 4 2 3 1 4 1 4 3 4 1 4 1 1 [61] 4 4 1 4 3 1 4 1 6 1 6 6 1 6 6 6 1 6 6 1 6 1 1 7 8 7 8 7 1 8 .... 
[3061] 1 1 34 34 1 34 1 21 21 34 35 21 38 34 1 34 40 1 34 21 38 1 1 34 34 21 1 13 42 1 [3091] 16 16 16 42 16 42 42 13 16 42 1 42 16 42 16 16 1 16 42 16 > clu <- rep(NA,length(UScou$NAME_1)) > for(v in 1:3110) {i <- q[[p[[v]]]]; if(!is.na(i)) clu[[i]] <- r2[[v]]} > UScou$clu <- clu > pdf("UScRank.pdf",width=11.7,height=8.3,paper="a4r") > plot(UScou,xlim=c(-124,-67),ylim=c(23,48),col=col[UScou$clu],bg="skyblue",border="black",lwd=0.05) > plot(USsta,xlim=c(-124,-67),ylim=c(23,48),lwd=0.2,border="violet",add=TRUE) > text(coordinates(UScou),labels=as.character(UScou$NAME_2),cex=0.1) > title("Central US"); dev.off() When the clustering contains many clusters we can draw only (up to 8) main (largest) clusters. The remaining clusters are put into cluster 10 (grey); the unclassified units are in cluster 1 (white). > r1 <- rank > t <- table(r1) > T <- sort(t[-1],decreasing=TRUE) > Nt <- names(T) > best <- c("0",Nt[1:8]) > C <- new.env(hash=TRUE,parent=emptyenv()) > for(i in 1:9) assign(best[i],i,env=C) > for (v in 1:3110){ + key <- as.character(r1[[v]]) + if (exists(key,env=C,inherits=FALSE)){ + r2[v] <- get(key,env=C,inherits=FALSE) + } else { + r2[v] <- 10 + } + } > r2 [1] 10 10 8 10 5 1 8 5 5 5 10 8 8 5 5 8 2 8 5 8 1 5 8 8 5 10 8 5 5 5 [31] 8 10 8 1 8 5 10 5 2 5 1 5 10 1 5 8 5 5 10 8 1 5 1 5 8 5 10 5 1 10 ..... 
[3061] 1 1 10 10 10 10 10 3 3 10 10 3 10 10 10 10 10 10 10 3 10 10 10 10 10 3 10 9 10 1 [3091] 10 10 10 10 10 10 10 9 10 10 1 10 10 10 10 10 1 10 10 10 > col <- c("white","red","yellow","green","blue","pink","brown","orange","purple","grey") > clu <- rep(NA,length(UScou$NAME_1)) > for(v in 1:3110) {i <- q[[p[[v]]]]; if(!is.na(i)) clu[[i]] <- r2[[v]]} > UScou$clu <- clu > # UScou$clu[which(is.na(UScou$clu))] <- 10 > pdf("UScRank.pdf",width=11.7,height=8.3,paper="a4r") > plot(UScou,xlim=c(-124,-67),ylim=c(23,48),col=col[UScou$clu],bg="skyblue",border="black",lwd=0.05) > plot(USsta,xlim=c(-124,-67),ylim=c(23,48),lwd=0.2,border="violet",add=TRUE) > text(coordinates(UScou),labels=as.character(UScou$NAME_2),cex=0.1) > title("Central US"); dev.off()