March 2018
Data from the CIA World Factbook were obtained by inspecting a selected variable for some country and then clicking on the icon in the right margin, which gave us a list of the corresponding values for all world countries. By copy-pasting this list into Textpad and doing some additional editing we produced a CSV file (tab-separated, one file per variable).
We will extend the country codes data frame from Country codes with selected data from the CIA World Factbook. To produce a mapping between the CIA World Factbook and the country codes we will use the keys U$COUNTRY and C$Country. Because some countries are labeled differently in the two sources, we have to establish the links for them manually.
> fmtU <- c("character","numeric","factor","numeric","factor") > U <- read.csv("Urban.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtU) > str(U) 'data.frame': 230 obs. of 5 variables: $ COUNTRY: chr "Afghanistan" "Albania" "Algeria" "American Samoa" ... $ UrbPop : num 27.6 59.3 71.9 87.1 84.1 45.6 100 23 92 62.5 ... $ yearP : Factor w/ 2 levels "(2012)","(2017)": 2 2 2 2 2 2 2 2 2 2 ... $ UrbRate: num 3.77 1.81 2.26 0.63 0.09 4.6 0.88 -0.38 0.93 -0.1 ... $ yearR : Factor w/ 2 levels "","(2015-20 est.)": 2 2 2 2 2 2 2 2 2 2 ... > p <- match(C$Country,U$COUNTRY) > q <- match(U$COUNTRY,C$Country) > U$COUNTRY[is.na(q)] > C$Country[is.na(p)] > C$CIA <- C$Country > p <- match(C$Country,U$COUNTRY) > q <- match(U$COUNTRY,C$Country) > C$CIA <- C$Country > T<-cbind(which(is.na(p)),C$Country[which(is.na(p))]) > head(T) [,1] [,2] [1,] "2" "Aland Islands" [2,] "9" "Antarctica" [3,] "17" "Bahamas" [4,] "30" "Bouvet Island" [5,] "33" "British Indian Ocean Territory" [6,] "34" "Brunei Darussalam" > Z<-cbind(which(is.na(q)),U$COUNTRY[which(is.na(q))]) > head(Z) [,1] [,2] [1,] "15" "Bahamas, The" [2,] "30" "Brunei" [3,] "33" "Burma" [4,] "35" "Cabo Verde" [5,] "46" "Congo, Democratic Republic of the" [6,] "47" "Congo, Republic of the" > u <- c( 17,34,152,41,54,53,57,157,61,73,82,170,99,47,105,118,119, + 122, 48,131,176,183,186,191,189,216,219,235,240,241,242,243) > v <- c( 15,30, 33,35,46,47,50, 53,55,67,74, 75,90,92, 97,110,111, + 114,123,124,163,169,171,175,185,199,201,217,221,222,223,224) > cbind(u,C$CIA[u],v,U$COUNTRY[v]) u v [1,] "17" "Bahamas" "15" "Bahamas, The" [2,] "34" "Brunei Darussalam" "30" "Brunei" [3,] "152" "Myanmar" "33" "Burma" [4,] "41" "Cape Verde" "35" "Cabo Verde" [5,] "54" "Congo, (Kinshasa)" "46" "Congo, Democratic Republic of the" [6,] "53" "Congo (Brazzaville)" "47" "Congo, Republic of the" [7,] "57" "CĂ´te d'Ivoire" "50" "Cote d'Ivoire" [8,] "157" "Netherlands Antilles" "53" "Curacao" [9,] "61" "Czech Republic" "55" "Czechia" [10,] "73" "Falkland Islands 
(Malvinas)" "67" "Falkland Islands (Islas Malvinas)" [11,] "82" "Gambia" "74" "Gambia, The" [12,] "170" "Palestinian Territory" "75" "Gaza Strip" [13,] "99" "Holy See (Vatican City State)" "90" "Holy See (Vatican City)" [14,] "47" "Hong Kong, SAR China" "92" "Hong Kong" [15,] "105" "Iran, Islamic Republic of" "97" "Iran" [16,] "118" "Korea (North)" "110" "Korea, North" [17,] "119" "Korea (South)" "111" "Korea, South" [18,] "122" "Lao PDR" "114" "Laos" [19,] "48" "Macao, SAR China" "123" "Macau" [20,] "131" "Macedonia, Republic of" "124" "Macedonia" [21,] "176" "Pitcairn" "163" "Pitcairn Islands" [22,] "183" "Russian Federation" "169" "Russia" [23,] "186" "Saint Helena" "171" "Saint Helena, Ascension, and Tristan da Cunha" [24,] "191" "Saint Vincent and Grenadines" "175" "Saint Vincent and the Grenadines" [25,] "189" "Saint-Martin (French part)" "185" "Sint Maarten" [26,] "216" "Syrian Arab Republic (Syria)" "199" "Syria" [27,] "219" "Tanzania, United Republic of" "201" "Tanzania" [28,] "235" "United States of America" "217" "United States" [29,] "240" "Venezuela (Bolivarian Republic)" "221" "Venezuela" [30,] "241" "Viet Nam" "222" "Vietnam" [31,] "242" "Virgin Islands, US" "223" "Virgin Islands" [32,] "243" "Wallis and Futuna Islands" "224" "Wallis and Futuna" > C$CIA[u] <- U$COUNTRY[v] > s <- match(U$COUNTRY,C$CIA) > i <- which(is.na(s)) > i [1] 225 227 > U$COUNTRY[i] [1] "West Bank" "World" > names(U) [1] "COUNTRY" "UrbPop" "yearP" "UrbRate" "yearR" > s <- match(U$COUNTRY,C$CIA) > r <- which(is.na(s)) > i <- (1:length(s))[-r] > j <- s[-r] > C$UrbPop <- NA > C$UrbPop[j] <- U$UrbPop[i] > C$UrbRate <- NA > C$UrbRate[j] <- U$UrbRate[i] > cbind(U$COUNTRY,U$UrbPop,U$UrbRate)[1:10,] [,1] [,2] [,3] [1,] "Afghanistan" "27.6" "3.77" [2,] "Albania" "59.3" "1.81" [3,] "Algeria" "71.9" "2.26" [4,] "American Samoa" "87.1" "0.63" [5,] "Andorra" "84.1" "0.09" [6,] "Angola" "45.6" "4.6" [7,] "Anguilla" "100" "0.88" [8,] "Antigua and Barbuda" "23" "-0.38" [9,] "Argentina" "92" 
"0.93" [10,] "Armenia" "62.5" "-0.1" > cbind(C$CIA,C$UrbPop,C$UrbRate)[1:10,] [,1] [,2] [,3] [1,] "Afghanistan" "27.6" "3.77" [2,] "Aland Islands" NA NA [3,] "Albania" "59.3" "1.81" [4,] "Algeria" "71.9" "2.26" [5,] "American Samoa" "87.1" "0.63" [6,] "Andorra" "84.1" "0.09" [7,] "Angola" "45.6" "4.6" [8,] "Anguilla" "100" "0.88" [9,] "Antarctica" NA NA [10,] "Antigua and Barbuda" "23" "-0.38" > e <- match(Europe,C$ISOalpha2) > e [1] 3 6 12 15 21 22 28 35 215 60 61 84 62 71 208 74 76 77 234 83 86 87 101 58 107 [26] 102 110 129 130 123 145 146 131 137 166 156 177 178 182 183 214 202 201 193 227 232 99 NA > C[e,] Country ISOalpha2 ISOalpha3 ISOnumeric CIA UrbPop UrbRate 3 Albania AL ALB 8 Albania 59.3 1.81 6 Andorra AD AND 20 Andorra 84.1 0.09 12 Armenia AM ARM 51 Armenia 62.5 -0.10 15 Austria AT AUT 40 Austria 66.1 0.51 21 Belarus BY BLR 112 Belarus 77.4 -0.04 ... 61 Czech Republic CZ CZE 203 Czechia 73.0 0.30 ... 131 Macedonia, Republic of MK MKD 807 Macedonia 57.3 0.24 ... 227 Turkey TR TUR 792 Turkey 74.4 1.54 232 Ukraine UA UKR 804 Ukraine 70.1 -0.35 99 Holy See (Vatican City State) VA VAT 336 Holy See (Vatican City) 100.0 0.10 NA <NA> <NA> <NA> NA <NA> NA NA > save(C,ascii=TRUE,file='CIA.Rdata')
> setwd("C:/Users/batagelj/Downloads/data/CIA") > Europe <- c("AL", "AD", "AM", "AT", "BY", "BE", "BA", "BG", "CH", "CY", + "CZ", "DE", "DK", "EE", "ES", "FO", "FI", "FR", "GB", "GE", "GI", + "GR", "HU", "HR", "IE", "IS", "IT", "LT", "LU", "LV", "MD", "MC", + "MK", "MT", "NO", "NL", "PL", "PT", "RO", "RU", "SE", "SI", "SK", + "SM", "TR", "UA", "VA", "XK") > load('CIA.Rdata') > str(C) 'data.frame': 247 obs. of 7 variables: $ Country : chr "Afghanistan" "Aland Islands" "Albania" "Algeria" ... $ ISOalpha2 : chr "AF" "AX" "AL" "DZ" ... $ ISOalpha3 : chr "AFG" "ALA" "ALB" "DZA" ... $ ISOnumeric: int 4 248 8 12 16 20 24 660 10 28 ... $ CIA : chr "Afghanistan" "Aland Islands" "Albania" "Algeria" ... $ UrbPop : num 27.6 NA 59.3 71.9 87.1 84.1 45.6 100 NA 23 ... $ UrbRate : num 3.77 NA 1.81 2.26 0.63 0.09 4.6 0.88 NA -0.38 ... > e <- match(Europe,C$ISOalpha2)
> fmtU <- c("character","numeric","factor") > U <- read.csv("BirthRate.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtU) > str(U) 'data.frame': 228 obs. of 3 variables: $ COUNTRY : chr "Afghanistan" "Albania" "Algeria" "American Samoa" ... $ BirthRate: num 37.9 13.2 22.2 19.6 7.5 44.2 12.5 15.7 16.7 12.9 ... $ year : Factor w/ 3 levels "(2012 est.)",..: 3 3 3 3 3 3 3 3 3 3 ... > s <- match(U$COUNTRY,C$CIA) > which(is.na(s)) [1] 67 198 223 225 > U$COUNTRY[which(is.na(s))] [1] "European Union" "Taiwan" "West Bank" "World" > r <- which(is.na(s)) > i <- (1:length(s))[-r] > j <- s[-r] > C$BirthRate <- NA > C$BirthRate[j] <- U$BirthRate[i] > head(C) Country ISOalpha2 ISOalpha3 ISOnumeric CIA UrbPop UrbRate BirthRate 1 Afghanistan AF AFG 4 Afghanistan 27.6 3.77 37.9 2 Aland Islands AX ALA 248 Aland Islands NA NA NA 3 Albania AL ALB 8 Albania 59.3 1.81 13.2 4 Algeria DZ DZA 12 Algeria 71.9 2.26 22.2 5 American Samoa AS ASM 16 American Samoa 87.1 0.63 19.6 6 Andorra AD AND 20 Andorra 84.1 0.09 7.5
> U <- read.csv("DeathRate.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtU) > str(U) 'data.frame': 228 obs. of 3 variables: $ COUNTRY : chr "Afghanistan" "Albania" "Algeria" "American Samoa" ... $ DeathRate: num 13.4 6.8 4.3 5.9 7.3 9.2 4.6 5.7 7.5 9.4 ... $ year : Factor w/ 3 levels "(2012 est.)",..: 3 3 3 3 3 3 3 3 3 3 ... > s <- match(U$COUNTRY,C$CIA) > which(is.na(s)) [1] 67 198 223 225 > U$COUNTRY[which(is.na(s))] [1] "European Union" "Taiwan" "West Bank" "World" > r <- which(is.na(s)) > i <- (1:length(s))[-r] > j <- s[-r] > C$DeathRate <- NA > C$DeathRate[j] <- U$DeathRate[i] > C[1:6,c(2,5:9)] ISOalpha2 CIA UrbPop UrbRate BirthRate DeathRate 1 AF Afghanistan 27.6 3.77 37.9 13.4 2 AX Aland Islands NA NA NA NA 3 AL Albania 59.3 1.81 13.2 6.8 4 DZ Algeria 71.9 2.26 22.2 4.3 5 AS American Samoa 87.1 0.63 19.6 5.9 6 AD Andorra 84.1 0.09 7.5 7.3 >
> U <- read.csv("FiBiAge.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtU) > str(U) 'data.frame': 131 obs. of 3 variables: $ COUNTRY: chr "Afghanistan" "Albania" "Angola" "Armenia" ... $ FiBiAge: num 19.9 24.5 19.4 24.4 28.7 29 23.2 18.5 25.7 28.6 ... $ year : Factor w/ 33 levels " (2006 est.)",..: 31 8 32 10 8 8 8 29 8 7 ... > s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s)) > i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]} > C$FiBiAge <- NA; C$FiBiAge[j] <- U$FiBiAge[i] > C[1:10,c(2,5:10)] ISOalpha2 CIA UrbPop UrbRate BirthRate DeathRate FiBiAge 1 AF Afghanistan 27.6 3.77 37.9 13.4 19.9 2 AX Aland Islands NA NA NA NA NA 3 AL Albania 59.3 1.81 13.2 6.8 24.5 4 DZ Algeria 71.9 2.26 22.2 4.3 NA 5 AS American Samoa 87.1 0.63 19.6 5.9 NA 6 AD Andorra 84.1 0.09 7.5 7.3 NA 7 AO Angola 45.6 4.60 44.2 9.2 19.4 8 AI Anguilla 100.0 0.88 12.5 4.6 NA 9 AQ Antarctica NA NA NA NA NA 10 AG Antigua and Barbuda 23.0 -0.38 15.7 5.7 NA
> fmtU <- c("character","numeric","numeric","numeric","factor") > U <- read.csv("InfMort.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtU) > str(U) 'data.frame': 234 obs. of 5 variables: $ COUNTRY: chr "Afghanistan" "Albania" "Algeria" "American Samoa" ... $ InfMtot: num 110.6 11.9 19.6 11.3 3.6 ... $ InfMmal: num 118 13.3 21.2 13.2 3.6 73.3 3.7 13.9 10.7 14.1 ... $ InfMfem: num 102.9 10.5 17.9 9.3 3.6 ... $ year : Factor w/ 2 levels "(2016 est.)",..: 2 2 2 2 2 2 2 2 2 2 ... > s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s)) > i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]} > r [1] 69 198 203 229 231 > U$COUNTRY[r] [1] "European Union" "Svalbard" "Taiwan" "West Bank" "World" > C$InfMtot <- NA; C$InfMtot[j] <- U$InfMtot[i] > C$InfMmal <- NA; C$InfMmal[j] <- U$InfMmal[i] > C$InfMfem <- NA; C$InfMfem[j] <- U$InfMfem[i] > C[1:10,c(2,5:13)] ISOalpha2 CIA UrbPop UrbRate BirthRate DeathRate FiBiAge InfMtot InfMmal InfMfem 1 AF Afghanistan 27.6 3.77 37.9 13.4 19.9 110.6 118.0 102.9 2 AX Aland Islands NA NA NA NA NA NA NA NA 3 AL Albania 59.3 1.81 13.2 6.8 24.5 11.9 13.3 10.5 4 DZ Algeria 71.9 2.26 22.2 4.3 NA 19.6 21.2 17.9 5 AS American Samoa 87.1 0.63 19.6 5.9 NA 11.3 13.2 9.3 6 AD Andorra 84.1 0.09 7.5 7.3 NA 3.6 3.6 3.6 7 AO Angola 45.6 4.60 44.2 9.2 19.4 67.6 73.3 61.8 8 AI Anguilla 100.0 0.88 12.5 4.6 NA 3.3 3.7 2.9 9 AQ Antarctica NA NA NA NA NA NA NA NA 10 AG Antigua and Barbuda 23.0 -0.38 15.7 5.7 NA 12.1 13.9 10.2 > save(C,ascii=TRUE,file='CIA.Rdata')
> fmtU <- c("character","numeric","numeric","numeric","factor") > U <- read.csv("MedAge.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtU) > str(U) 'data.frame': 230 obs. of 5 variables: $ COUNTRY: chr "Afghanistan" "Albania" "Algeria" "American Samoa" ... $ total : num 18.8 32.9 28.1 25.5 44.3 15.9 34.8 31.9 31.7 35.1 ... $ male : num 18.8 31.6 27.8 25.1 44.4 15.4 32.9 30 30.5 33.3 ... $ female : num 18.9 34.3 28.4 26 44.1 16.3 36.7 33.5 32.9 36.9 ... $ year : Factor w/ 1 level "(2017 est.)": 1 1 1 1 1 1 1 1 1 1 ... > s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s)) > i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]} > r [1] 67 111 169 173 200 225 227 > U$COUNTRY[r] [1] "European Union" "Kosovo" "Saint Barthelemy" "Saint Martin" "Taiwan" "West Bank" [7] "World" > C$MedAtot <- NA; C$MedAtot[j] <- U$total[i] > C$MedAmal <- NA; C$MedAmal[j] <- U$male[i] > C$MedAfem <- NA; C$MedAfem[j] <- U$female[i] > C[1:10,c(3,6:16)] ISOalpha3 UrbPop UrbRate BirthRate DeathRate FiBiAge InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem 1 AFG 27.6 3.77 37.9 13.4 19.9 110.6 118.0 102.9 18.8 18.8 18.9 2 ALA NA NA NA NA NA NA NA NA NA NA NA 3 ALB 59.3 1.81 13.2 6.8 24.5 11.9 13.3 10.5 32.9 31.6 34.3 4 DZA 71.9 2.26 22.2 4.3 NA 19.6 21.2 17.9 28.1 27.8 28.4 5 ASM 87.1 0.63 19.6 5.9 NA 11.3 13.2 9.3 25.5 25.1 26.0 6 AND 84.1 0.09 7.5 7.3 NA 3.6 3.6 3.6 44.3 44.4 44.1 7 AGO 45.6 4.60 44.2 9.2 19.4 67.6 73.3 61.8 15.9 15.4 16.3 8 AIA 100.0 0.88 12.5 4.6 NA 3.3 3.7 2.9 34.8 32.9 36.7 9 ATA NA NA NA NA NA NA NA NA NA NA NA 10 ATG 23.0 -0.38 15.7 5.7 NA 12.1 13.9 10.2 31.9 30.0 33.5 > save(C,ascii=TRUE,file='CIA.Rdata')
> fmtS <- c("character","numeric","factor") > U <- read.csv("Obesity.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtS) > str(U) 'data.frame': 192 obs. of 3 variables: $ COUNTRY: chr "Afghanistan" "Albania" "Algeria" "Andorra" ... $ Obesity: num 5.5 21.7 27.4 25.6 8.2 18.9 28.3 20.2 29 20.1 ... $ year : Factor w/ 2 levels "(2014)","(2016)": 2 2 2 2 2 2 2 2 2 2 ... > s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s)) > i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]} > r integer(0) > C$Obesity <- NA; C$Obesity[j] <- U$Obesity[i] > C[1:6,c(3,10:17)] ISOalpha3 FiBiAge InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity 1 AFG 19.9 110.6 118.0 102.9 18.8 18.8 18.9 5.5 2 ALA NA NA NA NA NA NA NA NA 3 ALB 24.5 11.9 13.3 10.5 32.9 31.6 34.3 21.7 4 DZA NA 19.6 21.2 17.9 28.1 27.8 28.4 27.4 5 ASM NA 11.3 13.2 9.3 25.5 25.1 26.0 NA 6 AND NA 3.6 3.6 3.6 44.3 44.4 44.1 25.6 > save(C,ascii=TRUE,file='CIA.Rdata')
> U <- read.csv("PhysDens.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtS) > str(U) 'data.frame': 183 obs. of 3 variables: $ COUNTRY : chr "Afghanistan" "Albania" "Algeria" "Andorra" ... $ PhysDens: num 0.3 1.29 1.19 3.69 0.14 3.91 2.8 3.5 5.23 3.4 ... $ year : Factor w/ 10 levels "(2007)","(2008)",..: 10 7 1 9 3 7 8 9 10 8 ... > s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s)) > i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]} > r [1] 180 > U$COUNTRY[r] [1] "West Bank" > C$PhysDens <- NA; C$PhysDens[j] <- U$PhysDens[i] > C[1:6,c(3,10:18)] ISOalpha3 FiBiAge InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity PhysDens 1 AFG 19.9 110.6 118.0 102.9 18.8 18.8 18.9 5.5 0.30 2 ALA NA NA NA NA NA NA NA NA NA 3 ALB 24.5 11.9 13.3 10.5 32.9 31.6 34.3 21.7 1.29 4 DZA NA 19.6 21.2 17.9 28.1 27.8 28.4 27.4 1.19 5 ASM NA 11.3 13.2 9.3 25.5 25.1 26.0 NA NA 6 AND NA 3.6 3.6 3.6 44.3 44.4 44.1 25.6 3.69 > save(C,ascii=TRUE,file='CIA.Rdata')
> U <- read.csv("EduExp.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtS) > str(U) 'data.frame': 199 obs. of 3 variables: $ COUNTRY: chr "Afghanistan" "Albania" "Algeria" "American Samoa" ... $ EduExp : num 3.4 3.5 4.3 NA 3.1 3.5 2.8 2.6 5.5 2.8 ... $ year : Factor w/ 10 levels "(2007)","(2008)",..: 9 7 2 NA 8 4 2 3 8 9 ... > levels(U$year) [1] "(2007)" "(2008)" "(2009)" "(2010)" "(2011)" "(2012)" "(2013)" "(2014)" "(2015)" "(2016)" > s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s)) > i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]} > r [1] 196 > U$COUNTRY[r] [1] "West Bank" > C$EduExp <- NA; C$EduExp[j] <- U$EduExp[i] > C[1:6,c(3,11:19)] ISOalpha3 InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity PhysDens EduExp 1 AFG 110.6 118.0 102.9 18.8 18.8 18.9 5.5 0.30 3.4 2 ALA NA NA NA NA NA NA NA NA NA 3 ALB 11.9 13.3 10.5 32.9 31.6 34.3 21.7 1.29 3.5 4 DZA 19.6 21.2 17.9 28.1 27.8 28.4 27.4 1.19 4.3 5 ASM 11.3 13.2 9.3 25.5 25.1 26.0 NA NA NA 6 AND 3.6 3.6 3.6 44.3 44.4 44.1 25.6 3.69 3.1
> U <- read.csv("Pop.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtS) > str(U) 'data.frame': 273 obs. of 3 variables: $ COUNTRY: chr "Afghanistan" "Akrotiri" "Albania" "Algeria" ... $ Pop : num 34124811 15700 3047987 40969443 51504 ... $ year : Factor w/ 7 levels "()","(2014 est.)",..: 7 1 7 7 7 7 7 7 1 7 ... > s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s)) > i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]} > cbind(r,U$COUNTRY[r]) r [1,] "2" "Akrotiri" [2,] "14" "Ashmore and Cartier " [3,] "18" "Bahamas The" [4,] "50" "Clipperton Island" [5,] "54" "Congo Democratic Republic of the" [6,] "55" "Congo Republic of the" [7,] "57" "Coral Sea Islands" [8,] "66" "Dhekelia" [9,] "77" "European Union" [10,] "84" "French Southern and Antarctic Lands" [11,] "85" "Ile Amsterdam (Ile Amsterdam et Ile Saint-Paul):" [12,] "86" "Ile Saint-Paul (Ile Amsterdam et Ile Saint-Paul):" [13,] "87" "Iles Crozet " [14,] "88" "Iles Kerguelen " [15,] "89" "Bassas da India (Iles Eparses)" [16,] "90" "Europa Island Glorioso Islands Juan de Nova Island (Iles Eparses)" [17,] "91" "Tromelin Island (Iles Eparses)" [18,] "93" "Gambia The" [19,] "109" "Heard Island and McDonald Islands" [20,] "113" "Howland Island" [21,] "125" "Jan Mayen" [22,] "127" "Jarvis Island" [23,] "129" "Johnston Atoll" [24,] "133" "Kingman Reef" [25,] "135" "Korea North" [26,] "136" "Korea South" [27,] "137" "Kosovo" [28,] "161" "Micronesia Federated States of" [29,] "162" "Midway Islands" [30,] "172" "Navassa Island" [31,] "187" "Palmyra Atoll" [32,] "190" "Paracel Islands" [33,] "202" "Saint Barthelemy" [34,] "203" "Saint Helena Ascension and Tristan da Cunha" [35,] "206" "Saint Martin" [36,] "224" "South Georgia and South Sandwich Islands" [37,] "227" "Spratly Islands" [38,] "231" "Svalbard" [39,] "236" "Taiwan" [40,] "254" "United States Pacific Island Wildlife Refuges" [41,] "255" "Jarvis Island" [42,] "256" "Johnston Atoll" [43,] "257" "Midway Islands" [44,] "258" "Palmyra Atoll" [45,] "266" "Wake 
Island" [46,] "268" "West Bank" [47,] "270" "World" > C$Pop <- NA; C$Pop[j] <- U$Pop[i] > C[1:6,c(3,11:20)] ISOalpha3 InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity PhysDens EduExp Pop 1 AFG 110.6 118.0 102.9 18.8 18.8 18.9 5.5 0.30 3.4 34124811 2 ALA NA NA NA NA NA NA NA NA NA NA 3 ALB 11.9 13.3 10.5 32.9 31.6 34.3 21.7 1.29 3.5 3047987 4 DZA 19.6 21.2 17.9 28.1 27.8 28.4 27.4 1.19 4.3 40969443 5 ASM 11.3 13.2 9.3 25.5 25.1 26.0 NA NA NA 51504 6 AND 3.6 3.6 3.6 44.3 44.4 44.1 25.6 3.69 3.1 76965 > save(C,ascii=TRUE,file='CIA.Rdata')
> fmtA <- c("character","numeric","numeric","numeric") > U <- read.csv("Area.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtA) > str(U) 'data.frame': 275 obs. of 4 variables: $ COUNTRY : chr "Afghanistan" "Akrotiri" "Albania" "Algeria" ... $ AreaTot : num 652230 123 28748 2381741 199 ... $ AreaLand : num 652230 123 27398 2381741 199 ... $ AreaWater: num 0 0 1350 0 0 0 0 0 0 0 ... > s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s)) > i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]} > cbind(r,U$COUNTRY[r]) r [1,] "2" "Akrotiri" [2,] "11" "Arctic Ocean" [3,] "15" "Ashmore and Cartier Islands" [4,] "16" "Atlantic Ocean" [5,] "20" "Bahamas The" [6,] "52" "Clipperton Island" [7,] "56" "Congo Democratic Republic of the" [8,] "57" "Congo Republic of the" [9,] "59" "Coral Sea Islands" [10,] "68" "Dhekelia" [11,] "79" "European Union" [12,] "86" "Ile Amsterdam (Ile Amsterdam et Ile Saint-Paul)" [13,] "87" "Ile Saint-Paul (Ile Amsterdam et Ile Saint-Paul)" [14,] "88" "Iles Crozet" [15,] "89" "Iles Kerguelen" [16,] "90" "Bassas da India (Iles Eparses)" [17,] "91" "Europa Island (Iles Eparses)" [18,] "92" "Glorioso Islands (Iles Eparses)" [19,] "93" "Juan de Nova Island (Iles Eparses)" [20,] "94" "Tromelin Island (Iles Eparses)" [21,] "96" "Gambia The" [22,] "112" "Heard Island and McDonald Islands" [23,] "116" "Howland Island" [24,] "120" "Indian Ocean" [25,] "129" "Jan Mayen" [26,] "131" "Jarvis Island" [27,] "133" "Johnston Atoll" [28,] "137" "Kingman Reef" [29,] "139" "Korea North" [30,] "140" "Korea South" [31,] "141" "Kosovo" [32,] "165" "Micronesia Federated States of" [33,] "166" "Midway Islands" [34,] "176" "Navassa Island" [35,] "189" "Pacific Ocean" [36,] "192" "Palmyra Atoll" [37,] "195" "Paracel Islands" [38,] "207" "Saint Barthelemy" [39,] "208" "Saint Helena Ascension and Tristan da Cunha" [40,] "211" "Saint Martin" [41,] "229" "South Georgia and South Sandwich Islands" [42,] "231" "Southern Ocean" [43,] "233" "Spratly Islands" [44,] "237" 
"Svalbard" [45,] "242" "Taiwan" [46,] "260" "United States Pacific Island Wildlife Refuges" [47,] "268" "Wake Island" [48,] "270" "West Bank" [49,] "272" "World" > C$AreaTot <- NA; C$AreaTot[j] <- U$AreaTot[i] > C[1:6,c(3,11:21)] ISOalpha3 InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity PhysDens EduExp Pop AreaTot 1 AFG 110.6 118.0 102.9 18.8 18.8 18.9 5.5 0.30 3.4 34124811 652230 2 ALA NA NA NA NA NA NA NA NA NA NA NA 3 ALB 11.9 13.3 10.5 32.9 31.6 34.3 21.7 1.29 3.5 3047987 28748 4 DZA 19.6 21.2 17.9 28.1 27.8 28.4 27.4 1.19 4.3 40969443 2381741 5 ASM 11.3 13.2 9.3 25.5 25.1 26.0 NA NA NA 51504 199 6 AND 3.6 3.6 3.6 44.3 44.4 44.1 25.6 3.69 3.1 76965 468 > save(C,ascii=TRUE,file='CIA.Rdata')
Most data for Kosovo are missing.
> C[248,1:21] <- NA > C[248,1:21] Country ISOalpha2 ISOalpha3 ISOnumeric CIA UrbPop UrbRate BirthRate DeathRate FiBiAge InfMtot InfMmal InfMfem 248 <NA> <NA> <NA> NA <NA> NA NA NA NA NA NA NA NA MedAtot MedAmal MedAfem Obesity PhysDens EduExp Pop AreaTot 248 NA NA NA NA NA NA NA NA > C[248,1:5] <- c("Kosovo","XK","XKX",NA,"Kosovo") > C[248,20] <- 1895250 > C[248,21] <- 10887 > C[248,14:16] <- c(29.1,28.8,29.5) > C[248,] Country ISOalpha2 ISOalpha3 ISOnumeric CIA UrbPop UrbRate BirthRate DeathRate FiBiAge InfMtot InfMmal InfMfem 248 Kosovo XK XKX <NA> Kosovo NA NA NA NA NA NA NA NA MedAtot MedAmal MedAfem Obesity PhysDens EduExp Pop AreaTot 248 29.1 28.8 29.5 NA NA NA 1895250 10887 > save(C,ascii=TRUE,file='C:/Users/batagelj/Downloads/data/CIA/CIA.Rdata')
To keep Kosovo in the analysis, we imputed some of its missing values by estimating them from the corresponding values of the neighboring countries:
> C[which(C$ISOalpha2=="MK"),] > C[c(197,148,248,3,131),c(3,6:19)] ISOalpha3 UrbPop UrbRate BirthRate DeathRate FiBiAge InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity PhysDens EduExp 197 SRB 55.8 -0.29 9.0 13.6 27.9 5.8 6.7 4.9 42.6 40.9 44.3 21.5 2.46 4.2 148 MNE 64.4 0.25 10.0 9.7 26.3 NA NA NA 40.7 39.9 41.8 23.3 2.34 NA 248 XKX NA NA NA NA NA NA NA NA 29.1 28.8 29.5 NA NA NA 3 ALB 59.3 1.81 13.2 6.8 24.5 11.9 13.3 10.5 32.9 31.6 34.3 21.7 1.29 3.5 131 MKD 57.3 0.24 11.4 9.2 26.8 7.4 7.6 7.1 37.9 36.8 39.0 22.4 2.80 NA
Our estimates are as follows:
248 XKX 56 NA 14 7 24 NA NA NA 29.1 28.8 29.5 21 1.4 NA > C[248,c(6,8:10,17,18)] <- c(56,14,7,24,21,1.4) > C[248,c(3,6:19)] ISOalpha3 UrbPop UrbRate BirthRate DeathRate FiBiAge InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity PhysDens EduExp 248 XKX 56 NA 14 7 24 NA NA NA 29.1 28.8 29.5 21 1.4 NA
The Vatican (Holy See) has index 99 in the data frame C. No additional data are available for it.