CIA World Factbook

Creating the CIA World Factbook data frame

March 2018

Data from the CIA World Factbook were obtained by inspecting a selected variable for some country and then clicking on the icon in the right margin, which gives a list of the corresponding values for all countries of the world. After copy-pasting this list into TextPad and some additional editing, we obtained a CSV file (tab-separated: the read.csv calls below use sep="\t").

We will extend the country codes data frame C (see Country codes) with selected data from the CIA World Factbook. To produce a mapping between the CIA World Factbook data and the country codes, we will use the keys U$COUNTRY and C$Country. Because some countries are labeled differently in the two sources, we have to establish those links manually.

> fmtU <- c("character","numeric","factor","numeric","factor")
> U <- read.csv("Urban.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtU)
> str(U)
'data.frame':   230 obs. of  5 variables:
 $ COUNTRY: chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
 $ UrbPop : num  27.6 59.3 71.9 87.1 84.1 45.6 100 23 92 62.5 ...
 $ yearP  : Factor w/ 2 levels "(2012)","(2017)": 2 2 2 2 2 2 2 2 2 2 ...
 $ UrbRate: num  3.77 1.81 2.26 0.63 0.09 4.6 0.88 -0.38 0.93 -0.1 ...
 $ yearR  : Factor w/ 2 levels "","(2015-20 est.)": 2 2 2 2 2 2 2 2 2 2 ...
> p <- match(C$Country,U$COUNTRY)
> q <- match(U$COUNTRY,C$Country)
> U$COUNTRY[is.na(q)]
> C$Country[is.na(p)]
> C$CIA <- C$Country
> p <- match(C$Country,U$COUNTRY)
> q <- match(U$COUNTRY,C$Country)
> C$CIA <- C$Country
> T<-cbind(which(is.na(p)),C$Country[which(is.na(p))])
> head(T)
     [,1] [,2]                            
[1,] "2"  "Aland Islands"                 
[2,] "9"  "Antarctica"                    
[3,] "17" "Bahamas"                       
[4,] "30" "Bouvet Island"                 
[5,] "33" "British Indian Ocean Territory"
[6,] "34" "Brunei Darussalam"             
> Z<-cbind(which(is.na(q)),U$COUNTRY[which(is.na(q))])
> head(Z)
     [,1] [,2]                               
[1,] "15" "Bahamas, The"                     
[2,] "30" "Brunei"                           
[3,] "33" "Burma"                            
[4,] "35" "Cabo Verde"                       
[5,] "46" "Congo, Democratic Republic of the"
[6,] "47" "Congo, Republic of the"           
> u <- c( 17,34,152,41,54,53,57,157,61,73,82,170,99,47,105,118,119,
+        122, 48,131,176,183,186,191,189,216,219,235,240,241,242,243)
> v <- c( 15,30, 33,35,46,47,50, 53,55,67,74, 75,90,92, 97,110,111,
+        114,123,124,163,169,171,175,185,199,201,217,221,222,223,224)
> cbind(u,C$CIA[u],v,U$COUNTRY[v])
      u                                       v                                                    
 [1,] "17"  "Bahamas"                         "15"  "Bahamas, The"                                 
 [2,] "34"  "Brunei Darussalam"               "30"  "Brunei"                                       
 [3,] "152" "Myanmar"                         "33"  "Burma"                                        
 [4,] "41"  "Cape Verde"                      "35"  "Cabo Verde"                                   
 [5,] "54"  "Congo, (Kinshasa)"               "46"  "Congo, Democratic Republic of the"            
 [6,] "53"  "Congo (Brazzaville)"             "47"  "Congo, Republic of the"                       
 [7,] "57"  "CĂ´te d'Ivoire"                  "50"  "Cote d'Ivoire"                                
 [8,] "157" "Netherlands Antilles"            "53"  "Curacao"                                      
 [9,] "61"  "Czech Republic"                  "55"  "Czechia"                                      
[10,] "73"  "Falkland Islands (Malvinas)"     "67"  "Falkland Islands (Islas Malvinas)"            
[11,] "82"  "Gambia"                          "74"  "Gambia, The"                                  
[12,] "170" "Palestinian Territory"           "75"  "Gaza Strip"                                   
[13,] "99"  "Holy See (Vatican City State)"   "90"  "Holy See (Vatican City)"                      
[14,] "47"  "Hong Kong, SAR China"            "92"  "Hong Kong"                                    
[15,] "105" "Iran, Islamic Republic of"       "97"  "Iran"                                         
[16,] "118" "Korea (North)"                   "110" "Korea, North"                                 
[17,] "119" "Korea (South)"                   "111" "Korea, South"                                 
[18,] "122" "Lao PDR"                         "114" "Laos"                                         
[19,] "48"  "Macao, SAR China"                "123" "Macau"                                        
[20,] "131" "Macedonia, Republic of"          "124" "Macedonia"                                    
[21,] "176" "Pitcairn"                        "163" "Pitcairn Islands"                             
[22,] "183" "Russian Federation"              "169" "Russia"                                       
[23,] "186" "Saint Helena"                    "171" "Saint Helena, Ascension, and Tristan da Cunha"
[24,] "191" "Saint Vincent and Grenadines"    "175" "Saint Vincent and the Grenadines"             
[25,] "189" "Saint-Martin (French part)"      "185" "Sint Maarten"                                 
[26,] "216" "Syrian Arab Republic (Syria)"    "199" "Syria"                                        
[27,] "219" "Tanzania, United Republic of"    "201" "Tanzania"                                     
[28,] "235" "United States of America"        "217" "United States"                                
[29,] "240" "Venezuela (Bolivarian Republic)" "221" "Venezuela"                                    
[30,] "241" "Viet Nam"                        "222" "Vietnam"                                      
[31,] "242" "Virgin Islands, US"              "223" "Virgin Islands"                               
[32,] "243" "Wallis and Futuna Islands"       "224" "Wallis and Futuna"                            
> C$CIA[u] <- U$COUNTRY[v]
> s <- match(U$COUNTRY,C$CIA)
> i <- which(is.na(s))
> i
[1] 225 227
> U$COUNTRY[i]
[1] "West Bank" "World"  
> names(U)
[1] "COUNTRY" "UrbPop"  "yearP"   "UrbRate" "yearR"  
> s <- match(U$COUNTRY,C$CIA)
> r <- which(is.na(s))
> i <- (1:length(s))[-r]
> j <- s[-r]
> C$UrbPop <- NA
> C$UrbPop[j] <- U$UrbPop[i]
> C$UrbRate <- NA
> C$UrbRate[j] <- U$UrbRate[i]
> cbind(U$COUNTRY,U$UrbPop,U$UrbRate)[1:10,]
      [,1]                  [,2]   [,3]   
 [1,] "Afghanistan"         "27.6" "3.77" 
 [2,] "Albania"             "59.3" "1.81" 
 [3,] "Algeria"             "71.9" "2.26" 
 [4,] "American Samoa"      "87.1" "0.63" 
 [5,] "Andorra"             "84.1" "0.09" 
 [6,] "Angola"              "45.6" "4.6"  
 [7,] "Anguilla"            "100"  "0.88" 
 [8,] "Antigua and Barbuda" "23"   "-0.38"
 [9,] "Argentina"           "92"   "0.93" 
[10,] "Armenia"             "62.5" "-0.1" 
> cbind(C$CIA,C$UrbPop,C$UrbRate)[1:10,]
      [,1]                  [,2]   [,3]   
 [1,] "Afghanistan"         "27.6" "3.77" 
 [2,] "Aland Islands"       NA     NA     
 [3,] "Albania"             "59.3" "1.81" 
 [4,] "Algeria"             "71.9" "2.26" 
 [5,] "American Samoa"      "87.1" "0.63" 
 [6,] "Andorra"             "84.1" "0.09" 
 [7,] "Angola"              "45.6" "4.6"  
 [8,] "Anguilla"            "100"  "0.88" 
 [9,] "Antarctica"          NA     NA     
[10,] "Antigua and Barbuda" "23"   "-0.38"
> e <- match(Europe,C$ISOalpha2)
> e
 [1]   3   6  12  15  21  22  28  35 215  60  61  84  62  71 208  74  76  77 234  83  86  87 101  58 107
[26] 102 110 129 130 123 145 146 131 137 166 156 177 178 182 183 214 202 201 193 227 232  99  NA
> C[e,]
                          Country ISOalpha2 ISOalpha3 ISOnumeric                     CIA UrbPop UrbRate
3                         Albania        AL       ALB          8                 Albania   59.3    1.81
6                         Andorra        AD       AND         20                 Andorra   84.1    0.09
12                        Armenia        AM       ARM         51                 Armenia   62.5   -0.10
15                        Austria        AT       AUT         40                 Austria   66.1    0.51
21                        Belarus        BY       BLR        112                 Belarus   77.4   -0.04
...
61                 Czech Republic        CZ       CZE        203                 Czechia   73.0    0.30
...
131        Macedonia, Republic of        MK       MKD        807               Macedonia   57.3    0.24
...
227                        Turkey        TR       TUR        792                  Turkey   74.4    1.54
232                       Ukraine        UA       UKR        804                 Ukraine   70.1   -0.35
99  Holy See (Vatican City State)        VA       VAT        336 Holy See (Vatican City)  100.0    0.10
NA                           <NA>      <NA>      <NA>         NA                    <NA>     NA      NA
> save(C,ascii=TRUE,file='CIA.Rdata')  

Adding variables

Initialization

> setwd("C:/Users/batagelj/Downloads/data/CIA")
> Europe <- c("AL", "AD", "AM", "AT", "BY", "BE", "BA", "BG", "CH", "CY",
+       "CZ", "DE", "DK", "EE", "ES", "FO", "FI", "FR", "GB", "GE", "GI",
+       "GR", "HU", "HR", "IE", "IS", "IT", "LT", "LU", "LV", "MD", "MC",
+       "MK", "MT", "NO", "NL", "PL", "PT", "RO", "RU", "SE", "SI", "SK",
+       "SM", "TR", "UA", "VA", "XK")
> load('CIA.Rdata')
> str(C)
'data.frame':   247 obs. of  7 variables:
 $ Country   : chr  "Afghanistan" "Aland Islands" "Albania" "Algeria" ...
 $ ISOalpha2 : chr  "AF" "AX" "AL" "DZ" ...
 $ ISOalpha3 : chr  "AFG" "ALA" "ALB" "DZA" ...
 $ ISOnumeric: int  4 248 8 12 16 20 24 660 10 28 ...
 $ CIA       : chr  "Afghanistan" "Aland Islands" "Albania" "Algeria" ...
 $ UrbPop    : num  27.6 NA 59.3 71.9 87.1 84.1 45.6 100 NA 23 ...
 $ UrbRate   : num  3.77 NA 1.81 2.26 0.63 0.09 4.6 0.88 NA -0.38 ...
> e <- match(Europe,C$ISOalpha2)

Birth rate

> fmtU <- c("character","numeric","factor")
> U <- read.csv("BirthRate.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtU)
> str(U)
'data.frame':   228 obs. of  3 variables:
 $ COUNTRY  : chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
 $ BirthRate: num  37.9 13.2 22.2 19.6 7.5 44.2 12.5 15.7 16.7 12.9 ...
 $ year     : Factor w/ 3 levels "(2012 est.)",..: 3 3 3 3 3 3 3 3 3 3 ...
> s <- match(U$COUNTRY,C$CIA)
> which(is.na(s))
[1]  67 198 223 225
> U$COUNTRY[which(is.na(s))]
[1] "European Union" "Taiwan"         "West Bank"      "World"         
> r <- which(is.na(s))
> i <- (1:length(s))[-r]
> j <- s[-r]
> C$BirthRate <- NA
> C$BirthRate[j] <- U$BirthRate[i]
> head(C)
         Country ISOalpha2 ISOalpha3 ISOnumeric            CIA UrbPop UrbRate BirthRate
1    Afghanistan        AF       AFG          4    Afghanistan   27.6    3.77      37.9
2  Aland Islands        AX       ALA        248  Aland Islands     NA      NA        NA
3        Albania        AL       ALB          8        Albania   59.3    1.81      13.2
4        Algeria        DZ       DZA         12        Algeria   71.9    2.26      22.2
5 American Samoa        AS       ASM         16 American Samoa   87.1    0.63      19.6
6        Andorra        AD       AND         20        Andorra   84.1    0.09       7.5

Death rate

> U <- read.csv("DeathRate.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtU)
> str(U)
'data.frame':   228 obs. of  3 variables:
 $ COUNTRY  : chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
 $ DeathRate: num  13.4 6.8 4.3 5.9 7.3 9.2 4.6 5.7 7.5 9.4 ...
 $ year     : Factor w/ 3 levels "(2012 est.)",..: 3 3 3 3 3 3 3 3 3 3 ...
> s <- match(U$COUNTRY,C$CIA)
> which(is.na(s))
[1]  67 198 223 225
> U$COUNTRY[which(is.na(s))]
[1] "European Union" "Taiwan"         "West Bank"      "World"         
> r <- which(is.na(s))
> i <- (1:length(s))[-r]
> j <- s[-r]
> C$DeathRate <- NA
> C$DeathRate[j] <- U$DeathRate[i]
> C[1:6,c(2,5:9)]
  ISOalpha2            CIA UrbPop UrbRate BirthRate DeathRate
1        AF    Afghanistan   27.6    3.77      37.9      13.4
2        AX  Aland Islands     NA      NA        NA        NA
3        AL        Albania   59.3    1.81      13.2       6.8
4        DZ        Algeria   71.9    2.26      22.2       4.3
5        AS American Samoa   87.1    0.63      19.6       5.9
6        AD        Andorra   84.1    0.09       7.5       7.3
>

First birth age

> U <- read.csv("FiBiAge.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtU)
> str(U)
'data.frame':   131 obs. of  3 variables:
 $ COUNTRY: chr  "Afghanistan" "Albania" "Angola" "Armenia" ...
 $ FiBiAge: num  19.9 24.5 19.4 24.4 28.7 29 23.2 18.5 25.7 28.6 ...
 $ year   : Factor w/ 33 levels " (2006 est.)",..: 31 8 32 10 8 8 8 29 8 7 ...
> s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s))
> i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]}
> C$FiBiAge <- NA; C$FiBiAge[j] <- U$FiBiAge[i]
> C[1:10,c(2,5:10)]
   ISOalpha2                 CIA UrbPop UrbRate BirthRate DeathRate FiBiAge
1         AF         Afghanistan   27.6    3.77      37.9      13.4    19.9
2         AX       Aland Islands     NA      NA        NA        NA      NA
3         AL             Albania   59.3    1.81      13.2       6.8    24.5
4         DZ             Algeria   71.9    2.26      22.2       4.3      NA
5         AS      American Samoa   87.1    0.63      19.6       5.9      NA
6         AD             Andorra   84.1    0.09       7.5       7.3      NA
7         AO              Angola   45.6    4.60      44.2       9.2    19.4
8         AI            Anguilla  100.0    0.88      12.5       4.6      NA
9         AQ          Antarctica     NA      NA        NA        NA      NA
10        AG Antigua and Barbuda   23.0   -0.38      15.7       5.7      NA

Infant mortality

> fmtU <- c("character","numeric","numeric","numeric","factor")
> U <- read.csv("InfMort.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtU)
> str(U)
'data.frame':   234 obs. of  5 variables:
 $ COUNTRY: chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
 $ InfMtot: num  110.6 11.9 19.6 11.3 3.6 ...
 $ InfMmal: num  118 13.3 21.2 13.2 3.6 73.3 3.7 13.9 10.7 14.1 ...
 $ InfMfem: num  102.9 10.5 17.9 9.3 3.6 ...
 $ year   : Factor w/ 2 levels "(2016 est.)",..: 2 2 2 2 2 2 2 2 2 2 ...
> s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s))
> i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]}
> r
[1]  69 198 203 229 231
> U$COUNTRY[r]
[1] "European Union" "Svalbard"       "Taiwan"         "West Bank"      "World"         
> C$InfMtot <- NA; C$InfMtot[j] <- U$InfMtot[i]
> C$InfMmal <- NA; C$InfMmal[j] <- U$InfMmal[i]
> C$InfMfem <- NA; C$InfMfem[j] <- U$InfMfem[i]
> C[1:10,c(2,5:13)]
   ISOalpha2                 CIA UrbPop UrbRate BirthRate DeathRate FiBiAge InfMtot InfMmal InfMfem
1         AF         Afghanistan   27.6    3.77      37.9      13.4    19.9   110.6   118.0   102.9
2         AX       Aland Islands     NA      NA        NA        NA      NA      NA      NA      NA
3         AL             Albania   59.3    1.81      13.2       6.8    24.5    11.9    13.3    10.5
4         DZ             Algeria   71.9    2.26      22.2       4.3      NA    19.6    21.2    17.9
5         AS      American Samoa   87.1    0.63      19.6       5.9      NA    11.3    13.2     9.3
6         AD             Andorra   84.1    0.09       7.5       7.3      NA     3.6     3.6     3.6
7         AO              Angola   45.6    4.60      44.2       9.2    19.4    67.6    73.3    61.8
8         AI            Anguilla  100.0    0.88      12.5       4.6      NA     3.3     3.7     2.9
9         AQ          Antarctica     NA      NA        NA        NA      NA      NA      NA      NA
10        AG Antigua and Barbuda   23.0   -0.38      15.7       5.7      NA    12.1    13.9    10.2
> save(C,ascii=TRUE,file='CIA.Rdata')

Median Age

> fmtU <- c("character","numeric","numeric","numeric","factor")
> U <- read.csv("MedAge.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtU)
> str(U)
'data.frame':   230 obs. of  5 variables:
 $ COUNTRY: chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
 $ total  : num  18.8 32.9 28.1 25.5 44.3 15.9 34.8 31.9 31.7 35.1 ...
 $ male   : num  18.8 31.6 27.8 25.1 44.4 15.4 32.9 30 30.5 33.3 ...
 $ female : num  18.9 34.3 28.4 26 44.1 16.3 36.7 33.5 32.9 36.9 ...
 $ year   : Factor w/ 1 level "(2017 est.)": 1 1 1 1 1 1 1 1 1 1 ...
> s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s))
> i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]}
> r
[1]  67 111 169 173 200 225 227
> U$COUNTRY[r]
[1] "European Union"   "Kosovo"           "Saint Barthelemy" "Saint Martin"     "Taiwan"           "West Bank"       
[7] "World"           
> C$MedAtot <- NA; C$MedAtot[j] <- U$total[i]
> C$MedAmal <- NA; C$MedAmal[j] <- U$male[i]
> C$MedAfem <- NA; C$MedAfem[j] <- U$female[i]
> C[1:10,c(3,6:16)]
   ISOalpha3 UrbPop UrbRate BirthRate DeathRate FiBiAge InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem
1        AFG   27.6    3.77      37.9      13.4    19.9   110.6   118.0   102.9    18.8    18.8    18.9
2        ALA     NA      NA        NA        NA      NA      NA      NA      NA      NA      NA      NA
3        ALB   59.3    1.81      13.2       6.8    24.5    11.9    13.3    10.5    32.9    31.6    34.3
4        DZA   71.9    2.26      22.2       4.3      NA    19.6    21.2    17.9    28.1    27.8    28.4
5        ASM   87.1    0.63      19.6       5.9      NA    11.3    13.2     9.3    25.5    25.1    26.0
6        AND   84.1    0.09       7.5       7.3      NA     3.6     3.6     3.6    44.3    44.4    44.1
7        AGO   45.6    4.60      44.2       9.2    19.4    67.6    73.3    61.8    15.9    15.4    16.3
8        AIA  100.0    0.88      12.5       4.6      NA     3.3     3.7     2.9    34.8    32.9    36.7
9        ATA     NA      NA        NA        NA      NA      NA      NA      NA      NA      NA      NA
10       ATG   23.0   -0.38      15.7       5.7      NA    12.1    13.9    10.2    31.9    30.0    33.5
> save(C,ascii=TRUE,file='CIA.Rdata')

Obesity

> fmtS <- c("character","numeric","factor")
> U <- read.csv("Obesity.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtS)
> str(U)
'data.frame':   192 obs. of  3 variables:
 $ COUNTRY: chr  "Afghanistan" "Albania" "Algeria" "Andorra" ...
 $ Obesity: num  5.5 21.7 27.4 25.6 8.2 18.9 28.3 20.2 29 20.1 ...
 $ year   : Factor w/ 2 levels "(2014)","(2016)": 2 2 2 2 2 2 2 2 2 2 ...
> s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s))
> i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]}
> r
integer(0)
> C$Obesity <- NA; C$Obesity[j] <- U$Obesity[i]
> C[1:6,c(3,10:17)]
  ISOalpha3 FiBiAge InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity
1       AFG    19.9   110.6   118.0   102.9    18.8    18.8    18.9     5.5
2       ALA      NA      NA      NA      NA      NA      NA      NA      NA
3       ALB    24.5    11.9    13.3    10.5    32.9    31.6    34.3    21.7
4       DZA      NA    19.6    21.2    17.9    28.1    27.8    28.4    27.4
5       ASM      NA    11.3    13.2     9.3    25.5    25.1    26.0      NA
6       AND      NA     3.6     3.6     3.6    44.3    44.4    44.1    25.6
> save(C,ascii=TRUE,file='CIA.Rdata')

Physicians density

> U <- read.csv("PhysDens.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtS)
> str(U)
'data.frame':   183 obs. of  3 variables:
 $ COUNTRY : chr  "Afghanistan" "Albania" "Algeria" "Andorra" ...
 $ PhysDens: num  0.3 1.29 1.19 3.69 0.14 3.91 2.8 3.5 5.23 3.4 ...
 $ year    : Factor w/ 10 levels "(2007)","(2008)",..: 10 7 1 9 3 7 8 9 10 8 ...
> s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s))
> i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]}
> r
[1] 180
> U$COUNTRY[r]
[1] "West Bank"
> C$PhysDens <- NA; C$PhysDens[j] <- U$PhysDens[i]
> C[1:6,c(3,10:18)]
  ISOalpha3 FiBiAge InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity PhysDens
1       AFG    19.9   110.6   118.0   102.9    18.8    18.8    18.9     5.5     0.30
2       ALA      NA      NA      NA      NA      NA      NA      NA      NA       NA
3       ALB    24.5    11.9    13.3    10.5    32.9    31.6    34.3    21.7     1.29
4       DZA      NA    19.6    21.2    17.9    28.1    27.8    28.4    27.4     1.19
5       ASM      NA    11.3    13.2     9.3    25.5    25.1    26.0      NA       NA
6       AND      NA     3.6     3.6     3.6    44.3    44.4    44.1    25.6     3.69
> save(C,ascii=TRUE,file='CIA.Rdata')

Education expenditures

> U <- read.csv("EduExp.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtS)
> str(U)
'data.frame':   199 obs. of  3 variables:
 $ COUNTRY: chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
 $ EduExp : num  3.4 3.5 4.3 NA 3.1 3.5 2.8 2.6 5.5 2.8 ...
 $ year   : Factor w/ 10 levels "(2007)","(2008)",..: 9 7 2 NA 8 4 2 3 8 9 ...
> levels(U$year)
 [1] "(2007)" "(2008)" "(2009)" "(2010)" "(2011)" "(2012)" "(2013)" "(2014)" "(2015)" "(2016)"
> s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s))
> i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]}
> r
[1] 196
> U$COUNTRY[r]
[1] "West Bank"
> C$EduExp <- NA; C$EduExp[j] <- U$EduExp[i]
> C[1:6,c(3,11:19)]
  ISOalpha3 InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity PhysDens EduExp
1       AFG   110.6   118.0   102.9    18.8    18.8    18.9     5.5     0.30    3.4
2       ALA      NA      NA      NA      NA      NA      NA      NA       NA     NA
3       ALB    11.9    13.3    10.5    32.9    31.6    34.3    21.7     1.29    3.5
4       DZA    19.6    21.2    17.9    28.1    27.8    28.4    27.4     1.19    4.3
5       ASM    11.3    13.2     9.3    25.5    25.1    26.0      NA       NA     NA
6       AND     3.6     3.6     3.6    44.3    44.4    44.1    25.6     3.69    3.1

Population

> U <- read.csv("Pop.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtS)
> str(U)
'data.frame':   273 obs. of  3 variables:
 $ COUNTRY: chr  "Afghanistan" "Akrotiri" "Albania" "Algeria" ...
 $ Pop    : num  34124811 15700 3047987 40969443 51504 ...
 $ year   : Factor w/ 7 levels "()","(2014 est.)",..: 7 1 7 7 7 7 7 7 1 7 ...
> s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s))
> i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]}
> cbind(r,U$COUNTRY[r])
      r                                                                        
 [1,] "2"   "Akrotiri"                                                         
 [2,] "14"  "Ashmore and Cartier "                                             
 [3,] "18"  "Bahamas The"                                                      
 [4,] "50"  "Clipperton Island"                                                
 [5,] "54"  "Congo Democratic Republic of the"                                 
 [6,] "55"  "Congo Republic of the"                                            
 [7,] "57"  "Coral Sea Islands"                                                
 [8,] "66"  "Dhekelia"                                                         
 [9,] "77"  "European Union"                                                   
[10,] "84"  "French Southern and Antarctic Lands"                              
[11,] "85"  "Ile Amsterdam (Ile Amsterdam et Ile Saint-Paul):"                 
[12,] "86"  "Ile Saint-Paul (Ile Amsterdam et Ile Saint-Paul):"                
[13,] "87"  "Iles Crozet "                                                     
[14,] "88"  "Iles Kerguelen "                                                  
[15,] "89"  "Bassas da India (Iles Eparses)"                                   
[16,] "90"  "Europa Island Glorioso Islands Juan de Nova Island (Iles Eparses)"
[17,] "91"  "Tromelin Island (Iles Eparses)"                                   
[18,] "93"  "Gambia The"                                                       
[19,] "109" "Heard Island and McDonald Islands"                                
[20,] "113" "Howland Island"                                                   
[21,] "125" "Jan Mayen"                                                        
[22,] "127" "Jarvis Island"                                                    
[23,] "129" "Johnston Atoll"                                                   
[24,] "133" "Kingman Reef"                                                     
[25,] "135" "Korea North"                                                      
[26,] "136" "Korea South"                                                      
[27,] "137" "Kosovo"                                                           
[28,] "161" "Micronesia Federated States of"                                   
[29,] "162" "Midway Islands"                                                   
[30,] "172" "Navassa Island"                                                   
[31,] "187" "Palmyra Atoll"                                                    
[32,] "190" "Paracel Islands"                                                  
[33,] "202" "Saint Barthelemy"                                                 
[34,] "203" "Saint Helena Ascension and Tristan da Cunha"                      
[35,] "206" "Saint Martin"                                                     
[36,] "224" "South Georgia and South Sandwich Islands"                         
[37,] "227" "Spratly Islands"                                                  
[38,] "231" "Svalbard"                                                         
[39,] "236" "Taiwan"                                                           
[40,] "254" "United States Pacific Island Wildlife Refuges"                    
[41,] "255" "Jarvis Island"                                                    
[42,] "256" "Johnston Atoll"                                                   
[43,] "257" "Midway Islands"                                                   
[44,] "258" "Palmyra Atoll"                                                    
[45,] "266" "Wake Island"                                                      
[46,] "268" "West Bank"                                                        
[47,] "270" "World"                                                            
> C$Pop <- NA; C$Pop[j] <- U$Pop[i]
> C[1:6,c(3,11:20)]
  ISOalpha3 InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity PhysDens EduExp      Pop
1       AFG   110.6   118.0   102.9    18.8    18.8    18.9     5.5     0.30    3.4 34124811
2       ALA      NA      NA      NA      NA      NA      NA      NA       NA     NA       NA
3       ALB    11.9    13.3    10.5    32.9    31.6    34.3    21.7     1.29    3.5  3047987
4       DZA    19.6    21.2    17.9    28.1    27.8    28.4    27.4     1.19    4.3 40969443
5       ASM    11.3    13.2     9.3    25.5    25.1    26.0      NA       NA     NA    51504
6       AND     3.6     3.6     3.6    44.3    44.4    44.1    25.6     3.69    3.1    76965
> save(C,ascii=TRUE,file='CIA.Rdata')

Area

> fmtA <- c("character","numeric","numeric","numeric")
> U <- read.csv("Area.txt",sep="\t",skip=1,header=TRUE,colClasses=fmtA)
> str(U)
'data.frame':   275 obs. of  4 variables:
 $ COUNTRY  : chr  "Afghanistan" "Akrotiri" "Albania" "Algeria" ...
 $ AreaTot  : num  652230 123 28748 2381741 199 ...
 $ AreaLand : num  652230 123 27398 2381741 199 ...
 $ AreaWater: num  0 0 1350 0 0 0 0 0 0 0 ...
> s <- match(U$COUNTRY,C$CIA); r <- which(is.na(s))
> i <- 1:length(s); j <- s; if(length(r)>0) {i <- i[-r]; j <- s[-r]}
> cbind(r,U$COUNTRY[r])
      r                                                       
 [1,] "2"   "Akrotiri"                                        
 [2,] "11"  "Arctic Ocean"                                    
 [3,] "15"  "Ashmore and Cartier Islands"                     
 [4,] "16"  "Atlantic Ocean"                                  
 [5,] "20"  "Bahamas The"                                     
 [6,] "52"  "Clipperton Island"                               
 [7,] "56"  "Congo Democratic Republic of the"                
 [8,] "57"  "Congo Republic of the"                           
 [9,] "59"  "Coral Sea Islands"                               
[10,] "68"  "Dhekelia"                                        
[11,] "79"  "European Union"                                  
[12,] "86"  "Ile Amsterdam (Ile Amsterdam et Ile Saint-Paul)" 
[13,] "87"  "Ile Saint-Paul (Ile Amsterdam et Ile Saint-Paul)"
[14,] "88"  "Iles Crozet"                                     
[15,] "89"  "Iles Kerguelen"                                  
[16,] "90"  "Bassas da India (Iles Eparses)"                  
[17,] "91"  "Europa Island (Iles Eparses)"                    
[18,] "92"  "Glorioso Islands (Iles Eparses)"                 
[19,] "93"  "Juan de Nova Island (Iles Eparses)"              
[20,] "94"  "Tromelin Island (Iles Eparses)"                  
[21,] "96"  "Gambia The"                                      
[22,] "112" "Heard Island and McDonald Islands"               
[23,] "116" "Howland Island"                                  
[24,] "120" "Indian Ocean"                                    
[25,] "129" "Jan Mayen"                                       
[26,] "131" "Jarvis Island"                                   
[27,] "133" "Johnston Atoll"                                  
[28,] "137" "Kingman Reef"                                    
[29,] "139" "Korea North"                                     
[30,] "140" "Korea South"                                     
[31,] "141" "Kosovo"                                          
[32,] "165" "Micronesia Federated States of"                  
[33,] "166" "Midway Islands"                                  
[34,] "176" "Navassa Island"                                  
[35,] "189" "Pacific Ocean"                                   
[36,] "192" "Palmyra Atoll"                                   
[37,] "195" "Paracel Islands"                                 
[38,] "207" "Saint Barthelemy"                                
[39,] "208" "Saint Helena Ascension and Tristan da Cunha"     
[40,] "211" "Saint Martin"                                    
[41,] "229" "South Georgia and South Sandwich Islands"        
[42,] "231" "Southern Ocean"                                  
[43,] "233" "Spratly Islands"                                 
[44,] "237" "Svalbard"                                        
[45,] "242" "Taiwan"                                          
[46,] "260" "United States Pacific Island Wildlife Refuges"   
[47,] "268" "Wake Island"                                     
[48,] "270" "West Bank"                                       
[49,] "272" "World"                                           
> C$AreaTot <- NA; C$AreaTot[j] <- U$AreaTot[i]
> C[1:6,c(3,11:21)]
  ISOalpha3 InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity PhysDens EduExp      Pop AreaTot
1       AFG   110.6   118.0   102.9    18.8    18.8    18.9     5.5     0.30    3.4 34124811  652230
2       ALA      NA      NA      NA      NA      NA      NA      NA       NA     NA       NA      NA
3       ALB    11.9    13.3    10.5    32.9    31.6    34.3    21.7     1.29    3.5  3047987   28748
4       DZA    19.6    21.2    17.9    28.1    27.8    28.4    27.4     1.19    4.3 40969443 2381741
5       ASM    11.3    13.2     9.3    25.5    25.1    26.0      NA       NA     NA    51504     199
6       AND     3.6     3.6     3.6    44.3    44.4    44.1    25.6     3.69    3.1    76965     468
> save(C,ascii=TRUE,file='CIA.Rdata')

Kosovo

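Most data for Kosovo are missing. Kosovo has no row in the data frame C (it is among the unmatched names listed above), so we append it as a new row 248 and enter the few available values by hand.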

> C[248,1:21] <- NA
> C[248,1:21] 
    Country ISOalpha2 ISOalpha3 ISOnumeric  CIA UrbPop UrbRate BirthRate DeathRate FiBiAge InfMtot InfMmal InfMfem
248    <NA>      <NA>      <NA>         NA <NA>     NA      NA        NA        NA      NA      NA      NA      NA
    MedAtot MedAmal MedAfem Obesity PhysDens EduExp    Pop AreaTot
248      NA      NA      NA      NA       NA     NA     NA      NA
> C[248,1:5] <- c("Kosovo","XK","XKX",NA,"Kosovo")
> C[248,20] <- 1895250 
> C[248,21] <- 10887 
> C[248,14:16] <- c(29.1,28.8,29.5) 
> C[248,]
    Country ISOalpha2 ISOalpha3 ISOnumeric    CIA UrbPop UrbRate BirthRate DeathRate FiBiAge InfMtot InfMmal InfMfem
248  Kosovo        XK       XKX       <NA> Kosovo     NA      NA        NA        NA      NA      NA      NA      NA
    MedAtot MedAmal MedAfem Obesity PhysDens EduExp     Pop AreaTot
248    29.1    28.8    29.5      NA       NA     NA 1895250   10887
> save(C,ascii=TRUE,file='C:/Users/batagelj/Downloads/data/CIA/CIA.Rdata')

To keep Kosovo in the analysis, we imputed some of its missing values, estimating them from the corresponding values of the neighboring countries (Serbia, Montenegro, Albania, and Macedonia):

> C[which(C$ISOalpha2=="MK"),]
> C[c(197,148,248,3,131),c(3,6:19)]
    ISOalpha3 UrbPop UrbRate BirthRate DeathRate FiBiAge InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity PhysDens EduExp
197       SRB   55.8   -0.29       9.0      13.6    27.9     5.8     6.7     4.9    42.6    40.9    44.3    21.5     2.46    4.2
148       MNE   64.4    0.25      10.0       9.7    26.3      NA      NA      NA    40.7    39.9    41.8    23.3     2.34     NA
248       XKX     NA      NA        NA        NA      NA      NA      NA      NA    29.1    28.8    29.5      NA       NA     NA
3         ALB   59.3    1.81      13.2       6.8    24.5    11.9    13.3    10.5    32.9    31.6    34.3    21.7     1.29    3.5
131       MKD   57.3    0.24      11.4       9.2    26.8     7.4     7.6     7.1    37.9    36.8    39.0    22.4     2.80     NA

Our estimates are as follows:

248       XKX     56      NA        14         7      24      NA      NA      NA    29.1    28.8    29.5      21      1.4     NA
> C[248,c(6,8:10,17,18)] <- c(56,14,7,24,21,1.4)
> C[248,c(3,6:19)]
    ISOalpha3 UrbPop UrbRate BirthRate DeathRate FiBiAge InfMtot InfMmal InfMfem MedAtot MedAmal MedAfem Obesity PhysDens EduExp
248       XKX     56      NA        14         7      24      NA      NA      NA    29.1    28.8    29.5      21      1.4     NA

Vatican

The Vatican (Holy See) has index 99 in the data frame C. No additional data are available.

notes/da/cia.txt · Last modified: 2018/04/06 09:23 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki