CIA World Factbook

Reading the data frame

In Creating CIA World Factbook data frame in R, we described the transformation of the Factbook JSON file into an R data frame. From this data frame, we can derive some hypernets.

> wdir <- "C:/Users/vlado/DL/data/hyper/CIA"
> setwd(wdir)
> library(jsonlite)
> FB <- fromJSON("./test/Factbook.json")

https://github.com/bavla/symData/tree/master/CIA

Resources

> R <- FB$resources; m <- length(R); S <- NULL
> for(i in 1:m) S <- union(S,R[[i]])
> V <- data.frame(ID=sort(unlist(S))); n <- nrow(V)
> FB$region[74] <- "Europe"
> reg <- factor(FB$region)
> data <- list(regID=levels(reg))
> reg <- as.integer(reg)
> info <- list(network="FB_resources",
+   title="CIA factbook: resources",
+   by="iancoleman",
+   href=c("https://www.cia.gov/the-world-factbook/",
+          "https://github.com/iancoleman/cia_world_factbook_api#data"),
+   creator="V. Batagelj",
+   date=date(),
+   nNodes=n, 
+   nLinks=m,
+   simple=NA )
> H <- data.frame(ID=FB$Country,iso2=FB$ISO2,area=FB$area,pop=FB$pop,
+   gdp=FB$gdp,E=rep(NA,m))
> E = vector(mode="list",m)
> for(i in 1:m) E[[i]] <- as.integer(factor(R[[i]],levels=V$ID))
> H$E <- E
> HR <- list(format="hypernets",info=info,nodes=V,links=H,data=data)
> str(HR)
List of 5
 $ format: chr "hypernets"
 $ info  :List of 9
  ..$ network: chr "FB_resources"
  ..$ title  : chr "CIA factbook: resources"
  ..$ by     : chr "iancoleman"
  ..$ href   : chr [1:2] "https://www.cia.gov/the-world-factbook/" "https://github.com/iancoleman/cia_world_factbook_api#data"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Sun Oct 22 18:56:12 2023"
  ..$ nNodes : int 293
  ..$ nLinks : int 237
  ..$ simple : logi NA
 $ nodes :'data.frame': 293 obs. of  1 variable:
  ..$ ID: chr [1:293] "abundant hydropower" "agricultural" "aloes" "alumina" ...
 $ links :'data.frame': 237 obs. of  6 variables:
  ..$ ID  : chr [1:237] "Afghanistan" "Albania" "Algeria" "American Samoa" ...
  ..$ iso2: chr [1:237] "AF" "AL" "DZ" "AS" ...
  ..$ area: int [1:237] 652230 28748 2381740 224 468 1246700 91 443 2780400 29743 ...
  ..$ pop : int [1:237] 36643815 3074579 42972878 49437 77000 32522339 18090 98179 45479118 3021324 ...
  ..$ gdp : num [1:237] 6.94e+10 3.60e+10 6.30e+11 6.58e+08 3.33e+09 ...
  ..$ E   :List of 237
  .. ..$ : int [1:15] 171 195 52 62 43 263 18 261 133 291 ...
  .. ..$ : int [1:12] 195 171 52 20 43 62 127 175 232 271 ...
  .. ..$ : int [1:7] 195 171 127 200 280 133 291
  .. .. [list output truncated]
 $ data  :List of 1
  ..$ regID: chr [1:11] "Africa" "Arctic Region" "Asia" "AsiaEurope" ...
> write(toJSON(HR),"FB_resources.json")

The set of nodes/resources needs some additional cleaning.

Making a Pajek 2-mode network

For the conversion of a hypernet HN into the corresponding two-mode network we prepared in the R-library hyperNets the function hyper.save2pajek

hyper.save2pajek <- function(HN,netFile){
  H <- HN$links; m <- nrow(H); N <- names(H)
  V <- HN$nodes$ID; n <- length(V)
  net <- file(netFile,"w")
  cat("% hyper2pajek",date(),"\n*vertices",m+n,m,"\n",file=net)
  for(i in 1:m) cat(i,' "',H$ID[i],'"\n',sep="",file=net)
  for(i in 1:n) cat(m+i,' "',V[i],'"\n',sep="",file=net)
  if("E" %in% N){ cat("*edges\n",file=net)
    for(i in 1:m){L <- H$E[[i]]; 
    if(!is.na(L[1])) for(j in L) cat(i,m+j,"\n",file=net)}}
  if("In" %in% N){ cat("*arcs\n",file=net)
    for(i in 1:m){L <- H$In[[i]]; 
    if(!is.na(L[1])) for(j in L) cat(m+j,i,"\n",file=net)}}
  if("Out" %in% N){ cat("*arcs\n",file=net)
    for(i in 1:m){L <- H$Out[[i]]; 
    if(!is.na(L[1])) for(j in L) cat(i,m+j,"\n",file=net)}}
  close(net)
}

Using this function we create the Pajek file by

> source("https://raw.githubusercontent.com/bavla/hypernets/main/R/hyperNets.R")
> hyper.save2pajek(HR,"FB_resources.net")

https://github.com/bavla/NormNet/tree/main/data/CIA/resources

Import/Export

> Im <- FB$impCom; Ex <- FB$expCom; m <- length(Im); S <- NULL
> for(i in 1:m) S <- union(S,union(Ex[[i]],Im[[i]]))
> V <- data.frame(ID=sort(unlist(S))); n <- nrow(V)
> FB$region[74] <- "Europe"
> reg <- factor(FB$region)
> data <- list(regID=levels(reg))
> reg <- as.integer(reg)
> info <- list(network="FB_ImpExp",
+   title="CIA factbook: import/export",
+   by="iancoleman",
+   href=c("https://www.cia.gov/the-world-factbook/",
+          "https://github.com/iancoleman/cia_world_factbook_api#data"),
+   creator="V. Batagelj",
+   date=date(),
+   nNodes=n, 
+   nLinks=m,
+   simple=NA )
> H <- data.frame(ID=FB$Country,iso2=FB$ISO2,area=FB$area,pop=FB$pop,
+   gdp=FB$gdp,In=rep(NA,m),Out=rep(NA,m))
> In = vector(mode="list",m); Out = vector(mode="list",m)
> for(i in 1:m) In[[i]] <- as.integer(factor(Im[[i]],levels=V$ID))
> for(i in 1:m) Out[[i]] <- as.integer(factor(Ex[[i]],levels=V$ID))
> H$In <- In; H$Out <- Out
> HR <- list(format="hypernets",info=info,nodes=V,links=H,data=data)
> str(HR)
List of 5
 $ format: chr "hypernets"
 $ info  :List of 9
  ..$ network: chr "FB_ImpExp"
  ..$ title  : chr "CIA factbook: import/export"
  ..$ by     : chr "iancoleman"
  ..$ href   : chr [1:2] "https://www.cia.gov/the-world-factbook/" "https://github.com/iancoleman/cia_world_factbook_api#data"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Mon Oct 23 03:20:24 2023"
  ..$ nNodes : int 753
  ..$ nLinks : int 237
  ..$ simple : logi NA
 $ nodes :'data.frame': 753 obs. of  1 variable:
  ..$ ID: chr [1:753] "\"jewelry" "(10%)" "(13%)" "(14%)" ...
 $ links :'data.frame': 237 obs. of  7 variables:
  ..$ ID  : chr [1:237] "Afghanistan" "Albania" "Algeria" "American Samoa" ...
  ..$ iso2: chr [1:237] "AF" "AL" "DZ" "AS" ...
  ..$ area: int [1:237] 652230 28748 2381740 224 468 1246700 91 443 2780400 29743 ...
  ..$ pop : int [1:237] 36643815 3074579 42972878 49437 77000 32522339 18090 98179 45479118 3021324 ...
  ..$ gdp : num [1:237] 6.94e+10 3.60e+10 6.30e+11 6.58e+08 3.33e+09 ...
  ..$ In  :List of 237
  .. ..$ : int [1:5] 382 484 266 691 524
  .. ..$ : int [1:5] 382 231 271 691 109
  .. ..$ : int [1:3] 85 271 145
  .. .. [list output truncated]
  ..$ Out :List of 237
  .. ..$ : int [1:11] 474 283 464 316 748 161 319 520 552 621 ...
  .. ..$ : int [1:12] 34 119 273 43 429 427 173 100 143 726 ...
  .. ..$ : int [1:3] 523 454 524
  .. .. [list output truncated]
 $ data  :List of 1
  ..$ regID: chr [1:11] "Africa" "Arctic Region" "Asia" "AsiaEurope" ...
> # str(HR)
> write(toJSON(HR),"FB_ImpExp.json")
> source(https://raw.githubusercontent.com/bavla/hypernets/main/R/hyperNets.R)
> hyper.save2pajek(HR,"FB_ImpExp.net")

Agriculture_products

> A <- FB$agroP; m <- length(A); S <- NULL
> for(i in 1:m) S <- union(S,A[[i]])
> V <- data.frame(ID=sort(unlist(S))); n <- nrow(V)
> FB$region[74] <- "Europe"
> reg <- factor(FB$region)
> data <- list(regID=levels(reg))
> reg <- as.integer(reg)
> info <- list(network="FB_agroP",
+   title="CIA factbook: agriculture_products",
+   by="iancoleman",
+   href=c("https://www.cia.gov/the-world-factbook/",
+          "https://github.com/iancoleman/cia_world_factbook_api#data"),
+   creator="V. Batagelj",
+   date=date(),
+   nNodes=n, 
+   nLinks=m,
+   simple=NA )
> H <- data.frame(ID=FB$Country,iso2=FB$ISO2,area=FB$area,pop=FB$pop,
+   gdp=FB$gdp,E=rep(NA,m))
> E = vector(mode="list",m)
> for(i in 1:m) E[[i]] <- as.integer(factor(A[[i]],levels=V$ID))
> H$E <- E
> HR <- list(format="hypernets",info=info,nodes=V,links=H,data=data)
> str(HR)
List of 5
 $ format: chr "hypernets"
 $ info  :List of 9
  ..$ network: chr "FB_agroP"
  ..$ title  : chr "CIA factbook: agriculture_products"
  ..$ by     : chr "iancoleman"
  ..$ href   : chr [1:2] "https://www.cia.gov/the-world-factbook/" "https://github.com/iancoleman/cia_world_factbook_api#data"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Tue Oct 24 02:18:55 2023"
  ..$ nNodes : int 290
  ..$ nLinks : int 237
  ..$ simple : logi NA
 $ nodes :'data.frame': 290 obs. of  1 variable:
  ..$ ID: chr [1:290] "ackees" "African palm" "alfalfa" "almonds" ...
 $ links :'data.frame': 237 obs. of  6 variables:
  ..$ ID  : chr [1:237] "Afghanistan" "Albania" "Algeria" "American Samoa" ...
  ..$ iso2: chr [1:237] "AF" "AL" "DZ" "AS" ...
  ..$ area: int [1:237] 652230 28748 2381740 224 468 1246700 91 443 2780400 29743 ...
  ..$ pop : int [1:237] 36643815 3074579 42972878 49437 77000 32522339 18090 98179 45479118 3021324 ...
  ..$ gdp : num [1:237] 6.94e+10 3.60e+10 6.30e+11 6.58e+08 3.33e+09 ...
  ..$ E   :List of 237
  .. ..$ : int [1:9] 172 282 93 162 287 156 230 128 200
  .. ..$ : int [1:12] 282 63 202 275 93 169 168 100 147 72 ...
  .. ..$ : int [1:9] 282 18 163 100 169 50 93 229 39
  .. .. [list output truncated]
 $ data  :List of 1
  ..$ regID: chr [1:11] "Africa" "Arctic Region" "Asia" "AsiaEurope" ...
> write(toJSON(HR),"FB_agroP.json")
> hyper.save2pajek(HR,"FB_agroP.net")

International organization participation

> A <- FB$orgs; m <- length(A); S <- NULL
> for(i in 1:m) S <- union(S,A[[i]])
> V <- data.frame(ID=sort(unlist(S))); n <- nrow(V)
> FB$region[74] <- "Europe"
> reg <- factor(FB$region)
> data <- list(regID=levels(reg))
> reg <- as.integer(reg)
> info <- list(network="FB_orgs",
+   title="CIA factbook: international organization participation",
+   by="iancoleman",
+   href=c("https://www.cia.gov/the-world-factbook/",
+          "https://github.com/iancoleman/cia_world_factbook_api#data"),
+   creator="V. Batagelj",
+   date=date(),
+   nNodes=n, 
+   nLinks=m,
+   simple=NA )
> H <- data.frame(ID=FB$Country,iso2=FB$ISO2,area=FB$area,pop=FB$pop,
+   gdp=FB$gdp,E=rep(NA,m))
> E = vector(mode="list",m)
> for(i in 1:m) E[[i]] <- as.integer(factor(A[[i]],levels=V$ID))
> H$E <- E
> HR <- list(format="hypernets",info=info,nodes=V,links=H,data=data)
> str(HR)
List of 5
 $ format: chr "hypernets"
 $ info  :List of 9
  ..$ network: chr "FB_orgs"
  ..$ title  : chr "CIA factbook: international organization participation"
  ..$ by     : chr "iancoleman"
  ..$ href   : chr [1:2] "https://www.cia.gov/the-world-factbook/" "https://github.com/iancoleman/cia_world_factbook_api#data"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Tue Oct 24 02:33:23 2023"
  ..$ nNodes : int 264
  ..$ nLinks : int 237
  ..$ simple : logi NA
 $ nodes :'data.frame': 264 obs. of  1 variable:
  ..$ ID: chr [1:264] "ABEDA" "ACP" "ADB" "ADB?" ...
 $ links :'data.frame': 237 obs. of  6 variables:
  ..$ ID  : chr [1:237] "Afghanistan" "Albania" "Algeria" "American Samoa" ...
  ..$ iso2: chr [1:237] "AF" "AL" "DZ" "AS" ...
  ..$ area: int [1:237] 652230 28748 2381740 224 468 1246700 91 443 2780400 29743 ...
  ..$ pop : int [1:237] 36643815 3074579 42972878 49437 77000 32522339 18090 98179 45479118 3021324 ...
  ..$ gdp : num [1:237] 6.94e+10 3.60e+10 6.30e+11 6.58e+08 3.33e+09 ...
  ..$ E   :List of 237
  .. ..$ : int [1:50] 3 55 61 77 82 91 104 113 114 115 ...
  .. ..$ : int [1:50] 31 43 45 47 72 74 82 91 113 114 ...
  .. ..$ : int [1:59] 1 5 7 9 10 22 29 35 43 91 ...
  .. .. [list output truncated]
 $ data  :List of 1
  ..$ regID: chr [1:11] "Africa" "Arctic Region" "Asia" "AsiaEurope" ...
> write(toJSON(HR),"FB_orgs.json")
> hyper.save2pajek(HR,"FB_orgs.net")

Cleaning

> wdir <- "C:/Users/vlado/DL/data/hyper/CIA"
> setwd(wdir)
> library(jsonlite)
> source("https://raw.githubusercontent.com/bavla/hypernets/main/R/hyperNets.R")
> FB <- fromJSON("./test/Factbook.json")
> wdir <- "C:/Users/vlado/DL/data/hyper/CIA/orgs"
> setwd(wdir)

> FB$ISO2[[48]] <- "CD"; FB$ISO2[[78]] <- "PX"
> FB$Country[[48]] <- "Congo DR"; FB$Country[[49]] <- "Congo R"
> FB$Country[[78]] <- "Gaza strip"; FB$Country[[233]] <- "West bank"

> FB$Country <- gsub("Island","Is",gsub("Islands","Iss",FB$Country))
> FB$Country <- gsub("Republic","R",gsub("SAR","/",FB$Country))
> FB$Country[[45]] <- "Cocos Iss"; > FB$Country[[138]] <- "Micronesia"
> FB$Country[[33]] <- "Myanmar"

> A <- FB$orgs; m <- length(A); S <- NULL
> B = vector(mode="list",m); S <- NULL
> for(i in 1:m) {
+    T <- trimws(gsub("\\?"," ",A[[i]]))
+    T[grep("has",T)] <- "CIS"
+    T[grep("NATO,",T)] <- "NATO"
+    T[grep("UNIDO,",T)] <- "UNIDO"
+    T[grep("UN Security Council",T)] <- "UN"
+    T[grep("and Matsu",T)] <- "Kinmen"
+    T[grep("China;",T)] <- "Taiwan"
+    B[[i]] <- T <- setdiff(T,"none")
+    S <- union(S,T)
+ }
> V <- data.frame(ID=sort(unlist(S))); n <- nrow(V)
> FB$region[74] <- "Europe"
> reg <- factor(FB$region)
> data <- list(regID=levels(reg))
> reg <- as.integer(reg)
> info <- list(network="FB_orgsInc",
+   title="CIA factbook: international organization participation/inclusive",
+   by="iancoleman",
+   href=c("https://www.cia.gov/the-world-factbook/",
+          "https://github.com/iancoleman/cia_world_factbook_api#data"),
+   creator="V. Batagelj",
+   date=date(),
+   nNodes=n, 
+   nLinks=m,
+   simple=NA )
> H <- data.frame(ID=FB$Country,iso2=FB$ISO2,area=FB$area,pop=FB$pop,
+   gdp=FB$gdp,E=rep(NA,m))
> E = vector(mode="list",m)
> for(i in 1:m) E[[i]] <- as.integer(factor(B[[i]],levels=V$ID))
> H$E <- E
> HR <- list(format="hypernets",info=info,nodes=V,links=H,data=data)
> str(HR)
> write(toJSON(HR),"FB_orgsInc.json")
> hyper.save2pajek(HR,"FB_orgsInc.net")
> length(S)
[1] 197

Hypernet symbolic data frame

Industries

> A <- FB$indust; m <- length(A); S <- NULL
> for(i in 1:m) S <- union(S,A[[i]])
> V <- data.frame(ID=sort(unlist(S))); n <- nrow(V)
> FB$region[74] <- "Europe"
> reg <- factor(FB$region)
> data <- list(regID=levels(reg))
> reg <- as.integer(reg)
> info <- list(network="FB_indust",
+   title="CIA factbook: industries",
+   by="iancoleman",
+   href=c("https://www.cia.gov/the-world-factbook/",
+          "https://github.com/iancoleman/cia_world_factbook_api#data"),
+   creator="V. Batagelj",
+   date=date(),
+   nNodes=n, 
+   nLinks=m,
+   simple=NA )
> H <- data.frame(ID=FB$Country,iso2=FB$ISO2,area=FB$area,pop=FB$pop,
+   gdp=FB$gdp,E=rep(NA,m))
> E = vector(mode="list",m)
> for(i in 1:m) E[[i]] <- as.integer(factor(A[[i]],levels=V$ID))
> H$E <- E
> HR <- list(format="hypernets",info=info,nodes=V,links=H,data=data)
> str(HR)
List of 5
 $ format: chr "hypernets"
 $ info  :List of 9
  ..$ network: chr "FB_indust"
  ..$ title  : chr "CIA factbook: industries"
  ..$ by     : chr "iancoleman"
  ..$ href   : chr [1:2] "https://www.cia.gov/the-world-factbook/" "https://github.com/iancoleman/cia_world_factbook_api#data"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Tue Oct 24 02:44:24 2023"
  ..$ nNodes : int 660
  ..$ nLinks : int 237
  ..$ simple : logi NA
 $ nodes :'data.frame': 660 obs. of  1 variable:
  ..$ ID: chr [1:660] "advanced electronic components" "aerospace" "agribusiness" "agricultural" ...
 $ links :'data.frame': 237 obs. of  6 variables:
  ..$ ID  : chr [1:237] "Afghanistan" "Albania" "Algeria" "American Samoa" ...
  ..$ iso2: chr [1:237] "AF" "AL" "DZ" "AS" ...
  ..$ area: int [1:237] 652230 28748 2381740 224 468 1246700 91 443 2780400 29743 ...
  ..$ pop : int [1:237] 36643815 3074579 42972878 49437 77000 32522339 18090 98179 45479118 3021324 ...
  ..$ gdp : num [1:237] 6.94e+10 3.60e+10 6.30e+11 6.58e+08 3.33e+09 ...
  ..$ E   :List of 237
  .. ..$ : int [1:15] 553 600 562 239 541 207 29 227 390 367 ...
  .. ..$ : int [1:11] 224 229 29 107 322 403 87 94 369 54 ...
  .. ..$ : int [1:7] 444 385 304 369 181 442 225
  .. .. [list output truncated]
 $ data  :List of 1
  ..$ regID: chr [1:11] "Africa" "Arctic Region" "Asia" "AsiaEurope" ...
> write(toJSON(HR),"FB_indust.json")
> hyper.save2pajek(HR,"FB_indust.net")




vlado/work/hn/cia.txt · Last modified: 2023/11/08 15:01 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki