Creating networks

The structure of the BACI dataset is described here.

Network for the year 2000

I downloaded the BACI data (a 2.2 GB ZIP archive) from CEPII and extracted the CSV file for the year 2000 to experiment with.

> # Work in the directory that holds the extracted BACI CSV files and load
> # the Pajek helper functions (uvLab2net is used below) from the Rnet repository.
> wdir <- "C:/Users/vlado/docs/papers/2023/HiTeC/London/slides/BACI/95-21"
> setwd(wdir)
> source("https://raw.githubusercontent.com/bavla/Rnet/master/R/Pajek.R")
> 
> # Country table: numeric country code, names, ISO 2- and 3-letter codes.
> C <- read.csv("country_codes_V202301.csv")
> head(C)
  country_code country_name_abbreviation country_name_full iso_2digit_alpha iso_3digit_alpha
1            4               Afghanistan       Afghanistan               AF              AFG
2            8                   Albania           Albania               AL              ALB
3           12                   Algeria           Algeria               DZ              DZA
4           16            American Samoa    American Samoa               AS              ASM
5           20                   Andorra           Andorra               AD              AND
6           24                    Angola            Angola               AO              AGO
> # Trade flows of the year 2000; columns t, i, j, k, v, q (see str below).
> B <- read.csv("BACI_HS92_Y2000_V202301.csv")
> str(B)
'data.frame':   7271864 obs. of  6 variables:
 $ t: int  2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
 $ i: int  4 4 4 4 4 4 4 4 4 4 ...
 $ j: int  12 12 12 12 12 12 20 20 20 20 ...
 $ k: int  90920 90930 90940 121190 330300 848330 300490 570110 570210 570242 ...
 $ v: num  2.823 11.216 7.981 2.051 0.552 ...
 $ q: chr  "        6.800" "       11.550" "       19.140" "        2.000" ...
> # Sum v over all products k for every (i, j) pair: one row per pair.
> N <- aggregate(B$v,by=list(B$i,B$j),FUN=sum)
> str(N)
'data.frame':   26043 obs. of  3 variables:
 $ Group.1: int  8 31 36 40 50 56 76 100 112 124 ...
 $ Group.2: int  4 4 4 4 4 4 4 4 4 4 ...
 $ x      : num  0.209 757.165 61.238 506.723 4008.518 ...
> # Sanity check: the first six rows of B all have i=4 and j=12, so their
> # sum must equal the aggregated value of the pair (4, 12).
> sum(B$v[1:6])
[1] 24.812
> N[which((N$Group.1==4)&(N$Group.2==12)),]
    Group.1 Group.2      x
178       4      12 24.812
> # Rename the columns to u, v, w and replace the country codes by their
> # positions in C$country_code (a code missing from cc would become NA).
> colnames(N) <- c("u","v","w")
> cc <- C$country_code
> N$u <- as.integer(factor(N$u,level=cc))
> N$v <- as.integer(factor(N$v,level=cc))
> # uvLab2net comes from the sourced Pajek.R; presumably it writes the
> # labelled directed network to the Pajek file WT00.net - confirm in Pajek.R.
> uvLab2net(C$iso_2digit_alpha,N$u,N$v,N$w,rep(2000,nrow(N)),Net="WT00.net",dir=TRUE)

In the picture of the network drawn in Pajek, I noticed that some country labels (DEU, BEL, …) appear twice. I checked them in the source data.

> # Find the rows of the country table that share the ISO3 code DEU
> # and print them to see why the label occurs twice.
> which(C$iso_3digit_alpha=="DEU")
[1] 79 81
> C[c(79,81),]
   country_code country_name_abbreviation                  country_name_full iso_2digit_alpha   iso_3digit_alpha
79          276                   Germany                            Germany               DE                DEU
81          280  Fmr Fed. Rep. of Germany Former Federal Republic of Germany               DE                DEU

Reading from ZIP file

> # With list=TRUE unzip only lists the archive members (Name, Length, Date)
> # and extracts nothing.
> zdir <- unzip("../BACI_HS92_V202301.zip",list=TRUE) 
> zdir
                             Name    Length                Date
1     BACI_HS92_Y1995_V202301.csv 238112541 2023-01-31 11:11:00
2     BACI_HS92_Y1996_V202301.csv 255110534 2023-01-31 11:13:00
3     BACI_HS92_Y1997_V202301.csv 268464528 2023-01-31 11:14:00
4     BACI_HS92_Y1998_V202301.csv 279524363 2023-01-31 11:16:00
5     BACI_HS92_Y1999_V202301.csv 286562400 2023-01-31 11:18:00
6     BACI_HS92_Y2000_V202301.csv 347577043 2023-01-31 11:20:00
7     BACI_HS92_Y2001_V202301.csv 359134170 2023-01-31 11:22:00
8     BACI_HS92_Y2002_V202301.csv 369188019 2023-01-31 11:24:00
9     BACI_HS92_Y2003_V202301.csv 381371385 2023-01-31 11:27:00
10    BACI_HS92_Y2004_V202301.csv 398264263 2023-01-31 11:29:00
11    BACI_HS92_Y2005_V202301.csv 412270441 2023-01-31 11:32:00
12    BACI_HS92_Y2006_V202301.csv 428661613 2023-01-31 11:34:00
13    BACI_HS92_Y2007_V202301.csv 439195266 2023-01-31 11:37:00
14    BACI_HS92_Y2008_V202301.csv 445018106 2023-01-31 11:40:00
15    BACI_HS92_Y2009_V202301.csv 440450004 2023-01-31 11:42:00
16    BACI_HS92_Y2010_V202301.csv 451977993 2023-01-31 11:45:00
17    BACI_HS92_Y2011_V202301.csv 459970011 2023-01-31 11:48:00
18    BACI_HS92_Y2012_V202301.csv 473158279 2023-01-31 11:51:00
19    BACI_HS92_Y2013_V202301.csv 482756570 2023-01-31 11:54:00
20    BACI_HS92_Y2014_V202301.csv 483932125 2023-01-31 11:57:00
21    BACI_HS92_Y2015_V202301.csv 500816869 2023-01-31 12:00:00
22    BACI_HS92_Y2016_V202301.csv 500809051 2023-01-31 12:03:00
23    BACI_HS92_Y2017_V202301.csv 511566990 2023-01-31 12:06:00
24    BACI_HS92_Y2018_V202301.csv 515154942 2023-01-31 12:09:00
25    BACI_HS92_Y2019_V202301.csv 517328358 2023-01-31 12:13:00
26    BACI_HS92_Y2020_V202301.csv 499104038 2023-01-31 12:15:00
27    BACI_HS92_Y2021_V202301.csv 509386838 2023-01-31 12:22:00
28      country_codes_V202301.csv     10431 2023-02-03 11:11:00
29 product_codes_HS92_V202301.csv    543275 2023-02-03 13:58:00

Network of each year

> # Create one network file WT<year>.net for each of the 27 yearly CSV files.
> source("https://raw.githubusercontent.com/bavla/Rnet/master/R/Pajek.R") 
> C <- read.csv("country_codes_V202301.csv")
> # cc: numeric country codes; cn: ISO2 labels. Entry 139 is reset to the
> # string "NA" because read.csv reads an unquoted NA field as a missing
> # value by default (presumably this row is Namibia - confirm).
> cc <- C$country_code; cn <- C$iso_2digit_alpha; cn[139] <- "NA"
> zdir <- unzip("../BACI_HS92_V202301.zip",list=TRUE)
> # Rows 1..27 of the archive listing are the files for the years 1995..2021.
> for(i in 1:27){
+   # Print a time stamp for the year being processed.
+   year <- 1994+i; cat(year,":",date(),"\n"); flush.console()
+   net <- paste("WT",year,".net",sep="")
+   # Extract only the i-th file, read it, and delete the extracted copy
+   # so that only one large CSV is on disk at a time.
+   B <- read.csv(unzip("../BACI_HS92_V202301.zip",files=zdir$Name[i]))
+   file.remove(zdir$Name[i])
+   # Total value v per (i, j) country pair, summed over all products.
+   N <- aggregate(B$v,by=list(B$i,B$j),FUN=sum)
+   colnames(N) <- c("u","v","w")
+   # Country codes -> positions in cc, i.e. indices into the label vector cn.
+   N$u <- as.integer(factor(N$u,level=cc))
+   N$v <- as.integer(factor(N$v,level=cc)) 
+   # uvLab2net is defined in the sourced Pajek.R; presumably writes the
+   # directed network to the file named by net - confirm in Pajek.R.
+   uvLab2net(cn,N$u,N$v,N$w,rep(year,nrow(N)),Net=net,dir=TRUE)
+ }
> cat("****",":",date(),"\n")
1995 : Fri Aug 11 01:59:58 2023 
1996 : Fri Aug 11 02:00:12 2023 
1997 : Fri Aug 11 02:00:27 2023 
...
2020 : Fri Aug 11 02:14:01 2023 
2021 : Fri Aug 11 02:14:44 2023 
**** : Fri Aug 11 02:15:29 2023 

Temporal network 1995-2021

> # Build a single temporal network WTtime.net covering 1995-2021 and, in
> # activity.txt, one line per year listing the countries without any link.
> source("https://raw.githubusercontent.com/bavla/Rnet/master/R/Pajek.R") 
> C <- read.csv("country_codes_V202301.csv")
> # cc: numeric country codes; cn: ISO2 labels. Entry 139 is reset to the
> # string "NA" because read.csv reads an unquoted NA field as a missing value.
> cc <- C$country_code; cn <- C$iso_2digit_alpha; cn[139] <- "NA"
> # Both output files stay open for the whole run and are closed at the end.
> net <- file("WTtime.net","w",encoding="UTF-8")
> act <- file("activity.txt","w",encoding="UTF-8")
> # tin closes the quoted label and appends the activity interval of a vertex.
> n <- length(cn); tin <- '" [1995-2021]\n'
> cat("% WorldTrade",date(),"\n*vertices",n,"\n",file=net)
> for(v in 1:n) cat(v,' "',cn[v],tin,sep="",file=net)
> cat("*arcs\n",file=net)
> zdir <- unzip("../BACI_HS92_V202301.zip",list=TRUE)
> # Rows 1..27 of the archive listing are the files for the years 1995..2021.
> for(i in 1:27){
+   year <- 1994+i; cat(year,":",date(),"\n"); flush.console()
+   # Extract only the i-th file, read it, and delete the extracted copy.
+   B <- read.csv(unzip("../BACI_HS92_V202301.zip",files=zdir$Name[i]))
+   file.remove(zdir$Name[i])
+   # Total value v per (i, j) country pair, summed over all products.
+   N <- aggregate(B$v,by=list(B$i,B$j),FUN=sum)
+   colnames(N) <- c("u","v","w")
+   # Country codes -> positions in cc, i.e. vertex numbers.
+   N$u <- as.integer(factor(N$u,level=cc))
+   N$v <- as.integer(factor(N$v,level=cc)) 
+   # One arc line per pair, tagged with the year; seq_len() writes nothing
+   # (instead of bogus NA rows) should a year have no rows at all.
+   for(e in seq_len(nrow(N))) cat(N$u[e],N$v[e],N$w[e]," [",year,"]\n",file=net) 
+   # Vertices that are neither a source nor a target of any arc this year.
+   cat(year,setdiff(1:n,union(N$u,N$v)),"\n",file=act)
+ }
> cat("****",":",date(),"\n"); close(act); close(net)

1995 : Fri Aug 11 03:12:59 2023 
1996 : Fri Aug 11 03:13:13 2023 
1997 : Fri Aug 11 03:13:28 2023 
...
2019 : Fri Aug 11 03:23:15 2023 
2020 : Fri Aug 11 03:23:47 2023 
2021 : Fri Aug 11 03:24:19 2023 
**** : Fri Aug 11 03:24:51 2023 

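For each year we also determined the inactive countries, i.e. those that take part in no trade link in that year. Each line of activity.txt gives the year followed by the indices of these countries in the country table: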

1995 4 17 23 48 56 78 80 81 88 115 120 134 139 144 146 147 176 183 187 190 197 201 202 205 224 
1996 4 17 23 48 56 78 80 81 88 115 120 134 139 144 146 147 176 183 187 197 201 202 205 224 
1997 4 17 23 48 56 78 80 81 88 115 120 134 139 144 146 147 176 183 187 190 197 201 202 205 224 
1998 4 17 23 48 56 78 80 81 88 115 120 134 139 144 146 147 176 183 187 197 201 202 205 224 
1999 4 18 23 48 56 78 80 81 88 115 134 139 144 146 147 176 183 187 190 197 201 202 205 224 
2000 18 56 80 81 134 144 146 147 176 187 190 198 201 202 224 
2001 18 56 80 81 134 144 146 147 176 187 190 198 201 202 224 
2002 18 56 80 81 134 144 146 147 176 187 190 198 201 202 224 
2003 18 56 80 81 134 144 146 147 176 187 190 198 201 202 224 229 
2004 18 56 80 81 134 144 146 147 176 187 190 198 201 202 224 229 
2005 18 56 80 81 134 144 146 147 176 187 190 198 201 202 224 229 
2006 18 56 80 81 144 146 147 176 190 198 201 202 224 229 237 
2007 18 56 80 81 144 146 147 176 190 198 201 202 224 229 237 
2008 18 56 80 81 144 146 147 176 190 198 201 202 224 229 237 
2009 18 56 80 81 144 146 147 176 190 198 201 202 224 229 237 
2010 18 56 80 81 146 147 176 190 198 201 202 224 229 237 
2011 18 56 80 81 176 190 198 201 202 224 229 237 
2012 18 56 80 81 143 176 190 198 203 224 229 237 
2013 18 56 80 81 143 190 198 203 224 229 237 
2014 18 48 56 80 81 143 190 198 203 224 229 237 
2015 18 48 56 80 81 143 190 198 203 224 229 237 
2016 18 48 56 80 81 143 190 198 203 224 229 237 
2017 18 48 56 80 81 143 190 198 203 224 229 237 
2018 18 48 56 80 81 190 198 203 224 229 237 
2019 18 48 56 80 81 143 190 198 203 224 229 237 
2020 18 48 56 80 81 143 190 198 203 224 229 237 
2021 18 48 56 80 81 143 190 198 203 224 229 237 

WT multiway network / chapters

> # Build the four-way network exporter x importer x chapter x year and
> # save it as WTchap.json.
> library(jsonlite)
> # sec[ch] is the section number assigned to chapter ch (1..99).
> # NOTE(review): chapter 99 gets section 22, while the recorded output shows
> # only 21 section labels in S$lab - confirm how section 22 should be labelled.
> sec <- rep(0,99)
> sec[1:5] <- 1; sec[6:14] <- 2; sec[15] <- 3; sec[16:24] <- 4
> sec[25:27] <- 5; sec[28:38] <- 6; sec[39:40] <- 7; sec[41:43] <- 8
> sec[44:46] <- 9; sec[47:49] <- 10; sec[50:63] <- 11; sec[64:67] <- 12
> sec[68:70] <- 13; sec[71] <- 14; sec[72:83] <- 15; sec[84:85] <- 16
> sec[86:89] <- 17; sec[90:92] <- 18; sec[93] <- 19; sec[94:96] <- 20
> sec[97:98] <- 21; sec[99] <- 22
> C <- read.csv("country_codes_V202301.csv")
> # cc: numeric country codes; cn: ISO2 labels. Entry 139 is reset to the
> # string "NA" because read.csv reads an unquoted NA field as a missing value.
> cc <- C$country_code; cn <- C$iso_2digit_alpha; cn[139] <- "NA"
> c3 <- C$iso_3digit_alpha; ca <- C$country_name_abbreviation
> # Short labels of sections and chapters (column lab is used below).
> S <- read.csv("../sectShort.txt",sep="",head=TRUE)
> Ch <- read.csv("../chapShort.txt",sep=";",head=TRUE)
> n <- length(cn)
> info <- list(network="WTchapters",
+   title="BACI/CEPII World trade flows 1995-2021",
+   by="Gaulier, G. and Zignago, S.",
+   ref="Gaulier, G. and Zignago, S. (2010) BACI: International Trade Database at the Product-Level. 
    The 1994-2007 Version. CEPII Working Paper, N°2010-23.",
+   href="http://www.cepii.fr/CEPII/en/bdd_modele/bdd_modele_item.asp?id=37",
+   creator="V. Batagelj",
+   date=date() )
> ways <- list(ex="exporter",im="importer",ch="chapter",y="year")
> # Year labels of the fourth way. Y was used below without being defined;
> # character values reproduce the recorded structure (ID: chr "1995" ...).
> Y <- as.character(1995:2021)
> nodes <- list(ex=data.frame(ID=cn,iso3=c3,long=ca),im=data.frame(ID=cn),
+   ch=data.frame(ID=trimws(Ch$lab)),y=data.frame(ID=Y))
> data <- list(sectID=S$lab,sect=sec)
> zdir <- unzip("../BACI_HS92_V202301.zip",list=TRUE)
> # V accumulates the link tables of all years.
> V <- NULL
> # Rows 1..27 of the archive listing are the files for the years 1995..2021.
> for(i in 1:27){
+   year <- 1994+i; cat(year,":",date(),"\n"); flush.console()
+   # Extract only the i-th file, read it, and delete the extracted copy.
+   B <- read.csv(unzip("../BACI_HS92_V202301.zip",files=zdir$Name[i]))
+   # Integer division by 10000 keeps the leading digits of the product
+   # code k, which are used as the chapter number.
+   file.remove(zdir$Name[i]); B$k <- as.integer(B$k %/% 10000)
+   # Total value v per (exporter, importer, chapter, year).
+   N <- aggregate(B$v,by=list(B$i,B$j,B$k,B$t),FUN=sum)
+   colnames(N) <- c("ex","im","ch","y","w")
+   # Country codes -> positions in cc; year -> position in 1995:2021.
+   N$ex <- as.integer(factor(N$ex,level=cc))
+   N$im <- as.integer(factor(N$im,level=cc)) 
+   N$y <- as.integer(factor(N$y,level=1995:2021)) 
+   V <- rbind(V,N)  
+ }
> cat("****",":",date(),"\n")
> # Assemble the multiway network object and write it as JSON.
> MN <- list(format="MWnets",info=info,ways=ways,nodes=nodes,links=V,data=data)
> write(toJSON(MN),"WTchap.json")
1995 : Sat Aug 12 00:47:24 2023 
1996 : Sat Aug 12 00:47:45 2023 
1997 : Sat Aug 12 00:48:08 2023 
1998 : Sat Aug 12 00:48:33 2023 
...
2018 : Sat Aug 12 01:04:08 2023 
2019 : Sat Aug 12 01:05:14 2023 
2020 : Sat Aug 12 01:06:17 2023 
2021 : Sat Aug 12 01:07:22 2023 
> # Show the structure of the assembled multiway network object.
> str(MN)
List of 6
 $ format: chr "MWnets"
 $ info  :List of 7
  ..$ network: chr "WTchapters"
  ..$ title  : chr "BACI/CEPII World trade flows 1995-2021"
  ..$ by     : chr "Gaulier, G. and Zignago, S."
  ..$ ref    : chr "Gaulier, G. and Zignago, S. (2010) BACI: International Trade Database at the Product-Level. 
               The 1994-2007 Versi"| __truncated__
  ..$ href   : chr "http://www.cepii.fr/CEPII/en/bdd_modele/bdd_modele_item.asp?id=37"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Sat Aug 12 00:47:23 2023"
 $ ways  :List of 4
  ..$ ex: chr "exporter"
  ..$ im: chr "importer"
  ..$ ch: chr "chapter"
  ..$ y : chr "year"
 $ nodes :List of 4
  ..$ ex:'data.frame':  238 obs. of  3 variables:
  .. ..$ ID  : chr [1:238] "AF" "AL" "DZ" "AS" ...
  .. ..$ iso3: chr [1:238] "AFG" "ALB" "DZA" "ASM" ...
  .. ..$ long: chr [1:238] "Afghanistan" "Albania" "Algeria" "American Samoa" ...
  ..$ im:'data.frame':  238 obs. of  1 variable:
  .. ..$ ID: chr [1:238] "AF" "AL" "DZ" "AS" ...
  ..$ ch:'data.frame':  99 obs. of  1 variable:
  .. ..$ ID: chr [1:99] "Live Animals and poultry" "Animal Meat and edible offals" "Fish, fillets and aquatic meat" "Honey, Eggs and Dairy" ...
  ..$ y :'data.frame':  27 obs. of  1 variable:
  .. ..$ ID: chr [1:27] "1995" "1996" "1997" "1998" ...
 $ links :'data.frame': 20384334 obs. of  5 variables:
  ..$ ex: int [1:20384334] 71 85 95 102 142 223 18 71 79 95 ...
  ..$ im: int [1:20384334] 2 2 2 2 2 2 3 3 3 3 ...
  ..$ ch: int [1:20384334] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ y : int [1:20384334] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ w : num [1:20384334] 352.82 126.256 225 398.26 0.997 ...
 $ data  :List of 2
  ..$ sectID: chr [1:21] "animals" "vegetables" "fats&oils" "foodstuffs" ...
  ..$ sect  : num [1:99] 1 1 1 1 1 2 2 2 2 2 ...
>

WT multiway network / sections

HS 2002 Classification by Section

> # Build the four-way network exporter x importer x section x year;
> # the link table is accumulated in V.
> library(jsonlite)
> # sec[ch] is the section number assigned to chapter ch (1..99).
> # NOTE(review): chapter 99 gets section 22, while the recorded output shows
> # only 21 section labels in S$lab - confirm how section 22 should be labelled.
> sec <- rep(0,99)
> sec[1:5] <- 1; sec[6:14] <- 2; sec[15] <- 3; sec[16:24] <- 4
> sec[25:27] <- 5; sec[28:38] <- 6; sec[39:40] <- 7; sec[41:43] <- 8
> sec[44:46] <- 9; sec[47:49] <- 10; sec[50:63] <- 11; sec[64:67] <- 12
> sec[68:70] <- 13; sec[71] <- 14; sec[72:83] <- 15; sec[84:85] <- 16
> sec[86:89] <- 17; sec[90:92] <- 18; sec[93] <- 19; sec[94:96] <- 20
> sec[97:98] <- 21; sec[99] <- 22
> C <- read.csv("country_codes_V202301.csv")
> # cc: numeric country codes; cn: ISO2 labels. Entry 139 is reset to the
> # string "NA" because read.csv reads an unquoted NA field as a missing value.
> cc <- C$country_code; cn <- C$iso_2digit_alpha; cn[139] <- "NA"
> c3 <- C$iso_3digit_alpha; ca <- C$country_name_abbreviation
> # Short labels of sections and chapters (only S$lab is used below).
> S <- read.csv("../sectShort.txt",sep="",head=TRUE)
> Ch <- read.csv("../chapShort.txt",sep=";",head=TRUE)
> n <- length(cn)
> info <- list(network="WTsections",
+   title="BACI/CEPII World trade flows 1995-2021",
+   by="Gaulier, G. and Zignago, S.",
+   ref="Gaulier, G. and Zignago, S. (2010) BACI: International Trade Database at the Product-Level. 
         The 1994-2007 Version. CEPII Working Paper, N°2010-23.",
+   href="http://www.cepii.fr/CEPII/en/bdd_modele/bdd_modele_item.asp?id=37",
+   creator="V. Batagelj",
+   date=date() )
> ways <- list(ex="exporter",im="importer",s="section",y="year")
> # Year labels of the fourth way. Y was used below without being defined;
> # character values reproduce the recorded structure (ID: chr "1995" ...).
> Y <- as.character(1995:2021)
> nodes <- list(ex=data.frame(ID=cn,iso3=c3,long=ca),im=data.frame(ID=cn),
+   s=data.frame(ID=S$lab),y=data.frame(ID=Y))
> data <- list()
> zdir <- unzip("../BACI_HS92_V202301.zip",list=TRUE)
> # V accumulates the link tables of all years.
> V <- NULL
> # Rows 1..27 of the archive listing are the files for the years 1995..2021.
> for(i in 1:27){
+   year <- 1994+i; cat(year,":",date(),"\n"); flush.console()
+   # Extract only the i-th file, read it, and delete the extracted copy.
+   B <- read.csv(unzip("../BACI_HS92_V202301.zip",files=zdir$Name[i]))
+   # Chapter number = product code k integer-divided by 10000; sec[] then
+   # maps the chapter to its section.
+   file.remove(zdir$Name[i]); B$k <- sec[as.integer(B$k %/% 10000)]
+   # Total value v per (exporter, importer, section, year).
+   N <- aggregate(B$v,by=list(B$i,B$j,B$k,B$t),FUN=sum)
+   colnames(N) <- c("ex","im","s","y","w")
+   # Country codes -> positions in cc; year -> position in 1995:2021.
+   N$ex <- as.integer(factor(N$ex,level=cc))
+   N$im <- as.integer(factor(N$im,level=cc)) 
+   N$y <- as.integer(factor(N$y,level=1995:2021)) 
+   V <- rbind(V,N)  
+ }
> cat("****",":",date(),"\n")
1995 : Sat Aug 12 01:45:32 2023 
1996 : Sat Aug 12 01:45:50 2023 
1997 : Sat Aug 12 01:46:10 2023 
1998 : Sat Aug 12 01:46:33 2023 
...
2018 : Sat Aug 12 02:02:17 2023 
2019 : Sat Aug 12 02:03:15 2023 
2020 : Sat Aug 12 02:04:13 2023 
2021 : Sat Aug 12 02:05:06 2023 
**** : Sat Aug 12 02:06:03 2023 
> # Assemble the multiway network object, write it as JSON to WTsect.json,
> # and show its structure.
> MN <- list(format="MWnets",info=info,ways=ways,nodes=nodes,links=V,data=data)
> write(toJSON(MN),"WTsect.json")
> str(MN)
List of 6
 $ format: chr "MWnets"
 $ info  :List of 7
  ..$ network: chr "WTsections"
  ..$ title  : chr "BACI/CEPII World trade flows 1995-2021"
  ..$ by     : chr "Gaulier, G. and Zignago, S."
  ..$ ref    : chr "Gaulier, G. and Zignago, S. (2010) BACI: International Trade Database at the Product-Level. 
               The 1994-2007 Versi"| __truncated__
  ..$ href   : chr "http://www.cepii.fr/CEPII/en/bdd_modele/bdd_modele_item.asp?id=37"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Sat Aug 12 01:45:32 2023"
 $ ways  :List of 4
  ..$ ex: chr "exporter"
  ..$ im: chr "importer"
  ..$ s : chr "section"
  ..$ y : chr "year"
 $ nodes :List of 4
  ..$ ex:'data.frame':  238 obs. of  3 variables:
  .. ..$ ID  : chr [1:238] "AF" "AL" "DZ" "AS" ...
  .. ..$ iso3: chr [1:238] "AFG" "ALB" "DZA" "ASM" ...
  .. ..$ long: chr [1:238] "Afghanistan" "Albania" "Algeria" "American Samoa" ...
  ..$ im:'data.frame':  238 obs. of  1 variable:
  .. ..$ ID: chr [1:238] "AF" "AL" "DZ" "AS" ...
  ..$ s :'data.frame':  21 obs. of  1 variable:
  .. ..$ ID: chr [1:21] "animals" "vegetables" "fats&oils" "foodstuffs" ...
  ..$ y :'data.frame':  27 obs. of  1 variable:
  .. ..$ ID: chr [1:27] "1995" "1996" "1997" "1998" ...
 $ links :'data.frame': 7575645 obs. of  5 variables:
  ..$ ex: int [1:7575645] 43 53 77 100 112 142 210 228 10 11 ...
  ..$ im: int [1:7575645] 1 1 1 1 1 1 1 1 2 2 ...
  ..$ s : num [1:7575645] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ y : int [1:7575645] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ w : num [1:7575645] 84 6.99 21.07 303.3 849.5 ...
 $ data  : list()

Reading the multiway network

> # Read the saved sections network back from JSON and inspect it, then
> # load the multiway-network functions from the ibm3m repository.
> wdir <- "C:/Users/vlado/docs/papers/2023/HiTeC/London/slides/BACI/95-21"
> setwd(wdir)
> library(jsonlite)
> MN <- fromJSON("WTsect.json")
> str(MN)
> source("https://raw.githubusercontent.com/bavla/ibm3m/master/multiway/MWnets.R")
pro/wn/wt/baci/nets.txt · Last modified: 2023/08/12 03:03 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki