Genova / Italian universities / Analysis

Reading data

> wdir <- "C:/Users/vlado/docs/papers/2022/ifcs2022/genova/data"
> setwd(wdir)
> library(jsonlite)
> library(magrittr)
> source("https://raw.githubusercontent.com/bavla/Rnet/master/R/Pajek.R")
> source("https://raw.githubusercontent.com/bavla/ibm3m/master/multiway/MWnets.R")
> MN <- fromJSON("students.json")
> str(MN)
List of 6
 $ format: chr "MWnets"
 $ info  :List of 5
  ..$ network: chr "students"
  ..$ title  : chr "Student mobility data in Italian universities 2008, 2011, 2014, 2017"
  ..$ by     : chr [1:4] "VG Genova" "G Giordano" "G Ragozini" "MP Vitale"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Sun Nov  6 22:31:11 2022"
 $ ways  :List of 4
  ..$ prov: chr "province"
  ..$ univ: chr "university"
  ..$ prog: chr "programme"
  ..$ year: chr "year"
 $ nodes :List of 4
  ..$ prov:'data.frame':        107 obs. of  9 variables:
  .. ..$ ID        : chr [1:107] "AG" "AL" "AN" "AO" ...
  .. ..$ type      : chr [1:107] "F" "O" "O" "D" ...
  .. ..$ province  : chr [1:107] "Agrigento" "Alessandria" "Ancona" "Aosta" ...
  .. ..$ capital   : chr [1:107] "Agrigento" "Alessandria" "Ancona" "Aosta" ...
  .. ..$ region    : chr [1:107] "Sicily" "Piedmont" "Marche" "Aosta Valley" ...
  .. ..$ IDreg     : chr [1:107] "SIC" "PIE" "MAR" "VAL" ...
  .. ..$ macreg    : chr [1:107] "Insular" "North-West" "Centre" "North-West" ...
  .. ..$ population: int [1:107] 416181 409392 464419 124089 203425 290811 336501 209390 402929 1230158 ...
  .. ..$ area      : num [1:107] 3053 3559 1963 3261 1228 ...
  ..$ univ:'data.frame':        79 obs. of  2 variables:
  .. ..$ ID  : chr [1:79] "Bicocca" "Bocconi" "Foscari" "Biomedico" ...
  .. ..$ long: chr [1:79] "Bicocca" "Bocconi" "Cà Foscari" "Campus Biomedico" ...
  ..$ prog:'data.frame':        11 obs. of  2 variables:
  .. ..$ ID  : chr [1:11] "Q" "AFFV" "AH" "BAL" ...
  .. ..$ long: chr [1:11] "?" "Agriculture, forestry, fisheries and veterinary" "Arts and humanities" ...
  ..$ year:'data.frame':        4 obs. of  1 variable:
  .. ..$ ID: chr [1:4] "2008" "2011" "2014" "2017"
 $ links :'data.frame': 37205 obs. of  5 variables:
  ..$ prov: int [1:37205] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ univ: int [1:37205] 1 1 1 1 2 3 4 5 5 5 ...
  ..$ prog: int [1:37205] 4 5 9 11 4 3 9 3 5 9 ...
  ..$ year: int [1:37205] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ w   : int [1:37205] 4 1 1 1 11 1 1 1 1 1 ...
 $ data  :List of 1
  ..$ regs:'data.frame':        20 obs. of  2 variables:
  .. ..$ ID  : chr [1:20] "ABR" "BAS" "CAL" "CAM" ...
  .. ..$ long: chr [1:20] "Abruzzo" "Basilicata" "Calabria" "Campania" ...

Extracting multiway subnetworks for different years

> MN %>% 
+   slice("year==1") %>% 
+   flatten("w",c("prov","univ","prog")) -> 
+   S2008
> MN %>% 
+   slice("year==2") %>% 
+   flatten("w",c("prov","univ","prog")) -> 
+   S2011
> MN %>% 
+   slice("year==3") %>% 
+   flatten("w",c("prov","univ","prog")) -> 
+   S2014
> MN %>% 
+   slice("year==4") %>% 
+   flatten("w",c("prov","univ","prog")) -> 
+   S2017
> str(S2014)
List of 6
 $ format: chr "MWnets"
 $ info  :List of 6
  ..$ network: chr "students"
  ..$ title  : chr "Student mobility data in Italian universities 2008, 2011, 2014, 2017"
  ..$ by     : chr [1:4] "VG Genova" "G Giordano" "G Ragozini" "MP Vitale"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Sun Nov  6 22:31:11 2022"
  ..$ trace  :List of 2
  .. ..$ :List of 3
  .. .. ..$ op  : chr "slice"
  .. .. ..$ P   : chr "year==3"
  .. .. ..$ date: chr "Wed Nov 23 04:46:40 2022"
  .. ..$ :List of 4
  .. .. ..$ op  : chr "flatten"
  .. .. ..$ par : chr [1:4] "prov" "univ" "prog" "w"
  .. .. ..$ FUN : chr "sum"
  .. .. ..$ date: chr "Wed Nov 23 04:46:40 2022"
 $ ways  :List of 3
  ..$ prov: chr "province"
  ..$ univ: chr "university"
  ..$ prog: chr "programme"
 $ nodes :List of 3
  ..$ prov:'data.frame':        107 obs. of  9 variables:
  .. ..$ ID        : chr [1:107] "AG" "AL" "AN" "AO" ...
  .. ..$ type      : chr [1:107] "F" "O" "O" "D" ...
  .. ..$ province  : chr [1:107] "Agrigento" "Alessandria" "Ancona" "Aosta" ...
  .. ..$ capital   : chr [1:107] "Agrigento" "Alessandria" "Ancona" "Aosta" ...
  .. ..$ region    : chr [1:107] "Sicily" "Piedmont" "Marche" "Aosta Valley" ...
  .. ..$ IDreg     : chr [1:107] "SIC" "PIE" "MAR" "VAL" ...
  .. ..$ macreg    : chr [1:107] "Insular" "North-West" "Centre" "North-West" ...
  .. ..$ population: int [1:107] 416181 409392 464419 124089 203425 290811 336501 209390 402929 1230158 ...
  .. ..$ area      : num [1:107] 3053 3559 1963 3261 1228 ...
  ..$ univ:'data.frame':        79 obs. of  2 variables:
  .. ..$ ID  : chr [1:79] "Bicocca" "Bocconi" "Foscari" "Biomedico" ...
  .. ..$ long: chr [1:79] "Bicocca" "Bocconi" "Cà Foscari" "Campus Biomedico" ...
  ..$ prog:'data.frame':        11 obs. of  2 variables:
  .. ..$ ID  : chr [1:11] "Q" "AFFV" "AH" "BAL" ...
  .. ..$ long: chr [1:11] "?" "Agriculture, forestry, fisheries and veterinary" "Arts and humanities" ...
 $ links :'data.frame': 9293 obs. of  4 variables:
  ..$ prov: int [1:9293] 58 7 1 2 3 4 6 9 10 13 ...
  ..$ univ: int [1:9293] 14 35 61 61 61 61 61 61 61 61 ...
  ..$ prog: int [1:9293] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ w   : int [1:9293] 1 1 2 1 2 1 1 1 4 1 ...
 $ data  :List of 1
  ..$ regs:'data.frame':        20 obs. of  2 variables:
  .. ..$ ID  : chr [1:20] "ABR" "BAS" "CAL" "CAM" ...
  .. ..$ long: chr [1:20] "Abruzzo" "Basilicata" "Calabria" "Campania" ...

Projections to universities and clustering based on Salton dissimilarities

> CoU14 <- projection(S2014,"univ","w")
> SaU14 <- salton(CoU14); DU14 <- as.dist(1-SaU14)
> tU14 <- hclust(DU14,method="ward.D")
> plot(tU14,hang=-1,cex=0.8,main="Universities 2014 / Ward")

Projection to regions and clustering

Recode provinces to regions

> S2014 %>%
+   recodeway2part("prov","IDreg","regs","region") %>%
+   flatten("w",c("regs","univ","prog")) ->
+   Reg14
> str(Reg14)
List of 6
 $ format: chr "MWnets"
 $ info  :List of 6
  ..$ network: chr "students"
  ..$ title  : chr "Student mobility data in Italian universities 2008, 2011, 2014, 2017"
  ..$ by     : chr [1:4] "VG Genova" "G Giordano" "G Ragozini" "MP Vitale"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Sun Nov  6 22:31:11 2022"
  ..$ trace  :List of 4
  .. ..$ :List of 3
  .. .. ..$ op  : chr "slice"
  .. .. ..$ P   : chr "year==3"
  .. .. ..$ date: chr "Wed Nov 23 04:46:40 2022"
  .. ..$ :List of 4
  .. .. ..$ op  : chr "flatten"
  .. .. ..$ par : chr [1:4] "prov" "univ" "prog" "w"
  .. .. ..$ FUN : chr "sum"
  .. .. ..$ date: chr "Wed Nov 23 04:46:40 2022"
  .. ..$ :List of 4
  .. .. ..$ op  : chr "recodeway2part"
  .. .. ..$ pars: chr [1:3] "prov" "IDreg" "regs"
  .. .. ..$ desc: chr "region"
  .. .. ..$ date: chr "Wed Nov 23 05:32:32 2022"
  .. ..$ :List of 4
  .. .. ..$ op  : chr "flatten"
  .. .. ..$ par : chr [1:4] "regs" "univ" "prog" "w"
  .. .. ..$ FUN : chr "sum"
  .. .. ..$ date: chr "Wed Nov 23 05:32:32 2022"
 $ ways  :List of 3
  ..$ regs: chr "region"
  ..$ univ: chr "university"
  ..$ prog: chr "programme"
 $ nodes :List of 3
  ..$ regs:'data.frame':        20 obs. of  2 variables:
  .. ..$ ID  : chr [1:20] "ABR" "BAS" "CAL" "CAM" ...
  .. ..$ long: chr [1:20] "Abruzzo" "Basilicata" "Calabria" "Campania" ...
  ..$ univ:'data.frame':        79 obs. of  2 variables:
  .. ..$ ID  : chr [1:79] "Bicocca" "Bocconi" "Foscari" "Biomedico" ...
  .. ..$ long: chr [1:79] "Bicocca" "Bocconi" "Cà Foscari" "Campus Biomedico" ...
  ..$ prog:'data.frame':        11 obs. of  2 variables:
  .. ..$ ID  : chr [1:11] "Q" "AFFV" "AH" "BAL" ...
  .. ..$ long: chr [1:11] "?" "Agriculture, forestry, fisheries and veterinary" "Arts and humanities" "Business, administration and law" ...
 $ links :'data.frame': 3783 obs. of  4 variables:
  ..$ regs: int [1:3783] 12 16 1 3 4 6 7 9 10 12 ...
  ..$ univ: int [1:3783] 14 35 61 61 61 61 61 61 61 61 ...
  ..$ prog: int [1:3783] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ w   : int [1:3783] 1 1 2 2 10 4 9 1 3 4 ...
 $ data  : Named list()

clustering

> CoR14 <- projection(Reg14,"regs","w")
> SaR14 <- salton(CoR14); DR14 <- as.dist(1-SaR14)
> tR14 <- hclust(DR14,method="ward.D")
> plot(tR14,hang=-1,cex=0.8,main="Regions 2014 / Ward")
> 

Two-mode network (Regs,Univ)

> RU14 <- flatten(Reg14,"w",c("regs","univ"))
> str(RU14)
List of 6
 $ format: chr "MWnets"
 $ info  :List of 6
  ..$ network: chr "students"
  ..$ title  : chr "Student mobility data in Italian universities 2008, 2011, 2014, 2017"
  ..$ by     : chr [1:4] "VG Genova" "G Giordano" "G Ragozini" "MP Vitale"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Sun Nov  6 22:31:11 2022"
  ..$ trace  :List of 5
  .. ..$ :List of 3
  .. .. ..$ op  : chr "slice"
  .. .. ..$ P   : chr "year==3"
  .. .. ..$ date: chr "Wed Nov 23 04:46:40 2022"
  .. ..$ :List of 4
  .. .. ..$ op  : chr "flatten"
  .. .. ..$ par : chr [1:4] "prov" "univ" "prog" "w"
  .. .. ..$ FUN : chr "sum"
  .. .. ..$ date: chr "Wed Nov 23 04:46:40 2022"
  .. ..$ :List of 4
  .. .. ..$ op  : chr "recodeway2part"
  .. .. ..$ pars: chr [1:3] "prov" "IDreg" "regs"
  .. .. ..$ desc: chr "region"
  .. .. ..$ date: chr "Wed Nov 23 05:32:32 2022"
  .. ..$ :List of 4
  .. .. ..$ op  : chr "flatten"
  .. .. ..$ par : chr [1:4] "regs" "univ" "prog" "w"
  .. .. ..$ FUN : chr "sum"
  .. .. ..$ date: chr "Wed Nov 23 05:32:32 2022"
  .. ..$ :List of 4
  .. .. ..$ op  : chr "flatten"
  .. .. ..$ par : chr [1:3] "regs" "univ" "w"
  .. .. ..$ FUN : chr "sum"
  .. .. ..$ date: chr "Wed Nov 23 05:45:14 2022"
 $ ways  :List of 2
  ..$ regs: chr "region"
  ..$ univ: chr "university"
 $ nodes :List of 2
  ..$ regs:'data.frame':        20 obs. of  2 variables:
  .. ..$ ID  : chr [1:20] "ABR" "BAS" "CAL" "CAM" ...
  .. ..$ long: chr [1:20] "Abruzzo" "Basilicata" "Calabria" "Campania" ...
  ..$ univ:'data.frame':        79 obs. of  2 variables:
  .. ..$ ID  : chr [1:79] "Bicocca" "Bocconi" "Foscari" "Biomedico" ...
  .. ..$ long: chr [1:79] "Bicocca" "Bocconi" "Cà Foscari" "Campus Biomedico" ...
 $ links :'data.frame': 1099 obs. of  3 variables:
  ..$ regs: int [1:1099] 1 2 3 4 5 6 7 8 10 11 ...
  ..$ univ: int [1:1099] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ w   : int [1:1099] 9 8 38 23 52 17 13 38 20 2 ...
 $ data  : Named list()

Heatmap (Regs,Univ)

From the frequency distribution of weights in RU14

> f <- table(RU14$links$w)
> f
  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20  21  22 
172 109  85  50  48  37  35  27  21  23  16  18  13  14  15   9  11  10  13  10  10   7 
 23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44 
  8   7   7   6   4   7   4   3   7   7   6   2   5   7   4   8  10   1   6   2   1   3 
 45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  67 
  5   7   3   5   5   2   2   3   6   4   6   2   4   2   3   1   2   2   3   1   6   3 
 68  70  71  72  73  76  77  79  80  81  82  83  84  85  86  87  89  90  91  92  93  94 
  5   3   2   1   4   2   1   1   4   5   3   1   1   1   2   1   1   2   2   2   2   2 
 97  98  99 100 102 103 107 108 109 111 112 113 117 118 119 120 121 123 124 125 127 129 
  1   2   1   1   1   1   2   2   1   2   1   1   1   1   1   1   3   2   1   1   1   2 
130 131 133 134 139 140 142 143 144 146 148 152 153 155 157 158 162 165 166 169 170 172 
  2   1   1   1   1   1   2   1   1   1   1   2   1   2   1   1   1   1   1   1   1   1 
174 175 176 179 181 185 187 188 190 191 194 203 207 208 211 222 231 250 263 264 274 277 
  1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
288 296 300 302 303 308 309 317 326 332 336 338 356 371 384 399 424 431 435 490 504 517 
  1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
525 541 576 583 746 846 
  1   1   1   1   1   1 

I selected bins (0,5,40,Inf) and recoded the weights

> library(gplots)

> RUc14 <- recodecol2bins(RU14,"w","code",bins=c(0,5,40,Inf))
> T <- matrix(0,nrow=length(RUc14$nodes$regs$ID),ncol=length(RUc14$nodes$univ$ID))
> dim(T)
[1] 20 79
> rownames(T) <- RUc14$nodes$regs$ID; colnames(T) <- RUc14$nodes$univ$ID
> L <- RUc14$links
> for(i in 1:length(L$w)) T[L$regs[i],L$univ[i]] <- L$code[i]
> cols <- c("white","orange","darkred","black")
> heatmap.2(T,Rowv=as.dendrogram(tR14),Colv=as.dendrogram(tU14),col=cols,trace="none",
+   main=paste("Italy ",2014," / recoded / Ward",sep=""))

To do:

  • Normalizations !?
  • We could also determine partitions from both clusterings and produce the corresponding blockmodel.
  • Binarization.

Exporting networks to Pajek

The projection produces a square matrix that can be exported as an ordinary network into Pajek

> matrix2net(CoU14,Net="CoU14.net")

A multiway network can be exported to Pajek as a (multi-relational temporal) two-mode network on node sets u=way1 and v=way2 (and relation optional r=way3 and time instance t=way4). Additional ways are producing parallel links

> mwn2net(S2014,"prov","univ",r="prog",w="w",Net="S2014.net")

We can also export the node partition way$part as a Pajek clustering file.

> mwn2clu(S2014,"prov","IDreg",Clu="regions.clu")

and the node property way$prop as a Pajek vector file if the property is numerical; otherwise it is exported as a numbered list.

> mwn2vec(S2014,"prov","area",Vec="area.vec")
> mwn2vec(S2014,"prov","capital",Vec="capital.vec")

3D visualization of the year 2008

> wdir <- "C:/Users/vlado/docs/papers/2022/ifcs2022/genova/data"
> setwd(wdir)
> library(jsonlite)
> library(magrittr)
> source("https://raw.githubusercontent.com/bavla/Rnet/master/R/Pajek.R")
> source("https://raw.githubusercontent.com/bavla/ibm3m/master/multiway/MWnets.R")

> SM <- fromJSON("students.json")
> SM %>% 
+   slice("year==1") %>% 
+   flatten("w",c("prov","univ","prog")) -> 
+   MN
> Cox <- projection(MN,"prov","w")
> Sax <- salton(Cox); Dx <- as.dist(1-Sax)
> tx <- hclust(Dx,method="complete")
> plot(tx,hang=-1,cex=0.8,main="Students 2008 prov / Complete")
> Coy <- projection(MN,"univ","w")
> Say <- salton(Coy); Dy <- as.dist(1-Say); Dy[is.na(Dy)] <- 1
> ty <- hclust(Dy,method="ward")
> plot(ty,hang=-1,cex=0.8,main="Students 2008 univ / Ward")
> Coz <- projection(MN,"prog","w")
> Saz <- salton(Coz); Dz <- as.dist(1-Saz)
> # tz <- hclust(Dz,method="complete")
> # plot(tz,hang=-1,cex=0.8,main="Students 2008 prog / Complete")
> tz <- hclust(Dz,method="ward")
> plot(tz,hang=-1,cex=0.8,main="Students 2008 prog / Ward")
> I <- 1:107; J <- 1:79; K <- 1:11
> I[tx$order] <- 1:107; J[ty$order] <- 1:79; K[tz$order] <- 1:11
> mwnX3D(MN,"prov","univ","prog","w",maxsize=1.2,pu=I,pv=J,pz=K,file="students08Clu.x3d")

> qx <- cutree(tx,k=8); Cx <- as.vector((qx-1)/7)
> qy <- cutree(ty,k=6); Cy <- as.vector((qy-1)/5)
> qz <- cutree(tz,k=3); Cz <- as.vector((qz-1)/2)
> L <- MN$links
> cluCol <- cbind(Cx[L$prov],Cy[L$univ],Cz[L$prog])
> mwnX3D(MN,"prov","univ","prog","w",lu="province",lv="long",lz="long",maxsize=0.8,
+ col=cluCol,pu=I,pv=J,pz=K,file="students08Clux.x3d")

Italian students mobility core

Italian students mobility 2008 multiway network reordered and colored by clusterings

Multiway/Bavla
This is a 3D layout using X3DOM.
Use the mouse to navigate the space - rotate, zoom in/out, ...

Click a link (cube) for its identification (province, university, program)!

December 2022

Unfortunately, X3DOM doesn't display the description info of each object. This option is available in the viewer view3dscene.

To do: It seems that this can be done also in X3DOM using Javasript (see example). X3DOM

January 23, 2023

It works! X3DOM Anchor description

Slices

There is a very large number of moves of a single or a pair of students. Let's remove them

> table(L$w)
   1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18   19   20 
4266 1555  760  479  310  230  200  142  102   99   69   67   48   57   45   40   31   30   27   22 
  21   22   23   24   25   26   27   28   29   30   31   32   33   34   35   36   37   38   39   40 
  15   19   17   16   16   13   12   14   11    9    5    4   14   14    4    3   11    5    5    7 
  41   42   43   44   45   46   47   48   49   50   51   52   53   54   55   56   58   60   61   63 
   7    4    4    6    1    3    2    1    2    2    1    1    2    1    1    1    1    2    1    1 
  65   67   68   70   71   76   77   80   81   82   83   88   92   95   99  107  115  133  147  157 
   2    3    1    1    4    1    1    1    1    2    2    1    1    1    1    1    1    1    1    1 
 168  326 
   1    1 
> MS <- slice(MN,"w>3")
> mwnX3D(MS,"prov","univ","prog","w",lu="province",lv="long",lz="long",maxsize=0.8,
+ col=cluCol,pu=I,pv=J,pz=K,file="students08Clu2.x3d")

X3D inline example

Italian students mobility 2008 multiway network / w > 2

Multiway/Bavla
This is a 3D layout. Use the mouse to navigate the space - rotate, zoom in/out, ...

and to inspect the moves to the program “Engineering, manufacturing and construction” we display the ME slice

> MN$nodes$prog$ID
 [1] "Q"    "AFFV" "AH"   "BAL"  "Edu"  "EMC"  "HW"   "ICT"  "NsMS" "Serv" "SsJI"
> MN$nodes$prog$ID[6]
[1] "EMC"
> MN$nodes$prog$long[6]
[1] "Engineering, manufacturing and construction"
> ME <- slice(MN,"prog==6")
> mwnX3D(ME,"prov","univ","prog","w",lu="province",lv="long",lz="long",maxsize=0.8,
+ col=cluCol,pu=I,pv=J,pz=K,file="students08CluE.x3d")

X3D inline example

Italian students mobility 2008 multiway network / prog == "Engineering, manufacturing and construction"

Multiway/Bavla

vlado/work/2m/mwn/genova/ana1.txt · Last modified: 2023/01/24 14:31 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki