====== Multi-modal variables / (Sub)sets ======
[[.:clu|Clustering]]
===== Oils =====
==== Data ====
M. Ichino and H. Yaguchi, "Generalized Minkowski metrics for mixed feature-type data analysis," in IEEE Transactions on Systems, Man, and Cybernetics, vol. 24, no. 4, pp. 698-708, April 1994, doi: 10.1109/21.286391. https://ieeexplore.ieee.org/document/286391
Oils and Fats data
Oil_or_Fat; Specific_gravity_(g/cm3); Freezing_point_(C); Iodine_value; Saponif._value; Major_acids
Linseed; [0.930, 0.935]; [-27,-18]; [170, 204]; [118, 196]; L, Ln, O, P, M
Perilla; [0.930, 0.937]; [ -5, -4]; [192, 208]; [188, 197]; L, Ln, O, P, S
Cotton ; [0.916, 0.918]; [ -6, -1]; [ 99, 113]; [189, 198]; L, O, P, M, S
Sesame ; [0.920, 0.926]; [ -6, -4]; [104, 116]; [187, 193]; L, O, P, S, A
Camelia; [0.916, 0.917]; [-21,-15]; [ 80, 82]; [189, 193]; L, O
Olive ; [0.914, 0.919]; [ 0, 6]; [ 79, 90]; [187, 196]; L, O, P, S
Beef ; [0.860, 0.870]; [ 30, 38]; [ 40, 48]; [190, 199]; O, P, M, C, S
Hog ; [0.858, 0.864]; [ 22, 32]; [ 53, 77]; [190, 202]; L, O, P, M, S, Lu
L: linoleic acid, Ln: linolenic acid, O: oleic acid, P: palmitic acid, M: myristic acid,
S: stearic acid, A: arachic acid, C: capric acid, Lu: lauric acid
==== Matrix ====
# Session setup: switch to the working directory and load jsonlite
# (its toJSON is used further down to write the result).
> wdir <- "C:/Users/vlado/docs/papers/2023/SDA/Paris/test"
> setwd(wdir)
> library(jsonlite)
# Read the semicolon-separated table; skip=1 drops the first line of the file,
# so the following line supplies the column names.
# NOTE(review): the name T masks R's shorthand T for TRUE in this session.
> T <- read.csv("oils.csv",sep=";",head=TRUE,skip=1)
# Strip the padding blanks around the unit names in column 1.
> T[,1] <- trimws(T[,1])
# Show the unit names together with the Major_acids column (column 6).
> T[,c(1,6)]
Oil_or_Fat Major_acids
1 Linseed L, Ln, O, P, M
2 Perilla L, Ln, O, P, S
3 Cotton L, O, P, M, S
4 Sesame L, O, P, S, A
5 Camelia L, O
6 Olive L, O, P, S
7 Beef O, P, M, C, S
8 Hog L, O, P, M, S, Lu
# A: one character vector of acid codes per unit, obtained by splitting the
# Major_acids string on commas and trimming the blanks around each piece.
> A <- lapply(strsplit(T[,6],","),trimws)
# L: all distinct acid codes over all units (successive unions of the vectors in A).
> L <- Reduce(union,A)
> L
[1] "L" "Ln" "O" "P" "M" "S" "A" "C" "Lu"
# D: 8 x 9 zero matrix with the unit names as row names and the acid codes in L
# as column names.
> D <- matrix(0,nrow=8,ncol=9); rownames(D) <- T[,1]; colnames(D) <- L
# For each unit, factor(A[[i]],L) encodes its acid codes by their position in L;
# used as a column index it selects those columns, which are set to 1.
> for(i in 1:8) D[i,factor(A[[i]],L)] <- 1
> D
L Ln O P M S A C Lu
Linseed 1 1 1 1 1 0 0 0 0
Perilla 1 1 1 1 0 1 0 0 0
Cotton 1 0 1 1 1 1 0 0 0
Sesame 1 0 1 1 0 1 1 0 0
Camelia 1 0 1 0 0 0 0 0 0
Olive 1 0 1 1 0 1 0 0 0
Beef 0 0 1 1 1 1 0 1 0
Hog 1 0 1 1 1 1 0 0 1
>
==== Symbolic data frame ====
# getInterval(x): removes the "[" and "]" characters from x, splits what is left
# on commas and returns the pieces as one numeric vector,
# e.g. "[0.930, 0.935]" gives c(0.930, 0.935).
> getInterval <- function(x) as.numeric(unlist(strsplit(gsub("\\]","",gsub("\\[","",x)),",")))
# Skeleton data frame: six placeholder columns of NA, one row per unit.
> n <- 8; u <- rep(NA,n)
> SDF <- data.frame(Gravity=u,Freezing=u,Iodine=u,Saponif=u,MajorAcids=u,MAbin=u)
> rownames(SDF) <- T[,1]
# Columns 1..4 become list columns: each cell holds the numeric pair parsed from
# the corresponding cell of columns 2..5 of T.
> for(j in 1:4) SDF[[j]] <- lapply(T[,j+1],getInterval)
# Two encodings of the major acids of unit i:
#   ma[[i]] - integer positions of its acid codes in L,
#   mb[[i]] - 0/1 vector of length 9 with ones at those positions.
> ma <- vector("list",n); mb <- vector("list",n)
> for(i in 1:8) {ma[[i]] <- as.integer(factor(A[[i]],L));
+ v <- rep(0,9); v[ma[[i]]] <- 1; mb[[i]] <- v}
# Store both encodings as list columns.
> SDF$MajorAcids <- ma; SDF$MAbin <- mb
> SDF
Gravity Freezing Iodine Saponif MajorAcids MAbin
Linseed 0.930, 0.935 -27, -18 170, 204 118, 196 1, 2, 3, 4, 5 1, 1, 1, 1, 1, 0, 0, 0, 0
Perilla 0.930, 0.937 -5, -4 192, 208 188, 197 1, 2, 3, 4, 6 1, 1, 1, 1, 0, 1, 0, 0, 0
Cotton 0.916, 0.918 -6, -1 99, 113 189, 198 1, 3, 4, 5, 6 1, 0, 1, 1, 1, 1, 0, 0, 0
Sesame 0.920, 0.926 -6, -4 104, 116 187, 193 1, 3, 4, 6, 7 1, 0, 1, 1, 0, 1, 1, 0, 0
Camelia 0.916, 0.917 -21, -15 80, 82 189, 193 1, 3 1, 0, 1, 0, 0, 0, 0, 0, 0
Olive 0.914, 0.919 0, 6 79, 90 187, 196 1, 3, 4, 6 1, 0, 1, 1, 0, 1, 0, 0, 0
Beef 0.86, 0.87 30, 38 40, 48 190, 199 3, 4, 5, 8, 6 0, 0, 1, 1, 1, 1, 0, 1, 0
Hog 0.858, 0.864 22, 32 53, 77 190, 202 1, 3, 4, 5, 6, 9 1, 0, 1, 1, 1, 1, 0, 0, 1
# Long names for the nine acid codes; attached to the set-valued variables below
# next to cats=L.
# NOTE(review): "searic" looks like a misspelling of "stearic"; the string is
# stored in the metadata exactly as written here.
> long <- c("linoleic","linolenic","oleic","palmitic","myristic","searic","arachic","capric","lauric")
# head: metadata for the data frame - number of units, number of variables and
# one entry per variable giving its ID and type. V5 (type "set") and V6 (type
# "members") additionally carry the category codes (cats) and long names.
> head <- list(nUnits=8, nVars=6,
+ vars=list(
+ V1=list(ID="Gravity",type="interval"),
+ V2=list(ID="Freezing",type="interval"),
+ V3=list(ID="Iodine",type="interval"),
+ V4=list(ID="Saponif",type="interval"),
+ V5=list(ID="MajorAcids",type="set",cats=L,long=long),
+ V6=list(ID="MAbin",type="members",cats=L,long=long)
+ )
+ )
> str(head)
List of 3
$ nUnits: num 8
$ nVars : num 6
$ vars :List of 6
..$ V1:List of 2
.. ..$ ID : chr "Gravity"
.. ..$ type: chr "interval"
..$ V2:List of 2
.. ..$ ID : chr "Freezing"
.. ..$ type: chr "interval"
..$ V3:List of 2
.. ..$ ID : chr "Iodine"
.. ..$ type: chr "interval"
..$ V4:List of 2
.. ..$ ID : chr "Saponif"
.. ..$ type: chr "interval"
..$ V5:List of 4
.. ..$ ID : chr "MajorAcids"
.. ..$ type: chr "set"
.. ..$ cats: chr [1:9] "L" "Ln" "O" "P" ...
.. ..$ long: chr [1:9] "linoleic" "linolenic" "oleic" "palmitic" ...
..$ V6:List of 4
.. ..$ ID : chr "MAbin"
.. ..$ type: chr "members"
.. ..$ cats: chr [1:9] "L" "Ln" "O" "P" ...
.. ..$ long: chr [1:9] "linoleic" "linolenic" "oleic" "palmitic" ...
>
# info: descriptive metadata for the data set (title, authors, reference, links,
# creator); date() stores the time at which this command is run.
> info <- list(dataset="Oils",
+ title="Oils and fats",
+ by="Ichino M., Yaguchi H.",
+ ref="Generalized Minkowski metrics for mixed feature-type data analysis. IEEE Trans. Syst. Man Cybern., 24 (4) (1994), pp. 698-708",
+ href=c("https://ieeexplore.ieee.org/document/286391",
+ "https://github.com/bavla/symData/tree/master/SDAJSON"),
+ creator="V. Batagelj",
+ date=date()
+ )
# Bundle the format tag, info, head and the data frame into one list, convert it
# with toJSON and write the result to Oils.json in the working directory.
> Oils <- list(format="SDAJSON",info=info,head=head,SDF=SDF)
> write(toJSON(Oils),"Oils.json")
> str(Oils)
List of 4
$ format: chr "SDAJSON"
$ info :List of 7
..$ dataset: chr "Oils"
..$ title : chr "Oils and fats"
..$ by : chr "Ichino M., Yaguchi H."
..$ ref : chr "Generalized Minkowski metrics for mixed feature-type data analysis. IEEE Trans. Syst. Man Cybern., 24 (4) (1994), pp. 698-708"
..$ href : chr [1:2] "https://ieeexplore.ieee.org/document/286391" "https://github.com/bavla/symData/tree/master/SDAJSON"
..$ creator: chr "V. Batagelj"
..$ date : chr "Mon Oct 30 01:14:37 2023"
$ head :List of 3
..$ nUnits: num 8
..$ nVars : num 6
..$ vars :List of 6
.. ..$ V1:List of 2
.. .. ..$ ID : chr "Gravity"
.. .. ..$ type: chr "interval"
.. ..$ V2:List of 2
.. .. ..$ ID : chr "Freezing"
.. .. ..$ type: chr "interval"
.. ..$ V3:List of 2
.. .. ..$ ID : chr "Iodine"
.. .. ..$ type: chr "interval"
.. ..$ V4:List of 2
.. .. ..$ ID : chr "Saponif"
.. .. ..$ type: chr "interval"
.. ..$ V5:List of 4
.. .. ..$ ID : chr "MajorAcids"
.. .. ..$ type: chr "set"
.. .. ..$ cats: chr [1:9] "L" "Ln" "O" "P" ...
.. .. ..$ long: chr [1:9] "linoleic" "linolenic" "oleic" "palmitic" ...
.. ..$ V6:List of 4
.. .. ..$ ID : chr "MAbin"
.. .. ..$ type: chr "members"
.. .. ..$ cats: chr [1:9] "L" "Ln" "O" "P" ...
.. .. ..$ long: chr [1:9] "linoleic" "linolenic" "oleic" "palmitic" ...
$ SDF :'data.frame': 8 obs. of 6 variables:
..$ Gravity:List of 8
.. ..$ : num [1:2] 0.93 0.935
.. ..$ : num [1:2] 0.93 0.937
...
..$ Freezing :List of 8
.. ..$ : num [1:2] -27 -18
.. ..$ : num [1:2] -5 -4
...
..$ Iodine :List of 8
.. ..$ : num [1:2] 170 204
.. ..$ : num [1:2] 192 208
...
..$ Saponif :List of 8
.. ..$ : num [1:2] 118 196
.. ..$ : num [1:2] 188 197
...
..$ MajorAcids :List of 8
.. ..$ : int [1:5] 1 2 3 4 5
...
.. ..$ : int [1:6] 1 3 4 5 6 9
..$ MAbin :List of 8
.. ..$ : num [1:9] 1 1 1 1 1 0 0 0 0
...
.. ..$ : num [1:9] 1 0 1 1 1 1 0 0 1
>
===== Network file formats and tools =====
EasyGraph: Patterns 4, 100839, October 13, 2023
Comparison of network analysis tools in terms of supporting different network I/O types
name; EasyGraph; NetworkX; igraph; SNAP; graph-tool; Gephi; Cytoscape
Edge List; 1; 1; 1; 1; 0; 1; 0
GraphML ; 1; 1; 1; 0; 1; 1; 1
GML ; 1; 1; 1; 0; 1; 1; 1
Pickle ; 1; 1; 1; 0; 1; 0; 0
Pajek ; 1; 1; 1; 0; 0; 1; 0
GraphViz ; 1; 0; 1; 1; 1; 1; 0
UCINET DL; 1; 0; 0; 0; 0; 1; 0
GEXF ; 1; 0; 0; 0; 0; 1; 0
# Read the semicolon-separated table, skipping the first two lines of the file;
# the first column (format name) becomes the row names and the rest is turned
# into a matrix. In the printout the column "graph-tool" appears as graph.tool.
> G <- as.matrix(read.csv("NetFormats.csv",sep=";",head=TRUE,skip=2,row.names=1))
> G
EasyGraph NetworkX igraph SNAP graph.tool Gephi Cytoscape
Edge List 1 1 1 1 0 1 0
GraphML 1 1 1 0 1 1 1
GML 1 1 1 0 1 1 1
Pickle 1 1 1 0 1 0 0
Pajek 1 1 1 0 0 1 0
GraphViz 1 0 1 1 1 1 0
UCINET DL 1 0 0 0 0 1 0
GEXF 1 0 0 0 0 1 0
# GDF: data frame with a single list column Tools, one row per file format.
> n <- 8; u <- rep(NA,n)
> GDF <- data.frame(Tools=u)
> rownames(GDF) <- rownames(G)
# v[[i]] is row i of G as a plain vector: the 0/1 indicators over the tools
# (the columns of G).
> v <- vector("list",n)
> for(i in 1:8) v[[i]] <- as.vector(G[i,])
> GDF$Tools <- v
> GDF
Tools
Edge List 1, 1, 1, 1, 0, 1, 0
GraphML 1, 1, 1, 0, 1, 1, 1
GML 1, 1, 1, 0, 1, 1, 1
Pickle 1, 1, 1, 0, 1, 0, 0
Pajek 1, 1, 1, 0, 0, 1, 0
GraphViz 1, 0, 1, 1, 1, 1, 0
UCINET DL 1, 0, 0, 0, 0, 1, 0
GEXF 1, 0, 0, 0, 0, 1, 0
>
# Ghead: metadata for the single variable Tools (type "members"); its categories
# are the column names of G.
> Ghead <- list(nUnits=8, nVars=1, vars=list(V1=list(ID="Tools",type="members",cats=colnames(G))))
# Ginfo: descriptive metadata; date() stores the time at which this command is run.
# NOTE(review): "Gao etal." presumably stands for "Gao et al."; the string is
# stored as written.
> Ginfo <- list(dataset="NetFormats",
+ title="Network file formats and tools",
+ by="Gao etal.",
+ ref="EasyGraph: Patterns 4, 100839, October 13, 2023",
+ href=c("https://www.cell.com/patterns/pdf/S2666-3899(23)00218-0.pdf",
+ "https://github.com/bavla/symData/tree/master/SDAJSON"),
+ creator="V. Batagelj",
+ date=date()
+ )
# Bundle the format tag, Ginfo, Ghead and the data frame into one list, convert
# it with toJSON and write the result to NetFormats.json in the working directory.
> GT <- list(format="SDAJSON",info=Ginfo,head=Ghead,SDF=GDF)
> write(toJSON(GT),"NetFormats.json")
> str(GT)
List of 4
$ format: chr "SDAJSON"
$ info :List of 7
..$ dataset: chr "NetFormats"
..$ title : chr "Network file formats and tools"
..$ by : chr "Gao etal."
..$ ref : chr "EasyGraph: Patterns 4, 100839, October 13, 2023"
..$ href : chr [1:2] "https://www.cell.com/patterns/pdf/S2666-3899(23)00218-0.pdf" "https://github.com/bavla/symData/tree/master/SDAJSON"
..$ creator: chr "V. Batagelj"
..$ date : chr "Mon Oct 30 01:47:36 2023"
$ head :List of 3
..$ nUnits: num 8
..$ nVars : num 1
..$ vars :List of 1
.. ..$ V1:List of 3
.. .. ..$ ID : chr "Tools"
.. .. ..$ type: chr "members"
.. .. ..$ cats: chr [1:7] "EasyGraph" "NetworkX" "igraph" "SNAP" ...
$ SDF :'data.frame': 8 obs. of 1 variable:
..$ Tools:List of 8
.. ..$ : num [1:7] 1 1 1 1 0 1 0
.. ..$ : num [1:7] 1 1 1 0 1 1 1
.. ..$ : num [1:7] 1 1 1 0 1 1 1
.. ..$ : num [1:7] 1 1 1 0 1 0 0
.. ..$ : num [1:7] 1 1 1 0 0 1 0
.. ..$ : num [1:7] 1 0 1 1 1 1 0
.. ..$ : num [1:7] 1 0 0 0 0 1 0
.. ..$ : num [1:7] 1 0 0 0 0 1 0