====== Multi-modal variables / (Sub)sets ====== [[.:clu|Clustering]] ===== Oils ===== ==== Data ==== M. Ichino and H. Yaguchi, "Generalized Minkowski metrics for mixed feature-type data analysis," in IEEE Transactions on Systems, Man, and Cybernetics, vol. 24, no. 4, pp. 698-708, April 1994, doi: 10.1109/21.286391. https://ieeexplore.ieee.org/document/286391 Oils and Fats data Oil_or_Fat; Specific_gravity_(g/cm3); Freezing_point_(C); Iodine_value; Saponif._value; Major_acids Linseed; [0.930, 0.935]; [-27,-18]; [170, 204]; [118, 196]; L, Ln, O, P, M Perilla; [0.930, 0.937]; [ -5, -4]; [192, 208]; [188, 197]; L, Ln, O, P, S Cotton ; [0.916, 0.918]; [ -6, -1]; [ 99, 113]; [189, 198]; L, O, P, M, S Sesame ; [0.920, 0.926]; [ -6, -4]; [104, 116]; [187, 193]; L, O, P, S, A Camelia; [0.916, 0.917]; [-21,-15]; [ 80, 82]; [189, 193]; L, O Olive ; [0.914, 0.919]; [ 0, 6]; [ 79, 90]; [187, 196]; L, O, P, S Beef ; [0.860, 0.870]; [ 30, 38]; [ 40, 48]; [190, 199]; O, P, M, C, S Hog ; [0.858, 0.864]; [ 22, 32]; [ 53, 77]; [190, 202]; L, O, P, M, S, Lu L: linoleic acid, Ln: linolenic acid, O: oleic acid, P: palmitic acid, M: myristic acid, S: stearic acid, A: arachic acid, C: capric acid, Lu: lauric acid ==== Matrix ==== > wdir <- "C:/Users/vlado/docs/papers/2023/SDA/Paris/test" > setwd(wdir) > library(jsonlite) > T <- read.csv("oils.csv",sep=";",head=TRUE,skip=1) > T[,1] <- trimws(T[,1]) > T[,c(1,6)] Oil_or_Fat Major_acids 1 Linseed L, Ln, O, P, M 2 Perilla L, Ln, O, P, S 3 Cotton L, O, P, M, S 4 Sesame L, O, P, S, A 5 Camelia L, O 6 Olive L, O, P, S 7 Beef O, P, M, C, S 8 Hog L, O, P, M, S, Lu > A <- lapply(strsplit(T[,6],","),trimws) > L <- Reduce(union,A) > L [1] "L" "Ln" "O" "P" "M" "S" "A" "C" "Lu" > D <- matrix(0,nrow=8,ncol=9); rownames(D) <- T[,1]; colnames(D) <- L > for(i in 1:8) D[i,factor(A[[i]],L)] <- 1 > D L Ln O P M S A C Lu Linseed 1 1 1 1 1 0 0 0 0 Perilla 1 1 1 1 0 1 0 0 0 Cotton 1 0 1 1 1 1 0 0 0 Sesame 1 0 1 1 0 1 1 0 0 Camelia 1 0 1 0 0 0 0 0 0 Olive 1 0 1 1 0 
1 0 0 0 Beef 0 0 1 1 1 1 0 1 0 Hog 1 0 1 1 1 1 0 0 1 > ==== Symbolic data frame ==== > getInterval <- function(x) as.numeric(unlist(strsplit(gsub("\\]","",gsub("\\[","",x)),","))) > n <- 8; u <- rep(NA,n) > SDF <- data.frame(Gravity=u,Freezing=u,Iodine=u,Saponif=u,MajorAcids=u,MAbin=u) > rownames(SDF) <- T[,1] > for(j in 1:4) SDF[[j]] <- lapply(T[,j+1],getInterval) > ma <- vector("list",n); mb <- vector("list",n) > for(i in 1:8) {ma[[i]] <- as.integer(factor(A[[i]],L)); + v <- rep(0,9); v[ma[[i]]] <- 1; mb[[i]] <- v} > SDF$MajorAcids <- ma; SDF$MAbin <- mb > SDF Gravity Freezing Iodine Saponif MajorAcids MAbin Linseed 0.930, 0.935 -27, -18 170, 204 118, 196 1, 2, 3, 4, 5 1, 1, 1, 1, 1, 0, 0, 0, 0 Perilla 0.930, 0.937 -5, -4 192, 208 188, 197 1, 2, 3, 4, 6 1, 1, 1, 1, 0, 1, 0, 0, 0 Cotton 0.916, 0.918 -6, -1 99, 113 189, 198 1, 3, 4, 5, 6 1, 0, 1, 1, 1, 1, 0, 0, 0 Sesame 0.920, 0.926 -6, -4 104, 116 187, 193 1, 3, 4, 6, 7 1, 0, 1, 1, 0, 1, 1, 0, 0 Camelia 0.916, 0.917 -21, -15 80, 82 189, 193 1, 3 1, 0, 1, 0, 0, 0, 0, 0, 0 Olive 0.914, 0.919 0, 6 79, 90 187, 196 1, 3, 4, 6 1, 0, 1, 1, 0, 1, 0, 0, 0 Beef 0.86, 0.87 30, 38 40, 48 190, 199 3, 4, 5, 8, 6 0, 0, 1, 1, 1, 1, 0, 1, 0 Hog 0.858, 0.864 22, 32 53, 77 190, 202 1, 3, 4, 5, 6, 9 1, 0, 1, 1, 1, 1, 0, 0, 1 > long <- c("linoleic","linolenic","oleic","palmitic","myristic","searic","arachic","capric","lauric") > head <- list(nUnits=8, nVars=6, + vars=list( + V1=list(ID="Gravity",type="interval"), + V2=list(ID="Freezing",type="interval"), + V3=list(ID="Iodine",type="interval"), + V4=list(ID="Saponif",type="interval"), + V5=list(ID="MajorAcids",type="set",cats=L,long=long), + V6=list(ID="MAbin",type="members",cats=L,long=long) + ) + ) > str(head) List of 3 $ nUnits: num 8 $ nVars : num 6 $ vars :List of 6 ..$ V1:List of 2 .. ..$ ID : chr "Gravity" .. ..$ type: chr "interval" ..$ V2:List of 2 .. ..$ ID : chr "Freezing" .. ..$ type: chr "interval" ..$ V3:List of 2 .. ..$ ID : chr "Iodine" .. 
..$ type: chr "interval" ..$ V4:List of 2 .. ..$ ID : chr "Saponif" .. ..$ type: chr "interval" ..$ V5:List of 4 .. ..$ ID : chr "MajorAcids" .. ..$ type: chr "set" .. ..$ cats: chr [1:9] "L" "Ln" "O" "P" ... .. ..$ long: chr [1:9] "linoleic" "linolenic" "oleic" "palmitic" ... ..$ V6:List of 4 .. ..$ ID : chr "MAbin" .. ..$ type: chr "members" .. ..$ cats: chr [1:9] "L" "Ln" "O" "P" ... .. ..$ long: chr [1:9] "linoleic" "linolenic" "oleic" "palmitic" ... > > info <- list(dataset="Oils", + title="Oils and fats", + by="Ichino M., Yaguchi H.", + ref="Generalized Minkowski metrics for mixed feature-type data analysis. IEEE Trans. Syst. Man Cybern., 24 (4) (1994), pp. 698-708", + href=c("https://ieeexplore.ieee.org/document/286391", + "https://github.com/bavla/symData/tree/master/SDAJSON"), + creator="V. Batagelj", + date=date() + ) > Oils <- list(format="SDAJSON",info=info,head=head,SDF=SDF) > write(toJSON(Oils),"Oils.json") > str(Oils) List of 4 $ format: chr "SDAJSON" $ info :List of 7 ..$ dataset: chr "Oils" ..$ title : chr "Oils and fats" ..$ by : chr "Ichino M., Yaguchi H." ..$ ref : chr "Generalized Minkowski metrics for mixed feature-type data analysis. IEEE Trans. Syst. Man Cybern., 24 (4) (1994), pp. 698-708" ..$ href : chr [1:2] "https://ieeexplore.ieee.org/document/286391" "https://github.com/bavla/symData/tree/master/SDAJSON" ..$ creator: chr "V. Batagelj" ..$ date : chr "Mon Oct 30 01:14:37 2023" $ head :List of 3 ..$ nUnits: num 8 ..$ nVars : num 6 ..$ vars :List of 6 .. ..$ V1:List of 2 .. .. ..$ ID : chr "Gravity" .. .. ..$ type: chr "interval" .. ..$ V2:List of 2 .. .. ..$ ID : chr "Freezing" .. .. ..$ type: chr "interval" .. ..$ V3:List of 2 .. .. ..$ ID : chr "Iodine" .. .. ..$ type: chr "interval" .. ..$ V4:List of 2 .. .. ..$ ID : chr "Saponif" .. .. ..$ type: chr "interval" .. ..$ V5:List of 4 .. .. ..$ ID : chr "MajorAcids" .. .. ..$ type: chr "set" .. .. ..$ cats: chr [1:9] "L" "Ln" "O" "P" ... .. .. 
..$ long: chr [1:9] "linoleic" "linolenic" "oleic" "palmitic" ... .. ..$ V6:List of 4 .. .. ..$ ID : chr "MAbin" .. .. ..$ type: chr "members" .. .. ..$ cats: chr [1:9] "L" "Ln" "O" "P" ... .. .. ..$ long: chr [1:9] "linoleic" "linolenic" "oleic" "palmitic" ... $ SDF :'data.frame': 8 obs. of 6 variables: ..$ Gravity:List of 8 .. ..$ : num [1:2] 0.93 0.935 .. ..$ : num [1:2] 0.93 0.937 ... ..$ Freezing :List of 8 .. ..$ : num [1:2] -27 -18 .. ..$ : num [1:2] -5 -4 ... ..$ Iodine :List of 8 .. ..$ : num [1:2] 170 204 .. ..$ : num [1:2] 192 208 ... ..$ Saponif :List of 8 .. ..$ : num [1:2] 118 196 .. ..$ : num [1:2] 188 197 ... ..$ MajorAcids :List of 8 .. ..$ : int [1:5] 1 2 3 4 5 ... .. ..$ : int [1:6] 1 3 4 5 6 9 ..$ MAbin :List of 8 .. ..$ : num [1:9] 1 1 1 1 1 0 0 0 0 ... .. ..$ : num [1:9] 1 0 1 1 1 1 0 0 1 > ===== Network file formats and tools ===== EasyGraph: Patterns 4, 100839, October 13, 2023 Comparison of network analysis tools in terms of supporting different network I/O types name; EasyGraph; NetworkX; igraph; SNAP; graph-tool; Gephi; Cytoscape Edge List; 1; 1; 1; 1; 0; 1; 0 GraphML ; 1; 1; 1; 0; 1; 1; 1 GML ; 1; 1; 1; 0; 1; 1; 1 Pickle ; 1; 1; 1; 0; 1; 0; 0 Pajek ; 1; 1; 1; 0; 0; 1; 0 GraphViz ; 1; 0; 1; 1; 1; 1; 0 UCINET DL; 1; 0; 0; 0; 0; 1; 0 GEXF ; 1; 0; 0; 0; 0; 1; 0 > G <- as.matrix(read.csv("NetFormats.csv",sep=";",head=TRUE,skip=2,row.names=1)) > G EasyGraph NetworkX igraph SNAP graph.tool Gephi Cytoscape Edge List 1 1 1 1 0 1 0 GraphML 1 1 1 0 1 1 1 GML 1 1 1 0 1 1 1 Pickle 1 1 1 0 1 0 0 Pajek 1 1 1 0 0 1 0 GraphViz 1 0 1 1 1 1 0 UCINET DL 1 0 0 0 0 1 0 GEXF 1 0 0 0 0 1 0 > n <- 8; u <- rep(NA,n) > GDF <- data.frame(Tools=u) > rownames(GDF) <- rownames(G) > v <- vector("list",n) > for(i in 1:8) v[[i]] <- as.vector(G[i,]) > GDF$Tools <- v > GDF Tools Edge List 1, 1, 1, 1, 0, 1, 0 GraphML 1, 1, 1, 0, 1, 1, 1 GML 1, 1, 1, 0, 1, 1, 1 Pickle 1, 1, 1, 0, 1, 0, 0 Pajek 1, 1, 1, 0, 0, 1, 0 GraphViz 1, 0, 1, 1, 1, 1, 0 UCINET DL 1, 0, 0, 0, 0, 1, 0 
GEXF 1, 0, 0, 0, 0, 1, 0 > > Ghead <- list(nUnits=8, nVars=1, vars=list(V1=list(ID="Tools",type="members",cats=colnames(G)))) > Ginfo <- list(dataset="NetFormats", + title="Network file formats and tools", + by="Gao etal.", + ref="EasyGraph: Patterns 4, 100839, October 13, 2023", + href=c("https://www.cell.com/patterns/pdf/S2666-3899(23)00218-0.pdf", + "https://github.com/bavla/symData/tree/master/SDAJSON"), + creator="V. Batagelj", + date=date() + ) > GT <- list(format="SDAJSON",info=Ginfo,head=Ghead,SDF=GDF) > write(toJSON(GT),"NetFormats.json") > str(GT) List of 4 $ format: chr "SDAJSON" $ info :List of 7 ..$ dataset: chr "NetFormats" ..$ title : chr "Network file formats and tools" ..$ by : chr "Gao etal." ..$ ref : chr "EasyGraph: Patterns 4, 100839, October 13, 2023" ..$ href : chr [1:2] "https://www.cell.com/patterns/pdf/S2666-3899(23)00218-0.pdf" "https://github.com/bavla/symData/tree/master/SDAJSON" ..$ creator: chr "V. Batagelj" ..$ date : chr "Mon Oct 30 01:47:36 2023" $ head :List of 3 ..$ nUnits: num 8 ..$ nVars : num 1 ..$ vars :List of 1 .. ..$ V1:List of 3 .. .. ..$ ID : chr "Tools" .. .. ..$ type: chr "members" .. .. ..$ cats: chr [1:7] "EasyGraph" "NetworkX" "igraph" "SNAP" ... $ SDF :'data.frame': 8 obs. of 1 variable: ..$ Tools:List of 8 .. ..$ : num [1:7] 1 1 1 1 0 1 0 .. ..$ : num [1:7] 1 1 1 0 1 1 1 .. ..$ : num [1:7] 1 1 1 0 1 1 1 .. ..$ : num [1:7] 1 1 1 0 1 0 0 .. ..$ : num [1:7] 1 1 1 0 0 1 0 .. ..$ : num [1:7] 1 0 1 1 1 1 0 .. ..$ : num [1:7] 1 0 0 0 0 1 0 .. ..$ : num [1:7] 1 0 0 0 0 1 0