Multi-modal variables / (Sub)sets

Oils

Data

M. Ichino and H. Yaguchi, “Generalized Minkowski metrics for mixed feature-type data analysis,” in IEEE Transactions on Systems, Man, and Cybernetics, vol. 24, no. 4, pp. 698-708, April 1994, doi: 10.1109/21.286391. https://ieeexplore.ieee.org/document/286391

Oils and Fats data
Oil_or_Fat; Specific_gravity_(g/cm3); Freezing_point_(C); Iodine_value; Saponif._value; Major_acids
Linseed;  [0.930, 0.935]; [-27,-18]; [170, 204]; [118, 196]; L, Ln, O, P, M
Perilla;  [0.930, 0.937]; [ -5, -4]; [192, 208]; [188, 197]; L, Ln, O, P, S
Cotton ;  [0.916, 0.918]; [ -6, -1]; [ 99, 113]; [189, 198]; L, O, P, M, S
Sesame ;  [0.920, 0.926]; [ -6, -4]; [104, 116]; [187, 193]; L, O, P, S, A
Camelia;  [0.916, 0.917]; [-21,-15]; [ 80,  82]; [189, 193]; L, O
Olive  ;  [0.914, 0.919]; [  0,  6]; [ 79,  90]; [187, 196]; L, O, P, S
Beef   ;  [0.860, 0.870]; [ 30, 38]; [ 40,  48]; [190, 199]; O, P, M, C, S
Hog    ;  [0.858, 0.864]; [ 22, 32]; [ 53,  77]; [190, 202]; L, O, P, M, S, Lu

L: linoleic acid, Ln: linolenic acid, O: oleic acid, P: palmitic acid, M: myristic acid, S: stearic acid, A: arachic acid, C: capric acid, Lu: lauric acid

Matrix

> wdir <- "C:/Users/vlado/docs/papers/2023/SDA/Paris/test"
> setwd(wdir)
> library(jsonlite)
> T <- read.csv("oils.csv",sep=";",head=TRUE,skip=1)
> T[,1] <- trimws(T[,1])
> T[,c(1,6)]
  Oil_or_Fat        Major_acids
1    Linseed     L, Ln, O, P, M
2    Perilla     L, Ln, O, P, S
3     Cotton      L, O, P, M, S
4     Sesame      L, O, P, S, A
5    Camelia               L, O
6      Olive         L, O, P, S
7       Beef      O, P, M, C, S
8        Hog  L, O, P, M, S, Lu
> A <- lapply(strsplit(T[,6],","),trimws)
> L <- Reduce(union,A)
> L
[1] "L"  "Ln" "O"  "P"  "M"  "S"  "A"  "C"  "Lu"
> D <- matrix(0,nrow=8,ncol=9); rownames(D) <- T[,1]; colnames(D) <- L
> for(i in 1:8) D[i,factor(A[[i]],L)] <- 1
> D
        L Ln O P M S A C Lu
Linseed 1  1 1 1 1 0 0 0  0
Perilla 1  1 1 1 0 1 0 0  0
Cotton  1  0 1 1 1 1 0 0  0
Sesame  1  0 1 1 0 1 1 0  0
Camelia 1  0 1 0 0 0 0 0  0
Olive   1  0 1 1 0 1 0 0  0
Beef    0  0 1 1 1 1 0 1  0
Hog     1  0 1 1 1 1 0 0  1
> 

Symbolic data frame

> getInterval <- function(x) as.numeric(unlist(strsplit(gsub("\\]","",gsub("\\[","",x)),",")))
> n <- 8; u <- rep(NA,n)
> SDF <- data.frame(Gravity=u,Freezing=u,Iodine=u,Saponif=u,MajorAcids=u,MAbin=u)
> rownames(SDF) <- T[,1]
> for(j in 1:4) SDF[[j]] <- lapply(T[,j+1],getInterval)
> ma <- vector("list",n); mb <- vector("list",n)
> for(i in 1:8) {ma[[i]] <- as.integer(factor(A[[i]],L));
+   v <- rep(0,9); v[ma[[i]]] <- 1; mb[[i]] <- v}
> SDF$MajorAcids <- ma; SDF$MAbin <- mb
> SDF
             Gravity Freezing   Iodine  Saponif       MajorAcids                     MAbin
Linseed 0.930, 0.935 -27, -18 170, 204 118, 196    1, 2, 3, 4, 5 1, 1, 1, 1, 1, 0, 0, 0, 0
Perilla 0.930, 0.937   -5, -4 192, 208 188, 197    1, 2, 3, 4, 6 1, 1, 1, 1, 0, 1, 0, 0, 0
Cotton  0.916, 0.918   -6, -1  99, 113 189, 198    1, 3, 4, 5, 6 1, 0, 1, 1, 1, 1, 0, 0, 0
Sesame  0.920, 0.926   -6, -4 104, 116 187, 193    1, 3, 4, 6, 7 1, 0, 1, 1, 0, 1, 1, 0, 0
Camelia 0.916, 0.917 -21, -15   80, 82 189, 193             1, 3 1, 0, 1, 0, 0, 0, 0, 0, 0
Olive   0.914, 0.919     0, 6   79, 90 187, 196       1, 3, 4, 6 1, 0, 1, 1, 0, 1, 0, 0, 0
Beef      0.86, 0.87   30, 38   40, 48 190, 199    3, 4, 5, 8, 6 0, 0, 1, 1, 1, 1, 0, 1, 0
Hog     0.858, 0.864   22, 32   53, 77 190, 202 1, 3, 4, 5, 6, 9 1, 0, 1, 1, 1, 1, 0, 0, 1
> long <- c("linoleic","linolenic","oleic","palmitic","myristic","searic","arachic","capric","lauric")
> head <- list(nUnits=8, nVars=6,
+   vars=list(
+     V1=list(ID="Gravity",type="interval"),
+     V2=list(ID="Freezing",type="interval"),
+     V3=list(ID="Iodine",type="interval"),
+     V4=list(ID="Saponif",type="interval"),
+     V5=list(ID="MajorAcids",type="set",cats=L,long=long),
+     V6=list(ID="MAbin",type="members",cats=L,long=long)
+   )
+ )
> str(head)
List of 3
 $ nUnits: num 8
 $ nVars : num 6
 $ vars  :List of 6
  ..$ V1:List of 2
  .. ..$ ID  : chr "Gravity"
  .. ..$ type: chr "interval"
  ..$ V2:List of 2
  .. ..$ ID  : chr "Freezing"
  .. ..$ type: chr "interval"
  ..$ V3:List of 2
  .. ..$ ID  : chr "Iodine"
  .. ..$ type: chr "interval"
  ..$ V4:List of 2
  .. ..$ ID  : chr "Saponif"
  .. ..$ type: chr "interval"
  ..$ V5:List of 4
  .. ..$ ID  : chr "MajorAcids"
  .. ..$ type: chr "set"
  .. ..$ cats: chr [1:9] "L" "Ln" "O" "P" ...
  .. ..$ long: chr [1:9] "linoleic" "linolenic" "oleic" "palmitic" ...
  ..$ V6:List of 4
  .. ..$ ID  : chr "MAbin"
  .. ..$ type: chr "members"
  .. ..$ cats: chr [1:9] "L" "Ln" "O" "P" ...
  .. ..$ long: chr [1:9] "linoleic" "linolenic" "oleic" "palmitic" ...
> 
> info <- list(dataset="Oils",
+   title="Oils and fats",
+   by="Ichino M., Yaguchi H.",
+   ref="Generalized Minkowski metrics for mixed feature-type data analysis. IEEE Trans. Syst. Man Cybern., 24 (4) (1994), pp. 698-708",
+   href=c("https://ieeexplore.ieee.org/document/286391",
+          "https://github.com/bavla/symData/tree/master/SDAJSON"),
+   creator="V. Batagelj",
+   date=date()
+ )
> Oils <- list(format="SDAJSON",info=info,head=head,SDF=SDF)
> write(toJSON(Oils),"Oils.json")
> str(Oils)
List of 4
 $ format: chr "SDAJSON"
 $ info  :List of 7
  ..$ dataset: chr "Oils"
  ..$ title  : chr "Oils and fats"
  ..$ by     : chr "Ichino M., Yaguchi H."
  ..$ ref    : chr "Generalized Minkowski metrics for mixed feature-type data analysis. IEEE Trans. Syst. Man Cybern., 24 (4) (1994), pp. 698-708"
  ..$ href   : chr [1:2] "https://ieeexplore.ieee.org/document/286391" "https://github.com/bavla/symData/tree/master/SDAJSON"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Mon Oct 30 01:14:37 2023"
 $ head  :List of 3
  ..$ nUnits: num 8
  ..$ nVars : num 6
  ..$ vars  :List of 6
  .. ..$ V1:List of 2
  .. .. ..$ ID  : chr "Gravity"
  .. .. ..$ type: chr "interval"
  .. ..$ V2:List of 2
  .. .. ..$ ID  : chr "Freezing"
  .. .. ..$ type: chr "interval"
  .. ..$ V3:List of 2
  .. .. ..$ ID  : chr "Iodine"
  .. .. ..$ type: chr "interval"
  .. ..$ V4:List of 2
  .. .. ..$ ID  : chr "Saponif"
  .. .. ..$ type: chr "interval"
  .. ..$ V5:List of 4
  .. .. ..$ ID  : chr "MajorAcids"
  .. .. ..$ type: chr "set"
  .. .. ..$ cats: chr [1:9] "L" "Ln" "O" "P" ...
  .. .. ..$ long: chr [1:9] "linoleic" "linolenic" "oleic" "palmitic" ...
  .. ..$ V6:List of 4
  .. .. ..$ ID  : chr "MAbin"
  .. .. ..$ type: chr "members"
  .. .. ..$ cats: chr [1:9] "L" "Ln" "O" "P" ...
  .. .. ..$ long: chr [1:9] "linoleic" "linolenic" "oleic" "palmitic" ...
 $ SDF   :'data.frame': 8 obs. of  6 variables:
  ..$ Gravity:List of 8
  .. ..$ : num [1:2] 0.93 0.935
  .. ..$ : num [1:2] 0.93 0.937
  ...
  ..$ Freezing   :List of 8
  .. ..$ : num [1:2] -27 -18
  .. ..$ : num [1:2] -5 -4
  ...
  ..$ Iodine     :List of 8
  .. ..$ : num [1:2] 170 204
  .. ..$ : num [1:2] 192 208
  ...
  ..$ Saponif    :List of 8
  .. ..$ : num [1:2] 118 196
  .. ..$ : num [1:2] 188 197
  ...
  ..$ MajorAcids :List of 8
  .. ..$ : int [1:5] 1 2 3 4 5
  ...
  .. ..$ : int [1:6] 1 3 4 5 6 9
  ..$ MAbin      :List of 8
  .. ..$ : num [1:9] 1 1 1 1 1 0 0 0 0
  ...
  .. ..$ : num [1:9] 1 0 1 1 1 1 0 0 1
> 

Network file formats and tools

EasyGraph: Patterns 4, 100839, October 13, 2023
Comparison of network analysis tools in terms of supporting different network I/O types
name; EasyGraph; NetworkX; igraph; SNAP; graph-tool; Gephi; Cytoscape
Edge List; 1; 1; 1; 1; 0; 1; 0 
GraphML  ; 1; 1; 1; 0; 1; 1; 1 
GML      ; 1; 1; 1; 0; 1; 1; 1 
Pickle   ; 1; 1; 1; 0; 1; 0; 0 
Pajek    ; 1; 1; 1; 0; 0; 1; 0 
GraphViz ; 1; 0; 1; 1; 1; 1; 0 
UCINET DL; 1; 0; 0; 0; 0; 1; 0 
GEXF     ; 1; 0; 0; 0; 0; 1; 0 
> G <- as.matrix(read.csv("NetFormats.csv",sep=";",head=TRUE,skip=2,row.names=1))
> G
          EasyGraph NetworkX igraph SNAP graph.tool Gephi Cytoscape
Edge List         1        1      1    1          0     1         0
GraphML           1        1      1    0          1     1         1
GML               1        1      1    0          1     1         1
Pickle            1        1      1    0          1     0         0
Pajek             1        1      1    0          0     1         0
GraphViz          1        0      1    1          1     1         0
UCINET DL         1        0      0    0          0     1         0
GEXF              1        0      0    0          0     1         0
> n <- 8; u <- rep(NA,n)
> GDF <- data.frame(Tools=u)
> rownames(GDF) <- rownames(G)
> v <- vector("list",n)
> for(i in 1:8) v[[i]] <- as.vector(G[i,])
> GDF$Tools <- v
> GDF
                        Tools
Edge List 1, 1, 1, 1, 0, 1, 0
GraphML   1, 1, 1, 0, 1, 1, 1
GML       1, 1, 1, 0, 1, 1, 1
Pickle    1, 1, 1, 0, 1, 0, 0
Pajek     1, 1, 1, 0, 0, 1, 0
GraphViz  1, 0, 1, 1, 1, 1, 0
UCINET DL 1, 0, 0, 0, 0, 1, 0
GEXF      1, 0, 0, 0, 0, 1, 0
> 
> Ghead <- list(nUnits=8, nVars=1, vars=list(V1=list(ID="Tools",type="members",cats=colnames(G))))
> Ginfo <- list(dataset="NetFormats",
+   title="Network file formats and tools",
+   by="Gao etal.",
+   ref="EasyGraph: Patterns 4, 100839, October 13, 2023",
+   href=c("https://www.cell.com/patterns/pdf/S2666-3899(23)00218-0.pdf",
+          "https://github.com/bavla/symData/tree/master/SDAJSON"),
+   creator="V. Batagelj",
+   date=date()
+ )
> GT <- list(format="SDAJSON",info=Ginfo,head=Ghead,SDF=GDF)
> write(toJSON(GT),"NetFormats.json")
> str(GT)
List of 4
 $ format: chr "SDAJSON"
 $ info  :List of 7
  ..$ dataset: chr "NetFormats"
  ..$ title  : chr "Network file formats and tools"
  ..$ by     : chr "Gao etal."
  ..$ ref    : chr "EasyGraph: Patterns 4, 100839, October 13, 2023"
  ..$ href   : chr [1:2] "https://www.cell.com/patterns/pdf/S2666-3899(23)00218-0.pdf" "https://github.com/bavla/symData/tree/master/SDAJSON"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Mon Oct 30 01:47:36 2023"
 $ head  :List of 3
  ..$ nUnits: num 8
  ..$ nVars : num 1
  ..$ vars  :List of 1
  .. ..$ V1:List of 3
  .. .. ..$ ID  : chr "Tools"
  .. .. ..$ type: chr "members"
  .. .. ..$ cats: chr [1:7] "EasyGraph" "NetworkX" "igraph" "SNAP" ...
 $ SDF   :'data.frame': 8 obs. of  1 variable:
  ..$ Tools:List of 8
  .. ..$ : num [1:7] 1 1 1 1 0 1 0
  .. ..$ : num [1:7] 1 1 1 0 1 1 1
  .. ..$ : num [1:7] 1 1 1 0 1 1 1
  .. ..$ : num [1:7] 1 1 1 0 1 0 0
  .. ..$ : num [1:7] 1 1 1 0 0 1 0
  .. ..$ : num [1:7] 1 0 1 1 1 1 0
  .. ..$ : num [1:7] 1 0 0 0 0 1 0
  .. ..$ : num [1:7] 1 0 0 0 0 1 0




vlado/work/sda/mm.txt · Last modified: 2023/10/31 00:21 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki