R> dissassoc(dm, group = mydata$sex, R = 1000)
; dm - dissimilarity matrix, mydata - data frame of covariates. See in R: help(dissvar)
, help(dissassoc)
and help(dissmfac)
.GANOVA - zapiski iz 31.5.2016 (kaj naj bi se še naredilo)
Kaj je ničelna domneva?
Kaj je že narejeno (članki Anderson in ostali) - pregled!
Dobiti primer iz biologije…, kjer imajo zanimivo definirane različnosti med enotami (Katja Rostohar?) Primer - različnost med besedili.
Računanje metričnega indeksa iz matrike:
Povezava: različnosti med enotami in različnostjo od enote do središča
g <- function(x,a=3,b=4,c=5) a**x + b**x - c**x curve(g(x,a=2,b=3,c=6),from=0,to=1,n=101,main="f(r)=2**r+3**r-6**r",xlab="r",ylab="f") lines(c(0,1),c(0,0),col="red") curve(g(x,a=3,b=4,c=5),from=0,to=2.2,n=101,main="f(r)=3**r+4**r-5**r",xlab="r",ylab="f") lines(c(0,2.2),c(0,0),col="red") curve(g(x,a=0.1,b=0.2,c=100),from=0,to=0.2,n=101,main="f(r)=0.1**r+0.2**r-100**r",xlab="r",ylab="f") lines(c(0,0.2),c(0,0),col="red") curve(g(x,a=2,b=8,c=3),from=0,to=1,n=101,main="f(r)=2**r+8**r-3**r",xlab="r",ylab="f") lines(c(0,1),c(0,0),col="red")
f(r;x,y,z) = xr + yr - zr
Newton's method: r' = r - f(r;x,y,z)/f'(r;x,y,z)
f'(r;x,y,z) = xr ln(x) + yr ln(y) - zr ln(z)
https://en.wikipedia.org/wiki/Triangle_inequality : 2 max(a, b, c) < a + b + c
> metrIdx <- function(a,b,c,eps=1e-7){ + r <- 1; delta <- 1; s <- 0 + x <- min(a,b,c); y <- median(c(a,b,c)); z <- max(a,b,c) + if(x==z) return(Inf) + if(x==0) {if(x==z) return(Inf) else return(0)} + if(x+y>z){while(x**r + y**r > z**r) r <- 2*r + } else {while(x**r + y**r > z**r) r <- r/2; r <- 2*r} + while((delta > eps)&&(s<100)){s <- s+1 + delta <- (x**r + y**r - z**r)/(log(x)*x**r + log(y)*y**r - log(z)*z**r) + r <- r - delta; cat(s,r,delta,"\n") + } + return(r) + } > metrIdx(3,4,5) 1 2 0 [1] 2 > metrIdx(2,3,6) 1 1.556355 0.4436451 2 1.185778 0.3705771 3 0.9281817 0.257596 4 0.8102006 0.1179811 5 0.7885321 0.02166846 6 0.7878855 0.0006466786 7 0.7878849 5.594962e-07 8 0.7878849 4.182778e-13 [1] 0.7878849 > metrIdx(0.1,0.2,100) 1 1.782854 0.2171457 2 1.565713 0.2171413 3 1.34859 0.2171233 4 1.131539 0.2170508 5 0.9147816 0.2167574 6 0.6992131 0.2155684 7 0.488413 0.2108001 8 0.2956796 0.1927335 9 0.1578744 0.1378051 10 0.1095511 0.04832334 11 0.1057652 0.003785881 12 0.105746 1.923971e-05 13 0.105746 4.893126e-10 [1] 0.105746 > metrIdx(2,8,3) 1 1.576497 0.4235031 2 1.194857 0.3816404 3 0.8840544 0.3108021 4 0.6816694 0.2023851 5 0.6023689 0.07930052 6 0.5918757 0.01049317 7 0.5917098 0.000165908 8 0.5917097 4.081135e-08 [1] 0.5917097 >
Matrix D from the next section Cars.
> r <- Inf; diag(D) <- 0 > for(i in 1:3){ + for(k in (i+1):4){ + for(j in setdiff(1:4,c(i,k))){ + rr <- metrIdx(D[i,j],D[j,k],D[i,k]) + r <- min(r,rr) + cat(i,j,k,rr,r,"\n\n") + }}} 1 1.580902 0.4190978 2 1.197178 0.3837241 3 0.8759452 0.3212328 4 0.6565239 0.2194213 5 0.5627061 0.09381786 6 0.5480994 0.01460669 7 0.5477855 0.000313949 8 0.5477853 1.417759e-07 9 0.5477853 2.881858e-14 1 3 2 0.5477853 0.5477853 1 1.559775 0.4402253 2 1.16772 0.3920543 3 0.8587376 0.3089828 4 0.6748005 0.1839371 5 0.6168648 0.0579357 6 0.6119783 0.004886454 7 0.6119461 3.219644e-05 8 0.6119461 1.387763e-09 1 4 2 0.6119461 0.5477853 1 1.580902 0.4190978 2 1.197178 0.3837241 3 0.8759452 0.3212328 4 0.6565239 0.2194213 5 0.5627061 0.09381786 6 0.5480994 0.01460669 7 0.5477855 0.000313949 8 0.5477853 1.417759e-07 9 0.5477853 2.881858e-14 1 2 3 0.5477853 0.5477853 1 1.632998 0.3670016 2 1.350959 0.2820399 3 1.185265 0.1656936 4 1.132346 0.0529188 5 1.127643 0.004702768 6 1.127609 3.455153e-05 7 1.127609 1.851484e-09 1 4 3 1.127609 0.5477853 1 1.559775 0.4402253 2 1.16772 0.3920543 3 0.8587376 0.3089828 4 0.6748005 0.1839371 5 0.6168648 0.0579357 6 0.6119783 0.004886454 7 0.6119461 3.219644e-05 8 0.6119461 1.387763e-09 1 2 4 0.6119461 0.5477853 1 1.632998 0.3670016 2 1.350959 0.2820399 3 1.185265 0.1656936 4 1.132346 0.0529188 5 1.127643 0.004702768 6 1.127609 3.455153e-05 7 1.127609 1.851484e-09 1 3 4 1.127609 0.5477853 1 1.580902 0.4190978 2 1.197178 0.3837241 3 0.8759452 0.3212328 4 0.6565239 0.2194213 5 0.5627061 0.09381786 6 0.5480994 0.01460669 7 0.5477855 0.000313949 8 0.5477853 1.417759e-07 9 0.5477853 2.881858e-14 2 1 3 0.5477853 0.5477853 1 3.700217 0.2997831 2 3.461561 0.2386555 3 3.308258 0.1533036 4 3.24794 0.06031766 5 3.239723 0.008217567 6 3.239584 0.0001384606 7 3.239584 3.86783e-08 2 4 3 3.239584 0.5477853 1 1.559775 0.4402253 2 1.16772 0.3920543 3 0.8587376 0.3089828 4 0.6748005 0.1839371 5 0.6168648 0.0579357 6 0.6119783 0.004886454 7 0.6119461 3.219644e-05 8 0.6119461 1.387763e-09 2 1 4 0.6119461 0.5477853 1 3.700217 0.2997831 2 3.461561 0.2386555 3 3.308258 0.1533036 4 3.24794 0.06031766 5 3.239723 0.008217567 6 3.239584 0.0001384606 7 3.239584 3.86783e-08 2 3 4 3.239584 0.5477853 1 1.632998 0.3670016 2 1.350959 0.2820399 3 1.185265 0.1656936 4 1.132346 0.0529188 5 1.127643 0.004702768 6 1.127609 3.455153e-05 7 1.127609 1.851484e-09 3 1 4 1.127609 0.5477853 1 3.700217 0.2997831 2 3.461561 0.2386555 3 3.308258 0.1533036 4 3.24794 0.06031766 5 3.239723 0.008217567 6 3.239584 0.0001384606 7 3.239584 3.86783e-08 3 2 4 3.239584 0.5477853
> metrIdx <- function(a,b,c,eps=1e-7){ + r <- 1; delta <- 1; s <- 0 + x <- min(a,b,c); y <- median(c(a,b,c)); z <- max(a,b,c) + if(x==z) return(Inf) + if(x==0) {if(x==z) return(Inf) else return(0)} + if(x+y>z){while(x**r + y**r > z**r) r <- 2*r + } else {while(x**r + y**r > z**r) r <- r/2; r <- 2*r} + while((delta > eps)&&(s<100)){s <- s+1 + delta <- (x**r + y**r - z**r)/(log(x)*x**r + log(y)*y**r - log(z)*z**r) + r <- r - delta } + return(r) + } > metricIdx <- function(D){ + r <- Inf; numL <- nrow(D); numLm <- numL-1 + for(i in 1:numLm){ for(k in (i+1):numL){ for(j in setdiff(1:numL,c(i,k))){ + r <- min(r,metrIdx(D[i,j],D[j,k],D[i,k])) + }}} + return(r) + } > (rD <- metricIdx(D)) [1] 0.3645253 > S <- D**rD > (rS <- metricIdx(S)) [1] 1
Can we get a lower bound for r using minimal/maximal element of the matrix?
> i <- which.max(D) > arrayInd(i,dim(D)) [,1] [,2] [1,] 18 11 > D[i] [1] 26.29788 > D[18,11] [1] 26.29788 > diag(D) <- Inf > j <- which.min(D) > D[j] [1] 1.386916 > arrayInd(j,dim(D)) [,1] [,2] [1,] 2 1 > D[2,1] [1] 1.386916
> library(TraMineR) > help(TraMineR) > help(dissvar) > help(dissassoc) > help(dissmfac) > setwd("C:/Users/Batagelj/work/clamix/clamix.R") > source("C:\\Users\\Batagelj\\work\\clamix\\clamix.R\\clamix3t.R") > load("./cars2/cars25.rez") > load("./cars2/cars.so") > load("./cars2/cars.meta") > alpha <-rep(1/nVar,nVar) > L <- rez$leaders > names(L) [1] "L1" "L2" "L3" "L4" "L5" "L6" "L7" "L8" "L9" "L10" "L11" "L12" [13] "L13" "L14" "L15" "L16" "L17" "L18" "L19" "L20" "L21" "L22" "L23" "L24" [25] "L25" > hc <- hclustSO(L) > plot(hc,hang=-1) > long[rez$clust==9] [1] "ALFA ROMEO 145 1.4 16V L" "CITROEN SAXO 1.6 VTL" [3] "CITROEN ZX 1.6i AVANTAGE" "CITROEN XANTIA 1.6i X" [5] "DAEWOO NEXIA HB GL STD" "DAEWOO NEXIA HB GL ABG" ... [107] "VOLKSWAGEN GOLF 1.6 CL" "VOLKSWAGEN GOLF 1.6 GL" [109] "VOLKSWAGEN GOLF 1.6 GT" "VOLKSWAGEN GOLF 1.6 PLUS" > total <- computeTotal(L) > numL <- length(L) > numL [1] 25 > numLm <- numL-1 > D <- matrix(nrow=numL,ncol=numL); diag(D) <- Inf > for(i in 1:numLm) for(j in (i+1):numL) { + D[i,j] <- distCl(L[[i]],L[[j]]); D[j,i] <- D[i,j] + } > A <- as.dist(D) > A 1 2 3 4 5 6 7 2 1.386916 3 3.582262 8.397003 4 3.356421 7.104338 6.415633 5 3.693970 7.019706 7.888564 5.153226 6 3.605076 5.832940 4.895284 4.740308 5.805828 7 3.306033 6.352564 6.961267 4.677096 3.937799 5.020761 8 2.592247 4.501111 8.509287 7.658725 6.987216 6.455513 6.909217 9 4.036341 10.416816 11.456966 8.328767 5.413662 7.683022 6.089895 10 3.472821 8.035387 4.845966 2.977475 7.679482 4.702332 7.082956 11 3.250934 10.824382 9.351577 11.642114 12.412319 8.768878 12.648273 12 2.629572 5.249485 5.534103 5.541161 7.545072 5.977826 5.773090 13 2.785301 7.668458 5.306050 5.157297 7.634207 5.499815 6.873963 14 3.483834 10.076100 7.854228 10.320341 10.945101 9.091888 11.255809 15 3.707527 8.460017 7.776793 4.606644 5.363650 5.952800 3.256083 16 3.953338 6.831914 7.246344 6.157174 3.846592 6.317949 5.727512 17 4.027002 9.534567 7.509222 6.748467 5.399018 5.383006 4.796475 18 4.687177 14.541875 16.168530 14.245039 6.358992 11.106008 12.709572 19 3.445288 8.812849 4.376746 6.803439 9.642363 5.529869 9.150236 20 4.101289 7.070411 8.026177 4.514790 4.136549 3.268842 5.145231 21 4.171015 9.960677 10.637548 7.921125 2.331700 7.456142 7.258457 22 1.675898 3.972340 6.545308 6.420424 6.560445 5.777787 6.218061 23 3.399929 6.259198 3.959837 3.880152 5.980594 2.066168 4.195328 24 3.336785 4.857392 4.148561 2.598911 3.742954 3.413352 2.673155 25 2.235043 3.549708 4.768962 4.232677 3.980621 4.786093 4.055086 8 9 10 11 12 13 14 8 9 10.328313 10 8.764973 10.577360 11 12.139533 19.148113 10.878854 12 6.077666 12.245622 6.695133 8.617629 13 9.031828 13.627706 5.781517 9.853327 5.662595 14 9.998934 13.343736 7.141143 5.742655 9.451189 9.091763 15 9.273916 7.198497 7.354566 14.691288 8.877081 8.472681 12.042983 16 6.219615 4.609104 6.787612 9.569811 7.361549 7.143369 8.314172 17 9.478498 4.951715 5.917122 13.245604 10.676097 10.275677 11.367578 18 13.380517 12.029638 17.468914 26.297885 16.120426 19.882129 17.662338 19 9.570105 14.655353 5.018135 7.046792 4.818606 6.655113 9.408573 20 6.969604 6.116134 7.219930 13.185988 7.940596 7.942214 11.656472 21 9.695003 5.568285 10.805160 16.645523 11.293219 11.701357 13.518996 22 2.925262 9.030335 7.249211 5.360439 4.705823 5.839052 5.866798 23 7.226293 7.681937 3.623840 7.641230 5.900978 4.073250 8.159579 24 5.485786 6.281107 4.045954 8.424491 4.070893 3.754298 8.135563 25 2.009623 3.245456 4.364927 6.032709 4.363828 4.661146 4.981074 15 16 17 18 19 20 21 16 6.089135 17 4.992753 5.402473 18 16.289304 2.511787 14.998501 19 10.340307 8.091243 8.253026 21.774856 20 5.375634 6.257397 6.349464 11.931383 9.433085 21 8.416492 3.799841 6.958137 7.741296 12.810770 6.891555 22 7.669240 6.555361 8.427444 11.075206 7.223033 7.216993 8.455048 23 4.748261 5.993337 4.869672 10.922880 4.817182 6.006513 7.598098 24 3.027707 5.615018 5.745588 9.214332 5.878896 3.286702 6.462960 25 4.637410 3.348819 4.557659 4.089213 5.338677 4.398977 4.362140 22 23 24 23 5.798148 24 5.115428 3.306279 25 2.611763 4.701425 4.070759 > print(dissvar(A)) [1] 3.450939