====== Statistics ======

===== Combinatorics =====

<code>
> combn(letters[1:4], 2)
     [,1] [,2] [,3] [,4] [,5] [,6]
[1,] "a"  "a"  "a"  "b"  "b"  "c" 
[2,] "b"  "c"  "d"  "c"  "d"  "d" 
> combn(letters[1:4], 3)
     [,1] [,2] [,3] [,4]
[1,] "a"  "a"  "a"  "b" 
[2,] "b"  "b"  "c"  "c" 
[3,] "c"  "d"  "d"  "d" 
> utils:::menuInstallPkgs()
package ‘combinat’ successfully unpacked and MD5 sums checked
> library(combinat)
> permn(letters[1:4])
[[1]]
[1] "a" "b" "c" "d"

[[2]]
[1] "a" "b" "d" "c"

[[3]]
[1] "a" "d" "b" "c"
...
</code>

https://www.topcoder.com/blog/generating-combinations/ \\
https://www.geeksforgeeks.org/heaps-algorithm-for-generating-permutations/ \\
https://www.topcoder.com/blog/generating-permutations/

Generating all subsets of an n-set: the counter .s runs through 0, 1, 2, ... and its binary representation is read as the characteristic vector of the next subset.

<code>
> startSubsets <- function(n){.n <<- n; .s <<- -1}
> nextSubset <- function(){
+   .s <<- .s+1
+   S <- as.logical(intToBits(.s)[1:(.n+1)])
+   if(S[.n+1]) {.s <<- -1; return(NULL)}
+   return(S[1:.n])
+ }   # the counter .s is limited by .Machine$integer.max
> startSubsets(3)
> repeat {S <- nextSubset(); if(is.null(S)) break; cat("{",letters[1:3][S],'}\n')}
{ }
{ a }
{ b }
{ a b }
{ c }
{ a c }
{ b c }
{ a b c }
</code>

[[https://www4.uwsp.edu/math/nwodarz/Math209Files/209-0809F-L10-Section06_03-AlgorithmsForGeneratingPermutationsAndCombinations-Notes.pdf|Permutations]] in increasing lexicographic order.

<code>
> startPerm <- function(n){.n <<- n; .s <<- 1:n; .first <<- TRUE }
> nextPerm <- function(){
+   if(.first){ .first <<- FALSE; return(.s) }
+   m <- .n-1
+   while(.s[m]>.s[m+1]) m <- m-1        # largest m with .s[m] < .s[m+1]
+   k <- .n
+   while(.s[m]>.s[k]) k <- k-1          # largest k with .s[k] > .s[m]
+   .t <<- .s[m]; .s[m] <<- .s[k]; .s[k] <<- .t
+   p <- m+1; q <- .n                    # reverse the tail .s[(m+1):.n]
+   while(p<q){ .t <<- .s[p]; .s[p] <<- .s[q]; .s[q] <<- .t; p <- p+1; q <- q-1 }
+   return(.s)
+ }
> startPerm(4)
> tryCatch({for(i in 1:30) cat(i,nextPerm(),"\n")}, error=function(err){})
1 1 2 3 4 
2 1 2 4 3 
3 1 3 2 4 
4 1 3 4 2 
5 1 4 2 3 
6 1 4 3 2 
7 2 1 3 4 
8 2 1 4 3 
9 2 3 1 4 
10 2 3 4 1 
11 2 4 1 3 
12 2 4 3 1 
13 3 1 2 4 
14 3 1 4 2 
15 3 2 1 4 
16 3 2 4 1 
17 3 4 1 2 
18 3 4 2 1 
19 4 1 2 3 
20 4 1 3 2 
21 4 2 1 3 
22 4 2 3 1 
23 4 3 1 2 
24 4 3 2 1 
NULL
</code>

https://blogs.msdn.microsoft.com/gpalem/2013/03/28/make-vectorize-your-friend-in-r/

===== Autocorrelation =====

==== Simple example ====

<code>
> library(statnet)
> g <- c(
+  0, 1, 0, 1, 0, 0, 0, 0, 0,
+  0, 0, 1, 0, 1, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 1, 0, 0, 0,
+  0, 0, 0, 0, 1, 0, 1, 0, 0,
+  0, 0, 0, 0, 0, 1, 0, 1, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 1,
+  0, 0, 0, 0, 0, 0, 0, 1, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0 )
> G <- symmetrize(matrix(g,byrow=TRUE,nrow=9))
> rownames(G) <- colnames(G) <- c("A","B","C","D","E","F","G","H","I")
> plot.sociomatrix(G)
> gplot(G,gmode="graph",displaylabels=TRUE)
> a1 <- c( 1, 2, 3, 2, 3, 4, 3, 4, 5 )
> gplot(G,gmode="graph",displaylabels=TRUE,vertex.cex=a1)
> nacf(G,a1,type="geary",mode="graph")[2]
        1 
0.3333333 
> nacf(G,a1,type="moran",mode="graph")[2]
  1 
0.5 
> a2 <- c(3, 4, 3, 4, 3, 2, 1, 2, 5 )
> nacf(G,a2,type="geary",mode="graph")[2]
1 
1 
> nacf(G,a2,type="moran",mode="graph")[2]
    1 
-0.25 
> a3 <- c(4, 1, 4, 2, 5, 2, 3, 3, 3 )
> nacf(G,a3,type="geary",mode="graph")[2]
       1 
1.833333 
> nacf(G,a3,type="moran",mode="graph")[2]
     1 
-0.875 
</code>

==== Florentine families ====

<code>
> library(statnet)
> data(florentine)
> fw <- flomarriage %v% "wealth"
> gplot(flomarriage,displaylabels=TRUE,vertex.cex=0.025*fw,
+   gmode="graph",main='Florentine families')
> I <- nacf(flomarriage,fw,type="moran",mode="graph")
> I
          0           1           2           3           4           5 
 1.00000000 -0.31073529  0.06531299 -0.06045322 -0.06267282  0.01039729 
          6           7           8           9          10          11 
 0.00000000  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000 
         12          13          14          15 
 0.00000000  0.00000000  0.00000000  0.00000000 
> C <- nacf(flomarriage,fw,type="geary",mode="graph")
> C
          0           1           2           3           4           5 
0.000000000 1.683607336 0.811642725 0.899673648 0.826831324 0.006496392 
          6           7           8           9          10          11 
0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 
         12          13          14          15 
0.000000000 0.000000000 0.000000000 0.000000000 
> I[2]
         1 
-0.3107353 
> C[2]
       1 
1.683607 
</code>
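To see what nacf reports at lag 1, Moran's I and Geary's C can be reproduced directly from the adjacency matrix and the attribute vector using their standard definitions. The two helper functions below are a minimal sketch (they are not part of the original session); they should reproduce the lag-1 values above, e.g. 0.5 and 0.3333333 for G and a1.

<code>
# Minimal sketch: lag-1 Moran's I and Geary's C from an adjacency
# matrix W and an attribute vector x (standard definitions).
moranI <- function(W, x){
  z <- x - mean(x)
  (length(x)/sum(W)) * as.numeric(t(z) %*% W %*% z) / sum(z^2)
}
gearyC <- function(W, x){
  z <- x - mean(x)
  D <- outer(x, x, "-")^2
  ((length(x)-1) * sum(W*D)) / (2*sum(W)*sum(z^2))
}
moranI(G, a1); gearyC(G, a1)   # should match nacf(...)[2] above
</code>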
===== CUG =====

<code>
> library(statnet)
> data(florentine)
> rSize <- cug.test(flobusiness,centralization,
+   FUN.arg=list(FUN=betweenness),mode="graph",cmode="size")
> rEdges <- cug.test(flobusiness,centralization,
+   FUN.arg=list(FUN=betweenness),mode="graph",cmode="edges")
> rDyad <- cug.test(flobusiness,centralization,
+   FUN.arg=list(FUN=betweenness),mode="graph",cmode="dyad.census")
> names(rSize)
[1] "obs.stat" "rep.stat" "mode"     "diag"     "cmode"    "plteobs"  "pgteobs" 
[8] "reps"    
> rSize

Univariate Conditional Uniform Graph Test

Conditioning Method: size 
Graph Type: graph 
Diagonal Used: FALSE 
Replications: 1000 

Observed Value: 0.2057143 
Pr(X>=Obs): 0.001 
Pr(X<=Obs): 0.999 

> # Aggregate results
> Betweenness <- c(rSize$obs.stat,rEdges$obs.stat,rDyad$obs.stat)
> PctGreater <- c(rSize$pgteobs,rEdges$pgteobs,rDyad$pgteobs)
> PctLess <- c(rSize$plteobs,rEdges$plteobs,rDyad$plteobs)
> report <- cbind(Betweenness, PctGreater, PctLess)
> rownames(report) <- c("Size","Edges","Dyads")
> report
      Betweenness PctGreater PctLess
Size    0.2057143      0.001   0.999
Edges   0.2057143      0.713   0.289
Dyads   0.2057143      0.738   0.263
> gplot(flobusiness,gmode="graph")
> gplot(flobusiness,gmode="graph",displaylabels=TRUE)
> par(mfrow=c(1,3))
> plot(rSize,  main="Betweenness \nConditioned on Size" )
> plot(rEdges, main="Betweenness \nConditioned on Edges" )
> plot(rDyad,  main="Betweenness \nConditioned on Dyads" )
> par(mfrow=c(1,1))
</code>

===== QAP =====

<code>
> help(qaptest)
> gcor(flobusiness,flomarriage)
[1] 0.3718679
> (rCor <- qaptest(list(flobusiness,flomarriage),gcor,
+   g1=1,g2=2,reps=1000))

QAP Test Results

Estimated p-values:
        p(f(perm) >= f(d)): 0.001 
        p(f(perm) <= f(d)): 1 

> plot(rCor, xlim=c(-0.25, 0.4))
</code>
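The QAP idea itself is easy to write out by hand: jointly permute the rows and columns of one of the matrices, so that its structure is preserved while the correspondence between vertices is destroyed, and recompute the statistic. The sketch below is not part of the original session; the tail proportion should be close to the p(f(perm) >= f(d)) value reported above.

<code>
# Hand-rolled QAP null distribution (sketch):
# rmperm() applies one random row/column permutation to flomarriage,
# gcor() then recomputes the graph correlation with flobusiness.
obs  <- gcor(flobusiness,flomarriage)
null <- replicate(1000, gcor(flobusiness, rmperm(flomarriage)))
mean(null >= obs)      # Monte Carlo estimate of p(f(perm) >= f(d))
</code>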
===== Preparing data =====

Local copy: C:\Users\batagelj\Documents\papers\2018\moskva\NetR\doc\R\grund\ \\
Online: https://raw.githubusercontent.com/bavla/netstat/master/data/LGang/

The LGang data set includes attributes and the co-offending network of a youth gang. The data were collected by James Densley (Metropolitan University) and Thomas Grund (UCD).

<code>
> setwd("C:/Users/batagelj/Documents/papers/2018/moskva/NetR/nets")
> library(sna)
> list.files()
[1] "campnet" "grund"   "padgett" "rivers"  "test"   
> # LG.mat <- read.csv("./grund/LGang-matrix.csv",header=TRUE)
> LG.mat <- read.csv("https://raw.githubusercontent.com/bavla/netstat/master/data/LGang/LGang-matrix.csv",header=TRUE)
> head(LG.mat)
  ID X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26
1  1  0  1  1  2  1  1  2  3  2   2   3   1   0   0   0   0   1   1   0   2   2   3   1   0   1   0
...
> LG.mat <- LG.mat[,-1]           # remove the ID column
> LG.mat[LG.mat==1] <- 0          # recode: values of 1 become 0
> LG.mat[LG.mat>=2] <- 1          #         values of 2 or more become 1 (tie)
> LG.net <- network(as.matrix(LG.mat),directed=FALSE)   # convert to a network object
> # LG.attrib <- read.csv("./grund/LGang-attributes.csv")
> LG.attrib <- read.csv("https://raw.githubusercontent.com/bavla/netstat/master/data/LGang/LGang-attributes.csv")
> head(LG.attrib)
  ID year age  birthplace residence arrests convictions prison music rank rankrev core
1  1 1989  20 West Africa         0      16           4      1     1    1       5    1
2  2 1989  20   Caribbean         0      16           7      1     0    2       4    1
3  3 1990  19   Caribbean         0      12           4      1     0    2       4    1
4  4 1988  21   Caribbean         0       8           1      0     0    2       4    1
5  5 1985  24   Caribbean         0      11           3      0     0    2       4    1
6  6 1984  25          UK         1      17          10      0     0    2       4    1
> LG.net %v% "ethnicity" <- as.character(LG.attrib$birthplace)
> LG.net %v% "prison" <- LG.attrib$prison
> plot(LG.net)
> plot(LG.net,vertex.col="ethnicity")
> plot(LG.net,vertex.col="prison")
> prisonsize <- 1 + LG.net %v% "prison"    # display prison as vertex size
> plot(LG.net, vertex.cex=prisonsize, vertex.col="ethnicity")
</code>

===== Changes of transitivity with density =====

<code>
> # changes for different densities
> (d <- seq(from=0.05,to=0.85,by=0.1))
[1] 0.05 0.15 0.25 0.35 0.45 0.55 0.65 0.75 0.85
> den <- lapply(d, function(x) gden(rgraph(100,m=50, tprob= x)))
> class(den)
[1] "list"
> length(den)
[1] 9
> class(den[[1]])
[1] "numeric"
> den[[1]]
 [1] 0.05161616 0.05282828 0.04979798 0.04727273 0.05161616 0.05111111 0.05252525 0.04969697 0.04777778
[10] 0.04969697 0.04919192 0.05181818 0.05424242 0.04939394 0.04909091 0.05000000 0.04727273 0.04626263
[19] 0.04959596 0.05474747 0.04858586 0.04888889 0.05181818 0.05171717 0.04868687 0.05060606 0.05343434
[28] 0.04585859 0.04777778 0.04969697 0.05242424 0.05161616 0.04989899 0.04929293 0.05090909 0.04898990
[37] 0.04808081 0.05161616 0.04616162 0.04949495 0.05020202 0.04737374 0.04737374 0.04656566 0.05171717
[46] 0.05080808 0.05040404 0.04939394 0.05313131 0.05060606
> library(beanplot)
> beanplot(den, names = d)
> trans <- lapply(d, function(x) gtrans(rgraph(100,m=50, tprob= x), measure="weakcensus"))
> beanplot(trans, names = d)
> # changes of betweenness with density
> d <- seq(0.05,0.5,by=0.05)
> n <- 100
> r <- 50
> bw <- lapply(d,
+   function(x){ temp <- vector('list',r)
+     for (i in 1:r) temp[[i]] <- betweenness(rgraph(n,tprob=x),rescale=TRUE)
+     return(temp)
+   })
> # each element of bw is a list of r betweenness vectors; flatten them for beanplot
> beanplot(lapply(bw, unlist), names=d, what=c(1,1,1,0))
</code>

===== Testing =====

==== Transitivity ====

Are there fewer unclosed triangles in the LGang network than one would expect by chance, given its size and the number of edges it contains?

<code>
> (LG.trans <- gtrans(LG.net))
[1] 0.3635371
> (LG.density <- gden(LG.net))
[1] 0.092942
> (LG.size <- network.size(LG.net))
[1] 54
> (LG.nedges <- network.edgecount(LG.net))
[1] 133
> ERgphs <- rgnm(n=200, nv=LG.size, m=LG.nedges, mode="graph")
> ERgph1 <- network(ERgphs[1,,],directed=F)
> plot(ERgph1)
> plot(LG.net)
> gden(ERgph1)
[1] 0.092942
> gden(LG.net)
[1] 0.092942
> ERtrans <- gtrans(ERgphs)
> plot(density(ERtrans))
> plot(density(ERtrans),xlim=c(0,0.38))
> abline(v=LG.trans,col="red",lwd=2)
</code>
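The comparison with the simulated ER graphs can also be summarised numerically: the share of replicates that are at least as transitive as the LGang network is a Monte Carlo estimate of the tail probability that cug.test reports as Pr(X>=Obs) below (a sketch, not part of the original session).

<code>
# Monte Carlo p-value from the 200 ER replications generated above
mean(ERtrans >= LG.trans)
</code>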
<code>
> # packaged test
> cg <- cug.test(LG.net,"gtrans",mode="graph",cmode=c("edges"),reps=10000)
> cg

Univariate Conditional Uniform Graph Test

Conditioning Method: edges 
Graph Type: graph 
Diagonal Used: FALSE 
Replications: 10000 

Observed Value: 0.3635371 
Pr(X>=Obs): 0 
Pr(X<=Obs): 1 

> plot(cg)
</code>

The probability that a random network with the same number of edges as the LGang network has a higher transitivity than the LGang network is practically 0. So, relative to a random network with the same number of edges, there appears to be strong clustering in the LGang network.

<code>
> cg2 <- cug.test(LG.net,gtrans,cmode="dyad.census")
> cg2

Univariate Conditional Uniform Graph Test

Conditioning Method: dyad.census 
Graph Type: digraph 
Diagonal Used: FALSE 
Replications: 1000 

Observed Value: 0.3635371 
Pr(X>=Obs): 0 
Pr(X<=Obs): 1 

> plot(cg2)
</code>

===== Permutation test QAP =====

<code>
> ety <- get.vertex.attribute(LG.net,"ethnicity")
> ety
 [1] "West Africa" "Caribbean"   "Caribbean"   "Caribbean"   "Caribbean"   "UK"          "East Africa"
 [8] "West Africa" "West Africa" "West Africa" "West Africa" "UK"          "UK"          "UK"         
...
> Mety <- 1 * outer(ety, ety, "==")   # 1 * converts logical to integer
> Mety[1:10,1:10]
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    1    0    0    0    0    0    0    1    1     1
 [2,]    0    1    1    1    1    0    0    0    0     0
 [3,]    0    1    1    1    1    0    0    0    0     0
 [4,]    0    1    1    1    1    0    0    0    0     0
 [5,]    0    1    1    1    1    0    0    0    0     0
 [6,]    0    0    0    0    0    1    0    0    0     0
 [7,]    0    0    0    0    0    0    1    0    0     0
 [8,]    1    0    0    0    0    0    0    1    1     1
 [9,]    1    0    0    0    0    0    0    1    1     1
[10,]    1    0    0    0    0    0    0    1    1     1
> gcor(LG.net,Mety)
[1] 0.1249278
> LG.perm <- network(rmperm(LG.net),directed=F)
> gden(LG.perm)
[1] 0.092942
> gden(LG.net)
[1] 0.092942
> list.vertex.attributes(LG.net)
[1] "ethnicity"    "na"           "prison"       "vertex.names"
> list.vertex.attributes(LG.perm)
[1] "na"           "vertex.names"
> LG.net %v% "vertex.names"
 [1] "X1"  "X2"  "X3"  "X4"  "X5"  "X6"  "X7"  "X8"  "X9"  "X10" "X11" "X12" "X13" "X14" "X15" "X16" "X17"
[18] "X18" "X19" "X20" "X21" "X22" "X23" "X24" "X25" "X26" "X27" "X28" "X29" "X30" "X31" "X32" "X33" "X34"
[35] "X35" "X36" "X37" "X38" "X39" "X40" "X41" "X42" "X43" "X44" "X45" "X46" "X47" "X48" "X49" "X50" "X51"
[52] "X52" "X53" "X54"
> LG.perm %v% "vertex.names"
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
[35] 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
> LG.perm %v% "vertex.names" <- LG.net %v% "vertex.names"
> plot(LG.net,displaylabels=TRUE)
> plot(LG.perm,displaylabels=TRUE)
> gcor(LG.net, LG.perm)
[1] 0.03845129
> gcor(LG.perm, Mety)
[1] -0.03857968
> qap <- qaptest(list(LG.net,Mety),g1=1,g2=2,FUN=gcor,reps=5000)
> summary(qap)

QAP Test Results

Estimated p-values:
        p(f(perm) >= f(d)): 2e-04 
        p(f(perm) <= f(d)): 0.9998 

Test Diagnostics:
        Test Value (f(d)): 0.1249278 
        Replications: 5000 
        Distribution Summary:
                Min:   -0.09659847 
                1stQ:  -0.02275637 
                Med:   -0.001658628 
                Mean:   0.0001832054 
                3rdQ:   0.01943912 
                Max:    0.1354767 

> plot(qap)
</code>

Our observed test statistic (here, the correlation of the network with the same-ethnicity matrix) is much higher than what we would see if we scrambled the network.

<code>
> Netr1 <- rgraph(100, tprob=0.1)
> Netr2 <- rgraph(100, tprob=0.5)
> plot(network(Netr1))
> qap <- qaptest(list(Netr1,Netr2),g1=1,g2=2,FUN=gcor,reps=5000)
> summary(qap)

QAP Test Results

Estimated p-values:
        p(f(perm) >= f(d)): 0.3958 
        p(f(perm) <= f(d)): 0.6132 

Test Diagnostics:
        Test Value (f(d)): 0.002507697 
        Replications: 5000 
        Distribution Summary:
                Min:   -0.03736669 
                1stQ:  -0.006954022 
                Med:   -0.0001956514 
                Mean:  -0.0001938942 
                3rdQ:   0.006562719 
                Max:    0.03562371 

> plot(qap)
</code>

Here the observed correlation falls right inside the distribution of correlations we would expect by chance, controlling for the underlying structure. Hence, the correlation between the two random networks is not significant.
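summary(qap) only reports the two one-sided tail probabilities. A two-sided p-value can be obtained from the stored permutation distribution; the sketch below is not part of the original session and assumes the qaptest object keeps the observed statistic in $testval and the Monte Carlo draws in $dist.

<code>
# Two-sided QAP p-value from the stored permutation distribution (sketch)
mean(abs(qap$dist) >= abs(qap$testval))
hist(qap$dist); abline(v=qap$testval, col="red", lwd=2)
</code>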
===== Regression =====

<code>
> ties <- as.matrix(LG.net)
> diag(ties) <- NA
> ties <- ties[upper.tri(ties)]
> Maty <- Mety
> Maty <- Maty[upper.tri(Maty)]
> dyads <- data.frame(ties,Maty)
> dyads[1:10,]
   ties Maty
1     0    0
2     0    0
3     1    1
4     1    0
5     0    1
6     1    1
7     0    0
8     0    1
9     1    1
10    1    1
> results <- glm(ties~Maty,family="binomial",data=dyads)
> summary(results)

Call:
glm(formula = ties ~ Maty, family = "binomial", data = dyads)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.5679  -0.5679  -0.3794  -0.3794   2.3096  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  -2.5953     0.1239 -20.946  < 2e-16 ***
Maty          0.8523     0.1844   4.622  3.8e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 885.19  on 1430  degrees of freedom
Residual deviance: 864.48  on 1429  degrees of freedom
AIC: 868.48

Number of Fisher Scoring iterations: 5
</code>

This ordinary logistic regression treats the dyads as independent observations; the implicit null hypothesis is that the network is random with the right number of edges. The estimate for Maty (the beta coefficient) is positive, 0.85, and the effect is significant with a very small p-value (3.8e-06). In other words, the odds of a tie between two gang members are Exp(b) = 2.34 times higher when the two gang members have the same ethnicity. There is clear evidence for ethnic homophily in co-offending.

<code>
> help(netlogit)
> dyQap <- netlogit(LG.net,Mety,mode="graph",nullhyp="qapy",reps=100)
> dyQap

Network Logit Model

Coefficients:
            Estimate   Exp(b)     Pr(<=b) Pr(>=b) Pr(>=|b|)
(intercept) -2.5952547 0.07462687 0.91    0.09    0.91     
x1           0.8522854 2.34500000 1.00    0.00    0.00     

Goodness of Fit Statistics:

Null deviance: 1983.787 on 1431 degrees of freedom
Residual deviance: 864.4812 on 1429 degrees of freedom
Chi-Squared test of fit improvement:
         1119.306 on 2 degrees of freedom, p-value 0 
AIC: 868.4812    BIC: 879.0135 
Pseudo-R^2 Measures:
        (Dn-Dr)/(Dn-Dr+dfn): 0.4388909 
        (Dn-Dr)/Dn: 0.5642268 
</code>

==== Interactive coordinates ====

<code>
> xy <- gplot(LG.net,displaylabels=TRUE,label.cex=0.7,usearrows=FALSE,interactive=TRUE)
> gplot(LG.net,coord=xy,displaylabels=TRUE,label.cex=0.7,usearrows=FALSE)
</code>

====== ======
\\