Code from Random

Current value of a changing quantity in the computer

Different quantities in the computer are unpredictably changing and could serve as a source of randomness. Most of them are very difficult to access from high-level programming languages such as R. A good candidate is the system time

> t <- Sys.time()
> t
[1] "2022-08-14 23:17:59 CEST"
> cat(t,"\n")
1660511880 
> class(t)
[1] "POSIXct" "POSIXt" 
> s <- Sys.time()
> s
[1] "2022-08-14 23:19:42 CEST"
> cat(s,"\n")
1660511982 
> e <- Sys.time()
> e
[1] "2022-08-14 23:20:12 CEST"
> cat(e,"\n")
1660512013 
> e-s
Time difference of 30.37726 secs
> as.numeric(s)
[1] 1660511982
> as.numeric(e)
[1] 1660512013
> as.numeric(e)-as.numeric(s)
[1] 30.37726
> T <- "1970-01-01 00:00:14"
> t <- as.POSIXct(T,tz="GMT")
> as.numeric(t)
[1] 14

In R, the Sys.time() counts the seconds from January 1, 1970. For many possible applications, the changes in seconds are too slow.

Recently two R libraries were developed that provide access to time at the level of nanoseconds: nanotime and RcppCCTZ. They are based on the 64 bit representation of integers.

> install.packages("RcppCCTZ")
> install.packages("nanotime")

Let's try!

> library(nanotime)
> library(bit64)
> t <- Sys.time()
> as.numeric(t)
[1] 1660442526
> v <- nanotime(t)
> as.integer64(v)
integer64
[1] 1660442525739342000

The last three places are 0 - Windows provides time stamps at the level of microseconds.

We could use Sys.time() values for throwing dice in an interactive game. Changing fast are last 3 or 4 places.

> k <- 6
> repeat{
+   a <- readline("Throw - press ENTER (yes) or s and ENTER (stop)")
+   if(a=="s") break
+   t <- nanotime(Sys.time()); n <- as.integer64(t)
+   d <- as.integer((n%/%1000)%%k+1); print(n); print(d)
+ }
Throw - press ENTER (yes) or s and ENTER (stop)
integer64
[1] 1660517375129460000
[1] 1
Throw - press ENTER (yes) or s and ENTER (stop)
integer64
[1] 1660517379681713000
[1] 6
Throw - press ENTER (yes) or s and ENTER (stop)
integer64
[1] 1660517381783372000
[1] 3
Throw - press ENTER (yes) or s and ENTER (stop)
integer64
[1] 1660517384964824000
[1] 3
Throw - press ENTER (yes) or s and ENTER (stop)
integer64
[1] 1660517387146669000
[1] 2
Throw - press ENTER (yes) or s and ENTER (stop)
integer64
[1] 1660517391795778000
[1] 5
...

A more serious application is a random initialization of some parameter in a program.

Wichmann and Hill's generator

> WichHill <- function(n){
+    output <- numeric(n)
+    seed <- get(".WHseed",env=.WH)
+    x <- seed[1]; y <- seed[2]; z <- seed[3]
+    for(i in 1:n){
+       x <- (171*x) %% 30269
+       y <- (172*y) %% 30307
+       z <- (170*z) %% 30323
+       output[i] <- (x/30269 + y/30307 + z/30323) %% 1.0
+    }
+    assign(".WHseed",c(x,y,z),env=.WH)
+    output
+ }
> 
> .WH <- new.env(); assign(".WHseed",1:3,env=.WH)
> WichHill(10)
 [1] 0.03381877 0.77754189 0.05273525 0.74462407 0.49036219 0.98285437
 [7] 0.80915099 0.71338138 0.80102091 0.98958603
> r <- WichHill(10000)
> hist(r,prob=TRUE)
> r <- WichHill(100000)
> hist(r,prob=TRUE)

Basic generators in R

RNGkind()
set.seed(2018)
.Random.seed[1:6]
runif(6)
.Random.seed[1:6]
set.seed(2018)
runif(6)
RNGkind("Super")
RNGkind()
.Random.seed[1:6]

> RNGkind()
[1] "Mersenne-Twister" "Inversion"       
> set.seed(2018)
> .Random.seed[1:6]
[1]        403        624  -79855522 -803040953  -27922212 -118944723
> runif(6)
[1] 0.33615347 0.46372327 0.06058539 0.19743361 0.47431419 0.30104860
> .Random.seed[1:6]
[1]        403          6 -881521081  953668654 1212651912  902275211
> set.seed(2018)
> runif(6)
[1] 0.33615347 0.46372327 0.06058539 0.19743361 0.47431419 0.30104860
> RNGkind("Super")
> RNGkind()
[1] "Super-Duper" "Inversion"  
> .Random.seed[1:6]
[1]         402  1572731790 -1300846921          NA          NA          NA

Uniform and Bernoulli distribution

> random <- function() {return(runif(1,0,1))}
> dice <- function(n=6) {return(1+trunc(n*random()))}
> Bernoulli <- function(p) {if(random()<=p) return(1) else return(0)}
> n <- 10; s <- numeric(n)
> for(i in 1:n) s[i] <- random()
> s
 [1] 0.29062721 0.70364315 0.25889652 0.22473082 0.69339126 0.13130646
 [7] 0.04117978 0.53640553 0.39368591 0.56187369
> n <- 20; s <- numeric(n)
> for(i in 1:n) s[i] <- dice(3)
> s
 [1] 1 3 2 2 2 3 2 2 2 2 3 3 2 1 1 1 1 2 2 3
> for(i in 1:n) s[i] <- Bernoulli(1/3)
> s
 [1] 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 0 0
> table(s)
s
 0  1 
15  5 
> n <- 10000; s <- numeric(n)
> for(i in 1:n) s[i] <- Bernoulli(1/3)
> table(s)
s
   0    1 
6672 3328

Geometric distribution

> geometric <- function(p){
+   if (p>=1) return(1)
+   if (p<=0) return(Inf)
+   return(trunc(log(1-random())/log(1-p))+1)
+ }
> n <- 10; s <- numeric(n)
> for(i in 1:n) s[i] <- geometric(1/3)
> s
 [1] 3 2 2 1 4 4 2 5 1 3
> for(i in 1:n) s[i] <- geometric(0.1)
> s
 [1] 21 11 12  2 12 10 17  2  2  6
> n <- 100000; s <- numeric(n)
> for(i in 1:n) s[i] <- geometric(0.1)
> t <- table(s)
> x <- as.numeric(names(t))
> plot(x,t,pch=16,cex=0.7)

Tabelaric distribution

> tabelaR <- function(p){
+   r <- random(); k <- 0;
+   while (r >= 0) {k <- k+1; r <- r - p[k]}
+   return(names(p)[k])
+ }
>
> f <- c(1,2,3,2,1); names(f) <- c("mon","tue","wed","thu","fri")
> p <- f/sum(f)
> s <- vector("character",10)
> for (i in 1:10) s[i] <- tabelaR(p)
> s
 [1] "fri" "tue" "wed" "wed" "tue" "wed" "tue" "wed" "tue" "wed"
> n <- 100000; s <- vector("character",n)
> for (i in 1:n) s[i] <- tabelaR(t)
> table(s)
s
  fri   mon   thu   tue   wed 
11127 11114 22403 22015 33341

Poisson distribution

> PoissonRnd <- function(lambda){
+   k <- 0; p <- exp(-lambda); r <- random()-p
+   while(r > 0) {k <- k+1; p <- p*lambda/k; r <- r-p}
+   return(k)
+ }
> 
> n <- 10000; s <- numeric(n)
> for(i in 1:n) s[i] <- PoissonRnd(0.3)
> table(s)
s
   0    1    2    3    4    5 
7381 2251  333   33    1    1 
> z <- rpois(n,0.3)
> table(z)
z
   0    1    2    3    4 
7430 2204  324   38    4

Uniform continuous distribution

> uniformRnd <- function(a,b) {a+random()*(b-a)}
> n <- 100000; s <- numeric(n)
> for(i in 1:n) s[i] <- uniformRnd(75,100)
> hist(s,prob=TRUE)

(Negative) exponential distribution

> exponRnd <- function(lambda) {return(-log(1-random())/lambda)} 
> n <- 100000; s <- numeric(n)
> for(i in 1:n) s[i] <- exponRnd(1.5)
> hist(s,prob=TRUE); lines(density(s),col="red")

Cauchy distribution

> cauchyRnd <- function(a=1,b=0) {tan(pi*(random()-0.5))/a + b}
> n <- 20; s <- numeric(n)
> for(i in 1:n) s[i] <- cauchyRnd()
> s
 [1]  1.4517597 -2.3231091 -1.2913811 -1.6924863  8.4182520 -4.6988607
 [7]  1.4703372  0.1900819  0.2629615  2.9184602 -0.1944157  6.6547234
[13] -0.3956465 -0.4921238  0.4071398 -8.2433149 -0.6530219  0.2205136
[19]  0.1589820 -0.5175836

von Neumann rejection method

Triangle { (0,0), (1,1), (2,0) }

> vonNeumann <- function(a,b,G,g,...){
+   repeat { s <- a + random()*(b-a)
+     if (G*random()<=g(s,...)) return(s)
+   }
+ }
> triang <- function(x){if (x>2) return(0); if (x<0) return(0);
+   if (x<1) return(x) else return(2-x)
+ }
> 
> n <- 100000; s <- numeric(n)
> for(i in 1:n) s[i] <- vonNeumann(-0.5,2.5,1,triang)
> hist(s,prob=TRUE); lines(density(s),col="red",lw=2)

Normal or Gaussian distribution

> gaussRnd <- function(st=1,m=0,s=1){
+   if (st==0){new <- TRUE; .Gauss <<- list(m=m,s=s,n=new,r=0)}
+   else {m <- .Gauss$m; s <- .Gauss$s; new <- .Gauss$n }
+   if (new) {p <- sqrt(-2*log(random())); q <- 2*pi*random()
+     x <- p*sin(q); .Gauss$r <<- p*cos(q)
+   } else  x <- .Gauss$r
+   .Gauss$n <<- !new
+   return(m+s*x)
+ }
> n <- 100000; s <- numeric(n); t <- gaussRnd(0,175,10)
> for(i in 1:n) s[i] <- gaussRnd()
> hist(s,prob=TRUE); lines(density(s),col="red",lw=2)

Multidimensional normal distribution

 
> multinormal <- function(T){return(t(t(T)%*%rnorm(dim(T)[1])))}
> data(longley); R <- cor(longley); T <- chol(R); pairs(longley)
> n <- 2000; s <- NULL;
> for (i in 1:n) s <- rbind(s,multinormal(T))
> pairs(s)

Random 3D direction

> dir3D<-function(){return(c(2*pi*random(),acos(1-2*random())))}
> vector3D <- function(dir){
+   return( c( sin(dir[2])*cos(dir[1]), sin(dir[2])*sin(dir[1]), cos(dir[2]) ))
+ }
> points3D <- function(n){
+   cat(file="sphere.net",c("*vertices ",n,"\n"))
+   for (i in 1:n) {
+     cat(file="sphere.net",i,i,vector3D(dir3D()),"\n",append=TRUE)
+   }
+ }
> setwd("C:/Users/batagelj/Documents/papers/2018/moskva/NetR/nets/test")
> points3D(500)

To view the file sphere.net use Pajek.

Levy flights

> direction <- function() {phi <- 2*pi*random(); return(c(cos(phi),sin(phi)))}
>
> # Brown motion
> x <- c(0,0); n <- 200; s <- matrix(0,nrow=n,ncol=2); s[1,] <- x
> for (i in 2:n) {x <- x + direction(); s[i,] <- x}
> plot(s,pch=16,col="red"); lines(s)
>
> # Levy flights
> x <- c(0,0); n <- 500; s <- matrix(0,nrow=n,ncol=2); s[1,] <- x
> for (i in 2:n) {x <- x + cauchyRnd(3)*direction(); s[i,] <- x}
> plot(s,pch=16,col="red"); lines(s)
> 
> x <- c(0,0); n <- 500; s <- matrix(0,nrow=n,ncol=2); s[1,] <- x
> for (i in 2:n) {x <- x + cauchyRnd(0.1)*direction(); s[i,] <- x}
> plot(s,pch=16,col="red"); lines(s)

Uniform distribution in multidimensional ellipsoid

> elipsoid <- function(T){
+   m <- dim(T)[1]; r <- random()^(1/m)
+   x <- rnorm(m); y <- x*r/sqrt(sum(x^2))
+   return(t(t(T) %*% y))
+ }
> 
> R <-
+   c( 4.0000000, 1.1395235, 1.775876, 0.7753723,
+      1.1395235, 4.0000000, 1.065415, 0.7881692,
+      1.7758761, 1.0654147, 4.000000, 1.5841046,
+      0.7753723, 0.7881692, 1.584105, 4.0000000 )
> m <- 4; dim(R) <- c(m,m); T <- chol(R)
> n <- 500; s <- matrix(0,nrow=n,ncol=m)
> for (i in 1:n) s[i,] <- elipsoid(T)
> pairs(s)
>
> R <- matrix(0,4,4); diag(R) <- c(1,2,3,4); T <- chol(R)
> n <- 500; s <- matrix(0,nrow=n,ncol=m)
> for (i in 1:n) s[i,] <- elipsoid(T)
> pairs(s)

Shuffle

> shuffle <- function(x){
+   n <- length(x)
+   for ( i in n:2 ){j <- dice(i)
+     t <- x[i]; x[i] <- x[j]; x[j] <- t }
+   return(x)
+ }
> x <- c("⇐","⇑","⇒","⇓","⇖","⇗","⇘","⇙","●") 
> for(i in 1:5) cat(x <- shuffle(x),"\n")
⇒ ⇘ ⇗ ⇖ ⇙ ● ⇓ ⇑ ⇐ 
⇐ ⇖ ⇒ ⇓ ⇑ ⇗ ● ⇙ ⇘ 
● ⇑ ⇐ ⇓ ⇙ ⇒ ⇘ ⇗ ⇖ 
⇖ ⇐ ⇒ ⇗ ● ⇑ ⇓ ⇘ ⇙ 
⇗ ⇒ ⇓ ● ⇐ ⇖ ⇙ ⇑ ⇘

Sample

> sampleN <- function(n,m){
+   k <- 0; s <- integer(m)
+   for(i in 0:n) if ((n-i)*random() < m-k) {
+     k <- k+1; s[k] <- i
+     if (k==m) return(s)
+   }
+ }
> for(i in 1:5) cat(sampleN(100,15),"\n")
2 14 19 25 26 33 39 45 53 58 62 73 87 92 94 
6 18 20 21 31 33 34 36 40 49 54 69 73 74 98 
9 12 27 41 44 45 48 49 53 54 71 77 78 82 97 
12 24 33 36 37 44 49 52 54 58 67 72 73 79 84 
1 5 15 26 49 53 54 58 61 74 76 84 92 93 97

Number π

> k <- 0; n <- 2000; u <- integer(n)
> for(i in 1:n){
+   if (random()^2+random()^2 < 1) k <- k+1;
+   u[i] <- k }
> p <- u/seq(n)*4
> plot(p,pch=20,cex=0.8)
> lines(c(-10,n),c(pi,pi),col="red")

Cube

Average distance between two random points in a unit cube.

MathWorld

> n <- 100000; t <- 0.6617
> x <- matrix(runif(3*n),byrow=FALSE,nrow=n)
> y <- matrix(runif(3*n),byrow=FALSE,nrow=n) 
> a <- cumsum(d<-sqrt(rowSums((x-y)^2)))/seq(n)
> plot(1:n,a,pch=16,cex=0.5)
> lines(c(-n/20,n),c(t,t),col="red",lw=2)
> 
> I <- a[n]; lambda <- 3
> sigma <- sqrt(sum((d-I)^2)/n) 
> delta <- lambda*sigma/sqrt(n)
> cat("I =",I,"within error",delta,"with probability 0.997\n") 
I = 0.6619123 within error 0.001121217 with probability 0.997
> plot(1:n,a,pch=16,cex=0.5,ylim=c(0.66,0.665))
> lines(c(-n/20,n),c(t,t),col="red",lw=2)

Sandokan

> sandokan <- function(n,m=-1,r=100){
+   if (m<0) m <- n
+   s <- integer(r)
+   for ( i in 1:r ){
+     album <- rep(TRUE,n)
+     different <- 0; cards <-0
+     while (different < m) {
+       k <- dice(n); cards <- cards + 1
+       if (album[k]) {
+         album[k] <- FALSE; different <- different + 1
+       }
+     }
+     s[i] <- cards
+   }
+   return(s)
+ }
> 
> sandokanT <- function(n,m=-1){
+   if (m<0) m <- n
+   return(n*sum(1/((n-m+1):n)))
+ }
> 
> picture1 <- function(t,s,...){
+   n <- length(s)
+   mu <- cumsum(s)/1:n
+   sigma <- sqrt(cumsum(s^2)/1:n - mu^2)
+   plot(s,pch=16,cex=0.2,...)
+   lines(1:n,mu,col="red")
+   lines(1:n,mu+sigma,col="blue")
+   lines(1:n,mu-sigma,col="blue")
+   abline(h=t,col="goldenrod")
+ }
> 
> picture2 <- function(s,breaks=50,...){
+   n <- length(s); t <- NULL 
+   for(x in min(s):max(s)) {t <- c(t,5*n*dnorm(x,mean(s),sqrt(var(s))))}
+   hist(s,breaks=breaks,...)
+   lines(min(s):max(s),t,col="red",lw=2)
+ }
>
> (teo <- sandokanT(400,350))
[1] 828.2897
>
> s <- sandokan(400,350,2000)
> head(s)
[1] 866 837 747 772 815 801
> picture1(teo,s)
> picture2(s)
>
> pdf("sandokan.pdf")
> picture1(teo,s,ylim=c(820,835))
> dev.off()

Monte Carlo in statistics

Variates X1 and X2 have standard normal distribution. Estimate the expected value of their absolute difference.

> N <- 10000
> x1 <- rnorm(N); x2 <- rnorm(N); y <- abs(x1-x2)
> print(theta.hat <- mean(y))
[1] 1.14084
> print(se.theta <- sd(y)/sqrt(N))
[1] 0.008639172
> print(theta <- 2/sqrt(pi))
[1] 1.128379
> print(se <- sqrt((2-4/pi)/N))
[1] 0.008525025
>
> N <- 1000000
> x1 <- rnorm(N); x2 <- rnorm(N); y <- abs(x1-x2)
> print(theta.hat <- mean(y))
[1] 1.128559
> print(se.theta <- sd(y)/sqrt(N))
[1] 0.0008520579
> print(theta <- 2/sqrt(pi))
[1] 1.128379
> print(se <- sqrt((2-4/pi)/N))
[1] 0.0008525025

Resampling

Approximating a distribution F by the sample

> n <- 20
> mu <- 175; sigma <- 10; a <- 130; b <- 220
> x <- sort(rnorm(n,mean=mu,sd=sigma))
> y <- (1:n)/n
> tit <- paste("N(",mu,",",sigma,"), n=",n,sep="")
> curve(pnorm(x,mean=mu,sd=sigma),a,b,lwd=2,main=tit,ylab="p")
> points(x,y,pch=16,col="red")

Bootstrap

> library(bootstrap)
> plot(law82$LSAT,law82$GPA)
> points(law$LSAT,law$GPA,pch=16,col='Red')
> print(cor(law82$LSAT,law82$GPA))
[1] 0.7599979
> print(theta.hat <- cor(law$LSAT,law$GPA))
[1] 0.7763745
> N <- 2000
> n <- nrow(law)
> theta.star <- numeric(N)
> for(k in 1:N){
+   i <- sample(1:n,size=n,replace=TRUE)
+   L <- law$LSAT[i];G <- law$GPA[i]
+   theta.star[k] <- cor(L,G)
+ }
> hist(theta.star,prob=TRUE)
> print(theta.star.mean <- mean(theta.star))
[1] 0.7712286
> print(bias <- theta.star.mean - theta.hat)
[1] -0.005145919
> print(se.theta.star <- sd(theta.star))
[1] 0.1302567