> a <- "TOMATO"; b <- "TOMATO" > a == b [1] FALSE > utf8ToInt(a) [1] 84 79 77 65 84 79 > utf8ToInt(b) [1] 1058 1054 1052 1040 1058 1054
> wdir <- "C:/Users/batagelj/Documents/papers/2017/Moscow/EDA/test" > setwd(wdir) > W <- c(4,3,5,8,4,9,8,4,9) > H <- c("D","H","V","CD","CH","CDCH","CD2","CH2","X") > T <- read.fwf("trees.dat",widths=W,skip=1,sep="") > names(T) <- H > head(T) D H V CD CH CDCH CD2 CH2 X 1 8.3 70 10.3 -4.9484 -6 29.6903 24.4865 36 4822.30 2 8.6 65 10.3 -4.6484 -11 51.1323 21.6075 121 4807.40 3 8.8 63 10.2 -4.4484 -13 57.8290 19.7881 169 4878.72 4 10.5 72 16.4 -2.7484 -4 10.9935 7.5536 16 7938.00 5 10.7 81 18.8 -2.5484 5 -12.7419 6.4943 25 9273.69 6 10.8 83 19.7 -2.4484 7 -17.1387 5.9946 49 9681.12 >
> wdir <- "C:/Users/batagelj/Documents/papers/2017/Moscow/EDA/test" > setwd(wdir) > wines <- read.csv("wine.data",header=FALSE) > fn <- c("Class","Alcohol","Malic acid","Ash","Alcalinity of ash","Magnesium", + "Total phenols","Flavanoids","Nonflavanoid phenols","Proanthocyanins", + "Color intensity","Hue","OD280/OD315 of diluted wines","Proline") > names(wines) <- fn > dim(wines) [1] 178 14 > wines[1:3,] Class Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols 1 1 14.23 1.71 2.43 15.6 127 2.80 2 1 13.20 1.78 2.14 11.2 100 2.65 3 1 13.16 2.36 2.67 18.6 101 2.80 Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue 1 3.06 0.28 2.29 5.64 1.04 2 2.76 0.26 1.28 4.38 1.05 3 3.24 0.30 2.81 5.68 1.03 OD280/OD315 of diluted wines Proline 1 3.92 1065 2 3.40 1050 3 3.17 1185 >
> wdir <- "C:/Users/batagelj/Documents/papers/2017/Moscow/EDA/test" > setwd(wdir) > library(readxl) Warning message: package ‘readxl’ was built under R version 3.4.2 > M <- read_excel("./data-7523-2017-10-13.xlsx") > str(M) Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 93 obs. of 5 variables: $ <U+041A><U+043E><U+0434> : chr "1" "2" "3" "4" ... $ <U+0413><U+043E><U+0434> : chr "2010" "2010" "2010" "2010" ... $ global_id : chr "37591658" "37591659" "37591660" "37591661" ... $ <U+041C><U+0435><U+0441><U+044F><U+0446> : chr "<U+044F><U+043D><U+0432><U+0430><U+0440><U+044C>" "<U+0444><U+0435><U+0432><U+0440><U+0430><U+043B><U+044C>" ... $ <U+0413><U+043E><U+0441><U+0443><U+0434><U+0430><U+0440><U+0441><U+0442><U+0432><U+0435><U+043D><U+043D><U+0430><U+044F> ... > names(M) <- c("code","year","ID","month","marriage") > mm <- M$month > mN <- mm[1:12] > month <- factor(mm,levels=mN) > as.numeric(month) [1] 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 [26] 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 [51] 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 [76] 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 > D <- read_excel("./data-7522-2017-10-13.xlsx") > names(D) <- c("code","year","ID","month","divorce") > str(D) Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 93 obs. of 5 variables: $ code : chr "1" "2" "3" "4" ... $ year : chr "2010" "2010" "2010" "2010" ... $ ID : chr "37591658" "37591659" "37591660" "37591661" ... $ month : chr "<U+044F><U+043D><U+0432><U+0430><U+0440><U+044C>" "<U+0444><U+0435><U+0432><U+0440><U+0430><U+043B><U+044C>" ... $ divorce: chr "3302" "2937" "4361" "3943" ... > MD <- data.frame(month=paste(M$year,"-",as.numeric(month),sep=""), + mar=as.integer(M$marriage),div=as.integer(D$divorce)) > L <- nrow(MD) > summary(MD$mar) Min. 1st Qu. Median Mean 3rd Qu. Max. 3642 5207 7260 7908 10882 13915 > summary(MD$div) Min. 1st Qu. Median Mean 3rd Qu. Max. 
2596 3459 3720 3677 3918 4474 > me <- as.Date(paste(MD$month,'-01',sep=''),"%Y-%m-%d") > plot(me,MD$mar,ylim=c(0,14000),xlab="months",ylab="freq", + main="Marriage and divorce in Moscow",type="n") > points(me,MD$mar,col="red",pch=20,cex=1.5) > points(me,MD$div,col="blue",pch=20,cex=1.5) > plot(me,MD$mar,col="red",pch=20,cex=1.5,ylim=c(0,14000),xlab="months", + ylab="freq",main="Marriage and divorce in Moscow",type='l') > plot(1:L,1:L,ylim=c(0,14000),main="Marriage and divorce in Moscow", + xlab="",ylab="",type="n") > points(1:L,sort(MD$mar),col="red",pch=20,cex=1.5) > points(1:L,sort(MD$div),col="blue",pch=20,cex=1.5) > (q <- (m <- sum(MD$mar))/(d <- sum(MD$div))) [1] 2.150769 > c(m,d) [1] 735477 341960 > plot(MD$mar,MD$div,xlab="marriages",ylab="divorces",pch=16,main="Marriage and divorce in Moscow") > rp <- lm(MD$div ~ MD$mar) > abline(rp,col="red",lwd=2) > plot(month,MD$mar,col="red",pch=20,cex=1.5,ylim=c(0,14000),xlab="months", + ylab="freq",main="Marriage in Moscow") > y <- as.integer(M$year)-2009 > b <- rev(heat.colors(n=10)[1:8]) > plot(1:12,1:12,ylim=c(0,14000),xlab="months",ylab="freq",main="Marriage in Moscow",type="n") > for(k in 1:8){ Y <- MD$mar[y==k]; X <- 1:length(Y); points(X,Y,col=b[k],pch=20,cex=1.5,type="b") }
> page <- "http://www.gutenberg.org/cache/epub/3435/pg3435.txt" > text <- readLines(con<-url(page)); close(con) > length(text) [1] 17581 > i <- grep("\\*\\*\\* START OF THIS PROJECT GUTENBERG EBOOK",text,ignore.case=TRUE) > i [1] 21 > j <- grep("End of the Project Gutenberg EBook",text,ignore.case=TRUE)[1] > j [1] 17220 > book <- text[(i+1):(j-1)] > separator <- "[[:punct:]]+|[[:space:]]+" > items <- unlist(strsplit(book,separator)) > words <- tolower(items[nchar(items)>0]) > length(words) [1] 178740 > t <- table(words) > z <- rev(sort(t)) > z[1:10] words the and of to a i in he my his 10596 10259 5015 4111 3176 3169 2457 2331 2069 1867 > length(z) [1] 14546 > plot(1:length(z),z,log="xy",pch=16,cex=0.7,xlab="",ylab="freq", + main="The Book of the Thousand Nights and a Night, Vol 1") > N <- names(z) > sw <- read.table("stopwords.dat",header=FALSE,stringsAsFactors=FALSE)$V1 > head(sw) [1] "a" "about" "above" "across" "after" [6] "afterwards" > spec <- z[!(N %in% sw)] > spec[1:30] words fn thou thee said o thy s allah 1398 949 919 852 777 723 704 527 king till man al came day night went 409 397 382 369 352 332 323 303 ... > sw <- c(sw,"fn","thou","thee","o","s","thy") > spec <- z[!(N %in% sw)] > library(wordcloud) > wordcloud(names(spec)[1:100],spec[1:100],scale=c(5,.5))
files.dir
alum.dat anneal.dat anscomb.xls anscombe.dat batch.dat beam.dat bearings.dat boxes.dat boxmix.dat bread.dat
> wdir <- "C:/Users/batagelj/Documents/papers/2017/Moscow/EDA/test" > setwd(wdir) > dir.create("./DL") > pa <- "https://msu.edu/course//stt/351/snapshot.afs/Petruccelli_dat/" > L <- as.vector(read.csv("files.dir",header=FALSE)$V1) > length(L) [1] 10 > for(fn in L){ + fname <- paste(pa,fn,sep=""); cat("---",fn,date(),"\n") + test <- tryCatch(download.file(fname,fn,method="auto"),error=function(e) e) + } --- alum.dat Fri Nov 10 14:58:50 2017 trying URL 'https://msu.edu/course//stt/351/snapshot.afs/Petruccelli_dat/alum.dat' Content type 'text/plain' length 1810 bytes downloaded 1810 bytes ... --- bread.dat Fri Nov 10 14:59:00 2017 trying URL 'https://msu.edu/course//stt/351/snapshot.afs/Petruccelli_dat/bread.dat' Content type 'text/plain' length 973 bytes downloaded 973 bytes > date() [1] "Fri Nov 10 14:59:00 2017"
> library(XML) > pnews <- xmlParse(file="TrumpPutin.xml") > root <- xmlRoot(pnews) > meta <- root[[1]][[1]] > meta[[4]] <date key="9-11-2017">9-11-2017</date> > xmlAttrs(meta[[4]]) key "9-11-2017" > xmlValue(meta[[4]]) [1] "9-11-2017" > cont <- root[[1]][[2]] > xmlName(cont) [1] "content" > xmlSize(cont) [1] 4 > xpathSApply(doc = pnews, path = "//person") [[1]] <person key="Donald Trump">Трампе</person> [[2]] <person key="Vladimir Putin">Владимир Путин</person> [[3]] <person key="Donald Trump">Дональд Трамп</person> [[4]] <person key="Donald Trump">Американский лидер</person> [[5]] <person key="Vladimir Putin">Путиным</person> [[6]] <person key="Donald Trump">главы Белого дома</person> [[7]] <person key="Vladimir Putin">российский президент</person> [[8]] <person key="Vladimir Putin">Владимир Путин</person> [[9]] <person key="Sergey Lavrov">Сергей Лавров</person> [[10]] <person key="Sergey Lavrov">Лавров</person> [[11]] <person key="Barack Obama">Барака Обамы</person> [[12]] <person key="Sergey Lavrov">Лаврова</person> [[13]] <person key="Sergey Lavrov">Лавров</person> > xpathSApply(doc = pnews, path = "//country") [[1]] <country key="Russia">России</country> [[2]] <country key="USA">США</country> [[3]] <country key="Vietnam">вьетнамском <place key="">Дананге</place></country> [[4]] <country key="Russia">России</country> [[5]] <country key="USA">США</country> [[6]] <country key="Ukraine">Украине</country> [[7]] <country key="Siria">сирийского конфликта</country> [[8]] <country key="North Korea; South Korea"> <topic key="Korean peninsula">ситуацию на Корейском полуострове</topic> </country> [[9]] <country key="Siria"> <topic key="conflict Siria">сирийское урегулирование</topic> </country> [[10]] <country key="Iraq"> <topic key="Iraq">Ирак</topic> </country> [[11]] <country key="Israel; Palestine"> <topic key="confict Israel-Palestine"> палестино-израильский конфликт</topic> </country> [[12]] <country key="Ukraine"> <topic key="Ukraine">Украину</topic> </country> > 
(P <- xpathSApply(pnews,"//person",xmlAttrs)) key key key key "Donald Trump" "Vladimir Putin" "Donald Trump" "Donald Trump" key key key key "Vladimir Putin" "Donald Trump" "Vladimir Putin" "Vladimir Putin" key key key key "Sergey Lavrov" "Sergey Lavrov" "Barack Obama" "Sergey Lavrov" key "Sergey Lavrov" > xpathSApply(pnews,"//person",xmlValue) [1] "Трампе" "Владимир Путин" "Дональд Трамп" [4] "Американский лидер" "Путиным" "главы Белого дома" [7] "российский президент" "Владимир Путин" "Сергей Лавров" [10] "Лавров" "Барака Обамы" "Лаврова" [13] "Лавров" > (C <- xpathSApply(pnews,"//country",xmlAttrs)) key key "Russia" "USA" key key "Vietnam" "Russia" key key "USA" "Ukraine" key key "Siria" "North Korea; South Korea" key key "Siria" "Iraq" key key "Israel; Palestine" "Ukraine" > (Pn <- table(P)) P Barack Obama Donald Trump Sergey Lavrov Vladimir Putin 1 4 4 4 > (Cn <- table(unlist(strsplit(C,";")))) Palestine South Korea Iraq Israel North Korea Russia 1 1 1 1 1 2 Siria Ukraine USA Vietnam 2 2 2 1 >
> s <- "3+4*5" > s [1] "3+4*5" > cat(s,"=",eval(parse(text=s)),"\n") 3+4*5 = 23
> library(jsonlite) > J <- fromJSON(readLines("john.json")) > J $firstName [1] "John" $lastName [1] "Smith" $isAlive [1] TRUE $age [1] 25 $address $address$streetAddress [1] "21 2nd Street" $address$city [1] "New York" $address$state [1] "NY" $address$postalCode [1] "10021-3100" $phoneNumbers type number 1 home 212 555-1234 2 office 646 555-4567 $children list() $spouse NULL > john <- toJSON(J) > john {"firstName":["John"],"lastName":["Smith"],"isAlive":[true],"age":[25],"address":{"streetAddress":["21 2nd Street"], "city":["New York"],"state":["NY"],"postalCode":["10021-3100"]},"phoneNumbers":[{"type":"home","number":"212 555-1234"}, {"type":"office","number":"646 555-4567"}],"children":[],"spouse":{}} > js <- file("john2.json",encoding="UTF-8") > write(john,file=js)
> ime <- new.env(hash=TRUE,parent=emptyenv()) > assign("Liska",list(i=1,n=1),env=ime) > length(ime) [1] 1 > exists("Liska",env=ime,inherits=FALSE) [1] TRUE > exists("Šeka",env=ime,inherits=FALSE) [1] FALSE > assign("Šeka",list(i=2,n=1),env=ime) > exists("Šeka",env=ime,inherits=FALSE) [1] TRUE > get("Šeka",env=ime,inherits=FALSE) $i [1] 2 $n [1] 1
Environments can also be used to keep variables alive after the function that assigned them has returned.
> raj <- new.env() > exA <- function(a,b,c){u <- a; v <<- b; assign("z",c,env=raj)} > u Error: object 'u' not found > v Error: object 'v' not found > z Error: object 'z' not found > exA(3,4,5) > u Error: object 'u' not found > v [1] 4 > z Error: object 'z' not found > get("z",env=raj) [1] 5