====== Mineral waters ====== ===== Mineral Waters on the World ===== http://www.mineralwaters.org/ or http://www.pmgeiser.ch ==== October 27, 2013 ==== # Mineral Waters # Vladimir Batagelj October 27, 2013 / October 9, 2009 # ------------------------------------------------------------------------------ # setwd("C:/Users/batagelj/test/R/mineral") # source("C:\\Users\\batagelj\\test\\R\\mineral\\mineralWaters.R") # mineral() initial <- function(s,term) substr(s,1,regexpr(term,s[1])[1]-1) # mineral <- function(){ library(XML) dat <- file("waters.dat","w") cat('*** Mineral waters:',date(),'\n*** from http://www.mineralwaters.org/\n') cat('*** Mineral waters:',date(),'\n*** from http://www.mineralwaters.org/\n', file=dat) url <- 'http://www.mineralwaters.org/index.php?func=disp&parval=' ind <- 1:4138 # ind <- c(173,306,956,1255,1347,2044,2079,2660,3659) # strange chars for(p in ind){ pa <- paste(url,p,sep='') st <- readLines(con<-url(pa),warn=FALSE); close(con) dw <- htmlParse(st) ti <- dw['//h2'] if(length(ti)==0){ cat('***',p,'Missing\n'); flush.console() next } tit <- xmlValue(ti[[1]]); Encoding(tit) <- "UTF-8" coun <- xmlValue(dw['//p/b'][[5]]) pl <- dw['//table/tr/td/table/tr/td/p[i]'] plac <- NA if(length(pl)>0){ pla <- xmlValue(pl[[1]]); Encoding(pla) <- "UTF-8" if(substr(pla,1,4)=="Name") { plac <- substr(pla,26,nchar(pla)) } else if(length(pl)>1) { pla <- xmlValue(pl[[2]]); Encoding(pla) <- "UTF-8" if(substr(pla,1,4)=="Name") plac <- substr(pla,26,nchar(pla)) } } cat('***',p,tit,'\n'); flush.console() cat(p,tit,coun,plac,"",sep='|',file=dat) j <- grep('No data from analysis available',st,ignore.case=TRUE) if(length(j)!=0) {cat('\n',file=dat); flush(dat); next} i <- grep('Country: ',st,ignore.case=TRUE) v <- st[i]; i <- regexpr('')) for(k in 2:length(t)){ u <- unlist(strsplit(t[k],'')) q <- unlist(strsplit(initial(u[2],''),' ')) e <- u[1]; i <- regexpr('\">',e) if(i>0) e <- initial(substr(e,i+2,nchar(e)),'') cat(e,q[1],q[2],"",sep='|',file=dat) } cat('\n',file=dat); flush(dat) } cat('*** Finished:',date(),'\n',file=dat) close(dat) cat('*** Finished:',date(),'\n\n') } ==== October 18, 2009 ==== initial <- function(s,term) substr(s,1,regexpr(term,s[1])[1]-1) mineral <- function(){ library(XML) dat <- file("waters.dat","w") url <- 'http://www.pmgeiser.ch' page <- 'http://www.pmgeiser.ch/mineral/index.php?func=alpha&parval=0' stran <- readLines(con<-url(page)); close(con) doc <- htmlParse(stran) ind <- unlist(doc['/html/body/table/tr/td/p/a/@href'],use.names=FALSE) ind <- ind[regexpr('/mineral',ind)==1] cat('*** Mineral waters:',date(),'\n',file=dat) cat('*** from http://www.pmgeiser.ch/\n',file=dat) # ind <- c('/mineral/index.php?func=alpha&parval=0') for(p in ind){ pag <- paste(url,p,sep='') str <- readLines(con<-url(pag),warn=FALSE); close(con) dow <- htmlParse(str) wat <- unlist(dow['//li/a/@href'],use.names=FALSE) for(w in wat){ pa <- paste(url,w,sep='') st <- readLines(con<-url(pa),warn=FALSE); close(con) dw <- htmlParse(st) tit <- xmlValue(dw['//h2'][[1]]) coun <- xmlValue(dw['//p/b'][[6]]) i <- grep('Country: ',st,ignore.case=TRUE) s <- unlist(strsplit(st[i],'Place of Source: ')) plac <- initial(s[2],'

') cat('***',tit,'\n'); flush.console() cat('W',tit,'\n',sep='|',file=dat) cat('C',coun,'\n',sep='|',file=dat) cat('P',plac,'\n',sep='|',file=dat) flush(dat) j <- grep('No data from analysis available',st,ignore.case=TRUE) if(length(j)!=0) next v <- st[i]; i <- regexpr('')) for(k in 2:length(t)){ u <- unlist(strsplit(t[k],'')) q <- unlist(strsplit(initial(u[2],''),' ')) e <- u[1]; i <- regexpr('\">',e) if(i>0) e <- initial(substr(e,i+2,nchar(e)),'') cat('E',e,q[1],q[2],'\n',sep='|',file=dat) flush(dat) } } } close(dat) }
===== Fondazione AMGA - acque in bottiglia ===== http://www.acqueinbottiglia.fondazioneamga.org/ ==== World ==== # http://www.acqueinbottiglia.fondazioneamga.org/world_search_det.asp?ID=1735 setwd("C:/Users/batagelj/work/R/mineral") trim <- function (x) gsub("^\\s+|\\s+$", "", x) mineralA <- function(from,to){ library(XML) dat <- file("watersMGAw.dat","w") cat('*** Mineral waters - MGA world:',date(), '\n*** from http://www.acqueinbottiglia.fondazioneamga.org/\n') cat('*** Mineral waters - MGA world:',date(), '\n*** from http://www.acqueinbottiglia.fondazioneamga.org/\n',file=dat) url <- 'http://www.acqueinbottiglia.fondazioneamga.org/world_search_det.asp?ID=' ind <- from:to cat('*** Started:',date(),'\n',from,'>') for(p in ind){ pa <- paste(url,p,sep='') st <- tryCatch(suppressWarnings(readLines(con<-url(pa),warn=FALSE)), error=function(e) e) close(con) if(typeof(st)!="character") {cat("\n***",p); flush.console(); next } dw <- htmlParse(st) ti <- dw['//div/div/div[@class="bandablutitolo"]'] tit <- xmlValue(ti[[1]]); Encoding(tit) <- "UTF-8" ta <- dw['//table/tr'] if(length(ta)==0){ cat('***',p,'Missing\n'); flush.console() next } cat(p,tit,"",sep="|",file=dat) for(i in 1:length(ta)) { tat <- xmlValue(ta[[i]]); Encoding(tat) <- "UTF-8" u <- unlist(strsplit(tat,':',fixed=TRUE)) if (length(u)>1) cat(paste(trim(u),collapse="|"),"|",sep="",file=dat) } cat('\n',file=dat); flush(dat); cat('.') if(p%%50==0) cat("\n",p+1,">") flush.console() } close(dat) cat('\n*** Finished:',date(),'\n\n') } On November 29, 2013 there were waters with indices in 480:2488 (some missing). ==== Italy ==== # http://www.acqueinbottiglia.fondazioneamga.org/search_det.asp?ID=16 setwd("C:/Users/batagelj/work/R/mineral") trim <- function (x) gsub("^\\s+|\\s+$", "", x) mineralA <- function(from,to){ library(XML) dat <- file("watersMGAi.dat","w") cat('*** Mineral waters - MGA Italia:',date(), '\n*** from http://www.acqueinbottiglia.fondazioneamga.org/\n') cat('*** Mineral waters - MGA Italia:',date(), '\n*** from http://www.acqueinbottiglia.fondazioneamga.org/\n',file=dat) url <- 'http://www.acqueinbottiglia.fondazioneamga.org/search_det.asp?ID=' ind <- from:to cat('*** Started:',date(),'\n',from,'>') for(p in ind){ pa <- paste(url,p,sep='') st <- tryCatch(suppressWarnings(readLines(con<-url(pa),warn=FALSE)), error=function(e) e) close(con) if(typeof(st)!="character") {cat("\n***",p); flush.console(); next } dw <- htmlParse(st) ti <- dw['//div/div/div[@class="bandablutitolo"]'] tit <- xmlValue(ti[[1]]); Encoding(tit) <- "UTF-8" ta <- dw['//table/tr'] if(length(ta)==0){ cat('***',p,'Missing\n'); flush.console() next } cat(p,tit,"",sep="|",file=dat) for(i in 1:length(ta)) { tat <- xmlValue(ta[[i]]); Encoding(tat) <- "UTF-8" u <- unlist(strsplit(tat,':',fixed=TRUE)) if (length(u)>1) cat(paste(trim(u),collapse="|"),"|",sep="",file=dat) } cat('\n',file=dat); flush(dat); cat('.') if(p%%50==0) cat("\n",p+1,">") flush.console() } close(dat) cat('\n*** Finished:',date(),'\n\n') } On November 29, 2013 there were waters with indices in 1:523 (some missing).