Explore / Fitting

Explore / Fitting

Cars

> attach(mtcars)                       
> res <- lm(wt ~ disp)                  
> res[[1]]                              
(Intercept)        disp              
1.599814597 0.007010325               
> plot(wt ~ disp)
> abline(res,col="red",lwd=2)
> predict(res,list(disp=c(410,200)))
         1        2
  4.474048 3.001880

Rivers

> plot(rev(sort(rivers)))
> plot(rev(sort(rivers)),log="xy")
> x <- log(1:length(rivers))
> y <- log(rev(sort(rivers)))
> plot(y ~ x)
> rp <- lm(y ~ x)
> (a <- rp[[1]])
(Intercept)           x 
  8.6233680  -0.6160568 
> abline(rp,col="red",lwd=2)
> plot(rev(sort(rivers)),ylab="rivers",
+ pch=16,main="Power law")
> pow <- function(x){exp(a[1])*x^a[2]}
> x <- 1:length(rivers)
> y <- pow(x)
> points(x,y,pch=20,col="red")

Smoothing

> library(MASS); attach(Boston)
> pairs(Boston)
> plot(dis,nox); s <- order(dis)
> plot(dis,nox,col="blue")
> lines(dis[s],nox[s])
> par(mfrow=c(2,2),cex=0.5)
> plot(dis,nox,col="blue")
> text(11,0.8,"lowess",pos=2)
> lines(lowess(dis,nox))
> plot(dis,nox,col="blue")
> text(11,0.8,"loess",pos=2)
> model <- loess(nox ~ dis)
> x <- seq(1,12.2,0.05)
> y <- predict(model,data.frame(dis=x))
> lines(x,y)
> plot(dis,nox,col="blue")
> text(11,0.8,"gam",pos=2)
> library(mgcv)
> model <- gam(nox ~ s(dis))
> y <- predict(model,list(dis=x))
> lines(x,y)
> plot(dis,nox,col="blue")
> text(11,0.8,"polynomial",pos=2)
> model <- lm(nox ~ dis+I(dis^2)+I(dis^3))
> y <- predict(model,list(dis=x))
> lines(x,y)
> par(mfrow=c(1,1),cex=1)

OECD

OECD data

> oecd <- read.table("OECD.dat",header=TRUE)
> pairs(oecd); attach(oecd)
> plot(agr,pcinc,pch="+")
> # linear regression
> lin <- lm(pcinc ~ agr)
> abline(lin,col="green")
> lp <- lin$coef[2]*agr + lin$coef[1]
> sum((lp - pcinc)^2)
> # exponential with linear regression
> pi <- log(pcinc); m <- lm(pi ~ agr )
> b <- exp(m$coef[1]); a <- exp(m$coef[2])
> pl <- function(x){b*a^x}
> points(agr,pl(agr),col="red",pch=16)
> # least squares
> f <- function(t,p){a <- p[1]; b <- p[2]; b*a^t}
> E <- function(p){d <- f(agr,p) - pcinc; sum(d^2)}
> p0 <- c(a,b); best <- optim(p0,E)
> E(p0)
> best
> pr <- function(x){f(x,best$par)}
> points(agr,pr(agr),col="blue",pch=16)
> d <- seq(0,84,2); lines(spline(d,pr(d)),col="blue")

Fitting lognormal distribution

> wdir <- "C:/Users/batagelj/Documents/papers/2017/Moscow/EDA/test"
> setwd(wdir)
> D <- read.csv2("newBooksClean.csv",stringsAsFactors=FALSE)
> F <- table(D$year)
> y <- as.integer(names(F))
> plot(y,F)
> freq <- as.vector(F)
> model <- nls(freq ~ c*dlnorm(2019-y,a,b),start=list(c=1000,a=2,b=0.7))
> model
> plot(y,freq)
> lines(y,predict(model,list(x=2019-y)),col="red")

EDA