target.lrn <- read.table("../lrn/num/472.dat",header=T,colClasses="numeric")
target.val <- read.table("../val/num/472.dat",header=T,colClasses="numeric")
target.tst <- read.table("../tst/num/472.dat",header=T,colClasses="numeric")

 
y.lrn <- target.lrn[,1]
y.val <- target.val[,1]
y.tst <- target.tst[,1]
y     <- c(y.lrn,y.val,y.tst)

n.lrn <- length(y.lrn)
n.val <- length(y.val)
n.tst <- length(y.tst)
n     <- length(y)

rm(target.lrn,target.val,target.tst)

wts <- mat.or.vec(n,1) ; for (i in 1:n.lrn) wts[i]=1
idx.lrn <- 1:n.lrn
idx.val <- (n.lrn+1):(n.lrn+n.val)
idx.tst <- (n.lrn+n.val+1):n

mod <- read.table("../cty_mod.txt",
         header=F,colClasses="character",col.names=c("file","feature","type"))

n.mod <- length(mod$file)

first.time <- TRUE

for (i in 1:n.mod) {

  fn.lrn <- paste("../lrn/",mod$type[i],"/",mod$file[i],".dat",sep="")
  fn.val <- paste("../val/",mod$type[i],"/",mod$file[i],".dat",sep="")
  fn.tst <- paste("../tst/",mod$type[i],"/",mod$file[i],".dat",sep="")
  print(mod$feature[i])

  if (mod$type[i]=="chr") {

    f.lrn <- read.table(fn.lrn,
               header=T,colClasses="character",blank.lines.skip=F)
    f.val <- read.table(fn.val,
               header=T,colClasses="character",blank.lines.skip=F)
    f.tst <- read.table(fn.tst,
               header=T,colClasses="character",blank.lines.skip=F)

    f <- c(f.lrn[,1],f.val[,1],f.tst[,1])

    if (mod$feature[i]=="STATE") {
      f[f=="AS"|f=="DC"|f=="DE"|f=="MA"|f=="ME"|f=="NH"] <- "S1"
      f[f=="OH"|f=="RI"|f=="VI"|f=="WV"]                 <- "S1"
      f[f=="AA"|f=="AE"|f=="AP"|f=="CT"|f=="GU"|f=="MD"] <- "S2"
      f[f=="NJ"|f=="NY"|f=="PA"|f=="PA"|f=="VA"|f=="VT"] <- "S2"
      f[f=="WY"]                                         <- "S2"  
      f[f=="AK"|f=="UT"|f=="MS"]                         <- "S3"
      f[f=="NE"|f=="ND"]                                 <- "S4"
      f[f=="SD"|f=="SC"]                                 <- "S5"
    } 
    
    f <- as.factor(f)

    n.lev <- nlevels(f)
    print(paste("  nlevels = ",n.lev))

    f <- model.matrix(y ~ f - 1)   # Note: Intercept removed.
    f <- f[,2:ncol(f)]             # Note: First dummy deleted.

  } else {
    
    f.lrn<-read.table(fn.lrn,
             header=T,colClasses="numeric",blank.lines.skip=F)
    f.val<-read.table(fn.val,
             header=T,colClasses="numeric",blank.lines.skip=F)
    f.tst<-read.table(fn.tst,
             header=T,colClasses="numeric",blank.lines.skip=F)

    f <- c(f.lrn[,1],f.val[,1],f.tst[,1])

    f[is.na(f)] <- 0

    if (mod$feature[i]=="DOB") { 
      d <- f ; d[d>0] <- 1         # Note: Dummy for missing DOB
      f <- cbind(d,f,f^2)          # Note: Quadratic term added to DOB.
      rm(d)
    }
  }

  if (first.time) {
    X <- f
    first.time <- FALSE
  } else {
    X <- cbind(prev.X,f)
  }

  prev.X <- X

  rm(f.lrn,f.val,f)

  fit <- lm(y~X,weights=wts)       # Note: Validation sample not in fit.

  ehat     <- fit$residuals
  mse.lrn <-  sum(ehat[idx.lrn]^2)/n.lrn 
  mse.val <-  sum(ehat[idx.val]^2)/n.val
  mse.tst <-  sum(ehat[idx.tst]^2)/n.tst

  aov <- anova.lm(fit)

  print(paste("  rank    = ",fit$rank))
  print(paste("  P-value = ",aov[1,5]))
  print(paste("  mse.lrn = ",mse.lrn))
  print(paste("  mse.val = ",mse.val))
  print(paste("  mse.tst = ",mse.tst))
}

print(summary.lm(fit))

outvec <- fit$fitted.values[idx.lrn]
outlab <- "yhat.lrn"
outfil <- "yhat_lrn.dat"
write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)

outvec <- fit$fitted.values[idx.val]
outlab <- "yhat.val"
outfil <- "yhat_val.dat"
write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)

outvec <- fit$fitted.values[idx.tst]
outlab <- "yhat.tst"
outfil <- "yhat_tst.dat"
write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)

rm(outvec,outlab,outfil)

source("psopts.r");

idx <- seq(1,n.lrn,length=500)

y0.lrn <- c(0,sum(y.lrn-0.68))
x0.lrn <- c(0,n.lrn)

idx.yhat <- sort.list(fit$fitted.values[idx.lrn],decreasing=TRUE)
y1.lrn <- y.lrn[idx.yhat]-0.68 
y1.lrn <- cumsum(y1.lrn)
y1.lrn <- y1.lrn[idx]
x1.lrn <- 1:n.lrn
x1.lrn <- x1.lrn[idx]

postscript(file="cty_lif_lrn.eps")

plot(x=c(x0.lrn,x1.lrn),y=c(y0.lrn,y1.lrn),
  ylab="dollars",xlab="size of mailing",type="n")
lines(x=x0.lrn,y=y0.lrn,col="green")
lines(x=x1.lrn,y=y1.lrn,col="red")

dev.off()

idx <- seq(1,n.val,length=500)

y0.val <- c(0,sum(y.val-0.68))
x0.val <- c(0,n.val)

idx.yhat <- sort.list(fit$fitted.values[idx.val],decreasing=TRUE)
y1.val <- y.val[idx.yhat]-0.68 
y1.val <- cumsum(y1.val)
y1.val <- y1.val[idx]
x1.val <- 1:n.val
x1.val <- x1.val[idx]

postscript(file="cty_lif_val.eps")

plot(x=c(x0.val,x1.val),y=c(y0.val,y1.val),
  ylab="dollars",xlab="size of mailing",type="n")
lines(x=x0.val,y=y0.val,col="green")
lines(x=x1.val,y=y1.val,col="blue")

dev.off()

y0.lrn <- (y0.lrn/y0.lrn[2])*100
x0.lrn <- (x0.lrn/x0.lrn[2])*100

y1.lrn <- (y1.lrn/y1.lrn[500])*100
x1.lrn <- (x1.lrn/x1.lrn[500])*100

y1.val <- (y1.val/y1.val[500])*100
x1.val <- (x1.val/x1.val[500])*100

postscript(file="cty_lif_both.eps")

plot(x=c(x0.lrn,x1.lrn,x1.val),y=c(y0.lrn,y1.lrn,y1.val),
  ylab="percent",xlab="percent",type="n")
lines(x=x0.lrn,y=y0.lrn,col="green")
lines(x=x1.lrn,y=y1.lrn,col="red")
lines(x=x1.val,y=y1.val,col="blue")

dev.off()

outvec <-  x1.val
outlab <- "x.net.val"
outfil <- "lif_x_net_val.dat"
write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)

outvec <-  y1.val
outlab <- "y.net.val"
outfil <- "lif_y_net_val.dat"
write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)

idx <- seq(1,n.lrn,length=500)

y0.lrn <- c(0,sum(y.lrn))
x0.lrn <- c(0,n.lrn)

idx.yhat <- sort.list(fit$fitted.values[idx.lrn],decreasing=TRUE)
y1.lrn <- y.lrn[idx.yhat] 
y1.lrn <- cumsum(y1.lrn)
y1.lrn <- y1.lrn[idx]
x1.lrn <- 1:n.lrn
x1.lrn <- x1.lrn[idx]

idx <- seq(1,n.val,length=500)

y0.val <- c(0,sum(y.val))
x0.val <- c(0,n.val)

idx.yhat <- sort.list(fit$fitted.values[idx.val],decreasing=TRUE)
y1.val <- y.val[idx.yhat] 
y1.val <- cumsum(y1.val)
y1.val <- y1.val[idx]
x1.val <- 1:n.val
x1.val <- x1.val[idx]

y0.lrn <- (y0.lrn/y0.lrn[2])*100
x0.lrn <- (x0.lrn/x0.lrn[2])*100

y1.lrn <- (y1.lrn/y1.lrn[500])*100
x1.lrn <- (x1.lrn/x1.lrn[500])*100

y1.val <- (y1.val/y1.val[500])*100
x1.val <- (x1.val/x1.val[500])*100

postscript(file="cty_lif_conv.eps")

plot(x=c(x0.lrn,x1.lrn,x1.val),y=c(y0.lrn,y1.lrn,y1.val),
  ylab="percent",xlab="percent",type="n")
lines(x=x0.lrn,y=y0.lrn,col="green")
lines(x=x1.lrn,y=y1.lrn,col="red")
lines(x=x1.val,y=y1.val,col="blue")

dev.off()

outvec <-  x1.val
outlab <- "x.gross.val"
outfil <- "lif_x_gross_val.dat"
write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)

outvec <-  y1.val
outlab <- "y.gross.val"
outfil <- "lif_y_gross_val.dat"
write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)





