library(nnet)

target.lrn <- read.table("../lrn/num/472.dat",header=T,colClasses="numeric")
target.val <- read.table("../val/num/472.dat",header=T,colClasses="numeric")
target.tst <- read.table("../tst/num/472.dat",header=T,colClasses="numeric")
 
y.lrn <- target.lrn[,1]
y.val <- target.val[,1]
y.tst <- target.tst[,1]
y     <- c(y.lrn,y.val,y.tst)

n.lrn <- length(y.lrn)
n.val <- length(y.val)
n.tst <- length(y.tst)
n     <- length(y)

rm(target.lrn,target.val,target.tst)

mod <- read.table("../cty_mod.txt",
         header=F,colClasses="character",col.names=c("file","feature","type"))

n.mod <- length(mod$file)

first.time.1 <- TRUE
first.time.2 <- TRUE

for (i in 1:n.mod) {

  fn.lrn <- paste("../lrn/",mod$type[i],"/",mod$file[i],".dat",sep="")
  fn.val <- paste("../val/",mod$type[i],"/",mod$file[i],".dat",sep="")
  fn.tst <- paste("../tst/",mod$type[i],"/",mod$file[i],".dat",sep="")
  print(mod$feature[i])

  if (mod$type[i]=="chr") {

    f.lrn <- read.table(fn.lrn,
               header=T,colClasses="character",blank.lines.skip=F)
    f.val <- read.table(fn.val,
               header=T,colClasses="character",blank.lines.skip=F)
    f.tst <- read.table(fn.tst,
               header=T,colClasses="character",blank.lines.skip=F)

    f <- c(f.lrn[,1],f.val[,1],f.tst[,1])

    if (mod$feature[i]=="STATE") {
      f[f=="AS"|f=="DC"|f=="DE"|f=="MA"|f=="ME"|f=="NH"] <- "S1"
      f[f=="OH"|f=="RI"|f=="VI"|f=="WV"]                 <- "S1"
      f[f=="AA"|f=="AE"|f=="AP"|f=="CT"|f=="GU"|f=="MD"] <- "S2"
      f[f=="NJ"|f=="NY"|f=="PA"|f=="PA"|f=="VA"|f=="VT"] <- "S2"
      f[f=="WY"]                                         <- "S2"  
      f[f=="AK"|f=="UT"|f=="MS"]                         <- "S3"
      f[f=="NE"|f=="ND"]                                 <- "S4"
      f[f=="SD"|f=="SC"]                                 <- "S5"
    } 
    
    f <- as.factor(f)

    n.lev <- nlevels(f)
    print(paste("  nlevels = ",n.lev))

    f <- model.matrix(y ~ f - 1)   # Note: Intercept removed.
    f <- f[,2:ncol(f)]             # Note: First dummy deleted.
  
    if (first.time.2) {
      X.2 <- f
      first.time.2 <- FALSE
    } else {
      X.2 <- cbind(prev.X.2,f)
    }
    prev.X.2 <- X.2

  } else {
    
    f.lrn<-read.table(fn.lrn,
             header=T,colClasses="numeric",blank.lines.skip=F)
    f.val<-read.table(fn.val,
             header=T,colClasses="numeric",blank.lines.skip=F)
    f.tst<-read.table(fn.tst,
             header=T,colClasses="numeric",blank.lines.skip=F)

    f <- c(f.lrn[,1],f.val[,1],f.tst[,1])

    f[is.na(f)] <- 0

#   if (mod$feature[i]=="DOB") { 
#     d <- f ; d[d>0] <- 1         # Note: Dummy for missing DOB
#     f <- cbind(d,f,f^2)          # Note: Quadratic term added to DOB.
#     rm(d)
#   }

    if (first.time.1) {
      X.1 <- f
      first.time.1 <- FALSE
    } else {
      X.1 <- cbind(prev.X.1,f)
    }
    prev.X.1 <- X.1

  }
}

rm(prev.X.1,prev.X.2)
rm(f.lrn,f.val,f.tst,f)

wts <- mat.or.vec(n,1) ; for (i in 1:n.lrn) wts[i]=1
idx.lrn <- 1:n.lrn
idx.val <- (n.lrn+1):(n.lrn+n.val)
idx.tst <- (n.lrn+n.val+1):n
  
yhat <- mat.or.vec(n,1)
ehat <- y - yhat

best.mse.val <-  sum(ehat[idx.val]^2)/n.val 

for (iter in 1:10) {

  y.1 <- y - yhat
  
  set.seed(100)

  sz <- 10
  n.parm <- 1 + sz * (1 + 1 + 5)      # bias + sz * (wt + bias + features)

  best.parm <- mat.or.vec(n.parm,1)
  best.sse <- sum(y.1[idx.lrn]^2)
  
  for (r in seq(from=.1,to=35.1,by=1.0)) {
    for (i in 1:50) {
      parm <- runif(n=n.parm,min=-r,max=r)
      fit.1<-nnet.formula(y.1~X.1,size=sz,maxit=10,weights=wts,Wts=parm,trace=F)
      sse <- sum(fit.1$residuals[idx.lrn]^2) 
      if (sse < best.sse) {
        best.sse <- sse  
        best.parm <- fit.1$wts
      }
    }
  }
  
  fit.1 <- nnet.formula(y.1~X.1,size=sz,maxit=500,weights=wts,Wts=best.parm)

  print(fit.1$wts)
  
  yhat <- yhat + fit.1$fitted.values
  ehat <- y - yhat

  print(" ")
  print(paste("  iter     = ",iter + 0.1 ))
  mse.lrn <-  sum(ehat[idx.lrn]^2)/n.lrn
  mse.val <-  sum(ehat[idx.val]^2)/n.val 
  mse.tst <-  sum(ehat[idx.tst]^2)/n.tst 
  print(paste("  mse.lrn = ",mse.lrn))
  print(paste("  mse.val = ",mse.val))
  print(paste("  mse.tst = ",mse.tst))

  if (mse.val > best.mse.val) {
    break
  } else {
    best.mse.val = mse.val
    best.yhat <- yhat
  }

  y.2 <- y - yhat
  
  fit.2 <- lm(y.2~X.2,weights=wts) 
                             
  print(summary.lm(fit.2))            
  
  yhat <- yhat + fit.2$fitted.values
  ehat <- y - yhat
  
  print(" ")
  print(paste("  iter     = ",iter + 0.2 ))
  mse.lrn <-  sum(ehat[idx.lrn]^2)/n.lrn 
  mse.val <-  sum(ehat[idx.val]^2)/n.val 
  mse.tst <-  sum(ehat[idx.tst]^2)/n.tst 
  print(paste("  mse.lrn = ",mse.lrn))
  print(paste("  mse.val = ",mse.val))
  print(paste("  mse.tst = ",mse.tst))

  if (mse.val > best.mse.val) {
    break
  } else {
    best.mse.val = mse.val
    best.yhat <- yhat
  }
}  


x0 <- c(0,n.lrn)  
y0 <- c(0,sum(y[idx.lrn]-0.68)) 

x1 <- (1:n.lrn)
y1 <- best.yhat[idx.lrn]
y1 <- y1-0.68
y1 <- sort(y1)
y1 <- y1[n.lrn:1]
y1 <- cumsum(y1) 

idx <- 1:n.lrn

print(paste("maximum profit in learning sample is ", max(y1)))
print(paste("maximum occurs at ", idx[y1==max(y1)]))

idx <- seq(1,n.lrn,length=200)

x1 <- x1[idx]
y1 <- y1[idx]

source("psopts.r");
if (sz < 10) {
  plt.name <- paste("cty_lif_0",sz,".eps",sep="")
} else {
  plt.name <- paste("cty_lif_",sz,".eps",sep="") 
}
postscript(file=plt.name)

plot(x=c(x0,x1),y=c(y0,y1),ylab="dollars",xlab="size of mailing",type="n")
lines(x=x0,y=y0,col="green")
lines(x=x1,y=y1,col="red")

dev.off()


outvec <- best.yhat[idx.lrn]
outlab <- "yhat.lrn"
if (sz < 10) {
  outfil <- paste("yhat_lrn_0",sz,".dat",sep="")
} else {
  outfil <- paste("yhat_lrn_",sz,".dat",sep="")
}
write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)

outvec <- best.yhat[idx.val]
outlab <- "yhat.val"
if (sz < 10) {
  outfil <- paste("yhat_val_0",sz,".dat",sep="")
} else {
  outfil <- paste("yhat_val_",sz,".dat",sep="")
}
write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)

outvec <- best.yhat[idx.tst]
outlab <- "yhat.tst"
if (sz < 10) {
  outfil <- paste("yhat_tst_0",sz,".dat",sep="")
} else {
  outfil <- paste("yhat_tst_",sz,".dat",sep="")
}
write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)

