R : Copyright 2002, The R Development Core Team Version 1.5.1 (2002-06-17) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type `license()' or `licence()' for distribution details. R is a collaborative project with many contributors. Type `contributors()' for more information. Type `demo()' for some demos, `help()' for on-line help, or `help.start()' for a HTML browser interface to help. Type `q()' to quit R. > invisible(options(echo = TRUE)) > library(nnet) Loading required package: MASS > > target.lrn <- read.table("../lrn/num/472.dat",header=T,colClasses="numeric") > target.val <- read.table("../val/num/472.dat",header=T,colClasses="numeric") > target.tst <- read.table("../tst/num/472.dat",header=T,colClasses="numeric") > > y.lrn <- target.lrn[,1] > y.val <- target.val[,1] > y.tst <- target.tst[,1] > y <- c(y.lrn,y.val,y.tst) > > n.lrn <- length(y.lrn) > n.val <- length(y.val) > n.tst <- length(y.tst) > n <- length(y) > > rm(target.lrn,target.val,target.tst) > > mod <- read.table("../cty_mod.txt", + header=F,colClasses="character",col.names=c("file","feature","type")) > > n.mod <- length(mod$file) > > first.time.1 <- TRUE > first.time.2 <- TRUE > > for (i in 1:n.mod) { + + fn.lrn <- paste("../lrn/",mod$type[i],"/",mod$file[i],".dat",sep="") + fn.val <- paste("../val/",mod$type[i],"/",mod$file[i],".dat",sep="") + fn.tst <- paste("../tst/",mod$type[i],"/",mod$file[i],".dat",sep="") + print(mod$feature[i]) + + if (mod$type[i]=="chr") { + + f.lrn <- read.table(fn.lrn, + header=T,colClasses="character",blank.lines.skip=F) + f.val <- read.table(fn.val, + header=T,colClasses="character",blank.lines.skip=F) + f.tst <- read.table(fn.tst, + header=T,colClasses="character",blank.lines.skip=F) + + f <- c(f.lrn[,1],f.val[,1],f.tst[,1]) + + if (mod$feature[i]=="STATE") { + f[f=="AS"|f=="DC"|f=="DE"|f=="MA"|f=="ME"|f=="NH"] <- "S1" + f[f=="OH"|f=="RI"|f=="VI"|f=="WV"] <- "S1" + f[f=="AA"|f=="AE"|f=="AP"|f=="CT"|f=="GU"|f=="MD"] <- "S2" + f[f=="NJ"|f=="NY"|f=="PA"|f=="PA"|f=="VA"|f=="VT"] <- "S2" + f[f=="WY"] <- "S2" + f[f=="AK"|f=="UT"|f=="MS"] <- "S3" + f[f=="NE"|f=="ND"] <- "S4" + f[f=="SD"|f=="SC"] <- "S5" + } + + f <- as.factor(f) + + n.lev <- nlevels(f) + print(paste(" nlevels = ",n.lev)) + + f <- model.matrix(y ~ f - 1) # Note: Intercept removed. + f <- f[,2:ncol(f)] # Note: First dummy deleted. + + if (first.time.2) { + X.2 <- f + first.time.2 <- FALSE + } else { + X.2 <- cbind(prev.X.2,f) + } + prev.X.2 <- X.2 + + } else { + + f.lrn<-read.table(fn.lrn, + header=T,colClasses="numeric",blank.lines.skip=F) + f.val<-read.table(fn.val, + header=T,colClasses="numeric",blank.lines.skip=F) + f.tst<-read.table(fn.tst, + header=T,colClasses="numeric",blank.lines.skip=F) + + f <- c(f.lrn[,1],f.val[,1],f.tst[,1]) + + f[is.na(f)] <- 0 + + # if (mod$feature[i]=="DOB") { + # d <- f ; d[d>0] <- 1 # Note: Dummy for missing DOB + # f <- cbind(d,f,f^2) # Note: Quadratic term added to DOB. + # rm(d) + # } + + if (first.time.1) { + X.1 <- f + first.time.1 <- FALSE + } else { + X.1 <- cbind(prev.X.1,f) + } + prev.X.1 <- X.1 + + } + } [1] "LASTGIFT" [1] "PEPSTRFL" [1] " nlevels = 2" [1] "STATE" [1] " nlevels = 33" [1] "RECP3" [1] " nlevels = 2" [1] "DOB" [1] "MAILCODE" [1] " nlevels = 2" [1] "MHUC2" [1] "LASTDATE" [1] "MINRAMNT" > > rm(prev.X.1,prev.X.2) > rm(f.lrn,f.val,f.tst,f) > > wts <- mat.or.vec(n,1) ; for (i in 1:n.lrn) wts[i]=1 > idx.lrn <- 1:n.lrn > idx.val <- (n.lrn+1):(n.lrn+n.val) > idx.tst <- (n.lrn+n.val+1):n > > yhat <- mat.or.vec(n,1) > ehat <- y - yhat > > best.mse.val <- sum(ehat[idx.val]^2)/n.val > > for (iter in 1:10) { + + y.1 <- y - yhat + + set.seed(100) + + sz <- 2 + n.parm <- 1 + sz * (1 + 1 + 5) # bias + sz * (wt + bias + features) + + best.parm <- mat.or.vec(n.parm,1) + best.sse <- sum(y.1[idx.lrn]^2) + + for (r in seq(from=.1,to=35.1,by=1.0)) { + for (i in 1:50) { + parm <- runif(n=n.parm,min=-r,max=r) + fit.1<-nnet.formula(y.1~X.1,size=sz,maxit=10,weights=wts,Wts=parm,trace=F) + sse <- sum(fit.1$residuals[idx.lrn]^2) + if (sse < best.sse) { + best.sse <- sse + best.parm <- fit.1$wts + } + } + } + + fit.1 <- nnet.formula(y.1~X.1,size=sz,maxit=500,weights=wts,Wts=best.parm) + + print(fit.1$wts) + + yhat <- yhat + fit.1$fitted.values + ehat <- y - yhat + + print(" ") + print(paste(" iter = ",iter + 0.1 )) + mse.lrn <- sum(ehat[idx.lrn]^2)/n.lrn + mse.val <- sum(ehat[idx.val]^2)/n.val + mse.tst <- sum(ehat[idx.tst]^2)/n.tst + print(paste(" mse.lrn = ",mse.lrn)) + print(paste(" mse.val = ",mse.val)) + print(paste(" mse.tst = ",mse.tst)) + + if (mse.val > best.mse.val) { + break + } else { + best.mse.val = mse.val + } + + y.2 <- y - yhat + + fit.2 <- lm(y.2~X.2,weights=wts) + + print(summary.lm(fit.2)) + + yhat <- yhat + fit.2$fitted.values + ehat <- y - yhat + + print(" ") + print(paste(" iter = ",iter + 0.2 )) + mse.lrn <- sum(ehat[idx.lrn]^2)/n.lrn + mse.val <- sum(ehat[idx.val]^2)/n.val + mse.tst <- sum(ehat[idx.tst]^2)/n.tst + print(paste(" mse.lrn = ",mse.lrn)) + print(paste(" mse.val = ",mse.val)) + print(paste(" mse.tst = ",mse.tst)) + + if (mse.val > best.mse.val) { + break + } else { + best.mse.val = mse.val + } + } # weights: 15 initial value 1344178.537723 iter 10 value 1344135.108892 iter 20 value 1344122.985427 iter 30 value 1344121.360681 iter 30 value 1344121.347405 final value 1344121.068274 converged [1] -47.63229598 2922.49510848 88.42103467 989.86877489 -19.98629994 [6] -798.29090435 0.77223853 -0.01928286 0.26688824 -0.24337058 [11] -0.85858251 0.39245144 0.69979465 1.06158839 -0.17261156 [1] " " [1] " iter = 1.1" [1] " mse.lrn = 20.0914957588650" [1] " mse.val = 18.8205196394296" [1] " mse.tst = 17.8658412594102" Call: lm(formula = y.2 ~ X.2, weights = wts) Residuals: Min 1Q Median 3Q Max -2.0594 -0.8560 -0.6493 0.0000 199.4201 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -0.276494 0.131516 -2.102 0.035526 * X.2prev.X.2 0.135241 0.034821 3.884 0.000103 *** X.2fAR 0.084994 0.213166 0.399 0.690098 X.2fAZ 0.179074 0.170235 1.052 0.292838 X.2fCA 0.466413 0.136697 3.412 0.000645 *** X.2fCO 0.288706 0.177249 1.629 0.103357 X.2fFL 0.178334 0.143022 1.247 0.212438 X.2fGA 0.188278 0.159047 1.184 0.236499 X.2fHI 0.676165 0.282438 2.394 0.016667 * X.2fIA 0.003828 0.197083 0.019 0.984502 X.2fID 0.581626 0.263695 2.206 0.027410 * X.2fIL 0.150208 0.146551 1.025 0.305388 X.2fIN 0.026401 0.163176 0.162 0.871468 X.2fKS 0.109463 0.198288 0.552 0.580922 X.2fKY 0.065804 0.186672 0.353 0.724457 X.2fLA 0.016682 0.186784 0.089 0.928833 X.2fMI 0.143028 0.148500 0.963 0.335473 X.2fMN -0.062816 0.173133 -0.363 0.716742 X.2fMO 0.137740 0.165653 0.831 0.405698 X.2fMT 0.196832 0.268587 0.733 0.463657 X.2fNC 0.203422 0.154662 1.315 0.188426 X.2fNM 0.307590 0.223527 1.376 0.168802 X.2fNV 0.117098 0.218306 0.536 0.591688 X.2fOK -0.041415 0.185827 -0.223 0.823640 X.2fOR 0.443466 0.172479 2.571 0.010139 * X.2fS1 -0.543392 0.492347 -1.104 0.269738 X.2fS2 0.387804 0.244159 1.588 0.112217 X.2fS3 -0.080459 0.181279 -0.444 0.657159 X.2fS4 0.140408 0.213025 0.659 0.509826 X.2fS5 0.279153 0.175165 1.594 0.111017 X.2fTN 0.030538 0.168613 0.181 0.856281 X.2fTX 0.146963 0.144414 1.018 0.308850 X.2fWA 0.302097 0.158508 1.906 0.056670 . X.2fWI -0.033152 0.165052 -0.201 0.840810 X.2f 0.671105 0.121773 5.511 3.58e-08 *** X.2f -0.389587 0.145303 -2.681 0.007338 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 4.479 on 66864 degrees of freedom Multiple R-Squared: 0.002207, Adjusted R-squared: 0.001684 F-statistic: 4.225 on 35 and 66864 DF, p-value: 8.367e-16 [1] " " [1] " iter = 1.2" [1] " mse.lrn = 20.0471608062996" [1] " mse.val = 18.7931555382009" [1] " mse.tst = 17.8462476397071" # weights: 15 initial value 1340462.636522 final value 1340439.523340 converged [1] -2.21879827 2.89828658 -4.23344816 0.08203626 -2.39026893 [6] 6.75365734 0.54538784 16.83947897 -5.83180296 1.86482658 [11] -0.05565869 6.22053585 -14.99514024 2.37440113 15.35958182 [1] " " [1] " iter = 2.1" [1] " mse.lrn = 20.0364652216703" [1] " mse.val = 18.7813275782897" [1] " mse.tst = 17.8294811780631" Call: lm(formula = y.2 ~ X.2, weights = wts) Residuals: Min 1Q Median 3Q Max -2.2409 -0.8503 -0.6565 0.0000 199.2994 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -2.806e-02 1.315e-01 -0.213 0.831 X.2prev.X.2 1.877e-02 3.481e-02 0.539 0.590 X.2fAR -4.463e-03 2.131e-01 -0.021 0.983 X.2fAZ 1.387e-03 1.702e-01 0.008 0.993 X.2fCA -4.565e-03 1.367e-01 -0.033 0.973 X.2fCO 2.661e-03 1.772e-01 0.015 0.988 X.2fFL 5.760e-03 1.430e-01 0.040 0.968 X.2fGA -5.890e-03 1.590e-01 -0.037 0.970 X.2fHI -2.395e-03 2.824e-01 -0.008 0.993 X.2fIA 2.845e-03 1.970e-01 0.014 0.988 X.2fID 9.910e-03 2.636e-01 0.038 0.970 X.2fIL 3.574e-03 1.465e-01 0.024 0.981 X.2fIN 8.350e-04 1.631e-01 0.005 0.996 X.2fKS 5.312e-03 1.982e-01 0.027 0.979 X.2fKY -6.072e-04 1.866e-01 -0.003 0.997 X.2fLA 9.081e-03 1.867e-01 0.049 0.961 X.2fMI 9.833e-03 1.485e-01 0.066 0.947 X.2fMN 8.075e-03 1.731e-01 0.047 0.963 X.2fMO 9.761e-03 1.656e-01 0.059 0.953 X.2fMT -4.621e-03 2.685e-01 -0.017 0.986 X.2fNC 1.211e-06 1.546e-01 7.83e-06 1.000 X.2fNM -5.435e-03 2.235e-01 -0.024 0.981 X.2fNV 3.211e-03 2.182e-01 0.015 0.988 X.2fOK -4.906e-03 1.858e-01 -0.026 0.979 X.2fOR 5.309e-03 1.724e-01 0.031 0.975 X.2fS1 -1.260e-02 4.922e-01 -0.026 0.980 X.2fS2 -1.241e-02 2.441e-01 -0.051 0.959 X.2fS3 6.453e-05 1.812e-01 3.56e-04 1.000 X.2fS4 5.419e-04 2.130e-01 0.003 0.998 X.2fS5 5.440e-03 1.751e-01 0.031 0.975 X.2fTN 1.058e-03 1.686e-01 0.006 0.995 X.2fTX 4.784e-03 1.444e-01 0.033 0.974 X.2fWA 1.632e-03 1.585e-01 0.010 0.992 X.2fWI 1.172e-02 1.650e-01 0.071 0.943 X.2f 2.071e-04 1.217e-01 0.002 0.999 X.2f -1.564e-02 1.453e-01 -0.108 0.914 Residual standard error: 4.477 on 66864 degrees of freedom Multiple R-Squared: 6.065e-06, Adjusted R-squared: -0.0005174 F-statistic: 0.01159 on 35 and 66864 DF, p-value: 1 [1] " " [1] " iter = 2.2" [1] " mse.lrn = 20.0360402176356" [1] " mse.val = 18.7807480011608" [1] " mse.tst = 17.8307438134155" # weights: 15 initial value 1339888.869418 final value 1339888.869418 converged [1] 2.0992545 -29.3141840 -34.0631916 2.3376474 -8.4760584 -30.8640487 [7] 12.2112211 16.4762296 -11.5346530 -28.2101564 -0.3832074 7.4969603 [13] -9.4972641 18.8706931 30.2083768 [1] " " [1] " iter = 3.1" [1] " mse.lrn = 20.0282341942534" [1] " mse.val = 18.7791832113146" [1] " mse.tst = 17.8311807733668" Call: lm(formula = y.2 ~ X.2, weights = wts) Residuals: Min 1Q Median 3Q Max -2.9889 -0.8500 -0.6558 0.0000 199.2997 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -2.104e-04 1.315e-01 -0.002 0.999 X.2prev.X.2 3.502e-04 3.480e-02 0.010 0.992 X.2fAR -3.283e-06 2.131e-01 -1.54e-05 1.000 X.2fAZ -1.173e-03 1.702e-01 -0.007 0.994 X.2fCA -3.957e-04 1.366e-01 -0.003 0.998 X.2fCO 1.581e-05 1.772e-01 8.92e-05 1.000 X.2fFL -1.618e-04 1.430e-01 -0.001 0.999 X.2fGA 1.517e-05 1.590e-01 9.54e-05 1.000 X.2fHI 7.167e-05 2.823e-01 2.54e-04 1.000 X.2fIA -1.121e-03 1.970e-01 -0.006 0.995 X.2fID -9.104e-06 2.636e-01 -3.45e-05 1.000 X.2fIL -4.434e-04 1.465e-01 -0.003 0.998 X.2fIN 5.134e-06 1.631e-01 3.15e-05 1.000 X.2fKS -1.123e-03 1.982e-01 -0.006 0.995 X.2fKY -1.867e-05 1.866e-01 -1.00e-04 1.000 X.2fLA 3.118e-06 1.867e-01 1.67e-05 1.000 X.2fMI -1.307e-06 1.484e-01 -8.81e-06 1.000 X.2fMN -1.885e-05 1.731e-01 -1.09e-04 1.000 X.2fMO -5.183e-04 1.656e-01 -0.003 0.998 X.2fMT -2.756e-05 2.685e-01 -1.03e-04 1.000 X.2fNC -1.790e-07 1.546e-01 -1.16e-06 1.000 X.2fNM 1.203e-05 2.234e-01 5.38e-05 1.000 X.2fNV 3.509e-05 2.182e-01 1.61e-04 1.000 X.2fOK -8.859e-04 1.857e-01 -0.005 0.996 X.2fOR -3.212e-06 1.724e-01 -1.86e-05 1.000 X.2fS1 -8.806e-07 4.921e-01 -1.79e-06 1.000 X.2fS2 8.765e-07 2.440e-01 3.59e-06 1.000 X.2fS3 -7.626e-04 1.812e-01 -0.004 0.997 X.2fS4 -2.895e-05 2.129e-01 -1.36e-04 1.000 X.2fS5 -6.785e-04 1.751e-01 -0.004 0.997 X.2fTN -1.204e-06 1.685e-01 -7.15e-06 1.000 X.2fTX -1.768e-04 1.443e-01 -0.001 0.999 X.2fWA -3.864e-04 1.584e-01 -0.002 0.998 X.2fWI -2.001e-05 1.650e-01 -1.21e-04 1.000 X.2f -4.792e-04 1.217e-01 -0.004 0.997 X.2f -1.831e-03 1.452e-01 -0.013 0.990 Residual standard error: 4.476 on 66864 degrees of freedom Multiple R-Squared: 8.739e-09, Adjusted R-squared: -0.0005234 F-statistic: 1.67e-05 on 35 and 66864 DF, p-value: 1 [1] " " [1] " iter = 3.2" [1] " mse.lrn = 20.0282339008194" [1] " mse.val = 18.7791856158816" [1] " mse.tst = 17.8312594676257" > > > x0 <- c(0,n.lrn) > y0 <- c(0,sum(y[idx.lrn]-0.68)) > > x1 <- (1:n.lrn) > y1 <- yhat[idx.lrn] > y1 <- y1-0.68 > y1 <- sort(y1) > y1 <- y1[n.lrn:1] > y1 <- cumsum(y1) > > idx <- 1:n.lrn > > print(paste("maximum profit in learning sample is ", max(y1))) [1] "maximum profit in learning sample is 10106.0870428757" > print(paste("maximum occurs at ", idx[y1==max(y1)])) [1] "maximum occurs at 46886" > > idx <- seq(1,n.lrn,length=200) > > x1 <- x1[idx] > y1 <- y1[idx] > > source("psopts.r"); > if (sz < 10) { + plt.name <- paste("cty_lif_0",sz,".eps",sep="") + } else { + plt.name <- paste("cty_lif_",sz,".eps",sep="") + } > postscript(file=plt.name) > > plot(x=c(x0,x1),y=c(y0,y1),ylab="dollars",xlab="size of mailing",type="n") > lines(x=x0,y=y0,col="green") > lines(x=x1,y=y1,col="red") > > dev.off() null device 1 > > > proc.time() [1] 24840.43 4097.91 28976.85 0.00 0.00 >