R : Copyright 2003, The R Development Core Team
Version 1.6.2  (2003-01-10)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type `license()' or `licence()' for distribution details.

R is a collaborative project with many contributors.
Type `contributors()' for more information.

Type `demo()' for some demos, `help()' for on-line help, or
`help.start()' for a HTML browser interface to help.
Type `q()' to quit R.

> invisible(options(echo = TRUE))
> library(rpart)
> 
> target.lrn <- read.table("../lrn/num/472.dat",header=T,colClasses="numeric")
> target.val <- read.table("../val/num/472.dat",header=T,colClasses="numeric")
> target.tst <- read.table("../tst/num/472.dat",header=T,colClasses="numeric")
> 
>  
> y.lrn <- target.lrn[,1]
> y.val <- target.val[,1]
> y.tst <- target.tst[,1]
> y     <- c(y.lrn,y.val,y.tst)
> 
> n.lrn <- length(y.lrn)
> n.val <- length(y.val)
> n.tst <- length(y.tst)
> n     <- length(y)
> 
> rm(target.lrn,target.val,target.tst)
> 
> wts <- mat.or.vec(n,1) ; for (i in 1:n.lrn) wts[i]=1
> idx.lrn <- 1:n.lrn
> idx.val <- (n.lrn+1):(n.lrn+n.val)
> idx.tst <- (n.lrn+n.val+1):n
> 
> mod <- read.table("../cty_mod.txt",
+          header=F,colClasses="character",col.names=c("file","feature","type"))
> 
> n.mod <- length(mod$file)
> 
> first.time <- TRUE
> 
> for (i in 1:n.mod) {
+ 
+   fn.lrn <- paste("../lrn/",mod$type[i],"/",mod$file[i],".dat",sep="")
+   fn.val <- paste("../val/",mod$type[i],"/",mod$file[i],".dat",sep="")
+   fn.tst <- paste("../tst/",mod$type[i],"/",mod$file[i],".dat",sep="")
+   print(mod$feature[i])
+ 
+   if (mod$type[i]=="chr") {
+ 
+     f.lrn <- read.table(fn.lrn,
+                header=T,colClasses="character",blank.lines.skip=F)
+     f.val <- read.table(fn.val,
+                header=T,colClasses="character",blank.lines.skip=F)
+     f.tst <- read.table(fn.tst,
+                header=T,colClasses="character",blank.lines.skip=F)
+ 
+     f <- c(f.lrn[,1],f.val[,1],f.tst[,1])
+ 
+     if (mod$feature[i]=="STATE") {
+       f[f=="AS"|f=="DC"|f=="DE"|f=="MA"|f=="ME"|f=="NH"] <- "S1"
+       f[f=="OH"|f=="RI"|f=="VI"|f=="WV"]                 <- "S1"
+       f[f=="AA"|f=="AE"|f=="AP"|f=="CT"|f=="GU"|f=="MD"] <- "S2"
+       f[f=="NJ"|f=="NY"|f=="PA"|f=="PA"|f=="VA"|f=="VT"] <- "S2"
+       f[f=="WY"]                                         <- "S2"  
+       f[f=="AK"|f=="UT"|f=="MS"]                         <- "S3"
+       f[f=="NE"|f=="ND"]                                 <- "S4"
+       f[f=="SD"|f=="SC"]                                 <- "S5"
+     } 
+     
+     f <- as.factor(f)
+ 
+     n.lev <- nlevels(f)
+     f.name <- levels(f) 
+     if(n.lev==2) f.name <- c(mod$feature[i],mod$feature[i])
+     print(paste("  nlevels = ",n.lev))
+ 
+     f <- model.matrix(y ~ f - 1)      # Note: Intercept removed.
+     f <- f[,2:n.lev]                  # Note: First dummy deleted.
+     f.name <- f.name[2:n.lev]         # Note: First name deleted.
+ 
+   } else {
+     
+     f.lrn<-read.table(fn.lrn,
+              header=T,colClasses="numeric",blank.lines.skip=F)
+     f.val<-read.table(fn.val,
+              header=T,colClasses="numeric",blank.lines.skip=F)
+     f.tst<-read.table(fn.tst,
+              header=T,colClasses="numeric",blank.lines.skip=F)
+ 
+     f <- c(f.lrn[,1],f.val[,1],f.tst[,1])
+ 
+     f[is.na(f)] <- 0
+ 
+     f.name <- mod$feature[i]
+ 
+     if (mod$feature[i]=="DOB") { 
+       d <- f ; d[d>0] <- 1         # Note: Dummy for missing DOB
+       f <- cbind(d,f,f^2)          # Note: Quadratic term added to DOB.
+       rm(d)
+       f.name <- c("DOB_miss","DOB_linr","DOB_quad")
+     }
+ 
+   }
+ 
+   if (first.time) {
+     X <- f
+     X.names <- f.name
+     first.time <- FALSE
+   } else {
+     X <- cbind(prev.X,f)
+     X.names <- c(prev.X.names,f.name)
+   }
+ 
+   prev.X <- X
+   prev.X.names <- X.names
+ 
+ }
[1] "LASTGIFT"
[1] "PEPSTRFL"
[1] "  nlevels =  2"
[1] "STATE"
[1] "  nlevels =  33"
[1] "RECP3"
[1] "  nlevels =  2"
[1] "DOB"
[1] "MAILCODE"
[1] "  nlevels =  2"
[1] "MHUC2"
[1] "LASTDATE"
[1] "MINRAMNT"
> 
> LAST.by.PEP <- X[,1]*X[,2]
> 
> X <- cbind(prev.X,LAST.by.PEP)
> X.names <- c(prev.X.names,"LAST.by.PEP")
> 
> rm(prev.X,prev.X.names)
> rm(f.lrn,f.val,f)
> 
> dimnames(X) <- list(NULL,X.names)
> 
> fit <- lm(y~X,weights=wts)
> aov <- anova.lm(fit) 
> 
> ehat <- fit$residuals
> mse.lrn <- sum(ehat[idx.lrn]^2)/n.lrn 
> mse.val <- sum(ehat[idx.val]^2)/n.val
> mse.tst <- sum(ehat[idx.tst]^2)/n.tst
> 
> print(paste("  rank    = ",fit$rank))
[1] "  rank    =  44"
> print(paste("  P-value = ",aov[1,5]))
[1] "  P-value =  4.78123188298365e-105"
> print(paste("  mse.lrn = ",mse.lrn)) 
[1] "  mse.lrn =  19.9111807739091"
> print(paste("  mse.val = ",mse.val)) 
[1] "  mse.val =  18.6052588093825"
> print(paste("  mse.tst = ",mse.tst)) 
[1] "  mse.tst =  17.7951514079772"
> 
> print(summary.lm(fit))

Call:
lm(formula = y ~ X, weights = wts)

Residuals:
     Min       1Q   Median       3Q      Max 
-21.9624  -0.8230  -0.5556   0.0000 199.2981 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -1.976e+01  3.398e+00  -5.816 6.07e-09 ***
XLASTGIFT     7.567e-03  1.767e-03   4.283 1.85e-05 ***
XPEPSTRFL    -3.302e-01  5.562e-02  -5.936 2.93e-09 ***
XAR           5.091e-02  2.125e-01   0.240 0.810633    
XAZ           1.390e-01  1.698e-01   0.819 0.413017    
XCA           4.036e-01  1.365e-01   2.958 0.003101 ** 
XCO           2.650e-01  1.769e-01   1.498 0.134211    
XFL           1.566e-01  1.429e-01   1.096 0.272890    
XGA           1.465e-01  1.586e-01   0.924 0.355617    
XHI           6.318e-01  2.815e-01   2.244 0.024823 *  
XIA          -1.748e-02  1.966e-01  -0.089 0.929180    
XID           5.446e-01  2.629e-01   2.072 0.038293 *  
XIL           1.017e-01  1.469e-01   0.692 0.489065    
XIN           1.430e-02  1.627e-01   0.088 0.929992    
XKS           7.198e-02  1.978e-01   0.364 0.715886    
XKY           7.201e-02  1.861e-01   0.387 0.698835    
XLA           4.555e-02  1.863e-01   0.245 0.806829    
XMI           1.112e-01  1.490e-01   0.746 0.455621    
XMN          -6.804e-02  1.728e-01  -0.394 0.693752    
XMO           1.503e-01  1.653e-01   0.910 0.363041    
XMT           1.683e-01  2.678e-01   0.628 0.529690    
XNC           1.936e-01  1.543e-01   1.255 0.209558    
XNM           2.740e-01  2.228e-01   1.230 0.218841    
XNV           7.333e-02  2.177e-01   0.337 0.736222    
XOK          -7.684e-02  1.852e-01  -0.415 0.678292    
XOR           3.836e-01  1.722e-01   2.227 0.025943 *  
XS1          -5.454e-01  4.908e-01  -1.111 0.266505    
XS2           3.526e-01  2.435e-01   1.448 0.147487    
XS3          -1.053e-01  1.808e-01  -0.583 0.560198    
XS4           1.296e-01  2.125e-01   0.610 0.542080    
XS5           2.649e-01  1.747e-01   1.517 0.129351    
XTN           1.160e-02  1.681e-01   0.069 0.944966    
XTX           1.185e-01  1.444e-01   0.821 0.411659    
XWA           2.643e-01  1.581e-01   1.671 0.094651 .  
XWI          -4.792e-02  1.652e-01  -0.290 0.771823    
XRECP3        6.015e-01  1.225e-01   4.912 9.03e-07 ***
XDOB_miss    -2.269e-01  9.654e-02  -2.350 0.018756 *  
XDOB_linr     1.906e-04  5.203e-05   3.664 0.000248 ***
XDOB_quad    -2.576e-08  6.786e-09  -3.796 0.000147 ***
XMAILCODE    -4.372e-01  1.451e-01  -3.014 0.002579 ** 
XMHUC2        5.183e-02  2.057e-02   2.520 0.011753 *  
XLASTDATE     2.091e-03  3.557e-04   5.880 4.13e-09 ***
XMINRAMNT     3.675e-03  2.492e-03   1.475 0.140218    
XLAST.by.PEP  3.472e-02  2.689e-03  12.912  < 2e-16 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 

Residual standard error: 4.464 on 66856 degrees of freedom
Multiple R-Squared: 0.009356,	Adjusted R-squared: 0.008718 
F-statistic: 14.68 on 43 and 66856 DF,  p-value: < 2.2e-16 

> 
> outvec <- fit$fitted.values[idx.lrn]
> outlab <- "yhat.lrn"
> outfil <- "yhat_lrn_intr.dat"
> write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)
> 
> outvec <- fit$fitted.values[idx.val]
> outlab <- "yhat.val"
> outfil <- "yhat_val_intr.dat"
> write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)
> 
> outvec <- fit$fitted.values[idx.tst]
> outlab <- "yhat.tst"
> outfil <- "yhat_tst_intr.dat"
> write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)
> 
> rm(outvec,outlab,outfil)
> 
> x0 <- c(0,n.lrn)
> y0 <- c(0,sum(y.lrn-0.68))
> 
> x1 <- (1:n.lrn)
> y1 <- fit$fitted.values[1:n.lrn]
> y1 <- y1-0.68
> y1 <- sort(y1)
> y1 <- y1[n.lrn:1]
> y1 <- cumsum(y1)
> 
> idx <- 1:n.lrn
> 
> print(paste("maximum profit in learning sample is ", max(y1)))
[1] "maximum profit in learning sample is  12486.2488227675"
> print(paste("maximum occurs at ", idx[y1==max(y1)]))
[1] "maximum occurs at  38276"
> 
> idx <- seq(1,n.lrn,length=200)
> 
> x1 <- x1[idx]
> y1 <- y1[idx]
> 
> source("psopts.r");
> postscript(file="cty_intr_lif.eps");
> 
> plot(x=c(x0,x1),y=c(y0,y1),ylab="dollars",xlab="size of mailing",type="n")
> lines(x=x0,y=y0,col="green")
> lines(x=x1,y=y1,col="red")
> 
> dev.off()
null device 
          1 
> proc.time()
[1] 23.86  4.94 28.94  0.00  0.00
>