R : Copyright 2003, The R Development Core Team Version 1.6.2 (2003-01-10) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type `license()' or `licence()' for distribution details. R is a collaborative project with many contributors. Type `contributors()' for more information. Type `demo()' for some demos, `help()' for on-line help, or `help.start()' for a HTML browser interface to help. Type `q()' to quit R. > invisible(options(echo = TRUE)) > library(rpart) > > target.lrn <- read.table("../lrn/num/472.dat",header=T,colClasses="numeric") > target.val <- read.table("../val/num/472.dat",header=T,colClasses="numeric") > target.tst <- read.table("../tst/num/472.dat",header=T,colClasses="numeric") > > > y.lrn <- target.lrn[,1] > y.val <- target.val[,1] > y.tst <- target.tst[,1] > y <- c(y.lrn,y.val,y.tst) > > n.lrn <- length(y.lrn) > n.val <- length(y.val) > n.tst <- length(y.tst) > n <- length(y) > > rm(target.lrn,target.val,target.tst) > > wts <- mat.or.vec(n,1) ; for (i in 1:n.lrn) wts[i]=1 > idx.lrn <- 1:n.lrn > idx.val <- (n.lrn+1):(n.lrn+n.val) > idx.tst <- (n.lrn+n.val+1):n > > mod <- read.table("../cty_mod.txt", + header=F,colClasses="character",col.names=c("file","feature","type")) > > n.mod <- length(mod$file) > > first.time <- TRUE > > for (i in 1:n.mod) { + + fn.lrn <- paste("../lrn/",mod$type[i],"/",mod$file[i],".dat",sep="") + fn.val <- paste("../val/",mod$type[i],"/",mod$file[i],".dat",sep="") + fn.tst <- paste("../tst/",mod$type[i],"/",mod$file[i],".dat",sep="") + print(mod$feature[i]) + + if (mod$type[i]=="chr") { + + f.lrn <- read.table(fn.lrn, + header=T,colClasses="character",blank.lines.skip=F) + f.val <- read.table(fn.val, + header=T,colClasses="character",blank.lines.skip=F) + f.tst <- read.table(fn.tst, + header=T,colClasses="character",blank.lines.skip=F) + + f <- c(f.lrn[,1],f.val[,1],f.tst[,1]) + + if (mod$feature[i]=="STATE") { + f[f=="AS"|f=="DC"|f=="DE"|f=="MA"|f=="ME"|f=="NH"] <- "S1" + f[f=="OH"|f=="RI"|f=="VI"|f=="WV"] <- "S1" + f[f=="AA"|f=="AE"|f=="AP"|f=="CT"|f=="GU"|f=="MD"] <- "S2" + f[f=="NJ"|f=="NY"|f=="PA"|f=="PA"|f=="VA"|f=="VT"] <- "S2" + f[f=="WY"] <- "S2" + f[f=="AK"|f=="UT"|f=="MS"] <- "S3" + f[f=="NE"|f=="ND"] <- "S4" + f[f=="SD"|f=="SC"] <- "S5" + } + + f <- as.factor(f) + + n.lev <- nlevels(f) + f.name <- levels(f) + if(n.lev==2) f.name <- c(mod$feature[i],mod$feature[i]) + print(paste(" nlevels = ",n.lev)) + + f <- model.matrix(y ~ f - 1) # Note: Intercept removed. + f <- f[,2:n.lev] # Note: First dummy deleted. + f.name <- f.name[2:n.lev] # Note: First name deleted. + + } else { + + f.lrn<-read.table(fn.lrn, + header=T,colClasses="numeric",blank.lines.skip=F) + f.val<-read.table(fn.val, + header=T,colClasses="numeric",blank.lines.skip=F) + f.tst<-read.table(fn.tst, + header=T,colClasses="numeric",blank.lines.skip=F) + + f <- c(f.lrn[,1],f.val[,1],f.tst[,1]) + + f[is.na(f)] <- 0 + + f.name <- mod$feature[i] + + if (mod$feature[i]=="DOB") { + d <- f ; d[d>0] <- 1 # Note: Dummy for missing DOB + f <- cbind(d,f,f^2) # Note: Quadratic term added to DOB. + rm(d) + f.name <- c("DOB_miss","DOB_linr","DOB_quad") + } + + } + + if (first.time) { + X <- f + X.names <- f.name + first.time <- FALSE + } else { + X <- cbind(prev.X,f) + X.names <- c(prev.X.names,f.name) + } + + prev.X <- X + prev.X.names <- X.names + + } [1] "LASTGIFT" [1] "PEPSTRFL" [1] " nlevels = 2" [1] "STATE" [1] " nlevels = 33" [1] "RECP3" [1] " nlevels = 2" [1] "DOB" [1] "MAILCODE" [1] " nlevels = 2" [1] "MHUC2" [1] "LASTDATE" [1] "MINRAMNT" > > LAST.by.PEP <- X[,1]*X[,2] > > X <- cbind(prev.X,LAST.by.PEP) > X.names <- c(prev.X.names,"LAST.by.PEP") > > rm(prev.X,prev.X.names) > rm(f.lrn,f.val,f) > > dimnames(X) <- list(NULL,X.names) > > fit <- lm(y~X,weights=wts) > aov <- anova.lm(fit) > > ehat <- fit$residuals > mse.lrn <- sum(ehat[idx.lrn]^2)/n.lrn > mse.val <- sum(ehat[idx.val]^2)/n.val > mse.tst <- sum(ehat[idx.tst]^2)/n.tst > > print(paste(" rank = ",fit$rank)) [1] " rank = 44" > print(paste(" P-value = ",aov[1,5])) [1] " P-value = 4.78123188298365e-105" > print(paste(" mse.lrn = ",mse.lrn)) [1] " mse.lrn = 19.9111807739091" > print(paste(" mse.val = ",mse.val)) [1] " mse.val = 18.6052588093825" > print(paste(" mse.tst = ",mse.tst)) [1] " mse.tst = 17.7951514079772" > > print(summary.lm(fit)) Call: lm(formula = y ~ X, weights = wts) Residuals: Min 1Q Median 3Q Max -21.9624 -0.8230 -0.5556 0.0000 199.2981 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -1.976e+01 3.398e+00 -5.816 6.07e-09 *** XLASTGIFT 7.567e-03 1.767e-03 4.283 1.85e-05 *** XPEPSTRFL -3.302e-01 5.562e-02 -5.936 2.93e-09 *** XAR 5.091e-02 2.125e-01 0.240 0.810633 XAZ 1.390e-01 1.698e-01 0.819 0.413017 XCA 4.036e-01 1.365e-01 2.958 0.003101 ** XCO 2.650e-01 1.769e-01 1.498 0.134211 XFL 1.566e-01 1.429e-01 1.096 0.272890 XGA 1.465e-01 1.586e-01 0.924 0.355617 XHI 6.318e-01 2.815e-01 2.244 0.024823 * XIA -1.748e-02 1.966e-01 -0.089 0.929180 XID 5.446e-01 2.629e-01 2.072 0.038293 * XIL 1.017e-01 1.469e-01 0.692 0.489065 XIN 1.430e-02 1.627e-01 0.088 0.929992 XKS 7.198e-02 1.978e-01 0.364 0.715886 XKY 7.201e-02 1.861e-01 0.387 0.698835 XLA 4.555e-02 1.863e-01 0.245 0.806829 XMI 1.112e-01 1.490e-01 0.746 0.455621 XMN -6.804e-02 1.728e-01 -0.394 0.693752 XMO 1.503e-01 1.653e-01 0.910 0.363041 XMT 1.683e-01 2.678e-01 0.628 0.529690 XNC 1.936e-01 1.543e-01 1.255 0.209558 XNM 2.740e-01 2.228e-01 1.230 0.218841 XNV 7.333e-02 2.177e-01 0.337 0.736222 XOK -7.684e-02 1.852e-01 -0.415 0.678292 XOR 3.836e-01 1.722e-01 2.227 0.025943 * XS1 -5.454e-01 4.908e-01 -1.111 0.266505 XS2 3.526e-01 2.435e-01 1.448 0.147487 XS3 -1.053e-01 1.808e-01 -0.583 0.560198 XS4 1.296e-01 2.125e-01 0.610 0.542080 XS5 2.649e-01 1.747e-01 1.517 0.129351 XTN 1.160e-02 1.681e-01 0.069 0.944966 XTX 1.185e-01 1.444e-01 0.821 0.411659 XWA 2.643e-01 1.581e-01 1.671 0.094651 . XWI -4.792e-02 1.652e-01 -0.290 0.771823 XRECP3 6.015e-01 1.225e-01 4.912 9.03e-07 *** XDOB_miss -2.269e-01 9.654e-02 -2.350 0.018756 * XDOB_linr 1.906e-04 5.203e-05 3.664 0.000248 *** XDOB_quad -2.576e-08 6.786e-09 -3.796 0.000147 *** XMAILCODE -4.372e-01 1.451e-01 -3.014 0.002579 ** XMHUC2 5.183e-02 2.057e-02 2.520 0.011753 * XLASTDATE 2.091e-03 3.557e-04 5.880 4.13e-09 *** XMINRAMNT 3.675e-03 2.492e-03 1.475 0.140218 XLAST.by.PEP 3.472e-02 2.689e-03 12.912 < 2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 4.464 on 66856 degrees of freedom Multiple R-Squared: 0.009356, Adjusted R-squared: 0.008718 F-statistic: 14.68 on 43 and 66856 DF, p-value: < 2.2e-16 > > outvec <- fit$fitted.values[idx.lrn] > outlab <- "yhat.lrn" > outfil <- "yhat_lrn_intr.dat" > write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil) > > outvec <- fit$fitted.values[idx.val] > outlab <- "yhat.val" > outfil <- "yhat_val_intr.dat" > write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil) > > outvec <- fit$fitted.values[idx.tst] > outlab <- "yhat.tst" > outfil <- "yhat_tst_intr.dat" > write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil) > > rm(outvec,outlab,outfil) > > x0 <- c(0,n.lrn) > y0 <- c(0,sum(y.lrn-0.68)) > > x1 <- (1:n.lrn) > y1 <- fit$fitted.values[1:n.lrn] > y1 <- y1-0.68 > y1 <- sort(y1) > y1 <- y1[n.lrn:1] > y1 <- cumsum(y1) > > idx <- 1:n.lrn > > print(paste("maximum profit in learning sample is ", max(y1))) [1] "maximum profit in learning sample is 12486.2488227675" > print(paste("maximum occurs at ", idx[y1==max(y1)])) [1] "maximum occurs at 38276" > > idx <- seq(1,n.lrn,length=200) > > x1 <- x1[idx] > y1 <- y1[idx] > > source("psopts.r"); > postscript(file="cty_intr_lif.eps"); > > plot(x=c(x0,x1),y=c(y0,y1),ylab="dollars",xlab="size of mailing",type="n") > lines(x=x0,y=y0,col="green") > lines(x=x1,y=y1,col="red") > > dev.off() null device 1 > proc.time() [1] 23.86 4.94 28.94 0.00 0.00 >