# 19.01.2021
# Classification tree (rpart) on the survey data: fit a deliberately large
# tree, prune it to a chosen row of the cp table, report the classification
# error on the training set, then plot the pruned tree and print variable
# importance.
library(rpart)
library(rpart.plot)
library(caret)

# Print numbers with a decimal comma (session-wide setting).
options(OutDec = ",")

dataset <- read.csv2(
  file = "data_for_analysis_Energy-related_behaviours_of_consumers_from_Silesia.csv",
  header = TRUE,
  row.names = 1
)
names(dataset)
head(dataset)
# NOTE: attach(dataset) was removed on purpose. Nothing below needs it, and an
# attached `Class` column could silently satisfy lookups that ought to fail.
l.zm <- ncol(dataset)
l.obkt <- nrow(dataset)

# Training data: the first 1174 rows (capped at the number of rows actually
# read, so a shorter file does not yield all-NA rows), without columns
# 1-4, 69 and 70.
n.train <- min(1174L, l.obkt)
dataTree <- dataset[seq_len(n.train), -c(1, 2, 3, 4, 69, 70)]
names(dataTree)
tail(dataTree)

# Every column except the last three is treated as categorical.
no.qVar <- ncol(dataTree) - 3
for (j in seq_len(no.qVar)) {
  dataTree[, j] <- as.factor(dataTree[, j])
}

print("Classification errors of a tree built on a training set:", quote = FALSE)

# Row of the cp table (pruning variant) to use.
m <- 12

# The response must be the bare column name: with a left-hand side such as
# as.factor(dataTree$Class), the `.` on the right-hand side would still expand
# to every column of `dataTree`, including `Class` itself, so the tree could
# split on its own label. method = "class" makes rpart treat the response as
# categorical whatever its storage type.
model.rpart <- rpart(
  Class ~ .,
  data = dataTree,
  method = "class",
  control = rpart.control(
    minsplit = 5,
    cp = 0.001,
    usesurrogate = 0,
    maxsurrogate = 0
  )
)

tab.cp <- model.rpart$cptable
print(tab.cp)

if (m > nrow(tab.cp)) {
  stop(
    "Variant m = ", m, " is not available: the cp table has only ",
    nrow(tab.cp), " rows.",
    call. = FALSE
  )
}
my.cp <- tab.cp[m, 1]

print("The variant that was chosen:", quote = FALSE)
print(m)
print(
  paste(
    "The tree complexity/pruning parameter for this variant is equal",
    my.cp
  ),
  quote = FALSE
)

model.rpart.opt <- prune(model.rpart, cp = my.cp)

# Predict on the training predictors only (response column dropped).
dataTree.wY <- subset(dataTree, select = -Class)
rpart.pred <- predict(model.rpart.opt, dataTree.wY, type = "class")

# Confusion matrix: rows = observed class, columns = predicted class.
table(dataTree$Class, rpart.pred)

# Training-set misclassification rate in percent. Labels are compared as
# character so the result does not depend on how `Class` is stored.
is.hit <- as.character(dataTree$Class) == as.character(rpart.pred)
err.model <- (1 - sum(is.hit) / nrow(dataTree)) * 100
err.model

# Summary of the unpruned tree.
summary(model.rpart)

# The 'xpd = NA' option: otherwise on some devices the text is clipped.
par(mfrow = c(1, 1), xpd = NA)
plot(model.rpart.opt, main = "")
text(model.rpart.opt, cex = 1.5)

varImp(model.rpart.opt, surrogates = FALSE, competes = TRUE)