Analyzing and Visualizing Data - Residency Individual Project

srikiranvadla
Project_Code_Part2.R

# Decision-tree analysis of Credit_Card.csv: loads the data, builds 80/20
# train/test splits, fits rpart and ctree models for the
# `default.payment.next.month` target, plots them, and predicts on the
# held-out rows.

library(readr)
library(dplyr)
# install.packages("party")
library(party)
library(rpart)
library(rpart.plot)
library(ROCR)

# Fix the RNG seed so the random sample drawn below is reproducible.
set.seed(100)

# NOTE(review): hard-coded, machine-specific working directory. Both the CSV
# read and the PNG written further down are resolved relative to it, so it is
# kept as-is here; consider replacing it with a configurable data directory.
setwd("C:/Users/ialsmadi/Desktop/University_of_Cumberlands/Lectures/")
getwd()

Credits <- read.csv("Credit_Card.csv", header = TRUE)

# Class proportions of the target. prop.table() must be given the counts from
# table(); applied to the raw column it only divides each value by the
# column sum.
prop.table(table(Credits$default.payment.next.month))
summary(Credits$default.payment.next.month)

# Convert the target to a factor before any model is fitted.
Credits$default.payment.next.month <- as.factor(
  Credits$default.payment.next.month
)
head(Credits)
summary(Credits$default.payment.next.month)

# Generating test and train data - rows selected randomly with an 80/20 split.
# seq_len() is used instead of 1:nrow() so an empty data frame yields an
# empty index rather than c(1, 0).
trainIndex <- sample(seq_len(nrow(Credits)), 0.8 * nrow(Credits))
train <- Credits[trainIndex, ]
test <- Credits[-trainIndex, ]

# The response is referred to by its bare column name and resolved through
# the `data` argument. Writing it as `Credits$default.payment.next.month ~ .`
# makes `.` expand to every column of `data`, the target included, so the
# model would be allowed to predict the target from itself.
formulas <- default.payment.next.month ~ .

# Tree construction based on information gain.
tree <- rpart(
  formulas,
  data = Credits,
  method = "class",
  parms = list(split = "information")
)
rpart.plot(tree)
predicted <- predict(tree, Credits)
summary(predicted)

# Split between training and testing by row position: the first 80% of the
# rows are used for training, the remainder for testing.
n <- nrow(Credits)
n_train <- round(0.8 * n)
Credits.train <- Credits[seq_len(n_train), ]
Credits.test <- Credits[(n_train + 1):n, ]

# Tree fitted on the full data set.
rtree_fit1 <- rpart(
  default.payment.next.month ~ .,
  data = Credits,
  method = "class"
)
rpart.plot(rtree_fit1)
summary(rtree_fit1)

# Tree fitted on the training rows only.
rtree_fit2 <- rpart(
  default.payment.next.month ~ .,
  data = Credits.train,
  method = "class"
)
rpart.plot(rtree_fit2)
summary(rtree_fit2)

# Tree fitted on the test rows only (kept for comparison with rtree_fit2).
rtree_fit3 <- rpart(
  default.payment.next.month ~ .,
  data = Credits.test,
  method = "class"
)
rpart.plot(rtree_fit3)
summary(rtree_fit3)

# Conditional partitioning is implemented in the "ctree" method.
rtree_fit4 <- ctree(default.payment.next.month ~ ., data = Credits)
plot(rtree_fit4)
summary(rtree_fit4)

# Create the tree before opening the graphics device, so a fitting error
# cannot leave the PNG device open.
output.tree <- ctree(
  default.payment.next.month ~ LIMIT_BAL + SEX + EDUCATION + MARRIAGE + AGE,
  data = Credits
)

# Give the chart file a name.
png(file = "decision_tree2.png")
# Plot the tree.
plot(output.tree)
# Save the file.
dev.off()

# Use the tree fitted on the training data and test it with the test data.
# rtree_fit2 is the model fitted on Credits.train; rtree_fit3 was fitted on
# Credits.test itself and would only give an in-sample result.
predicted <- predict(rtree_fit2, Credits.test)
summary(predicted)