# Exploratory Data Analysis ----
# Exploratory analysis of the telecom customers CSV: load the data, inspect
# it, derive numeric codes for the categorical service columns, and draw the
# descriptive plots. Reconstructed from an interactive console history, so
# repeated and failed attempts have been folded into one runnable sequence.

# Packages ----
library(dplyr)
library(tidyverse)
library(readxl)
library(readr)
library(ggplot2)
library(scales)

# Load ----
data_path <- "C:/Users/ERNEST AND MARTIN/Desktop/telecomcustomers.csv"
telecomcustomers <- read.csv(data_path)

# Inspect ----
# View() opens a GUI viewer, so only call it from an interactive session.
if (interactive()) {
  View(telecomcustomers)
}
str(telecomcustomers)
head(telecomcustomers)
summary(telecomcustomers)

# Columns used throughout the rest of the analysis.
service_cols <- c(
  "InternetService", "OnlineSecurity", "DeviceProtection",
  "StreamingMovies", "MonthlyCharges", "TotalCharges"
)
telecomcustomers %>%
  select(all_of(service_cols)) %>%
  head()

# Storage class of each analysis column (replaces the one-by-one
# is.factor() / is.numeric() probes).
vapply(
  telecomcustomers[service_cols],
  function(col) class(col)[1],
  character(1)
)

# Univariate plots ----
ggplot(telecomcustomers, aes(x = gender)) +
  geom_bar(fill = "cornflowerblue", color = "black") +
  labs(
    x = "Gender",
    y = "Frequency",
    title = "Customers Gender in the study"
  )

# MonthlyCharges is continuous, so bin it with a histogram rather than
# drawing one bar per distinct value.
ggplot(telecomcustomers, aes(x = MonthlyCharges)) +
  geom_histogram(fill = "cornflowerblue", color = "black", bins = 30) +
  labs(
    x = "Monthly Charges",
    y = "Frequency",
    title = "Customers Monthly Charge"
  )

ggplot(telecomcustomers, aes(x = PaymentMethod)) +
  geom_bar() +
  labs(
    x = "",
    y = "Frequency",
    title = "Customers by Payment Method"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Numeric recoding ----
# Categorical columns go through factor() first: as.numeric() applied
# directly to character data yields NA. The resulting codes follow the
# alphabetical order of the labels. Charge columns go through
# as.character() so a factor-typed column yields its values, not its codes.
mydata <- mutate(
  telecomcustomers,
  InternetSer = as.numeric(factor(InternetService)),
  OnlineSec = as.numeric(factor(OnlineSecurity)),
  DeviceProtec = as.numeric(factor(DeviceProtection)),
  StrMovies = as.numeric(factor(StreamingMovies)),
  Mcharges = as.numeric(as.character(MonthlyCharges)),
  TCharges = as.numeric(as.character(TotalCharges))
)
head(mydata)

# Stand-alone numeric coding of the internet service column.
InternetService <- as.numeric(factor(telecomcustomers$InternetService))

# Numeric columns only.
mydata %>%
  select(where(is.numeric)) %>%
  head()

newdata <- mydata[c(
  "InternetSer", "OnlineSec", "DeviceProtec", "Mcharges", "TCharges"
)]
head(newdata)
summary(newdata)

# is.numeric() on a data frame is always FALSE; check column by column.
vapply(newdata, is.numeric, logical(1))

# Original labels behind the numeric codes, for readable axes and legends.
security_levels <- levels(factor(telecomcustomers$OnlineSecurity))
device_levels <- levels(factor(telecomcustomers$DeviceProtection))

# Monthly charges vs internet service ----
ggplot(newdata, aes(x = InternetSer, y = Mcharges)) +
  geom_point(color = "steelblue") +
  geom_smooth(method = "lm", formula = y ~ x)

# z.test() with this signature comes from the BSDA package, which this
# script never attached; run it only when BSDA is installed. It needs
# explicit standard deviations, so the sample SDs are supplied here.
# NOTE(review): a two-sample z-test of charges against category codes is
# statistically questionable - confirm the intended hypothesis.
if (requireNamespace("BSDA", quietly = TRUE)) {
  print(BSDA::z.test(
    newdata$Mcharges,
    y = newdata$InternetSer,
    alternative = "two.sided",
    mu = 0,
    sigma.x = sd(newdata$Mcharges, na.rm = TRUE),
    sigma.y = sd(newdata$InternetSer, na.rm = TRUE),
    conf.level = 0.95
  ))
} else {
  message("Package 'BSDA' is not installed; skipping z.test().")
}

ggplot(newdata, aes(x = Mcharges)) +
  geom_histogram(fill = "cornflowerblue", color = "white", bins = 30) +
  facet_wrap(~InternetSer, ncol = 1) +
  labs(title = "Monthly Charges histograms by Internet service provider")

# Mean monthly charge per internet service code.
plotdata <- newdata %>%
  group_by(InternetSer) %>%
  summarize(mean_Monthlycharges = mean(Mcharges, na.rm = TRUE))
plotdata

# Online security vs device protection ----
# fill must be discrete for the bars to be split and dodged by group.
ggplot(newdata, aes(x = OnlineSec, fill = factor(DeviceProtec))) +
  geom_bar(position = position_dodge(preserve = "single"))

# Same relationship as within-category proportions, with the codes mapped
# back to their original labels.
ggplot(
  newdata,
  aes(
    x = factor(
      OnlineSec,
      levels = seq_along(security_levels),
      labels = security_levels
    ),
    fill = factor(
      DeviceProtec,
      levels = seq_along(device_levels),
      labels = device_levels
    )
  )
) +
  geom_bar(position = "fill") +
  scale_y_continuous(breaks = seq(0, 1, .2), labels = percent) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    y = "Percent",
    fill = "Device protection",
    x = "Online security",
    title = "Online security by device protection"
  ) +
  theme_minimal()

# Total charges by streaming movies ----
ggplot(mydata, aes(x = TCharges)) +
  geom_histogram(fill = "cornflowerblue", color = "white", bins = 30) +
  facet_wrap(~StrMovies, ncol = 1) +
  labs(title = "Total Charges histograms by Streaming Movies")