# Analyzing data with R
# Exploratory analysis of the `complaints` data set: missing-value overview,
# counts by state/product/issue, a monthly time series of Ohio complaints,
# and several descriptive plots (bar, polar, treemap, mosaic).

# Packages ----
# Attached up front, in the same relative order as before, so a missing
# dependency fails immediately and function masking is unchanged.
library(dplyr)
library(ggplot2)
library(tidyverse)
library(lubridate)
library(tseries)
library(forecast)
library(RColorBrewer)
library(gridExtra)
library(wordcloud)
library(devtools)
library(treemapify)
library(vcd)

# Load data ----
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
data_path <- "C:/Users/ERNEST AND MARTIN/Downloads/download2.RData"

# load() returns the *names* of the restored objects, so the previous
# load(load(...)) tried to open a file named after an object. Call it once.
load(data_path)

# Everything below reads a data frame called `complaints`; fail early with a
# clear message if the .RData file did not provide one.
stopifnot(exists("complaints"), is.data.frame(complaints))

# First look ----
head(complaints)
colMeans(is.na(complaints)) # share of missing values per column
str(complaints)
head(complaints$state) # peek at the raw values without dumping the column
summary(complaints)

# Complaint counts by state (all states).
data <- complaints %>%
  count(state)
data

# Restrict the rest of the analysis to Ohio.
newdata <- filter(complaints, state %in% c("OH"))
newdata

# Complaint counts by product and by issue (all states).
# `data` is intentionally reassigned here, as in the original script.
data <- complaints %>%
  count(product)
data

issue <- complaints %>%
  count(issue)
issue

# Monthly time series of Ohio complaints ----
newdata <- newdata %>%
  mutate(
    year = lubridate::year(date_received),
    month = lubridate::month(date_received),
    day = lubridate::day(date_received)
  )
# NOTE(review): the script used to run `newdata$date.received <- NULL`, which
# is a no-op because the column is named `date_received`. The column is left
# in place; drop it explicitly if it really should be removed.

# Keep `newdata` itself ungrouped: only `monthly` carries the grouping.
# Previously `newdata` was overwritten with the grouped version, which made
# the later count(submitted_via) return one row per year/month/channel.
monthly <- newdata %>%
  group_by(year, month)

per_month <- monthly %>%
  dplyr::summarize(num_complaint = n(), .groups = "drop")
stopifnot(nrow(per_month) > 0)

tail(per_month, n = 1)
head(per_month, n = 1)

# Derive the series start from the data instead of hard-coding 2018-01 to
# 2021-12, which silently recycled or truncated counts for any other span.
last_row <- nrow(per_month)
series_start <- c(per_month$year[1], per_month$month[1])
series_end <- c(per_month$year[last_row], per_month$month[last_row])
months_spanned <- (series_end[1] - series_start[1]) * 12 +
  (series_end[2] - series_start[2]) + 1
if (!isTRUE(months_spanned == last_row)) {
  warning(
    "Monthly counts are not one row per calendar month between the first ",
    "and last period; the time series index will be misaligned.",
    call. = FALSE
  )
}

per_month$Date <- paste(per_month$year, per_month$month, sep = "-")
per_month <- per_month[c("Date", "num_complaint")]

cc <- ts(per_month$num_complaint, start = series_start, frequency = 12)
start(cc)
end(cc)

# Seasonality diagnostics.
boxplot(cc ~ cycle(cc))
plot(stl(cc, s.window = "periodic", t.window = 15))
seasonplot(
  cc,
  year.labels = TRUE,
  year.labels.left = TRUE,
  col = 1:4,
  labelgap = 0.4,
  main = "Comparing Seasons"
)

# Descriptive plots (Ohio) ----
# Interpolate the 8-colour "Set2" palette to however many levels are needed.
colcount.product <- length(unique(newdata$product))
getPalette <- colorRampPalette(brewer.pal(8, "Set2"))

# Complaints per product.
ggplot(newdata, aes(x = product)) +
  geom_bar(aes(fill = factor(product))) +
  theme(axis.text.x = element_blank()) +
  scale_fill_manual(values = getPalette(colcount.product))

# Complaints per submission channel, drawn on polar coordinates.
ggplot(newdata, aes(x = submitted_via)) +
  geom_bar(aes(fill = factor(submitted_via))) +
  scale_fill_manual(
    values = getPalette(length(unique(newdata$submitted_via)))
  ) +
  coord_polar() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = 12),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank()
  )

# Product mix among complaints without a timely response (pie chart).
ggplot(
  newdata[newdata$timely_response %in% "No", ],
  aes(x = factor(1), fill = product)
) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_blank()
  ) +
  scale_fill_manual(values = getPalette(colcount.product)) +
  labs(title = "Products that failed to provide timely response")

# Timely vs. non-timely responses, stacked per product.
ggplot(newdata, aes(x = product, fill = timely_response)) +
  geom_bar(position = "stack") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    y = "Count",
    fill = "Timely Response",
    x = "Products",
    title = "Timely Response by Products"
  )

# Treemap of submission channels; relies on `newdata` being ungrouped so
# that count() yields exactly one row per channel.
tree <- newdata %>%
  count(submitted_via)

ggplot(tree, aes(fill = submitted_via, area = n, label = submitted_via)) +
  geom_treemap() +
  geom_treemap_text(colour = "white", place = "centre") +
  labs(title = "Complaints by Mode of Submission") +
  theme(legend.position = "none")

# Three-way contingency table and mosaic plot.
# (An earlier xtabs() call referenced `products` instead of `product`; it
# errored and was superseded by this one, so it has been removed.)
tbl <- xtabs(~ timely_response + submitted_via + product, newdata)
ftable(tbl)
mosaic(tbl, main = "Consumers Complaints")