# RESEARCH AND DATA ANALYSIS #1
# Week 1 Exercise - HMGT400
#
# Descriptive statistics (N, mean, standard deviation) for 2011 and 2012 and a
# between-year comparison, worked through for hospital_beds. The numbered
# steps can be repeated for each of the other variables in the table.

# Setup ----

# For this week's exercise you need the plyr package. Install it only when it
# is missing, so re-running the script does not reinstall it every time.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)

# Please change the location of the file to match your computer.
data_path <- "C:/Users/hzare/Dropbox/HMGT400HOSPITAL.csv"
hosp <- read.csv(data_path, header = TRUE, sep = ",")

# 1: See the variables' names ----
names(hosp)

# You need to make sure you have the following variables in the dataset:
##  1 hospital_beds;                    Hospital beds
##  2 total_hospital_employees_on_payr; Number of paid employees
##  3 total_hospital_non_paid_workers;  Number of non-paid employees
##  4 total_hosp_cost;                  Total hospital cost
##  5 log_hosp_revenue;                 Total hospital revenues
##  6 total_hospital_medicare_days;     Available Medicare days
##  7 total_hospital_medicaid_days;     Available Medicaid days
##  8 total_hospital_discharges;        Total hospital discharges
##  9 total_hospital_medicare_discharg; Medicare discharges
## 10 total_hospital_medicaid_discharg; Medicaid discharges

# 2: Generate subset data for 2011 and 2012 ----

# This command shows how many observations are available for each year.
table(hosp$year)

# 3: The table asks for N, mean and st. dev. for 2011 and 2012, ----
# so generate two subsets: one for 2011 and one for 2012.
hosp11 <- subset(hosp, year == 2011)
hosp12 <- subset(hosp, year == 2012)

# 4: Go through each variable and dataset ----
# Let's start with the first row of the table.

## Hospital beds - 2011
### 4a -- N (number of non-missing observations)
sum(!is.na(hosp11$hospital_beds))
### 4b -- Mean (na.rm = TRUE so a missing value does not turn the result NA)
mean(hosp11$hospital_beds, na.rm = TRUE)
### 4c -- StDev
sd(hosp11$hospital_beds, na.rm = TRUE)
#### As you see there are n=1505, mean=376.60 and stdev=560.89 for 2011;
#### add them to the first row of the table.

## Hospital beds - 2012
# Here you need to change the dataset to hosp12.
### 4d -- N
sum(!is.na(hosp12$hospital_beds))
### 4e -- Mean
mean(hosp12$hospital_beds, na.rm = TRUE)
### 4f -- StDev
sd(hosp12$hospital_beds, na.rm = TRUE)
#### As you see there are n=1525, mean=376.80 and stdev=579.84 for 2012;
#### add them to the first row of the table.

# 5: Run a t-test and see the p-value ----

# 5a. Mean by year, and year as a factor variable.
aggregate(hospital_beds ~ year, hosp, mean)
year1 <- as.factor(hosp$year)

# Two-sample t-test comparing hospital beds in 2011 against 2012.
# NOTE: an earlier version called t.test(hosp$hospital_beds, hosp$year1).
# `year1` is a standalone vector, not a column of `hosp`, so hosp$year1 is
# NULL and t.test() silently ran a one-sample test of beds against zero
# (which is where a p-value of 2.2e-16 came from). Passing the two yearly
# samples explicitly gives the comparison the table asks for; read the
# p-value from the output below.
t.test(hosp11$hospital_beds, hosp12$hospital_beds, paired = FALSE)

# You can also look at the distribution of the data.
boxplot(
  hospital_beds ~ year,
  data = hosp,
  main = "Figure 1. Boxplot of hospital beds in 2011 & 2012",
  font.main = 1,
  xlab = "Year",
  ylab = "# of Beds"
)

# Now follow steps 4a, 4b, 4c, 4d, 4e, 4f and 5a for the next variables,
# starting with total_hospital_employees_on_payr.

# 6: Summarise every variable at once ----
# See for more information https://www.stat.berkeley.edu/~spector/Rcourse.pdf

# Apply `fun` (e.g. mean or sd) to each numeric column of `df`, ignoring
# missing values, and return a named numeric vector. apply() is avoided
# because it converts the data frame to a matrix first: a single text column
# would turn every value into a character string and break the statistics.
column_stats <- function(df, fun) {
  is_num <- vapply(df, is.numeric, logical(1))
  vapply(df[is_num], fun, numeric(1), na.rm = TRUE)
}

# [-1] drops the first column of each subset before summarising.
column_stats(hosp11[-1], mean)
column_stats(hosp11[-1], sd)
column_stats(hosp12[-1], mean)
column_stats(hosp12[-1], sd)

# Thank you, Dr. Zare