# EXERCISE 3 AND 4
# Week 1 Exercise - HMGT400
#
# Setup: load plyr, read the hospital data, and split it by year.

# This week's exercise asks for the plyr package. Install it only when it is
# not already available, so re-running the script does not reinstall it.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)

# Please change the file location to wherever the CSV lives on your machine.
hosp <- read.csv(
  "C:/Users/hzare/Dropbox/HMGT400HOSPITAL.csv",
  header = TRUE,
  sep = ","
)

# 1: See the variables' names.
names(hosp)
# Make sure the dataset contains the following variables:
##  1 hospital_beds; Hospital beds
##  2 total_hospital_employees_on_payr; Number of paid employees
##  3 total_hospital_non_paid_workers; Number of non-paid employees
##  4 total_hosp_cost; Total hospital cost
##  5 log_hosp_revenue; Total hospital revenues
##  6 total_hospital_medicare_days; Available Medicare days
##  7 total_hospital_medicaid_days; Available Medicaid days
##  8 total_hospital_discharges; Total hospital discharges
##  9 total_hospital_medicare_discharg; Medicare discharges
## 10 total_hospital_medicaid_discharg; Medicaid discharges

# 2: Generate subset data for 2011 and 2012.
# This command shows how many observations are available for each year.
table(hosp$year)

# 3: The table asks for N, mean and st. dev. for 2011 and 2012,
# so generate two subsets: one for 2011 and one for 2012.
hosp11 <- subset(hosp, year == "2011")
hosp12 <- subset(hosp, year == "2012")
# For this week's exercise you need to install the plyr package.

# 4: Go through each variable and dataset in turn.
# Let's start with the first row of the table.
# na.rm = TRUE makes mean() and sd() skip missing values; table() already
# leaves NA out of the count by default, so N, mean and SD use the same rows.

## Hospital beds - 2011
### 4a -- N
mytable <- table(hosp11$hospital_beds)
summary(mytable)
### 4b -- Mean
mean(hosp11$hospital_beds, na.rm = TRUE)
### 4c -- StDev
sd(hosp11$hospital_beds, na.rm = TRUE)
#### As you see there are n=1505, mean=376.60 and stdev=560.89 for 2011;
#### add them to the first row of the table.

## Hospital beds - 2012
# Here you need to change the dataset to hosp12.
### 4d -- N
mytable <- table(hosp12$hospital_beds)
summary(mytable)
### 4e -- Mean
mean(hosp12$hospital_beds, na.rm = TRUE)
### 4f -- StDev
sd(hosp12$hospital_beds, na.rm = TRUE)
#### As you see there are n=1525, mean=376.80 and stdev=579.84 for 2012
#### (versus n=1505, mean=376.60 and stdev=560.89 for 2011);
#### add them to the first row of the table.
# 5: Next, run a t-test and look at the p-value.
# t.test() reports the p-value directly.

# 5a. Mean number of beds by year, and year as a factor variable.
aggregate(hospital_beds ~ year, hosp, mean)
year1 <- as.factor(hosp$year)

# Two-sample (unpaired) t-test of hospital beds, 2011 versus 2012, using the
# hosp11 / hosp12 subsets created in step 3.
# NOTE: the earlier form of this call passed hosp$year1 as the second sample.
# year1 is a standalone vector, not a column of hosp, so hosp$year1 is NULL
# and t.test() ran a one-sample test of the mean against 0. The p-value of
# 2.2e-16 quoted for this step came from that one-sample test; read the
# p-value from the output of the call below instead.
t.test(hosp11$hospital_beds, hosp12$hospital_beds, paired = FALSE)

# You can also look at the distribution of the data.
boxplot(
  hosp$hospital_beds ~ hosp$year,
  main = "Figure 1. Boxplot of hospital beds in 2011 & 2012",
  font.main = 1,
  xlab = "Year",
  ylab = "# of Beds"
)

# attach(hosp11)
# Now follow steps 4a, 4b, 4c, 4d, 4e and 5a for the next variables,
# starting with total_hospital_employees_on_payr.
# 6: You can also compute the mean and SD of every column in one call.
# See https://www.stat.berkeley.edu/~spector/Rcourse.pdf for more information.
#
# The first column is dropped, then only numeric columns are kept. Calling
# apply() on the data frame would coerce it to a matrix first, so a single
# text column would turn every result into NA.
num11 <- Filter(is.numeric, hosp11[-1])
num12 <- Filter(is.numeric, hosp12[-1])

# 2011: column means, then column standard deviations.
vapply(num11, mean, numeric(1), na.rm = TRUE)
vapply(num11, sd, numeric(1), na.rm = TRUE)

# 2012: column means, then column standard deviations.
vapply(num12, mean, numeric(1), na.rm = TRUE)
vapply(num12, sd, numeric(1), na.rm = TRUE)

# Thank you, Dr. Zare
# Exercise 4: compare teaching and non-teaching hospitals.

# Step 1: Install the dplyr package (only if it is missing) and load it.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# Step 2: Read your data.
# Please change the file location to wherever the CSV lives on your machine.
hosp <- read.csv(
  "C:/Users/hzare/Dropbox/HMGT400HOSPITAL.csv",
  header = TRUE,
  sep = ","
)

# Step 3: See the variables' names.
names(hosp)

# Make sure the dataset contains the following variables:
##  1 hospital_beds; Hospital beds
##  2 total_hospital_employees_on_payr; Number of paid employees
##  3 total_hospital_non_paid_workers; Number of non-paid employees
##  4 total_hosp_cost; Total hospital cost
##  5 log_hosp_revenue; Total hospital revenues
##  6 total_hospital_medicare_days; Available Medicare days
##  7 total_hospital_medicaid_days; Available Medicaid days
##  8 total_hospital_discharges; Total hospital discharges
##  9 total_hospital_medicare_discharg; Medicare discharges
## 10 total_hospital_medicaid_discharg; Medicaid discharges

# Step 4: See the number of observations for teaching and non-teaching
# hospitals. Note: 1 = teaching, 0 = non-teaching.
table(hosp$teaching_hospital)

# Step 5: Group the data by teaching status.
# group_by() and summarize() are namespace-qualified because this file also
# loads plyr, which exports its own summarize() that ignores dplyr groups.
teach <- dplyr::group_by(hosp, teaching_hospital)

# Step 6: See the means, one row per teaching status.
dplyr::summarize(
  teach,
  bed = mean(hospital_beds, na.rm = TRUE),
  payer = mean(total_hospital_employees_on_payr, na.rm = TRUE),
  nopayer = mean(total_hospital_non_paid_workers, na.rm = TRUE),
  cost = mean(total_hosp_cost, na.rm = TRUE),
  revenue = mean(log_hosp_revenue, na.rm = TRUE),
  medicare = mean(total_hospital_medicare_days, na.rm = TRUE),
  mediciad = mean(total_hospital_medicaid_days, na.rm = TRUE),
  totdis = mean(total_hospital_discharges, na.rm = TRUE),
  mediciaredis = mean(total_hospital_medicare_discharg, na.rm = TRUE),
  mediciaddis = mean(total_hospital_medicaid_discharg, na.rm = TRUE),
  resident = mean(interns_and_residents, na.rm = TRUE),
  member = mean(system_member, na.rm = TRUE)
)

# Step 7: See the standard deviations, one row per teaching status.
dplyr::summarize(
  teach,
  bed = sd(hospital_beds, na.rm = TRUE),
  payer = sd(total_hospital_employees_on_payr, na.rm = TRUE),
  nopayer = sd(total_hospital_non_paid_workers, na.rm = TRUE),
  cost = sd(total_hosp_cost, na.rm = TRUE),
  revenue = sd(log_hosp_revenue, na.rm = TRUE),
  medicare = sd(total_hospital_medicare_days, na.rm = TRUE),
  mediciad = sd(total_hospital_medicaid_days, na.rm = TRUE),
  totdis = sd(total_hospital_discharges, na.rm = TRUE),
  mediciaredis = sd(total_hospital_medicare_discharg, na.rm = TRUE),
  mediciaddis = sd(total_hospital_medicaid_discharg, na.rm = TRUE),
  resident = sd(interns_and_residents, na.rm = TRUE),
  member = sd(system_member, na.rm = TRUE)
)

# Step 8: Generate two datasets for a t-test.
hosp_t <- subset(hosp, teaching_hospital == 1)
hosp_nt <- subset(hosp, teaching_hospital == 0)

# Step 9: See the results of the t-test (unpaired, teaching vs non-teaching).
t.test(hosp_t$hospital_beds, hosp_nt$hospital_beds, paired = FALSE)

# Continue for all other variables.
# Thank you
# Week 1 Exercise - HMGT400
# (Copy of the exercise script, rebuilt from text that had been split into
# one token per line.)

# Install plyr only when it is not already available, then load it.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)

# Please change the file location to wherever the CSV lives on your machine.
hosp <- read.csv(
  "C:/Users/hzare/Dropbox/HMGT400HOSPITAL.csv",
  header = TRUE,
  sep = ","
)

# 1: See the variables' names.
names(hosp)
# Make sure the dataset contains the following variables:
##  1 hospital_beds; Hospital beds
##  2 total_hospital_employees_on_payr; Number of paid employees
##  3 total_hospital_non_paid_workers; Number of non-paid employees
##  4 total_hosp_cost; Total hospital cost
##  5 log_hosp_revenue; Total hospital revenues
##  6 total_hospital_medicare_days; Available Medicare days
##  7 total_hospital_medicaid_days; Available Medicaid days
##  8 total_hospital_discharges; Total hospital discharges
##  9 total_hospital_medicare_discharg; Medicare discharges
## 10 total_hospital_medicaid_discharg; Medicaid discharges

# 2: Generate subset data for 2011 and 2012.
# This command shows how many observations are available for each year.
table(hosp$year)

# 3: The table asks for N, mean and st. dev. for 2011 and 2012,
# so generate two subsets: one for 2011 and one for 2012.
hosp11 <- subset(hosp, year == "2011")
hosp12 <- subset(hosp, year == "2012")

# For this week's exercise you need to install the plyr package.

# 4: Go through each variable and dataset in turn.
# Let's start with the first row of the table.
# na.rm = TRUE makes mean() and sd() skip missing values; table() already
# leaves NA out of the count by default, so N, mean and SD use the same rows.

## Hospital beds - 2011
### 4a -- N
mytable <- table(hosp11$hospital_beds)
summary(mytable)
### 4b -- Mean
mean(hosp11$hospital_beds, na.rm = TRUE)
### 4c -- StDev
sd(hosp11$hospital_beds, na.rm = TRUE)
#### As you see there are n=1505, mean=376.60 and stdev=560.89 for 2011;
#### add them to the first row of the table.

## Hospital beds - 2012
# Here you need to change the dataset to hosp12.
### 4d -- N
mytable <- table(hosp12$hospital_beds)
summary(mytable)
### 4e -- Mean
mean(hosp12$hospital_beds, na.rm = TRUE)
### 4f -- StDev
sd(hosp12$hospital_beds, na.rm = TRUE)
#### As you see there are n=1525, mean=376.80 and stdev=579.84 for 2012
#### (versus n=1505, mean=376.60 and stdev=560.89 for 2011);
#### add them to the first row of the table.

# 5: Next, run a t-test and look at the p-value.
# the ttest command gives you p-value but first you need to generate a factor
# Week 1 Exercise - HMGT400
#
# Setup: load plyr, read the hospital data, and split it by year.

# Install plyr only when it is not already available, so re-running the
# script does not reinstall it; then load it.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)

# Please change the file location to wherever the CSV lives on your machine.
hosp <- read.csv(
  "C:/Users/hzare/Dropbox/HMGT400HOSPITAL.csv",
  header = TRUE,
  sep = ","
)

# 1: See the variables' names.
names(hosp)
# Make sure the dataset contains the following variables:
##  1 hospital_beds; Hospital beds
##  2 total_hospital_employees_on_payr; Number of paid employees
##  3 total_hospital_non_paid_workers; Number of non-paid employees
##  4 total_hosp_cost; Total hospital cost
##  5 log_hosp_revenue; Total hospital revenues
##  6 total_hospital_medicare_days; Available Medicare days
##  7 total_hospital_medicaid_days; Available Medicaid days
##  8 total_hospital_discharges; Total hospital discharges
##  9 total_hospital_medicare_discharg; Medicare discharges
## 10 total_hospital_medicaid_discharg; Medicaid discharges

# 2: Generate subset data for 2011 and 2012.
# This command shows how many observations are available for each year.
table(hosp$year)

# 3: The table asks for N, mean and st. dev. for 2011 and 2012,
# so generate two subsets: one for 2011 and one for 2012.
hosp11 <- subset(hosp, year == "2011")
hosp12 <- subset(hosp, year == "2012")
# For this week's exercise you need to install the plyr package.

# 4: Go through each variable and dataset in turn.
# Let's start with the first row of the table.
# na.rm = TRUE makes mean() and sd() skip missing values; table() already
# leaves NA out of the count by default, so N, mean and SD use the same rows.

## Hospital beds - 2011
### 4a -- N
mytable <- table(hosp11$hospital_beds)
summary(mytable)
### 4b -- Mean
mean(hosp11$hospital_beds, na.rm = TRUE)
### 4c -- StDev
sd(hosp11$hospital_beds, na.rm = TRUE)
#### As you see there are n=1505, mean=376.60 and stdev=560.89 for 2011;
#### add them to the first row of the table.

## Hospital beds - 2012
# Here you need to change the dataset to hosp12.
### 4d -- N
mytable <- table(hosp12$hospital_beds)
summary(mytable)
### 4e -- Mean
mean(hosp12$hospital_beds, na.rm = TRUE)
### 4f -- StDev
sd(hosp12$hospital_beds, na.rm = TRUE)
#### As you see there are n=1525, mean=376.80 and stdev=579.84 for 2012
#### (versus n=1505, mean=376.60 and stdev=560.89 for 2011);
#### add them to the first row of the table.
#5 The next is to run a ttest and see the p-value
# the ttest command gives you p-value but first you need to generate a factor