# EXERCISE 3 AND 4

# profilejamex.a.bowher2s
# codesforweek1and2.docx

# Week 1 Exercise - HMGT400
#
# Purpose: for the hospital dataset, report N, mean and standard deviation of
# hospital_beds for 2011 and 2012, test whether the two years differ, and
# summarise all remaining columns per year.

# For this week's exercise you need the plyr package.
# Install it only when it is missing (avoids re-downloading on every run),
# then load it.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)

# Please change the location of the file to match your computer.
hosp <- read.csv(
  "C:/Users/hzare/Dropbox/HMGT400HOSPITAL.csv",
  header = TRUE,
  sep = ","
)

# 1: See the variables' names.
names(hosp)
# You need to make sure you have the following variables in the dataset:
## 1  hospital_beds; Hospital beds
## 2  total_hospital_employees_on_payr; Number of paid employees
## 3  total_hospital_non_paid_workers; Number of non-paid employees
## 4  total_hosp_cost; Total hospital cost
## 5  log_hosp_revenue; Total hospital revenues
## 6  total_hospital_medicare_days; Available Medicare days
## 7  total_hospital_medicaid_days; Available Medicaid days
## 8  total_hospital_discharges; Total hospital discharges
## 9  total_hospital_medicare_discharg; Medicare discharges
## 10 total_hospital_medicaid_discharg; Medicaid discharges

# 2: Generate subset data for 2011 and 2012.
# This command shows how many observations are available for each year.
table(hosp$year)

# 3: The table asks for N, mean and st. dev. for 2011 and 2012,
# so generate two subsets: one for 2011 and one for 2012.
hosp11 <- subset(hosp, hosp$year == "2011")
hosp12 <- subset(hosp, hosp$year == "2012")

# 4: Go through each variable and dataset, starting with the first row of
# the table.
## Hospital beds - 2011
### 4a -- N (printed as "Number of cases in table")
mytable <- table(hosp11$hospital_beds)
summary(mytable)
### 4b -- Mean
mean(hosp11$hospital_beds, na.rm = TRUE)
### 4c -- StDev
sd(hosp11$hospital_beds, na.rm = TRUE)
#### As you see there are n=1505, mean=376.60 and stdev=560.89 for 2011;
#### add them to the first row of the table.

## Hospital beds - 2012
# Here you need to change the dataset to hosp12.
### 4d -- N
mytable <- table(hosp12$hospital_beds)
summary(mytable)
### 4e -- Mean
mean(hosp12$hospital_beds, na.rm = TRUE)
### 4f -- StDev
sd(hosp12$hospital_beds, na.rm = TRUE)
#### As you see there are n=1525, mean=376.80 and stdev=579.84 for 2012;
#### add them to the first row of the table.

# 5: Next, run a t-test and look at the p-value.
# First compare the yearly means directly.
aggregate(hospital_beds ~ year, hosp, mean)

# 5a: Generate a factor variable for year.
year1 <- as.factor(hosp$year)

# Two-sample (Welch) t-test of beds in 2011 against beds in 2012.
# The earlier version passed hosp$year1 as the second sample. year1 is a
# standalone vector, not a column of hosp, so hosp$year1 was NULL and t.test()
# silently ran a one-sample test of "mean beds == 0". That is where the
# p-value of 2.2e-16 came from.
t.test(
  hosp$hospital_beds[which(year1 == "2011")],
  hosp$hospital_beds[which(year1 == "2012")],
  paired = FALSE
)

# You can also look at the distribution of the data.
boxplot(
  hospital_beds ~ year,
  data = hosp,
  main = "Figure 1. Boxplot of hospital beds in 2011 & 2012",
  font.main = 1,
  xlab = "Year",
  ylab = "# of Beds"
)

## Read the p-value from the t.test() output above. The 2011 and 2012 means
## from step 4 (376.60 vs 376.80) are almost identical, so expect a large
## p-value rather than the ~0 produced by the accidental one-sample test.
# (attach(hosp11) is not needed; refer to columns as hosp11$<name>.)
# Now follow steps 4a, 4b, 4c, 4d, 4e and 5a for the next variables,
# starting with total_hospital_employees_on_payr.

# 6: You can also summarise every column at once.
# See for more information: https://www.stat.berkeley.edu/~spector/Rcourse.pdf
# The first column is dropped, as before. Only numeric columns are kept,
# because apply() on a data frame converts it to a matrix and a single text
# column would make every column non-numeric.
num11 <- Filter(is.numeric, hosp11[-1])
vapply(num11, mean, numeric(1), na.rm = TRUE)
vapply(num11, sd, numeric(1), na.rm = TRUE)

num12 <- Filter(is.numeric, hosp12[-1])
vapply(num12, mean, numeric(1), na.rm = TRUE)
vapply(num12, sd, numeric(1), na.rm = TRUE)

# Thank you, Dr. Zare

###########
# Week 2
###########
# Purpose: compare teaching and non-teaching hospitals. Report the group
# means and standard deviations of the key variables, then t-test each one.
# This week you can work with the dplyr package; it helps you get your
# results with only a few lines of code.

# Step 1: Install package dplyr (only when it is missing) and load it.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# Step 2: Read your data.
# Please change the location of the file to match your computer.
hosp <- read.csv(
  "C:/Users/hzare/Dropbox/HMGT400HOSPITAL.csv",
  header = TRUE,
  sep = ","
)

# Step 3: See the variables' names.
names(hosp)

# You need to make sure you have the following variables in the dataset:
## 1  hospital_beds; Hospital beds
## 2  total_hospital_employees_on_payr; Number of paid employees
## 3  total_hospital_non_paid_workers; Number of non-paid employees
## 4  total_hosp_cost; Total hospital cost
## 5  log_hosp_revenue; Total hospital revenues
## 6  total_hospital_medicare_days; Available Medicare days
## 7  total_hospital_medicaid_days; Available Medicaid days
## 8  total_hospital_discharges; Total hospital discharges
## 9  total_hospital_medicare_discharg; Medicare discharges
## 10 total_hospital_medicaid_discharg; Medicaid discharges

# Step 4: See the number of observations for teaching and non-teaching
# hospitals. Note: 1 = teaching, 0 = non-teaching.
table(hosp$teaching_hospital)

# Step 5: Group the data by teaching status with group_by().
# dplyr:: is spelled out because plyr (loaded in Week 1) exports a
# summarize() of its own.
teach <- dplyr::group_by(hosp, teaching_hospital)

# Step 6: See the means (one row per teaching status).
dplyr::summarize(
  teach,
  bed = mean(hospital_beds, na.rm = TRUE),
  payer = mean(total_hospital_employees_on_payr, na.rm = TRUE),
  nopayer = mean(total_hospital_non_paid_workers, na.rm = TRUE),
  cost = mean(total_hosp_cost, na.rm = TRUE),
  revenue = mean(log_hosp_revenue, na.rm = TRUE),
  medicare = mean(total_hospital_medicare_days, na.rm = TRUE),
  mediciad = mean(total_hospital_medicaid_days, na.rm = TRUE),
  totdis = mean(total_hospital_discharges, na.rm = TRUE),
  mediciaredis = mean(total_hospital_medicare_discharg, na.rm = TRUE),
  mediciaddis = mean(total_hospital_medicaid_discharg, na.rm = TRUE),
  resident = mean(interns_and_residents, na.rm = TRUE),
  member = mean(system_member, na.rm = TRUE)
)

# Step 7: See the standard deviations (same columns, same grouping).
dplyr::summarize(
  teach,
  bed = sd(hospital_beds, na.rm = TRUE),
  payer = sd(total_hospital_employees_on_payr, na.rm = TRUE),
  nopayer = sd(total_hospital_non_paid_workers, na.rm = TRUE),
  cost = sd(total_hosp_cost, na.rm = TRUE),
  revenue = sd(log_hosp_revenue, na.rm = TRUE),
  medicare = sd(total_hospital_medicare_days, na.rm = TRUE),
  mediciad = sd(total_hospital_medicaid_days, na.rm = TRUE),
  totdis = sd(total_hospital_discharges, na.rm = TRUE),
  mediciaredis = sd(total_hospital_medicare_discharg, na.rm = TRUE),
  mediciaddis = sd(total_hospital_medicaid_discharg, na.rm = TRUE),
  resident = sd(interns_and_residents, na.rm = TRUE),
  member = sd(system_member, na.rm = TRUE)
)

# Step 8: Generate two datasets for a t-test.
hosp_t <- subset(hosp, hosp$teaching_hospital == 1)
hosp_nt <- subset(hosp, hosp$teaching_hospital == 0)

# Step 9: See the results of the t-test (teaching vs non-teaching beds).
t.test(hosp_t$hospital_beds, hosp_nt$hospital_beds, paired = FALSE)

# Continue for all other variables.
# Thank you

# Week 1 Exercise - HMGT400
#
# Reconstructed from a one-word-per-line extraction of the Week 1 script.
# Statements and identifiers that had been split across lines are rejoined.
# The extraction stops partway through step 5.

# For this week's exercise you need the plyr package.
# Install it only when it is missing, then load it.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)

# Please change the location of the file to match your computer.
hosp <- read.csv(
  "C:/Users/hzare/Dropbox/HMGT400HOSPITAL.csv",
  header = TRUE,
  sep = ","
)

# 1: See the variables' names.
names(hosp)
# You need to make sure you have the following variables in the dataset:
## 1  hospital_beds; Hospital beds
## 2  total_hospital_employees_on_payr; Number of paid employees
## 3  total_hospital_non_paid_workers; Number of non-paid employees
## 4  total_hosp_cost; Total hospital cost
## 5  log_hosp_revenue; Total hospital revenues
## 6  total_hospital_medicare_days; Available Medicare days
## 7  total_hospital_medicaid_days; Available Medicaid days
## 8  total_hospital_discharges; Total hospital discharges
## 9  total_hospital_medicare_discharg; Medicare discharges
## 10 total_hospital_medicaid_discharg; Medicaid discharges

# 2: Generate subset data for 2011 and 2012.
# This command shows how many observations are available for each year.
table(hosp$year)

# 3: The table asks for N, mean and st. dev. for 2011 and 2012,
# so generate two subsets: one for 2011 and one for 2012.
hosp11 <- subset(hosp, hosp$year == "2011")
hosp12 <- subset(hosp, hosp$year == "2012")

# 4: Go through each variable and dataset, starting with the first row of
# the table.
## Hospital beds - 2011
### 4a -- N (printed as "Number of cases in table")
mytable <- table(hosp11$hospital_beds)
summary(mytable)
### 4b -- Mean
mean(hosp11$hospital_beds, na.rm = TRUE)
### 4c -- StDev
sd(hosp11$hospital_beds, na.rm = TRUE)
#### As you see there are n=1505, mean=376.60 and stdev=560.89 for 2011;
#### add them to the first row of the table.

## Hospital beds - 2012
# Here you need to change the dataset to hosp12.
### 4d -- N
mytable <- table(hosp12$hospital_beds)
summary(mytable)
### 4e -- Mean
mean(hosp12$hospital_beds, na.rm = TRUE)
### 4f -- StDev
sd(hosp12$hospital_beds, na.rm = TRUE)
#### As you see there are n=1525, mean=376.80 and stdev=579.84 for 2012;
#### add them to the first row of the table.

# 5: The next step is to run a t-test and see the p-value.
# The t-test command gives you the p-value, but first you need to generate a
# factor

# Week 1 Exercise - HMGT400
#
# Purpose: for the hospital dataset, report N, mean and standard deviation of
# hospital_beds for 2011 and 2012.

# For this week's exercise you need the plyr package.
# Install it only when it is missing, then load it.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)

# Please change the location of the file to match your computer.
hosp <- read.csv(
  "C:/Users/hzare/Dropbox/HMGT400HOSPITAL.csv",
  header = TRUE,
  sep = ","
)

# 1: See the variables' names.
names(hosp)
# You need to make sure you have the following variables in the dataset:
## 1  hospital_beds; Hospital beds
## 2  total_hospital_employees_on_payr; Number of paid employees
## 3  total_hospital_non_paid_workers; Number of non-paid employees
## 4  total_hosp_cost; Total hospital cost
## 5  log_hosp_revenue; Total hospital revenues
## 6  total_hospital_medicare_days; Available Medicare days
## 7  total_hospital_medicaid_days; Available Medicaid days
## 8  total_hospital_discharges; Total hospital discharges
## 9  total_hospital_medicare_discharg; Medicare discharges
## 10 total_hospital_medicaid_discharg; Medicaid discharges

# 2: Generate subset data for 2011 and 2012.
# This command shows how many observations are available for 2011 and 2012.
table(hosp$year)

# 3: The table asks for N, mean and st. dev. for 2011 and 2012,
# so generate two subsets: one for 2011 and one for 2012.
hosp11 <- subset(hosp, hosp$year == "2011")
hosp12 <- subset(hosp, hosp$year == "2012")

# 4: Go through each variable and dataset, starting with the first row of
# the table.
## Hospital beds - 2011
### 4a -- N (printed as "Number of cases in table")
mytable <- table(hosp11$hospital_beds)
summary(mytable)
### 4b -- Mean
mean(hosp11$hospital_beds, na.rm = TRUE)
### 4c -- StDev
sd(hosp11$hospital_beds, na.rm = TRUE)
#### As you see there are n=1505, mean=376.60 and stdev=560.89 for 2011;
#### add them to the first row of the table.

## Hospital beds - 2012
# Here you need to change the dataset to hosp12.
### 4d -- N
mytable <- table(hosp12$hospital_beds)
summary(mytable)
### 4e -- Mean
mean(hosp12$hospital_beds, na.rm = TRUE)
### 4f -- StDev
sd(hosp12$hospital_beds, na.rm = TRUE)
#### As you see there are n=1525, mean=376.80 and stdev=579.84 for 2012;
#### add them to the first row of the table.

#5 The next is to run a ttest and see the p-value

# the ttest command gives you p-value but first you need to generate a factor