w7.docx

This week's exercise is somewhat different. Please use the RStudio code below to generate a data set, and then repeat the analysis you performed for week 5 (there is no need to report N, Mean, and St. Dev.); report only the regression results using the attached table. Please remember that running the code creates a unique data set for each student, so you will not be able to compare your findings with other class members. Please use the same table and models for this analysis.

############################################################
# GENERATE A RANDOM DATA SET. CAN BE SET TO LONG OR WIDE.  #
# DATA SET HAS FACTORS AND NUMERIC VARIABLES.              #
# THE `proportion` AND `na.rate` OPTIONS ARE ACCEPTED BUT  #
# ARE NOT IMPLEMENTED IN THIS VERSION (A WARNING IS GIVEN).#
############################################################
# The following code generates 50,000 observations and then
# runs regression models on them. It is useful for simulation
# whenever you have a table of data and would like to
# generate data randomly.
############################################################

# Generate a random hospital data set.
#
# Arguments:
#   n           Number of rows to simulate (0 is allowed and gives an
#               empty wide data frame).
#   type        Layout of the result: `wide` or `long`. The argument is
#               captured with substitute(), so it may be written bare
#               (type = long) or quoted (type = "long").
#   digits      Number of decimal places every numeric column is rounded to.
#   proportion  Accepted for compatibility; ignored (a warning is raised
#               when it is not FALSE).
#   na.rate     Accepted for compatibility; ignored (a warning is raised
#               when it is not 0).
#
# Returns a data.frame. In the wide layout there is one row per id with
# columns time1, time2 and time3; in the long layout those three columns
# are stacked into `time` / `value`, giving three rows per id.
#
# Stops with an error when `type` is neither `wide` nor `long`.
DFgen <- DFmaker <- function(n = 50000, type = wide, digits = 2,
                             proportion = FALSE, na.rate = 0) {
  # Capture `type` unevaluated so a bare symbol does not need to exist
  # as a variable.
  layout <- as.character(substitute(type))

  # These two options used to be ignored silently; say so explicitly.
  if (!identical(proportion, FALSE) || !isTRUE(na.rate == 0)) {
    warning("'proportion' and 'na.rate' are not implemented and are ignored",
            call. = FALSE)
  }

  # The random draws below are kept in their original order so that a
  # given set.seed() still produces the same data.
  time1 <- sample(1:100, n, replace = TRUE) + abs(rnorm(n))

  # NOTE(review): the two *_ratio columns are drawn from 0:1, so they only
  # ever take the values 0 and 1 -- confirm whether a continuous ratio
  # was intended.
  DF <- data.frame(
    id = paste0("ID.", seq_len(n)),  # seq_len() keeps n = 0 valid
    system_member = sample(c("Yes", "No"), n, replace = TRUE),
    own = sample(c("NFP", "FP", "Public", "Other"), n,
                 replace = TRUE, prob = c(.25, .2, .25, .3)),
    total_hosp_revenue = sample(0:2000000, n, replace = TRUE),
    hospital_beds = sample(0:550, n, replace = TRUE),
    medicare_discharge_ratio = sample(0:1, n, replace = TRUE),
    medicaid_discharge_ratio = sample(0:1, n, replace = TRUE),
    benefit = sample(-200000:2000000, n, replace = TRUE),
    score = rnorm(n),
    time1 = time1,
    time2 = time1 + 2 * abs(rnorm(n)),
    time3 = time1 + 4 * abs(rnorm(n))
  )

  DF <- switch(layout,
    wide = DF,
    long = {
      stacked <- reshape(DF, direction = "long", idvar = "id",
                         varying = c("time1", "time2", "time3"),
                         v.names = "value",
                         timevar = "time",
                         times = c("time1", "time2", "time3"))
      # Drop the row names that reshape() generates.
      rownames(stacked) <- NULL
      stacked
    },
    stop("Invalid Data \"type\"")
  )

  # Round every numeric column; non-numeric columns are left untouched.
  is_num <- vapply(DF, is.numeric, logical(1))
  DF[is_num] <- lapply(DF[is_num], round, digits = digits)
  DF
}

##############
# TRY IT OUT #
##############
# Other ways to call the generator (not run here, because only the last
# result was ever kept):
#   DFgen(type = "long")   # long layout, three rows per id
#   DFmaker(1000)          # same function through its alias, 1000 rows
data <- DFgen()
randomdata <- data

# The following command exports the data to a CSV file that can be opened in Excel. To use it, remove the leading "#" from the next line.
# write.table(data, file = "D:/UMUC/Stat200/randomdata/STAT200-4.csv", sep = ",", col.names = NA, qmethod = "double")

# Convert the categorical predictors to factors so lm() creates
# indicator (dummy) variables for them.
own1 <- factor(randomdata$own)
# `system` has the same name as base::system(); calls such as system("ls")
# still reach the base function because R skips non-function objects when
# it looks up a function.
system <- factor(randomdata$system_member)

# Step 10: run regression models.
# Report only the estimates and standard errors from each summary.

# Model 1: benefit on beds (as a continuous variable) and ownership.
model1 <- lm(benefit ~ hospital_beds + own1, data = randomdata)
summary(model1)

# Model 2: Model 1 plus system membership.
model2 <- lm(benefit ~ hospital_beds + own1 + system, data = randomdata)
summary(model2)

# Model 3: Model 2 plus the Medicare and Medicaid discharge ratios.
model3 <- lm(benefit ~ hospital_beds + own1 + system +
               medicare_discharge_ratio + medicaid_discharge_ratio,
             data = randomdata)
summary(model3)

# Model 4: same predictors as Model 3, with total hospital revenue as
# the outcome.
model4 <- lm(total_hosp_revenue ~ hospital_beds + own1 + system +
               medicare_discharge_ratio + medicaid_discharge_ratio,
             data = randomdata)
summary(model4)

This week's exercise is somewhat different. Please use the RStudio code below to generate a data set, and
then repeat the analysis you performed for week 5 (there is no need to report N, Mean, and St. Dev.);
report only the regression results using the attached table. Please remember that running the code
creates a unique data set for each student, so you will not be able to compare your findings with other
class members. Please use the same table and models for this analysis.

############################################################
# GENERATE A RANDOM DATA SET. CAN BE SET TO LONG OR WIDE.  #
# DATA SET HAS FACTORS AND NUMERIC VARIABLES AND CAN       #
# OPTIONALLY GIVE BUDGET EXPENDITURES AS A PROPORTION.     #
# CAN ALSO TELL A PROPORTION OF CELLS TO BE MISSING VALUES #
############################################################
# The following code generates 50,000 observations and then runs a regression model.
# This approach is useful for simulation whenever you have a table of data
# and would like to generate data randomly.
############################################################

# Generate a random hospital data set.
#
# Arguments:
#   n           Number of rows to simulate (0 is allowed and gives an
#               empty wide data frame).
#   type        Layout of the result: `wide` or `long`. The argument is
#               captured with substitute(), so it may be written bare
#               (type = long) or quoted (type = "long").
#   digits      Number of decimal places every numeric column is rounded to.
#   proportion  Accepted for compatibility; ignored (a warning is raised
#               when it is not FALSE).
#   na.rate     Accepted for compatibility; ignored (a warning is raised
#               when it is not 0).
#
# Returns a data.frame. In the wide layout there is one row per id with
# columns time1, time2 and time3; in the long layout those three columns
# are stacked into `time` / `value`, giving three rows per id.
#
# Stops with an error when `type` is neither `wide` nor `long`.
DFgen <- DFmaker <- function(n = 50000, type = wide, digits = 2,
                             proportion = FALSE, na.rate = 0) {
  # Capture `type` unevaluated so a bare symbol does not need to exist
  # as a variable.
  layout <- as.character(substitute(type))

  # These two options used to be ignored silently; say so explicitly.
  if (!identical(proportion, FALSE) || !isTRUE(na.rate == 0)) {
    warning("'proportion' and 'na.rate' are not implemented and are ignored",
            call. = FALSE)
  }

  # The random draws below are kept in their original order so that a
  # given set.seed() still produces the same data.
  time1 <- sample(1:100, n, replace = TRUE) + abs(rnorm(n))

  # NOTE(review): the two *_ratio columns are drawn from 0:1, so they only
  # ever take the values 0 and 1 -- confirm whether a continuous ratio
  # was intended.
  DF <- data.frame(
    id = paste0("ID.", seq_len(n)),  # seq_len() keeps n = 0 valid
    system_member = sample(c("Yes", "No"), n, replace = TRUE),
    own = sample(c("NFP", "FP", "Public", "Other"), n,
                 replace = TRUE, prob = c(.25, .2, .25, .3)),
    total_hosp_revenue = sample(0:2000000, n, replace = TRUE),
    hospital_beds = sample(0:550, n, replace = TRUE),
    medicare_discharge_ratio = sample(0:1, n, replace = TRUE),
    medicaid_discharge_ratio = sample(0:1, n, replace = TRUE),
    benefit = sample(-200000:2000000, n, replace = TRUE),
    score = rnorm(n),
    time1 = time1,
    time2 = time1 + 2 * abs(rnorm(n)),
    time3 = time1 + 4 * abs(rnorm(n))
  )

  DF <- switch(layout,
    wide = DF,
    long = {
      stacked <- reshape(DF, direction = "long", idvar = "id",
                         varying = c("time1", "time2", "time3"),
                         v.names = "value",
                         timevar = "time",
                         times = c("time1", "time2", "time3"))
      # Drop the row names that reshape() generates.
      rownames(stacked) <- NULL
      stacked
    },
    stop("Invalid Data \"type\"")
  )

  # Round every numeric column; non-numeric columns are left untouched.
  is_num <- vapply(DF, is.numeric, logical(1))
  DF[is_num] <- lapply(DF[is_num], round, digits = digits)
  DF
}

This week's exercise is somewhat different. Please use the RStudio code below to generate a data set, and
then repeat the analysis you performed for week 5 (there is no need to report N, Mean, and St. Dev.);
report only the regression results using the attached table. Please remember that running the code
creates a unique data set for each student, so you will not be able to compare your findings with other
class members. Please use the same table and models for this analysis.

############################################################

# GENERATE A RANDOM DATA SET. CAN BE SET TO LONG OR WIDE. #

# DATA SET HAS FACTORS AND NUMERIC VARIABLES AND CAN #

# OPTIONALLY GIVE BUDGET EXPENDITURES AS A PROPORTION. #

# CAN ALSO TELL A PROPORTION OF CELLS TO BE MISSING VALUES #

############################################################

# The following code generates 50,000 observations and then runs a regression model.

# This approach is useful for simulation whenever you have a table of data

# and would like to generate data randomly.

############################################################

# Generate a random hospital data set.
#
# Arguments:
#   n           Number of rows to simulate (0 is allowed and gives an
#               empty wide data frame).
#   type        Layout of the result: `wide` or `long`. The argument is
#               captured with substitute(), so it may be written bare
#               (type = long) or quoted (type = "long").
#   digits      Number of decimal places every numeric column is rounded to.
#   proportion  Accepted for compatibility; ignored (a warning is raised
#               when it is not FALSE).
#   na.rate     Accepted for compatibility; ignored (a warning is raised
#               when it is not 0).
#
# Returns a data.frame. In the wide layout there is one row per id with
# columns time1, time2 and time3; in the long layout those three columns
# are stacked into `time` / `value`, giving three rows per id.
#
# Stops with an error when `type` is neither `wide` nor `long`.
DFgen <- DFmaker <- function(n = 50000, type = wide, digits = 2,
                             proportion = FALSE, na.rate = 0) {
  # Capture `type` unevaluated so a bare symbol does not need to exist
  # as a variable.
  layout <- as.character(substitute(type))

  # These two options used to be ignored silently; say so explicitly.
  if (!identical(proportion, FALSE) || !isTRUE(na.rate == 0)) {
    warning("'proportion' and 'na.rate' are not implemented and are ignored",
            call. = FALSE)
  }

  # The random draws below are kept in their original order so that a
  # given set.seed() still produces the same data.
  time1 <- sample(1:100, n, replace = TRUE) + abs(rnorm(n))

  # NOTE(review): the two *_ratio columns are drawn from 0:1, so they only
  # ever take the values 0 and 1 -- confirm whether a continuous ratio
  # was intended.
  DF <- data.frame(
    id = paste0("ID.", seq_len(n)),  # seq_len() keeps n = 0 valid
    system_member = sample(c("Yes", "No"), n, replace = TRUE),
    own = sample(c("NFP", "FP", "Public", "Other"), n,
                 replace = TRUE, prob = c(.25, .2, .25, .3)),
    total_hosp_revenue = sample(0:2000000, n, replace = TRUE),
    hospital_beds = sample(0:550, n, replace = TRUE),
    medicare_discharge_ratio = sample(0:1, n, replace = TRUE),
    medicaid_discharge_ratio = sample(0:1, n, replace = TRUE),
    benefit = sample(-200000:2000000, n, replace = TRUE),
    score = rnorm(n),
    time1 = time1,
    time2 = time1 + 2 * abs(rnorm(n)),
    time3 = time1 + 4 * abs(rnorm(n))
  )

  DF <- switch(layout,
    wide = DF,
    long = {
      stacked <- reshape(DF, direction = "long", idvar = "id",
                         varying = c("time1", "time2", "time3"),
                         v.names = "value",
                         timevar = "time",
                         times = c("time1", "time2", "time3"))
      # Drop the row names that reshape() generates.
      rownames(stacked) <- NULL
      stacked
    },
    stop("Invalid Data \"type\"")
  )

  # Round every numeric column; non-numeric columns are left untouched.
  is_num <- vapply(DF, is.numeric, logical(1))
  DF[is_num] <- lapply(DF[is_num], round, digits = digits)
  DF
}