# This week's exercise is somewhat different. Please use the RStudio code below
# to generate a data set, and then repeat the analysis you performed in week 5.
# There is no need to report N, the mean, or the standard deviation; report
# only the regression results, using the attached table. Please remember that
# running the code gives each student a unique data set, so you will not be
# able to compare your findings with other class members. Please use the same
# table and models for this analysis.
############################################################
# GENERATE A RANDOM DATA SET. CAN BE SET TO LONG OR WIDE. #
# DATA SET HAS FACTORS AND NUMERIC VARIABLES AND CAN #
# OPTIONALLY GIVE BUDGET EXPENDITURES AS A PROPORTION. #
# CAN ALSO TELL A PROPORTION OF CELLS TO BE MISSING VALUES #
############################################################
# The following code generates a data set of 50,000 observations and then runs
# regression models. This approach is useful for simulation whenever you have
# a table of data and would like to generate data randomly.
############################################################
# Generate a random hospital-level data set for simulation exercises.
#
# Arguments:
#   n          Number of ids to simulate (rows in the wide layout); a single
#              number >= 1, truncated to an integer.
#   type       Layout of the result, wide or long. It is captured with
#              substitute(), so type = long and type = "long" both work.
#   digits     Number of decimal places kept in every numeric column.
#   proportion Accepted so existing calls keep working; it has no effect.
#   na.rate    Share (0 to 1) of the cells outside the id column that are
#              replaced by NA. The default 0 leaves the data complete.
#
# Returns a data.frame. The wide layout has one row per id with columns
# time1, time2 and time3; the long layout stacks them into the columns
# time and value, giving three rows per id.
DFgen <- DFmaker <- function(n = 50000, type = wide, digits = 2,
                             proportion = FALSE, na.rate = 0) {
  # Read `type` unevaluated: the default `wide` is a bare symbol, not an object.
  layout <- as.character(substitute(type))
  if (length(layout) != 1L) {
    stop("Invalid Data \"type\"")
  }
  if (!is.numeric(n) || length(n) != 1L || is.na(n) || n < 1) {
    stop("n must be a single number >= 1")
  }
  if (!is.numeric(na.rate) || length(na.rate) != 1L || is.na(na.rate) ||
      na.rate < 0 || na.rate > 1) {
    stop("na.rate must be a single number between 0 and 1")
  }
  n <- as.integer(n)

  # time1 is drawn first because time2 and time3 are built on top of it.
  time1 <- sample(1:100, n, replace = TRUE) + abs(rnorm(n))
  DF <- data.frame(
    id = paste0("ID.", seq_len(n)),
    system_member = sample(c("Yes", "No"), n, replace = TRUE),
    own = sample(c("NFP", "FP", "Public", "Other"), n,
                 replace = TRUE, prob = c(.25, .2, .25, .3)),
    total_hosp_revenue = sample(0:2000000, n, replace = TRUE),
    hospital_beds = sample(0:550, n, replace = TRUE),
    medicare_discharge_ratio = sample(0:1, n, replace = TRUE),
    medicaid_discharge_ratio = sample(0:1, n, replace = TRUE),
    benefit = sample(-200000:2000000, n, replace = TRUE),
    score = rnorm(n),
    time1 = time1,
    time2 = time1 + 2 * abs(rnorm(n)),
    time3 = time1 + 4 * abs(rnorm(n))
  )

  DF <- switch(layout,
    wide = DF,
    long = {
      stacked <- reshape(DF, direction = "long", idvar = "id",
                         varying = c("time1", "time2", "time3"),
                         v.names = "value", timevar = "time",
                         times = c("time1", "time2", "time3"))
      # Replace the row names set by reshape() with the default 1..N.
      rownames(stacked) <- NULL
      stacked
    },
    stop("Invalid Data \"type\"")
  )

  # Round every numeric column; character columns are left untouched.
  is_num <- vapply(DF, is.numeric, logical(1))
  DF[is_num] <- lapply(DF[is_num], round, digits = digits)

  # Blank out the requested share of cells, never touching the id column.
  if (na.rate > 0) {
    target_cols <- setdiff(names(DF), "id")
    n_rows <- nrow(DF)
    n_cells <- as.numeric(n_rows) * length(target_cols)
    hit <- sample.int(n_cells, round(na.rate * n_cells))
    # Translate each linear cell index into a (row, column) pair.
    hit_row <- (hit - 1) %% n_rows + 1
    hit_col <- (hit - 1) %/% n_rows + 1
    for (j in unique(hit_col)) {
      DF[[target_cols[j]]][hit_row[hit_col == j]] <- NA
    }
  }

  DF
}
##############
# TRY IT OUT #
##############
# Generate the working data set: wide layout, default size, no missing cells.
# No seed is set, so every run draws different random values.
# Other ways to call the generator, for experimenting:
#   DFgen(type = "long")   # one row per id and time point
#   DFmaker(1000)          # a smaller data set
data <- DFgen(na.rate = 0)
randomdata <- data
# To inspect the data in Excel, remove the leading # from the next line.
# write.table(data, file = "D:/UMUC/Stat200/randomdata/STAT200-4.csv", sep = ",", col.names = NA, qmethod = "double")

# Ownership type and system membership as factors for use in the models.
own1 <- factor(randomdata$own)
system <- factor(randomdata$system_member)

# Step 10: run regression models.
# Report only the estimates and standard errors from each summary.
# print() is used so the summaries also appear when the file is run through
# source(), which does not auto-print top-level results.

# Model 1: benefit on bed count (as a continuous variable) and ownership.
model1 <- lm(benefit ~ hospital_beds + own1, data = randomdata)
print(summary(model1))

# Model 2: adds system membership.
model2 <- lm(benefit ~ hospital_beds + own1 + system, data = randomdata)
print(summary(model2))

# Model 3: adds the Medicare and Medicaid discharge ratios.
model3 <- lm(
  benefit ~ hospital_beds + own1 + system + medicare_discharge_ratio +
    medicaid_discharge_ratio,
  data = randomdata
)
print(summary(model3))

# Model 4: same predictors as Model 3, with total hospital revenue as outcome.
model4 <- lm(
  total_hosp_revenue ~ hospital_beds + own1 + system +
    medicare_discharge_ratio + medicaid_discharge_ratio,
  data = randomdata
)
print(summary(model4))
# This week's exercise is somewhat different. Please use the RStudio code below
# to generate a data set, and then repeat the analysis you performed in week 5.
# There is no need to report N, the mean, or the standard deviation; report
# only the regression results, using the attached table. Please remember that
# running the code gives each student a unique data set, so you will not be
# able to compare your findings with other class members. Please use the same
# table and models for this analysis.
############################################################
# GENERATE A RANDOM DATA SET. CAN BE SET TO LONG OR WIDE. #
# DATA SET HAS FACTORS AND NUMERIC VARIABLES AND CAN #
# OPTIONALLY GIVE BUDGET EXPENDITURES AS A PROPORTION. #
# CAN ALSO TELL A PROPORTION OF CELLS TO BE MISSING VALUES #
############################################################
# The following code generates a data set of 50,000 observations and then runs
# regression models. This approach is useful for simulation whenever you have
# a table of data and would like to generate data randomly.
############################################################
# Generate a random hospital-level data set for simulation exercises.
#
# Arguments:
#   n          Number of ids to simulate (rows in the wide layout); a single
#              number >= 1, truncated to an integer.
#   type       Layout of the result, wide or long. It is captured with
#              substitute(), so type = long and type = "long" both work.
#   digits     Number of decimal places kept in every numeric column.
#   proportion Accepted so existing calls keep working; it has no effect.
#   na.rate    Share (0 to 1) of the cells outside the id column that are
#              replaced by NA. The default 0 leaves the data complete.
#
# Returns a data.frame. The wide layout has one row per id with columns
# time1, time2 and time3; the long layout stacks them into the columns
# time and value, giving three rows per id.
DFgen <- DFmaker <- function(n = 50000, type = wide, digits = 2,
                             proportion = FALSE, na.rate = 0) {
  # Read `type` unevaluated: the default `wide` is a bare symbol, not an object.
  layout <- as.character(substitute(type))
  if (length(layout) != 1L) {
    stop("Invalid Data \"type\"")
  }
  if (!is.numeric(n) || length(n) != 1L || is.na(n) || n < 1) {
    stop("n must be a single number >= 1")
  }
  if (!is.numeric(na.rate) || length(na.rate) != 1L || is.na(na.rate) ||
      na.rate < 0 || na.rate > 1) {
    stop("na.rate must be a single number between 0 and 1")
  }
  n <- as.integer(n)

  # time1 is drawn first because time2 and time3 are built on top of it.
  time1 <- sample(1:100, n, replace = TRUE) + abs(rnorm(n))
  DF <- data.frame(
    id = paste0("ID.", seq_len(n)),
    system_member = sample(c("Yes", "No"), n, replace = TRUE),
    own = sample(c("NFP", "FP", "Public", "Other"), n,
                 replace = TRUE, prob = c(.25, .2, .25, .3)),
    total_hosp_revenue = sample(0:2000000, n, replace = TRUE),
    hospital_beds = sample(0:550, n, replace = TRUE),
    medicare_discharge_ratio = sample(0:1, n, replace = TRUE),
    medicaid_discharge_ratio = sample(0:1, n, replace = TRUE),
    benefit = sample(-200000:2000000, n, replace = TRUE),
    score = rnorm(n),
    time1 = time1,
    time2 = time1 + 2 * abs(rnorm(n)),
    time3 = time1 + 4 * abs(rnorm(n))
  )

  DF <- switch(layout,
    wide = DF,
    long = {
      stacked <- reshape(DF, direction = "long", idvar = "id",
                         varying = c("time1", "time2", "time3"),
                         v.names = "value", timevar = "time",
                         times = c("time1", "time2", "time3"))
      # Replace the row names set by reshape() with the default 1..N.
      rownames(stacked) <- NULL
      stacked
    },
    stop("Invalid Data \"type\"")
  )

  # Round every numeric column; character columns are left untouched.
  is_num <- vapply(DF, is.numeric, logical(1))
  DF[is_num] <- lapply(DF[is_num], round, digits = digits)

  # Blank out the requested share of cells, never touching the id column.
  if (na.rate > 0) {
    target_cols <- setdiff(names(DF), "id")
    n_rows <- nrow(DF)
    n_cells <- as.numeric(n_rows) * length(target_cols)
    hit <- sample.int(n_cells, round(na.rate * n_cells))
    # Translate each linear cell index into a (row, column) pair.
    hit_row <- (hit - 1) %% n_rows + 1
    hit_col <- (hit - 1) %/% n_rows + 1
    for (j in unique(hit_col)) {
      DF[[target_cols[j]]][hit_row[hit_col == j]] <- NA
    }
  }

  DF
}
# This week's exercise is somewhat different. Please use the RStudio code below
# to generate a data set, and then repeat the analysis you performed in week 5.
# There is no need to report N, the mean, or the standard deviation; report
# only the regression results, using the attached table. Please remember that
# running the code gives each student a unique data set, so you will not be
# able to compare your findings with other class members. Please use the same
# table and models for this analysis.
############################################################
# GENERATE A RANDOM DATA SET. CAN BE SET TO LONG OR WIDE. #
# DATA SET HAS FACTORS AND NUMERIC VARIABLES AND CAN #
# OPTIONALLY GIVE BUDGET EXPENDITURES AS A PROPORTION. #
# CAN ALSO TELL A PROPORTION OF CELLS TO BE MISSING VALUES #
############################################################
# The following code generates a data set of 50,000 observations and then runs
# regression models. This approach is useful for simulation whenever you have
# a table of data and would like to generate data randomly.
############################################################
# Generate a random hospital-level data set for simulation exercises.
#
# Arguments:
#   n          Number of ids to simulate (rows in the wide layout); a single
#              number >= 1, truncated to an integer.
#   type       Layout of the result, wide or long. It is captured with
#              substitute(), so type = long and type = "long" both work.
#   digits     Number of decimal places kept in every numeric column.
#   proportion Accepted so existing calls keep working; it has no effect.
#   na.rate    Share (0 to 1) of the cells outside the id column that are
#              replaced by NA. The default 0 leaves the data complete.
#
# Returns a data.frame. The wide layout has one row per id with columns
# time1, time2 and time3; the long layout stacks them into the columns
# time and value, giving three rows per id.
DFgen <- DFmaker <- function(n = 50000, type = wide, digits = 2,
                             proportion = FALSE, na.rate = 0) {
  # Read `type` unevaluated: the default `wide` is a bare symbol, not an object.
  layout <- as.character(substitute(type))
  if (length(layout) != 1L) {
    stop("Invalid Data \"type\"")
  }
  if (!is.numeric(n) || length(n) != 1L || is.na(n) || n < 1) {
    stop("n must be a single number >= 1")
  }
  if (!is.numeric(na.rate) || length(na.rate) != 1L || is.na(na.rate) ||
      na.rate < 0 || na.rate > 1) {
    stop("na.rate must be a single number between 0 and 1")
  }
  n <- as.integer(n)

  # time1 is drawn first because time2 and time3 are built on top of it.
  time1 <- sample(1:100, n, replace = TRUE) + abs(rnorm(n))
  DF <- data.frame(
    id = paste0("ID.", seq_len(n)),
    system_member = sample(c("Yes", "No"), n, replace = TRUE),
    own = sample(c("NFP", "FP", "Public", "Other"), n,
                 replace = TRUE, prob = c(.25, .2, .25, .3)),
    total_hosp_revenue = sample(0:2000000, n, replace = TRUE),
    hospital_beds = sample(0:550, n, replace = TRUE),
    medicare_discharge_ratio = sample(0:1, n, replace = TRUE),
    medicaid_discharge_ratio = sample(0:1, n, replace = TRUE),
    benefit = sample(-200000:2000000, n, replace = TRUE),
    score = rnorm(n),
    time1 = time1,
    time2 = time1 + 2 * abs(rnorm(n)),
    time3 = time1 + 4 * abs(rnorm(n))
  )

  DF <- switch(layout,
    wide = DF,
    long = {
      stacked <- reshape(DF, direction = "long", idvar = "id",
                         varying = c("time1", "time2", "time3"),
                         v.names = "value", timevar = "time",
                         times = c("time1", "time2", "time3"))
      # Replace the row names set by reshape() with the default 1..N.
      rownames(stacked) <- NULL
      stacked
    },
    stop("Invalid Data \"type\"")
  )

  # Round every numeric column; character columns are left untouched.
  is_num <- vapply(DF, is.numeric, logical(1))
  DF[is_num] <- lapply(DF[is_num], round, digits = digits)

  # Blank out the requested share of cells, never touching the id column.
  if (na.rate > 0) {
    target_cols <- setdiff(names(DF), "id")
    n_rows <- nrow(DF)
    n_cells <- as.numeric(n_rows) * length(target_cols)
    hit <- sample.int(n_cells, round(na.rate * n_cells))
    # Translate each linear cell index into a (row, column) pair.
    hit_row <- (hit - 1) %% n_rows + 1
    hit_col <- (hit - 1) %/% n_rows + 1
    for (j in unique(hit_col)) {
      DF[[target_cols[j]]][hit_row[hit_col == j]] <- NA
    }
  }

  DF
}