R studio Assignment

profileharishk
R-File-13.pdf

Dr. Olufemi Ajimoko

ITS 836 - Data Science and Big Data Analytics

Installing R

-------------

https://cran.r-project.org

https://www.rstudio.com

# Greet the user: ask for a name on the console and echo "Hello <name>".
greeting <- "Hello"
user_name <- readline(prompt = "What is your name?")
print(paste(greeting, user_name, sep = " "))

# Finish by removing every object that ls() reports in the workspace.
rm(list = ls())

To run the program: Ctrl+A (select all), then Ctrl+Enter

To create a new script: File > New File > R Script, or Ctrl+Shift+N

To clear the console: Ctrl+L

-----------------------------

> id <- 1:15

> age <- c(14,23,21,51,12,32,15,23,17,25,41,54,36,24,19)

Hi, this is an assignment. Hi, this is also an assignment, but I have no idea why.

> name <- c("Bill", "Carl", "Matt", "Bobby", "Henry",

+ "Tom", "Bucky", "Emily","Jim", "Joe", "Kate",

+ "Vick", "Debby", "Mike", "Mill")

> Youth <- data.frame(id,age,name)

>

> Youth[1:5,]

id age name

1 1 14 Bill

2 2 23 Carl

3 3 21 Matt

4 4 51 Bobby

5 5 12 Henry

>

> mean(Youth$id)

[1] 8

> with(Youth, mean(id))

[1] 8

>

> # Redefining variables

> Y <- cbind(id)

> X <- cbind(age, name)

>

> summary(Y)

id

Min. : 1.0

1st Qu.: 4.5

Median : 8.0

Mean : 8.0

3rd Qu.:11.5

Max. :15.0

> summary(X)

age name

23 :2 Bill :1

12 :1 Bobby :1

14 :1 Bucky :1

15 :1 Carl :1

17 :1 Debby :1

19 :1 Emily :1

(Other):8 (Other):9

>

> # List the variables

> names(Youth)

[1] "id" "age" "name"

>

> # Show first lines of data

> head(Youth)

id age name

1 1 14 Bill

2 2 23 Carl

3 3 21 Matt

4 4 51 Bobby

5 5 12 Henry

6 6 32 Tom

> Youth[1:5,]

id age name

1 1 14 Bill

2 2 23 Carl

3 3 21 Matt

4 4 51 Bobby

5 5 12 Henry

>

> ls()

[1] "age" "id" "name" "X" "Y" "Youth"

> str(Youth)

'data.frame': 15 obs. of 3 variables:

$ id : int 1 2 3 4 5 6 7 8 9 10 ...

$ age : num 14 23 21 51 12 32 15 23 17 25 ...

$ name: Factor w/ 15 levels "Bill","Bobby",..: 1 4 11 2 7 14 3 6 8 9 ...

> hist(Youth$age, main = "Hist of age", xlab = "age")

>

> x <- na.omit(airquality)

# Scratch commands that inspect objects from the current session and then
# clear the workspace. Youth and name are created in the console session
# earlier in this file; births2006.smpl is not defined anywhere in this file.

# Keep only the non-missing DBWT values.
# NOTE(review): births2006.smpl presumably comes from an add-on data package
# that has to be loaded first - confirm before running.
x <- na.omit(births2006.smpl$DBWT)

# Show the structure of Youth.
str(Youth)

# Summarise the name vector.
summary(name)

# List the objects currently in the workspace ...
ls()

# ... and remove all of them.
rm(list=ls())

----------------------

# Introductory analysis of the auto data set: load the CSV, print descriptive
# statistics and frequency tables, then fit and plot simple OLS regressions.
#
# Objects left in the workspace: data_file, mydata, olsreg, olsreg1, Y, X.
# Every column is looked up explicitly in `mydata` (via `data =` or `with()`)
# rather than through attach(), so a same-named object already in the
# workspace can never silently mask a column.

# Read the data
# Full path to the input file; edit this one line if the CSV lives elsewhere.
# Using a full path means the script neither depends on nor changes the
# working directory.
data_file <- "C:/Data/intro_auto.csv"
if (!file.exists(data_file)) {
  stop("Input file not found: ", data_file, call. = FALSE)
}
mydata <- read.csv(data_file)

# List the variables
names(mydata)

# Show first lines of data
head(mydata)
# head(, 10) shows at most 10 rows and never pads with NA rows.
head(mydata, 10)

# Descriptive statistics
summary(mydata$mpg)
sd(mydata$mpg)
length(mydata$mpg)
summary(mydata$price)
sd(mydata$price)

# Sort the data
sort(mydata$make)

# Frequency tables
# with() keeps the bare column names, so the table dimensions stay labelled.
with(mydata, table(make))
with(mydata, table(make, foreign))

# Correlation among variables
with(mydata, cor(price, mpg))

# T-test for mean of one group
with(mydata, t.test(mpg, mu = 20))

# ANOVA for equality of means for two groups
anova(lm(mpg ~ factor(foreign), data = mydata))

# OLS regression - mpg (dependent variable) and weight, length and foreign
# (independent variables)
olsreg <- lm(mpg ~ weight + length + foreign, data = mydata)
summary(olsreg)
# summary(lm(mpg ~ weight + length + foreign, data = mydata))

# Plotting data
plot(mpg ~ weight, data = mydata)
olsreg1 <- lm(mpg ~ weight, data = mydata)
abline(olsreg1)

# Redefining variables
# Y and X are column-bound matrices; olsreg is then refitted from them and
# replaces the earlier formula-based fit.
Y <- with(mydata, cbind(mpg))
X <- with(mydata, cbind(weight, length, foreign))
summary(Y)
summary(X)
olsreg <- lm(Y ~ X)
summary(olsreg)

# Install and use packages
# install.packages("plm")
# library(plm)