RStudio Assignment
Dr. Olufemi Ajimoko
ITS 836 - Data Science and Big Data Analytics
Installing R
-------------
https://cran.r-project.org
https://www.rstudio.com
# Greet the user by name.
h <- "Hello"
# readline() waits for typed input in an interactive session; when the
# script is run non-interactively it returns "" immediately.
yourname <- readline("What is your name?")
print(paste(h, yourname))
# Remove only the objects created above. rm(list = ls()) would also delete
# every other object in the user's workspace.
rm(h, yourname)
To run the program - Ctrl+A, then Ctrl+Enter
To create a new script - File > New File > R Script, or Ctrl+Shift+N
To clear the console - Ctrl+L
-----------------------------
> id <- 1:15
> age <- c(14,23,21,51,12,32,15,23,17,25,41,54,36,24,19)
Hi, this is an assignment. Hi, this is also an assignment, but I have no idea why.
> name <- c("Bill", "Carl", "Matt", "Bobby", "Henry",
+ "Tom", "Bucky", "Emily","Jim", "Joe", "Kate",
+ "Vick", "Debby", "Mike", "Mill")
> Youth <- data.frame(id,age,name)
>
> Youth[1:5,]
id age name
1 1 14 Bill
2 2 23 Carl
3 3 21 Matt
4 4 51 Bobby
5 5 12 Henry
>
> mean(Youth$id)
[1] 8
> with(Youth, mean(id))
[1] 8
>
> # Redefining variables
> Y <- cbind(id)
> X <- cbind(age, name)
>
> summary(Y)
id
Min. : 1.0
1st Qu.: 4.5
Median : 8.0
Mean : 8.0
3rd Qu.:11.5
Max. :15.0
> summary(X)
age name
23 :2 Bill :1
12 :1 Bobby :1
14 :1 Bucky :1
15 :1 Carl :1
17 :1 Debby :1
19 :1 Emily :1
(Other):8 (Other):9
>
> # List the variables
> names(Youth)
[1] "id" "age" "name"
>
> # Show first lines of data
> head(Youth)
id age name
1 1 14 Bill
2 2 23 Carl
3 3 21 Matt
4 4 51 Bobby
5 5 12 Henry
6 6 32 Tom
> Youth[1:5,]
id age name
1 1 14 Bill
2 2 23 Carl
3 3 21 Matt
4 4 51 Bobby
5 5 12 Henry
>
> ls()
[1] "age" "id" "name" "X" "Y" "Youth"
> str(Youth)
'data.frame': 15 obs. of 3 variables:
$ id : int 1 2 3 4 5 6 7 8 9 10 ...
$ age : num 14 23 21 51 12 32 15 23 17 25 ...
$ name: Factor w/ 15 levels "Bill","Bobby",..: 1 4 11 2 7 14 3 6 8 9 ...
> # Histogram of the age column, matching the "age" title and axis label
> hist(Youth$age, main = "Hist of age", xlab = "age")
>
> x <- na.omit(airquality)
# NOTE(review): births2006.smpl is not created anywhere in this file;
# presumably it is the sample data set from the 'nutshell' package and has
# to be loaded before this line runs. Confirm.
# Keep only the non-missing values of its DBWT column.
x <- na.omit(births2006.smpl$DBWT)
str(Youth)
summary(name)
ls()
# Remove only the objects created in this exercise. rm(list = ls()) would
# also delete every other object in the workspace.
rm(id, age, name, Youth, Y, X, x)
----------------------
# Descriptive statistics, tests and OLS regressions on the auto data set.
#
# Columns are reached through mydata$col, with(mydata, ...) or the `data =`
# argument rather than attach(mydata): attach() copies the columns onto the
# search path, where they can mask, or be masked by, other objects of the
# same name.

# Read the data
# The file is read from an absolute path, so no setwd() call is needed. The
# earlier setwd("C:/BDA/Data") pointed at a different folder from the one
# the file is read from, and it stops the script when that folder is missing.
data_file <- "C:/Data/intro_auto.csv"
mydata <- read.csv(data_file)

# List the variables
names(mydata)

# Show first lines of data
head(mydata)
mydata[1:10, ]

# Descriptive statistics
summary(mydata$mpg)
sd(mydata$mpg)
length(mydata$mpg)
summary(mydata$price)
sd(mydata$price)

# Sort the data
sort(mydata$make)

# Frequency tables
# with() keeps the variable names as table headers.
with(mydata, table(make))
with(mydata, table(make, foreign))

# Correlation among variables
cor(mydata$price, mydata$mpg)

# T-test for mean of one group
with(mydata, t.test(mpg, mu = 20))

# ANOVA for equality of means for two groups
anova(lm(mpg ~ factor(foreign), data = mydata))

# OLS regression - mpg (dependent variable) and weight, length and foreign
# (independent variables)
olsreg <- lm(mpg ~ weight + length + foreign, data = mydata)
summary(olsreg)
# summary(lm(mpg ~ weight + length + foreign, data = mydata))

# Plotting data: scatter plot with the fitted simple-regression line
plot(mpg ~ weight, data = mydata)
olsreg1 <- lm(mpg ~ weight, data = mydata)
abline(olsreg1)

# Redefining variables
# Same regression as above, written with a response matrix Y and a
# regressor matrix X built by cbind().
Y <- with(mydata, cbind(mpg))
X <- with(mydata, cbind(weight, length, foreign))
summary(Y)
summary(X)
olsreg <- lm(Y ~ X)
summary(olsreg)

# Install and use packages
# install.packages("plm")
# library(plm)