Project proposal and Final research project
Importing Data From Yahoo Finance
Import CSV File from Yahoo Finance
Browser: http://finance.yahoo.com
Search: AMZN (always verify the ticker symbol)
Click: Historical Data (link)
Time Period (modify):
Start Date: 2010-12-31
End Date: 2013-12-31
Apply (click)
Download (click)
Importing Data From Yahoo Finance
Create “date” variable (date on csv file is factor, we need as.date)
getwd() # verify your working directory
setwd() # set working directory to file location
data.AMZN <- read.csv("AMZN.csv",header=TRUE)
head(data.AMZN) #check data
class(data.AMZN$Date) # check Date variable
date <- as.Date(data.AMZN$Date,format="%Y-%m-%d") #create variable date
head(date) # verify format
Tip: don’t open the CSV file on Excel and save it, it will change the date format to %m/%d/%y
Importing Data From Yahoo Finance
Replace the date variable in data.AMZN with the date
data.AMZN <- cbind(date, data.AMZN[,-1]) # -1 deletes the original Date variable
Sort the Data in chronological order (if data is not in order)
data.AMZN <- data.AMZN[order(data.AMZN$date),]
Importing Data From Yahoo Finance
Check class of data.AMZN
class(data.AMZN) # should be data.frame
# we will be using xts (eXtensible Time Series)
# use XTS package
library(xts)
data.AMZN <- xts(data.AMZN[,2:7],order.by=data.AMZN[,1])
# xts takes two arguments; data and index columns
# data columns are 2 to 7
# index column is 1
class(data.AMZN) # should be “xts” “zoo”
Importing Data From Yahoo Finance
Rename variables (we can’t use the generic names if we work with multiple stocks)
names(data.AMZN) # using “names” we can manipulate the variable names
# "Open” "High” "Low” "Close” "Adj.Close" "Volume"
names(data.AMZN) <- paste(c("AMZN.Open","AMZN.High","AMZN.Low","AMZN.Close","AMZN.Adjusted","AMZN.Volume"))
# paste tells R that the variables names are changing
head(data.AMZN)
Checking the Data
Plotting the Data (verify that we have complete data)
plot(data.AMZN$AMZN.Close)
Checking the Data
Plotting the Data (simlate missing data)
data.missing <- (data.AMZN[-400:-500,]) #delete observations 400 to 500
plot(data.missing$AMZN.Close)
Checking the Dimensions
Verifying the number of columns and rows
dim(data.AMZN)
[1] 754 6
Output summary statistics
summary(data.AMZN)
Basic Manipulation Techniques
Keeping only the first row
AMZN.onlyFirst <- data.AMZN[1,] #first row of data
Deleting the first row
AMZN.delFirst <- data.AMZN[-1,] #delete first row of data from set
Look at first and last row
data.AMZN[c(1,nrow(data.AMZN)),]
Looking at the first three rows and the last row
data.AMZN[c(1:3,nrow(data.AMZN)),]
Keeping one column
AMZN.onlyPrice <- data.AMZN[,4] or AMZN.onlyPrice2 <- data.AMZN$AMZN.Close
AMZN.onlyPrice[c(1:3,nrow(AMZN.onlyPrice)),]
Subsetting using dates
For xts objects identify the dates using “index”
Interested in closing values – values for 2012
xts.2012 <- subset(data.AMZN[,4],
+ index(data.AMZN) >= "2012-01-01" &
+ index(data.AMZN) <= "2012-12-31")
xts.2012[c(1:3,nrow(xts.2012))]
For data.frame objects
AMZN.2012 <- cbind(index(data.AMZN),
+ data.frame(data.AMZN[,4])
AMZN.2012[c(1:3,nrow(AMZN.2012)),]
Converting Daily prices to weekly/monthly
to.weekly command – covert into weekly data
wk <- data.AMZN
data.weekly <- to.weekly(wk)
data.weekly[c(1:3,nrow(data.weekly)),]
to.monthly command – covert into monthly data
mo <- data.AMZN
data.monthly <- to.monthly(mo)
data.monthly[c(1:3,nrow(data.monthly)),]
Candlestick Chart using Monthly Data
Before plotting, first create a open-high-low-close (OHLC) object.
Use quantmod package
library(quantmod)
OHLC <- data.monthly[-1,-6]
AMZN.ohlc <- as.quantmod.OHLC(OHLC,col.names=c("Open","High","Low","Close","Volume"))
AMZN.ohlc[c(1:3,nrow(AMZN.ohlc)),]
Candlestick Chart using Monthly Data
chartSeries(AMZN.ohlc,theme="white.mono",name="AMZN OHLC")
Comparing Capital Gains of Multiple Securities Over Time
Import data for each of the four securities - AMZN
data.AMZN <- read.csv("AMZN.csv",header=TRUE)
date <- as.Date(data.AMZN$Date,format="%Y-%m-%d")
data.AMZN <- cbind(date, data.AMZN[,-1])
data.AMZN <- data.AMZN[order(data.AMZN$date),]
data.AMZN <- xts(data.AMZN[,2:7],order.by=data.AMZN[,1])
names(data.AMZN) <- paste(c("AMZN.Open","AMZN.High","AMZN.Low","AMZN.Close","AMZN.Adjusted","AMZN.Volume"))
data.AMZN[c(1:3,nrow(data.AMZN)),]
Comparing Capital Gains of Multiple Securities Over Time
Import data for each of the four securities - IBM
data.IBM <- read.csv("IBM.csv",header=TRUE)
date <- as.Date(data.IBM$Date,format="%Y-%m-%d")
data.IBM <- cbind(date, data.IBM[,-1])
data.IBM <- data.IBM[order(data.IBM$date),]
data.IBM <- xts(data.IBM[,2:7],order.by=data.IBM[,1])
names(data.IBM) <- paste(c("IBM.Open","IBM.High","IBM.Low","IBM.Close","IBM.Adjusted","IBM.Volume"))
data.IBM[c(1:3,nrow(data.IBM)),]
Comparing Capital Gains of Multiple Securities Over Time
Import data for each of the four securities - AAPL
data.AAPL <- read.csv("AAPL.csv",header=TRUE)
date <- as.Date(data.AAPL$Date,format="%Y-%m-%d")
data.AAPL <- cbind(date, data.AAPL[,-1])
data.AAPL <- data.AAPL[order(data.AAPL$date),]
data.AAPL <- xts(data.AAPL[,2:7],order.by=data.AAPL[,1])
names(data.AAPL) <- paste(c("AAPL.Open","AAPL.High","AAPL.Low","AAPL.Close","AAPL.Adjusted","AAPL.Volume"))
data.AAPL[c(1:3,nrow(data.AAPL)),]
Comparing Capital Gains of Multiple Securities Over Time
Import data for each of the four securities – S&P 500 (GSPC)
data.GSPC <- read.csv("GSPC.csv",header=TRUE)
date <- as.Date(data.GSPC$Date,format="%Y-%m-%d")
data.GSPC <- cbind(date, data.GSPC[,-1])
data.GSPC <- data.GSPC[order(data.GSPC$date),]
data.GSPC <- xts(data.GSPC[,2:7],order.by=data.GSPC[,1])
names(data.GSPC) <- paste(c("GSPC.Open","GSPC.High","GSPC.Low","GSPC.Close","GSPC.Adjusted","GSPC.Volume"))
data.GSPC[c(1:3,nrow(data.GSPC)),]
Comparing Capital Gains of Multiple Securities Over Time
Combine Data Into one Data Object
Close.Prices <- data.AMZN$AMZN.Close
Close.Prices <-
cbind(Close.Prices,data.GSPC$GSPC.Close,data.AAPL$AAPL.Close,data.IBM$IBM.Close)
Close.Prices[c(1:3,nrow(Close.Prices)),]
Comparing Capital Gains of Multiple Securities Over Time
Convert Data into a data.frame
multi.df <- cbind(index(Close.Prices),data.frame(Close.Prices))
names(multi.df) <- paste(c("date","AMZN","GSPC","AAPL","IBM"))
rownames(multi.df) <- seq(1,nrow(multi.df),1)
multi.df[c(1:3,nrow(multi.df)),]
Comparing Capital Gains of Multiple Securities Over Time
Calculate Normalized Values for Each Security
Create an index for each security with values that equal the price of the security on each day divided by the security’s price on December 31, 2010.
multi.df$AMZN.idx <- multi.df$AMZN/multi.df$AMZN[1]
multi.df$GSPC.idx <- multi.df$GSPC/multi.df$GSPC[1]
multi.df$AAPL.idx <- multi.df$AAPL/multi.df$AAPL[1]
multi.df$IBM.idx <- multi.df$IBM/multi.df$IBM[1]
options(digits=5) # this option remains in place until end of session
multi.df[c(1:3,nrow(multi.df)),]
Comparing Capital Gains of Multiple Securities Over Time
Plot the Capital Appreciation of Each Security
For the x variable use date, for y use S&P 500 Index
plot(x=multi.df$date,
y=multi.df$GSPC.idx,
type="l",
xlab="Date",
ylab="Value of Investment ($)",
col="black",
lty=1,
lwd=2,
main="Value of $1 Investment in
AMZN, IBM, AAPL, and the S&P 500 Index
December 31, 2010 - December 31, 2013")
Comparing Capital Gains of Multiple Securities Over Time
Adding the lines for the other three securities
lines(x=multi.df$date,
y=multi.df$AMZN.idx,
col="black",
lty=2,
lwd=1)
Comparing Capital Gains of Multiple Securities Over Time
Adding the lines for the other three securities
lines(x=multi.df$date,
y=multi.df$IBM.idx,
col="black",
lty=2,
lwd=1)
Comparing Capital Gains of Multiple Securities Over Time
Adding the lines for the other three securities
lines(x=multi.df$date,
y=multi.df$AAPL.idx,
col="black",
lty=2,
lwd=1)
Comparing Capital Gains of Multiple Securities Over Time
Adding a line to denote the starting investment value of $1
abline(h=1,lty=1,col="black")
Comparing Capital Gains of Multiple Securities Over Time
Adding a legend
legend("topleft",
c("AMZN","IBM","AAPL","S&P 500 Index"),
col=c("black","gray","gray","black"),
lty=c(2,2,1,1),
lwd=c(1,1,1,2))
Comparing Capital Gains of Multiple Securities Over Time
The plot cuts off the values of some securities.
y.range <- range(multi.df[,6:9]) # find minimum/maximum of all 4 securities
y.range
[1] 0.87404 2.24661
Use these values in the inside the plot command called “ylim”
plot(x=multi.df$date,
y=multi.df$GSPC.idx,
type="l",
xlab="Date",
ylim=y.range,
ylab="Value of Investment ($)",
col="black",
lty=1,
lwd=2,
main="Value of $1 Investment in
AMZN, IBM, AAPL, and the S&P 500 Index
December 31, 2010 - December 31, 2013")
Technical Analysis Example
Trend: Simple Moving Average Crossover
Obtain Closing Prices for AMZN Stock
AMZN.sma <- data.AMZN[,4]
AMZN.sma[c(1:3,nrow(AMZN.sma)),]
Technical Analysis Example
Trend: Simple Moving Average Crossover
Calculate the Rolling 50-Day and 200-Day Average Price
AMZN.sma$sma50 <- rollmeanr(AMZN.sma$AMZN.Close,k=50)
AMZN.sma$sma200 <- rollmeanr(AMZN.sma$AMZN.Close,k=200)
AMZN.sma[c(1:3,nrow(AMZN.sma)),]
NA - The first 50 observations are required to calculate the first 50-day moving average (similar with the sma200)
Technical Analysis Example
Subset to 2012 and 2013 data
AMZN.sma2012 <- subset(AMZN.sma,
index(AMZN.sma)>"2012-01-01")
Technical Analysis Example
Plot the SMA
y.range <- range(AMZN.sma2012,na.rm=TRUE)
y.range
par(mfrow=c(1,1))
plot(x=index(AMZN.sma2012),
xlab="Date",
y=AMZN.sma2012$AMZN.Close,
ylim=y.range,
ylab="Price ($)",
type="l",
main="Amazon - Simple Moving Average
January 1, 2012 - December 31, 2013")
lines(x=index(AMZN.sma2012),y=AMZN.sma2012$sma50)
lines(x=index(AMZN.sma2012),y=AMZN.sma2012$sma200,lty=2)
legend("topleft",
c("Amazon Price","50-Day Moving Average", "200-Day Moving Average"),
lty=c(1,1,2))
Volatility: Bollinger Bands
Bollinger Bands (a measure of volatility)
20-day simple moving average (SMA)
upper band: two std.dev above the 20-day SMA
lower band: two std.dev below the 20-day SMA
Obtain Closing Price for AMZN
AMZN.bb <- data.AMZN[,4]
AMZN.bb[c(1:3,nrow(AMZN.bb)),]
Calculate Rolling 20-Day Mean and Std.Dev
AMZN.bb$avg <- rollmeanr(AMZN.bb$AMZN.Close,k=20)
AMZN.bb$sd <- rollapply(AMZN.bb$AMZN.Close,width = 20,FUN = sd,fill = NA)
AMZN.bb[c(1:3,nrow(AMZN.bb)),]
Volatility: Bollinger Bands
Subset to 2013 Data only
AMZN.bb2013 <- subset(AMZN.bb,
index(AMZN.bb)>="2013-01-01")
AMZN.bb2013[c(1:3,nrow(AMZN.bb2013)),]
Calculate the Bollinger Bands
AMZN.bb2013$sd2up <- AMZN.bb2013$avg+2*AMZN.bb2013$sd
AMZN.bb2013$sd2down <- AMZN.bb2013$avg-2*AMZN.bb2013$sd
AMZN.bb2013[c(1:3,nrow(AMZN.bb2013)),]
Volatility: Bollinger Bands
Plot the Bollinger Bands
y.range <- range(AMZN.bb2013[,-3],na.rm=TRUE)
plot(x=index(AMZN.bb2013),
xlab="Date",
y=AMZN.bb2013$AMZN.Close,
ylim = y.range,
ylab = "Price ($)",
type = "l",
lwd=3,
main="AMZN - Bollinger Bands (20 days, 2 deviations)
January 1, 2013 - December 31,2013")
lines(x=index(AMZN.bb2013),y=AMZN.bb2013$avg,lty=2)
lines(x=index(AMZN.bb2013),y=AMZN.bb2013$sd2up,col="gray40")
lines(x=index(AMZN.bb2013),y=AMZN.bb2013$sd2down,col="gray40")
legend("topleft",
c("AMZN Price","20-Day Moving Average","Upper Band","Lower Band"),
lty=c(1,2,1,1),
lwd=c(3,1,1,1),
col=c("black","black","gray40","gray40"))