########-----This code written by Sina Motamedi with great help from Hiroyoshi Okawa-----########

########-----University of Toronto, April 2009-----########

########-----This code has been made available under the GNU Free Documentation License Version 1.3, November 2008-----########

########-----A copy of the license can be found at http://www.gnu.org/licenses/fdl-1.3.txt-----########



##---The purpose of this program is to implement a pairs trading strategy on two securities.---##



#Anything that begins with '#' is considered a comment by R and is not executed. R is also case sensitive, so be careful.


#This reads the data as matrices into R. Everything in R is a matrix. To change which stocks are used in the analysis, adjust the files below. Right now the two stocks are Kilroy Realty Corp (KRC) and Cap Lease Inc (LSE). X and Y are data for 2 years, while X2 and Y2 are data for 2.5 years. Make sure these .txt files are sorted with time ascending. You can alternatively import .csv files using the "read.csv" function.

    X<-read.table("C://R-code/KRC.txt")

    Y<-read.table("C://R-code/LSE.txt")

    X2<-read.table("C://R-code/2.5 Years/KRC2.txt")

    Y2<-read.table("C://R-code/2.5 Years/LSE2.txt")



#This names the columns of the matrices, so that we can reference them later. We will be interested in "Close", the closing daily stock price.

    colnames(X)<-c("Date","Open","High","Low", "Close", "Volume")

    colnames(Y)<-c("Date","Open","High","Low", "Close", "Volume")

    colnames(X2)<-c("Date","Open","High","Low", "Close", "Volume")

    colnames(Y2)<-c("Date","Open","High","Low", "Close", "Volume")



#This plots the series, giving them certiain colour. You may have to adjust the axes limits (i.e. 'ylim' and/or 'xlim').

    plot(Y$Close,type="l", col="blue", xlab="Time", ylab="Closing Price", ylim=c(9,87))

    lines(X$Close,type="l",col="red")



#This regresses the two series, and then provides summary of the estimates.

    model<-lm(X$Close ~Y$Close)

    summary(model)


#This defines the coefficients from the above regression as 'a' and 'b'.

    a<-array(coef(model))[1]

    b<-array(coef(model))[2]


#This defines the spread based on the regression coefficients.

    Z<-X$Close-b*Y$Close-a


#This tests whether the spread Z is stationary using the Phillips-Perron unit root test.

    PP.test(Z)


#This plots Z.

    plot(Z, type="l", col="blue")


#This creates the ACF and PACF plots of Z.

    acf(Z)

    pacf(Z)


#This estimates Z as an AR(p) process, with p to be estimated, which we call 'AR'. R will choose the p that minimizes the AIC.

    AR<-ar(Z)

    AR


#This produces the histogram of the errors of AR from above.

    hist(AR$resid)


#This will produce the QQ-plot for the errors of AR from above.

    qqnorm(AR$resid)

    qqline(AR$resid)


#Now we run Ljung-Box test on the errors of AR.

    Box.test(AR$resid, type='Ljung', lag=AR$order)


#This simulates the AR process over 1,000,000 periods, and calls it 'ARsim'.

    ARsim<-arima.sim(list(ar=AR$ar),n=1000000,innov=rnorm(n=1000000,mean=0,sd(Z)))


#This checks how many times the simulated process 'ARsim' crosses 0, and calls it 'crossings'. It then displays 'crossings' and the average period to cross.

    crossings<-0

    for(i in 1:999999)

        {

        if (ARsim[i]<0) test<- -1 else test<-1

        if (ARsim[i+1]<0) test1<- -1 else test1<-1

        if (test*test1<0) crossings<-crossings+1

        }


    crossings

    1000000/crossings



#This creates the spread Z2 over the entire history plus the implimentation period.

    Z2<-X2$Close-a-b*Y2$Close


#This plots Z2.

    plot(Z2, type="l", col="blue")


#This code will calculate the profit of the two year period using our strategy for various thresholds.

    cross0<-matrix(nrow=502,ncol=1)

    cross0[1]<-0

   

    for(i in 1:501)

        {

        if (Z[i]<0) test<- -1 else test<-1

        if (Z[i+1]<0) test1<- -1 else test1<-1

        if (test*test1<0) cross0[i+1]<-1 else cross0[i+1]<-0

        }

   

    DELTA<-matrix(nrow=1,ncol=140)

    DELTA[1]<-.1

   

    for(k in 1:140)

        {

        DELTA[k+1]<-DELTA[k]+.1

        }

   

    crossld<-matrix(nrow=502,ncol=140)

    crossud<-matrix(nrow=502,ncol=140)

   

    for(j in 1:140)

        {

        crossud[1,j]<-0

        crossld[1,j]<-0

        }

   

    for(k in 1:140)

        {

        for (i in 1:501)

            {

            if ((Z[i]-DELTA[k])<0) test<- -1 else test<-1

            if ((Z[i+1]-DELTA[k])<0) test1<- -1 else test1<-1

            if (test*test1<0) crossud[i+1,k]<-1 else crossud[i+1,k]<-0

            }

        }

   

    for(k in 1:140)

        {

        for (i in 1:501)

            {

            if ((Z[i]+DELTA[k])<0) test<- -1 else test<-1

            if ((Z[i+1]+DELTA[k])<0) test1<- -1 else test1<-1

            if (test*test1<0) crossld[i+1,k]<-1 else crossld[i+1,k]<-0

            }

        }

   

    crossd<-crossud+crossld

    trade<-matrix(nrow=502,ncol=140)

    for(k in 1:140)

        {

        for(i in 1:502)

            {

            trade[i,k]<-0

            }

        }

    for(k in 1:140)

        {

        for(i in 1:502)

            {

            if(crossd[i,k]==1)

                {

                for(j in i:502)

                    {

                    if(cross0[j]==1)

                        {

                        trade[j,k]<-1

                        break

                        }

                    }

                }

            }

        }


    profit<-matrix(nrow=140,ncol=2)

    colnames(profit)<-c("DELTA","Profit")

    for(k in 1:140) profit[k,1]<-DELTA[k]

    for(k in 1:140) profit[k,2]<-sum(trade[,k])*DELTA[k]


#This plots the various possible profits.

    plot(profit)


#This tells you what and where the profit is maximized.

    max(profit[,2])

    profit[which.max(profit[,2]),1]