#Step 1: Find path of data file and copy path (if using windows)
#Step 2: In the console below, type readClipboard()
#Step 3: Copy and paste path to the line below in quotes
#CHANGE BETWEEN QUOTES IN THIS LINE TO REFLECT DIRECTORY OF DATA:
myDataLocation = "/Users/jonathan/Dropbox/!EPSY 905/Lectures/13 PCA, EFA, and CFA/mv16epsy905_lecture13"
#SET WORKING DIRECTORY TO LOCATION OF DATA FILE
setwd(myDataLocation)
#IMPORT DATA AND PUT INTO DATASET
data01 = read.csv(file="mv16epsy905_lecture13a.csv",header=TRUE)
data02 = read.csv(file="mv16epsy905_lecture13b.csv",header=TRUE)
#AUTOMATING PACKAGES NEEDED FOR ANALYSES--------------------------------------------------------------------
haspackage = require("lavaan")
## Loading required package: lavaan
## This is lavaan 0.5-20
## lavaan is BETA software! Please report any bugs.
if (haspackage==FALSE){
install.packages("lavaan")
}
library(lavaan)
#Advanced Matrix Operations --------------------------------------------------------------------------------
#correlation matrix of SAT data
sat_corrmat = cor(data01)
#eigenvalues and eigenvectors of correlation matrix:
sat_eigen = eigen(x = sat_corrmat, symmetric = TRUE)
sat_eigen$values
## [1] 1.7752238 0.2247762
sat_eigen$vectors
## [,1] [,2]
## [1,] 0.7071068 -0.7071068
## [2,] 0.7071068 0.7071068
#demonstration of spectral decomposition
corr_rank1 = sat_eigen$values[1] * sat_eigen$vectors[,1] %*% t(sat_eigen$vectors[,1])
corr_rank1
## [,1] [,2]
## [1,] 0.8876119 0.8876119
## [2,] 0.8876119 0.8876119
corr_rank2 = corr_rank1 + sat_eigen$values[2] * sat_eigen$vectors[,2] %*% t(sat_eigen$vectors[,2])
corr_rank2
## [,1] [,2]
## [1,] 1.0000000 0.7752238
## [2,] 0.7752238 1.0000000
#Principal Components Analysis of SAT data ---------------------------------------------------------------
#PCA of Correlation matrix (scale. = TRUE)
sat_pca_corr = prcomp(x = data01, scale. = TRUE)
#show the results (compare to eigenvalues/eigenvectors)
sat_pca_corr
## Standard deviations:
## [1] 1.3323753 0.4741057
##
## Rotation:
## PC1 PC2
## SATV -0.7071068 0.7071068
## SATM -0.7071068 -0.7071068
#show the summary statistics
summary(sat_pca_corr)
## Importance of components:
## PC1 PC2
## Standard deviation 1.3324 0.4741
## Proportion of Variance 0.8876 0.1124
## Cumulative Proportion 0.8876 1.0000
#create same analysis but with covariance matrix (for visual) scale.=FALSE (covariance matrix)
sat_pca_cov = prcomp(x = data01, scale. = FALSE)
#create augmented data matrix for plot
data01a = data01
data01a$type = "Raw"
data01b = data.frame(SATV = sat_pca_cov$x[,1], SATM = sat_pca_cov$x[,2], type="PC")
data01c = rbind(data01a, data01b)
plot(x = data01c$SATV, y = data01c$SATM, ylab = "SATM/PC2", xlab = "SATV/PC1", cex.main=1.5, frame.plot=FALSE, col=ifelse(data01c$type=="Raw", "red", "blue"))
legend(0, 400, pch=1, col=c("red", "blue"), c("Data", "PCs"), bty="o", box.col="darkgreen", cex=1.5)

#Principal Components analysis of Gambling Data------------------------------------------------------------
#listwise removal of missing data (common in PCA -- but still a problem)
data02a = data02[which(is.na(data02$X1)==FALSE & is.na(data02$X3)==FALSE & is.na(data02$X5)==FALSE & is.na(data02$X9)==FALSE & is.na(data02$X10)==FALSE &
is.na(data02$X13)==FALSE & is.na(data02$X14)==FALSE & is.na(data02$X18)==FALSE & is.na(data02$X21)==FALSE & is.na(data02$X23)==FALSE),]
#analysis of covariance matrix of gambling data items
gambling_pca_cov = prcomp(x = data02a, scale. = FALSE)
gambling_pca_cov
## Standard deviations:
## [1] 2.0484706 1.3228618 1.0883066 0.8360793 0.7509565 0.7336463 0.6861620
## [8] 0.6483578 0.5823026 0.4643768
##
## Rotation:
## PC1 PC2 PC3 PC4 PC5
## X1 -0.3126472 0.10459331 0.1662869 -0.843735151 0.364943452
## X3 -0.2262923 0.07828499 0.1775587 -0.133596441 -0.524752411
## X5 -0.3199396 0.09403404 0.2294114 0.025555994 -0.492411902
## X9 -0.2433589 0.08687880 0.1596160 0.030715826 -0.117324662
## X10 -0.2798829 0.08714575 0.2352173 0.084941435 -0.194587548
## X13 -0.3293561 0.15569572 0.1863762 0.220349288 0.326257817
## X14 -0.4493615 -0.86166945 -0.2311989 0.005504759 -0.004148522
## X18 -0.3471876 0.40458523 -0.8343757 -0.028338155 -0.100291367
## X21 -0.2720582 0.13735301 0.0617232 0.198445605 0.179410916
## X23 -0.3258344 0.09836941 0.1385856 0.415552100 0.385544166
## PC6 PC7 PC8 PC9 PC10
## X1 -0.08957759 0.090472261 0.002045810 0.028416754 0.039583080
## X3 0.68009146 0.343237952 -0.191565009 -0.015252456 0.025623885
## X5 -0.62592643 -0.090211626 -0.395234597 -0.043942452 0.187031376
## X9 -0.06172329 -0.152582899 0.152138070 0.044974999 -0.916942010
## X10 -0.02579154 -0.093484479 0.842208707 -0.027303911 0.306918670
## X13 0.29451341 -0.504704067 -0.225373117 -0.524435934 0.101286325
## X14 0.02615629 -0.037268705 -0.001356776 -0.004497823 0.002084634
## X18 -0.01866511 0.009093169 0.052785485 -0.074203041 0.001980035
## X21 0.15471581 -0.249558686 -0.142870079 0.841805796 0.128352736
## X23 -0.14657740 0.717890018 -0.025173918 -0.071163812 -0.032749511
summary(gambling_pca_cov)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 2.0485 1.3229 1.0883 0.83608 0.75096 0.73365
## Proportion of Variance 0.4043 0.1686 0.1141 0.06736 0.05434 0.05186
## Cumulative Proportion 0.4043 0.5730 0.6871 0.75447 0.80881 0.86067
## PC7 PC8 PC9 PC10
## Standard deviation 0.68616 0.64836 0.58230 0.46438
## Proportion of Variance 0.04537 0.04051 0.03267 0.02078
## Cumulative Proportion 0.90604 0.94655 0.97922 1.00000
prop_var = t(summary(gambling_pca_cov)$importance[2:3,])
#creating a scree plot and a proportion of variance plot
par(mfrow = c(1,2))
plot(gambling_pca_cov, type="l", main = "Scree Plot of PCA Eigenvalues", lwd = 5)
matplot(prop_var, type="l", main = "Proportion of Variance Explained by Component", lwd = 5)
legend(x=5, y=.5, legend = c("Component Variance", "Cumulative Variance"), lty = 1:2, lwd=5, col=1:2)
