#Step 1: Find path of data file and copy path (if using windows)
#Step 2: In the console below, type readClipboard()
#Step 3: Copy and paste path to the line below in quotes

#CHANGE BETWEEN QUOTES IN THIS LINE TO REFLECT DIRECTORY OF DATA:
myDataLocation = "/Users/jonathan/Dropbox/!EPSY 905/Lectures/13 PCA, EFA, and CFA/mv16epsy905_lecture13"

#SET WORKING DIRECTORY TO LOCATION OF DATA FILE
setwd(myDataLocation)

#IMPORT DATA AND PUT INTO DATASET
data01 = read.csv(file="mv16epsy905_lecture13a.csv",header=TRUE)
data02 = read.csv(file="mv16epsy905_lecture13b.csv",header=TRUE)

#AUTOMATING PACKAGES NEEDED FOR ANALYSES--------------------------------------------------------------------
haspackage = require("lavaan")
## Loading required package: lavaan
## This is lavaan 0.5-20
## lavaan is BETA software! Please report any bugs.
if (haspackage==FALSE){
  install.packages("lavaan")
}
library(lavaan)

#Advanced Matrix Operations --------------------------------------------------------------------------------

#correlation matrix of SAT data
sat_corrmat = cor(data01)

#eigenvalues and eigenvectors of correlation matrix:
sat_eigen = eigen(x = sat_corrmat, symmetric = TRUE)

sat_eigen$values
## [1] 1.7752238 0.2247762
sat_eigen$vectors
##           [,1]       [,2]
## [1,] 0.7071068 -0.7071068
## [2,] 0.7071068  0.7071068
#demonstration of spectral decomposition
corr_rank1 = sat_eigen$values[1] * sat_eigen$vectors[,1] %*% t(sat_eigen$vectors[,1])
corr_rank1
##           [,1]      [,2]
## [1,] 0.8876119 0.8876119
## [2,] 0.8876119 0.8876119
corr_rank2 = corr_rank1 + sat_eigen$values[2] * sat_eigen$vectors[,2] %*% t(sat_eigen$vectors[,2])
corr_rank2
##           [,1]      [,2]
## [1,] 1.0000000 0.7752238
## [2,] 0.7752238 1.0000000
#Principal Components Analysis of SAT data ---------------------------------------------------------------
#PCA of Correlation matrix (scale. = TRUE)
sat_pca_corr = prcomp(x = data01, scale. = TRUE)

#show the results (compare to eigenvalues/eigenvectors)
sat_pca_corr
## Standard deviations:
## [1] 1.3323753 0.4741057
## 
## Rotation:
##             PC1        PC2
## SATV -0.7071068  0.7071068
## SATM -0.7071068 -0.7071068
#show the summary statistics
summary(sat_pca_corr)
## Importance of components:
##                           PC1    PC2
## Standard deviation     1.3324 0.4741
## Proportion of Variance 0.8876 0.1124
## Cumulative Proportion  0.8876 1.0000
#create same analysis but with covariance matrix (for visual) scale.=FALSE (covariance matrix) 
sat_pca_cov = prcomp(x = data01, scale. = FALSE)

#create augmented data matrix for plot
data01a = data01
data01a$type = "Raw"

data01b = data.frame(SATV = sat_pca_cov$x[,1], SATM = sat_pca_cov$x[,2], type="PC")
data01c = rbind(data01a, data01b)

plot(x = data01c$SATV, y = data01c$SATM, ylab = "SATM/PC2", xlab = "SATV/PC1", cex.main=1.5, frame.plot=FALSE, col=ifelse(data01c$type=="Raw", "red", "blue"))
legend(0, 400, pch=1, col=c("red", "blue"), c("Data", "PCs"), bty="o",  box.col="darkgreen", cex=1.5)

#Principal Components analysis of Gambling Data------------------------------------------------------------
#listwise removal of missing data (common in PCA -- but still a problem)
data02a = data02[which(is.na(data02$X1)==FALSE & is.na(data02$X3)==FALSE & is.na(data02$X5)==FALSE & is.na(data02$X9)==FALSE & is.na(data02$X10)==FALSE &
                       is.na(data02$X13)==FALSE & is.na(data02$X14)==FALSE & is.na(data02$X18)==FALSE & is.na(data02$X21)==FALSE & is.na(data02$X23)==FALSE),]

#analysis of covariance matrix of gambling data items
gambling_pca_cov = prcomp(x = data02a, scale. = FALSE) 
gambling_pca_cov
## Standard deviations:
##  [1] 2.0484706 1.3228618 1.0883066 0.8360793 0.7509565 0.7336463 0.6861620
##  [8] 0.6483578 0.5823026 0.4643768
## 
## Rotation:
##            PC1         PC2        PC3          PC4          PC5
## X1  -0.3126472  0.10459331  0.1662869 -0.843735151  0.364943452
## X3  -0.2262923  0.07828499  0.1775587 -0.133596441 -0.524752411
## X5  -0.3199396  0.09403404  0.2294114  0.025555994 -0.492411902
## X9  -0.2433589  0.08687880  0.1596160  0.030715826 -0.117324662
## X10 -0.2798829  0.08714575  0.2352173  0.084941435 -0.194587548
## X13 -0.3293561  0.15569572  0.1863762  0.220349288  0.326257817
## X14 -0.4493615 -0.86166945 -0.2311989  0.005504759 -0.004148522
## X18 -0.3471876  0.40458523 -0.8343757 -0.028338155 -0.100291367
## X21 -0.2720582  0.13735301  0.0617232  0.198445605  0.179410916
## X23 -0.3258344  0.09836941  0.1385856  0.415552100  0.385544166
##             PC6          PC7          PC8          PC9         PC10
## X1  -0.08957759  0.090472261  0.002045810  0.028416754  0.039583080
## X3   0.68009146  0.343237952 -0.191565009 -0.015252456  0.025623885
## X5  -0.62592643 -0.090211626 -0.395234597 -0.043942452  0.187031376
## X9  -0.06172329 -0.152582899  0.152138070  0.044974999 -0.916942010
## X10 -0.02579154 -0.093484479  0.842208707 -0.027303911  0.306918670
## X13  0.29451341 -0.504704067 -0.225373117 -0.524435934  0.101286325
## X14  0.02615629 -0.037268705 -0.001356776 -0.004497823  0.002084634
## X18 -0.01866511  0.009093169  0.052785485 -0.074203041  0.001980035
## X21  0.15471581 -0.249558686 -0.142870079  0.841805796  0.128352736
## X23 -0.14657740  0.717890018 -0.025173918 -0.071163812 -0.032749511
summary(gambling_pca_cov)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     2.0485 1.3229 1.0883 0.83608 0.75096 0.73365
## Proportion of Variance 0.4043 0.1686 0.1141 0.06736 0.05434 0.05186
## Cumulative Proportion  0.4043 0.5730 0.6871 0.75447 0.80881 0.86067
##                            PC7     PC8     PC9    PC10
## Standard deviation     0.68616 0.64836 0.58230 0.46438
## Proportion of Variance 0.04537 0.04051 0.03267 0.02078
## Cumulative Proportion  0.90604 0.94655 0.97922 1.00000
prop_var = t(summary(gambling_pca_cov)$importance[2:3,])
#creating a scree plot and a proportion of variance plot

par(mfrow = c(1,2))
plot(gambling_pca_cov, type="l", main = "Scree Plot of PCA Eigenvalues", lwd = 5)
matplot(prop_var, type="l", main = "Proportion of Variance Explained by Component", lwd = 5)
legend(x=5, y=.5, legend = c("Component Variance", "Cumulative Variance"), lty = 1:2, lwd=5, col=1:2)

#new variables from PCA:
View(gambling_pca_cov$x)

#EFA of Gambling Data with crazy constraints ------------------------------------------------------------------------------------

#step 1: determine number of factors in data

#one-factor model
EFA_1factor = factanal(x = data02a, factors = 1, rotation = "none")
EFA_1factor
## 
## Call:
## factanal(x = data02a, factors = 1, rotation = "none")
## 
## Uniquenesses:
##    X1    X3    X5    X9   X10   X13   X14   X18   X21   X23 
## 0.677 0.728 0.550 0.417 0.527 0.488 0.857 0.816 0.538 0.573 
## 
## Loadings:
##     Factor1
## X1  0.569  
## X3  0.521  
## X5  0.670  
## X9  0.764  
## X10 0.688  
## X13 0.715  
## X14 0.378  
## X18 0.429  
## X21 0.680  
## X23 0.653  
## 
##                Factor1
## SS loadings      3.828
## Proportion Var   0.383
## 
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 161.14 on 35 degrees of freedom.
## The p-value is 4.23e-18
#two-factor model
EFA_2factor = factanal(x = data02a, factors = 2, rotation = "none")
EFA_2factor
## 
## Call:
## factanal(x = data02a, factors = 2, rotation = "none")
## 
## Uniquenesses:
##    X1    X3    X5    X9   X10   X13   X14   X18   X21   X23 
## 0.680 0.727 0.525 0.375 0.488 0.456 0.858 0.774 0.424 0.564 
## 
## Loadings:
##     Factor1 Factor2
## X1   0.563         
## X3   0.517         
## X5   0.669  -0.166 
## X9   0.770  -0.181 
## X10  0.690  -0.190 
## X13  0.720   0.160 
## X14  0.374         
## X18  0.432   0.200 
## X21  0.700   0.293 
## X23  0.652   0.104 
## 
##                Factor1 Factor2
## SS loadings      3.861   0.269
## Proportion Var   0.386   0.027
## Cumulative Var   0.386   0.413
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 56.43 on 26 degrees of freedom.
## The p-value is 0.000497
#constraint demonstration (lambda^T psi-1 lambda = diag)
Lambda = matrix(EFA_2factor$loadings, ncol=2)
Psi = diag(EFA_2factor$uniquenesses)

t(Lambda) %*% solve(Psi) %*% Lambda
##              [,1]         [,2]
## [1,] 7.698164e+00 -1.94289e-16
## [2,] 4.163336e-17  5.57827e-01
#three-factor model
EFA_3factor = factanal(x = data02a, factors = 3, rotation = "none")
EFA_3factor
## 
## Call:
## factanal(x = data02a, factors = 3, rotation = "none")
## 
## Uniquenesses:
##    X1    X3    X5    X9   X10   X13   X14   X18   X21   X23 
## 0.661 0.714 0.534 0.005 0.523 0.464 0.831 0.782 0.388 0.548 
## 
## Loadings:
##     Factor1 Factor2 Factor3
## X1   0.438   0.347   0.164 
## X3   0.402   0.315   0.159 
## X5   0.563   0.331   0.198 
## X9   0.997                 
## X10  0.586   0.322   0.171 
## X13  0.541   0.487         
## X14  0.275   0.257   0.164 
## X18  0.299   0.344  -0.101 
## X21  0.508   0.537  -0.256 
## X23  0.463   0.485         
## 
##                Factor1 Factor2 Factor3
## SS loadings      2.940   1.378   0.231
## Proportion Var   0.294   0.138   0.023
## Cumulative Var   0.294   0.432   0.455
## 
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 31.24 on 18 degrees of freedom.
## The p-value is 0.027
#constraint demonstration (lambda^T psi-1 lambda = diag)
Lambda = matrix(EFA_3factor$loadings, ncol=3)
Psi = diag(EFA_3factor$uniquenesses)

t(Lambda) %*% solve(Psi) %*% Lambda
##               [,1]          [,2]          [,3]
## [1,]  2.026141e+02  2.220446e-16 -3.122502e-16
## [2,] -2.220446e-16  2.683701e+00 -1.873501e-16
## [3,] -2.636780e-16 -1.942890e-16  4.352294e-01
#four-factor model
EFA_4factor = factanal(x = data02a, factors = 4, rotation = "none")
EFA_4factor
## 
## Call:
## factanal(x = data02a, factors = 4, rotation = "none")
## 
## Uniquenesses:
##    X1    X3    X5    X9   X10   X13   X14   X18   X21   X23 
## 0.529 0.718 0.535 0.324 0.495 0.489 0.848 0.778 0.292 0.005 
## 
## Loadings:
##     Factor1 Factor2 Factor3 Factor4
## X1   0.356   0.457   0.127   0.346 
## X3   0.322   0.403                 
## X5   0.452   0.480   0.174         
## X9   0.468   0.630   0.196  -0.144 
## X10  0.458   0.502   0.186         
## X13  0.509   0.494                 
## X14  0.292   0.227                 
## X18  0.316   0.294  -0.141   0.128 
## X21  0.491   0.548  -0.408         
## X23  0.997                         
## 
##                Factor1 Factor2 Factor3 Factor4
## SS loadings      2.542   1.933   0.327   0.183
## Proportion Var   0.254   0.193   0.033   0.018
## Cumulative Var   0.254   0.447   0.480   0.499
## 
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 10.2 on 11 degrees of freedom.
## The p-value is 0.512
#constraint demonstration (lambda^T psi-1 lambda = diag)
Lambda = matrix(EFA_4factor$loadings, ncol=4)
Psi = diag(EFA_4factor$uniquenesses)

t(Lambda) %*% solve(Psi) %*% Lambda
##               [,1]          [,2]          [,3]          [,4]
## [1,]  2.023692e+02  0.000000e+00 -2.081668e-16  1.318390e-16
## [2,]  0.000000e+00  4.558736e+00  7.524363e-17 -1.139496e-16
## [3,] -1.942890e-16  1.860491e-16  9.039206e-01 -2.110721e-16
## [4,]  6.245005e-17 -1.070108e-16 -2.041332e-16  3.539411e-01
#step 2: interpret the factors for final solution (4-factor model)

#varimax rotation
EFA_4factor_varimax = factanal(x = data02a, factors = 4, rotation = "varimax")
EFA_4factor_varimax
## 
## Call:
## factanal(x = data02a, factors = 4, rotation = "varimax")
## 
## Uniquenesses:
##    X1    X3    X5    X9   X10   X13   X14   X18   X21   X23 
## 0.529 0.718 0.535 0.324 0.495 0.489 0.848 0.778 0.292 0.005 
## 
## Loadings:
##     Factor1 Factor2 Factor3 Factor4
## X1  0.301   0.209   0.114   0.569  
## X3  0.360   0.213   0.113   0.307  
## X5  0.532   0.224   0.202   0.301  
## X9  0.714   0.291   0.157   0.239  
## X10 0.597   0.231   0.202   0.234  
## X13 0.414   0.461   0.236   0.268  
## X14 0.224   0.129   0.162   0.242  
## X18 0.147   0.346   0.144   0.246  
## X21 0.300   0.746   0.184   0.166  
## X23 0.266   0.270   0.904   0.188  
## 
##                Factor1 Factor2 Factor3 Factor4
## SS loadings      1.772   1.255   1.086   0.873
## Proportion Var   0.177   0.125   0.109   0.087
## Cumulative Var   0.177   0.303   0.411   0.499
## 
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 10.2 on 11 degrees of freedom.
## The p-value is 0.512
#promax rotation
EFA_4factor_varimax = factanal(x = data02a, factors = 4, rotation = "promax")
EFA_4factor_varimax
## 
## Call:
## factanal(x = data02a, factors = 4, rotation = "promax")
## 
## Uniquenesses:
##    X1    X3    X5    X9   X10   X13   X14   X18   X21   X23 
## 0.529 0.718 0.535 0.324 0.495 0.489 0.848 0.778 0.292 0.005 
## 
## Loadings:
##     Factor1 Factor2 Factor3 Factor4
## X1                  -0.104   0.861 
## X3   0.249                   0.308 
## X5   0.530                   0.171 
## X9   0.885                         
## X10  0.699                         
## X13  0.254           0.368   0.101 
## X14                          0.274 
## X18 -0.126           0.314   0.269 
## X21                  0.906  -0.106 
## X23          1.037                 
## 
##                Factor1 Factor2 Factor3 Factor4
## SS loadings      1.712   1.102   1.072   1.040
## Proportion Var   0.171   0.110   0.107   0.104
## Cumulative Var   0.171   0.281   0.389   0.493
## 
## Factor Correlations:
##         Factor1 Factor2 Factor3 Factor4
## Factor1   1.000   0.635  -0.628  -0.631
## Factor2   0.635   1.000  -0.736  -0.852
## Factor3  -0.628  -0.736   1.000   0.768
## Factor4  -0.631  -0.852   0.768   1.000
## 
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 10.2 on 11 degrees of freedom.
## The p-value is 0.512
#CFA version of EFA using lavaan ---------------------------------------------------------------------------------------

#NOTE: THIS VERSION USES DATA WHERE ANY INCOMPLETE OBSERVATIONS ARE REMOVED TO MATCH NUMBERS (EXAMPLE PURPOSES ONLY)
#IN PRACTICE: DO NOT USE LISTWISE DELETION AS ML WILL STILL WORK!

#step #1: determining number of factors

#one factor CFA
CFA_1factor.syntax = "
factor1 =~ X1 + X3 + X5 + X9 + X10 + X13 + X14 + X18 + X21 + X23
"

#for comparison with EFA we are using standardized factors (var = 1; mean = 0)
CFA_1factor.model = cfa(model = CFA_1factor.syntax, data = data02a, estimator = "MLR", std.lv = TRUE)
summary(CFA_1factor.model, fit.measures = TRUE, standardized = TRUE)
## lavaan (0.5-20) converged normally after  26 iterations
## 
##   Number of observations                          1333
## 
##   Estimator                                         ML      Robust
##   Minimum Function Test Statistic              161.846     104.001
##   Degrees of freedom                                35          35
##   P-value (Chi-square)                           0.000       0.000
##   Scaling correction factor                                  1.556
##     for the Yuan-Bentler correction
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             4148.081    2238.585
##   Degrees of freedom                                45          45
##   P-value                                        0.000       0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.969       0.969
##   Tucker-Lewis Index (TLI)                       0.960       0.960
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -16575.733  -16575.733
##   Scaling correction factor                                  2.354
##     for the MLR correction
##   Loglikelihood unrestricted model (H1)     -16494.810  -16494.810
##   Scaling correction factor                                  1.924
##     for the MLR correction
## 
##   Number of free parameters                         30          30
##   Akaike (AIC)                               33211.466   33211.466
##   Bayesian (BIC)                             33367.322   33367.322
##   Sample-size adjusted Bayesian (BIC)        33272.025   33272.025
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.052       0.038
##   90 Percent Confidence Interval          0.044  0.060       0.032  0.045
##   P-value RMSEA <= 0.05                          0.318       0.997
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.026       0.026
## 
## Parameter Estimates:
## 
##   Information                                 Observed
##   Standard Errors                   Robust.huber.white
## 
## Latent Variables:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##   factor1 =~                                                            
##     X1                0.581    0.039   15.002    0.000    0.581    0.569
##     X3                0.451    0.038   11.896    0.000    0.451    0.521
##     X5                0.647    0.041   15.684    0.000    0.647    0.670
##     X9                0.542    0.034   16.182    0.000    0.542    0.764
##     X10               0.598    0.034   17.355    0.000    0.598    0.688
##     X13               0.684    0.037   18.334    0.000    0.684    0.715
##     X14               0.562    0.038   14.601    0.000    0.562    0.378
##     X18               0.547    0.038   14.438    0.000    0.547    0.429
##     X21               0.564    0.036   15.774    0.000    0.564    0.680
##     X23               0.635    0.033   19.346    0.000    0.635    0.653
## 
## Intercepts:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##     X1                1.818    0.028   65.015    0.000    1.818    1.781
##     X3                1.549    0.024   65.300    0.000    1.549    1.789
##     X5                1.588    0.026   60.049    0.000    1.588    1.645
##     X9                1.420    0.019   72.984    0.000    1.420    1.999
##     X10               1.560    0.024   65.466    0.000    1.560    1.793
##     X13               1.547    0.026   59.027    0.000    1.547    1.617
##     X14               2.340    0.041   57.474    0.000    2.340    1.574
##     X18               1.794    0.035   51.372    0.000    1.794    1.407
##     X21               1.431    0.023   63.009    0.000    1.431    1.726
##     X23               1.561    0.027   58.619    0.000    1.561    1.606
##     factor1           0.000                               0.000    0.000
## 
## Variances:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##     X1                0.706    0.063   11.130    0.000    0.706    0.677
##     X3                0.546    0.043   12.732    0.000    0.546    0.728
##     X5                0.513    0.047   10.888    0.000    0.513    0.550
##     X9                0.210    0.016   12.932    0.000    0.210    0.417
##     X10               0.399    0.043    9.297    0.000    0.399    0.527
##     X13               0.447    0.047    9.541    0.000    0.447    0.488
##     X14               1.894    0.078   24.226    0.000    1.894    0.857
##     X18               1.326    0.096   13.842    0.000    1.326    0.816
##     X21               0.370    0.036   10.303    0.000    0.370    0.538
##     X23               0.542    0.047   11.570    0.000    0.542    0.573
##     factor1           1.000                               1.000    1.000
EFA_1factor
## 
## Call:
## factanal(x = data02a, factors = 1, rotation = "none")
## 
## Uniquenesses:
##    X1    X3    X5    X9   X10   X13   X14   X18   X21   X23 
## 0.677 0.728 0.550 0.417 0.527 0.488 0.857 0.816 0.538 0.573 
## 
## Loadings:
##     Factor1
## X1  0.569  
## X3  0.521  
## X5  0.670  
## X9  0.764  
## X10 0.688  
## X13 0.715  
## X14 0.378  
## X18 0.429  
## X21 0.680  
## X23 0.653  
## 
##                Factor1
## SS loadings      3.828
## Proportion Var   0.383
## 
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 161.14 on 35 degrees of freedom.
## The p-value is 4.23e-18
#two factor CFA: one item removed from factor 2 and zero covariance between factors

CFA_2factor.syntax = "
factor1 =~ X1 + X3 + X5 + X9 + X10 + X13 + X14 + X18 + X21 + X23
factor2 =~      X3 + X5 + X9 + X10 + X13 + X14 + X18 + X21 + X23

factor1 ~ 0*factor2
"

#for comparison with EFA we are using standardized factors (var = 1; mean = 0)
CFA_2factor.model = cfa(model = CFA_2factor.syntax, data = data02a, estimator = "MLR", std.lv = TRUE)
summary(CFA_2factor.model, fit.measures = TRUE, standardized = TRUE)
## lavaan (0.5-20) converged normally after  64 iterations
## 
##   Number of observations                          1333
## 
##   Estimator                                         ML      Robust
##   Minimum Function Test Statistic               56.704      39.189
##   Degrees of freedom                                26          26
##   P-value (Chi-square)                           0.000       0.047
##   Scaling correction factor                                  1.447
##     for the Yuan-Bentler correction
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             4148.081    2238.585
##   Degrees of freedom                                45          45
##   P-value                                        0.000       0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.993       0.994
##   Tucker-Lewis Index (TLI)                       0.987       0.990
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -16523.162  -16523.162
##   Scaling correction factor                                  2.243
##     for the MLR correction
##   Loglikelihood unrestricted model (H1)     -16494.810  -16494.810
##   Scaling correction factor                                  1.924
##     for the MLR correction
## 
##   Number of free parameters                         39          39
##   Akaike (AIC)                               33124.324   33124.324
##   Bayesian (BIC)                             33326.937   33326.937
##   Sample-size adjusted Bayesian (BIC)        33203.051   33203.051
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.030       0.020
##   90 Percent Confidence Interval          0.019  0.040       0.007  0.029
##   P-value RMSEA <= 0.05                          0.999       1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.015       0.015
## 
## Parameter Estimates:
## 
##   Information                                 Observed
##   Standard Errors                   Robust.huber.white
## 
## Latent Variables:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##   factor1 =~                                                            
##     X1                0.578    0.039   14.876    0.000    0.578    0.566
##     X3                0.452    0.038   11.780    0.000    0.452    0.522
##     X5                0.659    0.041   16.022    0.000    0.659    0.682
##     X9                0.557    0.032   17.200    0.000    0.557    0.784
##     X10               0.613    0.035   17.707    0.000    0.613    0.705
##     X13               0.671    0.042   16.164    0.000    0.671    0.702
##     X14               0.559    0.039   14.279    0.000    0.559    0.376
##     X18               0.524    0.045   11.627    0.000    0.524    0.411
##     X21               0.555    0.043   13.021    0.000    0.555    0.669
##     X23               0.621    0.034   18.196    0.000    0.621    0.639
##   factor2 =~                                                            
##     X3               -0.023    0.049   -0.466    0.641   -0.023   -0.027
##     X5               -0.098    0.069   -1.425    0.154   -0.098   -0.101
##     X9               -0.076    0.069   -1.093    0.274   -0.076   -0.107
##     X10              -0.108    0.075   -1.430    0.153   -0.108   -0.124
##     X13               0.218    0.072    3.034    0.002    0.218    0.228
##     X14              -0.011    0.082   -0.140    0.888   -0.011   -0.008
##     X18               0.306    0.073    4.205    0.000    0.306    0.240
##     X21               0.297    0.089    3.344    0.001    0.297    0.358
##     X23               0.161    0.069    2.322    0.020    0.161    0.166
## 
## Regressions:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##   factor1 ~                                                             
##     factor2           0.000                               0.000    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##     X1                1.818    0.028   65.015    0.000    1.818    1.781
##     X3                1.549    0.024   65.300    0.000    1.549    1.789
##     X5                1.588    0.026   60.049    0.000    1.588    1.645
##     X9                1.420    0.019   72.984    0.000    1.420    1.999
##     X10               1.560    0.024   65.466    0.000    1.560    1.793
##     X13               1.547    0.026   59.027    0.000    1.547    1.617
##     X14               2.340    0.041   57.474    0.000    2.340    1.574
##     X18               1.794    0.035   51.372    0.000    1.794    1.407
##     X21               1.431    0.023   63.009    0.000    1.431    1.726
##     X23               1.561    0.027   58.619    0.000    1.561    1.606
##     factor1           0.000                               0.000    0.000
##     factor2           0.000                               0.000    0.000
## 
## Variances:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##     X1                0.709    0.065   10.958    0.000    0.709    0.680
##     X3                0.545    0.043   12.668    0.000    0.545    0.727
##     X5                0.489    0.053    9.306    0.000    0.489    0.525
##     X9                0.189    0.020    9.463    0.000    0.189    0.375
##     X10               0.369    0.043    8.665    0.000    0.369    0.488
##     X13               0.417    0.048    8.621    0.000    0.417    0.456
##     X14               1.896    0.078   24.415    0.000    1.896    0.858
##     X18               1.258    0.101   12.423    0.000    1.258    0.774
##     X21               0.291    0.041    7.040    0.000    0.291    0.424
##     X23               0.534    0.051   10.465    0.000    0.534    0.564
##     factor1           1.000                               1.000    1.000
##     factor2           1.000                               1.000    1.000
EFA_2factor
## 
## Call:
## factanal(x = data02a, factors = 2, rotation = "none")
## 
## Uniquenesses:
##    X1    X3    X5    X9   X10   X13   X14   X18   X21   X23 
## 0.680 0.727 0.525 0.375 0.488 0.456 0.858 0.774 0.424 0.564 
## 
## Loadings:
##     Factor1 Factor2
## X1   0.563         
## X3   0.517         
## X5   0.669  -0.166 
## X9   0.770  -0.181 
## X10  0.690  -0.190 
## X13  0.720   0.160 
## X14  0.374         
## X18  0.432   0.200 
## X21  0.700   0.293 
## X23  0.652   0.104 
## 
##                Factor1 Factor2
## SS loadings      3.861   0.269
## Proportion Var   0.386   0.027
## Cumulative Var   0.386   0.413
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 56.43 on 26 degrees of freedom.
## The p-value is 0.000497
#three factor CFA: 
CFA_3factor.syntax = "
factor1 =~ X1 + X3 + X5 + X9 + X10 + X13 + X14 + X18 + X21 + X23
factor2 =~      X3 + X5 + X9 + X10 + X13 + X14 + X18 + X21 + X23
factor3 =~           X5 + X9 + X10 + X13 + X14 + X18 + X21 + X23

factor1 ~ 0*factor2 + 0*factor3
factor2 ~ 0*factor3
"

#for comparison with EFA we are using standardized factors (var = 1; mean = 0)
CFA_3factor.model = cfa(model = CFA_3factor.syntax, data = data02a, estimator = "MLR", std.lv = TRUE)
## Warning in lav_object_post_check(lavobject): lavaan WARNING: some estimated
## variances are negative
## Warning in lav_object_post_check(lavobject): lavaan WARNING: observed
## variable error term matrix (theta) is not positive definite; use
## inspect(fit,"theta") to investigate.
summary(CFA_3factor.model, fit.measures = TRUE, standardized = TRUE)
## lavaan (0.5-20) converged normally after 245 iterations
## 
##   Number of observations                          1333
## 
##   Estimator                                         ML      Robust
##   Minimum Function Test Statistic               31.402      32.372
##   Degrees of freedom                                18          18
##   P-value (Chi-square)                           0.026       0.020
##   Scaling correction factor                                  0.970
##     for the Yuan-Bentler correction
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             4148.081    2238.585
##   Degrees of freedom                                45          45
##   P-value                                        0.000       0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.997       0.993
##   Tucker-Lewis Index (TLI)                       0.992       0.984
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -16510.511  -16510.511
##   Scaling correction factor                                  2.290
##     for the MLR correction
##   Loglikelihood unrestricted model (H1)     -16494.810  -16494.810
##   Scaling correction factor                                  1.924
##     for the MLR correction
## 
##   Number of free parameters                         47          47
##   Akaike (AIC)                               33115.022   33115.022
##   Bayesian (BIC)                             33359.195   33359.195
##   Sample-size adjusted Bayesian (BIC)        33209.897   33209.897
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.024       0.024
##   90 Percent Confidence Interval          0.008  0.037       0.009  0.038
##   P-value RMSEA <= 0.05                          1.000       0.999
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.012       0.012
## 
## Parameter Estimates:
## 
##   Information                                 Observed
##   Standard Errors                   Robust.huber.white
## 
## Latent Variables:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##   factor1 =~                                                            
##     X1                0.595    0.043   13.915    0.000    0.595    0.582
##     X3                0.464    0.043   10.770    0.000    0.464    0.535
##     X5                0.654    0.041   15.870    0.000    0.654    0.677
##     X9                0.527    0.043   12.127    0.000    0.527    0.742
##     X10               0.593    0.044   13.580    0.000    0.593    0.681
##     X13               0.648    0.048   13.617    0.000    0.648    0.677
##     X14               0.603    0.084    7.199    0.000    0.603    0.406
##     X18               0.512    0.053    9.603    0.000    0.512    0.401
##     X21               0.522    0.051   10.283    0.000    0.522    0.630
##     X23               0.631    0.046   13.755    0.000    0.631    0.649
##   factor2 =~                                                            
##     X3               -0.009    0.091   -0.095    0.924   -0.009   -0.010
##     X5               -0.016    0.604   -0.027    0.978   -0.016   -0.017
##     X9                0.120    4.094    0.029    0.977    0.120    0.169
##     X10               0.009    0.646    0.014    0.989    0.009    0.010
##     X13               0.265    0.233    1.136    0.256    0.265    0.277
##     X14              -0.071    0.275   -0.257    0.797   -0.071   -0.047
##     X18               0.294    0.567    0.518    0.604    0.294    0.230
##     X21               0.381    0.496    0.768    0.443    0.381    0.459
##     X23               0.152    0.567    0.268    0.789    0.152    0.156
##   factor3 =~                                                            
##     X5               -0.081    0.154   -0.526    0.599   -0.081   -0.084
##     X9               -0.484    0.936   -0.517    0.605   -0.484   -0.681
##     X10              -0.093    0.404   -0.230    0.818   -0.093   -0.107
##     X13               0.022    2.195    0.010    0.992    0.022    0.023
##     X14               0.048    0.829    0.058    0.954    0.048    0.032
##     X18               0.079    2.207    0.036    0.971    0.079    0.062
##     X21               0.056    3.082    0.018    0.985    0.056    0.068
##     X23               0.075    1.095    0.069    0.945    0.075    0.077
## 
## Regressions:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##   factor1 ~                                                             
##     factor2           0.000                               0.000    0.000
##     factor3           0.000                               0.000    0.000
##   factor2 ~                                                             
##     factor3           0.000                               0.000    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##     X1                1.818    0.028   65.015    0.000    1.818    1.781
##     X3                1.549    0.024   65.300    0.000    1.549    1.789
##     X5                1.588    0.026   60.049    0.000    1.588    1.645
##     X9                1.420    0.019   72.984    0.000    1.420    1.999
##     X10               1.560    0.024   65.466    0.000    1.560    1.793
##     X13               1.547    0.026   59.027    0.000    1.547    1.617
##     X14               2.340    0.041   57.474    0.000    2.340    1.574
##     X18               1.794    0.035   51.372    0.000    1.794    1.407
##     X21               1.431    0.023   63.009    0.000    1.431    1.726
##     X23               1.561    0.027   58.619    0.000    1.561    1.606
##     factor1           0.000                               0.000    0.000
##     factor2           0.000                               0.000    0.000
##     factor3           0.000                               0.000    0.000
## 
## Variances:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##     X1                0.689    0.069    9.938    0.000    0.689    0.661
##     X3                0.535    0.053   10.050    0.000    0.535    0.714
##     X5                0.498    0.057    8.756    0.000    0.498    0.535
##     X9               -0.022    1.675   -0.013    0.990   -0.022   -0.043
##     X10               0.397    0.059    6.734    0.000    0.397    0.524
##     X13               0.425    0.049    8.640    0.000    0.425    0.464
##     X14               1.839    0.162   11.315    0.000    1.839    0.832
##     X18               1.271    0.099   12.827    0.000    1.271    0.782
##     X21               0.267    0.048    5.611    0.000    0.267    0.388
##     X23               0.519    0.054    9.538    0.000    0.519    0.549
##     factor1           1.000                               1.000    1.000
##     factor2           1.000                               1.000    1.000
##     factor3           1.000                               1.000    1.000
EFA_3factor
## 
## Call:
## factanal(x = data02a, factors = 3, rotation = "none")
## 
## Uniquenesses:
##    X1    X3    X5    X9   X10   X13   X14   X18   X21   X23 
## 0.661 0.714 0.534 0.005 0.523 0.464 0.831 0.782 0.388 0.548 
## 
## Loadings:
##     Factor1 Factor2 Factor3
## X1   0.438   0.347   0.164 
## X3   0.402   0.315   0.159 
## X5   0.563   0.331   0.198 
## X9   0.997                 
## X10  0.586   0.322   0.171 
## X13  0.541   0.487         
## X14  0.275   0.257   0.164 
## X18  0.299   0.344  -0.101 
## X21  0.508   0.537  -0.256 
## X23  0.463   0.485         
## 
##                Factor1 Factor2 Factor3
## SS loadings      2.940   1.378   0.231
## Proportion Var   0.294   0.138   0.023
## Cumulative Var   0.294   0.432   0.455
## 
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 31.24 on 18 degrees of freedom.
## The p-value is 0.027
#fit is not significantly worse...can stop here!

#re-examining three factor estimates
summary(CFA_3factor.model, fit.measures = TRUE, standardized = TRUE)
## lavaan (0.5-20) converged normally after 245 iterations
## 
##   Number of observations                          1333
## 
##   Estimator                                         ML      Robust
##   Minimum Function Test Statistic               31.402      32.372
##   Degrees of freedom                                18          18
##   P-value (Chi-square)                           0.026       0.020
##   Scaling correction factor                                  0.970
##     for the Yuan-Bentler correction
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             4148.081    2238.585
##   Degrees of freedom                                45          45
##   P-value                                        0.000       0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.997       0.993
##   Tucker-Lewis Index (TLI)                       0.992       0.984
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -16510.511  -16510.511
##   Scaling correction factor                                  2.290
##     for the MLR correction
##   Loglikelihood unrestricted model (H1)     -16494.810  -16494.810
##   Scaling correction factor                                  1.924
##     for the MLR correction
## 
##   Number of free parameters                         47          47
##   Akaike (AIC)                               33115.022   33115.022
##   Bayesian (BIC)                             33359.195   33359.195
##   Sample-size adjusted Bayesian (BIC)        33209.897   33209.897
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.024       0.024
##   90 Percent Confidence Interval          0.008  0.037       0.009  0.038
##   P-value RMSEA <= 0.05                          1.000       0.999
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.012       0.012
## 
## Parameter Estimates:
## 
##   Information                                 Observed
##   Standard Errors                   Robust.huber.white
## 
## Latent Variables:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##   factor1 =~                                                            
##     X1                0.595    0.043   13.915    0.000    0.595    0.582
##     X3                0.464    0.043   10.770    0.000    0.464    0.535
##     X5                0.654    0.041   15.870    0.000    0.654    0.677
##     X9                0.527    0.043   12.127    0.000    0.527    0.742
##     X10               0.593    0.044   13.580    0.000    0.593    0.681
##     X13               0.648    0.048   13.617    0.000    0.648    0.677
##     X14               0.603    0.084    7.199    0.000    0.603    0.406
##     X18               0.512    0.053    9.603    0.000    0.512    0.401
##     X21               0.522    0.051   10.283    0.000    0.522    0.630
##     X23               0.631    0.046   13.755    0.000    0.631    0.649
##   factor2 =~                                                            
##     X3               -0.009    0.091   -0.095    0.924   -0.009   -0.010
##     X5               -0.016    0.604   -0.027    0.978   -0.016   -0.017
##     X9                0.120    4.094    0.029    0.977    0.120    0.169
##     X10               0.009    0.646    0.014    0.989    0.009    0.010
##     X13               0.265    0.233    1.136    0.256    0.265    0.277
##     X14              -0.071    0.275   -0.257    0.797   -0.071   -0.047
##     X18               0.294    0.567    0.518    0.604    0.294    0.230
##     X21               0.381    0.496    0.768    0.443    0.381    0.459
##     X23               0.152    0.567    0.268    0.789    0.152    0.156
##   factor3 =~                                                            
##     X5               -0.081    0.154   -0.526    0.599   -0.081   -0.084
##     X9               -0.484    0.936   -0.517    0.605   -0.484   -0.681
##     X10              -0.093    0.404   -0.230    0.818   -0.093   -0.107
##     X13               0.022    2.195    0.010    0.992    0.022    0.023
##     X14               0.048    0.829    0.058    0.954    0.048    0.032
##     X18               0.079    2.207    0.036    0.971    0.079    0.062
##     X21               0.056    3.082    0.018    0.985    0.056    0.068
##     X23               0.075    1.095    0.069    0.945    0.075    0.077
## 
## Regressions:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##   factor1 ~                                                             
##     factor2           0.000                               0.000    0.000
##     factor3           0.000                               0.000    0.000
##   factor2 ~                                                             
##     factor3           0.000                               0.000    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##     X1                1.818    0.028   65.015    0.000    1.818    1.781
##     X3                1.549    0.024   65.300    0.000    1.549    1.789
##     X5                1.588    0.026   60.049    0.000    1.588    1.645
##     X9                1.420    0.019   72.984    0.000    1.420    1.999
##     X10               1.560    0.024   65.466    0.000    1.560    1.793
##     X13               1.547    0.026   59.027    0.000    1.547    1.617
##     X14               2.340    0.041   57.474    0.000    2.340    1.574
##     X18               1.794    0.035   51.372    0.000    1.794    1.407
##     X21               1.431    0.023   63.009    0.000    1.431    1.726
##     X23               1.561    0.027   58.619    0.000    1.561    1.606
##     factor1           0.000                               0.000    0.000
##     factor2           0.000                               0.000    0.000
##     factor3           0.000                               0.000    0.000
## 
## Variances:
##                    Estimate  Std.Err  Z-value  P(>|z|)   Std.lv  Std.all
##     X1                0.689    0.069    9.938    0.000    0.689    0.661
##     X3                0.535    0.053   10.050    0.000    0.535    0.714
##     X5                0.498    0.057    8.756    0.000    0.498    0.535
##     X9               -0.022    1.675   -0.013    0.990   -0.022   -0.043
##     X10               0.397    0.059    6.734    0.000    0.397    0.524
##     X13               0.425    0.049    8.640    0.000    0.425    0.464
##     X14               1.839    0.162   11.315    0.000    1.839    0.832
##     X18               1.271    0.099   12.827    0.000    1.271    0.782
##     X21               0.267    0.048    5.611    0.000    0.267    0.388
##     X23               0.519    0.054    9.538    0.000    0.519    0.549
##     factor1           1.000                               1.000    1.000
##     factor2           1.000                               1.000    1.000
##     factor3           1.000                               1.000    1.000
#NO FACTOR LOADINGS ON FACTOR 2 OR FACTOR 3 ARE SIGNIFICANTLY DIFFERENT FROM ZERO -- SO WE DON'T HAVE FACTORS -- THE ONE FACTOR MODEL IS BEST!