#Developed 12/1/2025 - 12/21/2025 #still have problems in estimating factor indeterminancy #code taken from equations for Spearman method by #CFA from Dhaene and Rosseel (SEM, 2024 (31) 1-13) # Closed form estimation of Confirmatory factoring # D and R discuss multiple approaches but recommend the Spearman algorithm as best # follows a paper by Guttman CFA <- function(r,keys=NULL,g=FALSE,cor="cor", use="pairwise", n.obs=NA, orthog=FALSE, weight=NULL, correct=0, method="regression", missing=FALSE,impute="none" ) { cl <- match.call() # 3 ways of defining the model: a keys list ala scoreIems, matrix, or lavaan style if(is.null(keys)) {X <- matrix(rep(1,ncol(r)),ncol=1) rownames(X) <- colnames(r)} else { if(is.list(keys)) {X <- make.keys(r,keys)} else {if(is.matrix(keys)) {X <- keys} else {X <- lavParse(keys)} #just a matrix of 1, 0, -1 } #converts the keys variables to 1, 0 matrix (if not already in that form) } if( is.list(keys)) {select <- sub("-","",unlist(keys))} else { select<- rownames(X)} # to speed up scoring one set of scales from many items if(any( !(select %in% colnames(r)) )) { cat("\nVariable names in keys are incorrectly specified. Offending items are ", select[which(!(select %in% colnames(r)))],"\n") stop("I am stopping because of improper input. See above for a list of bad item(s). ")} if(!g) {r <-r[,select] X <-X[select,,drop=FALSE]} if(!isCorrelation(r)) {n.obs <- nrow(r) original.data <- r #save these for scores switch(cor, cor = {if(!is.null(weight)) {r <- cor.wt(r,w=weight)$r} else { r <- cor(r,use=use)} }, cov = {if(!is.null(weight)) {r <- cor.wt(r,w=weight,cor=FALSE)$r} else { r <- cov(r,use=use)} covar <- TRUE}, #fixed 10/30/25 wtd = { r <- cor.wt(r,w=weight)$r}, spearman = {r <- cor(r,use=use,method="spearman")}, kendall = {r <- cor(r,use=use,method="kendall")}, tet = {r <- tetrachoric(r,correct=correct,weight=weight)$rho}, poly = {r <- polychoric(r,correct=correct,weight=weight)$rho}, tetrachoric = {r <- tetrachoric(r,correct=correct,weight=weight)$rho}, polychoric = {r <- polychoric(r,correct=correct,weight=weight)$rho}, mixed = {r <- mixedCor(r,use=use,correct=correct)$rho}, Yuleb = {r <- YuleCor(r,,bonett=TRUE)$rho}, YuleQ = {r <- YuleCor(r,1)$rho}, YuleY = {r <- YuleCor(r,.5)$rho } ) R <- S <- r } else {R <- S <- r if(is.na(n.obs)){ n.obs <- 100 cat("\n Number of observations not specified. Arbitrarily set to 100.\n")} original.data <- r} diagS <- diag(S) #save this for later #flip correlations to go along with directions of keys flip <- X %*% rep(1,ncol(X)) #combine all keys into one to allow us to select the over all model if(is.null(keys)) { p1 <- principal(r,scores=FALSE) flip <- 1- 2* (p1$loadings < -.1)} flipd <- diag(nrow=ncol(R), ncol=ncol(R)) rownames(flipd) <- colnames(flipd)<- rownames(R) diag(flipd)[rownames(flipd) %in% rownames(flip)] <- flip S <- flipd %*% S %*% t(flipd) #negatively keyed variables are flipped mask <- abs(X %*% t(X)) #S <- mask * S #select a block diagonal form #need to find the communalities by each factor using the Spearman method nfact <- ncol(X) nvar <- nrow(X) dof <- nvar * (nvar-1)/2 - nvar - (!orthog) * nfact*(nfact-1)/2 #number of correlations - number of factor loadings estimated - correlations keys <- X X <- abs(X) H2 <- NULL for (i in 1:nfact) { #this is the brute force way of doing it, look more carefully at D and R H2 <- c(H2,Spearman.h2(S[X[,i]>0,X[,i]>0]))} #H2 is the vector of 1 factor communalities diag(S) <- H2 #put the communalities on the diagonal if(nrow(X) != nrow(S)) { Xplus <- matrix(0,nrow(S)-nrow(X),ncol(X)) rownames(Xplus) <- rownames(S)[!(rownames(S) %in% rownames(X))] X <- rbind(X,Xplus) } L0 <- t(X)%*% S #DR equation 4 P0 <- t(X) %*% S %*% X #equation 5 if(ncol(P0)>1) { D <- diag(diag(P0)) # variances L <- t(L0) %*% invMatSqrt(D) #divide by sd eq 6 factor structure P <- invMatSqrt(D) %*% P0 %*% invMatSqrt(D) #converts P0 to correlations eq 7 if(orthog) {P <- diag(1,nfact)} #Lambda <- L %*% solve(P) #not simple structure --- better to use L *X %*% P eq 8 Lambda <- (L * X) %*% P #not equation 8, but produces Pattern flipf <- matrix(flip,ncol=nfact,nrow=nrow(Lambda)) Lambda <- Lambda*flipf #get the signs right L <- L *flipf colnames(Lambda) <- colnames(X)# paste0("CF",1:nfact) rownames(Lambda) <- colnames(R) colnames(P) <- rownames(P) <- colnames(Lambda) #Ls <- Lambda * abs(keys) #forced simple structure L <- L * X #force a simple structure } else { D= diag(1) L <- Lambda <- as.matrix(sqrt(diag(S))) rownames(L) <- colnames(R) if(is.null(colnames(L))) colnames(L) <- "CF1" P <- NULL Ls <- NULL Phi <-NULL} Lambda <- as.matrix(Lambda) scores <- factor.scores(x=original.data,f=L,Phi=P,method=method, missing=missing,impute=impute) residual <- R-L %*% t(Lambda) rownames(X) <- rownames(L) colnames(L) <- colnames(X) stats <- fa.stats(r=R,f=L ,phi=P,n.obs=n.obs, dof=dof) result <- list(loadings=L,Phi=P,Pattern=Lambda, communalities=H2, dof = dof, stats=stats, r=r, residual=residual, rms = stats$rms, RMSEA =stats$RMSEA[1], chi = stats$chi, BIC = stats$BIC, STATISTIC = stats$STATISTIC, scores= scores$scores, weights=scores$weights, model = X, #the original model as parsed Call=cl ) class(result) <- c("psych", "CFA") return(result) } sumLower <- function(R){ return( sum(R[lower.tri(R)]))} Spearman.h2 <- function(R){ #from Harman chapter 7 diag(R) <- 0 sumr <- colSums(R) sumr2 <- colSums(R^2) h2 <- (sumr^2 - sumr2)/(2*(sumLower(R)-sumr)) return(h2)} ########## #convert lavaan cfa instructions into matrix form #local function #12/11/25 to help CFA lavParse <- function(model) { short <- gsub(" ", "", model ) #drop all blanks #first, break by lines lines <- strsplit(short,"\n") #break into a new line for each factor fact <- strsplit(lines[[1]],"=~") # break into factor names and variables fnames <- 1:length(fact) vnames<- NULL for (i in 1:length(fact)) {fnames[i]<- fact[[i]] [1] vect <- strsplit(fact[[i]][2],"\\+") for (j in 1:length(vect[[1]])) {vnames <- c(vnames,vect[[1]][j])} } vnames <- vnames[!duplicated(vnames)] v.mat <- matrix(0,ncol=length(fact),nrow=length(vnames)) colnames(v.mat) <- fnames rownames(v.mat) <- vnames #now put the 1s in for (i in 1:length(fact)) {fnames[i]<- fact[[i]] [1] vect <- strsplit(fact[[i]][2],"\\+") for (j in 1:length(vect[[1]])) {v.mat[vect[[1]][j],i] <- 1 } } return(v.mat)} ##### CFA.bifactor <- function(r,keys=NULL,g=TRUE,cor="cor", use="pairwise", n.obs=NA, orthog=FALSE, weight=NULL, correct=0, method="regression", missing=FALSE,impute="none" ) { cl <- match.call() #save for result #first some housekeeping if(is.list(keys)) {X <- make.keys(r,keys)} else {if(is.matrix(keys)) {X <- keys} else {X <- lavParse(keys)} #just a matrix of 1, 0, -1 } #converts the keys variables to 1, 0 matrix (if not already in that form) if( is.list(keys)) {select <- sub("-","",unlist(keys))} else { select<- rownames(X)} # to speed up scoring one set of scales from many items if(any( !(select %in% colnames(r)) )) { cat("\nVariable names in keys are incorrectly specified. Offending items are ", select[which(!(select %in% colnames(r)))],"\n") stop("I am stopping because of improper input. See above for a list of bad item(s). ")} if(!g) {r <-r[,select] X <-X[select,,drop=FALSE]} #now, we use the keys to reverse code some items #do we need to find correlations, and if so, to flip them? if(!isCorrelation(r)) {n.obs <- nrow(r) original.data <- r #save these for second pass switch(cor, cor = {if(!is.null(weight)) {r <- cor.wt(r,w=weight)$r} else { r <- cor(r,use=use)} }, cov = {if(!is.null(weight)) {r <- cor.wt(r,w=weight,cor=FALSE)$r} else { r <- cov(r,use=use)} covar <- TRUE}, #fixed 10/30/25 wtd = { r <- cor.wt(r,w=weight)$r}, spearman = {r <- cor(r,use=use,method="spearman")}, kendall = {r <- cor(r,use=use,method="kendall")}, tet = {r <- tetrachoric(r,correct=correct,weight=weight)$rho}, poly = {r <- polychoric(r,correct=correct,weight=weight)$rho}, tetrachoric = {r <- tetrachoric(r,correct=correct,weight=weight)$rho}, polychoric = {r <- polychoric(r,correct=correct,weight=weight)$rho}, mixed = {r <- mixedCor(r,use=use,correct=correct)$rho}, Yuleb = {r <- YuleCor(r,,bonett=TRUE)$rho}, YuleQ = {r <- YuleCor(r,1)$rho}, YuleY = {r <- YuleCor(r,.5)$rho } ) } #flip correlations to go along with directions of keys flip <- X %*% rep(1,ncol(X)) #combine all keys into one to allow us to select the over all model flipd <- diag(nrow=ncol(r), ncol=ncol(r)) rownames(flipd) <- colnames(flipd)<- colnames(r) diag(flipd)[rownames(flipd) %in% rownames(flip)] <- flip r <- flipd %*% r %*% t(flipd) #negatively keyed variables are flipped gf <- CFA(r,g=g,cor=cor, use=use, n.obs=n.obs) n.obs <- gf$stats$n.obs resid.g <- gf$residual diag(resid.g) <- 1 group <- CFA(resid.g,keys,g=g,n.obs=n.obs) bifactor <- cbind(gf$loadings,group$loadings) h2 <- rowSums(bifactor^2) colnames(bifactor)[1] <- "g" result <- list(loadings=bifactor,communality=h2,n.obs=n.obs,stats=group$stats, Call=cl) class(result) <- c("psych", "CFA") return(result ) }