#Developed 12/1/2025 - 12/21/2025 
#still have problems in estimating factor indeterminancy
#code taken from equations for Spearman method by
#CFA from Dhaene and Rosseel (SEM, 2024 (31) 1-13)
# Closed form estimation of Confirmatory factoring

# D and R discuss multiple approaches but recommend the Spearman algorithm as best
# follows a paper by Guttman 

CFA <- function(r,keys=NULL,g=FALSE,cor="cor", use="pairwise", n.obs=NA, orthog=FALSE, weight=NULL, correct=0, method="regression", missing=FALSE,impute="none" ) {
cl <- match.call()
# 3 ways of defining the model: a keys list ala scoreIems, matrix, or lavaan style
if(is.null(keys)) {X <- matrix(rep(1,ncol(r)),ncol=1)
    rownames(X) <- colnames(r)} else { 
 if(is.list(keys)) {X <- make.keys(r,keys)} else {if(is.matrix(keys)) {X <- keys} else {X <- lavParse(keys)}  #just a matrix of 1, 0, -1
 }  #converts the keys variables to 1, 0 matrix (if not already in that form)
}

if( is.list(keys)) {select <- sub("-","",unlist(keys))} else {
    select<- rownames(X)}  # to speed up scoring one set of scales from many items
if(any( !(select %in% colnames(r)) )) {
 cat("\nVariable names in keys are incorrectly specified. Offending items are ", select[which(!(select %in% colnames(r)))],"\n")
 stop("I am stopping because of improper input.   See above for a list of bad item(s). ")}
      	if(!g) {r <-r[,select] 
      	X <-X[select,,drop=FALSE]}

 if(!isCorrelation(r)) {n.obs <- nrow(r)
 original.data <- r   #save these for scores 
  switch(cor, 
       cor = {if(!is.null(weight))  {r <- cor.wt(r,w=weight)$r} else  {
                                     r <- cor(r,use=use)}
                                     },
       cov = {if(!is.null(weight))  {r <- cor.wt(r,w=weight,cor=FALSE)$r} else  {
                                     r <- cov(r,use=use)}
                                       covar <- TRUE},  #fixed 10/30/25
       wtd = { r <- cor.wt(r,w=weight)$r},
       spearman = {r <- cor(r,use=use,method="spearman")},
       kendall = {r <- cor(r,use=use,method="kendall")},
       tet = {r <- tetrachoric(r,correct=correct,weight=weight)$rho},
       poly = {r <- polychoric(r,correct=correct,weight=weight)$rho},
       tetrachoric = {r <- tetrachoric(r,correct=correct,weight=weight)$rho},
       polychoric = {r <- polychoric(r,correct=correct,weight=weight)$rho},
       mixed = {r <- mixedCor(r,use=use,correct=correct)$rho},
       Yuleb = {r <- YuleCor(r,,bonett=TRUE)$rho},
       YuleQ = {r <- YuleCor(r,1)$rho},
       YuleY = {r <- YuleCor(r,.5)$rho } 
       )
       
     R <- S <- r
     
         } else {R <- S <- r
     if(is.na(n.obs)){ n.obs <- 100
         cat("\n Number of observations not specified. Arbitrarily set to 100.\n")}
     original.data  <- r}
     
     
    diagS <- diag(S)  #save this for later

#flip correlations to go along with directions of keys

flip  <- X %*% rep(1,ncol(X))    #combine all keys into one   to allow us to select the over all model
if(is.null(keys)) { p1 <- principal(r,scores=FALSE)
    flip <- 1- 2* (p1$loadings < -.1)}
    
                    
flipd <- diag(nrow=ncol(R), ncol=ncol(R))
rownames(flipd) <- colnames(flipd)<- rownames(R)
diag(flipd)[rownames(flipd) %in% rownames(flip)] <- flip
S  <- flipd %*% S %*% t(flipd)   #negatively keyed variables are flipped

mask <- abs(X %*% t(X))
#S <- mask * S     #select a block diagonal form


#need to find the communalities by each factor using the Spearman method
nfact <- ncol(X)
nvar <- nrow(X)

dof <- nvar * (nvar-1)/2 - nvar - (!orthog) * nfact*(nfact-1)/2 #number of correlations - number of factor loadings estimated - correlations
keys <- X
X <- abs(X)  
H2 <- NULL
for (i in 1:nfact) {    #this is the brute force way of doing it, look more carefully at D and R
  H2 <- c(H2,Spearman.h2(S[X[,i]>0,X[,i]>0]))}   #H2 is the vector of 1 factor communalities
  
  
  diag(S) <- H2   #put the communalities on the diagonal


if(nrow(X) != nrow(S))  {
Xplus <- matrix(0,nrow(S)-nrow(X),ncol(X))
  rownames(Xplus) <- rownames(S)[!(rownames(S) %in% rownames(X))]

 X <- rbind(X,Xplus) }
L0 <- t(X)%*% S   #DR equation 4
P0 <- t(X) %*% S %*% X  #equation 5
if(ncol(P0)>1) {
   D <- diag(diag(P0))  # variances
   L <- t(L0) %*% invMatSqrt(D)  #divide by sd eq 6     factor structure
   P <- invMatSqrt(D) %*% P0 %*% invMatSqrt(D)   #converts P0 to correlations eq 7
if(orthog) {P <- diag(1,nfact)} 
#Lambda <- L %*% solve(P)  #not simple structure  --- better to use L *X  %*%  P     eq 8
Lambda <- (L * X) %*% P    #not equation 8, but produces Pattern
flipf <- matrix(flip,ncol=nfact,nrow=nrow(Lambda))

Lambda <- Lambda*flipf   #get the signs right
L <- L *flipf

colnames(Lambda) <-   colnames(X)# paste0("CF",1:nfact)
rownames(Lambda) <- colnames(R)

colnames(P) <- rownames(P) <- colnames(Lambda)
#Ls <- Lambda  * abs(keys)   #forced simple structure
L <- L * X       #force a simple structure

} else { D= diag(1)
       L <- Lambda <- as.matrix(sqrt(diag(S)))
       rownames(L) <- colnames(R)
       if(is.null(colnames(L))) colnames(L) <- "CF1"
       P <- NULL
       Ls <- NULL
       Phi  <-NULL}

Lambda <- as.matrix(Lambda)


scores <- factor.scores(x=original.data,f=L,Phi=P,method=method, missing=missing,impute=impute)

residual <- R-L %*% t(Lambda)
rownames(X) <- rownames(L) 
colnames(L) <- colnames(X)
stats <- fa.stats(r=R,f=L ,phi=P,n.obs=n.obs, dof=dof)
result <- list(loadings=L,Phi=P,Pattern=Lambda, communalities=H2, dof = dof,
                stats=stats, r=r, residual=residual,
  rms = stats$rms,
  RMSEA =stats$RMSEA[1],
  chi = stats$chi,
  BIC  = stats$BIC,
  STATISTIC = stats$STATISTIC,
  scores= scores$scores,
 weights=scores$weights,
 model = X,   #the original model as parsed
  Call=cl
  )
class(result) <- c("psych", "CFA")
return(result)
}

sumLower <- function(R){
   return( sum(R[lower.tri(R)]))}
   
Spearman.h2 <- function(R){   #from Harman chapter 7 
  diag(R) <- 0
  sumr <- colSums(R)
  sumr2 <- colSums(R^2)
  h2 <- (sumr^2 - sumr2)/(2*(sumLower(R)-sumr))
return(h2)}
  
  
  ##########
#convert lavaan cfa instructions into matrix form
#local function 
#12/11/25 to help CFA      
lavParse <- function(model) {
short <- gsub(" ", "", model )  #drop all blanks
#first, break by lines
lines <- strsplit(short,"\n") #break into a new line for each factor
fact <- strsplit(lines[[1]],"=~") #  break into factor names and variables
fnames <- 1:length(fact)
vnames<- NULL
 for (i in 1:length(fact)) {fnames[i]<- fact[[i]] [1]
    vect <- strsplit(fact[[i]][2],"\\+")
    for (j in 1:length(vect[[1]])) {vnames <- c(vnames,vect[[1]][j])}
    }

vnames <- vnames[!duplicated(vnames)]
v.mat <- matrix(0,ncol=length(fact),nrow=length(vnames))
colnames(v.mat) <- fnames
rownames(v.mat) <- vnames
#now put the 1s in
for (i in 1:length(fact)) {fnames[i]<- fact[[i]] [1]
    vect <- strsplit(fact[[i]][2],"\\+")
    for (j in 1:length(vect[[1]])) {v.mat[vect[[1]][j],i] <- 1
    }
    }
return(v.mat)}

#####


CFA.bifactor <- function(r,keys=NULL,g=TRUE,cor="cor", use="pairwise", n.obs=NA, orthog=FALSE, weight=NULL, correct=0, method="regression", missing=FALSE,impute="none" ) {
cl <- match.call()  #save for result
#first some housekeeping
 if(is.list(keys)) {X <- make.keys(r,keys)} else {if(is.matrix(keys)) {X <- keys} else {X <- lavParse(keys)}  #just a matrix of 1, 0, -1
 }  #converts the keys variables to 1, 0 matrix (if not already in that form)

if( is.list(keys)) {select <- sub("-","",unlist(keys))} else {
    select<- rownames(X)}  # to speed up scoring one set of scales from many items
if(any( !(select %in% colnames(r)) )) {
 cat("\nVariable names in keys are incorrectly specified. Offending items are ", select[which(!(select %in% colnames(r)))],"\n")
 stop("I am stopping because of improper input.   See above for a list of bad item(s). ")}
      	if(!g) {r <-r[,select] 
      	X <-X[select,,drop=FALSE]}
#now, we use the keys to reverse code some items
 
 #do we need to find correlations, and if so, to flip them?
 if(!isCorrelation(r)) {n.obs <- nrow(r)
 original.data <- r   #save these for second pass 
  switch(cor, 
       cor = {if(!is.null(weight))  {r <- cor.wt(r,w=weight)$r} else  {
                                     r <- cor(r,use=use)}
                                     },
       cov = {if(!is.null(weight))  {r <- cor.wt(r,w=weight,cor=FALSE)$r} else  {
                                     r <- cov(r,use=use)}
                                       covar <- TRUE},  #fixed 10/30/25
       wtd = { r <- cor.wt(r,w=weight)$r},
       spearman = {r <- cor(r,use=use,method="spearman")},
       kendall = {r <- cor(r,use=use,method="kendall")},
       tet = {r <- tetrachoric(r,correct=correct,weight=weight)$rho},
       poly = {r <- polychoric(r,correct=correct,weight=weight)$rho},
       tetrachoric = {r <- tetrachoric(r,correct=correct,weight=weight)$rho},
       polychoric = {r <- polychoric(r,correct=correct,weight=weight)$rho},
       mixed = {r <- mixedCor(r,use=use,correct=correct)$rho},
       Yuleb = {r <- YuleCor(r,,bonett=TRUE)$rho},
       YuleQ = {r <- YuleCor(r,1)$rho},
       YuleY = {r <- YuleCor(r,.5)$rho } 
       )
       }
  #flip correlations to go along with directions of keys

flip  <- X %*% rep(1,ncol(X))    #combine all keys into one   to allow us to select the over all model

flipd <- diag(nrow=ncol(r), ncol=ncol(r))
rownames(flipd) <- colnames(flipd)<- colnames(r)
diag(flipd)[rownames(flipd) %in% rownames(flip)] <- flip
r  <- flipd %*% r %*% t(flipd)   #negatively keyed variables are flipped
   	
      	
      	gf <- CFA(r,g=g,cor=cor, use=use, n.obs=n.obs)

n.obs <- gf$stats$n.obs
resid.g <- gf$residual
diag(resid.g) <- 1
group <- CFA(resid.g,keys,g=g,n.obs=n.obs)
bifactor <- cbind(gf$loadings,group$loadings)
h2 <- rowSums(bifactor^2)
colnames(bifactor)[1] <- "g"
result <- list(loadings=bifactor,communality=h2,n.obs=n.obs,stats=group$stats, Call=cl)
class(result) <- c("psych", "CFA")

return(result ) 
}