# Load required packages
library(psych)
library(performance)
library(GGally)
library(car)
library(writexl)

# Subset df_standardised to the eight columns that enter the analysis.
efa_vars <- df_standardised[, c(
  "TMT_A_RT", "TMT_B_RT",
  "Forwards_Total", "Backwards_Total",
  "Go", "NoGo", "Go_RT", "FalseAlarm"
)]

# 1. Adequate Sample Size
# Every row of df_standardised counts as one participant (rows containing
# missing values are included). The participants-per-variable ratio is then
# compared against a threshold of 5.
n_participants <- nrow(df_standardised)
n_variables <- ncol(efa_vars)

cat("Number of Participants:", n_participants, "\n")
cat("Number of Variables:", n_variables, "\n")
cat("Participants per Variable Ratio:", n_participants / n_variables, "\n")

# Report the verdict; the low-ratio case is tested first.
cat(
  if (n_participants / n_variables < 5) {
    "Sample size may be too small for reliable EFA.\n"
  } else {
    "Sample size is adequate for EFA.\n"
  }
)

# 2. Adequacy of Correlations Between Variables

# Bartlett's Test of Sphericity on the pairwise-complete correlation matrix.
cor_matrix <- cor(efa_vars, use = "pairwise.complete.obs")

# Sample size for the test. Because the correlations use pairwise deletion,
# nrow(efa_vars) overstates the number of observations behind each coefficient
# whenever values are missing, which inflates the chi-squared statistic.
# pairwise_n[i, j] is the number of rows where variables i and j are both
# observed; the smallest entry is used as a conservative n. With no missing
# data this equals nrow(efa_vars), so results are unchanged in that case.
pairwise_n <- crossprod(!is.na(efa_vars))
bartlett_test <- cortest.bartlett(cor_matrix, n = min(pairwise_n))

cat("\nBartlett’s Test of Sphericity:\n")
cat("Chi-squared:", bartlett_test$chisq, "\n")
cat("p-value:", bartlett_test$p.value, "\n")
cat("Degrees of Freedom:", bartlett_test$df, "\n")

# A significant result rejects the hypothesis that the correlation matrix is
# an identity matrix, i.e. the variables are correlated enough to factor.
if (bartlett_test$p.value < 0.05) {
  cat("Bartlett’s test is significant (p < 0.05), indicating sufficient correlations for EFA.\n")
} else {
  cat("Bartlett’s test is not significant, indicating insufficient correlations for EFA.\n")
}

# KMO Test
# Kaiser-Meyer-Olkin measure of sampling adequacy, computed by psych::KMO()
# from the raw variables. The overall value is judged against a 0.60 cut-off
# and the per-variable values are printed for inspection.
kmo_test <- KMO(efa_vars)
cat("\nKaiser-Meyer-Olkin (KMO) Measure of Sampling Adequacy:\n")
cat("Overall KMO:", kmo_test[["MSA"]], "\n")

cat(
  if (kmo_test[["MSA"]] < 0.60) {
    "KMO value is too low for EFA (< 0.60).\n"
  } else {
    "KMO value is acceptable for EFA (>= 0.60).\n"
  }
)

cat("KMO for each variable:\n")
print(kmo_test[["MSAi"]])

# 3. Linearity – Correlation Matrix
# Prints the pairwise-complete correlation matrix for visual inspection.
# NOTE(review): correlation coefficients alone cannot reveal non-linear
# relationships; confirm whether scatterplots were meant to accompany this.
cat("\nChecking pairwise correlations for linearity:\n")
print(cor_matrix)

# 5. Absence of Multicollinearity
# The determinant of the correlation matrix is compared against 0.00001; a
# value at or below that threshold is reported as possible multicollinearity.
cat("\nDeterminant of Correlation Matrix:\n")
determinant <- det(cor_matrix)
cat("Determinant of the correlation matrix:", determinant, "\n")

cat(
  if (determinant > 1e-05) {
    "No multicollinearity detected (Determinant > 0.00001).\n"
  } else {
    "Multicollinearity may be present (Determinant < 0.00001).\n"
  }
)

# 6. Outliers
cat("\nOutliers Check using the performance package (complete cases only):\n")

# Keep only rows with no missing values in any analysis variable. This copy
# is used for the outlier check only; efa_vars itself is left untouched.
efa_vars_complete <- efa_vars[stats::complete.cases(efa_vars), , drop = FALSE]

# Run the check on the complete-case data and show the result.
outliers_check <- performance::check_outliers(x = efa_vars_complete)
print(outliers_check)

# 7. Run PCA: two components with a promax rotation.
# scores = TRUE requests component scores (the original note describes these
# as regression scores). missing = TRUE allows scores to be computed for rows
# with missing values by imputing them; see the `missing` and `impute`
# arguments in ?psych::principal for the imputation rule that applies.
pca_result <- principal(
  efa_vars,
  nfactors = 2,
  rotate = "promax",
  scores = TRUE,
  missing = TRUE
)

# Print results
cat("\nPCA Component Loadings:\n")
print(pca_result, digits = 3, sort = TRUE)

# Same object again, hiding loadings below 0.3. The print() generic is used
# instead of calling the S3 method print.psych() by name: the generic
# dispatches to the same method, and works even when the method is registered
# but not exported from the package namespace.
cat("\nComponent Loadings (cutoff 0.3):\n")
print(pca_result, cut = 0.3)

# 8. Scree plot
# fa.parallel() is run for principal components (fa = "pc") with 100
# iterations.
fa.parallel(x = efa_vars, fa = "pc", n.iter = 100)

# 9. Extract component scores into df
# Scores are taken by column position from the PCA result.
# NOTE(review): psych may label or order rotated components differently from
# the "RC1"/"RC2" names used here; confirm with colnames(pca_result$scores).
df_standardised[["PCA_RC1"]] <- pca_result$scores[, 1]
df_standardised[["PCA_RC2"]] <- pca_result$scores[, 2]

# 10. Save final dataset as df2.xlsx
# The full data frame, including the two new score columns, is written to the
# current user's Desktop.
write_xlsx(
  x = df_standardised,
  path = path.expand("~/Desktop/df2.xlsx")
)