# EFA assumption checks and two-component PCA on df_standardised.
#
# Expects a data frame `df_standardised` to exist already and to contain the
# eight task variables listed in `efa_cols`. The script prints each assumption
# check to the console, draws a parallel-analysis scree plot, appends two
# component-score columns to `df_standardised`, and writes the result to
# ~/Desktop/df2.xlsx.

# Load required packages ----
library(psych)
library(performance)
library(GGally)
library(car)
library(writexl)

# Select the relevant variables from df_standardised ----
efa_cols <- c(
  "TMT_A_RT", "TMT_B_RT", "Forwards_Total", "Backwards_Total",
  "Go", "NoGo", "Go_RT", "FalseAlarm"
)

# Fail early with a readable message instead of "undefined columns selected".
missing_cols <- setdiff(efa_cols, names(df_standardised))
if (length(missing_cols) > 0) {
  stop(
    "df_standardised is missing EFA variables: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

efa_vars <- df_standardised[, efa_cols]

# 1. Adequate sample size ----
n_participants <- nrow(df_standardised)
n_variables <- ncol(efa_vars)

cat("Number of Participants:", n_participants, "\n")
cat("Number of Variables:", n_variables, "\n")
cat("Participants per Variable Ratio:", n_participants / n_variables, "\n")

# Rule of thumb used here: at least 5 participants per variable.
if (n_participants / n_variables >= 5) {
  cat("Sample size is adequate for EFA.\n")
} else {
  cat("Sample size may be too small for reliable EFA.\n")
}

# 2. Adequacy of correlations between variables ----

# Bartlett's test of sphericity on the pairwise-complete correlation matrix.
# NOTE(review): n is the total row count, while each correlation is computed
# only from the rows complete for that pair; with substantial missingness the
# effective n is smaller and the test will be optimistic.
cor_matrix <- cor(efa_vars, use = "pairwise.complete.obs")
bartlett_test <- cortest.bartlett(cor_matrix, n = nrow(efa_vars))

cat("\nBartlett’s Test of Sphericity:\n")
cat("Chi-squared:", bartlett_test$chisq, "\n")
cat("p-value:", bartlett_test$p.value, "\n")
cat("Degrees of Freedom:", bartlett_test$df, "\n")

if (bartlett_test$p.value < 0.05) {
  cat(
    "Bartlett’s test is significant (p < 0.05), indicating sufficient correlations for EFA.\n"
  )
} else {
  cat(
    "Bartlett’s test is not significant, indicating insufficient correlations for EFA.\n"
  )
}

# KMO measure of sampling adequacy (overall and per variable).
kmo_test <- KMO(efa_vars)

cat("\nKaiser-Meyer-Olkin (KMO) Measure of Sampling Adequacy:\n")
cat("Overall KMO:", kmo_test$MSA, "\n")

if (kmo_test$MSA >= 0.60) {
  cat("KMO value is acceptable for EFA (>= 0.60).\n")
} else {
  cat("KMO value is too low for EFA (< 0.60).\n")
}

cat("KMO for each variable:\n")
print(kmo_test$MSAi)

# 3. Linearity: inspect the correlation matrix ----
cat("\nChecking pairwise correlations for linearity:\n")
print(cor_matrix)

# 4. Absence of multicollinearity ----
# A determinant close to zero signals (near-)linear dependence among variables.
cat("\nDeterminant of Correlation Matrix:\n")
determinant <- det(cor_matrix)
cat("Determinant of the correlation matrix:", determinant, "\n")

if (determinant > 0.00001) {
  cat("No multicollinearity detected (Determinant > 0.00001).\n")
} else {
  cat("Multicollinearity may be present (Determinant < 0.00001).\n")
}

# 5. Outliers ----
cat("\nOutliers Check using the performance package (complete cases only):\n")

# Rows with any missing value are dropped for this check only; efa_vars itself
# is left untouched for the PCA below.
efa_vars_complete <- efa_vars[complete.cases(efa_vars), ]
outliers_check <- performance::check_outliers(efa_vars_complete)
print(outliers_check)

# 6. Run PCA ----
# Two components, promax (oblique) rotation, component scores requested.
# missing = TRUE lets psych handle missing values when computing the scores
# (see the psych::principal documentation for the imputation it applies).
pca_result <- principal(
  efa_vars,
  nfactors = 2,
  rotate = "promax",
  scores = TRUE,
  missing = TRUE
)

cat("\nPCA Component Loadings:\n")
print(pca_result, digits = 3, sort = TRUE)

# Dispatch through the print() generic rather than calling the S3 method
# print.psych() directly; the psych method receives `cut` either way.
cat("\nComponent Loadings (cutoff 0.3):\n")
print(pca_result, cut = 0.3)

# 7. Scree plot / parallel analysis ----
fa.parallel(efa_vars, fa = "pc", n.iter = 100)

# 8. Extract component scores into df_standardised ----
# Scores are taken by column position.
# NOTE(review): confirm against the printed loadings that the first and second
# score columns are the components you intend to label RC1 and RC2.
df_standardised$PCA_RC1 <- pca_result$scores[, 1]
df_standardised$PCA_RC2 <- pca_result$scores[, 2]

# 9. Save final dataset as df2.xlsx ----
write_xlsx(df_standardised, path = path.expand("~/Desktop/df2.xlsx"))