#' @title SPARK-X: Non-parametric Kernel-based SVG Detection
#'
#' @description
#' Detect spatially variable genes using SPARK-X, a non-parametric method
#' that tests for spatial expression patterns using multiple kernels.
#'
#' @name CalSVG_SPARKX
NULL


#' Detect SVGs using SPARK-X Method
#'
#' @description
#' SPARK-X is a scalable non-parametric method for identifying spatially
#' variable genes. It uses variance component score tests with multiple
#' spatial kernels (projection, Gaussian, and cosine) to detect various
#' types of spatial expression patterns.
#'
#' @param expr_matrix Numeric matrix of gene expression values.
#'   \itemize{
#'     \item Rows: genes
#'     \item Columns: spatial locations (spots/cells)
#'     \item Values: raw counts or normalized counts (NOT log-transformed)
#'   }
#'   Note: SPARK-X works best with count data, not log-transformed data.
#'
#' @param spatial_coords Numeric matrix of spatial coordinates.
#'   \itemize{
#'     \item Rows: spatial locations (must match columns of expr_matrix)
#'     \item Columns: x, y coordinates
#'   }
#'
#' @param kernel_option Character string specifying which kernels to use.
#'   \itemize{
#'     \item \code{"mixture"} (default): Test with all 11 kernels:
#'       1 projection + 5 Gaussian + 5 cosine. Most comprehensive but slower.
#'       Recommended for detecting diverse spatial patterns.
#'     \item \code{"single"}: Test with projection kernel only. Faster but
#'       may miss some pattern types.
#'   }
#'
#' @param adjust_method Character string for p-value adjustment.
#'   Default is "BY" (Benjamini-Yekutieli), which is more conservative and
#'   appropriate when tests may be correlated.
#'   Other options: "BH", "bonferroni", "holm", "none".
#'
#' @param n_threads Integer. Number of parallel threads. Default is 1.
#'   Accepted for interface compatibility only: the current implementation
#'   runs single-threaded and does not use this value.
#'
#' @param verbose Logical. Print progress messages. Default is TRUE.
#'
#' @return A data.frame with SVG detection results. Columns:
#'   \itemize{
#'     \item \code{gene}: Gene identifier
#'     \item \code{p.value}: Combined p-value across all kernels (ACAT method)
#'     \item \code{p.adj}: Multiple testing adjusted p-value
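#'     \item \code{stat_*}, \code{pval_*}: Test statistic and p-value of each
#'       individual kernel. \code{kernel_option = "single"} yields
#'       \code{stat_linear}/\code{pval_linear} only; \code{"mixture"}
#'       additionally yields \code{gaussian_1..5} and \code{cosine_1..5}.
#'       Rows are sorted by increasing combined p-value.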
#'   }
#'
#' @details
#' \strong{Method Overview:}
#'
#' SPARK-X uses a variance component score test framework:
#' \deqn{T_g = \frac{n \cdot y_g^T K y_g}{\|y_g\|^2}}
#'
#' where:
#' \itemize{
#'   \item y_g = expression vector for gene g
#'   \item K = spatial kernel matrix (derived from coordinates)
#'   \item n = number of spatial locations
#' }
#'
#' \strong{Kernel Types:}
#' \itemize{
#'   \item \code{Projection kernel}: Linear kernel based on scaled coordinates.
#'     Detects gradients and linear spatial trends.
#'   \item \code{Gaussian kernels}: Multiple bandwidth Gaussian RBF kernels.
#'     Detect localized hotspots of different sizes.
#'   \item \code{Cosine kernels}: Multiple frequency periodic kernels.
#'     Detect periodic/oscillating spatial patterns.
#' }
#'
#' \strong{P-value Computation:}
#' \itemize{
#'   \item Individual kernel p-values: Davies' method for quadratic forms
#'   \item Combined p-value: ACAT (Aggregated Cauchy Association Test)
#' }
#'
#' \strong{Advantages:}
#' \itemize{
#'   \item Non-parametric: No distributional assumptions
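#'   \item Scalable: the original SPARK-X formulation is O(n) in the number
#'     of locations; note that this implementation builds dense n x n kernel
#'     matrices, so its cost grows at least quadratically with n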
#'   \item Multiple kernels: Detects diverse pattern types
#'   \item Robust: ACAT combination handles correlated tests
#' }
#'
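#' \strong{Computational Considerations:}
#' \itemize{
#'   \item \code{mixture} option: tests 11 kernels instead of 1, so it is
#'     roughly an order of magnitude slower than \code{single}
#'   \item Memory: each kernel is a dense n x n matrix whose eigenvalues are
#'     computed, so memory grows as O(n^2) in the number of spots
#'   \item \code{n_threads} is not used by the current implementation
#' }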
#'
#' @examples
#' # Load example data
#' data(example_svg_data)
#' expr <- example_svg_data$counts[1:20, ]  # Use counts (not log)
#' coords <- example_svg_data$spatial_coords
#'
#' # Fast mode with single kernel (no extra dependencies)
#' results <- CalSVG_SPARKX(expr, coords, 
#'                          kernel_option = "single",
#'                          verbose = FALSE)
#' head(results)
#'
#' @references
#' Zhu, J., Sun, S., & Zhou, X. (2021). SPARK-X: non-parametric modeling
#' enables scalable and robust detection of spatial expression patterns
#' for large spatial transcriptomic studies. Genome Biology.
#'
#' @seealso
#' \code{\link{CalSVG}}, \code{\link{ACAT_combine}}
#'
#' @export
CalSVG_SPARKX <- function(expr_matrix,
                          spatial_coords,
                          kernel_option = c("mixture", "single"),
                          adjust_method = "BY",
                          n_threads = 1L,
                          verbose = TRUE) {

    # Resolve enumerated arguments up front so that a typo fails before any
    # expensive computation (p.adjust() would otherwise only fail at the end).
    kernel_option <- match.arg(kernel_option)
    adjust_method <- match.arg(adjust_method, stats::p.adjust.methods)

    # =========================================================================
    # Input Validation and Preprocessing
    # =========================================================================

    if (!is.matrix(expr_matrix)) {
        expr_matrix <- as.matrix(expr_matrix)
    }
    if (!is.matrix(spatial_coords)) {
        spatial_coords <- as.matrix(spatial_coords)
    }
    if (!is.numeric(expr_matrix)) {
        stop("expr_matrix must be numeric")
    }
    if (!is.numeric(spatial_coords)) {
        stop("spatial_coords must be numeric")
    }
    if (anyNA(expr_matrix)) {
        stop("expr_matrix contains missing values")
    }
    if (anyNA(spatial_coords)) {
        stop("spatial_coords contains missing values")
    }

    # Without gene names the output would silently lose its `gene` column.
    if (is.null(rownames(expr_matrix))) {
        rownames(expr_matrix) <- paste0("gene_", seq_len(nrow(expr_matrix)))
    }

    # Ensure matching samples
    if (is.null(colnames(expr_matrix))) {
        colnames(expr_matrix) <- paste0("spot_", seq_len(ncol(expr_matrix)))
    }
    if (is.null(rownames(spatial_coords))) {
        # Unnamed coordinates are matched positionally, which is only
        # meaningful when the dimensions agree.
        if (nrow(spatial_coords) != ncol(expr_matrix)) {
            stop("spatial_coords has no row names and its number of rows ",
                 "does not match the number of columns of expr_matrix")
        }
        rownames(spatial_coords) <- colnames(expr_matrix)
    }

    common_samples <- intersect(colnames(expr_matrix), rownames(spatial_coords))
    if (length(common_samples) == 0) {
        stop("No matching samples between expr_matrix and spatial_coords")
    }

    expr_matrix <- expr_matrix[, common_samples, drop = FALSE]
    spatial_coords <- spatial_coords[common_samples, , drop = FALSE]

    # Remove cells/spots with zero total counts
    keep_cells <- colSums(expr_matrix) > 0
    n_drop_cells <- sum(!keep_cells)
    if (n_drop_cells > 0) {
        if (verbose) {
            message(sprintf("Removing %d spots with zero counts", n_drop_cells))
        }
        expr_matrix <- expr_matrix[, keep_cells, drop = FALSE]
        spatial_coords <- spatial_coords[keep_cells, , drop = FALSE]
    }

    # Remove genes with zero counts
    keep_genes <- rowSums(expr_matrix) > 0
    n_drop_genes <- sum(!keep_genes)
    if (n_drop_genes > 0) {
        if (verbose) {
            message(sprintf("Removing %d genes with zero counts", n_drop_genes))
        }
        expr_matrix <- expr_matrix[keep_genes, , drop = FALSE]
    }

    n_genes <- nrow(expr_matrix)
    n_spots <- ncol(expr_matrix)

    if (n_genes == 0L) {
        stop("No genes with non-zero counts remain after filtering")
    }
    if (n_spots < 2L) {
        stop("At least two spots with non-zero counts are required")
    }

    if (verbose) {
        message("=== CalSVG_SPARKX ===")
        message(sprintf("  Genes: %d", n_genes))
        message(sprintf("  Spots: %d", n_spots))
        message(sprintf("  Kernel option: %s", kernel_option))
        # n_threads is part of the public interface but this implementation
        # is single-threaded; say so instead of ignoring it silently.
        if (isTRUE(n_threads > 1)) {
            message("  Note: n_threads is ignored (single-threaded implementation)")
        }
    }

    # =========================================================================
    # Pre-filter Genes and Hoist Per-gene Quantities
    # =========================================================================

    # Genes with (numerically) zero variance cannot be tested.
    gene_vars <- apply(expr_matrix, 1, stats::var)
    valid_genes <- which(gene_vars > 1e-10)
    if (length(valid_genes) < n_genes && verbose) {
        message(sprintf("  Filtering %d genes with zero variance", n_genes - length(valid_genes)))
    }

    # These depend on the gene only, not on the kernel, so compute them once
    # rather than once per kernel.
    gene_means <- unname(rowMeans(expr_matrix))
    gene_sumsq <- unname(rowSums(expr_matrix^2))
    # Gene-specific eigenvalue scaling: 1 - n * ybar^2 / ||y||^2
    gene_lambda <- 1 - n_spots * gene_means^2 / gene_sumsq

    # =========================================================================
    # Kernel Construction Helpers
    # =========================================================================

    # Double-centre a symmetric kernel. Algebraically identical to
    # H %*% K %*% H with H = I - 11'/n, but O(n^2) instead of O(n^3).
    center_kernel <- function(K) {
        row_means <- rowMeans(K)
        K - outer(row_means, row_means, "+") + mean(K)
    }

    # Projection kernel K = X (X'X)^{-1} X' on centred coordinates.
    compute_linear_kernel <- function(X) {
        XtX <- crossprod(X)
        XtX_inv <- tryCatch(solve(XtX), error = function(e) {
            MASS::ginv(XtX)  # Use pseudoinverse if singular
        })
        X %*% XtX_inv %*% t(X)
    }

    # Centred Gaussian RBF kernel for a given bandwidth.
    compute_gaussian_kernel <- function(dist_mat, bandwidth) {
        center_kernel(exp(-(dist_mat^2) / (2 * bandwidth^2)))
    }

    # Centred periodic (cosine) kernel for a given period.
    compute_cosine_kernel <- function(dist_mat, bandwidth) {
        center_kernel(cos(2 * pi * dist_mat / bandwidth))
    }

    # =========================================================================
    # Core SPARK-X Test Function
    # =========================================================================

    # Score-test every valid gene against one kernel. Returns a list with
    # `stat` and `pval`, both of length n_genes (NA for untestable genes).
    sparkx_test_kernel <- function(K) {
        # Only the positive part of the spectrum enters the null distribution.
        kernel_eigen <- eigen(K, symmetric = TRUE, only.values = TRUE)$values
        kernel_eigen <- kernel_eigen[kernel_eigen > 1e-10]

        stat_vec <- rep(NA_real_, n_genes)
        pval_vec <- rep(NA_real_, n_genes)

        for (g in valid_genes) {
            y_centered <- expr_matrix[g, ] - gene_means[g]
            quad_form <- sum(y_centered * drop(K %*% y_centered))

            # SPARK-X test statistic: T = n * y' K y / ||y||^2.
            # The denominator is the UNcentred sum of squares; the mean is
            # accounted for by scaling the eigenvalues with gene_lambda.
            # Dividing by the centred sum of squares as well would apply the
            # correction twice and make the p-values anti-conservative.
            stat_vec[g] <- n_spots * quad_form / gene_sumsq[g]

            scaled_eigen <- gene_lambda[g] * kernel_eigen
            scaled_eigen <- scaled_eigen[scaled_eigen > 1e-10]

            if (length(scaled_eigen) > 0) {
                pval_vec[g] <- davies_pvalue(stat_vec[g], scaled_eigen)
            }
        }

        list(stat = stat_vec, pval = pval_vec)
    }

    # =========================================================================
    # Run Tests with All Kernels
    # =========================================================================

    # Centre coordinates (the projection kernel is scale-invariant and the
    # distance-based kernels are translation-invariant).
    X_centered <- sweep(spatial_coords, 2L, colMeans(spatial_coords), "-")

    results_list <- list()

    # 1. Linear/Projection kernel
    if (verbose) message("Testing with linear (projection) kernel...")
    results_list[["linear"]] <- sparkx_test_kernel(compute_linear_kernel(X_centered))

    if (kernel_option == "mixture") {
        # Pairwise distances are only needed here, so "single" mode never
        # pays for them.
        dist_obj <- stats::dist(X_centered)
        positive_dists <- as.vector(dist_obj)
        # Duplicate coordinates give zero distances; keeping them could yield
        # a zero bandwidth and NaN kernels.
        positive_dists <- positive_dists[positive_dists > 0]
        if (length(positive_dists) == 0L) {
            stop("All spatial coordinates are identical; ",
                 "cannot build distance-based kernels")
        }
        bandwidths <- stats::quantile(positive_dists,
                                      probs = seq(0.2, 1, by = 0.2),
                                      names = FALSE)
        dist_mat <- unname(as.matrix(dist_obj))
        rm(dist_obj, positive_dists)

        # 2. Gaussian kernels (5 different bandwidths)
        for (i in seq_along(bandwidths)) {
            if (verbose) message(sprintf("Testing with Gaussian kernel %d (bw=%.2f)...", i, bandwidths[i]))
            results_list[[paste0("gaussian_", i)]] <-
                sparkx_test_kernel(compute_gaussian_kernel(dist_mat, bandwidths[i]))
        }

        # 3. Cosine/Periodic kernels (5 different periods)
        for (i in seq_along(bandwidths)) {
            if (verbose) message(sprintf("Testing with Cosine kernel %d (period=%.2f)...", i, bandwidths[i]))
            results_list[[paste0("cosine_", i)]] <-
                sparkx_test_kernel(compute_cosine_kernel(dist_mat, bandwidths[i]))
        }
    }

    # =========================================================================
    # Combine Results
    # =========================================================================

    if (verbose) message("Combining p-values...")

    # cbind() always yields a genes x kernels matrix; sapply() would collapse
    # to a plain vector when only one gene is present.
    all_pvals <- do.call(cbind, lapply(results_list, function(res) res$pval))

    # Combine p-values using ACAT (with NA handling and validation)
    combined_pvals <- apply(all_pvals, 1L, function(p) {
        p <- p[is.finite(p)]
        if (length(p) == 0L) return(NA_real_)
        # Clamp p-values to the open interval ACAT can handle
        p <- pmax(1e-300, pmin(1 - 1e-10, p))
        # Best effort: a failing combination marks the gene as NA
        tryCatch(ACAT_combine(p), error = function(e) NA_real_)
    })
    combined_pvals <- unname(combined_pvals)

    # Adjust p-values
    adjusted_pvals <- stats::p.adjust(combined_pvals, method = adjust_method)

    # =========================================================================
    # Create Output
    # =========================================================================

    results <- data.frame(
        gene = rownames(expr_matrix),
        p.value = combined_pvals,
        p.adj = adjusted_pvals,
        stringsAsFactors = FALSE
    )

    # Per-kernel statistics and p-values ("single" mode has only "linear")
    for (kernel_name in names(results_list)) {
        results[[paste0("stat_", kernel_name)]] <- results_list[[kernel_name]]$stat
        results[[paste0("pval_", kernel_name)]] <- results_list[[kernel_name]]$pval
    }

    # Sort by combined p-value (NA last)
    results <- results[order(results$p.value), , drop = FALSE]
    rownames(results) <- NULL

    # =========================================================================
    # Summary
    # =========================================================================

    if (verbose) {
        n_sig <- sum(results$p.adj < 0.05, na.rm = TRUE)
        message(sprintf("  Significant genes (adj.p < 0.05): %d", n_sig))
        message("=== Done ===")
    }

    return(results)
}
