# File-level documentation stub: the roxygen block below is attached to `NULL`
# rather than to an object, and it uses the same topic name as the exported
# function defined later in this file.
#' @title nnSVG: Nearest-Neighbor Gaussian Process SVG Detection
#'
#' @description
#' Detect spatially variable genes using nnSVG, a method based on
#' nearest-neighbor Gaussian processes for scalable spatial modeling.
#'
#' @name CalSVG_nnSVG
NULL


#' Detect SVGs using nnSVG Method
#'
#' @description
#' nnSVG uses nearest-neighbor Gaussian processes (NNGP) to model spatial
#' correlation structure in gene expression. It performs likelihood ratio
#' tests comparing spatial vs. non-spatial models to identify SVGs.
#'
#' @param expr_matrix Numeric matrix of gene expression values.
#'   \itemize{
#'     \item Rows: genes
#'     \item Columns: spatial locations (spots/cells)
#'     \item Values: log-normalized counts (e.g., from scran::logNormCounts)
#'   }
#'
#' @param spatial_coords Numeric matrix of spatial coordinates.
#'   \itemize{
#'     \item Rows: spatial locations (must match columns of expr_matrix)
#'     \item Columns: x, y coordinates
#'   }
#'
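#' @param X Optional numeric matrix of covariates to regress out.
#'   \itemize{
#'     \item Rows: spatial locations (same order as spatial_coords)
#'     \item Columns: covariates (e.g., batch, cell type indicators)
#'   }
#'   Default is NULL (intercept-only model). When supplied, X is used as the
#'   complete design matrix: the non-spatial comparison model is fitted without
#'   adding an intercept, so include a column of ones if one is wanted.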
#'
#' @param n_neighbors Integer. Number of nearest neighbors for NNGP model.
#'   Default is 10.
#'   \itemize{
#'     \item 5-10: Faster, captures local patterns
#'     \item 15-20: Better likelihood estimates, slower
#'   }
#'   Values > 15 rarely improve results but increase computation time.
#'
#' @param order Character string specifying coordinate ordering scheme.
#'   \itemize{
#'     \item \code{"AMMD"} (default): Approximate Maximum Minimum Distance.
#'       Better for most datasets. Requires >= 65 spots.
#'     \item \code{"Sum_coords"}: Order by sum of coordinates.
#'       Use for very small datasets (< 65 spots).
#'   }
#'
#' @param cov_model Character string specifying the covariance function.
#'   Default is "exponential".
#'   \itemize{
#'     \item \code{"exponential"}: Most commonly used, computationally stable
#'     \item \code{"gaussian"}: Smoother patterns, requires stabilization
#'     \item \code{"spherical"}: Finite range correlation
#'     \item \code{"matern"}: Flexible smoothness (includes additional nu parameter)
#'   }
#'
#' @param adjust_method Character string for p-value adjustment.
#'   Default is "BH" (Benjamini-Hochberg).
#'
#' @param n_threads Integer. Number of parallel threads. Default is 1.
#'   Set to number of available cores for faster computation.
#'
#' @param verbose Logical. Print progress messages. Default is FALSE.
#'
#' @return A data.frame with SVG detection results. Columns:
#'   \itemize{
#'     \item \code{gene}: Gene identifier
#'     \item \code{sigma.sq}: Spatial variance estimate (sigma^2)
#'     \item \code{tau.sq}: Nonspatial variance estimate (tau^2, nugget)
#'     \item \code{phi}: Range parameter estimate (controls spatial correlation decay)
#'     \item \code{prop_sv}: Proportion of spatial variance = sigma.sq / (sigma.sq + tau.sq)
#'     \item \code{loglik}: Log-likelihood of spatial model
#'     \item \code{loglik_lm}: Log-likelihood of non-spatial model (linear model)
#'     \item \code{LR_stat}: Likelihood ratio test statistic = -2 * (loglik_lm - loglik)
#'     \item \code{rank}: Rank by LR statistic (1 = highest)
#'     \item \code{p.value}: P-value from chi-squared distribution (df = 2)
#'     \item \code{p.adj}: Adjusted p-value
#'     \item \code{runtime}: Computation time per gene (seconds)
#'   }
#'
#' @details
#' \strong{Method Overview:}
#'
#' nnSVG models gene expression as a Gaussian process:
#' \deqn{y = X\beta + \omega + \epsilon}
#'
#' where:
#' \itemize{
#'   \item y = expression vector
#'   \item X = covariate matrix, beta = coefficients
#'   \item omega ~ GP(0, sigma^2 * C(phi)) = spatial random effect
#'   \item epsilon ~ N(0, tau^2) = non-spatial noise
#'   \item C(phi) = covariance function with range phi
#' }
#'
#' \strong{Nearest-Neighbor Approximation:}
#' Full GP has O(n^3) complexity. NNGP approximates using only k nearest
#' neighbors, reducing complexity to O(n * k^3) = O(n).
#'
#' \strong{Statistical Test:}
#' Likelihood ratio test comparing:
#' \itemize{
#'   \item H0 (null): y = X*beta + epsilon (no spatial effect)
#'   \item H1 (alternative): y = X*beta + omega + epsilon (with spatial effect)
#' }
#' LR statistic follows chi-squared with df = 2 (testing sigma.sq and phi).
#'
#' \strong{Effect Size:}
#' Proportion of spatial variance (prop_sv) measures effect size:
#' \itemize{
#'   \item prop_sv near 1: Strong spatial pattern
#'   \item prop_sv near 0: Little spatial structure
#' }
#'
#' \strong{Computational Notes:}
#' \itemize{
#'   \item Requires BRISC package for NNGP fitting
#'   \item O(n) complexity per gene with NNGP approximation
#'   \item Parallelization over genes provides good speedup
#'   \item Memory: O(n * k) per gene
#' }
#'
#' @examples
#' # Load example data
#' data(example_svg_data)
#' expr <- example_svg_data$logcounts[1:10, ]  # Small subset
#' coords <- example_svg_data$spatial_coords
#'
#' \donttest{
#' # Basic usage (requires BRISC package)
#' if (requireNamespace("BRISC", quietly = TRUE)) {
#'     results <- CalSVG_nnSVG(expr, coords, verbose = FALSE)
#'     head(results)
#' }
#' }
#'
#' @references
#' Weber, L.M. et al. (2023) nnSVG for the scalable identification of
#' spatially variable genes using nearest-neighbor Gaussian processes.
#' Nature Communications.
#'
#' Datta, A. et al. (2016) Hierarchical Nearest-Neighbor Gaussian Process
#' Models for Large Geostatistical Datasets. JASA.
#'
#' @seealso
#' \code{\link{CalSVG}}, BRISC package documentation
#'
#' @export
CalSVG_nnSVG <- function(expr_matrix,
                         spatial_coords,
                         X = NULL,
                         n_neighbors = 10L,
                         order = c("AMMD", "Sum_coords"),
                         cov_model = c("exponential", "gaussian", "spherical", "matern"),
                         adjust_method = "BH",
                         n_threads = 1L,
                         verbose = FALSE) {

    # Match arguments
    order <- match.arg(order)
    cov_model <- match.arg(cov_model)

    # =========================================================================
    # Check BRISC Dependency
    # =========================================================================

    if (!requireNamespace("BRISC", quietly = TRUE)) {
        stop(
            "Package 'BRISC' is required for nnSVG method.\n",
            "Please install it with: install.packages('BRISC')\n",
            "BRISC provides efficient nearest-neighbor Gaussian process estimation."
        )
    }

    # =========================================================================
    # Input Validation
    # =========================================================================

    if (!is.matrix(expr_matrix)) {
        expr_matrix <- as.matrix(expr_matrix)
    }

    if (!is.matrix(spatial_coords)) {
        spatial_coords <- as.matrix(spatial_coords)
    }

    if (!is.numeric(spatial_coords)) {
        stop("spatial_coords must be numeric")
    }

    # Without row names, data.frame(gene = NULL, ...) would silently drop the
    # gene column and the sorted output could no longer be mapped to genes.
    if (is.null(rownames(expr_matrix))) {
        rownames(expr_matrix) <- paste0("gene_", seq_len(nrow(expr_matrix)))
    }

    # Ensure matching samples
    if (is.null(colnames(expr_matrix))) {
        colnames(expr_matrix) <- paste0("spot_", seq_len(ncol(expr_matrix)))
    }
    if (is.null(rownames(spatial_coords))) {
        # Unnamed coordinates are taken to be in expression-column order, which
        # is only meaningful when the two dimensions agree.
        if (nrow(spatial_coords) != ncol(expr_matrix)) {
            stop(
                "spatial_coords has no row names and its number of rows ",
                "does not match the number of columns of expr_matrix"
            )
        }
        rownames(spatial_coords) <- colnames(expr_matrix)
    }

    # Remember the caller's coordinate order so that X (documented as being in
    # the same order as spatial_coords) can be realigned after subsetting.
    coord_names <- rownames(spatial_coords)

    common_samples <- intersect(colnames(expr_matrix), coord_names)
    if (length(common_samples) == 0) {
        stop("No matching samples between expr_matrix and spatial_coords")
    }

    expr_matrix <- expr_matrix[, common_samples, drop = FALSE]
    spatial_coords <- spatial_coords[common_samples, , drop = FALSE]

    n_genes <- nrow(expr_matrix)
    n_spots <- ncol(expr_matrix)

    # Check minimum spots for AMMD ordering
    if (order == "AMMD" && n_spots < 65) {
        warning("AMMD ordering requires >= 65 spots. Switching to Sum_coords.")
        order <- "Sum_coords"
    }

    # Validate covariates and bring them into the same spot order as the
    # (possibly reordered / subset) expression and coordinate matrices.
    if (!is.null(X)) {
        if (!is.matrix(X)) {
            X <- as.matrix(X)
        }
        if (is.logical(X)) {
            storage.mode(X) <- "numeric"
        }
        if (!is.numeric(X)) {
            stop("X must be a numeric matrix of covariates")
        }

        if (!is.null(rownames(X)) && all(common_samples %in% rownames(X))) {
            # Named covariates: align by spot name.
            X <- X[common_samples, , drop = FALSE]
        } else if (nrow(X) == length(coord_names)) {
            # Unnamed covariates in the caller's spatial_coords order.
            X <- X[match(common_samples, coord_names), , drop = FALSE]
        } else if (nrow(X) != n_spots) {
            stop("X must have same number of rows as spots in expr_matrix")
        }
        # Otherwise nrow(X) == n_spots: X is used as supplied.
    }

    if (verbose) {
        message("=== CalSVG_nnSVG ===")
        message(sprintf("  Genes: %d", n_genes))
        message(sprintf("  Spots: %d", n_spots))
        message(sprintf("  Neighbors: %d", n_neighbors))
        message(sprintf("  Covariance model: %s", cov_model))
    }

    # =========================================================================
    # Scale Coordinates
    # =========================================================================

    # Scale coordinates proportionally (preserving aspect ratio): every axis is
    # shifted to start at 0 and divided by the widest axis range.
    col_min <- apply(spatial_coords, 2, min)
    col_max <- apply(spatial_coords, 2, max)
    range_all <- max(col_max - col_min)
    if (!is.finite(range_all) || range_all <= 0) {
        stop("spatial_coords must contain finite coordinates spanning a non-zero range")
    }
    coords_scaled <- sweep(spatial_coords, 2, col_min) / range_all

    # =========================================================================
    # Compute Ordering and Neighbors
    # =========================================================================

    # Ordering and neighbor sets depend only on the coordinates, so they are
    # computed once and reused for every gene.
    if (verbose) message("Computing coordinate ordering...")
    ordering <- BRISC::BRISC_order(coords_scaled, order = order, verbose = FALSE)

    if (verbose) message("Computing nearest neighbors...")
    neighbor_info <- BRISC::BRISC_neighbor(
        coords_scaled,
        n.neighbors = n_neighbors,
        n_omp = 1,
        search.type = "tree",
        ordering = ordering,
        verbose = FALSE
    )

    # =========================================================================
    # Fit NNGP Models
    # =========================================================================

    if (verbose) message("Fitting NNGP models for each gene...")

    # Record returned for genes that are skipped or whose fit fails.
    failed_fit <- function(error = NA_character_) {
        list(
            sigma.sq = NA_real_, tau.sq = NA_real_, phi = NA_real_,
            loglik = NA_real_, success = FALSE, runtime = 0, error = error
        )
    }

    # Function to fit one gene
    # Note: BRISC's n_omp=1 is used to avoid OpenMP conflicts with R parallel
    fit_one_gene <- function(gene_idx) {
        y <- expr_matrix[gene_idx, ]

        # Skip genes with (near-)zero or undefined variance (fast path).
        # var() is NA when y contains NA; treat that as a skipped gene instead
        # of failing on `if (NA)`.
        y_var <- var(y)
        if (is.na(y_var) || y_var < 1e-10) {
            return(failed_fit())
        }

        fit <- NULL
        elapsed <- system.time({
            fit <- tryCatch({
                brisc_out <- BRISC::BRISC_estimation(
                    coords = coords_scaled,
                    y = y,
                    x = X,
                    n.neighbors = n_neighbors,
                    n_omp = 1,  # Single thread to avoid conflicts
                    order = order,
                    cov.model = cov_model,
                    search.type = "tree",
                    verbose = FALSE,
                    ordering = ordering,
                    neighbor = neighbor_info
                )

                list(
                    sigma.sq = brisc_out$Theta["sigma.sq"],
                    tau.sq = brisc_out$Theta["tau.sq"],
                    phi = brisc_out$Theta["phi"],
                    loglik = brisc_out$log_likelihood,
                    success = TRUE,
                    error = NA_character_
                )
            }, error = function(e) {
                # Keep the message so failures can be reported instead of
                # disappearing silently.
                failed_fit(error = conditionMessage(e))
            })
        })[["elapsed"]]

        fit$runtime <- elapsed
        fit
    }

    # Run for all genes (with parallelization if available)
    if (n_threads > 1) {
        if (requireNamespace("BiocParallel", quietly = TRUE)) {
            # NOTE(review): MulticoreParam is fork-based; confirm that it gives
            # real parallelism on Windows or switch backend there.
            BPPARAM <- BiocParallel::MulticoreParam(workers = n_threads)
            results_list <- BiocParallel::bplapply(
                seq_len(n_genes),
                fit_one_gene,
                BPPARAM = BPPARAM
            )
        } else if (.Platform$OS.type != "windows") {
            results_list <- parallel::mclapply(
                seq_len(n_genes),
                fit_one_gene,
                mc.cores = n_threads
            )
        } else {
            if (verbose) message("  Parallel unavailable on Windows without BiocParallel")
            results_list <- lapply(seq_len(n_genes), fit_one_gene)
        }
    } else {
        show_progress <- verbose && n_genes > 50
        if (show_progress) {
            pb <- txtProgressBar(min = 0, max = n_genes, style = 3)
        }

        results_list <- lapply(seq_len(n_genes), function(i) {
            fit <- fit_one_gene(i)
            if (show_progress) setTxtProgressBar(pb, i)
            fit
        })

        if (show_progress) close(pb)
    }

    # A crashed parallel worker can hand back NULL or a try-error instead of a
    # result list; normalise those to a failed fit so extraction below is safe.
    results_list <- lapply(results_list, function(res) {
        if (is.list(res) && !is.null(res$success)) {
            res
        } else {
            failed_fit(error = "parallel worker did not return a result")
        }
    })

    # =========================================================================
    # Compute Non-spatial Model Likelihoods
    # =========================================================================

    if (verbose) message("Computing non-spatial model likelihoods...")

    # The null-model design is identical for every gene, so build it once.
    # It is used without an added intercept (`- 1`), mirroring the design that
    # is handed to the spatial fit.
    X_lm <- if (is.null(X)) rep(1, n_spots) else X

    loglik_lm <- vapply(seq_len(n_genes), function(i) {
        y <- expr_matrix[i, ]
        tryCatch({
            as.numeric(logLik(lm(y ~ X_lm - 1)))
        }, error = function(e) NA_real_)
    }, numeric(1))

    # =========================================================================
    # Compile Results
    # =========================================================================

    # Extract one numeric field per gene; vapply guarantees a plain numeric
    # vector (no names, never a list) even for zero genes or missing fields.
    pull_numeric <- function(field) {
        vapply(results_list, function(res) {
            as.numeric(res[[field]])[1]
        }, numeric(1))
    }

    sigma.sq <- pull_numeric("sigma.sq")
    tau.sq <- pull_numeric("tau.sq")
    phi <- pull_numeric("phi")
    loglik <- pull_numeric("loglik")
    runtime <- pull_numeric("runtime")

    # Proportion of spatial variance
    prop_sv <- sigma.sq / (sigma.sq + tau.sq)

    # Likelihood ratio test; negative values (numerical noise) are clamped to 0
    # and NA is preserved for failed fits.
    LR_stat <- pmax(-2 * (loglik_lm - loglik), 0)

    # P-values (chi-squared with df = 2). The upper tail is requested directly:
    # 1 - pchisq(...) rounds to exactly 0 for large statistics.
    pval <- pchisq(LR_stat, df = 2, lower.tail = FALSE)

    # Adjusted p-values
    padj <- p.adjust(pval, method = adjust_method)

    # Rank by LR statistic (1 = largest); failed fits (NA) are ranked last
    gene_rank <- base::rank(-LR_stat, ties.method = "min", na.last = TRUE)

    # Create results data frame (unified column names: p.value, p.adj)
    results <- data.frame(
        gene = rownames(expr_matrix),
        sigma.sq = sigma.sq,
        tau.sq = tau.sq,
        phi = phi,
        prop_sv = prop_sv,
        loglik = loglik,
        loglik_lm = loglik_lm,
        LR_stat = LR_stat,
        rank = gene_rank,
        p.value = pval,
        p.adj = padj,
        runtime = runtime,
        stringsAsFactors = FALSE
    )

    # Sort by rank. `order` is also the name of a character argument of this
    # function, so the base function is referenced explicitly.
    results <- results[base::order(results$rank), ]
    rownames(results) <- NULL

    # =========================================================================
    # Summary
    # =========================================================================

    if (verbose) {
        n_sig <- sum(results$p.adj < 0.05, na.rm = TRUE)
        n_failed <- sum(is.na(results$sigma.sq))
        message(sprintf("  Significant genes (adj.p < 0.05): %d", n_sig))
        if (n_failed > 0) {
            message(sprintf("  Failed fits: %d", n_failed))
        }

        # Surface the first distinct fitting error, if any were caught.
        fit_errors <- vapply(results_list, function(res) {
            msg <- res$error
            if (is.null(msg)) NA_character_ else as.character(msg)[1]
        }, character(1))
        fit_errors <- unique(fit_errors[!is.na(fit_errors)])
        if (length(fit_errors) > 0) {
            message(sprintf("  First fitting error: %s", fit_errors[1]))
        }

        message(sprintf("  Total runtime: %.1f seconds", sum(results$runtime, na.rm = TRUE)))
        message("=== Done ===")
    }

    return(results)
}
