# Documentation stub: the roxygen block below is not attached to a function
# definition, so it is anchored on a bare NULL and named through @name.
#' @title binSpect: Binary Spatial Enrichment Test for SVG Detection
#'
#' @description
#' Detect spatially variable genes using the binSpect approach from Giotto.
#' This method binarizes gene expression and tests for spatial enrichment
#' of high-expressing cells using Fisher's exact test.
#'
#' @name CalSVG_binSpect
NULL


#' Detect SVGs using binSpect Method
#'
#' @description
#' Identifies spatially variable genes by:
#' 1. Binarizing gene expression (high/low)
#' 2. Building a spatial neighborhood network
#' 3. Testing whether high-expressing cells tend to be neighbors of other
#'    high-expressing cells more than expected by chance
#'
#' @param expr_matrix Numeric matrix of gene expression values.
#'   \itemize{
#'     \item Rows: genes
#'     \item Columns: spatial locations (spots/cells)
#'     \item Values: normalized expression (e.g., log counts or normalized counts)
#'   }
#'
#' @param spatial_coords Numeric matrix of spatial coordinates.
#'   \itemize{
#'     \item Rows: spatial locations (must match columns of expr_matrix)
#'     \item Columns: x, y (and optionally z) coordinates
#'   }
#'
#' @param bin_method Character string specifying binarization method.
#'   \itemize{
#'     \item \code{"kmeans"} (default): K-means clustering with k=2.
#'       Automatically separates high and low expression groups.
#'       Robust to different expression distributions.
#'     \item \code{"rank"}: Top percentage by expression rank.
#'       More consistent across genes with different distributions.
#'       Controlled by \code{rank_percent} parameter.
#'   }
#'
#' @param rank_percent Numeric (0-100). For \code{bin_method = "rank"},
#'   the percentage of cells to classify as "high expressing".
#'   Default is 30 (top 30% are "high").
#'   \itemize{
#'     \item Lower values (10-20%): Focus on highly expressed cells
#'     \item Higher values (40-50%): Include moderately expressed cells
#'   }
#'
#' @param network_method Character string specifying spatial network construction.
#'   \itemize{
#'     \item \code{"delaunay"} (default): Delaunay triangulation
#'     \item \code{"knn"}: K-nearest neighbors
#'   }
#'
#' @param k Integer. Number of neighbors for KNN network. Default is 10.
#'
#' @param do_fisher_test Logical. Whether to perform Fisher's exact test.
#'   Default is TRUE.
#'   \itemize{
#'     \item TRUE: Returns p-values from Fisher's exact test
#'     \item FALSE: Returns only odds ratios (faster)
#'   }
#'
#' @param adjust_method Character string for p-value adjustment.
#'   Default is "fdr" (Benjamini-Hochberg). See \code{p.adjust()} for options.
#'
#' @param n_threads Integer. Number of parallel threads. Default is 1.
#'
#' @param verbose Logical. Print progress messages. Default is TRUE.
#'
#' @return A data.frame with SVG detection results, sorted by significance/score.
#'   Columns:
#'   \itemize{
#'     \item \code{gene}: Gene identifier
#'     \item \code{estimate}: Odds ratio from 2x2 contingency table.
#'       OR > 1 indicates spatial clustering of high-expressing cells.
#'     \item \code{p.value}: P-value from Fisher's exact test
#'       (\code{NA} when \code{do_fisher_test = FALSE})
#'     \item \code{p.adj}: Adjusted p-value
#'     \item \code{score}: Combined score = -log10(p.value) * estimate.
#'       When \code{do_fisher_test = FALSE} the score is the odds ratio itself.
#'     \item \code{high_expr_count}: Number of high-expressing cells
#'   }
#'
#' @details
#' \strong{Method Overview:}
#'
#' binSpect constructs a 2x2 contingency table for each gene based on:
#' \itemize{
#'   \item Cell A expression: High (1) or Low (0)
#'   \item Cell B expression: High (1) or Low (0)
#' }
#'
#' For all pairs of neighboring cells (edges in the spatial network):
#' \tabular{lcc}{
#'   \tab Cell B Low \tab Cell B High \cr
#'   Cell A Low \tab n_00 \tab n_01 \cr
#'   Cell A High \tab n_10 \tab n_11 \cr
#' }
#'
#' \strong{Statistical Test:}
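#' Fisher's exact test (two-sided, the \code{fisher.test()} default) is used
#' to test whether the expression states of neighboring cells are independent.
#' Use the odds ratio to tell enrichment of n_11 (both neighbors high, OR > 1)
#' apart from depletion (OR < 1).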
#'
#' \strong{Odds Ratio Interpretation:}
#' \itemize{
#'   \item OR = 1: No spatial pattern
#'   \item OR > 1: High-expressing cells cluster together (positive spatial pattern)
#'   \item OR < 1: High-expressing cells avoid each other (negative pattern)
#' }
#'
#' \strong{Advantages:}
#' \itemize{
#'   \item Fast computation (no covariance matrix inversion)
#'   \item Robust to outliers through binarization
#'   \item Interpretable odds ratio statistic
#' }
#'
#' \strong{Considerations:}
#' \itemize{
#'   \item Binarization threshold affects results
#'   \item K-means may produce unstable results when expression is not
#'     clearly bimodal (e.g., unimodal distributions)
#'   \item Rank method more stable but arbitrary threshold
#' }
#'
#' @examples
#' # Load example data
#' data(example_svg_data)
#' expr <- example_svg_data$logcounts[1:20, ]
#' coords <- example_svg_data$spatial_coords
#' 
#' \donttest{
#' # Basic usage (requires RANN package)
#' if (requireNamespace("RANN", quietly = TRUE)) {
#'     results <- CalSVG_binSpect(expr, coords, 
#'                                network_method = "knn", k = 10,
#'                                verbose = FALSE)
#'     head(results)
#' }
#' }
#'
#' @references
#' Dries, R. et al. (2021) Giotto: a toolbox for integrative analysis and
#' visualization of spatial expression data. Genome Biology.
#'
#' @seealso
#' \code{\link{CalSVG}}, \code{\link{binarize_expression}},
#' \code{\link{buildSpatialNetwork}}
#'
#' @export
CalSVG_binSpect <- function(expr_matrix,
                            spatial_coords,
                            bin_method = c("kmeans", "rank"),
                            rank_percent = 30,
                            network_method = c("delaunay", "knn"),
                            k = 10L,
                            do_fisher_test = TRUE,
                            adjust_method = "fdr",
                            n_threads = 1L,
                            verbose = TRUE) {

    # Resolve enumerated choices; match.arg() errors on unsupported values
    bin_method <- match.arg(bin_method)
    network_method <- match.arg(network_method)

    # =========================================================================
    # Input Validation
    # =========================================================================

    if (!is.matrix(expr_matrix)) {
        expr_matrix <- as.matrix(expr_matrix)
    }

    if (!is.matrix(spatial_coords)) {
        spatial_coords <- as.matrix(spatial_coords)
    }

    # Ensure matching samples
    if (is.null(colnames(expr_matrix))) {
        colnames(expr_matrix) <- paste0("spot_", seq_len(ncol(expr_matrix)))
    }
    if (is.null(rownames(spatial_coords))) {
        # Unnamed coordinates are paired with expression columns by position,
        # which is only meaningful when both have the same number of spots.
        if (nrow(spatial_coords) != ncol(expr_matrix)) {
            stop(sprintf(
                paste0("spatial_coords has no row names and its number of ",
                       "rows (%d) does not match the number of columns of ",
                       "expr_matrix (%d)"),
                nrow(spatial_coords), ncol(expr_matrix)
            ))
        }
        rownames(spatial_coords) <- colnames(expr_matrix)
    }

    common_samples <- intersect(colnames(expr_matrix), rownames(spatial_coords))
    if (length(common_samples) == 0) {
        stop("No matching samples between expr_matrix and spatial_coords")
    }

    # Restrict both inputs to the shared spots, in the same order
    expr_matrix <- expr_matrix[, common_samples, drop = FALSE]
    spatial_coords <- spatial_coords[common_samples, , drop = FALSE]

    n_genes <- nrow(expr_matrix)
    n_spots <- ncol(expr_matrix)

    # data.frame() silently drops NULL columns, so an unnamed expression
    # matrix would otherwise yield a result without a 'gene' column.
    gene_ids <- rownames(expr_matrix)
    if (is.null(gene_ids)) {
        gene_ids <- paste0("gene_", seq_len(n_genes))
    }

    if (verbose) {
        message("=== CalSVG_binSpect ===")
        message(sprintf("  Genes: %d", n_genes))
        message(sprintf("  Spots: %d", n_spots))
        message(sprintf("  Binarization: %s", bin_method))
        message(sprintf("  Network: %s", network_method))
    }

    # =========================================================================
    # Build Spatial Network
    # =========================================================================

    if (verbose) message("Building spatial neighborhood network...")

    W <- buildSpatialNetwork(
        coords = spatial_coords,
        method = network_method,
        k = k,
        binary = TRUE,
        verbose = FALSE
    )

    W <- W[common_samples, common_samples]

    # Convert adjacency matrix to edge list
    edges <- which(W > 0, arr.ind = TRUE)
    # Keep only unique edges (undirected): one entry per pair, row < col
    edges <- edges[edges[, 1] < edges[, 2], , drop = FALSE]

    spatial_network <- data.frame(
        from = colnames(W)[edges[, 1]],
        to = colnames(W)[edges[, 2]],
        stringsAsFactors = FALSE
    )

    n_edges <- nrow(spatial_network)

    if (verbose) {
        message(sprintf("  Network: %d edges", n_edges))
    }

    # =========================================================================
    # Binarize Expression
    # =========================================================================

    if (verbose) message("Binarizing gene expression...")

    bin_matrix <- binarize_expression(
        expr_matrix,
        method = bin_method,
        rank_percent = rank_percent,
        n_threads = n_threads,
        verbose = FALSE
    )

    # =========================================================================
    # Calculate Spatial Enrichment
    # =========================================================================

    if (verbose) message("Computing spatial enrichment...")

    # Create column index lookup (spot name -> column position)
    col_idx <- seq_len(ncol(bin_matrix))
    names(col_idx) <- colnames(bin_matrix)

    # Edge endpoints as column positions; computed once and reused per gene
    from_idx <- col_idx[spatial_network$from]
    to_idx <- col_idx[spatial_network$to]

    # Fixed level order so the table always has all four cells, even when a
    # combination never occurs for a gene.
    combo_levels <- c("0-0", "0-1", "1-0", "1-1")

    # Compute the enrichment statistics for one gene (row of bin_matrix).
    # Returns a named numeric vector: p.value, estimate, high_count.
    compute_enrichment_one <- function(gene_idx) {
        bin_vec <- bin_matrix[gene_idx, ]

        # Get binary values for edge endpoints
        from_vals <- bin_vec[from_idx]
        to_vals <- bin_vec[to_idx]

        # Count the four from/to state combinations across all edges
        combns <- paste0(from_vals, "-", to_vals)
        combo_table <- table(factor(combns, levels = combo_levels))

        # Build 2x2 contingency table
        # Rows: from cell (0 or 1)
        # Cols: to cell (0 or 1)
        cont_table <- matrix(
            c(combo_table["0-0"], combo_table["0-1"],
              combo_table["1-0"], combo_table["1-1"]),
            nrow = 2, byrow = TRUE
        )

        if (do_fisher_test) {
            # A failing test yields NA for this gene instead of aborting
            # the whole run.
            ft <- tryCatch({
                fisher.test(cont_table)
            }, error = function(e) {
                list(p.value = NA_real_, estimate = NA_real_)
            })

            return(c(
                p.value = ft$p.value,
                estimate = unname(ft$estimate),
                high_count = sum(bin_vec)
            ))
        }

        # Odds ratio only (no test); may be Inf or NaN when a cell is zero
        OR <- (cont_table[2, 2] * cont_table[1, 1]) /
              (cont_table[2, 1] * cont_table[1, 2])
        c(
            p.value = NA_real_,
            estimate = OR,
            high_count = sum(bin_vec)
        )
    }

    # Apply to all genes (forking is unavailable on Windows)
    if (n_threads > 1 && .Platform$OS.type != "windows") {
        results_list <- parallel::mclapply(
            seq_len(n_genes),
            compute_enrichment_one,
            mc.cores = n_threads
        )
    } else {
        show_progress <- verbose && n_genes > 100
        if (show_progress) {
            pb <- txtProgressBar(min = 0, max = n_genes, style = 3)
        }

        results_list <- lapply(seq_len(n_genes), function(i) {
            result <- compute_enrichment_one(i)
            if (show_progress) setTxtProgressBar(pb, i)
            return(result)
        })

        if (show_progress) close(pb)
    }

    # =========================================================================
    # Compile Results
    # =========================================================================

    results_matrix <- do.call(rbind, results_list)

    results <- data.frame(
        gene = gene_ids,
        estimate = results_matrix[, "estimate"],
        p.value = results_matrix[, "p.value"],
        high_expr_count = results_matrix[, "high_count"],
        stringsAsFactors = FALSE
    )

    if (do_fisher_test) {
        # Handle zero p-values: a p-value of exactly 0 would give an infinite
        # -log10 score, so floor it at the smallest positive p-value observed.
        # which() drops NA entries, and the fallback covers the case where no
        # positive p-value exists (min() on an empty set would return Inf).
        zero_idx <- which(results$p.value == 0)
        if (length(zero_idx) > 0) {
            positive_p <- results$p.value[which(results$p.value > 0)]
            min_pval <- if (length(positive_p) > 0) {
                min(positive_p)
            } else {
                .Machine$double.xmin
            }
            results$p.value[zero_idx] <- min_pval
        }

        # Adjust p-values
        results$p.adj <- p.adjust(results$p.value, method = adjust_method)

        # Calculate score
        results$score <- -log10(results$p.value) * results$estimate

        # Sort by score
        results <- results[order(-results$score, results$p.value), ]
    } else {
        # Same column name as the Fisher branch so the returned schema does
        # not depend on do_fisher_test.
        results$p.adj <- NA_real_
        results$score <- results$estimate

        # Sort by odds ratio
        results <- results[order(-results$estimate), ]
    }

    rownames(results) <- NULL

    # =========================================================================
    # Summary
    # =========================================================================

    if (verbose) {
        if (do_fisher_test) {
            n_sig_raw <- sum(results$p.value < 0.05, na.rm = TRUE)
            n_sig_adj <- sum(results$p.adj < 0.05, na.rm = TRUE)
            message(sprintf("  Significant (p < 0.05): %d raw, %d adjusted",
                           n_sig_raw, n_sig_adj))
        }
        median_or <- median(results$estimate, na.rm = TRUE)
        message(sprintf("  Median odds ratio: %.2f", median_or))
        message("=== Done ===")
    }

    return(results)
}
