#' @title Simulate Spatial Transcriptomics Data with Known SVGs
#'
#' @description
#' Functions to generate simulated spatial transcriptomics data with known
#' spatially variable genes (ground truth). Useful for benchmarking and testing.
#'
#' @return See individual function documentation for return values.
#' @name data_simulation
NULL


#' Simulate Spatial Transcriptomics Data
#'
#' @description
#' Generates a simulated spatial transcriptomics dataset with a mixture of
#' spatially variable genes (SVGs) and non-spatially variable genes.
#' Counts are drawn from a Negative Binomial distribution.
#'
#' @param n_spots Integer. Number of spatial locations (at least 1).
#'   Default is 500.
#' @param n_genes Integer. Total number of genes (at least 1). Default is 200.
#' @param n_svg Integer. Number of spatially variable genes, between 0 and
#'   \code{n_genes} inclusive. Default is 50.
#' @param grid_type Character. Type of spatial layout.
#'   \itemize{
#'     \item \code{"hexagonal"} (default): Visium-like hexagonal grid
#'     \item \code{"square"}: Square grid
#'     \item \code{"random"}: Random spatial distribution
#'   }
#' @param pattern_types Character vector. Types of spatial patterns for SVGs.
#'   Any combination of:
#'   \itemize{
#'     \item \code{"gradient"}: Linear spatial gradient
#'     \item \code{"hotspot"}: Localized expression hotspots
#'     \item \code{"periodic"}: Periodic/oscillating patterns
#'     \item \code{"cluster"}: Clustered expression
#'   }
#'   Default is all four types. Patterns are assigned to SVGs in contiguous
#'   blocks, in the order given.
#' @param mean_counts Numeric. Mean expression level for baseline. Default is 50.
#' @param dispersion Numeric. Dispersion parameter for Negative Binomial
#'   (passed as \code{size} to \code{rnbinom}).
#'   Smaller values = more overdispersion. Default is 5.
#'
#' @return A list containing:
#'   \itemize{
#'     \item \code{counts}: Matrix of gene counts (genes × spots). The first
#'       \code{n_svg} rows are the SVGs.
#'     \item \code{logcounts}: Log-normalized counts (log2(counts + 1))
#'     \item \code{spatial_coords}: Matrix of spatial coordinates (spots × 2),
#'       with columns \code{x} and \code{y} rescaled to [0, 100]
#'     \item \code{gene_info}: Data.frame with gene metadata including
#'       \code{is_svg} (TRUE/FALSE) and \code{pattern_type}
#'       (\code{"none"} for non-SVGs)
#'     \item \code{params}: List of the simulation settings that were used
#'   }
#'
#' @details
#' \strong{Spatial Patterns:}
#' \itemize{
#'   \item \strong{Gradient}: Expression varies linearly along a randomly
#'     oriented axis
#'   \item \strong{Hotspot}: High expression in circular regions
#'   \item \strong{Periodic}: Sine wave pattern along a randomly oriented axis
#'   \item \strong{Cluster}: Expression in spatially defined clusters
#' }
#'
#' \strong{Count Distribution:}
#' Counts are drawn from Negative Binomial distribution:
#' \deqn{X \sim NB(\mu, \phi)}
#' where mu is the mean (modulated by spatial pattern) and phi is dispersion.
#' Per-spot means of SVGs are floored at 0.1. Each non-SVG gets a single
#' gene-level mean drawn uniformly from 0.5 to 2 times \code{mean_counts}.
#'
#' \strong{Coordinates:}
#' Each coordinate axis is rescaled independently to [0, 100]. An axis on
#' which every spot has the same value is set to 0 rather than producing NaN.
#'
#' @examples
#' # Set seed for reproducibility before calling
#' set.seed(42)
#' sim_data <- simulate_spatial_data(n_spots = 200, n_genes = 50, n_svg = 10)
#' str(sim_data, max.level = 1)
#' 
#' \donttest{
#' # Use with SVG detection (requires RANN)
#' if (requireNamespace("RANN", quietly = TRUE)) {
#'     results <- CalSVG_MERINGUE(sim_data$counts, sim_data$spatial_coords,
#'                                network_method = "knn", k = 10, verbose = FALSE)
#' }
#' }
#'
#' @export
simulate_spatial_data <- function(n_spots = 500,
                                   n_genes = 200,
                                   n_svg = 50,
                                   grid_type = c("hexagonal", "square", "random"),
                                   pattern_types = c("gradient", "hotspot", "periodic", "cluster"),
                                   mean_counts = 50,
                                   dispersion = 5) {

    # Note: Use set.seed() before calling this function for reproducibility

    # =========================================================================
    # Validate inputs
    # =========================================================================

    grid_type <- match.arg(grid_type)

    if (n_spots < 1) {
        stop("n_spots must be at least 1")
    }
    if (n_genes < 1) {
        stop("n_genes must be at least 1")
    }
    if (n_svg < 0) {
        stop("n_svg cannot be negative")
    }
    if (n_svg > n_genes) {
        stop("n_svg cannot be greater than n_genes")
    }
    if (n_svg > 0 && length(pattern_types) == 0) {
        stop("pattern_types must contain at least one pattern when n_svg > 0")
    }

    # Assign pattern types to SVGs in contiguous blocks. seq_len() keeps the
    # assignment empty when n_svg is 0 (1:0 would yield c(1, 0) instead).
    if (n_svg > 0) {
        n_per_pattern <- ceiling(n_svg / length(pattern_types))
        pattern_assignments <- rep(pattern_types,
                                   each = n_per_pattern)[seq_len(n_svg)]
    } else {
        pattern_assignments <- character(0)
    }

    # Only the patterns that are actually assigned need to be valid, so calls
    # that worked before keep working.
    known_patterns <- c("gradient", "hotspot", "periodic", "cluster")
    unknown_patterns <- setdiff(pattern_assignments, known_patterns)
    if (length(unknown_patterns) > 0) {
        stop("Unknown pattern_types: ",
             paste(unknown_patterns, collapse = ", "))
    }

    # =========================================================================
    # Generate Spatial Coordinates
    # =========================================================================

    spatial_coords <- switch(grid_type,
        "hexagonal" = generate_hexagonal_grid(n_spots),
        "square" = generate_square_grid(n_spots),
        "random" = generate_random_coords(n_spots)
    )

    rownames(spatial_coords) <- paste0("spot_", seq_len(nrow(spatial_coords)))
    colnames(spatial_coords) <- c("x", "y")

    # Scale one axis to [0, 100]. A zero-width axis (all spots share the same
    # value, e.g. a single grid row) maps to 0 instead of dividing by zero.
    rescale_axis <- function(v) {
        width <- max(v) - min(v)
        if (width > 0) {
            (v - min(v)) / width * 100
        } else {
            rep(0, length(v))
        }
    }
    spatial_coords[, 1] <- rescale_axis(spatial_coords[, 1])
    spatial_coords[, 2] <- rescale_axis(spatial_coords[, 2])

    n_spots_actual <- nrow(spatial_coords)

    # =========================================================================
    # Generate Spatial Patterns for SVGs
    # =========================================================================

    # One row of per-spot multipliers per SVG. All patterns are generated
    # before any counts are drawn, which fixes the order of random draws.
    svg_effects <- matrix(1, nrow = n_svg, ncol = n_spots_actual)

    for (i in seq_len(n_svg)) {
        svg_effects[i, ] <- generate_spatial_pattern(
            spatial_coords,
            pattern_type = pattern_assignments[i]
        )
    }

    # =========================================================================
    # Generate Count Matrix
    # =========================================================================

    counts <- matrix(0, nrow = n_genes, ncol = n_spots_actual)
    rownames(counts) <- paste0("gene_", seq_len(n_genes))
    colnames(counts) <- rownames(spatial_coords)

    n_non_svg <- n_genes - n_svg

    gene_info <- data.frame(
        gene = rownames(counts),
        is_svg = c(rep(TRUE, n_svg), rep(FALSE, n_non_svg)),
        pattern_type = c(pattern_assignments, rep("none", n_non_svg)),
        stringsAsFactors = FALSE
    )

    # Generate SVG counts (with spatial pattern)
    for (i in seq_len(n_svg)) {
        # Modulate mean by spatial effect; floor it so the mean stays positive
        spot_means <- pmax(mean_counts * svg_effects[i, ], 0.1)

        # Draw from Negative Binomial
        counts[i, ] <- rnbinom(n_spots_actual,
                               mu = spot_means,
                               size = dispersion)
    }

    # Generate non-SVG counts (no spatial pattern). The index vector is empty
    # when every gene is an SVG; (n_svg + 1):n_genes would count backwards.
    for (i in n_svg + seq_len(n_non_svg)) {
        # Random baseline mean for each gene
        gene_mean <- mean_counts * runif(1, 0.5, 2)

        counts[i, ] <- rnbinom(n_spots_actual,
                               mu = gene_mean,
                               size = dispersion)
    }

    # =========================================================================
    # Log-normalize
    # =========================================================================

    # Simple log2(counts + 1) normalization
    logcounts <- log2(counts + 1)

    # =========================================================================
    # Return
    # =========================================================================

    return(list(
        counts = counts,
        logcounts = logcounts,
        spatial_coords = spatial_coords,
        gene_info = gene_info,
        params = list(
            n_spots = n_spots_actual,
            n_genes = n_genes,
            n_svg = n_svg,
            grid_type = grid_type,
            pattern_types = pattern_types,
            mean_counts = mean_counts,
            dispersion = dispersion
        )
    ))
}


#' Generate Hexagonal Grid Coordinates
#'
#' @description
#' Creates a Visium-style hexagonal grid. Spots are laid out row by row on a
#' square lattice of candidate positions; even-numbered rows are shifted by
#' half a spot in x and rows are spaced \code{sqrt(3) / 2} apart in y. The
#' first \code{n_spots} positions (in row-major order) are returned, so the
#' last row may be only partially filled.
#'
#' @param n_spots Number of spots desired. \code{0} yields a matrix with
#'   zero rows.
#' @return Numeric matrix with one row per spot and two columns (x, y),
#'   without dimnames.
#' @keywords internal
generate_hexagonal_grid <- function(n_spots) {
    # Side length of the candidate lattice; 0.866 (about sqrt(3) / 2) is the
    # hex packing factor used to size the grid.
    n_side <- ceiling(sqrt(n_spots / 0.866))

    # Row-major enumeration of all lattice positions, built in one shot
    # instead of growing a matrix with rbind() inside nested loops.
    row_idx <- rep(seq_len(n_side), each = n_side)
    col_idx <- rep(seq_len(n_side), times = n_side)

    # seq_len() keeps the selection empty for n_spots = 0 (1:0 would not).
    keep <- seq_len(min(n_spots, length(row_idx)))
    row_idx <- row_idx[keep]
    col_idx <- col_idx[keep]

    # Even rows are offset by half a spot to produce the hexagonal packing.
    x <- col_idx + ifelse(row_idx %% 2 == 0, 0.5, 0)
    y <- row_idx * sqrt(3) / 2

    return(matrix(as.numeric(c(x, y)), ncol = 2))
}


#' Generate Square Grid Coordinates
#'
#' @description
#' Creates a regular square grid. Positions are enumerated row by row
#' (x varies fastest) on the smallest square lattice that can hold
#' \code{n_spots} spots, and the first \code{n_spots} positions are returned.
#'
#' @param n_spots Number of spots desired. \code{0} yields a matrix with
#'   zero rows.
#' @return Integer matrix with one row per spot and columns \code{x} and
#'   \code{y}.
#' @keywords internal
generate_square_grid <- function(n_spots) {
    n_side <- ceiling(sqrt(n_spots))
    x <- rep(seq_len(n_side), times = n_side)
    y <- rep(seq_len(n_side), each = n_side)

    # seq_len() selects no rows when n_spots is 0; 1:n_spots would expand to
    # c(1, 0) and fail with a subscript-out-of-bounds error.
    coords <- cbind(x, y)[seq_len(n_spots), , drop = FALSE]
    return(coords)
}


#' Generate Random Coordinates
#'
#' @description
#' Scatters spots independently and uniformly over the square
#' [0, 100] x [0, 100].
#'
#' @param n_spots Number of spots.
#' @return Numeric matrix with one row per spot and columns \code{x} and
#'   \code{y}.
#' @keywords internal
generate_random_coords <- function(n_spots) {
    # All x positions are drawn before any y position; keeping this order
    # keeps results reproducible under a fixed seed.
    x_pos <- runif(n_spots, min = 0, max = 100)
    y_pos <- runif(n_spots, min = 0, max = 100)

    return(cbind(x = x_pos, y = y_pos))
}


#' Generate Spatial Expression Pattern
#'
#' @description
#' Creates a spatial effect multiplier for gene expression. Coordinates are
#' first rescaled to the unit square, then one randomly parameterised pattern
#' of the requested type is evaluated at every spot.
#'
#' @param coords Matrix of spatial coordinates (spots × 2); column 1 is x and
#'   column 2 is y.
#' @param pattern_type Single character string naming the pattern:
#'   \code{"gradient"}, \code{"hotspot"}, \code{"periodic"} or
#'   \code{"cluster"}. Any other value raises an error.
#'
#' @return Numeric vector of effect sizes (multipliers) for each spot, in the
#'   row order of \code{coords}.
#'
#' @keywords internal
generate_spatial_pattern <- function(coords, pattern_type) {
    # Note: Seed should be set externally by user for reproducibility
    n_spots <- nrow(coords)
    x <- coords[, 1]
    y <- coords[, 2]

    # Normalize coordinates to [0, 1]; the small constant avoids division by
    # zero when all spots share a coordinate.
    x_norm <- (x - min(x)) / (max(x) - min(x) + 1e-10)
    y_norm <- (y - min(y)) / (max(y) - min(y) + 1e-10)

    effect <- switch(pattern_type,

        "gradient" = {
            # Linear gradient along random direction
            angle <- runif(1, 0, 2 * pi)
            proj <- x_norm * cos(angle) + y_norm * sin(angle)

            # The raw projection can be negative or exceed 1 depending on the
            # angle, so rescale it to [0, 1] to keep the multiplier in range.
            proj <- (proj - min(proj)) / (max(proj) - min(proj) + 1e-10)

            0.2 + 1.8 * proj  # Range [0.2, 2.0]
        },

        "hotspot" = {
            # One to three Gaussian-shaped hotspots on a 0.3 baseline
            n_hotspots <- sample(1:3, 1)
            hotspot_total <- rep(0.3, n_spots)

            for (h in seq_len(n_hotspots)) {
                cx <- runif(1, 0.2, 0.8)
                cy <- runif(1, 0.2, 0.8)
                radius <- runif(1, 0.1, 0.25)

                centre_dist <- sqrt((x_norm - cx)^2 + (y_norm - cy)^2)
                hotspot_total <- hotspot_total +
                    2 * exp(-centre_dist^2 / (2 * radius^2))
            }
            hotspot_total
        },

        "periodic" = {
            # Periodic wave pattern along a random direction
            freq <- runif(1, 2, 5)
            angle <- runif(1, 0, 2 * pi)
            proj <- x_norm * cos(angle) + y_norm * sin(angle)

            1 + 0.8 * sin(2 * pi * freq * proj)  # Range [0.2, 1.8]
        },

        "cluster" = {
            # Two to four disc-shaped clusters with elevated expression on a
            # 0.3 baseline; overlapping clusters add up.
            n_clusters <- sample(2:4, 1)
            cluster_total <- rep(0.3, n_spots)

            for (k in seq_len(n_clusters)) {
                cx <- runif(1, 0.1, 0.9)
                cy <- runif(1, 0.1, 0.9)

                centre_dist <- sqrt((x_norm - cx)^2 + (y_norm - cy)^2)
                in_cluster <- centre_dist < runif(1, 0.1, 0.2)

                # The boost is drawn even when no spot falls in the cluster,
                # so the number of random draws does not depend on the layout.
                cluster_total[in_cluster] <- cluster_total[in_cluster] +
                    runif(1, 1, 2)
            }
            cluster_total
        },

        # Without a default, switch() would silently return NULL.
        stop("Unknown pattern_type: ", pattern_type, call. = FALSE)
    )

    return(effect)
}


#' Example Spatial Transcriptomics Data
#'
#' @description
#' A pre-generated example dataset for testing SVG detection methods.
#' Contains 500 spots and 200 genes, with 50 known SVGs.
#'
#' @format A list with components:
#' \describe{
#'   \item{counts}{Integer matrix (200 genes × 500 spots) of raw counts}
#'   \item{logcounts}{Numeric matrix of log2(counts + 1)}
#'   \item{spatial_coords}{Numeric matrix (500 spots × 2) of x, y coordinates}
#'   \item{gene_info}{Data.frame with columns: gene, is_svg, pattern_type}
#' }
#'
#' @examples
#' data(example_svg_data)
#' str(example_svg_data)
#'
#' \donttest{
#' # Run SVG detection (requires RANN package)
#' if (requireNamespace("RANN", quietly = TRUE)) {
#'     results <- CalSVG_MERINGUE(
#'         example_svg_data$counts,
#'         example_svg_data$spatial_coords,
#'         verbose = FALSE
#'     )
#'
#'     # Check accuracy
#'     truth <- example_svg_data$gene_info$is_svg
#'     detected <- results$p.adj < 0.05
#'     print(table(truth, detected))
#' }
#' }
#'
#' @source Simulated using \code{\link{simulate_spatial_data}}
#'
#' @return A list containing the example dataset (see Format section).
#' @name example_svg_data
#' @docType data
NULL
