% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CalSVG_binSpect.R
\name{CalSVG_binSpect}
\alias{CalSVG_binSpect}
\title{binSpect: Binary Spatial Enrichment Test for SVG Detection}
\usage{
CalSVG_binSpect(
  expr_matrix,
  spatial_coords,
  bin_method = c("kmeans", "rank"),
  rank_percent = 30,
  network_method = c("delaunay", "knn"),
  k = 10L,
  do_fisher_test = TRUE,
  adjust_method = "fdr",
  n_threads = 1L,
  verbose = TRUE
)
}
\arguments{
\item{expr_matrix}{Numeric matrix of gene expression values.
\itemize{
  \item Rows: genes
  \item Columns: spatial locations (spots/cells)
  \item Values: normalized expression (e.g., log counts or normalized counts)
}}

\item{spatial_coords}{Numeric matrix of spatial coordinates.
\itemize{
  \item Rows: spatial locations (must match columns of expr_matrix)
  \item Columns: x, y (and optionally z) coordinates
}}

\item{bin_method}{Character string specifying binarization method.
\itemize{
  \item \code{"kmeans"} (default): K-means clustering with k=2.
    Automatically separates high and low expression groups.
    Robust to different expression distributions.
  \item \code{"rank"}: Top percentage by expression rank.
    More consistent across genes with different distributions.
    Controlled by \code{rank_percent} parameter.
}}

\item{rank_percent}{Numeric (0-100). For \code{bin_method = "rank"},
the percentage of cells to classify as "high expressing".
Default is 30 (top 30\% are "high").
\itemize{
  \item Lower values (10-20\%): Focus on highly expressed cells
  \item Higher values (40-50\%): Include moderately expressed cells
}}

\item{network_method}{Character string specifying spatial network construction.
\itemize{
  \item \code{"delaunay"} (default): Delaunay triangulation
  \item \code{"knn"}: K-nearest neighbors
}}

\item{k}{Integer. Number of neighbors for KNN network. Default is 10.}

\item{do_fisher_test}{Logical. Whether to perform Fisher's exact test.
Default is TRUE.
\itemize{
  \item TRUE: Returns p-values from Fisher's exact test
  \item FALSE: Returns only odds ratios (faster)
}}

\item{adjust_method}{Character string for p-value adjustment.
Default is "fdr" (Benjamini-Hochberg). See \code{p.adjust()} for options.}

\item{n_threads}{Integer. Number of parallel threads. Default is 1.}

\item{verbose}{Logical. Print progress messages. Default is TRUE.}
}
\value{
A data.frame with SVG detection results, sorted by significance/score.
  Columns:
  \itemize{
    \item \code{gene}: Gene identifier
    \item \code{estimate}: Odds ratio from 2x2 contingency table.
      OR > 1 indicates spatial clustering of high-expressing cells.
    \item \code{p.value}: P-value from Fisher's exact test (if requested)
    \item \code{p.adj}: Adjusted p-value
    \item \code{score}: Combined score = -log10(p.value) * estimate
    \item \code{high_expr_count}: Number of high-expressing cells
  }
}
\description{
Detect spatially variable genes using the binSpect approach from Giotto.
This method binarizes gene expression and tests for spatial enrichment
of high-expressing cells using Fisher's exact test.

Identifies spatially variable genes by:
\enumerate{
  \item Binarizing gene expression (high/low)
  \item Building a spatial neighborhood network
  \item Testing whether high-expressing cells tend to be neighbors of other
    high-expressing cells more than expected by chance
}
}
\details{
\strong{Method Overview:}

binSpect constructs a 2x2 contingency table for each gene based on:
\itemize{
  \item Cell A expression: High (1) or Low (0)
  \item Cell B expression: High (1) or Low (0)
}

For all pairs of neighboring cells (edges in the spatial network):
\tabular{lcc}{
  \tab Cell B Low \tab Cell B High \cr
  Cell A Low \tab n_00 \tab n_01 \cr
  Cell A High \tab n_10 \tab n_11 \cr
}

\strong{Statistical Test:}
Fisher's exact test is used to test whether n_11 (both neighbors high)
is greater than expected under independence.

\strong{Odds Ratio Interpretation:}
\itemize{
  \item OR = 1: No spatial pattern
  \item OR > 1: High-expressing cells cluster together (positive spatial pattern)
  \item OR < 1: High-expressing cells avoid each other (negative pattern)
}

\strong{Advantages:}
\itemize{
  \item Fast computation (no covariance matrix inversion)
  \item Robust to outliers through binarization
  \item Interpretable odds ratio statistic
}

\strong{Considerations:}
\itemize{
  \item Binarization threshold affects results
  \item K-means may produce unstable results when expression is not clearly bimodal
  \item Rank method is more stable but relies on an arbitrary threshold
}
}
\examples{
# Load example data
data(example_svg_data)
expr <- example_svg_data$logcounts[1:20, ]
coords <- example_svg_data$spatial_coords

\donttest{
# Basic usage (requires RANN package)
if (requireNamespace("RANN", quietly = TRUE)) {
    results <- CalSVG_binSpect(expr, coords, 
                               network_method = "knn", k = 10,
                               verbose = FALSE)
    head(results)
}
}

}
\references{
Dries, R. et al. (2021) Giotto: a toolbox for integrative analysis and
visualization of spatial expression data. Genome Biology.
}
\seealso{
\code{\link{CalSVG}}, \code{\link{binarize_expression}},
\code{\link{buildSpatialNetwork}}
}
