#' Detect Column-Level Data Drift
#'
#' Compares the columns shared by a reference and a target dataset and
#' reports, for each of them, a drift score and a drift flag.
#'
#' Columns that are numeric in both datasets are compared with a two-sample
#' Kolmogorov-Smirnov test (\code{stats::ks.test}); drift is flagged when the
#' test's p-value is below \code{alpha}. Every other column (including
#' columns whose type differs between the two datasets) is compared as
#' categorical data: the score is the sum of squared differences between the
#' per-category relative frequencies, and drift is flagged when that score
#' exceeds \code{alpha}. No p-value is computed for categorical columns.
#'
#' Missing values are ignored. When a column has no non-missing observations
#' in either dataset, its score, p-value and drift flag are \code{NA}.
#'
#' @param reference A data.frame representing baseline data
#' @param target A data.frame representing new incoming data
#' @param alpha A single numeric value. Used as the significance level for
#'   numeric columns and as the distance threshold for categorical columns.
#'
#' @return A data.frame with one row per common column and the columns
#'   \code{column}, \code{method}, \code{drift_score}, \code{p_value} and
#'   \code{drift_detected}. If the datasets share no columns, a zero-row
#'   data.frame with the same columns is returned.
#'
#' @examples
#' ref <- data.frame(
#'   age = c(25, 30, 35, 40),
#'   city = c("A", "B", "A", "C")
#' )
#'
#' new <- data.frame(
#'   age = c(26, 31, 36, 41),
#'   city = c("A", "B", "C", "C")
#' )
#'
#' detect_column_drift(ref, new)
#'
#' @export
detect_column_drift <- function(reference, target, alpha = 0.05) {

  if (!is.data.frame(reference) || !is.data.frame(target)) {
    stop("Both reference and target must be data.frames", call. = FALSE)
  }
  # A vector-valued alpha would silently recycle into extra result rows.
  if (!is.numeric(alpha) || length(alpha) != 1L || is.na(alpha)) {
    stop("alpha must be a single non-missing numeric value", call. = FALSE)
  }

  common_cols <- intersect(names(reference), names(target))

  # rbind() over an empty list yields NULL; return a typed empty frame so
  # callers always receive a data.frame with the documented columns.
  if (length(common_cols) == 0L) {
    return(data.frame(
      column = character(0),
      method = character(0),
      drift_score = numeric(0),
      p_value = numeric(0),
      drift_detected = logical(0),
      stringsAsFactors = FALSE
    ))
  }

  # Relative frequency of each entry of `levels` in a 1-d table of counts.
  # Alignment goes through match() rather than name subscripting because a
  # character subscript of "" never matches any name, which would silently
  # drop the counts of an empty-string category.
  level_proportions <- function(counts, levels) {
    aligned <- as.numeric(counts)[match(levels, names(counts))]
    aligned[is.na(aligned)] <- 0
    aligned / sum(aligned)
  }

  results <- lapply(common_cols, function(col) {

    ref_col <- reference[[col]]
    tgt_col <- target[[col]]

    # NA_real_ keeps the p_value column numeric even when no KS test runs.
    score <- NA_real_
    p_value <- NA_real_
    drift <- NA

    if (is.numeric(ref_col) && is.numeric(tgt_col)) {
      method <- "KS-test"

      # ks.test() errors when either sample has no non-missing values.
      if (!all(is.na(ref_col)) && !all(is.na(tgt_col))) {
        test <- stats::ks.test(ref_col, tgt_col)
        score <- as.numeric(test$statistic)
        p_value <- as.numeric(test$p.value)
        drift <- p_value < alpha
      }

    } else {
      method <- "Categorical distance"

      ref_counts <- table(ref_col)
      tgt_counts <- table(tgt_col)

      # Guard against 0 / 0 when a column has no countable observations.
      if (sum(ref_counts) > 0 && sum(tgt_counts) > 0) {
        all_levels <- union(names(ref_counts), names(tgt_counts))
        ref_prop <- level_proportions(ref_counts, all_levels)
        tgt_prop <- level_proportions(tgt_counts, all_levels)

        score <- sum((ref_prop - tgt_prop)^2)
        # The distance itself is thresholded; this is not a p-value test.
        drift <- score > alpha
      }
    }

    data.frame(
      column = col,
      method = method,
      drift_score = score,
      p_value = p_value,
      drift_detected = drift,
      stringsAsFactors = FALSE
    )
  })

  out <- do.call(rbind, results)
  rownames(out) <- NULL
  out
}
