

#' A priori probabilities of grammar types from single-sign dictionary entries
#'
#' Uses the rows of `dic` whose `row_type` is `"trans."`, whose `sign_name`
#' contains neither a dot nor a space, whose `count` is positive and whose
#' `type` is non-blank. For every sign the counts are turned into relative
#' frequencies over all grammar types; the prior of a type is the mean of
#' these relative frequencies across signs. Verb-like types (`"V"`, or an
#' operator type whose text after the last arrow is `"V"`) are then
#' up-weighted by `1 / sentence_prob` and the vector is renormalised.
#'
#' @param dic Data frame with the columns `row_type`, `sign_name`, `type`
#'   and `count`.
#' @param sentence_prob Single number in (0, 1]; verb-like types are
#'   multiplied by its reciprocal before renormalisation.
#' @return Named numeric vector (names are the trimmed type strings) that
#'   sums to 1, carrying the attribute `"sentence_prob"`.
prior_probs <- function(dic, sentence_prob = 1.0) {

  # Scalar validation first: `||` needs length-one, non-NA operands, so the
  # type/length/NA tests must short-circuit before the range tests.
  if (!is.numeric(sentence_prob) || length(sentence_prob) != 1L ||
      is.na(sentence_prob) || sentence_prob <= 0 || sentence_prob > 1) {
    stop("sentence_prob must be in (0, 1].")
  }

  required_cols <- c("row_type", "sign_name", "type", "count")
  missing_cols  <- setdiff(required_cols, names(dic))
  if (length(missing_cols) > 0L) {
    stop("dic is missing required column(s): ",
         paste(missing_cols, collapse = ", "), ".")
  }

  # `%in%` never yields NA, so rows with a missing row_type are simply
  # excluded instead of turning into all-NA rows.
  trans <- dic[dic$row_type %in% "trans.", ]

  # --- Keep only single-sign entries with a positive count and a type ---
  sign_name <- as.character(trans$sign_name)
  type      <- trimws(as.character(trans$type))

  is_single <- !grepl(".", sign_name, fixed = TRUE) &
    !grepl(" ", sign_name, fixed = TRUE)
  has_count <- !is.na(trans$count) & trans$count > 0L
  has_type  <- !is.na(type) & type != ""
  keep      <- is_single & has_count & has_type

  sign_name <- sign_name[keep]
  type      <- type[keep]
  # Sum in double precision so large integer counts cannot overflow to NA.
  count     <- as.numeric(trans$count[keep])

  if (length(count) == 0L) {
    stop("No single-sign entries with counts found in dictionary.")
  }

  # --- All unique grammar types (already trimmed) ---
  all_types <- sort(unique(type))

  # Rows without a sign name cannot be attributed to any sign: they add no
  # counts, although their types stay in `all_types`.
  has_name <- !is.na(sign_name)
  signs    <- unique(sign_name[has_name])

  if (length(signs) == 0L) {
    stop("No single-sign entries with counts found in dictionary.")
  }

  # --- Count matrix: one row per sign, one column per grammar type ---
  counts <- tapply(
    count[has_name],
    list(
      factor(sign_name[has_name], levels = signs),
      factor(type[has_name], levels = all_types)
    ),
    sum
  )
  counts[is.na(counts)] <- 0  # sign/type combinations that never occur

  # Each sign has at least one positive count, so every row total is > 0.
  rel_freq <- counts / rowSums(counts)

  # --- A priori probability: mean relative frequency across all signs ---
  p <- colMeans(rel_freq)

  # --- Correction for verb underrepresentation ---
  # Strip everything up to and including the last arrow; a type without an
  # arrow is left unchanged, so plain "V" is covered by the same comparison.
  return_type <- trimws(sub(".*(\u2192|->)\\s*", "", all_types))
  is_verb     <- return_type == "V"

  x <- ifelse(is_verb, 1 / sentence_prob, 1)
  weighted    <- p * x
  p_corrected <- weighted / sum(weighted)

  attr(p_corrected, "sentence_prob") <- sentence_prob
  p_corrected
}
