#' Fit a hidden Markov model (HMM) by maximum likelihood
#'
#' Estimates the parameters of a `J`-state HMM either by direct numerical
#' minimisation of the negative log-likelihood with `nlm()` (`EM = FALSE`), or
#' by delegating to `findmleHSMM()` with geometric dwell-time distributions
#' and mapping the result back to an HMM transition matrix (`EM = TRUE`).
#'
#' @param J Number of hidden states; a single whole number of at least 2.
#' @param x Observed series. Its length is used as the sample size for BIC.
#' @param obsdist Name of the observation distribution. One of `"pois"`,
#'   `"norm"`, `"weibull"`, `"zip"`, `"nbinom"`, `"zinb"`, `"exp"`, `"gamma"`,
#'   `"lnorm"`, `"gev"`, `"ZInormal"`, `"ZIgamma"`.
#' @param obspar Starting values for the observation parameters, forwarded
#'   unchanged to `findmleHSMM()` or `params.n2w()`.
#' @param Pi Starting transition probability matrix. In the EM branch it must
#'   be a `J` x `J` matrix.
#' @param EM If `TRUE`, fit through `findmleHSMM()`; if `FALSE` (default), fit
#'   with `nlm()`. Must be a single value coercible to `TRUE` or `FALSE`.
#' @param verbose Forwarded to `findmleHSMM()`; not used in the `nlm()` branch.
#' @param seed Optional seed. When supplied, `set.seed(seed)` is called (which
#'   changes the global RNG state) and the seed is also passed to
#'   `findmleHSMM()`.
#' @param ... Further arguments forwarded to `findmleHSMM()`; ignored when
#'   `EM = FALSE`.
#'
#' @return A list with elements `estimate` (parameter estimates including `Pi`
#'   and `delta`), `loglik`, `AIC` and `BIC`. When `EM = FALSE` the list also
#'   contains `hessian`, the Hessian returned by `nlm()` (on the working
#'   parameter scale it optimises over).
findmleHMM <- function(J, x, obsdist, obspar, Pi, EM = FALSE, verbose = TRUE, seed = NULL, ...) {

  # Set seed if provided for reproducibility
  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Validate input parameters. The length and NA guards come first so that the
  # scalar `||` chain never sees a vector or a missing value.
  if (!is.numeric(J) || length(J) != 1 || is.na(J) || floor(J) != J || J < 2) {
    stop("J must be an integer strictly greater than 1")
  }

  supported_obsdist <- c(
    "pois", "norm", "weibull", "zip", "nbinom", "zinb",
    "exp", "gamma", "lnorm", "gev", "ZInormal", "ZIgamma"
  )
  if (length(obsdist) != 1 || !obsdist %in% supported_obsdist) {
    stop("observation distribution is not supported")
  }

  # Resolve EM to a definite TRUE/FALSE. Without this, a value that is neither
  # TRUE nor FALSE would skip both branches and silently return NULL.
  use_em <- if (length(EM) == 1) as.logical(EM) else NA
  if (is.na(use_em)) {
    stop("EM must be a single TRUE or FALSE")
  }

  if (use_em) {
    # EM algorithm approach via HSMM definition with geometric dwelltimes

    if (length(dim(Pi)) != 2L || any(dim(Pi) != J)) {
      stop("Pi must be a J x J transition matrix")
    }

    # Extract self-transition probabilities from Pi
    self_probs <- unname(diag(Pi))

    # Create off-diagonal transition matrix for HSMM
    Pi_hsmm <- Pi
    diag(Pi_hsmm) <- 0

    # Normalize rows to create proper transition matrix
    row_sums <- rowSums(Pi_hsmm)
    for (i in seq_len(J)) {
      if (row_sums[i] > 0) {
        Pi_hsmm[i, ] <- Pi_hsmm[i, ] / row_sums[i]
      } else {
        # If row sum is 0, create uniform distribution over other states
        Pi_hsmm[i, -i] <- 1 / (J - 1)
      }
    }

    # Convert self-transition probabilities to geometric dwell parameters:
    # the geometric parameter is the probability of leaving the state,
    # constrained to [0.01, 0.99] to avoid numerical issues.
    dwellpar <- list(prob = pmax(pmin(1 - self_probs, 0.99), 0.01))

    # Fit HSMM with geometric dwell times
    HMM <- findmleHSMM(
      x = x,
      J = J,  # Number of hidden states
      obsdist = obsdist,
      dwelldist = "geom",
      obspar = obspar,
      dwellpar = dwellpar,
      Pi = Pi_hsmm,
      shift = FALSE,
      verbose = verbose,
      seed = seed,  # Pass seed to findmleHSMM
      ...
    )

    # Names of the observation parameters to copy for each distribution
    obs_par_names <- list(
      norm = c("mean", "sd"),
      pois = "lambda",
      weibull = c("shape", "scale"),
      zip = c("lambda", "pi"),
      nbinom = c("mu", "size"),
      zinb = c("mu", "size", "pi"),
      exp = "rate",
      gamma = c("shape", "rate"),
      lnorm = c("meanlog", "sdlog"),
      gev = c("loc", "scale", "shape"),
      ZInormal = c("mean", "sd", "pi"),
      ZIgamma = c("shape", "rate", "pi")
    )

    # Extract observation parameters from HSMM results. `[[` matches names
    # exactly, whereas `$` would partially match (e.g. "mean" ~ "meanlog").
    estimate <- list()
    for (par_name in obs_par_names[[as.character(obsdist)]]) {
      estimate[[par_name]] <- HMM$observationparameters[[par_name]]
    }

    # Reconstruct HMM transition matrix from HSMM results.
    # Row j of the HSMM matrix is scaled by the probability of leaving state j
    # (the vector recycles down the rows), and the diagonal becomes the
    # self-transition probability.
    leave_probs <- HMM$dwellparameters$prob
    Pi_result <- HMM$Pi * leave_probs
    diag(Pi_result) <- 1 - leave_probs

    estimate$Pi <- Pi_result
    estimate$delta <- HMM$delta

    # Extract final log-likelihood
    loglik <- HMM$loglikelihoods[length(HMM$loglikelihoods)]

    return(list(estimate = estimate, loglik = loglik, AIC = HMM$AIC, BIC = HMM$BIC))
  }

  # Direct numerical optimization approach

  # Convert initial parameters to working parameters for unconstrained optimizer
  wparams <- params.n2w(J = J, obsdist = obsdist, Pi = Pi, obspar = obspar)

  # Minimize negative log-likelihood using nlm
  minimise <- nlm(negloglikHMM, wparams, x = x, J = J, obsdist = obsdist, hessian = TRUE)

  # Convert optimized parameters back to natural scale
  estimate <- params.w2n(J, minimise$estimate, obsdist = obsdist)
  minnegloglik <- minimise$minimum

  # Number of free parameters: J * (J - 1) transition probabilities plus J
  # values per observation parameter.
  # NOTE(review): assumes the last two elements of `estimate` are Pi and
  # delta, as the original code did - confirm against params.w2n.
  n_obs_par <- length(estimate) - 2L
  p <- J * (J - 1) + J * n_obs_par
  AIC <- 2 * (minnegloglik + p)
  BIC <- (p * log(length(x))) + (2 * minnegloglik)

  list(estimate = estimate, loglik = -minnegloglik, AIC = AIC, BIC = BIC, hessian = minimise$hessian)
}
