% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/llm_pipeline.R
\name{validate_matches_llm}
\alias{validate_matches_llm}
\title{Validate Matches Using an LLM (Azure OpenAI)}
\usage{
validate_matches_llm(
  data,
  query_name_col,
  dict_name_col,
  output_dir = tempdir(),
  filename_stem = "match_validation",
  batch_size = 20,
  api_key = Sys.getenv("AZURE_API_KEY"),
  endpoint = Sys.getenv("AZURE_ENDPOINT"),
  deployment = Sys.getenv("AZURE_DEPLOYMENT")
)
}
\arguments{
\item{data}{Data frame. Must contain the columns specified by \code{query_name_col} and \code{dict_name_col}.}

\item{query_name_col}{String. Name of the column in \code{data} containing the user's query name (Employer).}

\item{dict_name_col}{String. Name of the column in \code{data} containing the dictionary match name (Registry).}

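\item{output_dir}{String. Directory to save temporary chunks and final results. Defaults to \code{tempdir()}, which is specific to the current R session; supply a persistent directory if the chunk files must remain available after the session ends.}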

\item{filename_stem}{String. Base name for output files. Defaults to \code{"match_validation"}.}

\item{batch_size}{Integer. Number of rows to process before saving a chunk. Defaults to \code{20}.}

\item{api_key}{String. Azure API Key. Defaults to \code{Sys.getenv("AZURE_API_KEY")}.}

\item{endpoint}{String. Azure Endpoint. Defaults to \code{Sys.getenv("AZURE_ENDPOINT")}.}

\item{deployment}{String. Deployment name. Defaults to \code{Sys.getenv("AZURE_DEPLOYMENT")}.}
}
\value{
A data frame with added \code{LLM_decision} and \code{LLM_reason} columns.
}
\description{
Sends doubtful matches (those that are neither "Perfect" nor "Unmatched") to an LLM for verification.
Supports resuming after an interruption via the chunk files saved in \code{output_dir}.
}
\examples{
\dontrun{
# Sample matched data
matched_data <- data.frame(
  employer_name = c("BMW", "Siemens"),
  registry_name = c("BMW AG", "SAP SE"),
  dict_id = c("D001", "D002"),
  match_type = c("Fuzzy", "Fuzzy")
)

# Validate using LLM (requires Azure credentials)
validated <- validate_matches_llm(
  data = matched_data,
  query_name_col = "employer_name",
  dict_name_col = "registry_name"
)

print(validated)
}
}
