% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/validation.R
\name{validate}
\alias{validate}
\alias{validate.gp}
\alias{validate.dgp}
\alias{validate.lgp}
\title{Validate a constructed GP, DGP, or linked (D)GP emulator}
\usage{
validate(
  object,
  x_test,
  y_test,
  method,
  sample_size,
  verb,
  M,
  force,
  cores,
  ...
)

\method{validate}{gp}(
  object,
  x_test = NULL,
  y_test = NULL,
  method = "mean_var",
  sample_size = 50,
  verb = TRUE,
  M = 50,
  force = FALSE,
  cores = 1,
  ...
)

\method{validate}{dgp}(
  object,
  x_test = NULL,
  y_test = NULL,
  method = "mean_var",
  sample_size = 50,
  verb = TRUE,
  M = 50,
  force = FALSE,
  cores = 1,
  ...
)

\method{validate}{lgp}(
  object,
  x_test = NULL,
  y_test = NULL,
  method = "mean_var",
  sample_size = 50,
  verb = TRUE,
  M = 50,
  force = FALSE,
  cores = 1,
  ...
)
}
\arguments{
\item{object}{can be one of the following:
\itemize{
\item the S3 class \code{gp}.
\item the S3 class \code{dgp}.
\item the S3 class \code{lgp}.
}}

\item{x_test}{OOS testing input data:
\itemize{
\item if \code{object} is an instance of the \code{gp} or \code{dgp} class, \code{x_test} is a matrix where each row is a new input location to be used for validating the emulator and each column is an input dimension.
\item if \code{object} is an instance of the \code{lgp} class, \code{x_test} must be a matrix representing the global input, where each row corresponds to a test data point and each column represents a global input dimension.
The column indices in \code{x_test} must align with the indices specified in the \code{From_Output} column of the \code{struc} data frame (used in \code{\link[=lgp]{lgp()}}),
corresponding to rows where the \code{From_Emulator} column is \code{"Global"}.
}

\code{x_test} must be provided if \code{object} is an instance of the \code{lgp} class. \code{x_test} must also be provided if \code{y_test} is provided. Defaults to \code{NULL}, in which case LOO validation is performed.}

\item{y_test}{the OOS output data corresponding to \code{x_test}:
\itemize{
\item if \code{object} is an instance of the \code{gp} class, \code{y_test} is a matrix with only one column where each row represents the output corresponding to the matching row of \code{x_test}.
\item if \code{object} is an instance of the \code{dgp} class, \code{y_test} is a matrix where each row represents the output corresponding to the matching row of \code{x_test} and with columns representing output dimensions.
\item if \code{object} is an instance of the \code{lgp} class, \code{y_test} can be a single matrix or a list of matrices:
\itemize{
\item if \code{y_test} is a single matrix, then there should be only one emulator in the final layer of the linked emulator system and \code{y_test}
represents the emulator's output with rows being testing positions and columns being output dimensions.
\item if \code{y_test} is a list, then \code{y_test} should have \emph{L} matrices, where \emph{L} is the number of emulators in the final layer of the system.
Each matrix has its rows corresponding to testing positions and columns corresponding to output dimensions of the associated emulator
in the final layer.
}
}

\code{y_test} must be provided if \code{object} is an instance of the \code{lgp} class. \code{y_test} must also be provided if \code{x_test} is provided. Defaults to \code{NULL}, in which case LOO validation is performed.}

\item{method}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#updated}{\figure{lifecycle-updated.svg}{options: alt='[Updated]'}}}{\strong{[Updated]}} the prediction approach to use for validation: either the mean-variance approach (\code{"mean_var"}) or the sampling approach (\code{"sampling"}). For details see \code{\link[=predict]{predict()}}.
Defaults to \code{"mean_var"}.}

\item{sample_size}{the number of samples to draw for each given imputation if \code{method = "sampling"}. Defaults to \code{50}.}

\item{verb}{a bool indicating if trace information for validation should be printed during function execution.
Defaults to \code{TRUE}.}

\item{M}{the size of the conditioning set for the Vecchia approximation in emulator validation. This argument is only used if the emulator \code{object}
was constructed under the Vecchia approximation. Defaults to \code{50}.}

\item{force}{a bool indicating whether to force LOO or OOS re-evaluation when the \code{loo} or \code{oos} slot already exists in \code{object}. When \code{force = FALSE},
\code{\link[=validate]{validate()}} will only re-evaluate the emulators if the \code{x_test} and \code{y_test} are not identical to the values in the \code{oos} slot. If the existing \code{loo} or \code{oos} validation used a different \code{M} in a Vecchia approximation or a different \code{method} to the one prescribed in this call, the emulator will be re-evaluated. Set \code{force} to \code{TRUE} when LOO or OOS re-evaluation
is required. Defaults to \code{FALSE}.}

\item{cores}{the number of processes to be used for validation. If set to \code{NULL}, the number of processes is set to \verb{max physical cores available \%/\% 2}.
Defaults to \code{1}.}

\item{...}{N/A.}
}
\value{
\itemize{
\item If \code{object} is an instance of the \code{gp} class, an updated \code{object} is returned with an additional slot called \code{loo} (for LOO cross validation) or
\code{oos} (for OOS validation) that contains:
\itemize{
\item two slots called \code{x_train} (or \code{x_test}) and \code{y_train} (or \code{y_test}) that contain the validation data points for LOO (or OOS).
\item a column matrix called \code{mean}, if \code{method = "mean_var"}, or \code{median}, if \code{method = "sampling"}, that contains the predictive means or medians of the
GP emulator at validation positions.
\item three column matrices called \code{std}, \code{lower}, and \code{upper} that contain the predictive standard deviations and credible intervals of the
GP emulator at validation positions. If \code{method = "mean_var"}, the upper and lower bounds of a credible interval are two standard deviations above
and below the predictive mean. If \code{method = "sampling"}, the lower and upper bounds of a credible interval are the 2.5th and 97.5th percentiles, respectively.
\item a numeric value called \code{rmse} that contains the root mean/median squared error of the GP emulator.
\item a numeric value called \code{nrmse} that contains the (max-min) normalized root mean/median squared error of the GP emulator. The max-min normalization
uses the maximum and minimum values of the validation outputs contained in \code{y_train} (or \code{y_test}).
\item an integer called \code{M} that contains the size of the conditioning set used for the Vecchia approximation, if used, for emulator validation.
\item an integer called \code{sample_size} that contains the number of samples used for validation if \code{method = "sampling"}.
}

The rows of matrices (\code{mean}, \code{median}, \code{std}, \code{lower}, and \code{upper}) correspond to the validation positions.
\item If \code{object} is an instance of the \code{dgp} class, an updated \code{object} is returned with an additional slot called \code{loo} (for LOO cross validation) or
\code{oos} (for OOS validation) that contains:
\itemize{
\item two slots called \code{x_train} (or \code{x_test}) and \code{y_train} (or \code{y_test}) that contain the validation data points for LOO (or OOS).
\item a matrix called \code{mean}, if \code{method = "mean_var"}, or \code{median}, if \code{method = "sampling"}, that contains the predictive means or medians of the
DGP emulator at validation positions.
\item three matrices called \code{std}, \code{lower}, and \code{upper} that contain the predictive standard deviations and credible intervals of the
DGP emulator at validation positions. If \code{method = "mean_var"}, the upper and lower bounds of a credible interval are two standard deviations above
and below the predictive mean. If \code{method = "sampling"}, the lower and upper bounds of a credible interval are the 2.5th and 97.5th percentiles, respectively.
\item a vector called \code{rmse} that contains the root mean/median squared errors of the DGP emulator across different output
dimensions.
\item a vector called \code{nrmse} that contains the (max-min) normalized root mean/median squared errors of the DGP emulator across different output
dimensions. The max-min normalization uses the maximum and minimum values of the validation outputs contained in \code{y_train} (or \code{y_test}).
\item an integer called \code{M} that contains the size of the conditioning set used for the Vecchia approximation, if used, for emulator validation.
\item an integer called \code{sample_size} that contains the number of samples used for validation if \code{method = "sampling"}.
}

The rows and columns of matrices (\code{mean}, \code{median}, \code{std}, \code{lower}, and \code{upper}) correspond to the validation positions and DGP emulator output
dimensions, respectively.
\item \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#updated}{\figure{lifecycle-updated.svg}{options: alt='[Updated]'}}}{\strong{[Updated]}} If \code{object} is an instance of the \code{dgp} class with a categorical likelihood, an updated \code{object} is returned with an additional slot called \code{loo}
(for LOO cross validation) or \code{oos} (for OOS validation) that contains:
\itemize{
\item two slots called \code{x_train} (or \code{x_test}) and \code{y_train} (or \code{y_test}) that contain the validation data points for LOO (or OOS).
\item a vector called \code{label} that contains predictive labels from the DGP emulator at validation positions.
\item a matrix called \code{probability} that contains mean predictive probabilities for each class from the DGP emulator at validation positions. The matrix has its rows corresponding
to validation positions and columns corresponding to different classes.
\item a scalar called \code{log_loss} that represents the log loss of the trained DGP classifier. Log loss measures the
accuracy of probabilistic predictions, with lower values indicating better classification performance. \code{log_loss} ranges from \code{0} to positive infinity, where a
value closer to \code{0} suggests more confident and accurate predictions.
\item a scalar called \code{accuracy} that represents the accuracy of the trained DGP classifier. Accuracy measures the proportion of correctly classified instances among
all predictions, with higher values indicating better classification performance. \code{accuracy} ranges from \code{0} to \code{1}, where a value closer to \code{1} suggests more
reliable and precise predictions.
\item a slot named \code{method} indicating whether the matrix in the \code{probability} slot was obtained using the \code{"mean_var"} method or the \code{"sampling"} method.
\item an integer called \code{M} that contains the size of the conditioning set used for the Vecchia approximation, if used, in emulator validation.
\item an integer called \code{sample_size} that contains the number of samples used for validation.
}
\item If \code{object} is an instance of the \code{lgp} class, an updated \code{object} is returned with an additional slot called \code{oos} (for OOS validation) that contains:
\itemize{
\item two slots called \code{x_test} and \code{y_test} that contain the validation data points for OOS.
\item a list called \code{mean}, if \code{method = "mean_var"}, or \code{median}, if \code{method = "sampling"}, that contains the predictive means or medians of
the linked (D)GP emulator at validation positions.
\item three lists called \code{std}, \code{lower}, and \code{upper} that contain the predictive standard deviations and credible intervals of
the linked (D)GP emulator at validation positions. If \code{method = "mean_var"}, the upper and lower bounds of a credible interval are two standard
deviations above and below the predictive mean. If \code{method = "sampling"}, the lower and upper bounds of a credible interval are the 2.5th and 97.5th percentiles, respectively.
\item a list called \code{rmse} that contains the root mean/median squared errors of the linked (D)GP emulator.
\item a list called \code{nrmse} that contains the (max-min) normalized root mean/median squared errors of the linked (D)GP emulator. The max-min normalization
uses the maximum and minimum values of the validation outputs contained in \code{y_test}.
\item an integer called \code{M} that contains the size of the conditioning set used for the Vecchia approximation, if used, in emulator validation.
\item an integer called \code{sample_size} that contains the number of samples used for validation if \code{method = "sampling"}.
}

Each element in \code{mean}, \code{median}, \code{std}, \code{lower}, \code{upper}, \code{rmse}, and \code{nrmse} corresponds to a (D)GP emulator in the final layer of the linked (D)GP
emulator.
}
}
\description{
This function calculates Leave-One-Out (LOO) cross validation or Out-Of-Sample (OOS) validation statistics for a constructed GP, DGP, or linked (D)GP emulator.
}
\details{
See further examples and tutorials at \url{https://mingdeyu.github.io/dgpsi-R/}.
}
\note{
\itemize{
\item When both \code{x_test} and \code{y_test} are \code{NULL}, LOO cross validation will be implemented. Otherwise, OOS validation will
be implemented. LOO validation is only applicable to a GP or DGP emulator (i.e., \code{object} is an instance of the \code{gp} or \code{dgp}
class). If a linked (D)GP emulator (i.e., \code{object} is an instance of the \code{lgp} class) is provided, \code{x_test} and \code{y_test} must
also be provided for OOS validation.
}
}
\examples{
\dontrun{

# See gp(), dgp(), or lgp() for an example.
}
}
