% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/approach.R, R/approach_categorical.R,
%   R/approach_copula.R, R/approach_ctree.R, R/approach_empirical.R,
%   R/approach_gaussian.R, R/approach_independence.R,
%   R/approach_regression_separate.R, R/approach_regression_surrogate.R,
%   R/approach_timeseries.R, R/approach_vaeac.R
\name{setup_approach}
\alias{setup_approach}
\alias{setup_approach.combined}
\alias{setup_approach.categorical}
\alias{setup_approach.copula}
\alias{setup_approach.ctree}
\alias{setup_approach.empirical}
\alias{setup_approach.gaussian}
\alias{setup_approach.independence}
\alias{setup_approach.regression_separate}
\alias{setup_approach.regression_surrogate}
\alias{setup_approach.timeseries}
\alias{setup_approach.vaeac}
\title{Set up the framework for the chosen approach}
\usage{
setup_approach(internal, ...)

\method{setup_approach}{combined}(internal, ...)

\method{setup_approach}{categorical}(
  internal,
  categorical.joint_prob_dt = NULL,
  categorical.epsilon = 0.001,
  ...
)

\method{setup_approach}{copula}(internal, ...)

\method{setup_approach}{ctree}(
  internal,
  ctree.mincriterion = 0.95,
  ctree.minsplit = 20,
  ctree.minbucket = 7,
  ctree.sample = TRUE,
  ...
)

\method{setup_approach}{empirical}(
  internal,
  empirical.type = "fixed_sigma",
  empirical.eta = 0.95,
  empirical.fixed_sigma = 0.1,
  empirical.n_samples_aicc = 1000,
  empirical.eval_max_aicc = 20,
  empirical.start_aicc = 0.1,
  empirical.cov_mat = NULL,
  model = NULL,
  predict_model = NULL,
  ...
)

\method{setup_approach}{gaussian}(internal, gaussian.mu = NULL, gaussian.cov_mat = NULL, ...)

\method{setup_approach}{independence}(internal, ...)

\method{setup_approach}{regression_separate}(
  internal,
  regression.model = parsnip::linear_reg(),
  regression.tune_values = NULL,
  regression.vfold_cv_para = NULL,
  regression.recipe_func = NULL,
  ...
)

\method{setup_approach}{regression_surrogate}(
  internal,
  regression.model = parsnip::linear_reg(),
  regression.tune_values = NULL,
  regression.vfold_cv_para = NULL,
  regression.recipe_func = NULL,
  regression.surrogate_n_comb =
    internal$iter_list[[length(internal$iter_list)]]$n_coalitions - 2,
  ...
)

\method{setup_approach}{timeseries}(
  internal,
  timeseries.fixed_sigma = 2,
  timeseries.bounds = c(NULL, NULL),
  ...
)

\method{setup_approach}{vaeac}(
  internal,
  vaeac.depth = 3,
  vaeac.width = 32,
  vaeac.latent_dim = 8,
  vaeac.activation_function = torch::nn_relu,
  vaeac.lr = 0.001,
  vaeac.n_vaeacs_initialize = 4,
  vaeac.epochs = 100,
  vaeac.extra_parameters = list(),
  ...
)
}
\arguments{
\item{internal}{List.
Not used directly, but passed through from \code{\link[=explain]{explain()}}.}

\item{...}{Arguments passed to specific classes. See below.}

\item{categorical.joint_prob_dt}{Data.table. (Optional)
Containing the joint probability distribution for each combination of feature
values.
\code{NULL} means it is estimated from the \code{x_train} and \code{x_explain}.}

\item{categorical.epsilon}{Numeric value. (Optional)
If \code{categorical.joint_prob_dt} is not supplied, probabilities/frequencies are
estimated using \code{x_train}. If certain observations occur in \code{x_explain} and NOT in \code{x_train},
then epsilon is used as the proportion of times that these observations occur in the training data.
In theory, this proportion should be zero, but this causes an error later in the Shapley computation.}

\item{ctree.mincriterion}{Numeric scalar or vector.
Either a scalar or vector of length equal to the number of features in the model.
The value is equal to 1 - \eqn{\alpha} where \eqn{\alpha} is the nominal level of the conditional independence tests.
If it is a vector, this indicates which value to use when conditioning on various numbers of features.
The default value is 0.95.}

\item{ctree.minsplit}{Numeric scalar.
Determines the minimum value that the sum of the left and right daughter nodes must reach for a split.
The default value is 20.}

\item{ctree.minbucket}{Numeric scalar.
Determines the minimum sum of weights in a terminal node required for a split.
The default value is 7.}

\item{ctree.sample}{Boolean.
If \code{TRUE} (default), then the method always samples \code{n_MC_samples} observations from the leaf nodes
(with replacement).
If \code{FALSE} and the number of observations in the leaf node is less than \code{n_MC_samples},
the method will take all observations in the leaf.
If \code{FALSE} and the number of observations in the leaf node is more than \code{n_MC_samples},
the method will sample \code{n_MC_samples} observations (with replacement).
This means that there will always be sampling in the leaf unless
\code{ctree.sample = FALSE} \emph{and} the number of observations in the node is less than \code{n_MC_samples}.}

\item{empirical.type}{Character.
Must be one of \code{"fixed_sigma"} (default), \code{"AICc_each_k"}, \code{"AICc_full"} or \code{"independence"}.
Note: \code{empirical.type = "independence"} is deprecated; use \code{approach = "independence"} instead.
\code{"fixed_sigma"} uses a fixed bandwidth (set through \code{empirical.fixed_sigma}) in the kernel density estimation.
\code{"AICc_each_k"} and \code{"AICc_full"} optimize the bandwidth using the AICc criterion, with respectively
one bandwidth per coalition size and one bandwidth for all coalition sizes.}

\item{empirical.eta}{Numeric scalar.
Needs to be \verb{0 < empirical.eta <= 1}.
The default value is 0.95.
Represents the minimum proportion of the total empirical weight that data samples should use.
For example, if \code{empirical.eta = .8}, we choose the \code{K} samples with the largest weights so that the sum of the
weights accounts for 80\% of the total weight.
\code{empirical.eta} is the \eqn{\eta} parameter in equation (15) of
\href{https://martinjullum.com/publication/aas-2021-explaining/aas-2021-explaining.pdf}{Aas et al. (2021)}.}

\item{empirical.fixed_sigma}{Positive numeric scalar.
The default value is 0.1.
Represents the kernel bandwidth in the distance computation used when conditioning on all different coalitions.
Only used when \code{empirical.type = "fixed_sigma"}.}

\item{empirical.n_samples_aicc}{Positive integer.
Number of samples to consider in AICc optimization.
The default value is 1000.
Only used when \code{empirical.type} is either \code{"AICc_each_k"} or \code{"AICc_full"}.}

\item{empirical.eval_max_aicc}{Positive integer.
Maximum number of iterations when optimizing the AICc.
The default value is 20.
Only used when \code{empirical.type} is either \code{"AICc_each_k"} or \code{"AICc_full"}.}

\item{empirical.start_aicc}{Numeric.
Start value of the \code{sigma} parameter when optimizing the AICc.
The default value is 0.1.
Only used when \code{empirical.type} is either \code{"AICc_each_k"} or \code{"AICc_full"}.}

\item{empirical.cov_mat}{Numeric matrix.
The covariance matrix of the data generating distribution used to define the Mahalanobis distance.
\code{NULL} means it is estimated from \code{x_train}.}

\item{model}{Object.
The model object that ought to be explained.
See the documentation of \code{\link[=explain]{explain()}} for details.}

\item{predict_model}{Function.
The prediction function used when \code{model} is not natively supported.
See the documentation of \code{\link[=explain]{explain()}} for details.}

\item{gaussian.mu}{Numeric vector.
Containing the mean of the data generating distribution.
\code{NULL} means it is estimated from the \code{x_train}.}

\item{gaussian.cov_mat}{Numeric matrix.
Containing the covariance matrix of the data generating distribution.
\code{NULL} means it is estimated from the \code{x_train}.}

\item{regression.model}{A \code{tidymodels} object of class \code{model_spec}. Default is a linear regression model, i.e.,
\code{\link[parsnip:linear_reg]{parsnip::linear_reg()}}. See \href{https://www.tidymodels.org/find/parsnip/}{tidymodels} for all possible models,
and see the vignette for how to add new/own models. Note, to make it easier to call \code{explain()} from Python, the
\code{regression.model} parameter can also be a string specifying the model which will be parsed and evaluated. For
example, \verb{"parsnip::rand_forest(mtry = hardhat::tune(), trees = 100, engine = "ranger", mode = "regression")"}
is also a valid input. It is essential to include the package prefix if the package is not loaded.}

\item{regression.tune_values}{Either \code{NULL} (default), a data.frame/data.table/tibble, or a function.
The data.frame must contain the possible hyperparameter value combinations to try.
The column names must match the names of the tunable parameters specified in \code{regression.model}.
If \code{regression.tune_values} is a function, then it should take one argument \code{x} which is the training data
for the current coalition and returns a data.frame/data.table/tibble with the properties described above.
Using a function allows the hyperparameter values to change based on the size of the coalition. See the regression
vignette for several examples.
Note, to make it easier to call \code{\link[=explain]{explain()}} from Python, the \code{regression.tune_values} can also be a string
containing an R function. For example,
\code{"function(x) return(dials::grid_regular(dials::mtry(c(1, ncol(x))), levels = 3))"} is also a valid input.
It is essential to include the package prefix if the package is not loaded.}

\item{regression.vfold_cv_para}{Either \code{NULL} (default) or a named list containing
the parameters to be sent to \code{\link[rsample:vfold_cv]{rsample::vfold_cv()}}. See the regression vignette for
several examples.}

\item{regression.recipe_func}{Either \code{NULL} (default) or a function that takes in a \code{\link[recipes:recipe]{recipes::recipe()}}
object and returns a modified \code{\link[recipes:recipe]{recipes::recipe()}} with potentially additional recipe steps. See the regression
vignette for several examples.
Note, to make it easier to call \code{\link[=explain]{explain()}} from Python, the \code{regression.recipe_func} can also be a string
containing an R function. For example,
\code{"function(recipe) return(recipes::step_ns(recipe, recipes::all_numeric_predictors(), deg_free = 2))"} is also
a valid input. It is essential to include the package prefix if the package is not loaded.}

\item{regression.surrogate_n_comb}{Positive integer.
Specifies the number of unique coalitions to apply to each training observation.
The default is the number of sampled coalitions in the present iteration.
Any integer between 1 and the default is allowed.
Larger values require more memory, but may improve the surrogate model.
If the user sets a value lower than the maximum, we sample this number of unique coalitions
separately for each training observation.
That is, on average, all coalitions should be equally trained.}

\item{timeseries.fixed_sigma}{Positive numeric scalar.
Represents the kernel bandwidth in the distance computation.
The default value is 2.}

\item{timeseries.bounds}{Numeric vector of length two.
Specifies the lower and upper bounds of the time series.
The default is \code{c(NULL, NULL)}, i.e. no bounds.
If one or both of these bounds are not \code{NULL}, we restrict the sampled time series to be between these bounds.
This is useful if the underlying time series are scaled between 0 and 1, for example.}

\item{vaeac.depth}{Positive integer (default is \code{3}). The number of hidden layers
in the neural networks of the masked encoder, full encoder, and decoder.}

\item{vaeac.width}{Positive integer (default is \code{32}). The number of neurons in each
hidden layer in the neural networks of the masked encoder, full encoder, and decoder.}

\item{vaeac.latent_dim}{Positive integer (default is \code{8}). The number of dimensions in the latent space.}

\item{vaeac.activation_function}{A \code{\link[torch:nn_module]{torch::nn_module()}} representing an activation function such as, e.g.,
\code{\link[torch:nn_relu]{torch::nn_relu()}} (default), \code{\link[torch:nn_leaky_relu]{torch::nn_leaky_relu()}}, \code{\link[torch:nn_selu]{torch::nn_selu()}}, or \code{\link[torch:nn_sigmoid]{torch::nn_sigmoid()}}.}

\item{vaeac.lr}{Positive numeric (default is \code{0.001}). The learning rate used in the \code{\link[torch:optim_adam]{torch::optim_adam()}} optimizer.}

\item{vaeac.n_vaeacs_initialize}{Positive integer (default is \code{4}). The number of different vaeac models to initialize
at the start. Pick the best performing one after \code{vaeac.extra_parameters$epochs_initiation_phase}
epochs (default is \code{2}) and continue training that one.}

\item{vaeac.epochs}{Positive integer (default is \code{100}). The number of epochs to train the final vaeac model.
This includes \code{vaeac.extra_parameters$epochs_initiation_phase}, where the default is \code{2}.}

\item{vaeac.extra_parameters}{Named list with extra parameters to the \code{vaeac} approach. See
\code{\link[=vaeac_get_extra_para_default]{vaeac_get_extra_para_default()}} for description of possible additional parameters and their default values.}
}
\value{
Updated internal object with the approach set up.
}
\description{
Different choices of \code{approach} take different (optional) parameters,
which are forwarded from \code{\link[=explain]{explain()}}.
See the \href{https://norskregnesentral.github.io/shapr/articles/general_usage.html}{general usage vignette}
for more information about the different approaches.
}
\references{
\itemize{
\item \href{https://martinjullum.com/publication/aas-2021-explaining/aas-2021-explaining.pdf}{
Aas, K., Jullum, M., & Løland, A. (2021). Explaining individual predictions when features are dependent:
More accurate approximations to Shapley values. Artificial Intelligence, 298, 103502}
}
}
\author{
Martin Jullum

Lars Henry Berge Olsen
}
\keyword{internal}
