% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cqplot.R
\name{cqplot}
\alias{cqplot}
\alias{cqplot.default}
\alias{cqplot.mlm}
\title{Chi Square Quantile-Quantile plots}
\usage{
cqplot(x, ...)

\method{cqplot}{mlm}(x, ...)

\method{cqplot}{default}(
  x,
  method = c("classical", "mcd", "mve"),
  detrend = FALSE,
  pch = 19,
  col = palette()[1],
  cex = par("cex"),
  ref.col = "red",
  ref.lwd = 2,
  conf = 0.95,
  env.col = "gray",
  env.lwd = 2,
  env.lty = 1,
  env.fill = TRUE,
  fill.alpha = 0.2,
  fill.color = trans.colors(ref.col, fill.alpha),
  labels = if (!is.null(rownames(x))) rownames(x) else 1:nrow(x),
  id.n,
  id.method = "r",
  id.cex = 1,
  id.col = palette()[1],
  xlab,
  ylab,
  main,
  what = deparse(substitute(x)),
  ylim,
  ...
)
}
\arguments{
\item{x}{either a numeric data frame or matrix for the default method, or an
object of class \code{"mlm"} representing a multivariate linear model.  In
the latter case, residuals from the model are plotted.}

\item{\dots}{Other arguments passed to methods}

\item{method}{estimation method used for center and covariance, one of:
\code{"classical"} (product-moment), \code{"mcd"} (minimum covariance
determinant), or \code{"mve"} (minimum volume ellipsoid).}

\item{detrend}{logical; if \code{FALSE}, the plot shows values of \eqn{D^2}
vs. \eqn{\chi^2}. If \code{TRUE}, the ordinate shows values of
\eqn{D^2 - \chi^2}.}

\item{pch}{plot symbol for points. Can be a vector of length equal to the
number of rows in \code{x}.}

\item{col}{color for points. Can be a vector of length equal to the
number of rows in \code{x}.
The default is the \emph{first} entry in the
current color palette (see \code{\link[grDevices]{palette}} and
\code{\link[graphics]{par}}).}

\item{cex}{character symbol size for points.  Can be a vector of length
equal to the number of rows in \code{x}.}

\item{ref.col}{Color for the reference line}

\item{ref.lwd}{Line width for the reference line}

\item{conf}{confidence coverage for the approximate confidence envelope}

\item{env.col}{line color for the boundary of the confidence envelope}

\item{env.lwd}{line width for the confidence envelope}

\item{env.lty}{line type for the confidence envelope}

\item{env.fill}{logical; should the confidence envelope be filled?}

\item{fill.alpha}{transparency value for \code{fill.color}}

\item{fill.color}{color used to fill the confidence envelope}

\item{labels}{vector of text strings to be used to identify points; defaults
to \code{rownames(x)}, or to observation numbers if \code{rownames(x)} is
\code{NULL}.}

\item{id.n}{number of points labeled. If \code{id.n=0}, the default, no
point identification occurs.}

\item{id.method}{point identification method. The default
\code{id.method="r"} will identify the \code{id.n} points with the largest
values of \code{abs(y)}, i.e., the largest Mahalanobis DSQ. See \code{\link[car]{showLabels}} for other
options.}

\item{id.cex}{size of text for point labels}

\item{id.col}{color for point labels}

\item{xlab}{label for horizontal (theoretical quantiles) axis}

\item{ylab}{label for vertical (empirical quantiles) axis}

\item{main}{plot title}

\item{what}{the name of the object plotted; used in the construction of
\code{main} when that is not specified.}

\item{ylim}{limits for vertical axis.  If not specified, the range of the
confidence envelope is used.}
}
\value{
Invisibly returns a data.frame containing the squared Mahalanobis distances (\code{DSQ}),
their \code{quantile}s and \code{p}-values for the identified points,
corresponding to rows of \code{x} or to residuals of the model.
Returns \code{NULL} if no points are identified.
}
\description{
A chi square quantile-quantile plot shows the relationship between
data-based values which should be distributed as \eqn{\chi^2} and
corresponding quantiles from the \eqn{\chi^2} distribution.  In multivariate
analyses, this is often used both to assess multivariate normality and check
for or identify outliers.

For a data frame of numeric variables or a matrix supplied as the argument \code{x},
the plot uses the Mahalanobis squared distances (\eqn{D^2}) of
observations \eqn{\mathbf{x}} from the centroid \eqn{\bar{\mathbf{x}}},
taking the sample covariance matrix \eqn{\mathbf{S}} into account,
\deqn{
D^2 = (\mathbf{x} - \bar{\mathbf{x}})^\prime \; \mathbf{S}^{-1} \; (\mathbf{x} - \bar{\mathbf{x}}) \; .
}

The method for \code{"mlm"} objects fit using \code{\link[stats]{lm}} for a multivariate response
applies this to the residuals from the model.
}
\details{
\code{cqplot} is a more general version of similar functions in other
packages that produce chi square QQ plots. It allows for classical
Mahalanobis squared distances as well as robust estimates based on the MVE
and MCD; it provides an approximate confidence (concentration) envelope
around the line of unit slope, a detrended version, where the reference line
is horizontal, the ability to identify or label unusual points, and other
graphical features.

Cases with any missing values are excluded from the calculation and graph with a warning.

\subsection{Confidence envelope}{
In the typical use of QQ plots, it is essential to have something in the nature of a confidence band
around the points to be able to appreciate whether, and to what degree, the observed data points
differ from the reference distribution. For \code{cqplot}, this helps to assess whether the
data are reasonably distributed as multivariate normal and also to flag potential outliers.

The calculation of the confidence envelope here follows that used in the SAS
program \url{http://www.datavis.ca/sasmac/cqplot.html}, which comes from
Chambers et al. (1983), Section 6.8.

The essential formula computes the standard errors as:
\deqn{ \text{se} ( D^2_{(i)} ) = \frac{\hat{b}} {d ( q_i)} \times \sqrt{  p_i (1-p_i) / n } }
where \eqn{D^2_{(i)}} is the i-th
ordered value of \eqn{D^2}, \eqn{\hat{b}} is an estimate of the slope of
the reference line obtained from the ratio of the interquartile range of the
\eqn{D^2} values to that of the \eqn{\chi^2_p} distribution,
\eqn{d(q_i)} is the density of the chi square distribution at the quantile
\eqn{q_i}, \eqn{p_i} is the cumulative probability corresponding to
\eqn{q_i}, and \eqn{n} is the number of observations.

The pointwise confidence envelope of coverage \code{conf} = \eqn{1-\alpha} is then calculated as
\eqn{D^2_{(i)} \pm z_{1-\alpha/2} \text{se} ( D^2_{(i)} )}.

Note that this confidence envelope applies only to the \eqn{D^2} computed
using the classical estimates of location (\eqn{\bar{\mathbf{x}}}) and scatter (\eqn{\mathbf{S}}). The
\code{\link[car]{qqPlot}}
function provides for simulated envelopes, but only for
a univariate measure. Oldford (2016) provides a general theory and methods
for QQ plots.
}
}
\examples{


cqplot(iris[, 1:4])

iris.mod <- lm(as.matrix(iris[,1:4]) ~ Species, data=iris)
out <- cqplot(iris.mod, id.n=3)
# show return value
out

# compare with car::qqPlot
car::qqPlot(Mahalanobis(iris[, 1:4]), dist="chisq", df=4)


# Adopted data
Adopted.mod <- lm(cbind(Age2IQ, Age4IQ, Age8IQ, Age13IQ) ~ AMED + BMIQ, 
                  data=Adopted)
cqplot(Adopted.mod, id.n=3)
cqplot(Adopted.mod, id.n=3, method="mve")


# Sake data
Sake.mod <- lm(cbind(taste, smell) ~ ., data=Sake)
cqplot(Sake.mod)
cqplot(Sake.mod, method="mve", id.n=2)

# SocialCog data -- one extreme outlier
data(SocialCog)
SC.mlm <-  lm(cbind(MgeEmotions,ToM, ExtBias, PersBias) ~ Dx,
               data=SocialCog)
cqplot(SC.mlm, id.n=1)

# data frame example: stackloss data
data(stackloss)
cqplot(stackloss[, 1:3], id.n=4)                # very strange
cqplot(stackloss[, 1:3], id.n=4, detrend=TRUE)
cqplot(stackloss[, 1:3], id.n=4, method="mve")
cqplot(stackloss[, 1:3], id.n=4, method="mcd")



}
\references{
J. Chambers, W. S. Cleveland, B. Kleiner, P. A. Tukey (1983).
\emph{Graphical methods for data analysis}, Wadsworth.

R. W. Oldford (2016), "Self calibrating quantile-quantile plots",
\emph{The American Statistician}, 70, 74-90.
}
\seealso{
\code{\link{Mahalanobis}} for calculation of Mahalanobis squared distance;

\code{\link[stats]{qqplot}}; \code{\link[car]{qqPlot}} can give a similar
result for Mahalanobis squared distances of data or residuals;
\code{\link[qqtest]{qqtest}} has many features for all types of QQ plots.

Other diagnostic plots: 
\code{\link{distancePlot}()},
\code{\link{plot.boxM}()}
}
\author{
Michael Friendly
}
\concept{diagnostic plots}
\keyword{hplot}
\keyword{multivariate}
