Flag outliers based on species ecological ranges.

library(specleanr)

Introduction to outlier detection based on species ecological ranges.

Example using species ecological ranges with other outlier detection methods.

1. Loading example datasets

# Load both example datasets in a single call.
data(list = c("jdsdata", "efidata"))

# Example climate raster stored under extdata/ in the specleanr package.
worldclim_file <- system.file("extdata/worldclim.tiff", package = "specleanr")
wcd <- terra::rast(worldclim_file)

# Match and clean: combine the two example datasets into one object.
# `species` and `date` each list two column names -- presumably the
# spellings used by the respective datasets (confirm against the data).
matchd <- match_datasets(
  datasets = list(jds = jdsdata, efi = efidata),
  lats = "lat",
  lons = "lon",
  country = "JDS4_site_ID",
  species = c("scientificName", "speciesname"),
  date = c("sampling_date", "Date")
)

# Species-name check, currently disabled:
# matchclean <- check_names(matchd, colsp = 'species', verbose = FALSE, merge = TRUE)

# Danube shapefile stored under extdata/ in the specleanr package; it is
# supplied as `bbox` when the environmental predictors are extracted.
danube_file <- system.file("extdata/danube.shp.zip", package = "specleanr")
db <- sf::read_sf(danube_file, quiet = TRUE)

2. Extracting environmental predictors from the WorldClim dataset


# Extract predictor values from the `wcd` raster for the matched records,
# using `db` as the bounding geometry. The result is later subset by
# species name, so it is requested as a list (`list = TRUE`).
refdata <- pred_extract(
  data = matchd,
  raster = wcd,
  lat = "decimalLatitude",
  lon = "decimalLongitude",
  bbox = db,
  colsp = "species",
  list = TRUE,
  verbose = FALSE,
  minpts = 6, # presumably the minimum number of records per species
  merge = FALSE
)

3. Preparing ecological ranges for Squalius cephalus

NOTE


# Single-bracket subsetting keeps the enclosing container, which is why the
# records themselves are reached with `sqcep[[1]]` further down.
sqcep <- refdata["Squalius cephalus"]

# Ecological range table, one row per species. The `species`, `mintemp`
# and `maxtemp` columns are the ones referenced through `optpar` in the
# outlier-detection call.
optdata <- data.frame(
  species = c("Squalius cephalus", "Abramis brama"),
  mintemp = c(6, 1.6),
  maxtemp = c(8.588, 21),
  meantemp = c(8.5, 10.4), # ecoparam
  direction = rep("greater", 2)
)

4. Outlier detection with univariate, multivariate and species ecological ranges


# Run several outlier detection methods on the `bio1` variable of the
# selected species. `optpar` points at the range table `optdata` and names
# its species, minimum and maximum columns -- presumably consumed by the
# "optimal" method (confirm in the multidetect documentation).
squalius_outlier <- multidetect(
  data = sqcep,
  multiple = TRUE,
  var = "bio1",
  output = "outlier",
  exclude = c("x", "y"),
  methods = c(
    "zscore", "adjbox", "optimal", "kmeans", "logboxplot", "hampel"
  ),
  optpar = list(
    optdf = optdata,
    optspcol = "species",
    mincol = "mintemp",
    maxcol = "maxtemp"
  )
)

Visualise the number of outliers detected by each method


# Plot the detection results held in `squalius_outlier`.
ggoutliers(squalius_outlier)

Obtaining a quality-controlled dataset using the loess method or data labelling


# Quality-controlled records, with the loess option switched on.
squalius_qc_loess <- extract_clean_data(
  refdata = sqcep,
  outliers = squalius_outlier,
  loess = TRUE
)

# clean dataset
nrow(squalius_qc_loess)
#> [1] 19

# reference data
nrow(sqcep[[1]])
#> [1] 19

# Labelled version of the same records; this is the object handed to
# ggenvironmentalspace() for plotting.
squalius_qc_labeled <- classify_data(
  refdata = sqcep,
  outliers = squalius_outlier
)

Visualise the labelled, quality-controlled dataset



# Plot the labelled records; both axis titles are set explicitly.
ggenvironmentalspace(
  squalius_qc_labeled,
  type = "1D",
  ggxangle = 45,
  scalecolor = "viridis",
  xhjust = 1,
  legend_position = "blank",
  ylab = "Number of records",
  xlab = "Outlier labels"
)

Summary explanation

References

  1. Schmidt-Kloiber, A., & Hering, D. (2015). www.freshwaterecology.info – an online tool that unifies, standardises and codifies more than 20,000 European freshwater organisms and their ecological preferences. Ecological Indicators, 53, 271-282.
  2. Froese, R., & Pauly, D. (2014). FishBase. World Wide Web electronic publication. www.fishbase.org.