man/analyse_baSAR.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/analyse_baSAR.R
\name{analyse_baSAR}
\alias{analyse_baSAR}
\title{Bayesian models (baSAR) applied on luminescence data}
\usage{
analyse_baSAR(
  object,
  CSV_file = NULL,
  aliquot_range = NULL,
  source_doserate = NULL,
  signal.integral,
  signal.integral.Tx = NULL,
  background.integral,
  background.integral.Tx = NULL,
  irradiation_times = NULL,
  sigmab = 0,
  sig0 = 0.025,
  distribution = "cauchy",
  baSAR_model = NULL,
  n.MCMC = 1e+05,
  fit.method = "EXP",
  fit.force_through_origin = TRUE,
  fit.includingRepeatedRegPoints = TRUE,
  method_control = list(),
  digits = 3L,
  distribution_plot = "kde",
  plot = TRUE,
  plot_reduced = TRUE,
  plot_singlePanels = FALSE,
  verbose = TRUE,
  ...
)
}
\arguments{
\item{object}{\linkS4class{Risoe.BINfileData}, \linkS4class{RLum.Results}, \link{list} of \linkS4class{RLum.Analysis},
\link{character} or \link{list} (\strong{required}):
input object used for the Bayesian analysis. If a \code{character} is provided the function
assumes a file connection and tries to import a BIN/BINX-file using the provided path. If a \code{list} is
provided the list can only contain either \code{Risoe.BINfileData} objects or \code{character}s
providing a file connection. Mixing of both types is not allowed. If an \linkS4class{RLum.Results}
is provided the function directly starts with the Bayesian Analysis (see details)}

\item{CSV_file}{\link{character} or \link{data.frame} (\emph{optional}):
if a \code{character}, it must be the path to a CSV file with data for the
analysis. Either way, data should contain 3 columns:
the name of the file, the disc position and the grain position
(the last being 0 for multi-grain measurements).\cr}

\item{aliquot_range}{\link{numeric} (\emph{optional}):
allows to limit the range of the aliquots used for the analysis.
This argument has only an effect if the argument \code{CSV_file} is used or
the input is the previous output (i.e. is \linkS4class{RLum.Results}). In this case the
new selection will add the aliquots to the removed aliquots table.}

\item{source_doserate}{\link{numeric} (\strong{required}):
source dose rate of beta-source used for the measurement and its uncertainty
in Gy/s, e.g., \code{source_doserate = c(0.12, 0.04)}. Parameter can be provided
as \code{list}, for the case that more than one BIN-file is provided, e.g.,
\code{source_doserate = list(c(0.04, 0.004), c(0.05, 0.004))}.}

\item{signal.integral}{\link{vector} (\strong{required}):
vector with the limits for the signal integral used for the calculation,
e.g., \code{signal.integral = c(1:5)}. Ignored if \code{object} is an \linkS4class{RLum.Results} object.
The parameter can be provided as \code{list}, see \code{source_doserate}.}

\item{signal.integral.Tx}{\link{vector} (\emph{optional}):
vector with the limits for the signal integral for the Tx curve.
If nothing is provided the value from \code{signal.integral} is used and it is ignored
if \code{object} is an \linkS4class{RLum.Results} object.
The parameter can be provided as \code{list}, see \code{source_doserate}.}

\item{background.integral}{\link{vector} (\strong{required}):
vector with the bounds for the background integral.
Ignored if \code{object} is an \linkS4class{RLum.Results} object.
The parameter can be provided as \code{list}, see \code{source_doserate}.}

\item{background.integral.Tx}{\link{vector} (\emph{optional}):
vector with the limits for the background integral for the Tx curve.
If nothing is provided the value from \code{background.integral} is used.
Ignored if \code{object} is an \linkS4class{RLum.Results} object.
The parameter can be provided as \code{list}, see \code{source_doserate}.}

\item{irradiation_times}{\link{numeric} (\emph{optional}): if set this vector replaces all irradiation
times for one aliquot and one cycle (Lx and Tx curves) and recycles it for all other cycles and aliquots.
Please note that if this argument is used, for every(!) single curve
in the dataset an irradiation time needs to be set.}

\item{sigmab}{\link{numeric} (\emph{with default}):
option to set a manual value for the overdispersion (for \code{LnTx} and \code{TnTx}),
used for the \code{Lx}/\code{Tx} error calculation. The value should be provided as
absolute squared count values, cf. \link{calc_OSLLxTxRatio}.
The parameter can be provided as \code{list}, see \code{source_doserate}.}

\item{sig0}{\link{numeric} (\emph{with default}):
allows adding an extra component of error to the final Lx/Tx error value
(e.g., instrumental error, see details in \link{calc_OSLLxTxRatio}).
The parameter can be provided as \code{list}, see \code{source_doserate}.}

\item{distribution}{\link{character} (\emph{with default}):
type of distribution that is used during Bayesian calculations for
determining the Central dose and overdispersion values.
Allowed inputs are \code{"cauchy"}, \code{"normal"} and \code{"log_normal"}.}

\item{baSAR_model}{\link{character} (\emph{optional}):
option to provide an own modified or new model for the Bayesian calculation
(see details). If an own model is provided the argument \code{distribution} is
ignored and set to \code{'user_defined'}}

\item{n.MCMC}{\link{integer} (\emph{with default}):
number of iterations for the Markov chain Monte Carlo (MCMC) simulations}

\item{fit.method}{\link{character} (\emph{with default}):
equation used for the fitting of the dose-response curve using the function
\link{plot_GrowthCurve} and then for the Bayesian modelling. Here supported methods: \code{EXP}, \code{EXP+LIN} and \code{LIN}}

\item{fit.force_through_origin}{\link{logical} (\emph{with default}):
force fitting through origin}

\item{fit.includingRepeatedRegPoints}{\link{logical} (\emph{with default}):
includes the recycling point (assumed to be measured during the last cycle)}

\item{method_control}{\link{list} (\emph{optional}):
named list of control parameters that can be directly
passed to the Bayesian analysis, e.g., \code{method_control = list(n.chains = 4)}.
See details for further information}

\item{digits}{\link{integer} (\emph{with default}):
round output to the number of given digits}

\item{distribution_plot}{\link{character} (\emph{with default}): sets the final distribution plot that
shows equivalent doses obtained using the frequentist approach and sets in the central dose
as comparison obtained using baSAR. Allowed input is \code{'abanico'} or \code{'kde'}. If set to \code{NULL} nothing is plotted.}

\item{plot}{\link{logical} (\emph{with default}):
enables or disables plot output}

\item{plot_reduced}{\link{logical} (\emph{with default}):
enables or disables the advanced plot output}

\item{plot_singlePanels}{\link{logical} (\emph{with default}):
enables or disables single plots or plots arranged by \code{analyse_baSAR}}

\item{verbose}{\link{logical} (\emph{with default}):
enables or disables verbose mode}

\item{...}{parameters that can be passed to the function \link{calc_OSLLxTxRatio}
(almost full support), \link[data.table:fread]{data.table::fread} (\code{skip}), \link{read_BIN2R} (\code{n.records},
\code{position}, \code{duplicated.rm}), see details.}
}
\value{
Function returns results numerically and graphically:

-----------------------------------\cr
\verb{[ NUMERICAL OUTPUT ]}\cr
-----------------------------------\cr

\strong{\code{RLum.Results}}-object

\strong{slot:} \strong{\verb{@data}}

\tabular{lll}{
\strong{Element} \tab \strong{Type} \tab \strong{Description}\cr
\verb{$summary} \tab \code{data.frame} \tab statistical summary, including the central dose \cr
\verb{$mcmc} \tab \code{mcmc} \tab \link[coda:mcmc.list]{coda::mcmc.list} object including raw output \cr
\verb{$models} \tab \code{character} \tab implemented models used in the baSAR-model core \cr
\verb{$input_object} \tab \code{data.frame} \tab summarising table (same format as the CSV-file) including, e.g., Lx/Tx values\cr
\verb{$removed_aliquots} \tab \code{data.frame} \tab table with removed aliquots (e.g., \code{NaN}, or \code{Inf} \code{Lx}/\code{Tx} values). If nothing was removed \code{NULL} is returned
}

\strong{slot:} \strong{\verb{@info}}

The original function call

------------------------\cr
\verb{[ PLOT OUTPUT ]}\cr
------------------------\cr
\itemize{
\item (A) Ln/Tn curves with set integration limits,
\item (B) trace plots are returned by the baSAR-model, showing the convergence of the parameters (trace)
and the resulting kernel density plots. If \code{plot_reduced = FALSE} for every(!) dose a trace and
a density plot is returned (this may take a long time),
\item (C) dose plots showing the dose for every aliquot as boxplots and the marked
HPD within. If boxes are coloured 'orange' or 'red' the aliquot itself should be checked,
\item (D) the dose response curves resulting from the monitoring of the Bayesian modelling are
provided along with the Lx/Tx values and the HPD. Note: The number of curves displayed
is limited to 1000 (random choice) for performance reasons,
\item (E) the final plot is the De distribution as calculated using the conventional (frequentist) approach
and the central dose with the HPDs marked within. This figure is only provided for a comparison,
no further statistical conclusion should be drawn from it.
}

\strong{Please note: If distribution was set to \code{log_normal} the central dose is given as geometric mean!}
}
\description{
This function allows the application of Bayesian models on luminescence data, measured
with the single-aliquot regenerative-dose (SAR, Murray and Wintle, 2000) protocol. In particular,
it follows the idea proposed by Combès et al., 2015 of using a hierarchical model for estimating
a central equivalent dose from a set of luminescence measurements. This function is (I) the adoption
of this approach for the R environment and (II) an extension and a technical refinement of the
published code.
}
\details{
Internally the function consists of two parts: (I) The Bayesian core for the Bayesian calculations
and applying the hierarchical model and (II) a data pre-processing part. The Bayesian core can be run
independently, if the input data are sufficient (see below). The data pre-processing part was
implemented to simplify the analysis for the user as all needed data pre-processing is done
by the function, i.e. in theory it is enough to provide a BIN/BINX-file with the SAR measurement
data. For the Bayesian analysis for each aliquot the following information is needed from the SAR analysis:
\code{LxTx}, the \code{LxTx} error and the dose values for all regeneration points.

\strong{How is the systematic error contribution calculated?}

Standard errors (so far) provided with the source dose rate are considered as systematic uncertainties
and added to final central dose by:

\deqn{systematic.error = 1/n \sum SE(source.doserate)}

\deqn{SE(central.dose.final) = \sqrt{SE(central.dose)^2 + systematic.error^2}}

Please note that this approach is rather rough and can only be valid if the source dose rate
errors, in case different readers had been used, are similar. In cases where more than
one source dose rate is provided a warning is given.

\strong{Input / output scenarios}

Various inputs are allowed for this function. Unfortunately this makes the function handling rather
complex, but at the same time very powerful. Available scenarios:

\strong{(1) - \code{object} is BIN-file or link to a BIN-file}

Finally it does not matter how the information of the BIN/BINX file are provided. The function
supports \strong{(a)} either a path to a file or directory or a \code{list} of file names or paths or
\strong{(b)} a \linkS4class{Risoe.BINfileData} object or a list of these objects. The latter one can
be produced by using the function \link{read_BIN2R}, but this function is called automatically
if only a file name and/or a path is provided. In both cases it will become the data that can be
used for the analysis.

\verb{[CSV_file = NULL]}

If no CSV file (or data frame with the same format) is provided, the
function runs an automatic process that consists of the following steps:
\enumerate{
\item Select all valid aliquots using the function \link{verify_SingleGrainData}
\item Calculate \code{Lx/Tx} values using the function \link{calc_OSLLxTxRatio}
\item Calculate De values using the function \link{plot_GrowthCurve}
}

These processed data are subsequently used for the Bayesian analysis.

\verb{[CSV_file != NULL]}

If a CSV file is provided (or a \code{data.frame} containing similar information)
the pre-processing phase consists of the following steps:
\enumerate{
\item Calculate \code{Lx/Tx} values using the function \link{calc_OSLLxTxRatio}
\item Calculate De values using the function \link{plot_GrowthCurve}
}

The CSV file should contain the BIN-file names and the aliquots selected
for the further analysis. This allows a manual selection of input data, as the automatic selection
by \link{verify_SingleGrainData} might not be totally sufficient.

\strong{(2) - \code{object} is an \verb{RLum.Results} object}

If an \linkS4class{RLum.Results} object is provided as input and(!) this object was
previously created by the function \code{analyse_baSAR()} itself, the pre-processing part
is skipped and the function starts directly with the Bayesian analysis. This option is very powerful
as it allows to change parameters for the Bayesian analysis without the need to repeat
the data pre-processing. If furthermore the argument \code{aliquot_range} is set, aliquots
can be manually excluded based on previous runs.

\strong{\code{method_control}}

These are arguments that can be passed directly to the Bayesian calculation core, supported arguments
are:

\tabular{lll}{
\strong{Parameter} \tab \strong{Type} \tab \strong{Description}\cr
\code{lower_centralD} \tab \link{numeric} \tab sets the lower bound for the expected De range. Change it only if you know what you are doing!\cr
\code{upper_centralD} \tab \link{numeric} \tab sets the upper bound for the expected De range. Change it only if you know what you are doing!\cr
\code{n.chains} \tab \link{integer} \tab sets number of parallel chains for the model (default = 3) (cf. \link[rjags:jags.model]{rjags::jags.model})\cr
\code{inits} \tab \link{list} \tab option to set initialisation values (cf. \link[rjags:jags.model]{rjags::jags.model}) \cr
\code{thin} \tab \link{numeric} \tab thinning interval for monitoring the Bayesian process (cf. \link[rjags:jags.model]{rjags::jags.model})\cr
\code{variable.names} \tab \link{character} \tab set the variables to be monitored during the MCMC run, default:
\code{'central_D'}, \code{'sigma_D'}, \code{'D'}, \code{'Q'}, \code{'a'}, \code{'b'}, \code{'c'}, \code{'g'}.
Note: only variables present in the model can be monitored.
}

\strong{User defined models}\cr

The function provides the option to modify and to define own models that can be used for
the Bayesian calculation. In the case the user wants to modify a model, a new model
can be piped into the function via the argument \code{baSAR_model} as \code{character}.
The model has to be provided in the JAGS dialect of the BUGS language (cf. \link[rjags:jags.model]{rjags::jags.model})
and parameter names given with the pre-defined names have to be respected, otherwise the function
will break.

\strong{FAQ}

Q: How can I set the seed for the random number generator (RNG)?

A: Use the argument \code{method_control}, e.g., for three MCMC chains
(as it is the default):

\if{html}{\out{<div class="sourceCode">}}\preformatted{method_control = list(
inits = list(
 list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 1),
 list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 2),
 list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 3)
))
}\if{html}{\out{</div>}}

This sets a reproducible seed for every chain separately.\cr

Q: How can I modify the output plots?

A: You can't, but you can use the function output to create own, modified plots.

Q: Can I change the boundaries for the central_D?

A: Yes, we made it possible, but we DO NOT recommend it, unless you know what you are doing!\cr
Example: \verb{method_control = list(lower_centralD = 10)}

Q: The lines in the baSAR-model appear to be in a wrong logical order?\cr

A: This is correct and allowed (cf. JAGS manual)

\strong{Additional arguments support via the \code{...} argument}

This list summarizes the additional arguments that can be passed to the internally used
functions.

\tabular{llll}{
\strong{Supported argument} \tab \strong{Corresponding function} \tab \strong{Default} \tab \strong{Short description}\cr
\code{threshold} \tab \link{verify_SingleGrainData} \tab \code{30} \tab change rejection threshold for curve selection \cr
\code{skip} \tab \link[data.table:fread]{data.table::fread} \tab \code{0} \tab number of rows to be skipped during import\cr
\code{n.records} \tab \link{read_BIN2R} \tab \code{NULL} \tab limit records during BIN-file import\cr
\code{duplicated.rm} \tab \link{read_BIN2R} \tab \code{TRUE} \tab remove duplicated records in the BIN-file\cr
\code{pattern} \tab \link{read_BIN2R} \tab \code{TRUE} \tab select BIN-file by name pattern\cr
\code{position} \tab \link{read_BIN2R} \tab \code{NULL} \tab limit import to a specific position\cr
\code{background.count.distribution} \tab \link{calc_OSLLxTxRatio} \tab \code{"non-poisson"} \tab set assumed count distribution\cr
\code{fit.weights} \tab \link{plot_GrowthCurve} \tab \code{TRUE} \tab enables / disables fit weights\cr
\code{fit.bounds} \tab \link{plot_GrowthCurve} \tab \code{TRUE} \tab enables / disables fit bounds\cr
\code{n.MC} \tab \link{plot_GrowthCurve} \tab \code{100} \tab number of MC runs for error calculation\cr
\code{output.plot} \tab \link{plot_GrowthCurve} \tab \code{TRUE} \tab enables / disables dose response curve plot\cr
\code{output.plotExtended} \tab \link{plot_GrowthCurve} \tab \code{TRUE} \tab enables / disables extended dose response curve plot\cr
\code{recordType} \tab \link{get_RLum} \tab \verb{c(OSL (UVVIS), irradiation (NA))} \tab helps for the curve selection\cr
}
}
\note{
\strong{If you provide more than one BIN-file}, it is \strong{strongly} recommended to provide
a \code{list} with the same number of elements for the following parameters:

\code{source_doserate}, \code{signal.integral}, \code{signal.integral.Tx}, \code{background.integral},
\code{background.integral.Tx}, \code{sigmab}, \code{sig0}.

Example for two BIN-files: \code{source_doserate = list(c(0.04, 0.006), c(0.05, 0.006))}

\strong{The function is currently limited to work with standard Risoe BIN-files only!}
}
\section{Function version}{
 0.1.36
}

\examples{

##(1) load package test data set
data(ExampleData.BINfileData, envir = environment())

##(2) selecting relevant curves, and limit dataset
CWOSL.SAR.Data <- subset(
  CWOSL.SAR.Data,
  subset = POSITION\%in\%c(1:3) & LTYPE == "OSL")

\dontrun{
##(3) run analysis
##please note that the parameters selected here are
##chosen for performance, not for reliability
results <- analyse_baSAR(
  object = CWOSL.SAR.Data,
  source_doserate = c(0.04, 0.001),
  signal.integral = c(1:2),
  background.integral = c(80:100),
  fit.method = "LIN",
  plot = FALSE,
  n.MCMC = 200

)

print(results)


##CSV_file template
##copy and paste the code below into the terminal
##you can further use the function write.csv() to export the example

CSV_file <-
structure(
list(
 BIN_FILE = NA_character_,
 DISC = NA_real_,
 GRAIN = NA_real_),
   .Names = c("BIN_FILE", "DISC", "GRAIN"),
   class = "data.frame",
   row.names = 1L
)

}

} 

\section{How to cite}{
Mercier, N., Kreutzer, S., 2024. analyse_baSAR(): Bayesian models (baSAR) applied on luminescence data. Function version 0.1.33. In: Kreutzer, S., Burow, C., Dietze, M., Fuchs, M.C., Schmidt, C., Fischer, M., Friedrich, J., Mercier, N., Philippe, A., Riedesel, S., Autzen, M., Mittelstrass, D., Gray, H.J., Galharret, J., Colombo, M., 2024. Luminescence: Comprehensive Luminescence Dating Data Analysis. R package version 0.9.25. https://r-lum.github.io/Luminescence/
}

\references{
Combès, B., Philippe, A., Lanos, P., Mercier, N., Tribolo, C., Guerin, G., Guibert, P., Lahaye, C., 2015.
A Bayesian central equivalent dose model for optically stimulated luminescence dating.
Quaternary Geochronology 28, 62-70. doi:10.1016/j.quageo.2015.04.001

Mercier, N., Kreutzer, S., Christophe, C., Guerin, G., Guibert, P., Lahaye, C., Lanos, P., Philippe, A.,
Tribolo, C., 2016. Bayesian statistics in luminescence dating: The 'baSAR'-model and its implementation
in the R package 'Luminescence'. Ancient TL 34, 14-21.

\strong{Further reading}

Gelman, A., Carlin, J.B., Stern, H.S., Dunson, D.B., Vehtari, A., Rubin, D.B., 2013.
Bayesian Data Analysis, Third Edition. CRC Press.

Murray, A.S., Wintle, A.G., 2000. Luminescence dating of quartz using an improved single-aliquot
regenerative-dose protocol. Radiation Measurements 32, 57-73. doi:10.1016/S1350-4487(99)00253-X

Plummer, M., 2017. JAGS Version 4.3.0 user manual. \verb{https://sourceforge.net/projects/mcmc-jags/files/Manuals/4.x/jags_user_manual.pdf/download}
}
\seealso{
\link{read_BIN2R}, \link{calc_OSLLxTxRatio}, \link{plot_GrowthCurve},
\link[data.table:fread]{data.table::fread}, \link{verify_SingleGrainData},
\link[rjags:jags.model]{rjags::jags.model}, \link[rjags:coda.samples]{rjags::coda.samples}, \link{boxplot.default}
}
\author{
Norbert Mercier, Archaésciences Bordeaux, CNRS-Université Bordeaux Montaigne (France) \cr
Sebastian Kreutzer, Institute of Geography, Heidelberg University (Germany) \cr
The underlying Bayesian model is based on a contribution by Combès et al., 2015.
, RLum Developer Team}
\keyword{datagen}