Skip to content

Commit

Permalink
Merge pull request #361 from ncss-tech/get_SDA-updates
Browse files Browse the repository at this point in the history
`get_SDA_*()` function updates
  • Loading branch information
brownag authored Oct 7, 2024
2 parents f1f1057 + bd675ee commit a7f6d22
Show file tree
Hide file tree
Showing 10 changed files with 277 additions and 165 deletions.
86 changes: 50 additions & 36 deletions R/get_SDA_cosurfmorph.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@
#' Get Geomorphic/Surface Morphometry Data from Soil Data Access or a local SSURGO data source and summarize by counts and proportions ("probabilities").
#'
#' @param table Target table to summarize. Default: `"cosurfmorphgc"` (3D Geomorphic Component). Alternate choices include `cosurfmorphhpp` (2D Hillslope Position), `cosurfmorphss` (Surface Shape), and `cosurfmorphmr` (Microrelief).
#' @param by Grouping variable. Default: `"compname"`
#' @param by Grouping variable. Default: `"mapunit.mukey"`
#' @param areasymbols A vector of soil survey area symbols (e.g. `'CA067'`)
#' @param mukeys A vector of map unit keys (e.g. `466627`)
#' @param WHERE WHERE clause added to SQL query. For example: `areasymbol = 'CA067'`
#' @param miscellaneous_areas Include miscellaneous areas (non-soil components) in results? Default: `FALSE`.
#' @param method _character_. One of: `"ByGroup"`, `"None"`
#' @param include_minors logical. Include minor components? Default: `TRUE`.
#' @param miscellaneous_areas logical. Include miscellaneous areas (non-soil components) in results? Default: `FALSE`.
#' @param representative_only logical. Include only representative Component Parent Material Groups? Default: `TRUE`.
#' @param db Either `'SSURGO'` (default) or `'STATSGO'`. If `'SSURGO'` is specified `areasymbol = 'US'` records are excluded. If `'STATSGO'` only `areasymbol = 'US'` records are included.
#' @param dsn Path to local SSURGO database SQLite database. Default `NULL` uses Soil Data Access.
#' @param query_string Return query instead of sending to Soil Data Access / local database. Default: `FALSE`.
Expand Down Expand Up @@ -46,18 +49,23 @@
#' get_SDA_cosurfmorph('cosurfmorphmr', WHERE = "areasymbol = 'CA630'")
#' }
get_SDA_cosurfmorph <- function(table = c("cosurfmorphgc", "cosurfmorphhpp", "cosurfmorphss", "cosurfmorphmr"),
by = "compname",
by = "mapunit.mukey",
areasymbols = NULL,
mukeys = NULL,
WHERE = NULL,
method = c("bygroup", "none"),
include_minors = TRUE,
miscellaneous_areas = FALSE,
representative_only = TRUE,
db = c('SSURGO', 'STATSGO'),
dsn = NULL,
query_string = FALSE) {

if (is.null(mukeys) && is.null(areasymbols) && is.null(WHERE)) {
stop("Please specify one of the following arguments: mukeys, areasymbols, WHERE", call. = FALSE)
}

method <- match.arg(toupper(method), c("BYGROUP", "NONE"))

if (!is.null(mukeys)) {
WHERE <- paste("mapunit.mukey IN", format_SQL_in_statement(as.integer(mukeys)))
Expand All @@ -67,7 +75,7 @@ get_SDA_cosurfmorph <- function(table = c("cosurfmorphgc", "cosurfmorphhpp", "co

db <- match.arg(toupper(db), choices = c('SSURGO', 'STATSGO'))
table <- match.arg(tolower(table), choices = c("cosurfmorphgc", "cosurfmorphhpp", "cosurfmorphss", "cosurfmorphmr"))
statsgo_filter <- switch(db, SSURGO = "!=", STATSGO = "=")
statsgo_filter <- switch(db, SSURGO = "legend.areasymbol != 'US'", STATSGO = "legend.areasymbol == 'US'", "1=1")

vars <- switch(table,
"cosurfmorphgc" = c("geomposmntn", "geomposhill", "geomposflats", "geompostrce"),
Expand All @@ -76,6 +84,7 @@ get_SDA_cosurfmorph <- function(table = c("cosurfmorphgc", "cosurfmorphhpp", "co
# NOTE: surfaceshape is calculated CONCAT(shapeacross, '/', shapedown)
"cosurfmorphmr" = "geomicrorelief")

# TODO: weight probabilities by component percentage? needs refactor
.SELECT_STATEMENT0 <- function(v) {
paste0(paste0(v, ", ", paste0(v, "_n"), ", ", paste0(paste0("round(", v, "_n / total, 2) AS p_", v)), collapse = ", "))
}
Expand All @@ -95,78 +104,83 @@ get_SDA_cosurfmorph <- function(table = c("cosurfmorphgc", "cosurfmorphhpp", "co
res
}

.JOIN_TABLE <- function(x) {
sprintf("LEFT JOIN %s ON cogeomordesc.cogeomdkey = %s.cogeomdkey", x, x)
}

.NULL_FILTER <- function(v, miscellaneous_areas = FALSE) {
if (miscellaneous_areas) return("1=1")
paste0(paste0(v, collapse = " IS NOT NULL OR "), " IS NOT NULL")
}

.ORDER_COLUMNS <- function(v) {
paste0(paste0(paste0("p_", v), collapse = " DESC, "), " DESC")
}

# excludes custom calculated columns (e.g. surfaceshape concatenated from across/down)
vars_default <- vars[!grepl("surfaceshape", vars)]

misc_area_join_type <- ifelse(miscellaneous_areas, "LEFT", "INNER")
misc_area_filter <- ifelse(miscellaneous_areas, "LEFT", "INNER")
q <- paste0("SELECT a.[BYVARNAME] AS [BYVARNAME],
if (method == "BYGROUP") {
q <- paste0("SELECT a.[BYVARNAME] AS [BYVARNAME],
", .SELECT_STATEMENT0(vars), ",
total
FROM (
SELECT [BYVAR], [BYVAR] AS BYVAR,
", .SELECT_STATEMENT1(vars_default), "
FROM legend
INNER JOIN mapunit ON mapunit.lkey = legend.lkey
INNER JOIN component ON mapunit.mukey = component.mukey
INNER JOIN component ON mapunit.mukey = component.mukey ",
ifelse(include_minors, "", "AND majcompflag = 'Yes'") ,"
", ifelse(miscellaneous_areas, "", " AND NOT component.compkind = 'Miscellaneous area'"),"
LEFT JOIN cogeomordesc ON component.cokey = cogeomordesc.cokey
", .JOIN_TABLE(table), "
WHERE legend.areasymbol ", statsgo_filter, " 'US'
", ifelse(representative_only, "AND rvindicator = 'Yes'", ""), "
", sprintf("INNER JOIN %s ON cogeomordesc.cogeomdkey = %s.cogeomdkey", table, table), "
WHERE ", statsgo_filter, "
AND (", .NULL_FILTER(vars_default, miscellaneous_areas), ")
AND ", WHERE, "
GROUP BY [BYVAR], ", paste0(vars_default, collapse = ", "), "
) AS a JOIN (SELECT [BYVAR] AS BYVAR, CAST(count([BYVAR]) AS numeric) AS total
FROM legend
INNER JOIN mapunit ON mapunit.lkey = legend.lkey
INNER JOIN component ON mapunit.mukey = component.mukey
INNER JOIN component ON mapunit.mukey = component.mukey ",
ifelse(include_minors,"", "AND majcompflag = 'Yes'") ,"
", ifelse(miscellaneous_areas, "", " AND NOT component.compkind = 'Miscellaneous area'"),"
LEFT JOIN cogeomordesc ON component.cokey = cogeomordesc.cokey
", .JOIN_TABLE(table), "
WHERE legend.areasymbol != 'US'
", ifelse(representative_only, "AND rvindicator = 'Yes'", ""),
sprintf("LEFT JOIN %s ON cogeomordesc.cogeomdkey = %s.cogeomdkey", table, table), "
WHERE ", statsgo_filter, "
AND (", .NULL_FILTER(vars_default, miscellaneous_areas), ")
AND ", WHERE, "
GROUP BY [BYVAR]) AS b
ON a.BYVAR = b.BYVAR
ORDER BY [BYVARNAME], ", .ORDER_COLUMNS(vars_default))


} else if (method == "NONE") {

if (!missing(by)) {
message("NOTE: `by` argument is ignored when method='none'")
}

q <- paste0("SELECT mapunit.mukey, component.cokey, compname, compkind, comppct_r, majcompflag, cogeomordesc.rvindicator,",
paste0(vars, collapse = ", "), "
FROM legend
INNER JOIN mapunit ON mapunit.lkey = legend.lkey
INNER JOIN component ON mapunit.mukey = component.mukey ",
ifelse(include_minors, "", "AND majcompflag = 'Yes'") ,"
", ifelse(miscellaneous_areas, "", " AND NOT component.compkind = 'Miscellaneous area'"),"
LEFT JOIN cogeomordesc ON component.cokey = cogeomordesc.cokey
", sprintf("LEFT JOIN %s ON cogeomordesc.cogeomdkey = %s.cogeomdkey", table, table), "
WHERE ", statsgo_filter, "
AND (", .NULL_FILTER(vars_default, miscellaneous_areas), ")
AND ", WHERE, "")
}

# insert grouping variable
byname <- gsub("(.*\\.)?(.*)", "\\2", by)
qsub <- gsub("[BYVARNAME]", byname, gsub("[BYVAR]", by, q, fixed = TRUE), fixed = TRUE)


if (query_string) {
return(qsub)
}
if (!is.null(dsn)) {
# if dsn is specified
if (inherits(dsn, 'DBIConnection')) {
# allow existing connections (don't close them)
res <- DBI::dbGetQuery(dsn, qsub)
} else {
# otherwise create a connection
if (requireNamespace("RSQLite")) {
con <- dbConnect(RSQLite::SQLite(), dsn)
res <- dbGetQuery(con, qsub)
RSQLite::dbDisconnect(con)
} else stop("package 'RSQLite' is required to query a local data source (`dsn`)", call. = FALSE)
}
} else {
# otherwise query from SDA
res <- SDA_query(qsub)
}

res <- SDA_query(qsub, dsn = dsn)

res
}
143 changes: 85 additions & 58 deletions R/get_SDA_hydric.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,74 +21,101 @@
#' @param mukeys vector of map unit keys
#' @param WHERE character containing SQL WHERE clause specified in terms of fields in `legend`, `mapunit`, or `component` tables, used in lieu of `mukeys` or `areasymbols`
#' @param method One of: `"Mapunit"`, `"Dominant Component"`, `"Dominant Condition"`, `"None"`
#' @param query_string Default: `FALSE`; if `TRUE` return a character string containing query that would be sent to SDA via `SDA_query`
#' @param include_minors logical. Include minor components? Default: `TRUE`.
#' @param miscellaneous_areas _logical_. Include miscellaneous areas (non-soil components) in results? Default: `TRUE`.
#' @param query_string Default: `FALSE`; if `TRUE` return a character string containing query that would be sent to SDA via `SDA_query()`
#' @param dsn Path to local SQLite database or a DBIConnection object. If `NULL` (default) use Soil Data Access API via `SDA_query()`.
#' @author Jason Nemecek, Chad Ferguson, Andrew Brown
#' @return a data.frame
#' @export
get_SDA_hydric <- function(areasymbols = NULL, mukeys = NULL, WHERE = NULL, method = "MAPUNIT", query_string = FALSE, dsn = NULL) {
get_SDA_hydric <- function(areasymbols = NULL,
mukeys = NULL,
WHERE = NULL,
method = "MAPUNIT",
include_minors = TRUE,
miscellaneous_areas = TRUE,
query_string = FALSE,
dsn = NULL) {

method <- match.arg(toupper(method), c("MAPUNIT", "DOMINANT COMPONENT", "DOMINANT CONDITION", "NONE"))

method <- match.arg(toupper(method), c("MAPUNIT", "DOMINANT COMPONENT", "DOMINANT CONDITION", "NONE"))
if (is.null(mukeys) && is.null(areasymbols) && is.null(WHERE)) {
stop("Please specify one of the following arguments: mukeys, areasymbols, WHERE", call. = FALSE)
}

if (is.null(mukeys) && is.null(areasymbols) && is.null(WHERE)) {
stop("Please specify one of the following arguments: mukeys, areasymbols, WHERE", call. = FALSE)
}
if (!is.null(mukeys)) {
WHERE <- paste("mapunit.mukey IN", format_SQL_in_statement(as.integer(mukeys)))
} else if (!is.null(areasymbols)) {
WHERE <- paste("legend.areasymbol IN", format_SQL_in_statement(areasymbols))
}

.h0 <- function(w) .LIMIT_N(paste(sprintf("SELECT ISNULL(SUM(comppct_r), 0)
FROM mapunit AS mu
INNER JOIN component AS c ON c.mukey = mu.mukey AND mapunit.mukey = mu.mukey %s %s",
ifelse(miscellaneous_areas, "", " AND NOT c.compkind = 'Miscellaneous area'"),
ifelse(include_minors, "", " AND c.majcompflag = 'Yes'")), w),
n = 1, sqlite = !is.null(dsn))

if (!is.null(mukeys)) {
WHERE <- paste("mapunit.mukey IN", format_SQL_in_statement(as.integer(mukeys)))
} else if (!is.null(areasymbols)) {
WHERE <- paste("legend.areasymbol IN", format_SQL_in_statement(areasymbols))
}
.h0 <- function(w) .LIMIT_N(paste("SELECT ISNULL(SUM(comppct_r), 0) FROM mapunit AS mu INNER JOIN component AS c ON c.mukey = mu.mukey AND mapunit.mukey = mu.mukey", w), n = 1, sqlite = !is.null(dsn))
q <- paste0("SELECT mapunit.mukey, areasymbol, musym, mapunit.muname,
(", .h0(""), ") AS total_comppct,
(", .h0("AND majcompflag = 'Yes'"), ") AS count_maj_comp,
(", .h0("AND hydricrating = 'Yes'"), ") AS all_hydric,
(", .h0("AND majcompflag = 'Yes' AND hydricrating = 'Yes'"), ") AS hydric_majors,
(", .h0("AND majcompflag = 'Yes' AND hydricrating != 'Yes'"), ") AS maj_not_hydric,
(", .h0("AND majcompflag != 'Yes' AND hydricrating = 'Yes'"), ") AS hydric_inclusions,
(", .h0("AND hydricrating != 'Yes'"), ") AS all_not_hydric,
(", .h0("AND hydricrating IS NULL"), ") AS hydric_null
INTO #main_query
FROM legend
INNER JOIN mapunit ON mapunit.lkey = legend.lkey AND ", WHERE, "
SELECT mukey, areasymbol, musym, muname,
total_comppct AS total_comppct,
hydric_majors AS hydric_majors,
hydric_inclusions AS hydric_inclusions,
CASE WHEN total_comppct = all_not_hydric + hydric_null THEN 'Nonhydric'
WHEN total_comppct = all_hydric THEN 'Hydric'
WHEN hydric_majors + hydric_inclusions >= total_comppct / 2 THEN 'Predominantly Hydric'
WHEN hydric_majors > 0 THEN 'Partially Hydric'
WHEN hydric_majors + hydric_inclusions < total_comppct / 2 THEN 'Predominantly Nonhydric'
ELSE 'Error' END AS HYDRIC_RATING
FROM #main_query")
# TODO: refactor out the temp table and CASE WHEN for HYDRIC_RATING calculated in R

q <- paste0("SELECT mapunit.mukey, areasymbol, musym, mapunit.muname,
(", .h0(""), ") AS total_comppct,
(", .h0("AND majcompflag = 'Yes'"), ") AS count_maj_comp,
(", .h0("AND hydricrating = 'Yes'"), ") AS all_hydric,
(", .h0("AND majcompflag = 'Yes' AND hydricrating = 'Yes'"), ") AS hydric_majors,
(", .h0("AND majcompflag = 'Yes' AND hydricrating != 'Yes'"), ") AS maj_not_hydric,
(", .h0("AND majcompflag != 'Yes' AND hydricrating = 'Yes'"), ") AS hydric_inclusions,
(", .h0("AND hydricrating != 'Yes'"), ") AS all_not_hydric,
(", .h0("AND hydricrating IS NULL"), ") AS hydric_null
INTO #main_query
FROM legend
INNER JOIN mapunit ON mapunit.lkey = legend.lkey AND ", WHERE, "
SELECT mukey, areasymbol, musym, muname,
total_comppct AS total_comppct,
hydric_majors AS hydric_majors,
hydric_inclusions AS hydric_inclusions,
CASE WHEN total_comppct = all_not_hydric + hydric_null THEN 'Nonhydric'
WHEN total_comppct = all_hydric THEN 'Hydric'
WHEN hydric_majors + hydric_inclusions >= total_comppct / 2 THEN 'Predominantly Hydric'
WHEN hydric_majors > 0 THEN 'Partially Hydric'
WHEN hydric_majors + hydric_inclusions < total_comppct / 2 THEN 'Predominantly Nonhydric'
ELSE 'Error' END AS HYDRIC_RATING
FROM #main_query")
# TODO: refactor out the temp table and CASE WHEN for HYDRIC_RATING calculated in R
comp_selection <- ""
hyd_selection <- ""

if (method != "MAPUNIT") {
if (method %in% c("DOMINANT COMPONENT", "DOMINANT CONDITION")) {
comp_selection <- sprintf("AND component.cokey = (%s)", .LIMIT_N(sprintf("SELECT c1.cokey FROM component AS c1
INNER JOIN mapunit AS mu1 ON c1.mukey = mu1.mukey AND c1.mukey = mapunit.mukey %s %s
ORDER BY c1.comppct_r DESC, c1.cokey", ifelse(miscellaneous_areas, "", " AND NOT c1.compkind = 'Miscellaneous area'"),
ifelse(include_minors, "", " AND c1.majcompflag = 'Yes'")),
n = 1, sqlite = !is.null(dsn)))
}

comp_selection <- ""
hyd_selection <- ""
if (method != "MAPUNIT") {
if (method %in% c("DOMINANT COMPONENT", "DOMINANT CONDITION")) {
comp_selection <- sprintf("AND component.cokey = (%s)", .LIMIT_N("SELECT c1.cokey FROM component AS c1
INNER JOIN mapunit AS mu1 ON c1.mukey = mu1.mukey AND c1.mukey = mapunit.mukey
ORDER BY c1.comppct_r DESC, c1.cokey", n = 1, sqlite = !is.null(dsn)))
}
if (method == "DOMINANT CONDITION") {
hyd_selection <- sprintf("AND hydricrating = (%s)",
.LIMIT_N(sprintf("SELECT hydricrating FROM mapunit AS mu
INNER JOIN component ON component.mukey = mapunit.mukey %s %s
GROUP BY hydricrating, comppct_r
ORDER BY SUM(comppct_r) OVER (PARTITION BY hydricrating) DESC",
ifelse(miscellaneous_areas, "", " AND NOT component.compkind = 'Miscellaneous area'"),
ifelse(include_minors, "", " AND component.majcompflag = 'Yes'")),
n = 1, sqlite = !is.null(dsn)))
}

if (method == "DOMINANT CONDITION") {
hyd_selection <- sprintf("AND hydricrating = (%s)", .LIMIT_N("SELECT hydricrating FROM mapunit AS mu
INNER JOIN component AS c ON c.mukey = mu.mukey AND mapunit.mukey = mu.mukey
GROUP BY hydricrating, comppct_r
ORDER BY SUM(comppct_r) OVER (PARTITION BY hydricrating) DESC", n = 1, sqlite = !is.null(dsn)))
}

q <- sprintf(paste0("SELECT areasymbol, musym, muname, mapunit.mukey, ",
ifelse(method == "DOMINANT CONDITION", "", "cokey, compname, compkind, comppct_r, majcompflag, "),
"hydricrating
FROM legend
INNER JOIN mapunit ON mapunit.lkey = legend.lkey
INNER JOIN component ON component.mukey = mapunit.mukey %s %s
WHERE %s"), comp_selection, hyd_selection, WHERE)
q <- sprintf(paste0("SELECT areasymbol, musym, muname, mapunit.mukey, ",
ifelse(method == "DOMINANT CONDITION", "", "cokey, compname, compkind, comppct_r, majcompflag, "),
"hydricrating
FROM legend
INNER JOIN mapunit ON mapunit.lkey = legend.lkey
INNER JOIN component ON component.mukey = mapunit.mukey %s %s %s %s
WHERE %s"),
comp_selection,
hyd_selection,
ifelse(miscellaneous_areas, "", " AND NOT component.compkind = 'Miscellaneous area'"),
ifelse(include_minors, "", " AND component.majcompflag = 'Yes'"),
WHERE)
}

if (!is.null(dsn)) {
Expand Down
Loading

0 comments on commit a7f6d22

Please sign in to comment.