Skip to content

Commit

Permalink
Merge pull request #75 from ncss-tech/osd-drainage
Browse files Browse the repository at this point in the history
[OSD] Improve drainage class parsing
  • Loading branch information
brownag authored Feb 24, 2024
2 parents 30ede6e + 1bf0d43 commit 0b86519
Show file tree
Hide file tree
Showing 2,418 changed files with 3,990 additions and 3,971 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Imports: curl, xml2, jsonlite, rvest, stringi, tibble, dplyr, pdftools, data.tab
License: GPL-3
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Suggests:
knitr,
rmarkdown,
Expand Down
829 changes: 416 additions & 413 deletions R/create_OSD.R

Large diffs are not rendered by default.

30 changes: 22 additions & 8 deletions R/parseOSD_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -219,27 +219,41 @@
# drainage classes, in order, lower case
classes <- c("excessively", "somewhat excessively", "well", "moderately well",
"somewhat poorly", "poorly", "very poorly", "subaqueous")
class_hyphen <- gsub(" ", "[ \\-]", classes)

# combine into capturing REGEX
classes.regex <- paste0('(', paste(classes, collapse = '|'), ')', "( (to|or|and) )?",
paste0('(', paste(classes, collapse = '|'), ')'), "? drained")
classes.regex <- paste0('(', paste(class_hyphen, collapse = '|'), ')', "([ \\-]drained)?( (to|or|and) )?",
paste0('(', paste(class_hyphen, collapse = '|'), ')'),
"?[ \\-]drained|subaqueous|Drainage[ class]*[:\\-]+ ",
'(', paste(class_hyphen, collapse = '|'), ')', "([ \\-]drained)?( (to|or|and) )?",
paste0('(', paste(class_hyphen, collapse = '|'), ')?'))

# get matches
m <- stringi::stri_match(text, regex = classes.regex, mode = 'first', opts_regex = list(case_insensitive = TRUE))
m <- gsub("Drainage[ Cclass]*[:\\-]+ ", "", m, ignore.case = TRUE)

# fail gracefully in the case of no section data or no matches
if (nrow(m) < 1) {
return(NA)
}

# keep full match and convert to lower case, remove the word "drained"
m <- trimws(gsub("drained", "", tolower(m[, 1])))

# return as an ordered factor
# m <- factor(m, levels = classes, ordered = TRUE)
# factors cannot be preserved in JSON output, and won't work for multiple classes/ranges of classes
m <- trimws(gsub(" ", " ", gsub("-", " ", gsub("drained", "", tolower(m[, 1])))))

# put classes in order from excessively->subaqueous
# interpolate ranges across more than 2 classes, and concatenate with comma
m2 <- strsplit(m, "\\b(and|or|to)\\b")
m3 <- lapply(m2, function(x) {
x <- trimws(x)
y <- as.integer(factor(unique(classes[match(x, classes)]),
levels = classes, ordered = TRUE))
if (length(y) > 1) {
y <- seq(from = min(y, na.rm = TRUE), to = max(y, na.rm = TRUE))
}
ifelse(is.na(classes[y]), "", classes[y]) # TODO: use zero chars or NA?
})

return(m)
return(sapply(m3, paste0, collapse = ", "))
}

.zerochar_to_na <- function(x) {
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/AABERG.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
[
{
"drainage": "moderately well",
"drainage_overview": "moderately well"
"drainage_overview": "well, moderately well"
}
]
],
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ABAC.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "",
"drainage": "well",
"drainage_overview": "well"
}
]
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ABAJO.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "",
"drainage": "well",
"drainage_overview": "well"
}
]
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ABALAN.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
[
{
"drainage": "well",
"drainage_overview": ""
"drainage_overview": "well"
}
]
],
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ABERONE.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "somewhat excessively or well",
"drainage": "somewhat excessively, well",
"drainage_overview": "well"
}
]
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ABGESE.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "",
"drainage": "well",
"drainage_overview": "well"
}
]
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ABIQUA.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "",
"drainage": "well",
"drainage_overview": "well"
}
]
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ABREU.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "",
"drainage": "well",
"drainage_overview": "well"
}
]
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/ABSHER.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
"SITE": [
[
{
"drainage": "well and moderately well",
"drainage_overview": "well and moderately well"
"drainage": "well, moderately well",
"drainage_overview": "well, moderately well"
}
]
],
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ACADEMY.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "well to moderately well",
"drainage": "well, moderately well",
"drainage_overview": ""
}
]
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/ACO.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
"SITE": [
[
{
"drainage": "well to somewhat excessively",
"drainage_overview": "well to somewhat excessively"
"drainage": "somewhat excessively, well",
"drainage_overview": "somewhat excessively, well"
}
]
],
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ADAMS.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
[
{
"drainage": "somewhat excessively",
"drainage_overview": "excessively and somewhat excessively"
"drainage_overview": "excessively, somewhat excessively"
}
]
],
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ADGER.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
[
{
"drainage": "well",
"drainage_overview": "well"
"drainage_overview": "well, moderately well"
}
]
],
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/ADILIS.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
"SITE": [
[
{
"drainage": "well to moderately well",
"drainage_overview": "well to moderately well"
"drainage": "well, moderately well",
"drainage_overview": "well, moderately well"
}
]
],
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/ADJIDAUMO.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
"SITE": [
[
{
"drainage": "poorly and very poorly",
"drainage_overview": "poorly"
"drainage": "poorly, very poorly",
"drainage_overview": "poorly, very poorly"
}
]
],
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/ADOLPH.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
"SITE": [
[
{
"drainage": "poorly and very poorly",
"drainage_overview": "poorly and very poorly"
"drainage": "poorly, very poorly",
"drainage_overview": "poorly, very poorly"
}
]
],
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/AGER.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "",
"drainage": "well",
"drainage_overview": "well"
}
]
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/AGUA_DULCE.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "",
"drainage": "well",
"drainage_overview": ""
}
]
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/AGUEDA.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "well and moderately well",
"drainage": "well, moderately well",
"drainage_overview": ""
}
]
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/AHL.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "",
"drainage": "well",
"drainage_overview": "well"
}
]
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/AILEY.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
[
{
"drainage": "well",
"drainage_overview": "well"
"drainage_overview": "somewhat excessively, well"
}
]
],
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/AINAHOU.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
"SITE": [
[
{
"drainage": "poorly or very poorly",
"drainage_overview": "poorly or very poorly"
"drainage": "poorly, very poorly",
"drainage_overview": "poorly, very poorly"
}
]
],
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/AKAN.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
"SITE": [
[
{
"drainage": "poorly",
"drainage_overview": "poorly"
"drainage": "poorly, very poorly",
"drainage_overview": "poorly, very poorly"
}
]
],
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/ALAMOSA.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
"SITE": [
[
{
"drainage": "poorly to somewhat poorly",
"drainage_overview": "poorly to somewhat poorly"
"drainage": "somewhat poorly, poorly",
"drainage_overview": "somewhat poorly, poorly"
}
]
],
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ALANTHUS.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
[
{
"drainage": "well",
"drainage_overview": ""
"drainage_overview": "well"
}
]
],
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/ALBATON.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
"SITE": [
[
{
"drainage": "poorly or very poorly",
"drainage_overview": "poorly or very poorly"
"drainage": "poorly, very poorly",
"drainage_overview": "poorly, very poorly"
}
]
],
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ALBINAS.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "",
"drainage": "well",
"drainage_overview": "well"
}
]
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/ALBION.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
"SITE": [
[
{
"drainage": "well",
"drainage_overview": "somewhat excessively"
"drainage": "somewhat excessively, well",
"drainage_overview": "somewhat excessively, well"
}
]
],
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/ALBRIGHTS.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
"SITE": [
[
{
"drainage": "moderately well to somewhat poorly",
"drainage_overview": "moderately well to somewhat poorly"
"drainage": "moderately well, somewhat poorly",
"drainage_overview": "moderately well, somewhat poorly"
}
]
],
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/ALCESTER.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
"SITE": [
[
{
"drainage": "well and moderately well",
"drainage_overview": "well and moderately well"
"drainage": "well, moderately well",
"drainage_overview": "well, moderately well"
}
]
],
Expand Down
4 changes: 2 additions & 2 deletions inst/extdata/OSD/A/ALDA.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
"SITE": [
[
{
"drainage": "somewhat poorly",
"drainage_overview": "somewhat poorly"
"drainage": "moderately well, somewhat poorly",
"drainage_overview": "moderately well, somewhat poorly"
}
]
],
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ALDERMAND.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
[
{
"drainage": "well",
"drainage_overview": ""
"drainage_overview": "well"
}
]
],
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ALET.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "",
"drainage": "well",
"drainage_overview": "well"
}
]
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ALGERITA.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "",
"drainage": "well",
"drainage_overview": "well"
}
]
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/OSD/A/ALICE.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"SITE": [
[
{
"drainage": "",
"drainage": "well",
"drainage_overview": "well"
}
]
Expand Down
Loading

0 comments on commit 0b86519

Please sign in to comment.