Skip to content

Commit

Permalink
999999999.lib.sh (#45): file_convert_bpc47min_de_numerordinatio() cre…
Browse files Browse the repository at this point in the history
…ated
  • Loading branch information
fititnt committed Jul 24, 2022
1 parent 904fca9 commit 1b25bb9
Showing 1 changed file with 90 additions and 1 deletion.
91 changes: 90 additions & 1 deletion officina/999999999/999999999.lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,10 @@ file_update_if_necessary() {
echo "${FUNCNAME[0]} ... [$fontem_archivum] --> [$objectivum_archivum]"

case "${formatum_archivum}" in
skip-validation)
# echo "INFO: skip-validation"
echo ""
;;
csv)
is_valid=$(csvclean --dry-run "$fontem_archivum")
if [ "$is_valid" != "No errors." ]; then
Expand Down Expand Up @@ -695,6 +699,83 @@ file_download_1603_xlsx() {
mv "$objectivum_archivum_temporarium" "$objectivum_archivum"
}

#######################################
# Convert a "full" .no1.tm.hxl.csv/.no11.tm.hxl.csv into a
# .no1.bcp47.csv/.no11.bcp47.csv while removing overlong HXL attributes, at
# the cost of losing the direct RDF translation. The resulting headers are
# likely to fit in < 60 characters, so they are easier to import into
# databases.
#
# Globals:
#   ROOTDIR (base path used for tools)
#   DESTDIR (base path used for data, both source and objective)
# Arguments:
#   numerordinatio
#   numerordinatio_typo         (default "no1"; values: no1, no11)
#   est_temporarium_fontem      (default "1": read from ${DESTDIR}/999999/;
#                                any other value: read from ${DESTDIR}/)
#   est_temporarium_objectivum  (default "1": write to ${DESTDIR}/999999/;
#                                any other value: write to ${DESTDIR}/)
# Outputs:
#   Convert files
# Returns:
#   0 on success; 1 if the source file is missing or a conversion step fails
#   (in which case the objective file is left untouched)
#######################################
file_convert_bpc47min_de_numerordinatio() {
  local numerordinatio="$1"
  local numerordinatio_typo="${2:-no1}"
  # Each flag has its own positional parameter. Reading the flag from the same
  # position as numerordinatio_typo would make a value such as "no11" be
  # evaluated as the temporary-source flag.
  local est_temporarium_fontem="${3:-1}"
  local est_temporarium_objectivum="${4:-1}"

  local _path _nomen
  local _basim_fontem _basim_objectivum
  local fontem_archivum objectivum_archivum
  local csv_temporarium_1 csv_temporarium_2
  local convertor="${ROOTDIR}/999999999/0/999999999_54872.py"

  _path=$(numerordinatio_neo_separatum "$numerordinatio" "/")
  _nomen=$(numerordinatio_neo_separatum "$numerordinatio" "_")

  # String comparison: a non-numeric flag must not raise a test(1) error.
  if [[ "$est_temporarium_fontem" == "1" ]]; then
    _basim_fontem="${DESTDIR}/999999"
  else
    _basim_fontem="${DESTDIR}"
  fi
  if [[ "$est_temporarium_objectivum" == "1" ]]; then
    _basim_objectivum="${DESTDIR}/999999"
  else
    _basim_objectivum="${DESTDIR}"
  fi

  fontem_archivum="${_basim_fontem}/$_path/$_nomen.${numerordinatio_typo}.tm.hxl.csv"
  objectivum_archivum="${_basim_objectivum}/$_path/$_nomen.${numerordinatio_typo}.bcp47.csv"
  csv_temporarium_1="${DESTDIR}/999999/0/${_nomen}_bcp47min~TEMP~1.csv"
  csv_temporarium_2="${DESTDIR}/999999/0/${_nomen}_bcp47min~TEMP~2.csv"

  if [[ ! -f "$fontem_archivum" ]]; then
    echo "${FUNCNAME[0]} ERROR: fontem non est [$fontem_archivum]" >&2
    return 1
  fi

  # Conversion pipeline; any failing step aborts before the objective file is
  # touched:
  #  1. shorten the HXL attribute names;
  #  2. drop helper columns:
  #     - temporary fix: generated tags with error: +ix_error
  #     - somewhat temporary: non-merged alts: +ix_alt1|+ix_alt2|+ix_alt3
  #     - non-temporary: implicit tags: +ix_hxlattrs
  #  3. delete the first line (",,,,,");
  #  4. convert the HXL hashtags to BCP47-style headers.
  if ! {
    "${convertor}" \
      --methodus=_temp_no1_to_no1_shortnames \
      --real-infile-path="${fontem_archivum}" \
      >"${csv_temporarium_1}" &&
      hxlcut \
        --exclude='#*+ix_error,#*+ix_hxlattrs,#*+ix_alt1,#*+ix_alt2,#*+ix_alt3' \
        "${csv_temporarium_1}" >"${csv_temporarium_2}" &&
      sed -i '1d' "${csv_temporarium_2}" &&
      "${convertor}" \
        --methodus=_temp_data_hxl_to_bcp47 \
        --real-infile-path="${csv_temporarium_2}" >"${csv_temporarium_1}"
  }; then
    echo "${FUNCNAME[0]} ERROR: [$fontem_archivum] --> [$objectivum_archivum]" >&2
    rm -f -- "${csv_temporarium_1}" "${csv_temporarium_2}"
    return 1
  fi

  # The validation report is informative only: the result is still handed to
  # file_update_if_necessary (with its own validation skipped) on failure.
  if ! frictionless validate "${csv_temporarium_1}"; then
    echo "${FUNCNAME[0]} WARNING: frictionless validate [$csv_temporarium_1]" >&2
  fi

  file_update_if_necessary "skip-validation" \
    "${csv_temporarium_1}" \
    "${objectivum_archivum}"

  rm -f -- "${csv_temporarium_2}"
  return 0
}

#######################################
# Convert HXLTM to numerordinatio with these defaults:
# - '#meta' are removed
Expand Down Expand Up @@ -1778,6 +1859,7 @@ file_translate_csv_de_numerordinatio_q__v2() {
# est_temporarium_fontem (default "1", from 999999/)
# est_temporarium_objectivum (default "0", from real namespace)
# est_non_normale
# hxlattrs (default "", example: '+rdf_p_skos_preflabel_s5000')
# Outputs:
# Convert files
#######################################
Expand All @@ -1786,6 +1868,7 @@ file_merge_numerordinatio_de_wiki_q() {
est_temporarium_fontem="${2:-"1"}"
est_temporarium_objectivum="${3:-"0"}"
est_non_normale="${4:-"0"}"
hxlattrs="${5:-""}"

_path=$(numerordinatio_neo_separatum "$numerordinatio" "/")
_nomen=$(numerordinatio_neo_separatum "$numerordinatio" "_")
Expand All @@ -1807,6 +1890,7 @@ file_merge_numerordinatio_de_wiki_q() {
objectivum_archivum="${_basim_objectivum}/$_path/$_nomen.no11.tm.hxl.csv"
objectivum_archivum_temporarium="${ROOTDIR}/999999/0/$_nomen.no11.tm.hxl.csv"
fontem_q_archivum_temporarium="${ROOTDIR}/999999/0/$_nomen.wikiq.tm.hxl.csv"
fontem_q_archivum_temporarium_2="${ROOTDIR}/999999/0/$_nomen.wikiq.tm.hxl.csv"
# objectivum_archivum_temporarium_b="${ROOTDIR}/999999/0/$_nomen.q.txt"
# objectivum_archivum_temporarium_b_u="${ROOTDIR}/999999/0/$_nomen.uniq.q.txt"
# objectivum_archivum_temporarium_b_u_wiki="${ROOTDIR}/999999/0/$_nomen.wikiq.tm.hxl.csv"
Expand Down Expand Up @@ -1874,7 +1958,12 @@ file_merge_numerordinatio_de_wiki_q() {
# cp "$objectivum_archivum_temporarium" "$objectivum_archivum_temporarium.tmp"
# rm "$fontem_q_archivum_temporarium"

file_update_if_necessary csv "$objectivum_archivum_temporarium" "$objectivum_archivum"
# @TODO: disable this once file_update_if_necessary implements it
frictionless validate "${objectivum_archivum_temporarium}"

file_update_if_necessary "skip-validation" \
"$objectivum_archivum_temporarium" \
"$objectivum_archivum"

return 0
}
Expand Down

0 comments on commit 1b25bb9

Please sign in to comment.