Skip to content

Commit

Permalink
999999999.lib.sh (#45): file_extract_ix_wikiq() and draft of wikidata…
Browse files Browse the repository at this point in the history
…_q_ex_totalibus()
  • Loading branch information
fititnt committed Jul 20, 2022
1 parent bb1e631 commit b86d994
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 2 deletions.
2 changes: 0 additions & 2 deletions officina/999999999/1603_13.sh
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,6 @@ numerordiatio_summarium() {

hxl_vocab

exit 0

# find path/to/dir -name "*.ext1" -o -name "*.ext2"
# echo "$ROOTDIR"
# numerordiatio_search "$ROOTDIR/1603/"
Expand Down
84 changes: 84 additions & 0 deletions officina/999999999/999999999.lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,49 @@ file_hotfix_duplicated_merge_key() {
mv "$hotfix_archivum_temporarium_2" "$archivum"
}

#######################################
# For a CSV file with a '#item+rem+i_qcc+is_zxxx+ix_wikiq' column, extract the
# values selected by the hxlselect query below, sort them (version sort, so
# Q2 is listed before Q10), remove duplicates and print the result, one value
# per line.
#
# The whole transformation is a single pipeline: no intermediate files are
# written, so nothing is left behind when a step fails and simultaneous calls
# on files that share a basename cannot overwrite each other's data.
#
# Globals:
#   None
# Arguments:
#   fontem   source CSV file
# Outputs:
#   Sorted unique values on stdout; diagnostics on stderr
# Returns:
#   1 if fontem is empty; otherwise the exit status of the pipeline
#######################################
file_extract_ix_wikiq() {
  local fontem="$1"

  if [[ -z "$fontem" ]]; then
    printf '%s: fontem is required\n' "${FUNCNAME[0]}" >&2
    return 1
  fi

  # `sed '1,2d'` drops the first two lines of the hxlselect output
  # (presumably the header row and the HXL hashtag row -- TODO confirm).
  hxlcut \
    --include='#item+rem+i_qcc+is_zxxx+ix_wikiq' \
    "$fontem" |
    hxlselect --query='#item+rem+i_qcc+is_zxxx+ix_wikiq>0' |
    sed '1,2d' |
    sort --version-sort --field-separator="Q" |
    uniq
}

#######################################
# Create a codex (documentation) from an Numerordinatio standard file
#
Expand Down Expand Up @@ -2550,6 +2593,47 @@ wikidata_p_ex_totalibus() {

}

#######################################
# (Draft) From a sorted list of Q items, try to compute "the ideal" number of
# requests to Wikidata needed to extract the labels; the result is meant to be
# saved at the objectivum path (a .wikiq.tm.hxl.csv file) if everything works
# as expected.
#
# Current state: the function only counts the Q items, prints one diagnostic
# line and then prints the numbers 1..lingua_divisioni, one per line. Nothing
# is requested from Wikidata and nothing is written to objectivum yet.
#
# Globals:
#   None
# Arguments:
#   wikiq       (string, list of Q items, one per line)
#   objectivum  (path, destination of .wikiq.tm.hxl.csv)
# Outputs:
#   Diagnostic line and division numbers on stdout
# Returns:
#   0
#######################################
wikidata_q_ex_totalibus() {
  local wikiq="$1"
  local objectivum="$2"
  local nomen qitems i

  # Minimum of divisions is 5
  local lingua_divisioni=5

  nomen=$(basename "$objectivum")
  nomen="${nomen%%.*}"

  # Count only lines that carry content, so an empty list yields 0 (a plain
  # `echo | wc -l` would report 1). grep -c exits non-zero when the count is
  # 0, hence the `|| true`.
  qitems=$(printf '%s\n' "$wikiq" | grep -c '[^[:space:]]') || true

  echo "${FUNCNAME[0]} [$nomen] _qitems [$qitems] lingua_divisioni [$lingua_divisioni]"

  for ((i = 1; i <= lingua_divisioni; i++)); do
    echo "$i"
  done

  # Sketch of the intended request loop, kept from the draft:
  # for i in {1..19}; do
  #   # for i in {6..19}; do
  #   echo "Number: $i"
  #   sleep 10
  #   wikidata_p_ex_linguis "$numerordinatio" "1" "1" "$ex_wikidata_p" "$i" "20"
  # done

  return 0
}

tempfunc_merge_wikiq_files() {
fontem_1="$1"
fontem_2="$2"
Expand Down
8 changes: 8 additions & 0 deletions officina/999999999/999999_17.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ ROOTDIR="$(pwd)"

# file_download_if_necessary "$DATA_1603_1_51" "1603_1_51" "csv" "tm.hxl.csv" "hxltmcli" "1"
# file_convert_numerordinatio_de_hxltm "1603_1_51" "1" "0"

### Quick tests
# Temporary smoke test of file_extract_ix_wikiq and wikidata_q_ex_totalibus.
# NOTE: the `exit 0` at the end of this section terminates the script here,
# so nothing below this section is executed while it is in place.

# file_extract_ix_wikiq "999999/1603/3/45/16/1/1/1603_3_45_16_1_1.tm.hxl.csv" "999999/0/1603_3_45_16_1_1.uniq.q.txt"
# Capture what file_extract_ix_wikiq prints to stdout for this file.
wikiq=$(file_extract_ix_wikiq "999999/1603/3/45/16/1/1/1603_3_45_16_1_1.tm.hxl.csv")

# Pass the captured list together with the intended .wikiq.tm.hxl.csv path.
wikidata_q_ex_totalibus "$wikiq" "999999/1603/3/45/16/1/1/1603_3_45_16_1_1.wikiq.tm.hxl.csv"
exit 0
### Really bootstrapping downloads, end ________________________________________

# TODO: the formats .no1.tm.hxl.csv stopped being updated with the default
Expand Down

0 comments on commit b86d994

Please sign in to comment.