Skip to content

Commit

Permalink
999999999_7200235.py (#45) --methodus=cod_ab_et_wdata MVP
Browse files Browse the repository at this point in the history
  • Loading branch information
fititnt committed Jul 19, 2022
1 parent e69fa0a commit 09de840
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 10 deletions.
8 changes: 6 additions & 2 deletions officina/1603/16/1/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@

- https://unstats.un.org/unsd/methodology/m49/
- https://en.wikipedia.org/wiki/UN_M49
- https://docs.google.com/spreadsheets/d/1NjSI2LaS3SqbgYc0HdD8oIb7lofGtiHgoKKATCpwVdY/edit#gid=1088874596
- Sources
- https://docs.google.com/spreadsheets/d/1NjSI2LaS3SqbgYc0HdD8oIb7lofGtiHgoKKATCpwVdY/edit#gid=1088874596
- COD-AB (index of what is available as COD-AB, mostly hint about levels)
- WDATA-ADM0 (additional metadata from Wikidata, mostly references to other
codes, using UN M49 as key)

## Challenges
- At first, we will store "all" the territories into 0, but the actual
way UN M49 is otganized it have several levels.
way UN M49 is organized has several levels.
- Example:
- first level = World
- second level = Africa, Antarctica, Americas, Asia, Europe, Oceania
Expand Down
41 changes: 36 additions & 5 deletions officina/999999999/0/999999999_7200235.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@
{0} --methodus='wdata_adm0'
Work with local COD AB + WDATA ADM0 . . . . . . . . . . . . . . . . . . . . . .
(Re-generate territories from COD AB + WDATA intermediate tables)
{0} --methodus='cod_ab_et_wdata' --numerordinatio-praefixo='1603_16'
Process XLSXs from external sources . . . . . . . . . . . . . . . . . . . . . .
Expand Down Expand Up @@ -706,6 +708,7 @@ def execute_cli(self, pyargs, stdin=STDIN, _stdout=sys.stdout,
no1_simplici=True, cod_ab_level__inline=True)

caput_wdata, data_wdata = hxltm_carricato(WDATA_ADM0)
# print(data_wdata[0])

caput, data = hxltm_carricato__cod_ab_et_wdata(
caput_cod, data_cod,
Expand Down Expand Up @@ -1115,6 +1118,13 @@ def hxltm_carricato__cod_ab_et_wdata(
unm49_index_cod = 1 # we assume this will be the index
unm49_index_wdata = 0 # we assume this will be the index

caput_aliis = {
'#country+code+v_unm49': '#item+rem+i_qcc+is_zxxx+ix_unm49',
'#country+code+v_iso3': '#item+rem+i_qcc+is_zxxx+ix_iso3166p1a3',
'#country+code+v_iso2': '#item+rem+i_qcc+is_zxxx+ix_iso3166p1a2',
'#meta+source+cod_ab_level': '#item+rem+i_qcc+is_zxxx+ix_zzcodablevel',
}

numerordinatio_praefixo = numerordinatio_neo_separatum(
numerordinatio_praefixo, ':')
cod_dict = {}
Expand All @@ -1123,28 +1133,49 @@ def hxltm_carricato__cod_ab_et_wdata(
for item in data_cod:
cod_dict[int(item[unm49_index_cod])] = dict(zip(caput_cod, item))
for item in data_wdata:
wdata_dict[int(item[unm49_index_wdata])] = dict(zip(caput_cod, item))
wdata_dict[int(item[unm49_index_wdata])] = dict(zip(caput_wdata, item))

# print(cod_dict)
# print(wdata_dict)
caput_novo = caput_cod
data_novis = data_cod

# print(data_wdata[0])

for unm49 in range(0, 1000):
if unm49 not in cod_dict and unm49 not in wdata_dict:
continue

if unm49 in wdata_dict:
if unm49 in cod_dict:
# print('both')
resultatum_dict[unm49] = {
**wdata_dict[unm49], **cod_dict[unm49]}
else:
resultatum_dict[unm49] = wdata_dict[unm49]
if unm49 in cod_dict:
else:
resultatum_dict[unm49] = cod_dict[unm49]

# This :0 means "root administrative level, often 'country level'"
resultatum_dict[unm49]['#item+conceptum+numerordinatio'] = \
'{0}:{1}:1'.format(numerordinatio_praefixo, str(unm49))
'{0}:{1}:0'.format(numerordinatio_praefixo, str(unm49))

_caput_novo = caput_cod
for item in caput_wdata:
if item not in _caput_novo:
_caput_novo.append(item)

for _index, _value in enumerate(_caput_novo):
# print(idx, x)
# for item in _caput_novo:
if _value in caput_aliis:
_caput_novo[_index] = caput_aliis[_value]

_caput_novo = list(dict.fromkeys(_caput_novo))

# print(_caput_novo)

caput_novo, data_novis = hxltm__ex_dict(resultatum_dict)
caput_novo, data_novis = hxltm__ex_dict(
resultatum_dict, caput=_caput_novo, caput_aliis=caput_aliis)

# print(resultatum_dict[4])
return caput_novo, data_novis
Expand Down
8 changes: 5 additions & 3 deletions officina/999999999/0/L999999999_0.py
Original file line number Diff line number Diff line change
Expand Up @@ -4987,21 +4987,23 @@ def hxltm__ex_dict(
data_novis = []
if caput is None:
_first = list(hxltm_dict.keys())[0]
caput_novo = hxltm_dict[_first].keys()
caput = hxltm_dict[_first].keys()

# print(caput)

if caput_aliis is not None:
_hxltm_dict = {}
for clavem, v in hxltm_dict.items():
_hxltm_dict[clavem] = v
for k_old, k_new in caput_aliis:
for k_old, k_new in caput_aliis.items():
if k_old in _hxltm_dict[clavem]:
_hxltm_dict[clavem][k_new] = _hxltm_dict[clavem].pop(k_old)
else:
_hxltm_dict = hxltm_dict

for _index, res in _hxltm_dict.items():
linea_novae = []
for item in caput_novo:
for item in caput:
if item in res:
linea_novae.append(res[item])
else:
Expand Down
32 changes: 32 additions & 0 deletions officina/999999999/1603_3_12.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,33 @@ ROOTDIR="$(pwd)"

# } ORDER BY ?start

#######################################
# COD AB Index + Wikidata Adm0
#
# Runs 999999999_7200235.py with --methodus='cod_ab_et_wdata' and
# --numerordinatio-praefixo='1603_16', redirecting the script's stdout into
# a temporary CSV file under "${ROOTDIR}/999999/0/".
#
# Globals:
#   ROOTDIR (read)
#   objectivum_archivum_temporarium (assigned; intentionally not local)
# Arguments:
#   None
# Outputs:
#   Writes ${ROOTDIR}/999999/0/1603_3_45_16_1_1.tm.hxl.csv
#   (command trace goes to stderr while xtrace is enabled)
# Returns:
#   Exit status of 999999999_7200235.py
#######################################
1603_3_12_cod_ab_et_wdata() {
  # Holds the script's exit status; without it the trailing `set +x` would
  # make this function return 0 even when the script failed.
  local exit_code=0

  # objectivum_archivum="${ROOTDIR}/1603/3/1603_3__adm0.csv"
  # objectivum_archivum_temporarium="${ROOTDIR}/1603/3/1603_3__adm0.TEMP.csv"
  # objectivum_archivum_hxltm_999999="${ROOTDIR}/999999/1603/3/45/16/1/1/1603_3_45_16_1_1.tm.hxl.csv"
  objectivum_archivum_temporarium="${ROOTDIR}/999999/0/1603_3_45_16_1_1.tm.hxl.csv"

  set -x
  "${ROOTDIR}/999999999/0/999999999_7200235.py" \
    --methodus='cod_ab_et_wdata' \
    --numerordinatio-praefixo='1603_16' \
    >"$objectivum_archivum_temporarium" || exit_code=$?
  set +x

  # file_update_if_necessary csv "$objectivum_archivum_temporarium" "$objectivum_archivum"

  return "$exit_code"
}

#######################################
# Return list of administrative level 0 codes ("country/territory" codes)
#
Expand Down Expand Up @@ -412,6 +439,9 @@ order by (?wmCode)
# # echo ""
# # caput_hxltm_ad_bcp47 "#item+rem+i_qcc+is_zxxx+ix_wikiq" ","
# # echo ""
# exit 0
# 1603_3_12_cod_ab_et_wdata

# exit 0

1603_3_12_wikipedia_language_codes
Expand All @@ -421,6 +451,8 @@ order by (?wmCode)
1603_3_12_wikipedia_adm0_v2
1603_3_12_wikipedia_adm1_v2

1603_3_12_cod_ab_et_wdata

# temp, see later
# - https://www.wikidata.org/wiki/Help:Frequently_used_properties
# - https://www.wikidata.org/wiki/Property:P3896
Expand Down

0 comments on commit 09de840

Please sign in to comment.