Skip to content

Commit

Permalink
Add new JP characters to character lists
Browse files Browse the repository at this point in the history
  • Loading branch information
drojf committed Feb 3, 2024
1 parent 3e7f7aa commit f3c8d89
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 7 deletions.
35 changes: 30 additions & 5 deletions scripts/CharacterInfoExtraction/PythonTextExtractor/extract.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from pathlib import Path
import re

# Matches an OutputLine(...) call and captures two of its comma-separated
# arguments: group 1 is the second argument and group 2 is the fourth.
# NOTE(review): despite the name, this pattern is used for both the (presumably)
# Japanese and English text arguments - confirm against the script format.
# Arguments that themselves contain a comma end the capture early, because each
# argument is matched with [^,]*.
en_regex = re.compile(r'OutputLine\([^,]*,\s*([^,]*),\s*[^,]*,\s*([^,]*)')

def load_existing_list(path):
    """Return the full text of the file at ``path``, decoded as UTF-8.

    The file is opened with ``newline=''`` so newline translation is disabled
    and line endings are returned exactly as they are stored in the file.
    """
    with open(path, encoding='utf-8', newline='') as char_list_file:
        contents = char_list_file.read()
    return contents


# Character list file that newly found characters are merged into.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
existing_char_list = Path('C:/drojf/large_projects/umineko/ui-editing-scripts/scripts/CharacterInfoExtraction/msgothic_2_charset_JP_and_OtherLang.txt')
# Output path: the input path with '.out' appended after its existing suffix
# (e.g. 'name.txt' -> 'name.txt.out').
out_char_list = existing_char_list.with_suffix(existing_char_list.suffix + '.out')
# Root directory whose *.txt files are scanned recursively for OutputLine() calls.
source_directory = Path('C:/drojf/large_projects/umineko/HIGURASHI_REPOS')

Expand All @@ -17,15 +17,25 @@ def load_existing_list(path):

# Every distinct character found in the scanned OutputLine() arguments.
all_chars = set()

# Toggles selecting which captured OutputLine() argument contributes characters.
search_en = True
search_jp = True

# Walk every .txt file below source_directory and collect the characters used in
# the arguments captured by en_regex.
for file in source_directory.rglob("*.txt"):
    print(file)
    with open(file, encoding='utf-8') as f:
        whole_file_string = f.read()

    # finditer() only yields successful matches, so no truthiness check is needed.
    for match in en_regex.finditer(whole_file_string):
        # en_regex has two capture groups: group 1 is the second OutputLine()
        # argument and group 2 is the fourth. Reading group 1 for both variables
        # would silently ignore the fourth argument.
        outputline_jp_arg = match.group(1)
        outputline_english_arg = match.group(2)

        if search_en:
            all_chars.update(outputline_english_arg)

        if search_jp:
            all_chars.update(outputline_jp_arg)

# Collected characters as a list in ascending code point order.
all_chars_list = sorted(all_chars)
Expand Down Expand Up @@ -55,11 +65,26 @@ def load_existing_list(path):
f.write(c)

# This is very bad for performance if there are lots of new chars found, but it works for now to maintain ordering
remove_list = []
for new_character in chars_to_add:
if new_character < c:
f.write(new_character)
chars_to_add.remove(new_character)
remove_list.append(new_character)
print(f"Inserting new character {new_character} at position {i} as it is less than {c}")

for item in remove_list:
chars_to_add.remove(item)

remove_list = []
for char in chars_to_add:
if char not in existing_font_set:
f.write(char)
else:
print(f"WARNING: character {char} already exists, skipping")
remove_list.append(char)

for item in remove_list:
chars_to_add.remove(item)

if chars_to_add:
raise Exception(f"One or more characters were not added {chars_to_add}")
Loading

0 comments on commit f3c8d89

Please sign in to comment.