Skip to content

Commit

Permalink
feat: uploader: implement --bin-7z
Browse files: browse the repository at this point in the history
  • Loading branch information
yzqzss committed Mar 5, 2024
1 parent 3ee0c18 commit dc37bdc
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 24 deletions.
20 changes: 10 additions & 10 deletions wikiteam3/uploader/compresser.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,14 @@ def test_integrity(self, path: Union[str, Path]) -> bool:
return r.returncode == 0

class SevenZipCompressor:
def __init__(self, bin7z: str = "7z"):
retcode = subprocess.call([bin7z, "-h"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
bin_7z = "7z"
def __init__(self, bin_7z: str = "7z"):
self.bin_7z = bin_7z
retcode = subprocess.call([self.bin_7z, "-h"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
if retcode:
raise FileNotFoundError(f"7z binary not found at {bin7z}")
raise FileNotFoundError(f"7z binary not found at {self.bin_7z}")

@staticmethod
def compress_dir(dir_path: Union[str, Path], bin7z: str = "7z", level: int = 0):
def compress_dir(self, dir_path: Union[str, Path], level: int = 0):
''' Compress dir_path into dump_dir.7z and return the resolved path to the compressed file.
level:
Expand All @@ -109,11 +110,11 @@ def compress_dir(dir_path: Union[str, Path], bin7z: str = "7z", level: int = 0):
return archive_path

if level:
cmds = [bin7z, "a", "-t7z", "-m0=lzma2", f"-mx={level}", "-scsUTF-8",
cmds = [self.bin_7z, "a", "-t7z", "-m0=lzma2", f"-mx={level}", "-scsUTF-8",
"-md=64m", "-ms=off"]
else: # level == 0
assert level == 0
cmds = [bin7z, "a", "-t7z", f"-mx={level}", "-scsUTF-8", "-ms=off"]
cmds = [self.bin_7z, "a", "-t7z", f"-mx={level}", "-scsUTF-8", "-ms=off"]
cmds.extend([str(archive_temp_path), str(dir_path)])

r = subprocess.run(cmds, check=True)
Expand All @@ -124,13 +125,12 @@ def compress_dir(dir_path: Union[str, Path], bin7z: str = "7z", level: int = 0):
assert archive_path == archive_path.resolve()
return archive_path

@staticmethod
def test_integrity(path: Union[str, Path], bin7z: str = "7z") -> bool:
def test_integrity(self, path: Union[str, Path]) -> bool:
''' Test if path is a valid 7z archive. '''
if isinstance(path, str):
path = Path(path)
path = path.resolve()
r = subprocess.run([bin7z, "t", str(path)])
r = subprocess.run([self.bin_7z, "t", str(path)])
return r.returncode == 0

if __name__ == "__main__":
Expand Down
30 changes: 16 additions & 14 deletions wikiteam3/uploader/uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,25 +149,26 @@ def prepare_xml_zst_file(wikidump_dir: Path, config: Config, *, parallel: bool,
return xml_zstd_file_path.resolve()


def prepare_images_7z_archive(wikidump_dir: Path, config: Config, parallel: bool) -> Path:

def prepare_images_7z_archive(wikidump_dir: Path, config: Config, parallel: bool, *,
sevenzip_compressor: SevenZipCompressor) -> Path:
images_dir = wikidump_dir / "images"
assert images_dir.exists() and images_dir.is_dir()

images_7z_archive_path = wikidump_dir / f"{config2basename(config)}-images.7z"
if not images_7z_archive_path.exists() or not images_7z_archive_path.is_file():
with NoLock() if parallel else SocketLockServer():
r = SevenZipCompressor.compress_dir(images_dir)
r = sevenzip_compressor.compress_dir(images_dir)
shutil.move(r, images_7z_archive_path)

assert SevenZipCompressor.test_integrity(images_7z_archive_path)
assert sevenzip_compressor.test_integrity(images_7z_archive_path)

assert images_7z_archive_path.exists() and images_7z_archive_path.is_file()
return images_7z_archive_path.resolve()


def prepare_files_to_upload(wikidump_dir: Path, config: Config, item: Item, *, parallel: bool,
zstd_compressor: ZstdCompressor, zstd_level: int
zstd_compressor: ZstdCompressor, zstd_level: int,
sevenzip_compressor: SevenZipCompressor
) -> Dict[str, str]:
""" return: filedict ("remote filename": "local filename") """
filedict = {} # "remote filename": "local filename"
Expand Down Expand Up @@ -219,7 +220,7 @@ def prepare_files_to_upload(wikidump_dir: Path, config: Config, item: Item, *, p
filedict[f"{config2basename(config)}-dumpMeta/{images_txt_zstd_path.name}"] = str(images_txt_zstd_path)

# images.7z
images_7z_archive_path = prepare_images_7z_archive(wikidump_dir, config, parallel)
images_7z_archive_path = prepare_images_7z_archive(wikidump_dir, config, parallel, sevenzip_compressor=sevenzip_compressor)
filedict[f"{images_7z_archive_path.name}"] = str(images_7z_archive_path)

print("=== Files already uploaded: ===")
Expand Down Expand Up @@ -340,7 +341,7 @@ def prepare_item_metadata(wikidump_dir: Path, config: Config, arg: Args) -> Tupl

def upload(arg: Args):
zstd_compressor = ZstdCompressor(bin_zstd=arg.bin_zstd)
SevenZipCompressor()
sevenzip_compressor = SevenZipCompressor(bin_7z=arg.bin_7z)
ia_keys = read_ia_keys(arg.keys_file)
wikidump_dir = arg.wikidump_dir
wikidump_dir.name # {prefix}-{wikidump_dumpdate}-wikidump (e.g. wiki.example.org-20230730-wikidump)
Expand Down Expand Up @@ -382,8 +383,11 @@ def upload(arg: Args):
item = get_item(identifier)

print("=== Preparing files to upload ===")
filedict = prepare_files_to_upload(wikidump_dir, config, item, parallel=arg.parallel,
zstd_compressor=zstd_compressor, zstd_level=arg.zstd_level)
filedict = prepare_files_to_upload(
wikidump_dir, config, item, parallel=arg.parallel,
zstd_compressor=zstd_compressor, zstd_level=arg.zstd_level,
sevenzip_compressor=sevenzip_compressor
)

print("=== Preparing metadata ===")
metadata, logo_url = prepare_item_metadata(wikidump_dir, config, arg)
Expand Down Expand Up @@ -520,16 +524,14 @@ def main():
parser.add_argument("--dry-run", action="store_true", help="Dry run, do not upload anything.")
parser.add_argument("-u", "--update", action="store_true",
help="Update existing item. [!! not implemented yet !!]")
parser.add_argument("--bin-zstd", default="zstd", dest="bin_zstd",
parser.add_argument("--bin-zstd", default=ZstdCompressor.bin_zstd, dest="bin_zstd",
help=f"Path to zstd binary. [default: {ZstdCompressor.bin_zstd}]")
parser.add_argument("--zstd-level", default=ZstdCompressor.DEFAULT_LEVEL, type=int, choices=range(17, 23),
help=f"Zstd compression level. [default: {ZstdCompressor.DEFAULT_LEVEL}] "
f"If you have a lot of RAM, recommend to use max level (22)."
)
parser.add_argument("--bin-7z", default="7z", dest="bin_7z",
help="Path to 7z binary. [default: 7z] "
"[!! not implemented yet !!]"
)
parser.add_argument("--bin-7z", default=SevenZipCompressor.bin_7z, dest="bin_7z",
help=f"Path to 7z binary. [default: {SevenZipCompressor.bin_7z}] ")
parser.add_argument("--parallel", action="store_true", help="Parallelize compression tasks")
parser.add_argument("wikidump_dir")

Expand Down

0 comments on commit dc37bdc

Please sign in to comment.